
[Xen-changelog] [xen-unstable] [XEN] Pin l3 shadows of older x86_64 linux guests.



# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Node ID 2fd223c64fc6bba1d0ced8322ecb0aa7a927c0b8
# Parent  47a8bb3cd1232b000152f7f0482c7584672552cb
[XEN] Pin l3 shadows of older x86_64 linux guests.
Older x86_64 linux kernels use one l4 table per cpu and context switch by
changing an l4 entry that points to an l3 table.  If we're shadowing such
a guest we need to pin its l3 shadows to stop them from being torn down on
every context switch.  (But don't do this for normal 64bit guests.)
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
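
For context, here is a minimal sketch (not taken from this patch or from Linux
itself; all names are illustrative) of the guest behaviour the message describes:
each cpu keeps a single l4 table and switches address spaces by rewriting one l4
slot to point at the incoming task's l3 tree.  Each such rewrite drops the old
l4e, so without pinning, the shadow of the outgoing l3 loses its last reference,
is destroyed, and has to be rebuilt on the next switch back.

/* Hypothetical illustration only -- not the patch's code and not real Linux
 * code.  One l4 per cpu; "context switch" swings one l4 entry over to the
 * next task's l3.  Under shadow paging, dropping the old l4e removes the
 * last reference to the old l3's shadow unless that shadow is pinned. */

#include <stdint.h>

#define GUEST_L4_SLOT   0x100            /* arbitrary slot used for task mappings */
#define PAGE_PRESENT    0x1ULL

typedef uint64_t l4e_t;

struct guest_cpu {
    l4e_t *l4_table;                     /* one l4 per cpu, never replaced */
};

/* Build an l4 entry from the physical address of an l3 table. */
static l4e_t make_l4e(uint64_t l3_phys)
{
    return (l3_phys & ~0xFFFULL) | PAGE_PRESENT;
}

/* Keep cr3 (the l4) fixed and redirect one l4 entry to the incoming task's
 * l3.  The l4 shadow never changes, but the l3 shadow it points to would be
 * torn down and re-created on every switch if it were not pinned. */
void guest_switch_mm(struct guest_cpu *cpu, uint64_t next_l3_phys)
{
    cpu->l4_table[GUEST_L4_SLOT] = make_l4e(next_l3_phys);
    /* (a real kernel would also flush the TLB here) */
}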
---
 xen/arch/x86/domain.c            |    2 
 xen/arch/x86/mm/shadow/common.c  |  164 ++++++++++++++++++---------------------
 xen/arch/x86/mm/shadow/multi.c   |   88 +++++++++++++-------
 xen/arch/x86/mm/shadow/private.h |   71 ++++++++++++++--
 xen/include/asm-x86/domain.h     |    3 
 xen/include/asm-x86/shadow.h     |   23 -----
 6 files changed, 203 insertions(+), 148 deletions(-)
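
The hypervisor side of the patch boils down to a lazily-made policy decision:
assume the per-cpu-l4 pattern (and pin l3 shadows) until the guest is seen using
clearly more l4 tables than it has vcpus, then turn the optimization off.  A
compressed sketch of that decision follows; the helper names and types are
hypothetical, only the threshold (more than two pinned l4 shadows per vcpu)
mirrors the patch.

/* Simplified sketch of the policy applied in sh_make_shadow() and consulted
 * via sh_type_is_pinnable(); names here are illustrative, not Xen's own. */

#include <stdbool.h>

struct shadow_policy {
    bool pin_l3_shadows;     /* start out true: assume an old linux guest */
};

/* Called when a new l4 shadow is created.  l4_shadows is the number of
 * currently pinned l4 shadows, vcpus the number of vcpus in the domain. */
void reconsider_l3_pinning(struct shadow_policy *p,
                           unsigned int l4_shadows, unsigned int vcpus)
{
    /* An old per-cpu-l4 kernel needs roughly one l4 per vcpu.  Seeing many
     * more than that means the guest really uses one l4 per process, so
     * pinning every l3 shadow would only waste shadow memory. */
    if (p->pin_l3_shadows && l4_shadows > 2 * vcpus)
        p->pin_l3_shadows = false;   /* and unpin the l3 shadows held so far */
}

/* Consulted when an l4 entry is shadowed: pin the referenced l3 shadow only
 * while the heuristic still believes this is an old linux guest. */
bool should_pin_l3(const struct shadow_policy *p)
{
    return p->pin_l3_shadows;
}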

diff -r 47a8bb3cd123 -r 2fd223c64fc6 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/arch/x86/domain.c     Thu Nov 23 17:46:52 2006 +0000
@@ -219,7 +219,7 @@ int arch_domain_create(struct domain *d)
         INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
     INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
     INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
-    INIT_LIST_HEAD(&d->arch.shadow.toplevel_shadows);
+    INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows);
 
     if ( !is_idle_domain(d) )
     {
diff -r 47a8bb3cd123 -r 2fd223c64fc6 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/arch/x86/mm/shadow/common.c   Thu Nov 23 17:46:52 2006 +0000
@@ -495,6 +495,7 @@ void shadow_prealloc(struct domain *d, u
     struct shadow_page_info *sp;
     cpumask_t flushmask = CPU_MASK_NONE;
     mfn_t smfn;
+    int i;
 
     if ( chunk_is_available(d, order) ) return; 
     
@@ -503,9 +504,9 @@ void shadow_prealloc(struct domain *d, u
         v = d->vcpu[0];
     ASSERT(v != NULL);
 
-    /* Stage one: walk the list of top-level pages, unpinning them */
+    /* Stage one: walk the list of pinned pages, unpinning them */
     perfc_incrc(shadow_prealloc_1);
-    list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows)
+    list_for_each_backwards_safe(l, t, &d->arch.shadow.pinned_shadows)
     {
         sp = list_entry(l, struct shadow_page_info, list);
         smfn = shadow_page_to_mfn(sp);
@@ -521,31 +522,24 @@ void shadow_prealloc(struct domain *d, u
      * loaded in cr3 on some vcpu.  Walk them, unhooking the non-Xen
      * mappings. */
     perfc_incrc(shadow_prealloc_2);
-    list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows)
-    {
-        sp = list_entry(l, struct shadow_page_info, list);
-        smfn = shadow_page_to_mfn(sp);
-        shadow_unhook_mappings(v, smfn);
-
-        /* Remember to flush TLBs: we have removed shadow entries that 
-         * were in use by some vcpu(s). */
-        for_each_vcpu(d, v2) 
-        {
-            if ( pagetable_get_pfn(v2->arch.shadow_table[0]) == mfn_x(smfn)
-                 || pagetable_get_pfn(v2->arch.shadow_table[1]) == mfn_x(smfn)
-                 || pagetable_get_pfn(v2->arch.shadow_table[2]) == mfn_x(smfn) 
-                 || pagetable_get_pfn(v2->arch.shadow_table[3]) == mfn_x(smfn)
-                )
+
+    for_each_vcpu(d, v2) 
+        for ( i = 0 ; i < 4 ; i++ )
+        {
+            if ( !pagetable_is_null(v2->arch.shadow_table[i]) )
+            {
+                shadow_unhook_mappings(v, 
+                               pagetable_get_mfn(v2->arch.shadow_table[i]));
                 cpus_or(flushmask, v2->vcpu_dirty_cpumask, flushmask);
-        }
-
-        /* See if that freed up a chunk of appropriate size */
-        if ( chunk_is_available(d, order) ) 
-        {
-            flush_tlb_mask(flushmask);
-            return;
-        }
-    }
+
+                /* See if that freed up a chunk of appropriate size */
+                if ( chunk_is_available(d, order) ) 
+                {
+                    flush_tlb_mask(flushmask);
+                    return;
+                }
+            }
+        }
     
     /* Nothing more we can do: all remaining shadows are of pages that
      * hold Xen mappings for some vcpu.  This can never happen. */
@@ -558,52 +552,57 @@ void shadow_prealloc(struct domain *d, u
     BUG();
 }
 
+/* Deliberately free all the memory we can: this will tear down all of
+ * this domain's shadows */
+static void shadow_blow_tables(struct domain *d) 
+{
+    struct list_head *l, *t;
+    struct shadow_page_info *sp;
+    struct vcpu *v = d->vcpu[0];
+    mfn_t smfn;
+    int i;
+    
+    /* Pass one: unpin all pinned pages */
+    list_for_each_backwards_safe(l,t, &d->arch.shadow.pinned_shadows)
+    {
+        sp = list_entry(l, struct shadow_page_info, list);
+        smfn = shadow_page_to_mfn(sp);
+        sh_unpin(v, smfn);
+    }
+        
+    /* Second pass: unhook entries of in-use shadows */
+    for_each_vcpu(d, v) 
+        for ( i = 0 ; i < 4 ; i++ )
+            if ( !pagetable_is_null(v->arch.shadow_table[i]) )
+                shadow_unhook_mappings(v, 
+                               pagetable_get_mfn(v->arch.shadow_table[i]));
+
+    /* Make sure everyone sees the unshadowings */
+    flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
+
 #ifndef NDEBUG
-/* Deliberately free all the memory we can: this can be used to cause the
+/* Blow all shadows of all shadowed domains: this can be used to cause the
  * guest's pagetables to be re-shadowed if we suspect that the shadows
  * have somehow got out of sync */
-static void shadow_blow_tables(unsigned char c)
-{
-    struct list_head *l, *t;
-    struct shadow_page_info *sp;
+static void shadow_blow_all_tables(unsigned char c)
+{
     struct domain *d;
-    struct vcpu *v;
-    mfn_t smfn;
-
+    printk("'%c' pressed -> blowing all shadow tables\n", c);
     for_each_domain(d)
-    {
-        if ( shadow_mode_enabled(d) && (v = d->vcpu[0]) != NULL)
+        if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL )
         {
             shadow_lock(d);
-            printk("Blowing shadow tables for domain %u\n", d->domain_id);
-
-            /* Pass one: unpin all top-level pages */
-            list_for_each_backwards_safe(l,t, &d->arch.shadow.toplevel_shadows)
-            {
-                sp = list_entry(l, struct shadow_page_info, list);
-                smfn = shadow_page_to_mfn(sp);
-                sh_unpin(v, smfn);
-            }
-
-            /* Second pass: unhook entries of in-use shadows */
-            list_for_each_backwards_safe(l,t, &d->arch.shadow.toplevel_shadows)
-            {
-                sp = list_entry(l, struct shadow_page_info, list);
-                smfn = shadow_page_to_mfn(sp);
-                shadow_unhook_mappings(v, smfn);
-            }
-            
-            /* Make sure everyone sees the unshadowings */
-            flush_tlb_mask(d->domain_dirty_cpumask);
+            shadow_blow_tables(d);
             shadow_unlock(d);
         }
-    }
 }
 
 /* Register this function in the Xen console keypress table */
 static __init int shadow_blow_tables_keyhandler_init(void)
 {
-    register_keyhandler('S', shadow_blow_tables, "reset shadow pagetables");
+    register_keyhandler('S', shadow_blow_all_tables,"reset shadow pagetables");
     return 0;
 }
 __initcall(shadow_blow_tables_keyhandler_init);
@@ -789,9 +788,9 @@ shadow_alloc_p2m_page(struct domain *d)
 shadow_alloc_p2m_page(struct domain *d)
 {
     struct list_head *entry;
+    struct page_info *pg;
     mfn_t mfn;
     void *p;
-    int ok;
 
     if ( list_empty(&d->arch.shadow.p2m_freelist) &&
          !shadow_alloc_p2m_pages(d) )
@@ -799,9 +798,9 @@ shadow_alloc_p2m_page(struct domain *d)
     entry = d->arch.shadow.p2m_freelist.next;
     list_del(entry);
     list_add_tail(entry, &d->arch.shadow.p2m_inuse);
-    mfn = page_to_mfn(list_entry(entry, struct page_info, list));
-    ok = sh_get_ref(mfn, 0);
-    ASSERT(ok); /* First sh_get_ref() can't possibly overflow */
+    pg = list_entry(entry, struct page_info, list);
+    pg->count_info = 1;
+    mfn = page_to_mfn(pg);
     p = sh_map_domain_page(mfn);
     clear_page(p);
     sh_unmap_domain_page(p);
@@ -2067,37 +2066,32 @@ void sh_remove_shadows(struct vcpu *v, m
      * This call to hash_foreach() looks dangerous but is in fact OK: each
      * call will remove at most one shadow, and terminate immediately when
      * it does remove it, so we never walk the hash after doing a deletion.  */
-#define DO_UNSHADOW(_type) do {                                 \
-    t = (_type);                                                \
-    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);               \
-    if ( !sh_remove_shadow_via_pointer(v, smfn) && !fast )      \
-        hash_foreach(v, masks[t], callbacks, smfn);             \
-} while (0)
-
-    /* Top-level shadows need to be unpinned */
-#define DO_UNPIN(_type) do {                            \
+#define DO_UNSHADOW(_type) do {                         \
     t = (_type);                                        \
     smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);       \
-    if ( mfn_to_shadow_page(smfn)->pinned )             \
+    if ( sh_type_is_pinnable(v, t) )                    \
         sh_unpin(v, smfn);                              \
+    else                                                \
+        sh_remove_shadow_via_pointer(v, smfn);          \
+    if ( (pg->count_info & PGC_page_table) && !fast )   \
+        hash_foreach(v, masks[t], callbacks, smfn);     \
 } while (0)
 
     if ( sh_flags & SHF_L1_32 )   DO_UNSHADOW(SH_type_l1_32_shadow);
-    if ( sh_flags & SHF_L2_32 )   DO_UNPIN(SH_type_l2_32_shadow);
+    if ( sh_flags & SHF_L2_32 )   DO_UNSHADOW(SH_type_l2_32_shadow);
 #if CONFIG_PAGING_LEVELS >= 3
     if ( sh_flags & SHF_L1_PAE )  DO_UNSHADOW(SH_type_l1_pae_shadow);
-    if ( sh_flags & SHF_L2_PAE )  DO_UNPIN(SH_type_l2_pae_shadow);
-    if ( sh_flags & SHF_L2H_PAE ) DO_UNPIN(SH_type_l2h_pae_shadow);
+    if ( sh_flags & SHF_L2_PAE )  DO_UNSHADOW(SH_type_l2_pae_shadow);
+    if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(SH_type_l2h_pae_shadow);
 #if CONFIG_PAGING_LEVELS >= 4
     if ( sh_flags & SHF_L1_64 )   DO_UNSHADOW(SH_type_l1_64_shadow);
     if ( sh_flags & SHF_L2_64 )   DO_UNSHADOW(SH_type_l2_64_shadow);
     if ( sh_flags & SHF_L3_64 )   DO_UNSHADOW(SH_type_l3_64_shadow);
-    if ( sh_flags & SHF_L4_64 )   DO_UNPIN(SH_type_l4_64_shadow);
+    if ( sh_flags & SHF_L4_64 )   DO_UNSHADOW(SH_type_l4_64_shadow);
 #endif
 #endif
 
 #undef DO_UNSHADOW
-#undef DO_UNPIN
 
     /* If that didn't catch the shadows, something is wrong */
     if ( !fast && (pg->count_info & PGC_page_table) )
@@ -2393,6 +2387,12 @@ int shadow_enable(struct domain *d, u32 
             goto out;
         }
 
+#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 
+    /* We assume we're dealing with an older 64bit linux guest until we 
+     * see the guest use more than one l4 per vcpu. */
+    d->arch.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
+#endif
+
     /* Update the bits */
     sh_new_mode(d, mode);
     shadow_audit_p2m(d);
@@ -2831,18 +2831,10 @@ static int shadow_log_dirty_op(
         
     if ( clean ) 
     {
-        struct list_head *l, *t;
-        struct shadow_page_info *sp;
-
         /* Need to revoke write access to the domain's pages again. 
          * In future, we'll have a less heavy-handed approach to this, 
          * but for now, we just unshadow everything except Xen. */
-        list_for_each_safe(l, t, &d->arch.shadow.toplevel_shadows)
-        {
-            sp = list_entry(l, struct shadow_page_info, list);
-            if ( d->vcpu[0] != NULL )
-                shadow_unhook_mappings(d->vcpu[0], shadow_page_to_mfn(sp));
-        }
+        shadow_blow_tables(d);
 
         d->arch.shadow.fault_count = 0;
         d->arch.shadow.dirty_count = 0;
diff -r 47a8bb3cd123 -r 2fd223c64fc6 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c    Thu Nov 23 17:46:52 2006 +0000
@@ -964,7 +964,7 @@ static int shadow_set_l4e(struct vcpu *v
                           shadow_l4e_t new_sl4e, 
                           mfn_t sl4mfn)
 {
-    int flags = 0;
+    int flags = 0, ok;
     shadow_l4e_t old_sl4e;
     paddr_t paddr;
     ASSERT(sl4e != NULL);
@@ -976,12 +976,19 @@ static int shadow_set_l4e(struct vcpu *v
              | (((unsigned long)sl4e) & ~PAGE_MASK));
 
     if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT ) 
+    {
         /* About to install a new reference */        
-        if ( !sh_get_ref(shadow_l4e_get_mfn(new_sl4e), paddr) )
+        mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e);
+        ok = sh_get_ref(v, sl3mfn, paddr);
+        /* Are we pinning l3 shadows to handle weird linux behaviour? */
+        if ( sh_type_is_pinnable(v, SH_type_l3_64_shadow) )
+            ok |= sh_pin(v, sl3mfn);
+        if ( !ok )
         {
             domain_crash(v->domain);
             return SHADOW_SET_ERROR;
         }
+    }
 
     /* Write the new entry */
     shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
@@ -1020,7 +1027,7 @@ static int shadow_set_l3e(struct vcpu *v
     
     if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
         /* About to install a new reference */        
-        if ( !sh_get_ref(shadow_l3e_get_mfn(new_sl3e), paddr) )
+        if ( !sh_get_ref(v, shadow_l3e_get_mfn(new_sl3e), paddr) )
         {
             domain_crash(v->domain);
             return SHADOW_SET_ERROR;
@@ -1076,7 +1083,7 @@ static int shadow_set_l2e(struct vcpu *v
 
     if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT ) 
         /* About to install a new reference */
-        if ( !sh_get_ref(shadow_l2e_get_mfn(new_sl2e), paddr) )
+        if ( !sh_get_ref(v, shadow_l2e_get_mfn(new_sl2e), paddr) )
         {
             domain_crash(v->domain);
             return SHADOW_SET_ERROR;
@@ -1361,8 +1368,6 @@ do {                                    
 /**************************************************************************/
 /* Functions to install Xen mappings and linear mappings in shadow pages */
 
-static mfn_t sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type);
-
 // XXX -- this function should probably be moved to shadow-common.c, but that
 //        probably wants to wait until the shadow types have been moved from
 //        shadow-types.h to shadow-private.h
@@ -1546,6 +1551,44 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
          && shadow_type != SH_type_l4_64_shadow )
         /* Lower-level shadow, not yet linked from a higher level */
         mfn_to_shadow_page(smfn)->up = 0;
+
+#if GUEST_PAGING_LEVELS == 4
+#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 
+    if ( shadow_type == SH_type_l4_64_shadow &&
+         unlikely(v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) )
+    {
+        /* We're shadowing a new l4, but we've been assuming the guest uses
+         * only one l4 per vcpu and context switches using an l4 entry. 
+         * Count the number of active l4 shadows.  If there are enough
+         * of them, decide that this isn't an old linux guest, and stop
+         * pinning l3es.  This is not very quick but it doesn't happen
+         * very often. */
+        struct list_head *l, *t;
+        struct shadow_page_info *sp;
+        struct vcpu *v2;
+        int l4count = 0, vcpus = 0;
+        list_for_each(l, &v->domain->arch.shadow.pinned_shadows)
+        {
+            sp = list_entry(l, struct shadow_page_info, list);
+            if ( sp->type == SH_type_l4_64_shadow )
+                l4count++;
+        }
+        for_each_vcpu ( v->domain, v2 ) 
+            vcpus++;
+        if ( l4count > 2 * vcpus ) 
+        {
+            /* Unpin all the pinned l3 tables, and don't pin any more. */
+            list_for_each_safe(l, t, &v->domain->arch.shadow.pinned_shadows)
+            {
+                sp = list_entry(l, struct shadow_page_info, list);
+                if ( sp->type == SH_type_l3_64_shadow )
+                    sh_unpin(v, shadow_page_to_mfn(sp));
+            }
+            v->domain->arch.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL;
+        }
+    }
+#endif
+#endif
 
     // Create the Xen mappings...
     if ( !shadow_mode_external(v->domain) )
@@ -1893,9 +1936,6 @@ void sh_destroy_l4_shadow(struct vcpu *v
     gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
-    /* Take this shadow off the list of root shadows */
-    list_del_init(&mfn_to_shadow_page(smfn)->list);
-
     /* Decrement refcounts of all the old entries */
     xen_mappings = (!shadow_mode_external(v->domain));
     sl4mfn = smfn; 
@@ -1903,8 +1943,8 @@ void sh_destroy_l4_shadow(struct vcpu *v
         if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT ) 
         {
             sh_put_ref(v, shadow_l4e_get_mfn(*sl4e),
-                        (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT) 
-                        | ((unsigned long)sl4e & ~PAGE_MASK));
+                       (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT) 
+                       | ((unsigned long)sl4e & ~PAGE_MASK));
         }
     });
     
@@ -1958,10 +1998,6 @@ void sh_destroy_l2_shadow(struct vcpu *v
     gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
-#if (GUEST_PAGING_LEVELS == 2) || (GUEST_PAGING_LEVELS == 3)
-    /* Take this shadow off the list of root shadows */
-    list_del_init(&mfn_to_shadow_page(smfn)->list);
-#endif
 
     /* Decrement refcounts of all the old entries */
     sl2mfn = smfn;
@@ -3276,13 +3312,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
 
     /* Guest mfn is valid: shadow it and install the shadow */
     smfn = get_shadow_status(v, gmfn, root_type);
-    if ( valid_mfn(smfn) )
-    {
-        /* Pull this root shadow out of the list of roots (we will put
-         * it back in at the head). */
-        list_del(&mfn_to_shadow_page(smfn)->list);
-    }
-    else
+    if ( !valid_mfn(smfn) )
     {
         /* Make sure there's enough free shadow memory. */
         shadow_prealloc(d, SHADOW_MAX_ORDER); 
@@ -3298,17 +3328,15 @@ sh_set_toplevel_shadow(struct vcpu *v,
 #endif
 
     /* Pin the shadow and put it (back) on the list of top-level shadows */
-    if ( sh_pin(smfn) )
-        list_add(&mfn_to_shadow_page(smfn)->list, 
-                 &d->arch.shadow.toplevel_shadows);
-    else 
+    if ( sh_pin(v, smfn) == 0 )
     {
         SHADOW_ERROR("can't pin %#lx as toplevel shadow\n", mfn_x(smfn));
         domain_crash(v->domain);
-    }        
-
-    /* Take a ref to this page: it will be released in sh_detach_old_tables. */
-    if ( !sh_get_ref(smfn, 0) )
+    }
+
+    /* Take a ref to this page: it will be released in sh_detach_old_tables()
+     * or the next call to set_toplevel_shadow() */
+    if ( !sh_get_ref(v, smfn, 0) )
     {
         SHADOW_ERROR("can't install %#lx as toplevel shadow\n", mfn_x(smfn));
         domain_crash(v->domain);
diff -r 47a8bb3cd123 -r 2fd223c64fc6 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/arch/x86/mm/shadow/private.h  Thu Nov 23 17:46:52 2006 +0000
@@ -157,9 +157,11 @@ struct shadow_page_info
     } __attribute__((packed));
     union {
         /* For unused shadow pages, a list of pages of this order; 
-         * for top-level shadows, a list of other top-level shadows */
+         * for pinnable shadows, if pinned, a list of other pinned shadows
+         * (see sh_type_is_pinnable() below for the definition of 
+         * "pinnable" shadow types). */
         struct list_head list;
-        /* For lower-level shadows, a higher entry that points at us */
+        /* For non-pinnable shadows, a higher entry that points at us */
         paddr_t up;
     };
 };
@@ -195,6 +197,36 @@ static inline void shadow_check_page_str
 #define SH_type_monitor_table (14U) /* in use as a monitor table */
 #define SH_type_unused        (15U)
 
+/* 
+ * What counts as a pinnable shadow?
+ */
+
+static inline int sh_type_is_pinnable(struct vcpu *v, unsigned int t) 
+{
+    /* Top-level shadow types in each mode can be pinned, so that they 
+     * persist even when not currently in use in a guest CR3 */
+    if ( t == SH_type_l2_32_shadow
+         || t == SH_type_l2_pae_shadow
+         || t == SH_type_l2h_pae_shadow 
+         || t == SH_type_l4_64_shadow )
+        return 1;
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 
+    /* Early 64-bit linux used three levels of pagetables for the guest
+     * and context switched by changing one l4 entry in a per-cpu l4
+     * page.  When we're shadowing those kernels, we have to pin l3
+     * shadows so they don't just evaporate on every context switch.
+     * For all other guests, we'd rather use the up-pointer field in l3s. */ 
+    if ( unlikely((v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) 
+                  && CONFIG_PAGING_LEVELS >= 4
+                  && t == SH_type_l3_64_shadow) )
+        return 1;
+#endif
+
+    /* Everything else is not pinnable, and can use the "up" pointer */
+    return 0;
+}
+
 /*
  * Definitions for the shadow_flags field in page_info.
  * These flags are stored on *guest* pages...
@@ -364,7 +396,7 @@ void sh_destroy_shadow(struct vcpu *v, m
  * and the physical address of the shadow entry that holds the ref (or zero
  * if the ref is held by something else).  
  * Returns 0 for failure, 1 for success. */
-static inline int sh_get_ref(mfn_t smfn, paddr_t entry_pa)
+static inline int sh_get_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
 {
     u32 x, nx;
     struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
@@ -385,7 +417,9 @@ static inline int sh_get_ref(mfn_t smfn,
     sp->count = nx;
 
     /* We remember the first shadow entry that points to each shadow. */
-    if ( entry_pa != 0 && sp->up == 0 ) 
+    if ( entry_pa != 0 
+         && sh_type_is_pinnable(v, sp->type) 
+         && sp->up == 0 ) 
         sp->up = entry_pa;
     
     return 1;
@@ -403,7 +437,9 @@ static inline void sh_put_ref(struct vcp
     ASSERT(sp->mbz == 0);
 
     /* If this is the entry in the up-pointer, remove it */
-    if ( entry_pa != 0 && sp->up == entry_pa ) 
+    if ( entry_pa != 0 
+         && sh_type_is_pinnable(v, sp->type) 
+         && sp->up == entry_pa ) 
         sp->up = 0;
 
     x = sp->count;
@@ -424,33 +460,48 @@ static inline void sh_put_ref(struct vcp
 }
 
 
-/* Pin a shadow page: take an extra refcount and set the pin bit.
+/* Pin a shadow page: take an extra refcount, set the pin bit,
+ * and put the shadow at the head of the list of pinned shadows.
  * Returns 0 for failure, 1 for success. */
-static inline int sh_pin(mfn_t smfn)
+static inline int sh_pin(struct vcpu *v, mfn_t smfn)
 {
     struct shadow_page_info *sp;
     
     ASSERT(mfn_valid(smfn));
     sp = mfn_to_shadow_page(smfn);
-    if ( !(sp->pinned) ) 
+    ASSERT(sh_type_is_pinnable(v, sp->type));
+    if ( sp->pinned ) 
     {
-        if ( !sh_get_ref(smfn, 0) )
+        /* Already pinned: take it out of the pinned-list so it can go 
+         * at the front */
+        list_del(&sp->list);
+    }
+    else
+    {
+        /* Not pinned: pin it! */
+        if ( !sh_get_ref(v, smfn, 0) )
             return 0;
         sp->pinned = 1;
     }
+    /* Put it at the head of the list of pinned shadows */
+    list_add(&sp->list, &v->domain->arch.shadow.pinned_shadows);
     return 1;
 }
 
-/* Unpin a shadow page: unset the pin bit and release the extra ref. */
+/* Unpin a shadow page: unset the pin bit, take the shadow off the list
+ * of pinned shadows, and release the extra ref. */
 static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
 {
     struct shadow_page_info *sp;
     
     ASSERT(mfn_valid(smfn));
     sp = mfn_to_shadow_page(smfn);
+    ASSERT(sh_type_is_pinnable(v, sp->type));
     if ( sp->pinned )
     {
         sp->pinned = 0;
+        list_del(&sp->list);
+        sp->up = 0; /* in case this stops being a pinnable type in future */
         sh_put_ref(v, smfn, 0);
     }
 }
diff -r 47a8bb3cd123 -r 2fd223c64fc6 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/include/asm-x86/domain.h      Thu Nov 23 17:46:52 2006 +0000
@@ -65,10 +65,11 @@ struct shadow_domain {
     struct list_head  freelists[SHADOW_MAX_ORDER + 1]; 
     struct list_head  p2m_freelist;
     struct list_head  p2m_inuse;
-    struct list_head  toplevel_shadows;
+    struct list_head  pinned_shadows;
     unsigned int      total_pages;  /* number of pages allocated */
     unsigned int      free_pages;   /* number of pages on freelists */
     unsigned int      p2m_pages;    /* number of pages in p2m map */
+    unsigned int      opt_flags;    /* runtime tunable optimizations on/off */
 
     /* Shadow hashtable */
     struct shadow_page_info **hash_table;
diff -r 47a8bb3cd123 -r 2fd223c64fc6 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/include/asm-x86/shadow.h      Thu Nov 23 17:46:52 2006 +0000
@@ -158,8 +158,9 @@ extern int shadow_audit_enable;
 #define SHOPT_EARLY_UNSHADOW      0x02  /* Unshadow l1s on fork or exit */
 #define SHOPT_FAST_FAULT_PATH     0x04  /* Fast-path MMIO and not-present */
 #define SHOPT_PREFETCH            0x08  /* Shadow multiple entries per fault */
-
-#define SHADOW_OPTIMIZATIONS      0x0f
+#define SHOPT_LINUX_L3_TOPLEVEL   0x10  /* Pin l3es on early 64bit linux */
+
+#define SHADOW_OPTIMIZATIONS      0x1f
 
 
 /* With shadow pagetables, the different kinds of address start 
@@ -594,24 +595,6 @@ static inline unsigned int shadow_get_al
             + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
 }
 
-#if SHADOW_OPTIMIZATIONS & SHOPT_CACHE_WALKS
-/* Optimization: cache the results of guest walks.  This helps with MMIO
- * and emulated writes, which tend to issue very similar walk requests
- * repeatedly.  We keep the results of the last few walks, and blow
- * away the cache on guest cr3 write, mode change, or page fault. */
-
-#define SH_WALK_CACHE_ENTRIES 4
-
-/* Rather than cache a guest walk, which would include mapped pointers 
- * to pages, we cache what a TLB would remember about the walk: the 
- * permissions and the l1 gfn */
-struct shadow_walk_cache {
-    unsigned long va;           /* The virtual address (or 0 == unused) */
-    unsigned long gfn;          /* The gfn from the effective l1e   */
-    u32 permissions;            /* The aggregated permission bits   */
-};
-#endif
-
 
 /**************************************************************************/
 /* Guest physmap (p2m) support 
