
[Xen-changelog] Because of a bug with reference counting against the target guest page



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID b41e51ffa5eacf07ed7843e5f85ae1748e19c370
# Parent  3fd239d8b640968c56fbd5aacfe3564f12203f54
Because of a reference-counting bug against the target guest page, the
code that searches the hash list for L1 shadow pages which write-protect
that page (at shadow_promote(), called from alloc_shadow_page()) was
always scanning _all_ the entries in the list. The hash list can exceed
500 entries for L1 shadow pages, and every PTE in each page had to be
checked (on the order of several hundred thousand PTE checks per
write-protected page).
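
To make that cost concrete, here is a small standalone model (illustrative
only, not Xen code; the chain length and table size are taken from the
numbers above) of the scan, and of how an accurate reference count lets it
stop early, which is what the first item below implements:

    /* Standalone model, illustrative only: a chain of "shadow L1 pages",
     * each with 1024 entries.  With an accurate write_refs count the scan
     * stops after fixing that many entries; with a wrong (too high) count,
     * as in the bug, the exit condition never fires and every entry of
     * every page is visited. */
    #include <stdio.h>

    #define CHAIN_LEN  500
    #define L1_ENTRIES 1024

    static int table[CHAIN_LEN][L1_ENTRIES];  /* nonzero = maps the target */

    static long scan(int write_refs)
    {
        long visited = 0;
        int found = 0;

        for (int p = 0; p < CHAIN_LEN; p++)
            for (int i = 0; i < L1_ENTRIES; i++) {
                visited++;
                if (table[p][i] && ++found == write_refs)
                    return visited;        /* early exit: all refs fixed */
            }
        return visited;                    /* count wrong: full walk */
    }

    int main(void)
    {
        table[2][100] = 1;                 /* one writable mapping */
        printf("accurate count: %ld entries visited\n", scan(1));
        printf("broken count  : %ld entries visited\n", scan(2));
        return 0;
    }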

The attached patch does the following:
- Corrects the reference count for the target guest page, so that the
  loop can exit as soon as all the L1 shadow pages to modify have been
  found. Even with this, the entire list may still be searched if the
  page is at the end.
- Tries to avoid the hash-list search altogether by keeping a back
  pointer (as a hint) to the shadow page pfn; in most cases there is a
  single translation for a guest page in the shadow (see the sketch
  after the next paragraph).
- Cleanups: turns the nested function fix_entry into a regular static
  function.

With these changes, kernel-build performance improved by approximately
20% on 32-bit and 40% on 64-bit unmodified Linux guests. Log-dirty mode
was also tested for plain 32-bit.
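
The back-pointer scheme from the second item can be exercised with a
self-contained toy (the structures here are simplified stand-ins, not the
real Xen types; the actual patch stashes the shadow mfn in the guest
frame's tlbflush_timestamp and the PTE index in the PGT_va bits of
type_info, as the diff below shows):

    /* Toy model of the back-pointer fast path; types and layout are
     * hypothetical stand-ins for the reused pfn_info fields. */
    #include <assert.h>
    #include <stdint.h>

    #define L1_ENTRIES 1024
    #define _PAGE_RW   0x2u

    static uint32_t shadow_l1[8][L1_ENTRIES];  /* toy shadow L1 tables */

    struct frame_hint {     /* plays the role of the reused fields:    */
        uint32_t smfn;      /* tlbflush_timestamp ...                  */
        uint32_t index;     /* ... and the PGT_va bits of type_info    */
    };

    /* Record which shadow PTE last mapped this guest frame. */
    static void set_back_ptr(struct frame_hint *f,
                             uint32_t smfn, uint32_t index)
    {
        f->smfn  = smfn;
        f->index = index;
    }

    /* Try the hinted PTE first; return 0 to fall back to a full scan. */
    static int write_protect_fast(struct frame_hint *f, uint32_t gmfn)
    {
        uint32_t *pte = &shadow_l1[f->smfn][f->index];
        if ((*pte >> 12) != gmfn)
            return 0;              /* stale hint */
        *pte &= ~_PAGE_RW;         /* one PTE touched, no hash-chain walk */
        return 1;
    }

    int main(void)
    {
        struct frame_hint f;
        shadow_l1[3][7] = (42u << 12) | _PAGE_RW;  /* PTE maps gmfn 42 */
        set_back_ptr(&f, 3, 7);
        assert(write_protect_fast(&f, 42));
        assert(!(shadow_l1[3][7] & _PAGE_RW));
        return 0;
    }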

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Asit Mallick <asit.k.mallick@xxxxxxxxx>

diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow.c     Thu Oct 13 16:58:01 2005
@@ -616,13 +616,6 @@
         pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
         spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
 
-        /*
-        gpl1e = &(linear_pg_table[l1_linear_offset(va) &
-                              ~(L1_PAGETABLE_ENTRIES-1)]);
-
-        spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
-                                     ~(L1_PAGETABLE_ENTRIES-1)]);*/
-
         for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
         {
             l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
@@ -645,7 +638,8 @@
                 min = i;
             if ( likely(i > max) )
                 max = i;
-        }
+            set_guest_back_ptr(d, sl1e, sl1mfn, i);
+        }
 
         frame_table[sl1mfn].tlbflush_timestamp =
             SHADOW_ENCODE_MIN_MAX(min, max);
@@ -716,8 +710,8 @@
         }
     }
 
+    set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
     __shadow_set_l1e(v, va, &new_spte);
-
     shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
 }
 
@@ -1133,6 +1127,24 @@
         delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
         perfc_decr(writable_pte_predictions);
     }
+}
+
+static int fix_entry(
+    struct domain *d, 
+    l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
+{
+    l1_pgentry_t old = *pt;
+    l1_pgentry_t new = old;
+
+    l1e_remove_flags(new,_PAGE_RW);
+    if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+        BUG();
+    (*found)++;
+    *pt = new;
+    if ( is_l1_shadow )
+        shadow_put_page_from_l1e(old, d);
+
+    return (*found == max_refs_to_find);
 }
 
 static u32 remove_all_write_access_in_ptpage(
@@ -1156,49 +1168,28 @@
 
     match = l1e_from_pfn(readonly_gmfn, flags);
 
-    // returns true if all refs have been found and fixed.
-    //
-    int fix_entry(int i)
-    {
-        l1_pgentry_t old = pt[i];
-        l1_pgentry_t new = old;
-
-        l1e_remove_flags(new,_PAGE_RW);
-        if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
-            BUG();
-        found++;
-        pt[i] = new;
-        if ( is_l1_shadow )
-            shadow_put_page_from_l1e(old, d);
-
-#if 0
-        printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
-               "is_l1_shadow=%d\n",
-               readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
-#endif
-
-        return (found == max_refs_to_find);
-    }
-
-    i = readonly_gpfn & (GUEST_L1_PAGETABLE_ENTRIES - 1);
-    if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
-    {
-        perfc_incrc(remove_write_fast_exit);
-        increase_writable_pte_prediction(d, readonly_gpfn, prediction);
-        unmap_domain_page(pt);
-        return found;
+    if ( shadow_mode_external(d) ) {
+        i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask) 
+            >> PGT_va_shift;
+
+        if ( (i >= 0 && i <= L1_PAGETABLE_ENTRIES) &&
+             !l1e_has_changed(pt[i], match, flags) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+             !prediction )
+            goto out;
     }
  
     for (i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++)
     {
-        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
             break;
     }
 
+out:
     unmap_domain_page(pt);
 
     return found;
-#undef MATCH_ENTRY
 }
 
 static int remove_all_write_access(
@@ -1206,8 +1197,8 @@
 {
     int i;
     struct shadow_status *a;
-    u32 found = 0, fixups, write_refs;
-    unsigned long prediction, predicted_gpfn, predicted_smfn;
+    u32 found = 0, write_refs;
+    unsigned long predicted_smfn;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(VALID_MFN(readonly_gmfn));
@@ -1238,26 +1229,18 @@
         return 1;
     }
 
-    // Before searching all the L1 page tables, check the typical culprit first
-    //
-    if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
-    {
-        predicted_gpfn = prediction & PGT_mfn_mask;
-        if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
-             (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
-        {
-            found += fixups;
-            if ( found == write_refs )
-            {
-                perfc_incrc(remove_write_predicted);
-                return 1;
-            }
-        }
-        else
-        {
-            perfc_incrc(remove_write_bad_prediction);
-            decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
-        }
+    if ( shadow_mode_external(d) ) {
+        if (write_refs-- == 0) 
+            return 0;
+
+         // Use the back pointer to locate the shadow page that can contain
+         // the PTE of interest
+         if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
+             found += remove_all_write_access_in_ptpage(
+                 d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
+             if ( found == write_refs )
+                 return 0;
+         }
     }
 
     // Search all the shadow L1 page tables...
@@ -1276,7 +1259,7 @@
             {
                 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
                 if ( found == write_refs )
-                    return 1;
+                    return 0;
             }
 
             a = a->next;
@@ -1376,7 +1359,7 @@
                     guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
                {
                    need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
-
+                    set_guest_back_ptr(d, shadow1[i], smfn, i);
                     // can't update snapshots of linear page tables -- they
                     // are used multiple times...
                     //
@@ -1604,6 +1587,8 @@
              !shadow_get_page_from_l1e(npte, d) )
             BUG();
         *ppte = npte;
+        set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT, 
+                           (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
         shadow_put_page_from_l1e(opte, d);
 
         unmap_domain_page(ppte);
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow32.c   Thu Oct 13 16:58:01 2005
@@ -1032,7 +1032,12 @@
             if ( !get_page_type(page, PGT_writable_page) )
                 BUG();
             put_page_type(page);
-
+            /*
+             * We use tlbflush_timestamp as a back pointer to smfn, and
+             * need to clean it up here.
+             */
+            if ( shadow_mode_external(d) )
+                page->tlbflush_timestamp = 0;
             list_ent = page->list.next;
         }
     }
@@ -1390,18 +1395,6 @@
     return rc;
 }
 
-/*
- * XXX KAF: Why is this VMX specific?
- */
-void vmx_shadow_clear_state(struct domain *d)
-{
-    SH_VVLOG("%s:", __func__);
-    shadow_lock(d);
-    free_shadow_pages(d);
-    shadow_unlock(d);
-    update_pagetables(d->vcpu[0]);
-}
-
 unsigned long
 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
 {
@@ -1462,14 +1455,10 @@
 
     hl2 = map_domain_page(hl2mfn);
 
-#ifdef __i386__
     if ( shadow_mode_external(d) )
         limit = L2_PAGETABLE_ENTRIES;
     else
         limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-#else
-    limit = 0; /* XXX x86/64 XXX */
-#endif
 
     memset(hl2, 0, limit * sizeof(l1_pgentry_t));
 
@@ -1665,6 +1654,7 @@
                 min = i;
             if ( likely(i > max) )
                 max = i;
+            set_guest_back_ptr(d, sl1e, sl1mfn, i);
         }
 
         frame_table[sl1mfn].tlbflush_timestamp =
@@ -2070,6 +2060,24 @@
 
         xfree(gpfn_list);
     }
+}
+
+static int fix_entry(
+    struct domain *d, 
+    l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
+{
+    l1_pgentry_t old = *pt;
+    l1_pgentry_t new = old;
+
+    l1e_remove_flags(new,_PAGE_RW);
+    if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+        BUG();
+    (*found)++;
+    *pt = new;
+    if ( is_l1_shadow )
+        shadow_put_page_from_l1e(old, d);
+
+    return (*found == max_refs_to_find);
 }
 
 static u32 remove_all_write_access_in_ptpage(
@@ -2088,49 +2096,28 @@
 
     match = l1e_from_pfn(readonly_gmfn, flags);
 
-    // returns true if all refs have been found and fixed.
-    //
-    int fix_entry(int i)
-    {
-        l1_pgentry_t old = pt[i];
-        l1_pgentry_t new = old;
-
-        l1e_remove_flags(new,_PAGE_RW);
-        if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
-            BUG();
-        found++;
-        pt[i] = new;
-        if ( is_l1_shadow )
-            shadow_put_page_from_l1e(old, d);
-
-#if 0
-        printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
-               "is_l1_shadow=%d\n",
-               readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
-#endif
-
-        return (found == max_refs_to_find);
-    }
-
-    i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
-    if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
-    {
-        perfc_incrc(remove_write_fast_exit);
-        increase_writable_pte_prediction(d, readonly_gpfn, prediction);
-        unmap_domain_page(pt);
-        return found;
-    }
- 
+    if ( shadow_mode_external(d) ) {
+        i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask) 
+            >> PGT_va_shift;
+
+        if ( (i >= 0 && i <= L1_PAGETABLE_ENTRIES) &&
+             !l1e_has_changed(pt[i], match, flags) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+             !prediction )
+            goto out;
+    }
+
     for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
     {
-        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
             break;
     }
 
+out:
     unmap_domain_page(pt);
 
     return found;
-#undef MATCH_ENTRY
 }
 
 int shadow_remove_all_write_access(
@@ -2138,8 +2125,8 @@
 {
     int i;
     struct shadow_status *a;
-    u32 found = 0, fixups, write_refs;
-    unsigned long prediction, predicted_gpfn, predicted_smfn;
+    u32 found = 0, write_refs;
+    unsigned long predicted_smfn;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(VALID_MFN(readonly_gmfn));
@@ -2169,27 +2156,19 @@
         perfc_incrc(remove_write_no_work);
         return 1;
     }
-
-    // Before searching all the L1 page tables, check the typical culprit first
-    //
-    if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
-    {
-        predicted_gpfn = prediction & PGT_mfn_mask;
-        if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
-             (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
-        {
-            found += fixups;
-            if ( found == write_refs )
-            {
-                perfc_incrc(remove_write_predicted);
-                return 1;
-            }
-        }
-        else
-        {
-            perfc_incrc(remove_write_bad_prediction);
-            decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
-        }
+    
+    if ( shadow_mode_external(d) ) {
+        if (write_refs-- == 0) 
+            return 0;
+
+         // Use the back pointer to locate the shadow page that can contain
+         // the PTE of interest
+         if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
+             found += remove_all_write_access_in_ptpage(
+                 d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
+             if ( found == write_refs )
+                 return 0;
+         }
     }
 
     // Search all the shadow L1 page tables...
@@ -2203,7 +2182,7 @@
             {
                 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
                 if ( found == write_refs )
-                    return 1;
+                    return 0;
             }
 
             a = a->next;
@@ -2376,12 +2355,12 @@
                      l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
                 {
                     need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
+                    set_guest_back_ptr(d, shadow1[i], smfn, i);
 
                     // can't update snapshots of linear page tables -- they
                     // are used multiple times...
                     //
                     // snapshot[i] = new_pte;
-
                     changed++;
                 }
             }
@@ -2530,6 +2509,8 @@
              !shadow_get_page_from_l1e(npte, d) )
             BUG();
         *ppte = npte;
+        set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT, 
+                           (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
         shadow_put_page_from_l1e(opte, d);
 
         unmap_domain_page(ppte);
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow_public.c      Thu Oct 13 16:58:01 2005
@@ -1095,7 +1095,12 @@
             if ( !get_page_type(page, PGT_writable_page) )
                 BUG();
             put_page_type(page);
-
+            /*
+             * We use tlbflush_timestamp as a back pointer to smfn, and
+             * need to clean it up here.
+             */
+            if ( shadow_mode_external(d) )
+                page->tlbflush_timestamp = 0;
             list_ent = page->list.next;
         }
     }
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Thu Oct 13 16:55:15 2005
+++ b/xen/include/asm-x86/shadow.h      Thu Oct 13 16:58:01 2005
@@ -718,6 +718,23 @@
     put_shadow_ref(smfn);
 }
 
+/*
+ * SMP issue. The following code assumes the shadow lock is held. Revisit
+ * this when working on finer-grained locks for shadow.
+ */
+static inline void set_guest_back_ptr(
+    struct domain *d, l1_pgentry_t spte, unsigned long smfn, unsigned int index)
+{
+    if ( shadow_mode_external(d) ) {
+        unsigned long gmfn;
+
+        ASSERT(shadow_lock_is_acquired(d));
+        gmfn = l1e_get_pfn(spte);
+        frame_table[gmfn].tlbflush_timestamp = smfn;
+        frame_table[gmfn].u.inuse.type_info &= ~PGT_va_mask;
+        frame_table[gmfn].u.inuse.type_info |= (unsigned long) index << PGT_va_shift;
+    }
+}
 
 /************************************************************************/
 #if CONFIG_PAGING_LEVELS <= 2
@@ -1611,10 +1628,11 @@
             if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
                 shadow_put_page_from_l1e(old_spte, d);
         }
-    }
-
+
+    }
+
+    set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
     shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
-
     shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
 }
 #endif
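
As an aside, the clear-then-or field packing that set_guest_back_ptr
performs on type_info can be exercised in isolation. In the sketch below
the field width and shift are made-up demo values, not the real
PGT_va_mask/PGT_va_shift:

    /* Standalone demo of the mask-and-shift packing used above; the
     * DEMO_* values are hypothetical, chosen only for this example. */
    #include <assert.h>

    #define DEMO_va_shift 16
    #define DEMO_va_mask  (0x3ffUL << DEMO_va_shift)  /* 10-bit index */

    int main(void)
    {
        unsigned long type_info = 0x12345678UL;
        unsigned long index = 13;

        type_info &= ~DEMO_va_mask;              /* clear the old index */
        type_info |= index << DEMO_va_shift;     /* store the new one   */

        /* unpack, as remove_all_write_access_in_ptpage does */
        assert(((type_info & DEMO_va_mask) >> DEMO_va_shift) == index);
        return 0;
    }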
