
[Xen-changelog] [xen-unstable] x86: simplify page reference handling for partially (in-)validated pages



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1225708322 0
# Node ID 540483d2a98f3fbabf06961cc0cc52e3c59c245b
# Parent  303b1014f91e5fa0783a5d7095626a47e82db9d0
x86: simplify page reference handling for partially (in-)validated pages

Simplify general page reference management for preempted (partially
[in-]validated) pages: Reserve one reference that can be acquired
without the risk of overflowing the reference count, thus allowing
the introduction of a simplified get_page() equivalent that cannot
fail (but must be used with care).
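
For illustration only, not part of the changeset: a minimal
standalone sketch of the reserved-reference scheme. The names
ref_get()/ref_get_light() and the 8-bit COUNT_MASK are invented
stand-ins for get_page()/get_page_light() and PGC_count_mask, and
C11 atomics replace Xen's cmpxchg() loop. The fallible acquire
refuses the last count slot, so the infallible one always has room:

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

#define COUNT_MASK 0xffu                 /* stand-in for PGC_count_mask */

static _Atomic unsigned int count;       /* stand-in for page->count_info */

/* Fallible acquire: fails one step early, keeping one slot spare. */
static bool ref_get(void)
{
    unsigned int x = atomic_load(&count);
    do {
        if ( (x & COUNT_MASK) == 0 ||           /* not allocated? */
             ((x + 2) & COUNT_MASK) <= 1 )      /* would consume the spare */
            return false;
    } while ( !atomic_compare_exchange_weak(&count, &x, x + 1) );
    return true;
}

/* Infallible acquire: may take the spare slot; the caller must already
 * hold a reference and must not nest two of these. */
static void ref_get_light(void)
{
    unsigned int x = atomic_load(&count);
    do {
        assert((x & COUNT_MASK) != 0);          /* caller holds a ref */
        assert(((x + 1) & COUNT_MASK) != 0);    /* spare => no overflow */
    } while ( !atomic_compare_exchange_weak(&count, &x, x + 1) );
}

int main(void)
{
    atomic_store(&count, 1);   /* "allocated" with one reference */
    if ( !ref_get() )
        return 1;
    ref_get_light();           /* cannot fail: the spare slot exists */
    return 0;
}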

Doing this conversion uncovered a latent issue in the changes
previously made in this area: The extra reference must be acquired
before the 'normal' reference gets dropped (otherwise the count could
transiently reach zero and the page be freed prematurely), so the
patch fixes this at once in both the alloc_page_type() and
free_page_type() paths (it is really only the latter that failed to
work with the change described above).
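
Again purely illustrative (invented helper names, C11 atomics rather
than the patched Xen code): a standalone sketch of why the acquire
has to come first. If the 'normal' reference is dropped before the
extra one is taken, the count can transiently reach zero and the page
is freed out from under us:

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int refs;

static void ref_put(void)
{
    if ( atomic_fetch_sub(&refs, 1) == 1 )
        puts("last reference dropped: page freed");  /* think free_domheap_page() */
}

static void ref_get_light(void)
{
    unsigned int old = atomic_fetch_add(&refs, 1);
    assert(old != 0);   /* must never run on an already-freed page */
}

int main(void)
{
    atomic_store(&refs, 1);

    /* Broken order: ref_put(); ref_get_light();  -- with refs == 1 the
     * put frees the page and the subsequent get touches dead memory. */

    /* Fixed order, as in the patched -EAGAIN paths below: take the
     * PGT_partial reference first, then drop the normal one. */
    ref_get_light();
    ref_put();          /* the count never dips below 1 */
    return 0;
}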

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/x86/mm.c |   90 +++++++++++++++++++++++++++---------------------------
 1 files changed, 46 insertions(+), 44 deletions(-)

diff -r 303b1014f91e -r 540483d2a98f xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Nov 03 10:24:17 2008 +0000
+++ b/xen/arch/x86/mm.c Mon Nov 03 10:32:02 2008 +0000
@@ -1856,7 +1856,8 @@ int get_page(struct page_info *page, str
         nx = x + 1;
         d  = nd;
         if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
-             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
+             /* Keep one spare reference to be acquired by get_page_light(). */
+             unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */
              unlikely(d != _domain) )                /* Wrong owner? */
         {
             if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
@@ -1878,16 +1879,34 @@ int get_page(struct page_info *page, str
     return 1;
 }
 
+/*
+ * Special version of get_page() to be used exclusively when
+ * - a page is known to already have a non-zero reference count
+ * - the page does not need its owner to be checked
+ * - it will not be called more than once without dropping the thus
+ *   acquired reference again.
+ * Due to get_page() reserving one reference, this call cannot fail.
+ */
+static void get_page_light(struct page_info *page)
+{
+    u32 x, nx, y = page->count_info;
+
+    do {
+        x  = y;
+        nx = x + 1;
+        BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */
+        BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */
+        y = cmpxchg(&page->count_info, x, nx);
+    }
+    while ( unlikely(y != x) );
+}
+
 
 static int alloc_page_type(struct page_info *page, unsigned long type,
                            int preemptible)
 {
     struct domain *owner = page_get_owner(page);
     int rc;
-
-    /* Obtain an extra reference to retain if we set PGT_partial. */
-    if ( preemptible && !get_page(page, owner) )
-        return -EINVAL;
 
     /* A page table is dirtied when its type count becomes non-zero. */
     if ( likely(owner != NULL) )
@@ -1922,14 +1941,10 @@ static int alloc_page_type(struct page_i
     wmb();
     if ( rc == -EAGAIN )
     {
+        get_page_light(page);
         page->u.inuse.type_info |= PGT_partial;
-        return -EAGAIN;
-    }
-
-    if ( preemptible )
-        put_page(page);
-
-    if ( rc == -EINTR )
+    }
+    else if ( rc == -EINTR )
     {
         ASSERT((page->u.inuse.type_info &
                 (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
@@ -2044,8 +2059,8 @@ static int __put_final_page_type(
     }
     else if ( rc == -EINTR )
     {
-        ASSERT(!(page->u.inuse.type_info &
-                 (PGT_count_mask|PGT_validated|PGT_partial)));
+        ASSERT((page->u.inuse.type_info &
+                (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
         if ( !(shadow_mode_enabled(page_get_owner(page)) &&
                (page->count_info & PGC_page_table)) )
             page->tlbflush_timestamp = tlbflush_current_time();
@@ -2056,13 +2071,9 @@ static int __put_final_page_type(
     {
         BUG_ON(rc != -EAGAIN);
         wmb();
+        get_page_light(page);
         page->u.inuse.type_info |= PGT_partial;
-        /* Must skip put_page() below. */
-        preemptible = 0;
-    }
-
-    if ( preemptible )
-        put_page(page);
+    }
 
     return rc;
 }
@@ -2072,10 +2083,7 @@ static int __put_page_type(struct page_i
                            int preemptible)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
-
-    /* Obtain an extra reference to retain if we set PGT_partial. */
-    if ( preemptible && !get_page(page, page_get_owner(page)) )
-        return -EINVAL;
+    int rc = 0;
 
     for ( ; ; )
     {
@@ -2098,10 +2106,11 @@ static int __put_page_type(struct page_i
                 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
                                            x, nx)) != x) )
                     continue;
+                /* We cleared the 'valid bit' so we do the clean up. */
+                rc = __put_final_page_type(page, x, preemptible);
                 if ( x & PGT_partial )
                     put_page(page);
-                /* We cleared the 'valid bit' so we do the clean up. */
-                return __put_final_page_type(page, x, preemptible);
+                break;
             }
 
             /*
@@ -2120,17 +2129,10 @@ static int __put_page_type(struct page_i
             break;
 
         if ( preemptible && hypercall_preempt_check() )
-        {
-            if ( preemptible )
-                put_page(page);
             return -EINTR;
-        }
-    }
-
-    if ( preemptible )
-        put_page(page);
-
-    return 0;
+    }
+
+    return rc;
 }
 
 
@@ -2138,6 +2140,7 @@ static int __get_page_type(struct page_i
                            int preemptible)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
+    int rc = 0;
 
     ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
 
@@ -2233,11 +2236,7 @@ static int __get_page_type(struct page_i
         }
 
         if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
-        {
-            if ( (x & PGT_partial) && !(nx & PGT_partial) )
-                put_page(page);
             break;
-        }
 
         if ( preemptible && hypercall_preempt_check() )
             return -EINTR;
@@ -2264,10 +2263,13 @@ static int __get_page_type(struct page_i
             page->nr_validated_ptes = 0;
             page->partial_pte = 0;
         }
-        return alloc_page_type(page, type, preemptible);
-    }
-
-    return 0;
+        rc = alloc_page_type(page, type, preemptible);
+    }
+
+    if ( (x & PGT_partial) && !(nx & PGT_partial) )
+        put_page(page);
+
+    return rc;
 }
 
 void put_page_type(struct page_info *page)
