[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] x86: simplify page reference handling for partially (in-)validated pages



As suggested by Keir, simplify general page reference management for
preempted (partiall [in-]validated) pages: Reserve on reference that
can be acquired without the risk of overflowing the reference count,
thus allowing to have a simplified get_page() equivalent that cannot
fail (but must be used with care).

Doing this conversion pointed out a latent issue in the changes done
previously in this area: The extra reference must be acquired before
the 'normal' reference gets dropped, so the patch fixes this at once
in both the alloc_page_type() and free_page_type() paths (it's really
only the latter that failed to work with the change described above).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2008-10-27/xen/arch/x86/mm.c
===================================================================
--- 2008-10-27.orig/xen/arch/x86/mm.c   2008-10-29 16:46:52.000000000 +0100
+++ 2008-10-27/xen/arch/x86/mm.c        2008-10-31 16:13:56.000000000 +0100
@@ -1856,7 +1856,8 @@ int get_page(struct page_info *page, str
         nx = x + 1;
         d  = nd;
         if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
-             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
+             /* Keep one spare reference to be acquired by get_page_light(). */
+             unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */
              unlikely(d != _domain) )                /* Wrong owner? */
         {
             if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
@@ -1878,6 +1879,28 @@ int get_page(struct page_info *page, str
     return 1;
 }
 
+/*
+ * Special version of get_page() to be used exclusively when
+ * - a page is known to already have a non-zero reference count
+ * - the page does not need its owner to be checked
+ * - it will not be called more than once without dropping the thus
+ *   acquired reference again.
+ * Due to get_page() reserving one reference, this call cannot fail.
+ */
+static void get_page_light(struct page_info *page)
+{
+    u32 x, nx, y = page->count_info;
+
+    do {
+        x  = y;
+        nx = x + 1;
+        BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */
+        BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */
+        y = cmpxchg(&page->count_info, x, nx);
+    }
+    while ( unlikely(y != x) );
+}
+
 
 static int alloc_page_type(struct page_info *page, unsigned long type,
                            int preemptible)
@@ -1885,10 +1908,6 @@ static int alloc_page_type(struct page_i
     struct domain *owner = page_get_owner(page);
     int rc;
 
-    /* Obtain an extra reference to retain if we set PGT_partial. */
-    if ( preemptible && !get_page(page, owner) )
-        return -EINVAL;
-
     /* A page table is dirtied when its type count becomes non-zero. */
     if ( likely(owner != NULL) )
         paging_mark_dirty(owner, page_to_mfn(page));
@@ -1922,14 +1941,10 @@ static int alloc_page_type(struct page_i
     wmb();
     if ( rc == -EAGAIN )
     {
+        get_page_light(page);
         page->u.inuse.type_info |= PGT_partial;
-        return -EAGAIN;
     }
-
-    if ( preemptible )
-        put_page(page);
-
-    if ( rc == -EINTR )
+    else if ( rc == -EINTR )
     {
         ASSERT((page->u.inuse.type_info &
                 (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
@@ -2037,8 +2052,8 @@ static int __put_page_type_final(struct 
         page->u.inuse.type_info--;
         break;
     case -EINTR:
-        ASSERT(!(page->u.inuse.type_info &
-                 (PGT_count_mask|PGT_validated|PGT_partial)));
+        ASSERT((page->u.inuse.type_info &
+                (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
         if ( !(shadow_mode_enabled(page_get_owner(page)) &&
                (page->count_info & PGC_page_table)) )
             page->tlbflush_timestamp = tlbflush_current_time();
@@ -2047,17 +2062,13 @@ static int __put_page_type_final(struct 
         break;
     case -EAGAIN:
         wmb();
+        get_page_light(page);
         page->u.inuse.type_info |= PGT_partial;
-        /* Must skip put_page() below. */
-        preemptible = 0;
         break;
     default:
         BUG();
     }
 
-    if ( preemptible )
-        put_page(page);
-
     return rc;
 }
 
@@ -2066,10 +2077,7 @@ static int __put_page_type(struct page_i
                            int preemptible)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
-
-    /* Obtain an extra reference to retain if we set PGT_partial. */
-    if ( preemptible && !get_page(page, page_get_owner(page)) )
-        return -EINVAL;
+    int rc = 0;
 
     for ( ; ; )
     {
@@ -2092,10 +2100,11 @@ static int __put_page_type(struct page_i
                 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
                                            x, nx)) != x) )
                     continue;
+                /* We cleared the 'valid bit' so we do the clean up. */
+                rc = __put_page_type_final(page, x, preemptible);
                 if ( x & PGT_partial )
                     put_page(page);
-                /* We cleared the 'valid bit' so we do the clean up. */
-                return __put_page_type_final(page, x, preemptible);
+                break;
             }
 
             /*
@@ -2114,17 +2123,10 @@ static int __put_page_type(struct page_i
             break;
 
         if ( preemptible && hypercall_preempt_check() )
-        {
-            if ( preemptible )
-                put_page(page);
             return -EINTR;
-        }
     }
 
-    if ( preemptible )
-        put_page(page);
-
-    return 0;
+    return rc;
 }
 
 
@@ -2132,6 +2134,7 @@ static int __get_page_type(struct page_i
                            int preemptible)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
+    int rc = 0;
 
     ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
 
@@ -2227,11 +2230,7 @@ static int __get_page_type(struct page_i
         }
 
         if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
-        {
-            if ( (x & PGT_partial) && !(nx & PGT_partial) )
-                put_page(page);
             break;
-        }
 
         if ( preemptible && hypercall_preempt_check() )
             return -EINTR;
@@ -2258,10 +2257,13 @@ static int __get_page_type(struct page_i
             page->nr_validated_ptes = 0;
             page->partial_pte = 0;
         }
-        return alloc_page_type(page, type, preemptible);
+        rc = alloc_page_type(page, type, preemptible);
     }
 
-    return 0;
+    if ( (x & PGT_partial) && !(nx & PGT_partial) )
+        put_page(page);
+
+    return rc;
 }
 
 void put_page_type(struct page_info *page)



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.