[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 03 of 12] x86/mm: Add per-page locking for memory sharing, when audits are disabled



 xen/arch/x86/mm.c             |   74 +-----------
 xen/arch/x86/mm/mem_sharing.c |  257 ++++++++++++++++++++++++++++++++++++++---
 xen/arch/x86/mm/mm-locks.h    |   15 +-
 xen/include/asm-x86/mm.h      |   27 +++-
 4 files changed, 275 insertions(+), 98 deletions(-)


With the removal of the hash table, all that is needed now is locking
of individual shared pages, as new (gfn,domain) pairs are removed or
added from the list of mappings.

We recycle PGT_locked and use it to lock individual pages. We ensure deadlock
is averted by locking pages in increasing order.

The global lock remains for the benefit of the auditing code, and is
thus enabled only as a compile-time option.

Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Signed-off-by: Adin Scannell <adin@xxxxxxxxxxx>

diff -r e0d8333e4725 -r 11916fe20dd2 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1719,7 +1719,7 @@ static int free_l4_table(struct page_inf
 #define free_l4_table(page, preemptible) (-EINVAL)
 #endif
 
-static int page_lock(struct page_info *page)
+int page_lock(struct page_info *page)
 {
     unsigned long x, nx;
 
@@ -1736,7 +1736,7 @@ static int page_lock(struct page_info *p
     return 1;
 }
 
-static void page_unlock(struct page_info *page)
+void page_unlock(struct page_info *page)
 {
     unsigned long x, nx, y = page->u.inuse.type_info;
 
@@ -4295,76 +4295,6 @@ int steal_page(
     return -1;
 }
 
-int page_make_sharable(struct domain *d, 
-                       struct page_info *page, 
-                       int expected_refcnt)
-{
-    spin_lock(&d->page_alloc_lock);
-
-    /* Change page type and count atomically */
-    if ( !get_page_and_type(page, d, PGT_shared_page) )
-    {
-        spin_unlock(&d->page_alloc_lock);
-        return -EINVAL;
-    }
-
-    /* Check it wasn't already sharable and undo if it was */
-    if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
-    {
-        put_page_and_type(page);
-        spin_unlock(&d->page_alloc_lock);
-        return -EEXIST;
-    }
-
-    /* Check if the ref count is 2. The first from PGC_allocated, and
-     * the second from get_page_and_type at the top of this function */
-    if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
-    {
-        /* Return type count back to zero */
-        put_page_and_type(page);
-        spin_unlock(&d->page_alloc_lock);
-        return -E2BIG;
-    }
-
-    page_set_owner(page, dom_cow);
-    d->tot_pages--;
-    page_list_del(page, &d->page_list);
-    spin_unlock(&d->page_alloc_lock);
-    return 0;
-}
-
-int page_make_private(struct domain *d, struct page_info *page)
-{
-    if ( !get_page(page, dom_cow) )
-        return -EINVAL;
-    
-    spin_lock(&d->page_alloc_lock);
-
-    /* We can only change the type if count is one */
-    if ( (page->u.inuse.type_info & (PGT_type_mask | PGT_count_mask))
-         != (PGT_shared_page | 1) )
-    {
-        put_page(page);
-        spin_unlock(&d->page_alloc_lock);
-        return -EEXIST;
-    }
-
-    /* Drop the final typecount */
-    put_page_and_type(page);
-
-    /* Change the owner */
-    ASSERT(page_get_owner(page) == dom_cow);
-    page_set_owner(page, d);
-
-    d->tot_pages++;
-    page_list_add_tail(page, &d->page_list);
-    spin_unlock(&d->page_alloc_lock);
-
-    put_page(page);
-
-    return 0;
-}
-
 static int __do_update_va_mapping(
     unsigned long va, u64 val64, unsigned long flags, struct domain *pg_owner)
 {
diff -r e0d8333e4725 -r 11916fe20dd2 xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -35,10 +35,21 @@
 
 #include "mm-locks.h"
 
+static shr_handle_t next_handle = 1;
+
 #if MEM_SHARING_AUDIT
+
+static mm_lock_t shr_lock;
+
+#define shr_lock()          _shr_lock()
+#define shr_unlock()        _shr_unlock()
+#define shr_locked_by_me()  _shr_locked_by_me()
+
 static int mem_sharing_audit(void);
+
 #define MEM_SHARING_DEBUG(_f, _a...)                                  \
     debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
+
 static struct list_head shr_audit_list;
 
 static inline void audit_add_list(struct page_info *page)
@@ -51,7 +62,21 @@ static inline void audit_del_list(struct
 {
     list_del(&page->shared_info->entry);
 }
+
+static inline int mem_sharing_page_lock(struct page_info *p)
+{
+    return 1;
+}
+#define mem_sharing_page_unlock(p)   ((void)0)
+
+#define get_next_handle()   next_handle++;
 #else
+
+#define shr_lock()          ((void)0)
+#define shr_unlock()        ((void)0)
+/* Only used inside audit code */
+//#define shr_locked_by_me()  ((void)0)
+
 static inline int mem_sharing_audit(void)
 {
     return 0;
@@ -59,6 +84,34 @@ static inline int mem_sharing_audit(void
 
 #define audit_add_list(p)  ((void)0)
 #define audit_del_list(p)  ((void)0)
+
+static inline int mem_sharing_page_lock(struct page_info *pg)
+{
+    int rc;
+    rc = page_lock(pg);
+    if ( rc )
+    {
+        preempt_disable();
+    }
+    return rc;
+}
+
+static inline void mem_sharing_page_unlock(struct page_info *pg)
+{
+    preempt_enable();
+    page_unlock(pg);
+}
+
+static inline shr_handle_t get_next_handle(void)
+{
+    /* Get the next handle get_page style */ 
+    uint64_t x, y = next_handle;
+    do {
+        x = y;
+    }
+    while ( (y = cmpxchg(&next_handle, x, x + 1)) != x );
+    return x + 1;
+}
 #endif /* MEM_SHARING_AUDIT */
 
 #define mem_sharing_enabled(d) \
@@ -71,7 +124,6 @@ static inline int mem_sharing_audit(void
 #undef page_to_mfn
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
-static shr_handle_t next_handle = 1;
 static atomic_t nr_saved_mfns   = ATOMIC_INIT(0); 
 
 typedef struct gfn_info
@@ -81,8 +133,6 @@ typedef struct gfn_info
     struct list_head list;
 } gfn_info_t;
 
-static mm_lock_t shr_lock;
-
 /* Returns true if list has only one entry. O(1) complexity. */
 static inline int list_has_one_entry(struct list_head *head)
 {
@@ -403,6 +453,113 @@ int mem_sharing_debug_gref(struct domain
     return mem_sharing_debug_gfn(d, gfn); 
 }
 
+/* Functions that change a page's type and ownership */
+static int page_make_sharable(struct domain *d, 
+                       struct page_info *page, 
+                       int expected_refcnt)
+{
+    spin_lock(&d->page_alloc_lock);
+
+    /* Change page type and count atomically */
+    if ( !get_page_and_type(page, d, PGT_shared_page) )
+    {
+        spin_unlock(&d->page_alloc_lock);
+        return -EINVAL;
+    }
+
+    /* Check it wasn't already sharable and undo if it was */
+    if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
+    {
+        put_page_and_type(page);
+        spin_unlock(&d->page_alloc_lock);
+        return -EEXIST;
+    }
+
+    /* Check if the ref count is 2. The first from PGC_allocated, and
+     * the second from get_page_and_type at the top of this function */
+    if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
+    {
+        /* Return type count back to zero */
+        put_page_and_type(page);
+        spin_unlock(&d->page_alloc_lock);
+        return -E2BIG;
+    }
+
+    page_set_owner(page, dom_cow);
+    d->tot_pages--;
+    page_list_del(page, &d->page_list);
+    spin_unlock(&d->page_alloc_lock);
+    return 0;
+}
+
+static int page_make_private(struct domain *d, struct page_info *page)
+{
+    unsigned long expected_type;
+
+    if ( !get_page(page, dom_cow) )
+        return -EINVAL;
+    
+    spin_lock(&d->page_alloc_lock);
+
+    /* We can only change the type if count is one */
+    /* If we are locking pages individually, then we need to drop
+     * the lock here, while the page is typed. We cannot risk the 
+     * race of page_unlock and then put_page_type. */
+#if MEM_SHARING_AUDIT
+    expected_type = (PGT_shared_page | PGT_validated | 1);
+#else
+    expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
+#endif
+    if ( page->u.inuse.type_info != expected_type )
+    {
+        put_page(page);
+        spin_unlock(&d->page_alloc_lock);
+        return -EEXIST;
+    }
+
+    /* Drop the final typecount */
+    put_page_and_type(page);
+
+#ifndef MEM_SHARING_AUDIT
+    /* Now that we've dropped the type, we can unlock */
+    mem_sharing_page_unlock(page);
+#endif
+
+    /* Change the owner */
+    ASSERT(page_get_owner(page) == dom_cow);
+    page_set_owner(page, d);
+
+    d->tot_pages++;
+    page_list_add_tail(page, &d->page_list);
+    spin_unlock(&d->page_alloc_lock);
+
+    put_page(page);
+
+    return 0;
+}
+
+static inline struct page_info *__grab_shared_page(mfn_t mfn)
+{
+    struct page_info *pg = NULL;
+
+    if ( !mfn_valid(mfn) )
+        return NULL;
+    pg = mfn_to_page(mfn);
+
+    /* If the page is not validated we can't lock it, and if it's  
+     * not validated it's obviously not shared. */
+    if ( !mem_sharing_page_lock(pg) )
+        return NULL;
+
+    if ( mem_sharing_lookup(mfn_x(mfn)) == NULL )
+    {
+        mem_sharing_page_unlock(pg);
+        return NULL;
+    }
+
+    return pg;
+}
+
 int mem_sharing_nominate_page(struct domain *d,
                               unsigned long gfn,
                               int expected_refcnt,
@@ -410,7 +567,7 @@ int mem_sharing_nominate_page(struct dom
 {
     p2m_type_t p2mt;
     mfn_t mfn;
-    struct page_info *page;
+    struct page_info *page = NULL; /* gcc... */
     int ret;
     struct gfn_info *gfn_info;
 
@@ -425,10 +582,17 @@ int mem_sharing_nominate_page(struct dom
         goto out;
 
     /* Return the handle if the page is already shared */
-    page = mfn_to_page(mfn);
     if ( p2m_is_shared(p2mt) ) {
-        *phandle = page->shared_info->handle;
+        struct page_info *pg = __grab_shared_page(mfn);
+        if ( !pg )
+        {
+            gdprintk(XENLOG_ERR, "Shared p2m entry gfn %lx, but could not "
+                        "grab page %lx dom %d\n", gfn, mfn_x(mfn), 
d->domain_id);
+            BUG();
+        }
+        *phandle = pg->shared_info->handle;
         ret = 0;
+        mem_sharing_page_unlock(pg);
         goto out;
     }
 
@@ -437,15 +601,24 @@ int mem_sharing_nominate_page(struct dom
         goto out;
 
     /* Try to convert the mfn to the sharable type */
+    page = mfn_to_page(mfn);
     ret = page_make_sharable(d, page, expected_refcnt); 
     if ( ret ) 
         goto out;
 
+    /* Now that the page is validated, we can lock it. There is no 
+     * race because we're holding the p2m entry, so no one else 
+     * could be nominating this gfn */
+    ret = -ENOENT;
+    if ( !mem_sharing_page_lock(page) )
+        goto out;
+
     /* Initialize the shared state */
     ret = -ENOMEM;
     if ( (page->shared_info = 
             xmalloc(struct page_sharing_info)) == NULL )
     {
+        /* Making a page private atomically unlocks it */
         BUG_ON(page_make_private(d, page) != 0);
         goto out;
     }
@@ -453,7 +626,7 @@ int mem_sharing_nominate_page(struct dom
     INIT_LIST_HEAD(&page->shared_info->gfns);
 
     /* Create the handle */
-    page->shared_info->handle = next_handle++;  
+    page->shared_info->handle = get_next_handle();  
 
     /* Create the local gfn info */
     if ( (gfn_info = mem_sharing_gfn_alloc(page, d, gfn)) == NULL )
@@ -484,6 +657,7 @@ int mem_sharing_nominate_page(struct dom
 
     *phandle = page->shared_info->handle;
     audit_add_list(page);
+    mem_sharing_page_unlock(page);
     ret = 0;
 
 out:
@@ -495,7 +669,7 @@ out:
 int mem_sharing_share_pages(struct domain *sd, unsigned long sgfn, 
shr_handle_t sh,
                             struct domain *cd, unsigned long cgfn, 
shr_handle_t ch) 
 {
-    struct page_info *spage, *cpage;
+    struct page_info *spage, *cpage, *firstpg, *secondpg;
     struct list_head *le, *te;
     gfn_info_t *gfn;
     struct domain *d;
@@ -510,26 +684,63 @@ int mem_sharing_share_pages(struct domai
     smfn = get_gfn(sd, sgfn, &smfn_type);
     cmfn = get_gfn(cd, cgfn, &cmfn_type);
 
-    ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
-    spage = mem_sharing_lookup(mfn_x(smfn));
-    if ( spage == NULL )
+    /* This tricky business is to avoid two callers deadlocking if 
+     * grabbing pages in opposite client/source order */
+    if( mfn_x(smfn) == mfn_x(cmfn) )
+    {
+        /* The pages are already the same.  We could return some
+         * kind of error here, but no matter how you look at it,
+         * the pages are already 'shared'.  It possibly represents
+         * a big problem somewhere else, but as far as sharing is
+         * concerned: great success! */
+        ret = 0;
         goto err_out;
+    }
+    else if ( mfn_x(smfn) < mfn_x(cmfn) )
+    {
+        ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
+        spage = firstpg = __grab_shared_page(smfn);
+        if ( spage == NULL )
+            goto err_out;
+
+        ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
+        cpage = secondpg = __grab_shared_page(cmfn);
+        if ( cpage == NULL )
+        {
+            mem_sharing_page_unlock(spage);
+            goto err_out;
+        }
+    } else {
+        ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
+        cpage = firstpg = __grab_shared_page(cmfn);
+        if ( cpage == NULL )
+            goto err_out;
+
+        ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
+        spage = secondpg = __grab_shared_page(smfn);
+        if ( spage == NULL )
+        {
+            mem_sharing_page_unlock(cpage);
+            goto err_out;
+        }
+    }
+
     ASSERT(smfn_type == p2m_ram_shared);
-    ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
-    cpage = mem_sharing_lookup(mfn_x(cmfn));
-    if ( cpage == NULL )
-        goto err_out;
     ASSERT(cmfn_type == p2m_ram_shared);
 
     /* Check that the handles match */
     if ( spage->shared_info->handle != sh )
     {
         ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
+        mem_sharing_page_unlock(secondpg);
+        mem_sharing_page_unlock(firstpg);
         goto err_out;
     }
     if ( cpage->shared_info->handle != ch )
     {
         ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
+        mem_sharing_page_unlock(secondpg);
+        mem_sharing_page_unlock(firstpg);
         goto err_out;
     }
 
@@ -556,6 +767,9 @@ int mem_sharing_share_pages(struct domai
     xfree(cpage->shared_info);
     cpage->shared_info = NULL;
 
+    mem_sharing_page_unlock(secondpg);
+    mem_sharing_page_unlock(firstpg);
+
     /* Free the client page */
     if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
         put_page(cpage);
@@ -597,7 +811,7 @@ int mem_sharing_unshare_page(struct doma
         return 0;
     }
 
-    page = mem_sharing_lookup(mfn_x(mfn));
+    page = __grab_shared_page(mfn);
     if ( page == NULL )
     {
         gdprintk(XENLOG_ERR, "Domain p2m is shared, but page is not: "
@@ -633,18 +847,20 @@ gfn_found:
      * (possibly freeing the page), and exit early */
     if ( flags & MEM_SHARING_DESTROY_GFN )
     {
-        put_gfn(d, gfn);
-        shr_unlock();
         put_page_and_type(page);
+        mem_sharing_page_unlock(page);
         if ( last_gfn && 
             test_and_clear_bit(_PGC_allocated, &page->count_info) ) 
             put_page(page);
+        put_gfn(d, gfn);
+        shr_unlock();
 
         return 0;
     }
  
     if ( last_gfn )
     {
+        /* Making a page private atomically unlocks it */
         BUG_ON(page_make_private(d, page) != 0);
         goto private_page_found;
     }
@@ -653,6 +869,7 @@ gfn_found:
     page = alloc_domheap_page(d, 0);
     if ( !page ) 
     {
+        mem_sharing_page_unlock(old_page);
         put_gfn(d, gfn);
         mem_sharing_notify_helper(d, gfn);
         shr_unlock();
@@ -666,6 +883,7 @@ gfn_found:
     unmap_domain_page(t);
 
     BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)) == 0);
+    mem_sharing_page_unlock(old_page);
     put_page_and_type(old_page);
 
 private_page_found:    
@@ -683,6 +901,7 @@ private_page_found:
     /* Now that the gfn<->mfn map is properly established,
      * marking dirty is feasible */
     paging_mark_dirty(d, mfn_x(page_to_mfn(page)));
+    /* We do not need to unlock a private page */
     put_gfn(d, gfn);
     shr_unlock();
     return 0;
@@ -831,8 +1050,8 @@ int mem_sharing_domctl(struct domain *d,
 void __init mem_sharing_init(void)
 {
     printk("Initing memory sharing.\n");
+#if MEM_SHARING_AUDIT
     mm_lock_init(&shr_lock);
-#if MEM_SHARING_AUDIT
     INIT_LIST_HEAD(&shr_audit_list);
 #endif
 }
diff -r e0d8333e4725 -r 11916fe20dd2 xen/arch/x86/mm/mm-locks.h
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -26,6 +26,8 @@
 #ifndef _MM_LOCKS_H
 #define _MM_LOCKS_H
 
+#include <asm/mem_sharing.h>
+
 /* Per-CPU variable for enforcing the lock ordering */
 DECLARE_PER_CPU(int, mm_lock_level);
 #define __get_lock_level()  (this_cpu(mm_lock_level))
@@ -141,15 +143,22 @@ static inline void mm_enforce_order_unlo
  *                                                                      *
  ************************************************************************/
 
+#if MEM_SHARING_AUDIT
 /* Page-sharing lock (global) 
  *
  * A single global lock that protects the memory-sharing code's
  * hash tables. */
 
 declare_mm_lock(shr)
-#define shr_lock()         mm_lock(shr, &shr_lock)
-#define shr_unlock()       mm_unlock(&shr_lock)
-#define shr_locked_by_me() mm_locked_by_me(&shr_lock)
+#define _shr_lock()         mm_lock(shr, &shr_lock)
+#define _shr_unlock()       mm_unlock(&shr_lock)
+#define _shr_locked_by_me() mm_locked_by_me(&shr_lock)
+
+#else
+
+/* We use an efficient per-page lock when AUDIT is not enabled. */
+
+#endif /* MEM_SHARING_AUDIT */
 
 /* Nested P2M lock (per-domain)
  *
diff -r e0d8333e4725 -r 11916fe20dd2 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -337,6 +337,29 @@ int is_iomem_page(unsigned long mfn);
 
 void clear_superpage_mark(struct page_info *page);
 
+/* Per page locks:
+ * page_lock() is used for two purposes: pte serialization, and memory sharing.
+ *
+ * All users of page lock for pte serialization live in mm.c, use it
+ * to lock a page table page during pte updates, do not take other locks within
+ * the critical section delimited by page_lock/unlock, and perform no
+ * nesting. 
+ *
+ * All users of page lock for memory sharing live in mm/mem_sharing.c. 
Page_lock
+ * is used in memory sharing to protect addition (share) and removal (unshare) 
+ * of (gfn,domain) tupples to a list of gfn's that the shared page is 
currently 
+ * backing. Nesting may happen when sharing (and locking) two pages -- 
deadlock 
+ * is avoided by locking pages in increasing order.
+ * Memory sharing may take the p2m_lock within a page_lock/unlock
+ * critical section. 
+ *
+ * These two users (pte serialization and memory sharing) do not collide, since
+ * sharing is only supported for hvm guests, which do not perform pv pte 
updates.
+ * 
+ */
+int page_lock(struct page_info *page);
+void page_unlock(struct page_info *page);
+
 struct domain *page_get_owner_and_reference(struct page_info *page);
 void put_page(struct page_info *page);
 int  get_page(struct page_info *page, struct domain *domain);
@@ -588,10 +611,6 @@ int steal_page(
     struct domain *d, struct page_info *page, unsigned int memflags);
 int donate_page(
     struct domain *d, struct page_info *page, unsigned int memflags);
-int page_make_sharable(struct domain *d, 
-                       struct page_info *page, 
-                       int expected_refcnt);
-int page_make_private(struct domain *d, struct page_info *page);
 
 int map_ldt_shadow_page(unsigned int);
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.