[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] x86/mm: Eliminate global lock in mem sharing code



 xen/arch/x86/mm.c             |   16 +++-
 xen/arch/x86/mm/mem_sharing.c |  169 ++++++++++++++++++++++++++++++++++++-----
 xen/arch/x86/mm/mm-locks.h    |    6 +-
 xen/include/asm-x86/mm.h      |    2 +-
 4 files changed, 163 insertions(+), 30 deletions(-)


With the removal of the hash table, all that is needed now is locking
of individual shared pages, as new (gfn,domain) pairs are added to or
removed from the list of mappings.

The global lock remains for the benefit of the auditing code, and is
thus enabled only as a compile-time option.

Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r ecf95feef9ac -r 62ea5c05ae7b xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4337,16 +4337,22 @@ int page_make_sharable(struct domain *d,
     return 0;
 }
 
-int page_make_private(struct domain *d, struct page_info *page)
+int page_make_private(struct domain *d, struct page_info *page,
+                        int unlocked)
 {
+    unsigned long expected_type;
+
     if ( !get_page(page, dom_cow) )
         return -EINVAL;
     
     spin_lock(&d->page_alloc_lock);
 
     /* We can only change the type if count is one */
-    if ( (page->u.inuse.type_info & (PGT_type_mask | PGT_count_mask))
-         != (PGT_shared_page | PGT_validated | 1) )
+    /* If we are locking pages individually, then we need to drop
+     * the lock here, while the page is typed */
+    expected_type = (unlocked) ? (PGT_shared_page | PGT_validated | 1)
+                    : (PGT_shared_page | PGT_validated | PGT_locked | 2);
+    if ( page->u.inuse.type_info != expected_type )
     {
         put_page(page);
         spin_unlock(&d->page_alloc_lock);
@@ -4356,6 +4362,10 @@ int page_make_private(struct domain *d, 
     /* Drop the final typecount */
     put_page_and_type(page);
 
+    if ( !unlocked )
+        /* Now that we've dropped the type, we can unlock */
+        page_unlock(page);
+
     /* Change the owner */
     ASSERT(page_get_owner(page) == dom_cow);
     page_set_owner(page, d);
diff -r ecf95feef9ac -r 62ea5c05ae7b xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -35,13 +35,24 @@
 
 #include "mm-locks.h"
 
+static shr_handle_t next_handle = 1;
+
 /* Auditing of memory sharing code? */
 #define MEM_SHARING_AUDIT 0
 
 #if MEM_SHARING_AUDIT
+
+static mm_lock_t shr_lock;
+
+#define shr_lock()          _shr_lock()
+#define shr_unlock()        _shr_unlock()
+#define shr_locked_by_me()  _shr_locked_by_me()
+
 static int mem_sharing_audit(void);
+
 #define MEM_SHARING_DEBUG(_f, _a...)                                  \
     debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
+
 static struct list_head shr_audit_list;
 
 static inline void audit_add_list(struct page_info *page)
@@ -53,7 +64,21 @@ static inline void audit_del_list(struct
 {
     list_del(&page->shared_info->entry);
 }
+
+static inline int mem_sharing_page_lock(struct page_info *p)
+{
+    return 1;
+}
+#define mem_sharing_page_unlock(p)  ((void)0)
+
+#define get_next_handle()   next_handle++;
 #else
+
+#define shr_lock()          ((void)0)
+#define shr_unlock()        ((void)0)
+/* Only used inside audit code */
+//#define shr_locked_by_me()  ((void)0)
+
 static inline int mem_sharing_audit(void)
 {
     return 0;
@@ -61,6 +86,31 @@ static inline int mem_sharing_audit(void
 
 #define audit_add_list(p)  ((void)0)
 #define audit_del_list(p)  ((void)0)
+
+static inline int mem_sharing_page_lock(struct page_info *pg)
+{
+    int rc = page_lock(pg);
+    if ( rc )
+        preempt_disable();
+    return rc;
+}
+
+static inline void mem_sharing_page_unlock(struct page_info *pg)
+{
+    preempt_enable();
+    page_unlock(pg);
+}
+
+static inline shr_handle_t get_next_handle(void)
+{
+    /* Get the next handle get_page style */ 
+    uint64_t x, y = next_handle;
+    do {
+        x = y;
+    }
+    while ( (y = cmpxchg(&next_handle, x, x + 1)) != x );
+    return x + 1;
+}
 #endif /* MEM_SHARING_AUDIT */
 
 #define mem_sharing_enabled(d) \
@@ -73,7 +123,6 @@ static inline int mem_sharing_audit(void
 #undef page_to_mfn
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
-static shr_handle_t next_handle = 1;
 static atomic_t nr_saved_mfns   = ATOMIC_INIT(0); 
 static atomic_t nr_shared_mfns  = ATOMIC_INIT(0);
 
@@ -84,8 +133,6 @@ typedef struct gfn_info
     struct list_head list;
 } gfn_info_t;
 
-static mm_lock_t shr_lock;
-
 /* Returns true if list has only one entry. O(1) complexity. */
 static inline int list_has_one_entry(struct list_head *head)
 {
@@ -438,6 +485,28 @@ int mem_sharing_debug_gref(struct domain
     return mem_sharing_debug_gfn(d, gfn); 
 }
 
+static inline struct page_info *__grab_shared_page(mfn_t mfn)
+{
+    struct page_info *pg = NULL;
+
+    if ( !mfn_valid(mfn) )
+        return NULL;
+    pg = mfn_to_page(mfn);
+
+    /* If the page is not validated we can't lock it, and if it's  
+     * not validated it's obviously not shared. */
+    if ( !mem_sharing_page_lock(pg) )
+        return NULL;
+
+    if ( mem_sharing_lookup(mfn_x(mfn)) == NULL )
+    {
+        mem_sharing_page_unlock(pg);
+        return NULL;
+    }
+
+    return pg;
+}
+
 int mem_sharing_nominate_page(struct domain *d,
                               unsigned long gfn,
                               int expected_refcnt,
@@ -445,7 +514,7 @@ int mem_sharing_nominate_page(struct dom
 {
     p2m_type_t p2mt;
     mfn_t mfn;
-    struct page_info *page;
+    struct page_info *page = NULL; /* gcc... */
     int ret;
     struct gfn_info *gfn_info;
 
@@ -460,10 +529,17 @@ int mem_sharing_nominate_page(struct dom
         goto out;
 
     /* Return the handle if the page is already shared */
-    page = mfn_to_page(mfn);
     if ( p2m_is_shared(p2mt) ) {
-        *phandle = page->shared_info->handle;
+        struct page_info *pg = __grab_shared_page(mfn);
+        if ( !pg )
+        {
+            gdprintk(XENLOG_ERR, "Shared p2m entry gfn %lx, but could not "
+                        "grab page %lx dom %d\n", gfn, mfn_x(mfn), 
d->domain_id);
+            BUG();
+        }
+        *phandle = pg->shared_info->handle;
         ret = 0;
+        mem_sharing_page_unlock(pg);
         goto out;
     }
 
@@ -472,16 +548,25 @@ int mem_sharing_nominate_page(struct dom
         goto out;
 
     /* Try to convert the mfn to the sharable type */
+    page = mfn_to_page(mfn);
     ret = page_make_sharable(d, page, expected_refcnt); 
     if ( ret ) 
         goto out;
 
+    /* Now that the page is typed, we can lock it. There is no race
+     * because we're holding the p2m entry, so no one else could be
+     * nominating this gfn */
+    ret = -ENOENT;
+    if ( !mem_sharing_page_lock(page) )
+        goto out;
+
     /* Initialize the shared state */
     ret = -ENOMEM;
     if ( (page->shared_info = 
             xmalloc(struct page_sharing_info)) == NULL )
     {
-        BUG_ON(page_make_private(d, page) != 0);
+        /* Making a page private atomically unlocks it */
+        BUG_ON(page_make_private(d, page, MEM_SHARING_AUDIT) != 0);
         goto out;
     }
     page->shared_info->pg = page;
@@ -489,14 +574,14 @@ int mem_sharing_nominate_page(struct dom
     INIT_LIST_HEAD(&page->shared_info->entry);
 
     /* Create the handle */
-    page->shared_info->handle = next_handle++;  
+    page->shared_info->handle = get_next_handle();  
 
     /* Create the local gfn info */
     if ( (gfn_info = mem_sharing_gfn_alloc(page, d, gfn)) == NULL )
     {
         xfree(page->shared_info);
         page->shared_info = NULL;
-        BUG_ON(page_make_private(d, page) != 0);
+        BUG_ON(page_make_private(d, page, MEM_SHARING_AUDIT) != 0);
         goto out;
     }
 
@@ -511,7 +596,7 @@ int mem_sharing_nominate_page(struct dom
         xfree(page->shared_info);
         page->shared_info = NULL;
         /* NOTE: We haven't yet added this to the audit list. */
-        BUG_ON(page_make_private(d, page) != 0);
+        BUG_ON(page_make_private(d, page, MEM_SHARING_AUDIT) != 0);
         goto out;
     }
 
@@ -520,6 +605,7 @@ int mem_sharing_nominate_page(struct dom
 
     *phandle = page->shared_info->handle;
     audit_add_list(page);
+    mem_sharing_page_unlock(page);
     ret = 0;
 
 out:
@@ -531,7 +617,7 @@ out:
 int mem_sharing_share_pages(struct domain *sd, unsigned long sgfn, 
shr_handle_t sh,
                             struct domain *cd, unsigned long cgfn, 
shr_handle_t ch) 
 {
-    struct page_info *spage, *cpage;
+    struct page_info *spage, *cpage, *firstpg, *secondpg;
     struct list_head *le, *te;
     gfn_info_t *gfn;
     struct domain *d;
@@ -546,26 +632,53 @@ int mem_sharing_share_pages(struct domai
     smfn = get_gfn(sd, sgfn, &smfn_type);
     cmfn = get_gfn(cd, cgfn, &cmfn_type);
 
-    ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
-    spage = mem_sharing_lookup(mfn_x(smfn));
-    if ( spage == NULL )
-        goto err_out;
+    /* This tricky business is to avoid two callers deadlocking if 
+     * grabbing pages in opposite client/source order */
+    if ( mfn_x(smfn) < mfn_x(cmfn) )
+    {
+        ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
+        spage = firstpg = __grab_shared_page(smfn);
+        if ( spage == NULL )
+            goto err_out;
+
+        ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
+        cpage = secondpg = __grab_shared_page(cmfn);
+        if ( cpage == NULL )
+        {
+            mem_sharing_page_unlock(spage);
+            goto err_out;
+        }
+    } else {
+        ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
+        cpage = firstpg = __grab_shared_page(cmfn);
+        if ( cpage == NULL )
+            goto err_out;
+
+        ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
+        spage = secondpg = __grab_shared_page(smfn);
+        if ( spage == NULL )
+        {
+            mem_sharing_page_unlock(cpage);
+            goto err_out;
+        }
+    }
+
     ASSERT(smfn_type == p2m_ram_shared);
-    ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
-    cpage = mem_sharing_lookup(mfn_x(cmfn));
-    if ( cpage == NULL )
-        goto err_out;
     ASSERT(cmfn_type == p2m_ram_shared);
 
     /* Check that the handles match */
     if ( spage->shared_info->handle != sh )
     {
         ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
+        mem_sharing_page_unlock(secondpg);
+        mem_sharing_page_unlock(firstpg);
         goto err_out;
     }
     if ( cpage->shared_info->handle != ch )
     {
         ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
+        mem_sharing_page_unlock(secondpg);
+        mem_sharing_page_unlock(firstpg);
         goto err_out;
     }
 
@@ -604,6 +717,9 @@ int mem_sharing_share_pages(struct domai
     xfree(cpage->shared_info);
     cpage->shared_info = NULL;
 
+    mem_sharing_page_unlock(secondpg);
+    mem_sharing_page_unlock(firstpg);
+
     /* Free the client page */
     if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
         put_page(cpage);
@@ -644,7 +760,7 @@ int mem_sharing_unshare_page(struct doma
         return 0;
     }
 
-    page = mem_sharing_lookup(mfn_x(mfn));
+    page = __grab_shared_page(mfn);
     if ( page == NULL )
     {
         gdprintk(XENLOG_ERR, "Domain p2m is shared, but page is not: "
@@ -688,12 +804,13 @@ gfn_found:
             page->shared_info = NULL;
         }
 
-        put_gfn(d, gfn);
-        shr_unlock();
         put_page_and_type(page);
+        mem_sharing_page_unlock(page);
         if( last_gfn && 
             test_and_clear_bit(_PGC_allocated, &page->count_info)) 
             put_page(page);
+        put_gfn(d, gfn);
+        shr_unlock();
 
         return 0;
     }
@@ -704,7 +821,8 @@ gfn_found:
         mem_sharing_gfn_destroy(gfn_info, !last_gfn);
         xfree(page->shared_info);
         page->shared_info = NULL;
-        BUG_ON(page_make_private(d, page) != 0);
+        /* Making a page private atomically unlocks it */
+        BUG_ON(page_make_private(d, page, MEM_SHARING_AUDIT) != 0);
         goto private_page_found;
     }
         
@@ -712,6 +830,7 @@ gfn_found:
     page = mem_sharing_alloc_page(d, gfn);
     if ( !page ) 
     {
+        mem_sharing_page_unlock(old_page);
         put_gfn(d, gfn);
         shr_unlock();
         return -ENOMEM;
@@ -726,6 +845,7 @@ gfn_found:
     BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)) == 0);
     /* We've got a private page, we can commit the gfn destruction */
     mem_sharing_gfn_destroy(gfn_info, !last_gfn);
+    mem_sharing_page_unlock(old_page);
     put_page_and_type(old_page);
 
 private_page_found:    
@@ -749,6 +869,7 @@ private_page_found:
     /* Now that the gfn<->mfn map is properly established,
      * marking dirty is feasible */
     paging_mark_dirty(d, mfn_x(page_to_mfn(page)));
+    /* We do not need to unlock a private page */
     put_gfn(d, gfn);
     shr_unlock();
     return 0;
@@ -905,6 +1026,8 @@ int mem_sharing_domctl(struct domain *d,
 void __init mem_sharing_init(void)
 {
     printk("Initing memory sharing.\n");
+#if MEM_SHARING_AUDIT
     mm_lock_init(&shr_lock);
+#endif
 }
 
diff -r ecf95feef9ac -r 62ea5c05ae7b xen/arch/x86/mm/mm-locks.h
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -147,9 +147,9 @@ static inline void mm_enforce_order_unlo
  * hash tables. */
 
 declare_mm_lock(shr)
-#define shr_lock()         mm_lock(shr, &shr_lock)
-#define shr_unlock()       mm_unlock(&shr_lock)
-#define shr_locked_by_me() mm_locked_by_me(&shr_lock)
+#define _shr_lock()         mm_lock(shr, &shr_lock)
+#define _shr_unlock()       mm_unlock(&shr_lock)
+#define _shr_locked_by_me() mm_locked_by_me(&shr_lock)
 
 /* Nested P2M lock (per-domain)
  *
diff -r ecf95feef9ac -r 62ea5c05ae7b xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -594,7 +594,7 @@ int donate_page(
 int page_make_sharable(struct domain *d, 
                        struct page_info *page, 
                        int expected_refcnt);
-int page_make_private(struct domain *d, struct page_info *page);
+int page_make_private(struct domain *d, struct page_info *page, int unlocked);
 
 int map_ldt_shadow_page(unsigned int);
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.