
[Xen-changelog] [xen-unstable] tmem: shared ephemeral (SE) pool (clustering) fixes



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1243877847 -3600
# Node ID 4294a04b24bc55ffc18215d85a9eda517935b816
# Parent  027f19e97e2852b643c2f05413ba24d8286ff3a5
tmem: shared ephemeral (SE) pool (clustering) fixes

Tmem can share clean page cache pages among Linux domains in a
virtual cluster (currently ocfs2 is the only filesystem with a
patch on the Linux side).  When one domain "puts" (evicts) a
clean page, any domain in the cluster can "get" it, saving a
disk read.  This functionality is already present; these are
only bug fixes.
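
The put/get flow can be pictured with a minimal, purely
illustrative C sketch: an in-memory table stands in for a shared
ephemeral pool, keyed by (object id, page index).  All names, the
toy page size, and the fixed-size table are invented for this
sketch; the real pool lives inside the hypervisor and is reached
via tmem hypercalls, not function calls.

    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 16   /* toy page size, not Xen's 4096 */
    #define POOL_SLOTS 8

    struct se_page {
        int in_use;
        unsigned long obj_id;  /* e.g. an inode number, same on every node */
        unsigned long index;   /* page offset within that object */
        char data[PAGE_SIZE];
    };

    static struct se_page pool[POOL_SLOTS]; /* one pool shared by the cluster */

    /* "put": stash an evicted clean page; tmem may drop it at any time */
    static void se_put(unsigned long obj_id, unsigned long index,
                       const char *data)
    {
        for (int i = 0; i < POOL_SLOTS; i++) {
            if (!pool[i].in_use) {
                pool[i].in_use = 1;
                pool[i].obj_id = obj_id;
                pool[i].index = index;
                memcpy(pool[i].data, data, PAGE_SIZE);
                return;
            }
        }
        /* pool full: an ephemeral put is allowed to silently discard */
    }

    /* "get": any domain in the cluster can look the page up by the same key */
    static int se_get(unsigned long obj_id, unsigned long index, char *data)
    {
        for (int i = 0; i < POOL_SLOTS; i++) {
            if (pool[i].in_use && pool[i].obj_id == obj_id &&
                 pool[i].index == index) {
                memcpy(data, pool[i].data, PAGE_SIZE);
                return 1;   /* hit: a disk read was avoided */
            }
        }
        return 0;   /* miss: the caller falls back to disk */
    }

    int main(void)
    {
        char page[PAGE_SIZE];

        se_put(42, 0, "clean page data");   /* domain A evicts the page  */
        if (se_get(42, 0, page))            /* domain B asks for the key */
            printf("cluster hit: %s\n", page);
        return 0;
    }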

- fix bugs that occur when an SE pool is destroyed
- fix the parsing tool's handling of xm tmem-list output for SE pools
- fix incorrect locking in one case when destroying an SE pool
- clearer log messages when a destroyed SE pool is transferred to a
  surviving sharer
- minor cleanup: merge two mostly-duplicate object-destruction
  routines (see the sketch after this list)
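
A minimal sketch of the merge mentioned in the last item: one
walker with a 'selective' flag replaces two near-duplicate
routines (destroy everything vs. destroy only one client's
objects).  The list and type names here are invented; the real
pool_destroy_objs() walks rb-trees of tmem objects under the pool
lock.

    #include <stdio.h>
    #include <stdlib.h>

    typedef int cli_id_t;
    #define CLI_ID_NULL (-1)

    struct obj {
        cli_id_t last_client;
        struct obj *next;
    };

    /* destroy all objs, or only those whose last_client matches cli_id */
    static void destroy_objs(struct obj **head, int selective, cli_id_t cli_id)
    {
        struct obj **pp = head;

        while (*pp != NULL) {
            struct obj *o = *pp;
            if (!selective || o->last_client == cli_id) {
                *pp = o->next;   /* unlink and free, as obj_destroy() would */
                free(o);
            } else {
                pp = &o->next;   /* keep objects owned by other clients */
            }
        }
    }

    int main(void)
    {
        struct obj *head = NULL;

        for (cli_id_t id = 0; id < 3; id++) {
            struct obj *o = malloc(sizeof(*o));
            o->last_client = id;
            o->next = head;
            head = o;
        }
        destroy_objs(&head, 1, 1);            /* one client quits the pool */
        destroy_objs(&head, 0, CLI_ID_NULL);  /* whole pool is destroyed   */
        printf("list empty: %s\n", head == NULL ? "yes" : "no");
        return 0;
    }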

Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
---
 tools/misc/xen-tmem-list-parse.c |   24 ++++++++++--
 xen/common/tmem.c                |   73 ++++++++++++++-------------------------
 2 files changed, 47 insertions(+), 50 deletions(-)

diff -r 027f19e97e28 -r 4294a04b24bc tools/misc/xen-tmem-list-parse.c
--- a/tools/misc/xen-tmem-list-parse.c  Mon Jun 01 15:52:19 2009 +0100
+++ b/tools/misc/xen-tmem-list-parse.c  Mon Jun 01 18:37:27 2009 +0100
@@ -29,6 +29,20 @@ unsigned long long parse(char *s,char *m
     return ret;
 }
 
+unsigned long long parse_hex(char *s,char *match)
+{
+    char *s1 = strstr(s,match);
+    unsigned long long ret;
+
+    if ( s1 == NULL )
+        return 0LL;
+    s1 += 2;
+    if ( *s1++ != ':' )
+        return 0LL;
+    sscanf(s1,"%llx",&ret);
+    return ret;
+}
+
 unsigned long long parse2(char *s,char *match1, char *match2)
 {
     char match[3];
@@ -64,7 +78,7 @@ void parse_sharers(char *s, char *match,
         s1 += 2;
         if (*s1++ != ':')
             return;
-        while (*s1 <= '0' && *s1 <= '9')
+        while (*s1 >= '0' && *s1 <= '9')
             *b++ = *s1++;
         *b++ = ',';
         s1 = strstr(s1,match);
@@ -196,6 +210,8 @@ void parse_pool(char *s)
     unsigned long long flush_objs = parse(s,"ot");
 
     parse_string(s,"PT",pool_type,2);
+    if (pool_type[1] == 'S')
+        return; /* no need to repeat print data for shared pools */
     printf("domid%lu,id%lu[%s]:pgp=%llu(max=%llu) obj=%llu(%llu) "
            "objnode=%llu(%llu) puts=%llu/%llu/%llu(dup=%llu/%llu) "
            "gets=%llu/%llu(%llu%%) "
@@ -216,8 +232,8 @@ void parse_shared_pool(char *s)
     char pool_type[3];
     char buf[BUFSIZE];
     unsigned long pool_id = parse(s,"PI");
-    unsigned long long uid0 = parse(s,"U0");
-    unsigned long long uid1 = parse(s,"U1");
+    unsigned long long uid0 = parse_hex(s,"U0");
+    unsigned long long uid1 = parse_hex(s,"U1");
     unsigned long long pgp_count = parse(s,"Pc");
     unsigned long long max_pgp_count = parse(s,"Pm");
     unsigned long long obj_count = parse(s,"Oc");
@@ -238,7 +254,7 @@ void parse_shared_pool(char *s)
 
     parse_string(s,"PT",pool_type,2);
     parse_sharers(s,"SC",buf,BUFSIZE);
-    printf("poolid=%lu[%s] uuid=%llu.%llu, shared-by:%s: "
+    printf("poolid=%lu[%s] uuid=%llx.%llx, shared-by:%s: "
            "pgp=%llu(max=%llu) obj=%llu(%llu) "
            "objnode=%llu(%llu) puts=%llu/%llu/%llu(dup=%llu/%llu) "
            "gets=%llu/%llu(%llu%%) "
diff -r 027f19e97e28 -r 4294a04b24bc xen/common/tmem.c
--- a/xen/common/tmem.c Mon Jun 01 15:52:19 2009 +0100
+++ b/xen/common/tmem.c Mon Jun 01 18:37:27 2009 +0100
@@ -581,21 +581,6 @@ static NOINLINE void obj_free(obj_t *obj
     tmem_free(obj,sizeof(obj_t),pool);
 }
 
-static NOINLINE void obj_rb_destroy_node(struct rb_node *node)
-{
-    obj_t * obj;
-
-    if ( node == NULL )
-        return;
-    obj_rb_destroy_node(node->rb_left);
-    obj_rb_destroy_node(node->rb_right);
-    obj = container_of(node, obj_t, rb_tree_node);
-    tmem_spin_lock(&obj->obj_spinlock);
-    ASSERT(obj->no_evict == 0);
-    radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free);
-    obj_free(obj,1);
-}
-
 static NOINLINE int obj_rb_insert(struct rb_root *root, obj_t *obj)
 {
     struct rb_node **new, *parent = NULL;
@@ -650,26 +635,15 @@ static NOINLINE obj_t * obj_new(pool_t *
 }
 
 /* free an object after destroying any pgps in it */
-static NOINLINE void obj_destroy(obj_t *obj)
+static NOINLINE void obj_destroy(obj_t *obj, int no_rebalance)
 {
     ASSERT_WRITELOCK(&obj->pool->pool_rwlock);
     radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free);
-    obj_free(obj,0);
-}
-
-/* destroy all objects in a pool */
-static NOINLINE void obj_rb_destroy_all(pool_t *pool)
-{
-    int i;
-
-    tmem_write_lock(&pool->pool_rwlock);
-    for (i = 0; i < OBJ_HASH_BUCKETS; i++)
-        obj_rb_destroy_node(pool->obj_rb_root[i].rb_node);
-    tmem_write_unlock(&pool->pool_rwlock);
-}
-
-/* destroys all objects in a pool that have last_client set to cli_id */
-static void obj_free_selective(pool_t *pool, cli_id_t cli_id)
+    obj_free(obj,no_rebalance);
+}
+
+/* destroys all objs in a pool, or only if obj->last_client matches cli_id */
+static void pool_destroy_objs(pool_t *pool, bool_t selective, cli_id_t cli_id)
 {
     struct rb_node *node;
     obj_t *obj;
@@ -684,8 +658,11 @@ static void obj_free_selective(pool_t *p
             obj = container_of(node, obj_t, rb_tree_node);
             tmem_spin_lock(&obj->obj_spinlock);
             node = rb_next(node);
-            if ( obj->last_client == cli_id )
-                obj_destroy(obj);
+            ASSERT(obj->no_evict == 0);
+            if ( !selective )
+                obj_destroy(obj,1);
+            else if ( obj->last_client == cli_id )
+                obj_destroy(obj,0);
             else
                 tmem_spin_unlock(&obj->obj_spinlock);
         }
@@ -740,8 +717,9 @@ static int shared_pool_join(pool_t *pool
         return -1;
     sl->client = new_client;
     list_add_tail(&sl->share_list, &pool->share_list);
-    printk("adding new %s %d to shared pool owned by %s %d\n",
-        client_str, new_client->cli_id, client_str, pool->client->cli_id);
+    if ( new_client->cli_id != pool->client->cli_id )
+        printk("adding new %s %d to shared pool owned by %s %d\n",
+            client_str, new_client->cli_id, client_str, pool->client->cli_id);
     return ++pool->shared_count;
 }
 
@@ -766,6 +744,10 @@ static NOINLINE void shared_pool_reassig
         if (new_client->pools[poolid] == pool)
             break;
     ASSERT(poolid != MAX_POOLS_PER_DOMAIN);
+    new_client->eph_count += _atomic_read(pool->pgp_count);
+    old_client->eph_count -= _atomic_read(pool->pgp_count);
+    list_splice_init(&old_client->ephemeral_page_list,
+                     &new_client->ephemeral_page_list);
     printk("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n",
         cli_id_str, old_client->cli_id, cli_id_str, new_client->cli_id, poolid);
     pool->pool_id = poolid;
@@ -781,7 +763,8 @@ static NOINLINE int shared_pool_quit(poo
     ASSERT(is_shared(pool));
     ASSERT(pool->client != NULL);
     
-    obj_free_selective(pool,cli_id);
+    ASSERT_WRITELOCK(&tmem_rwlock);
+    pool_destroy_objs(pool,1,cli_id);
     list_for_each_entry(sl,&pool->share_list, share_list)
     {
         if (sl->client->cli_id != cli_id)
@@ -812,15 +795,15 @@ static void pool_flush(pool_t *pool, cli
     ASSERT(pool != NULL);
     if ( (is_shared(pool)) && (shared_pool_quit(pool,cli_id) > 0) )
     {
-        printk("tmem: unshared shared pool %d from %s=%d\n",
-           pool->pool_id, cli_id_str,pool->client->cli_id);
+        printk("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n",
+           cli_id_str, cli_id, pool->pool_id, cli_id_str,pool->client->cli_id);
         return;
     }
     printk("%s %s-%s tmem pool ",destroy?"destroying":"flushing",
         is_persistent(pool) ? "persistent" : "ephemeral" ,
         is_shared(pool) ? "shared" : "private");
     printk("%s=%d pool_id=%d\n", 
cli_id_str,pool->client->cli_id,pool->pool_id);
-    obj_rb_destroy_all(pool);
+    pool_destroy_objs(pool,0,CLI_ID_NULL);
     if ( destroy )
     {
         pool->client->pools[pool->pool_id] = NULL;
@@ -1378,7 +1361,7 @@ static NOINLINE int do_tmem_flush_object
     if ( obj == NULL )
         goto out;
     tmem_write_lock(&pool->pool_rwlock);
-    obj_destroy(obj);
+    obj_destroy(obj,0);
     pool->flush_objs_found++;
     tmem_write_unlock(&pool->pool_rwlock);
 
@@ -1455,7 +1438,7 @@ static NOINLINE int do_tmem_new_pool(uin
             {
                 if ( shpool->uuid[0] == uuid_lo && shpool->uuid[1] == uuid_hi )
                 {
-                    printk("(matches shared pool uuid=%"PRIx64".%"PRIu64") ",
+                    printk("(matches shared pool uuid=%"PRIx64".%"PRIx64") ",
                         uuid_hi, uuid_lo);
                     printk("pool_id=%d\n",d_poolid);
                     client->pools[d_poolid] = global_shared_pools[s_poolid];
@@ -1507,10 +1490,8 @@ static int tmemc_freeze_pools(int cli_id
     if ( cli_id == CLI_ID_NULL )
     {
         list_for_each_entry(client,&global_client_list,client_list)
-        {
             client->frozen = freeze;
-            printk("tmem: all pools %s for all %ss\n",s,client_str);
-        }
+        printk("tmem: all pools %s for all %ss\n",s,client_str);
     }
     else
     {
@@ -1878,7 +1859,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
         }
     }
 
-    if ( op.cmd == TMEM_NEW_POOL )
+    if ( op.cmd == TMEM_NEW_POOL || op.cmd == TMEM_DESTROY_POOL )
     {
         if ( !tmem_write_lock_set )
         {
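
For reference, a small standalone harness (not part of the patch)
exercising the new parse_hex() against a made-up sample of the
field syntax xm tmem-list emits ("U0:<hex>").  The sample line and
field values are hypothetical; unlike the committed version, ret
is initialised here so a failed sscanf() cannot return stack
garbage.

    #include <stdio.h>
    #include <string.h>

    unsigned long long parse_hex(char *s, char *match)
    {
        char *s1 = strstr(s, match);
        unsigned long long ret = 0;

        if (s1 == NULL)
            return 0LL;
        s1 += 2;               /* skip the two-character field name */
        if (*s1++ != ':')
            return 0LL;
        sscanf(s1, "%llx", &ret);
        return ret;
    }

    int main(void)
    {
        char line[] = "PI:0,PT:ES,U0:deadbeef,U1:cafe"; /* made-up sample */

        /* uuid halves are hex, so parse_hex (not the decimal parse())
         * must be used, which is exactly what the patch fixes */
        printf("uuid=%llx.%llx\n",
               parse_hex(line, "U0"), parse_hex(line, "U1"));
        return 0;
    }

Built with gcc, this prints "uuid=deadbeef.cafe", matching the
%llx format the parser now uses for shared pool uuids.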

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
