[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] PING? RE: [PATCH] tmem (hypervisor-side): ABI v1 to handle long object-ids (XEN-4.0-TESTING and XEN-UNSTABLE)
Just returned from a week of vacation. I see these patches have not yet been applied to unstable for 4.0-testing, hypervisor or tools. Did I drop the ball on something? (sorry if I am suffering from post-vacation amnesia). Thanks, Dan > -----Original Message----- > From: Dan Magenheimer > Sent: Friday, September 03, 2010 9:48 AM > To: Xen-Devel (xen-devel@xxxxxxxxxxxxxxxxxxx) > Cc: Ian Jackson; Keir Fraser > Subject: [PATCH] tmem (hypervisor-side): ABI v1 to handle long object- > ids (XEN-4.0-TESTING and XEN-UNSTABLE) > > [PATCH] tmem (hypervisor-side): move to new ABI version to handle > long object-ids > > Please apply patch to both xen-4.0-testing and xen-unstable > (same patch applies cleanly to both). > > (Note to Keir/Ian: These patches should be applied > together, but I'm not clear on how to submit patches > that cross MAINTAINERS boundaries as this one does.) > > After a great deal of discussion and review with linux > kernel developers, it appears there are "next-generation" > filesystems (such as btrfs, xfs, Lustre) that will not > be able to use tmem due to an ABI limitation... a field > that represents a unique file identifier is 64-bits in > the tmem ABI and may need to be as large as 192-bits. > So to support these guest filesystems, the tmem ABI must be > revised, from "v0" to "v1". > > I *think* it is still the case that tmem is experimental > and is not used anywhere yet in production. > > The tmem ABI is designed to support multiple revisions, > so the Xen tmem implementation could be updated to > handle both v0 and v1. However this is a bit > messy and would require data structures for both v0 > and v1 to appear in public Xen header files. > > I am inclined to update the Xen tmem implementation > to only support v1 and gracefully fail v0. 
This would > result in only a performance loss (as if tmem were > disabled) for newly launched tmem-v0-enabled guests, > but live-migration between old tmem-v0 Xen and new > tmem-v1 Xen machines would fail, and saved tmem-v0 > guests will not be able to be restored on a tmem-v1 > Xen machine. I would plan to update both pre-4.0.2 > and unstable (future 4.1) to only support v1. > > I believe these restrictions are reasonable at this > point in the tmem lifecycle, though they may not > be reasonable in the near future; should the tmem > ABI need to be revised from v1 to v2, I understand > backwards compatibility will be required. > > Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx> > > diff -r 07ac5459b250 xen/common/tmem.c > --- a/xen/common/tmem.c Wed Aug 25 09:23:31 2010 +0100 > +++ b/xen/common/tmem.c Thu Sep 02 16:43:33 2010 -0600 > @@ -26,7 +26,7 @@ > #define EXPORT /* indicates code other modules are dependent upon */ > #define FORWARD > > -#define TMEM_SPEC_VERSION 0 > +#define TMEM_SPEC_VERSION 1 > > /************ INTERFACE TO TMEM HOST-DEPENDENT (tmh) CODE > ************/ > > @@ -149,14 +149,13 @@ typedef struct share_list sharelist_t; > > #define OBJ_HASH_BUCKETS 256 /* must be power of two */ > #define OBJ_HASH_BUCKETS_MASK (OBJ_HASH_BUCKETS-1) > -#define OBJ_HASH(_oid) (tmh_hash(_oid, BITS_PER_LONG) & > OBJ_HASH_BUCKETS_MASK) > > struct tm_pool { > bool_t shared; > bool_t persistent; > bool_t is_dying; > int pageshift; /* 0 == 2**12 */ > - struct list_head pool_list; /* FIXME do we need this anymore? 
*/ > + struct list_head pool_list; > client_t *client; > uint64_t uuid[2]; /* 0 for private, non-zero for shared */ > uint32_t pool_id; > @@ -189,9 +188,14 @@ typedef struct tm_pool pool_t; > #define is_shared(_p) (_p->shared) > #define is_private(_p) (!(_p->shared)) > > +struct oid { > + uint64_t oid[3]; > +}; > +typedef struct oid OID; > + > struct tmem_object_root { > DECL_SENTINEL > - uint64_t oid; > + OID oid; > struct rb_node rb_tree_node; /* protected by pool->pool_rwlock */ > unsigned long objnode_count; /* atomicity depends on obj_spinlock > */ > long pgp_count; /* atomicity depends on obj_spinlock */ > @@ -217,12 +221,14 @@ struct tmem_page_descriptor { > struct list_head client_inv_pages; > }; > union { > - struct list_head client_eph_pages; > - struct list_head pool_pers_pages; > - }; > - union { > - obj_t *obj; > - uint64_t inv_oid; /* used for invalid list only */ > + struct { > + union { > + struct list_head client_eph_pages; > + struct list_head pool_pers_pages; > + }; > + obj_t *obj; > + } us; > + OID inv_oid; /* used for invalid list only */ > }; > pagesize_t size; /* 0 == PAGE_SIZE (pfp), -1 == data invalid, > else compressed data (cdata) */ > @@ -467,9 +473,9 @@ static NOINLINE int pcd_associate(pgp_t > > if ( !tmh_dedup_enabled() ) > return 0; > - ASSERT(pgp->obj != NULL); > - ASSERT(pgp->obj->pool != NULL); > - ASSERT(!pgp->obj->pool->persistent); > + ASSERT(pgp->us.obj != NULL); > + ASSERT(pgp->us.obj->pool != NULL); > + ASSERT(!pgp->us.obj->pool->persistent); > if ( cdata == NULL ) > { > ASSERT(pgp->pfp != NULL); > @@ -528,7 +534,7 @@ static NOINLINE int pcd_associate(pgp_t > /* match! if not compressed, free the no-longer-needed > page */ > /* but if compressed, data is assumed static so don't > free! 
*/ > if ( cdata == NULL ) > - tmem_page_free(pgp->obj->pool,pgp->pfp); > + tmem_page_free(pgp->us.obj->pool,pgp->pfp); > deduped_puts++; > goto match; > } > @@ -540,7 +546,7 @@ static NOINLINE int pcd_associate(pgp_t > ret = -ENOMEM; > goto unlock; > } else if ( cdata != NULL ) { > - if ( (pcd->cdata = tmem_malloc_bytes(csize,pgp->obj->pool)) == > NULL ) > + if ( (pcd->cdata = tmem_malloc_bytes(csize,pgp->us.obj->pool)) > == NULL ) > { > tmem_free(pcd,sizeof(pcd_t),NULL); > ret = -ENOMEM; > @@ -561,11 +567,11 @@ static NOINLINE int pcd_associate(pgp_t > pcd->size = 0; > pcd->tze = NULL; > } else if ( pfp_size < PAGE_SIZE && > - ((pcd->tze = tmem_malloc_bytes(pfp_size,pgp->obj->pool)) != > NULL) ) { > + ((pcd->tze = tmem_malloc_bytes(pfp_size,pgp->us.obj->pool)) > != NULL) ) { > tmh_tze_copy_from_pfp(pcd->tze,pgp->pfp,pfp_size); > pcd->size = pfp_size; > pcd_tot_tze_size += pfp_size; > - tmem_page_free(pgp->obj->pool,pgp->pfp); > + tmem_page_free(pgp->us.obj->pool,pgp->pfp); > } else { > pcd->pfp = pgp->pfp; > pcd->size = PAGE_SIZE; > @@ -602,9 +608,9 @@ static NOINLINE pgp_t *pgp_alloc(obj_t * > pool = obj->pool; > if ( (pgp = tmem_malloc(pgp_t, pool)) == NULL ) > return NULL; > - pgp->obj = obj; > + pgp->us.obj = obj; > INIT_LIST_HEAD(&pgp->global_eph_pages); > - INIT_LIST_HEAD(&pgp->client_eph_pages); > + INIT_LIST_HEAD(&pgp->us.client_eph_pages); > pgp->pfp = NULL; > if ( tmh_dedup_enabled() ) > { > @@ -642,7 +648,7 @@ static NOINLINE void pgp_free_data(pgp_t > else if ( pgp_size ) > tmem_free(pgp->cdata,pgp_size,pool); > else > - tmem_page_free(pgp->obj->pool,pgp->pfp); > + tmem_page_free(pgp->us.obj->pool,pgp->pfp); > if ( pool != NULL && pgp_size ) > { > pool->client->compressed_pages--; > @@ -657,18 +663,18 @@ static NOINLINE void pgp_free(pgp_t *pgp > pool_t *pool = NULL; > > ASSERT_SENTINEL(pgp,PGD); > - ASSERT(pgp->obj != NULL); > - ASSERT_SENTINEL(pgp->obj,OBJ); > - ASSERT_SENTINEL(pgp->obj->pool,POOL); > - ASSERT(pgp->obj->pool->client != NULL); > + 
ASSERT(pgp->us.obj != NULL); > + ASSERT_SENTINEL(pgp->us.obj,OBJ); > + ASSERT_SENTINEL(pgp->us.obj->pool,POOL); > + ASSERT(pgp->us.obj->pool->client != NULL); > if ( from_delete ) > - ASSERT(pgp_lookup_in_obj(pgp->obj,pgp->index) == NULL); > - ASSERT(pgp->obj->pool != NULL); > - pool = pgp->obj->pool; > + ASSERT(pgp_lookup_in_obj(pgp->us.obj,pgp->index) == NULL); > + ASSERT(pgp->us.obj->pool != NULL); > + pool = pgp->us.obj->pool; > if ( is_ephemeral(pool) ) > { > ASSERT(list_empty(&pgp->global_eph_pages)); > - ASSERT(list_empty(&pgp->client_eph_pages)); > + ASSERT(list_empty(&pgp->us.client_eph_pages)); > } > pgp_free_data(pgp, pool); > atomic_dec_and_assert(global_pgp_count); > @@ -676,12 +682,12 @@ static NOINLINE void pgp_free(pgp_t *pgp > pgp->size = -1; > if ( is_persistent(pool) && pool->client->live_migrating ) > { > - pgp->inv_oid = pgp->obj->oid; > + pgp->inv_oid = pgp->us.obj->oid; > pgp->pool_id = pool->pool_id; > return; > } > INVERT_SENTINEL(pgp,PGD); > - pgp->obj = NULL; > + pgp->us.obj = NULL; > pgp->index = -1; > tmem_free(pgp,sizeof(pgp_t),pool); > } > @@ -693,7 +699,7 @@ static NOINLINE void pgp_free_from_inv_l > ASSERT_SENTINEL(pool,POOL); > ASSERT_SENTINEL(pgp,PGD); > INVERT_SENTINEL(pgp,PGD); > - pgp->obj = NULL; > + pgp->us.obj = NULL; > pgp->index = -1; > tmem_free(pgp,sizeof(pgp_t),pool); > } > @@ -704,18 +710,18 @@ static void pgp_delist(pgp_t *pgp, bool_ > client_t *client; > > ASSERT(pgp != NULL); > - ASSERT(pgp->obj != NULL); > - ASSERT(pgp->obj->pool != NULL); > - client = pgp->obj->pool->client; > + ASSERT(pgp->us.obj != NULL); > + ASSERT(pgp->us.obj->pool != NULL); > + client = pgp->us.obj->pool->client; > ASSERT(client != NULL); > - if ( is_ephemeral(pgp->obj->pool) ) > + if ( is_ephemeral(pgp->us.obj->pool) ) > { > if ( !no_eph_lock ) > tmem_spin_lock(&eph_lists_spinlock); > - if ( !list_empty(&pgp->client_eph_pages) ) > + if ( !list_empty(&pgp->us.client_eph_pages) ) > client->eph_count--; > ASSERT(client->eph_count >= 0); > - 
list_del_init(&pgp->client_eph_pages); > + list_del_init(&pgp->us.client_eph_pages); > if ( !list_empty(&pgp->global_eph_pages) ) > global_eph_count--; > ASSERT(global_eph_count >= 0); > @@ -728,12 +734,12 @@ static void pgp_delist(pgp_t *pgp, bool_ > tmem_spin_lock(&pers_lists_spinlock); > list_add_tail(&pgp->client_inv_pages, > &client->persistent_invalidated_list); > - if ( pgp != pgp->obj->pool->cur_pgp ) > - list_del_init(&pgp->pool_pers_pages); > + if ( pgp != pgp->us.obj->pool->cur_pgp ) > + list_del_init(&pgp->us.pool_pers_pages); > tmem_spin_unlock(&pers_lists_spinlock); > } else { > tmem_spin_lock(&pers_lists_spinlock); > - list_del_init(&pgp->pool_pers_pages); > + list_del_init(&pgp->us.pool_pers_pages); > tmem_spin_unlock(&pers_lists_spinlock); > } > } > @@ -745,10 +751,10 @@ static NOINLINE void pgp_delete(pgp_t *p > uint64_t life; > > ASSERT(pgp != NULL); > - ASSERT(pgp->obj != NULL); > - ASSERT(pgp->obj->pool != NULL); > + ASSERT(pgp->us.obj != NULL); > + ASSERT(pgp->us.obj->pool != NULL); > life = get_cycles() - pgp->timestamp; > - pgp->obj->pool->sum_life_cycles += life; > + pgp->us.obj->pool->sum_life_cycles += life; > pgp_delist(pgp, no_eph_lock); > pgp_free(pgp,1); > } > @@ -758,11 +764,11 @@ static NOINLINE void pgp_destroy(void *v > { > pgp_t *pgp = (pgp_t *)v; > > - ASSERT_SPINLOCK(&pgp->obj->obj_spinlock); > + ASSERT_SPINLOCK(&pgp->us.obj->obj_spinlock); > pgp_delist(pgp,0); > - ASSERT(pgp->obj != NULL); > - pgp->obj->pgp_count--; > - ASSERT(pgp->obj->pgp_count >= 0); > + ASSERT(pgp->us.obj != NULL); > + pgp->us.obj->pgp_count--; > + ASSERT(pgp->us.obj->pgp_count >= 0); > pgp_free(pgp,0); > } > > @@ -849,37 +855,74 @@ static void rtn_free(rtn_t *rtn) > > /************ POOL OBJECT COLLECTION MANIPULATION ROUTINES > *******************/ > > +int oid_compare(OID *left, OID *right) > +{ > + if ( left->oid[2] == right->oid[2] ) > + { > + if ( left->oid[1] == right->oid[1] ) > + { > + if ( left->oid[0] == right->oid[0] ) > + return 0; > + else if 
( left->oid[0] < right->oid[0] ) > + return -1; > + else > + return 1; > + } > + else if ( left->oid[1] < right->oid[1] ) > + return -1; > + else > + return 1; > + } > + else if ( left->oid[2] < right->oid[2] ) > + return -1; > + else > + return 1; > +} > + > +void oid_set_invalid(OID *oidp) > +{ > + oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL; > +} > + > +unsigned oid_hash(OID *oidp) > +{ > + return (tmh_hash(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2], > + BITS_PER_LONG) & OBJ_HASH_BUCKETS_MASK); > +} > + > /* searches for object==oid in pool, returns locked object if found */ > -static NOINLINE obj_t * obj_find(pool_t *pool, uint64_t oid) > +static NOINLINE obj_t * obj_find(pool_t *pool, OID *oidp) > { > struct rb_node *node; > obj_t *obj; > > restart_find: > tmem_read_lock(&pool->pool_rwlock); > - node = pool->obj_rb_root[OBJ_HASH(oid)].rb_node; > + node = pool->obj_rb_root[oid_hash(oidp)].rb_node; > while ( node ) > { > obj = container_of(node, obj_t, rb_tree_node); > - if ( obj->oid == oid ) > + switch ( oid_compare(&obj->oid, oidp) ) > { > - if ( tmh_lock_all ) > - obj->no_evict = 1; > - else > - { > - if ( !tmem_spin_trylock(&obj->obj_spinlock) ) > + case 0: /* equal */ > + if ( tmh_lock_all ) > + obj->no_evict = 1; > + else > { > + if ( !tmem_spin_trylock(&obj->obj_spinlock) ) > + { > + tmem_read_unlock(&pool->pool_rwlock); > + goto restart_find; > + } > tmem_read_unlock(&pool->pool_rwlock); > - goto restart_find; > } > - tmem_read_unlock(&pool->pool_rwlock); > - } > - return obj; > + return obj; > + case -1: > + node = node->rb_left; > + break; > + case 1: > + node = node->rb_right; > } > - else if ( oid < obj->oid ) > - node = node->rb_left; > - else > - node = node->rb_right; > } > tmem_read_unlock(&pool->pool_rwlock); > return NULL; > @@ -889,7 +932,7 @@ static NOINLINE void obj_free(obj_t *obj > static NOINLINE void obj_free(obj_t *obj, int no_rebalance) > { > pool_t *pool; > - uint64_t old_oid; > + OID old_oid; > 
ASSERT_SPINLOCK(&obj->obj_spinlock); > ASSERT(obj != NULL); > @@ -908,12 +951,12 @@ static NOINLINE void obj_free(obj_t *obj > INVERT_SENTINEL(obj,OBJ); > obj->pool = NULL; > old_oid = obj->oid; > - obj->oid = -1; > + oid_set_invalid(&obj->oid); > obj->last_client = CLI_ID_NULL; > atomic_dec_and_assert(global_obj_count); > /* use no_rebalance only if all objects are being destroyed anyway > */ > if ( !no_rebalance ) > - rb_erase(&obj->rb_tree_node,&pool- > >obj_rb_root[OBJ_HASH(old_oid)]); > + rb_erase(&obj->rb_tree_node,&pool- > >obj_rb_root[oid_hash(&old_oid)]); > tmem_free(obj,sizeof(obj_t),pool); > } > > @@ -927,12 +970,17 @@ static NOINLINE int obj_rb_insert(struct > { > this = container_of(*new, obj_t, rb_tree_node); > parent = *new; > - if ( obj->oid < this->oid ) > - new = &((*new)->rb_left); > - else if ( obj->oid > this->oid ) > - new = &((*new)->rb_right); > - else > - return 0; > + switch ( oid_compare(&obj->oid, &this->oid) ) > + { > + case 0: > + return 0; > + case -1: > + new = &((*new)->rb_left); > + break; > + case 1: > + new = &((*new)->rb_right); > + break; > + } > } > rb_link_node(&obj->rb_tree_node, parent, new); > rb_insert_color(&obj->rb_tree_node, root); > @@ -943,7 +991,7 @@ static NOINLINE int obj_rb_insert(struct > * allocate, initialize, and insert an tmem_object_root > * (should be called only if find failed) > */ > -static NOINLINE obj_t * obj_new(pool_t *pool, uint64_t oid) > +static NOINLINE obj_t * obj_new(pool_t *pool, OID *oidp) > { > obj_t *obj; > > @@ -958,13 +1006,13 @@ static NOINLINE obj_t * obj_new(pool_t * > INIT_RADIX_TREE(&obj->tree_root,0); > spin_lock_init(&obj->obj_spinlock); > obj->pool = pool; > - obj->oid = oid; > + obj->oid = *oidp; > obj->objnode_count = 0; > obj->pgp_count = 0; > obj->last_client = CLI_ID_NULL; > SET_SENTINEL(obj,OBJ); > tmem_spin_lock(&obj->obj_spinlock); > - obj_rb_insert(&pool->obj_rb_root[OBJ_HASH(oid)], obj); > + obj_rb_insert(&pool->obj_rb_root[oid_hash(oidp)], obj); > obj->no_evict = 1; > 
ASSERT_SPINLOCK(&obj->obj_spinlock); > return obj; > @@ -1256,7 +1304,7 @@ static void client_freeze(client_t *clie > > static bool_t tmem_try_to_evict_pgp(pgp_t *pgp, bool_t > *hold_pool_rwlock) > { > - obj_t *obj = pgp->obj; > + obj_t *obj = pgp->us.obj; > pool_t *pool = obj->pool; > client_t *client = pool->client; > uint16_t firstbyte = pgp->firstbyte; > @@ -1280,8 +1328,8 @@ static bool_t tmem_try_to_evict_pgp(pgp_ > pgp->eviction_attempted++; > list_del(&pgp->global_eph_pages); > list_add_tail(&pgp- > >global_eph_pages,&global_ephemeral_page_list); > - list_del(&pgp->client_eph_pages); > - list_add_tail(&pgp->client_eph_pages,&client- > >ephemeral_page_list); > + list_del(&pgp->us.client_eph_pages); > + list_add_tail(&pgp->us.client_eph_pages,&client- > >ephemeral_page_list); > goto pcd_unlock; > } > } > @@ -1314,7 +1362,7 @@ static int tmem_evict(void) > if ( (client != NULL) && client_over_quota(client) && > !list_empty(&client->ephemeral_page_list) ) > { > - list_for_each_entry_safe(pgp,pgp2,&client- > >ephemeral_page_list,client_eph_pages) > + list_for_each_entry_safe(pgp,pgp2,&client- > >ephemeral_page_list,us.client_eph_pages) > if ( tmem_try_to_evict_pgp(pgp,&hold_pool_rwlock) ) > goto found; > } else if ( list_empty(&global_ephemeral_page_list) ) { > @@ -1331,7 +1379,7 @@ found: > found: > ASSERT(pgp != NULL); > ASSERT_SENTINEL(pgp,PGD); > - obj = pgp->obj; > + obj = pgp->us.obj; > ASSERT(obj != NULL); > ASSERT(obj->no_evict == 0); > ASSERT(obj->pool != NULL); > @@ -1407,16 +1455,16 @@ static NOINLINE int do_tmem_put_compress > DECL_LOCAL_CYC_COUNTER(compress); > > ASSERT(pgp != NULL); > - ASSERT(pgp->obj != NULL); > - ASSERT_SPINLOCK(&pgp->obj->obj_spinlock); > - ASSERT(pgp->obj->pool != NULL); > - ASSERT(pgp->obj->pool->client != NULL); > + ASSERT(pgp->us.obj != NULL); > + ASSERT_SPINLOCK(&pgp->us.obj->obj_spinlock); > + ASSERT(pgp->us.obj->pool != NULL); > + ASSERT(pgp->us.obj->pool->client != NULL); > #ifdef __i386__ > return -ENOMEM; > #endif > > 
if ( pgp->pfp != NULL ) > - pgp_free_data(pgp, pgp->obj->pool); > + pgp_free_data(pgp, pgp->us.obj->pool); > START_CYC_COUNTER(compress); > ret = tmh_compress_from_client(cmfn, &dst, &size, cva); > if ( (ret == -EFAULT) || (ret == 0) ) > @@ -1424,10 +1472,10 @@ static NOINLINE int do_tmem_put_compress > else if ( (size == 0) || (size >= tmem_subpage_maxsize()) ) { > ret = 0; > goto out; > - } else if ( tmh_dedup_enabled() && !is_persistent(pgp->obj->pool) > ) { > + } else if ( tmh_dedup_enabled() && !is_persistent(pgp->us.obj- > >pool) ) { > if ( (ret = pcd_associate(pgp,dst,size)) == -ENOMEM ) > goto out; > - } else if ( (p = tmem_malloc_bytes(size,pgp->obj->pool)) == NULL ) > { > + } else if ( (p = tmem_malloc_bytes(size,pgp->us.obj->pool)) == > NULL ) { > ret = -ENOMEM; > goto out; > } else { > @@ -1435,8 +1483,8 @@ static NOINLINE int do_tmem_put_compress > pgp->cdata = p; > } > pgp->size = size; > - pgp->obj->pool->client->compressed_pages++; > - pgp->obj->pool->client->compressed_sum_size += size; > + pgp->us.obj->pool->client->compressed_pages++; > + pgp->us.obj->pool->client->compressed_sum_size += size; > ret = 1; > > out: > @@ -1456,7 +1504,7 @@ static NOINLINE int do_tmem_dup_put(pgp_ > ASSERT(pgp != NULL); > ASSERT(pgp->pfp != NULL); > ASSERT(pgp->size != -1); > - obj = pgp->obj; > + obj = pgp->us.obj; > ASSERT_SPINLOCK(&obj->obj_spinlock); > ASSERT(obj != NULL); > pool = obj->pool; > @@ -1535,7 +1583,7 @@ cleanup: > > > static NOINLINE int do_tmem_put(pool_t *pool, > - uint64_t oid, uint32_t index, > + OID *oidp, uint32_t index, > tmem_cli_mfn_t cmfn, pagesize_t tmem_offset, > pagesize_t pfn_offset, pagesize_t len, void *cva) > { > @@ -1547,7 +1595,7 @@ static NOINLINE int do_tmem_put(pool_t * > ASSERT(pool != NULL); > pool->puts++; > /* does page already exist (dup)? 
if so, handle specially */ > - if ( (obj = objfound = obj_find(pool,oid)) != NULL ) > + if ( (obj = objfound = obj_find(pool,oidp)) != NULL ) > { > ASSERT_SPINLOCK(&objfound->obj_spinlock); > if ((pgp = pgp_lookup_in_obj(objfound, index)) != NULL) > @@ -1561,7 +1609,7 @@ static NOINLINE int do_tmem_put(pool_t * > if ( (objfound == NULL) ) > { > tmem_write_lock(&pool->pool_rwlock); > - if ( (obj = objnew = obj_new(pool,oid)) == NULL ) > + if ( (obj = objnew = obj_new(pool,oidp)) == NULL ) > { > tmem_write_unlock(&pool->pool_rwlock); > return -ENOMEM; > @@ -1627,14 +1675,14 @@ insert_page: > &global_ephemeral_page_list); > if (++global_eph_count > global_eph_count_max) > global_eph_count_max = global_eph_count; > - list_add_tail(&pgp->client_eph_pages, > + list_add_tail(&pgp->us.client_eph_pages, > &client->ephemeral_page_list); > if (++client->eph_count > client->eph_count_max) > client->eph_count_max = client->eph_count; > tmem_spin_unlock(&eph_lists_spinlock); > } else { /* is_persistent */ > tmem_spin_lock(&pers_lists_spinlock); > - list_add_tail(&pgp->pool_pers_pages, > + list_add_tail(&pgp->us.pool_pers_pages, > &pool->persistent_page_list); > tmem_spin_unlock(&pers_lists_spinlock); > } > @@ -1678,7 +1726,7 @@ free: > return ret; > } > > -static NOINLINE int do_tmem_get(pool_t *pool, uint64_t oid, uint32_t > index, > +static NOINLINE int do_tmem_get(pool_t *pool, OID *oidp, uint32_t > index, > tmem_cli_mfn_t cmfn, pagesize_t tmem_offset, > pagesize_t pfn_offset, pagesize_t len, void *cva) > { > @@ -1691,7 +1739,7 @@ static NOINLINE int do_tmem_get(pool_t * > return -EEMPTY; > > pool->gets++; > - obj = obj_find(pool,oid); > + obj = obj_find(pool,oidp); > if ( obj == NULL ) > return 0; > > @@ -1737,8 +1785,8 @@ static NOINLINE int do_tmem_get(pool_t * > tmem_spin_lock(&eph_lists_spinlock); > list_del(&pgp->global_eph_pages); > list_add_tail(&pgp- > >global_eph_pages,&global_ephemeral_page_list); > - list_del(&pgp->client_eph_pages); > - 
list_add_tail(&pgp->client_eph_pages,&client- > >ephemeral_page_list); > + list_del(&pgp->us.client_eph_pages); > + list_add_tail(&pgp->us.client_eph_pages,&client- > >ephemeral_page_list); > tmem_spin_unlock(&eph_lists_spinlock); > ASSERT(obj != NULL); > obj->last_client = tmh_get_cli_id_from_current(); > @@ -1763,13 +1811,13 @@ bad_copy: > > } > > -static NOINLINE int do_tmem_flush_page(pool_t *pool, uint64_t oid, > uint32_t index) > +static NOINLINE int do_tmem_flush_page(pool_t *pool, OID *oidp, > uint32_t index) > { > obj_t *obj; > pgp_t *pgp; > > pool->flushs++; > - obj = obj_find(pool,oid); > + obj = obj_find(pool,oidp); > if ( obj == NULL ) > goto out; > pgp = pgp_delete_from_obj(obj, index); > @@ -1798,12 +1846,12 @@ out: > return 1; > } > > -static NOINLINE int do_tmem_flush_object(pool_t *pool, uint64_t oid) > +static NOINLINE int do_tmem_flush_object(pool_t *pool, OID *oidp) > { > obj_t *obj; > > pool->flush_objs++; > - obj = obj_find(pool,oid); > + obj = obj_find(pool,oidp); > if ( obj == NULL ) > goto out; > tmem_write_lock(&pool->pool_rwlock); > @@ -1863,6 +1911,16 @@ static NOINLINE int do_tmem_new_pool(cli > if ( pagebits != (PAGE_SHIFT - 12) ) > { > printk("failed... unsupported pagesize > %d\n",1<<(pagebits+12)); > + return -EPERM; > + } > + if ( flags & TMEM_POOL_PRECOMPRESSED ) > + { > + printk("failed... precompression flag set but unsupported\n"); > + return -EPERM; > + } > + if ( flags & TMEM_POOL_RESERVED_BITS ) > + { > + printk("failed... reserved bits must be zero\n"); > return -EPERM; > } > if ( (pool = pool_alloc()) == NULL ) > @@ -2369,6 +2427,7 @@ static NOINLINE int tmemc_save_get_next_ > pool_t *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) > ? 
NULL : client->pools[pool_id]; > pgp_t *pgp; > + OID oid; > int ret = 0; > struct tmem_handle *h; > unsigned int pagesize = 1 << (pool->pageshift+12); > @@ -2389,22 +2448,23 @@ static NOINLINE int tmemc_save_get_next_ > { > /* process the first one */ > pool->cur_pgp = pgp = list_entry((&pool- > >persistent_page_list)->next, > - pgp_t,pool_pers_pages); > - } else if ( list_is_last(&pool->cur_pgp->pool_pers_pages, > + pgp_t,us.pool_pers_pages); > + } else if ( list_is_last(&pool->cur_pgp->us.pool_pers_pages, > &pool->persistent_page_list) ) > { > /* already processed the last one in the list */ > ret = -1; > goto out; > } > - pgp = list_entry((&pool->cur_pgp->pool_pers_pages)->next, > - pgp_t,pool_pers_pages); > + pgp = list_entry((&pool->cur_pgp->us.pool_pers_pages)->next, > + pgp_t,us.pool_pers_pages); > pool->cur_pgp = pgp; > + oid = pgp->us.obj->oid; > h = (struct tmem_handle *)buf.p; > - h->oid = pgp->obj->oid; > + *(OID *)&h->oid[0] = oid; > h->index = pgp->index; > buf.p = (void *)(h+1); > - ret = do_tmem_get(pool, h->oid, h->index,0,0,0,pagesize,buf.p); > + ret = do_tmem_get(pool, &oid, h->index,0,0,0,pagesize,buf.p); > > out: > tmem_spin_unlock(&pers_lists_spinlock); > @@ -2444,7 +2504,7 @@ static NOINLINE int tmemc_save_get_next_ > } > h = (struct tmem_handle *)buf.p; > h->pool_id = pgp->pool_id; > - h->oid = pgp->inv_oid; > + *(OID *)&h->oid = pgp->inv_oid; > h->index = pgp->index; > ret = 1; > out: > @@ -2452,7 +2512,7 @@ out: > return ret; > } > > -static int tmemc_restore_put_page(int cli_id, int pool_id, uint64_t > oid, > +static int tmemc_restore_put_page(int cli_id, int pool_id, OID *oidp, > uint32_t index, tmem_cli_va_t buf, uint32_t > bufsize) > { > client_t *client = tmh_client_from_cli_id(cli_id); > @@ -2461,10 +2521,10 @@ static int tmemc_restore_put_page(int cl > > if ( pool == NULL ) > return -1; > - return do_tmem_put(pool,oid,index,0,0,0,bufsize,buf.p); > + return do_tmem_put(pool,oidp,index,0,0,0,bufsize,buf.p); > } > > -static int 
tmemc_restore_flush_page(int cli_id, int pool_id, uint64_t > oid, > +static int tmemc_restore_flush_page(int cli_id, int pool_id, OID > *oidp, > uint32_t index) > { > client_t *client = tmh_client_from_cli_id(cli_id); > @@ -2473,7 +2533,7 @@ static int tmemc_restore_flush_page(int > > if ( pool == NULL ) > return -1; > - return do_tmem_flush_page(pool,oid,index); > + return do_tmem_flush_page(pool,oidp,index); > } > > static NOINLINE int do_tmem_control(struct tmem_op *op) > @@ -2481,6 +2541,7 @@ static NOINLINE int do_tmem_control(stru > int ret; > uint32_t pool_id = op->pool_id; > uint32_t subop = op->u.ctrl.subop; > + OID *oidp = (OID *)(&op->u.ctrl.oid[0]); > > if (!tmh_current_is_privileged()) > { > @@ -2533,12 +2594,12 @@ static NOINLINE int do_tmem_control(stru > break; > case TMEMC_RESTORE_PUT_PAGE: > ret = tmemc_restore_put_page(op->u.ctrl.cli_id,pool_id, > - op->u.ctrl.arg3, op->u.ctrl.arg2, > + oidp, op->u.ctrl.arg2, > op->u.ctrl.buf, op->u.ctrl.arg1); > break; > case TMEMC_RESTORE_FLUSH_PAGE: > ret = tmemc_restore_flush_page(op->u.ctrl.cli_id,pool_id, > - op->u.ctrl.arg3, op- > >u.ctrl.arg2); > + oidp, op->u.ctrl.arg2); > break; > default: > ret = -1; > @@ -2553,6 +2614,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop > struct tmem_op op; > client_t *client = tmh_client_from_current(); > pool_t *pool = NULL; > + OID *oidp; > int rc = 0; > bool_t succ_get = 0, succ_put = 0; > bool_t non_succ_get = 0, non_succ_put = 0; > @@ -2656,6 +2718,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop > ASSERT_SENTINEL(pool,POOL); > } > > + oidp = (OID *)&op.u.gen.oid[0]; > switch ( op.cmd ) > { > case TMEM_NEW_POOL: > @@ -2664,28 +2727,28 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop > break; > case TMEM_NEW_PAGE: > tmem_ensure_avail_pages(); > - rc = do_tmem_put(pool, op.u.gen.object, > + rc = do_tmem_put(pool, oidp, > op.u.gen.index, op.u.gen.cmfn, 0, 0, 0, > NULL); > break; > case TMEM_PUT_PAGE: > tmem_ensure_avail_pages(); > - rc = do_tmem_put(pool, op.u.gen.object, > + rc 
= do_tmem_put(pool, oidp, > op.u.gen.index, op.u.gen.cmfn, 0, 0, PAGE_SIZE, > NULL); > if (rc == 1) succ_put = 1; > else non_succ_put = 1; > break; > case TMEM_GET_PAGE: > - rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, > op.u.gen.cmfn, > + rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn, > 0, 0, PAGE_SIZE, 0); > if (rc == 1) succ_get = 1; > else non_succ_get = 1; > break; > case TMEM_FLUSH_PAGE: > flush = 1; > - rc = do_tmem_flush_page(pool, op.u.gen.object, > op.u.gen.index); > + rc = do_tmem_flush_page(pool, oidp, op.u.gen.index); > break; > case TMEM_FLUSH_OBJECT: > - rc = do_tmem_flush_object(pool, op.u.gen.object); > + rc = do_tmem_flush_object(pool, oidp); > flush_obj = 1; > break; > case TMEM_DESTROY_POOL: > @@ -2693,12 +2756,12 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop > rc = do_tmem_destroy_pool(op.pool_id); > break; > case TMEM_READ: > - rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, > op.u.gen.cmfn, > + rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn, > op.u.gen.tmem_offset, op.u.gen.pfn_offset, > op.u.gen.len,0); > break; > case TMEM_WRITE: > - rc = do_tmem_put(pool, op.u.gen.object, > + rc = do_tmem_put(pool, oidp, > op.u.gen.index, op.u.gen.cmfn, > op.u.gen.tmem_offset, op.u.gen.pfn_offset, > op.u.gen.len, NULL); > diff -r 07ac5459b250 xen/include/public/tmem.h > --- a/xen/include/public/tmem.h Wed Aug 25 09:23:31 2010 +0100 > +++ b/xen/include/public/tmem.h Thu Sep 02 16:43:33 2010 -0600 > @@ -28,6 +28,9 @@ > #define __XEN_PUBLIC_TMEM_H__ > > #include "xen.h" > + > +/* version of ABI */ > +#define TMEM_SPEC_VERSION 1 > > /* Commands to HYPERVISOR_tmem_op() */ > #define TMEM_CONTROL 0 > @@ -75,10 +78,12 @@ > /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ > #define TMEM_POOL_PERSIST 1 > #define TMEM_POOL_SHARED 2 > +#define TMEM_POOL_PRECOMPRESSED 4 > #define TMEM_POOL_PAGESIZE_SHIFT 4 > #define TMEM_POOL_PAGESIZE_MASK 0xf > #define TMEM_POOL_VERSION_SHIFT 24 > #define TMEM_POOL_VERSION_MASK 0xff > +#define 
TMEM_POOL_RESERVED_BITS 0x00ffff00 > > /* Bits for client flags (save/restore) */ > #define TMEM_CLIENT_COMPRESS 1 > @@ -106,12 +111,12 @@ struct tmem_op { > uint32_t cli_id; > uint32_t arg1; > uint32_t arg2; > - uint64_t arg3; > + uint64_t oid[3]; > tmem_cli_va_t buf; > } ctrl; /* for cmd == TMEM_CONTROL */ > struct { > > - uint64_t object; > + uint64_t oid[3]; > uint32_t index; > uint32_t tmem_offset; > uint32_t pfn_offset; > @@ -126,9 +131,8 @@ struct tmem_handle { > struct tmem_handle { > uint32_t pool_id; > uint32_t index; > - uint64_t oid; > + uint64_t oid[3]; > }; > - > #endif > > #endif /* __XEN_PUBLIC_TMEM_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |