[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Update radix-tree.[ch] from upstream Linux to gain RCU awareness.
# HG changeset patch # User Keir Fraser <keir@xxxxxxx> # Date 1304929523 -3600 # Node ID 44bfebf40b2bb7f219333ef5bf97eb7493592cdc # Parent 4b0692880dfa557d4e1537c7a58c412c1286a416 Update radix-tree.[ch] from upstream Linux to gain RCU awareness. We still leave behind features we don't need such as tagged nodes. Other changes: - Allow callers to define their own node alloc routines. - Only allocate per-node rcu_head when using the default RCU-safe alloc routines. - Keep our own radix_tree_destroy(). In future it may also be worth getting rid of the complex and pointless special-casing of radix-tree height==0, in which a single data item can be stored directly in radix_tree_root. It introduces a whole lot of special cases and complicates RCU handling. If we get rid of it we can reclaim the 'indirect pointer' tag in bit 0 of every slot entry. Signed-off-by: Keir Fraser <keir@xxxxxxx> --- diff -r 4b0692880dfa -r 44bfebf40b2b xen/common/radix-tree.c --- a/xen/common/radix-tree.c Thu May 05 17:40:34 2011 +0100 +++ b/xen/common/radix-tree.c Mon May 09 09:25:23 2011 +0100 @@ -1,7 +1,8 @@ /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig - * Copyright (C) 2005 SGI, Christoph Lameter <clameter@xxxxxxx> + * Copyright (C) 2005 SGI, Christoph Lameter + * Copyright (C) 2006 Nick Piggin * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -18,432 +19,740 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* - * Copyright (C) 2009 adaption for Xen tmem by Dan Magenheimer, Oracle Corp. - * Changed: - * o Linux 2.6.18 source used (prior to read-copy-update addition) - * o constants and data structures moved out to radix-tree.h header - * o tagging code removed - * o radix_tree_insert has func parameter for dynamic data struct allocation - * o radix_tree_destroy added (including recursive helper function) - * o __init functions must be called explicitly - * o other include files adapted to Xen - */ - #include <xen/config.h> #include <xen/init.h> -#include <xen/lib.h> -#include <xen/types.h> -#include <xen/errno.h> #include <xen/radix-tree.h> -#include <asm/cache.h> +struct radix_tree_path { + struct radix_tree_node *node; + int offset; +}; + +#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) +#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ + RADIX_TREE_MAP_SHIFT)) + +/* + * The height_to_maxindex array needs to be one deeper than the maximum + * path as height 0 holds only 1 entry. + */ static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; +static inline void *ptr_to_indirect(void *ptr) +{ + return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); +} + +static inline void *indirect_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); +} + +struct rcu_node { + struct radix_tree_node node; + struct rcu_head rcu_head; +}; + +static struct radix_tree_node *rcu_node_alloc(void *arg) +{ + struct rcu_node *rcu_node = xmalloc(struct rcu_node); + return rcu_node ? &rcu_node->node : NULL; +} + +static void _rcu_node_free(struct rcu_head *head) +{ + struct rcu_node *rcu_node = + container_of(head, struct rcu_node, rcu_head); + xfree(rcu_node); +} + +static void rcu_node_free(struct radix_tree_node *node, void *arg) +{ + struct rcu_node *rcu_node = container_of(node, struct rcu_node, node); + call_rcu(&rcu_node->rcu_head, _rcu_node_free); +} + +static struct radix_tree_node *radix_tree_node_alloc( + struct radix_tree_root *root) +{ + struct radix_tree_node *ret; + ret = root->node_alloc(root->node_alloc_free_arg); + if (ret) + memset(ret, 0, sizeof(*ret)); + return ret; +} + +static void radix_tree_node_free( + struct radix_tree_root *root, struct radix_tree_node *node) +{ + root->node_free(node, root->node_alloc_free_arg); +} + /* - * Return the maximum key which can be store into a - * radix tree with height HEIGHT. + * Return the maximum key which can be store into a + * radix tree with height HEIGHT. */ static inline unsigned long radix_tree_maxindex(unsigned int height) { - return height_to_maxindex[height]; + return height_to_maxindex[height]; } /* - * Extend a radix tree so it can store key @index. + * Extend a radix tree so it can store key @index. */ -static int radix_tree_extend(struct radix_tree_root *root, unsigned long index, - struct radix_tree_node *(*node_alloc)(void *), void *arg) +static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) { - struct radix_tree_node *node; - unsigned int height; + struct radix_tree_node *node; + unsigned int height; - /* Figure out what the height should be. */ - height = root->height + 1; - if (index > radix_tree_maxindex(height)) - while (index > radix_tree_maxindex(height)) - height++; + /* Figure out what the height should be. */ + height = root->height + 1; + while (index > radix_tree_maxindex(height)) + height++; - if (root->rnode == NULL) { - root->height = height; - goto out; - } + if (root->rnode == NULL) { + root->height = height; + goto out; + } - do { - if (!(node = node_alloc(arg))) - return -ENOMEM; + do { + unsigned int newheight; + if (!(node = radix_tree_node_alloc(root))) + return -ENOMEM; - /* Increase the height. */ - node->slots[0] = root->rnode; + /* Increase the height. */ + node->slots[0] = indirect_to_ptr(root->rnode); - node->count = 1; - root->rnode = node; - root->height++; - } while (height > root->height); - out: - return 0; + newheight = root->height+1; + node->height = newheight; + node->count = 1; + node = ptr_to_indirect(node); + rcu_assign_pointer(root->rnode, node); + root->height = newheight; + } while (height > root->height); +out: + return 0; } /** - * radix_tree_insert - insert into a radix tree - * @root: radix tree root - * @index: index key - * @item: item to insert + * radix_tree_insert - insert into a radix tree + * @root: radix tree root + * @index: index key + * @item: item to insert * - * Insert an item into the radix tree at position @index. + * Insert an item into the radix tree at position @index. */ -int radix_tree_insert(struct radix_tree_root *root, unsigned long index, - void *item, struct radix_tree_node *(*node_alloc)(void *), void *arg) +int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) { - struct radix_tree_node *node = NULL, *slot; - unsigned int height, shift; - int offset; - int error; + struct radix_tree_node *node = NULL, *slot; + unsigned int height, shift; + int offset; + int error; - /* Make sure the tree is high enough. */ - if (index > radix_tree_maxindex(root->height)) { - error = radix_tree_extend(root, index, node_alloc, arg); - if (error) - return error; - } + BUG_ON(radix_tree_is_indirect_ptr(item)); - slot = root->rnode; - height = root->height; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; + /* Make sure the tree is high enough. */ + if (index > radix_tree_maxindex(root->height)) { + error = radix_tree_extend(root, index); + if (error) + return error; + } - offset = 0; /* uninitialised var warning */ - while (height > 0) { - if (slot == NULL) { - /* Have to add a child node. */ - if (!(slot = node_alloc(arg))) - return -ENOMEM; - if (node) { + slot = indirect_to_ptr(root->rnode); - node->slots[offset] = slot; - node->count++; - } else - root->rnode = slot; - } + height = root->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; - /* Go a level down */ - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; - slot = node->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } + offset = 0; /* uninitialised var warning */ + while (height > 0) { + if (slot == NULL) { + /* Have to add a child node. */ + if (!(slot = radix_tree_node_alloc(root))) + return -ENOMEM; + slot->height = height; + if (node) { + rcu_assign_pointer(node->slots[offset], slot); + node->count++; + } else + rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); + } - if (slot != NULL) - return -EEXIST; + /* Go a level down */ + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + node = slot; + slot = node->slots[offset]; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } - if (node) { - node->count++; - node->slots[offset] = item; - } else { - root->rnode = item; - } + if (slot != NULL) + return -EEXIST; - return 0; + if (node) { + node->count++; + rcu_assign_pointer(node->slots[offset], item); + } else { + rcu_assign_pointer(root->rnode, item); + } + + return 0; } EXPORT_SYMBOL(radix_tree_insert); -static inline void **__lookup_slot(struct radix_tree_root *root, - unsigned long index) +/* + * is_slot == 1 : search for the slot. + * is_slot == 0 : search for the node. + */ +static void *radix_tree_lookup_element(struct radix_tree_root *root, + unsigned long index, int is_slot) { - unsigned int height, shift; - struct radix_tree_node **slot; + unsigned int height, shift; + struct radix_tree_node *node, **slot; - height = root->height; + node = rcu_dereference(root->rnode); + if (node == NULL) + return NULL; - if (index > radix_tree_maxindex(height)) - return NULL; + if (!radix_tree_is_indirect_ptr(node)) { + if (index > 0) + return NULL; + return is_slot ? (void *)&root->rnode : node; + } + node = indirect_to_ptr(node); - if (height == 0 && root->rnode) - return (void **)&root->rnode; + height = node->height; + if (index > radix_tree_maxindex(height)) + return NULL; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = &root->rnode; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; - while (height > 0) { - if (*slot == NULL) - return NULL; + do { + slot = (struct radix_tree_node **) + (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + node = rcu_dereference(*slot); + if (node == NULL) + return NULL; - slot = (struct radix_tree_node **) - ((*slot)->slots + - ((index >> shift) & RADIX_TREE_MAP_MASK)); - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); - return (void **)slot; + return is_slot ? (void *)slot : indirect_to_ptr(node); } /** - * radix_tree_lookup_slot - lookup a slot in a radix tree - * @root: radix tree root - * @index: index key + * radix_tree_lookup_slot - lookup a slot in a radix tree + * @root: radix tree root + * @index: index key * - * Lookup the slot corresponding to the position @index in the radix tree - * @root. This is useful for update-if-exists operations. + * Returns: the slot corresponding to the position @index in the + * radix tree @root. This is useful for update-if-exists operations. + * + * This function can be called under rcu_read_lock iff the slot is not + * modified by radix_tree_replace_slot, otherwise it must be called + * exclusive from other writers. Any dereference of the slot must be done + * using radix_tree_deref_slot. */ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) { - return __lookup_slot(root, index); + return (void **)radix_tree_lookup_element(root, index, 1); } EXPORT_SYMBOL(radix_tree_lookup_slot); /** - * radix_tree_lookup - perform lookup operation on a radix tree - * @root: radix tree root - * @index: index key + * radix_tree_lookup - perform lookup operation on a radix tree + * @root: radix tree root + * @index: index key * - * Lookup the item at the position @index in the radix tree @root. + * Lookup the item at the position @index in the radix tree @root. + * + * This function can be called under rcu_read_lock, however the caller + * must manage lifetimes of leaf nodes (eg. RCU may also be used to free + * them safely). No RCU barriers are required to access or modify the + * returned item, however. */ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) { - void **slot; - - slot = __lookup_slot(root, index); - return slot != NULL ? *slot : NULL; + return radix_tree_lookup_element(root, index, 0); } EXPORT_SYMBOL(radix_tree_lookup); +/** + * radix_tree_next_hole - find the next hole (not-present entry) + * @root: tree root + * @index: index key + * @max_scan: maximum range to search + * + * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest + * indexed hole. + * + * Returns: the index of the hole if found, otherwise returns an index + * outside of the set specified (in which case 'return - index >= max_scan' + * will be true). In rare cases of index wrap-around, 0 will be returned. + * + * radix_tree_next_hole may be called under rcu_read_lock. However, like + * radix_tree_gang_lookup, this will not atomically search a snapshot of + * the tree at a single point in time. For example, if a hole is created + * at index 5, then subsequently a hole is created at index 10, + * radix_tree_next_hole covering both indexes may return 10 if called + * under rcu_read_lock. + */ +unsigned long radix_tree_next_hole(struct radix_tree_root *root, + unsigned long index, unsigned long max_scan) +{ + unsigned long i; + + for (i = 0; i < max_scan; i++) { + if (!radix_tree_lookup(root, index)) + break; + index++; + if (index == 0) + break; + } + + return index; +} +EXPORT_SYMBOL(radix_tree_next_hole); + +/** + * radix_tree_prev_hole - find the prev hole (not-present entry) + * @root: tree root + * @index: index key + * @max_scan: maximum range to search + * + * Search backwards in the range [max(index-max_scan+1, 0), index] + * for the first hole. + * + * Returns: the index of the hole if found, otherwise returns an index + * outside of the set specified (in which case 'index - return >= max_scan' + * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. + * + * radix_tree_next_hole may be called under rcu_read_lock. However, like + * radix_tree_gang_lookup, this will not atomically search a snapshot of + * the tree at a single point in time. For example, if a hole is created + * at index 10, then subsequently a hole is created at index 5, + * radix_tree_prev_hole covering both indexes may return 5 if called under + * rcu_read_lock. + */ +unsigned long radix_tree_prev_hole(struct radix_tree_root *root, + unsigned long index, unsigned long max_scan) +{ + unsigned long i; + + for (i = 0; i < max_scan; i++) { + if (!radix_tree_lookup(root, index)) + break; + index--; + if (index == ULONG_MAX) + break; + } + + return index; +} +EXPORT_SYMBOL(radix_tree_prev_hole); + static unsigned int -__lookup(struct radix_tree_root *root, void **results, unsigned long index, - unsigned int max_items, unsigned long *next_index) +__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, + unsigned int max_items, unsigned long *next_index) { - unsigned int nr_found = 0; - unsigned int shift, height; - struct radix_tree_node *slot; - unsigned long i; + unsigned int nr_found = 0; + unsigned int shift, height; + unsigned long i; - height = root->height; - if (index > radix_tree_maxindex(height)) - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; - goto out; - } + height = slot->height; + if (height == 0) + goto out; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; + for ( ; height > 1; height--) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { + if (slot->slots[i] != NULL) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; + } - for ( ; height > 1; height--) { + shift -= RADIX_TREE_MAP_SHIFT; + slot = rcu_dereference(slot->slots[i]); + if (slot == NULL) + goto out; + } - for (i = (index >> shift) & RADIX_TREE_MAP_MASK ; - i < RADIX_TREE_MAP_SIZE; i++) { - if (slot->slots[i] != NULL) - break; - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - } - if (i == RADIX_TREE_MAP_SIZE) - goto out; - - shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; - } - - /* Bottom level: grab some items */ - for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - index++; - if (slot->slots[i]) { - results[nr_found++] = slot->slots[i]; - if (nr_found == max_items) - goto out; - } - } - out: - *next_index = index; - return nr_found; + /* Bottom level: grab some items */ + for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { + index++; + if (slot->slots[i]) { + results[nr_found++] = &(slot->slots[i]); + if (nr_found == max_items) + goto out; + } + } +out: + *next_index = index; + return nr_found; } /** - * radix_tree_gang_lookup - perform multiple lookup on a radix tree - * @root: radix tree root - * @results: where the results of the lookup are placed - * @first_index: start the lookup from this key - * @max_items: place up to this many items at *results + * radix_tree_gang_lookup - perform multiple lookup on a radix tree + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results * - * Performs an index-ascending scan of the tree for present items. Places - * them at *@results and returns the number of items which were placed at - * *@results. + * Performs an index-ascending scan of the tree for present items. Places + * them at *@results and returns the number of items which were placed at + * *@results. * - * The implementation is naive. + * The implementation is naive. + * + * Like radix_tree_lookup, radix_tree_gang_lookup may be called under + * rcu_read_lock. In this case, rather than the returned results being + * an atomic snapshot of the tree at a single point in time, the semantics + * of an RCU protected gang lookup are as though multiple radix_tree_lookups + * have been issued in individual locks, and results stored in 'results'. */ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items) + unsigned long first_index, unsigned int max_items) { - const unsigned long max_index = radix_tree_maxindex(root->height); - unsigned long cur_index = first_index; - unsigned int ret = 0; + unsigned long max_index; + struct radix_tree_node *node; + unsigned long cur_index = first_index; + unsigned int ret; - while (ret < max_items) { - unsigned int nr_found; - unsigned long next_index; /* Index of next search */ + node = rcu_dereference(root->rnode); + if (!node) + return 0; - if (cur_index > max_index) - break; - nr_found = __lookup(root, results + ret, cur_index, - max_items - ret, &next_index); - ret += nr_found; - if (next_index == 0) - break; - cur_index = next_index; - } - return ret; + if (!radix_tree_is_indirect_ptr(node)) { + if (first_index > 0) + return 0; + results[0] = node; + return 1; + } + node = indirect_to_ptr(node); + + max_index = radix_tree_maxindex(node->height); + + ret = 0; + while (ret < max_items) { + unsigned int nr_found, slots_found, i; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + slots_found = __lookup(node, (void ***)results + ret, cur_index, + max_items - ret, &next_index); + nr_found = 0; + for (i = 0; i < slots_found; i++) { + struct radix_tree_node *slot; + slot = *(((void ***)results)[ret + i]); + if (!slot) + continue; + results[ret + nr_found] = + indirect_to_ptr(rcu_dereference(slot)); + nr_found++; + } + ret += nr_found; + if (next_index == 0) + break; + cur_index = next_index; + } + + return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup); /** - * radix_tree_shrink - shrink height of a radix tree to minimal - * @root radix tree root + * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * + * Performs an index-ascending scan of the tree for present items. Places + * their slots at *@results and returns the number of items which were + * placed at *@results. + * + * The implementation is naive. + * + * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must + * be dereferenced with radix_tree_deref_slot, and if using only RCU + * protection, radix_tree_deref_slot may fail requiring a retry. */ -static inline void radix_tree_shrink(struct radix_tree_root *root, - void (*node_free)(struct radix_tree_node *)) +unsigned int +radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, + unsigned long first_index, unsigned int max_items) { - /* try to shrink tree height */ - while (root->height > 0 && - root->rnode->count == 1 && - root->rnode->slots[0]) { - struct radix_tree_node *to_free = root->rnode; + unsigned long max_index; + struct radix_tree_node *node; + unsigned long cur_index = first_index; + unsigned int ret; - root->rnode = to_free->slots[0]; - root->height--; - to_free->slots[0] = NULL; - to_free->count = 0; - node_free(to_free); - } + node = rcu_dereference(root->rnode); + if (!node) + return 0; + + if (!radix_tree_is_indirect_ptr(node)) { + if (first_index > 0) + return 0; + results[0] = (void **)&root->rnode; + return 1; + } + node = indirect_to_ptr(node); + + max_index = radix_tree_maxindex(node->height); + + ret = 0; + while (ret < max_items) { + unsigned int slots_found; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + slots_found = __lookup(node, results + ret, cur_index, + max_items - ret, &next_index); + ret += slots_found; + if (next_index == 0) + break; + cur_index = next_index; + } + + return ret; +} +EXPORT_SYMBOL(radix_tree_gang_lookup_slot); + +/** + * radix_tree_shrink - shrink height of a radix tree to minimal + * @root radix tree root + */ +static inline void radix_tree_shrink(struct radix_tree_root *root) +{ + /* try to shrink tree height */ + while (root->height > 0) { + struct radix_tree_node *to_free = root->rnode; + void *newptr; + + BUG_ON(!radix_tree_is_indirect_ptr(to_free)); + to_free = indirect_to_ptr(to_free); + + /* + * The candidate node has more than one child, or its child + * is not at the leftmost slot, we cannot shrink. + */ + if (to_free->count != 1) + break; + if (!to_free->slots[0]) + break; + + /* + * We don't need rcu_assign_pointer(), since we are simply + * moving the node from one part of the tree to another: if it + * was safe to dereference the old pointer to it + * (to_free->slots[0]), it will be safe to dereference the new + * one (root->rnode) as far as dependent read barriers go. + */ + newptr = to_free->slots[0]; + if (root->height > 1) + newptr = ptr_to_indirect(newptr); + root->rnode = newptr; + root->height--; + + /* + * We have a dilemma here. The node's slot[0] must not be + * NULLed in case there are concurrent lookups expecting to + * find the item. However if this was a bottom-level node, + * then it may be subject to the slot pointer being visible + * to callers dereferencing it. If item corresponding to + * slot[0] is subsequently deleted, these callers would expect + * their slot to become empty sooner or later. + * + * For example, lockless pagecache will look up a slot, deref + * the page pointer, and if the page is 0 refcount it means it + * was concurrently deleted from pagecache so try the deref + * again. Fortunately there is already a requirement for logic + * to retry the entire slot lookup -- the indirect pointer + * problem (replacing direct root node with an indirect pointer + * also results in a stale slot). So tag the slot as indirect + * to force callers to retry. + */ + if (root->height == 0) + *((unsigned long *)&to_free->slots[0]) |= + RADIX_TREE_INDIRECT_PTR; + + radix_tree_node_free(root, to_free); + } } /** - * radix_tree_delete - delete an item from a radix tree - * @root: radix tree root - * @index: index key + * radix_tree_delete - delete an item from a radix tree + * @root: radix tree root + * @index: index key * - * Remove the item at @index from the radix tree rooted at @root. + * Remove the item at @index from the radix tree rooted at @root. * - * Returns the address of the deleted item, or NULL if it was not present. + * Returns the address of the deleted item, or NULL if it was not present. */ -void *radix_tree_delete(struct radix_tree_root *root, unsigned long index, - void(*node_free)(struct radix_tree_node *)) +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { - struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; - struct radix_tree_node *slot = NULL; - unsigned int height, shift; - int offset; + /* + * The radix tree path needs to be one longer than the maximum path + * since the "list" is null terminated. + */ + struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; + struct radix_tree_node *slot = NULL; + struct radix_tree_node *to_free; + unsigned int height, shift; + int offset; - height = root->height; - if (index > radix_tree_maxindex(height)) - goto out; + height = root->height; + if (index > radix_tree_maxindex(height)) + goto out; - slot = root->rnode; - if (height == 0 && root->rnode) { - root->rnode = NULL; - goto out; - } + slot = root->rnode; + if (height == 0) { + root->rnode = NULL; + goto out; + } + slot = indirect_to_ptr(slot); - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - pathp->node = NULL; + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + pathp->node = NULL; - do { - if (slot == NULL) - goto out; + do { + if (slot == NULL) + goto out; - pathp++; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp->offset = offset; - pathp->node = slot; - slot = slot->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } while (height > 0); + pathp++; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + pathp->offset = offset; + pathp->node = slot; + slot = slot->slots[offset]; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); - if (slot == NULL) - goto out; + if (slot == NULL) + goto out; - /* Now free the nodes we do not need anymore */ - while (pathp->node) { - pathp->node->slots[pathp->offset] = NULL; - pathp->node->count--; + to_free = NULL; + /* Now free the nodes we do not need anymore */ + while (pathp->node) { + pathp->node->slots[pathp->offset] = NULL; + pathp->node->count--; + /* + * Queue the node for deferred freeing after the + * last reference to it disappears (set NULL, above). + */ + if (to_free) + radix_tree_node_free(root, to_free); - if (pathp->node->count) { - if (pathp->node == root->rnode) - radix_tree_shrink(root, node_free); - goto out; - } + if (pathp->node->count) { + if (pathp->node == indirect_to_ptr(root->rnode)) + radix_tree_shrink(root); + goto out; + } - /* Node with zero slots in use so free it */ - node_free(pathp->node); + /* Node with zero slots in use so free it */ + to_free = pathp->node; + pathp--; - pathp--; - } - root->height = 0; - root->rnode = NULL; + } + root->height = 0; + root->rnode = NULL; + if (to_free) + radix_tree_node_free(root, to_free); - out: - return slot; +out: + return slot; } EXPORT_SYMBOL(radix_tree_delete); static void -radix_tree_node_destroy(struct radix_tree_node *node, unsigned int height, - void (*slot_free)(void *), void (*node_free)(struct radix_tree_node *)) +radix_tree_node_destroy( + struct radix_tree_root *root, struct radix_tree_node *node, + void (*slot_free)(void *)) { - int i; + int i; - if (height == 0) - return; - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[i]) { - if (height == 1) { - slot_free(node->slots[i]); - node->slots[i] = NULL; - continue; - } - radix_tree_node_destroy(node->slots[i], height-1, - slot_free, node_free); - node_free(node->slots[i]); - node->slots[i] = NULL; - } - } + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + struct radix_tree_node *slot = node->slots[i]; + BUG_ON(radix_tree_is_indirect_ptr(slot)); + if (slot == NULL) + continue; + if (node->height == 1) { + if (slot_free) + slot_free(slot); + } else { + radix_tree_node_destroy(root, slot, slot_free); + } + } + + radix_tree_node_free(root, node); } -void radix_tree_destroy(struct radix_tree_root *root, - void (*slot_free)(void *), void (*node_free)(struct radix_tree_node *)) +void radix_tree_destroy( + struct radix_tree_root *root, + void (*slot_free)(void *)) { - if (root->rnode == NULL) - return; - if (root->height == 0) - slot_free(root->rnode); - else { - radix_tree_node_destroy(root->rnode, root->height, - slot_free, node_free); - node_free(root->rnode); - root->height = 0; - } - root->rnode = NULL; - /* caller must delete root if desired */ -} -EXPORT_SYMBOL(radix_tree_destroy); - -static unsigned long __init __maxindex(unsigned int height) -{ - unsigned int tmp = height * RADIX_TREE_MAP_SHIFT; - unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1; - - if (tmp >= RADIX_TREE_INDEX_BITS) - index = ~0UL; - return index; + struct radix_tree_node *node = root->rnode; + if (node == NULL) + return; + if (!radix_tree_is_indirect_ptr(node)) { + if (slot_free) + slot_free(node); + } else { + node = indirect_to_ptr(node); + radix_tree_node_destroy(root, node, slot_free); + } + radix_tree_init(root); } -void __init radix_tree_init(void) +void radix_tree_init(struct radix_tree_root *root) { - unsigned int i; + memset(root, 0, sizeof(*root)); + root->node_alloc = rcu_node_alloc; + root->node_free = rcu_node_free; +} - for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) - height_to_maxindex[i] = __maxindex(i); +void radix_tree_set_alloc_callbacks( + struct radix_tree_root *root, + radix_tree_alloc_fn_t *node_alloc, + radix_tree_free_fn_t *node_free, + void *node_alloc_free_arg) +{ + root->node_alloc = node_alloc; + root->node_free = node_free; + root->node_alloc_free_arg = node_alloc_free_arg; } + +static __init unsigned long __maxindex(unsigned int height) +{ + unsigned int width = height * RADIX_TREE_MAP_SHIFT; + int shift = RADIX_TREE_INDEX_BITS - width; + + if (shift < 0) + return ~0UL; + if (shift >= BITS_PER_LONG) + return 0UL; + return ~0UL >> shift; +} + +static __init int radix_tree_init_maxindex(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) + height_to_maxindex[i] = __maxindex(i); + + return 0; +} +/* pre-SMP just so it runs before 'normal' initcalls */ +presmp_initcall(radix_tree_init_maxindex); diff -r 4b0692880dfa -r 44bfebf40b2b xen/common/tmem.c --- a/xen/common/tmem.c Thu May 05 17:40:34 2011 +0100 +++ b/xen/common/tmem.c Mon May 09 09:25:23 2011 +0100 @@ -772,15 +772,12 @@ pgp_free(pgp,0); } -FORWARD static rtn_t *rtn_alloc(void *arg); -FORWARD static void rtn_free(rtn_t *rtn); - static int pgp_add_to_obj(obj_t *obj, uint32_t index, pgp_t *pgp) { int ret; ASSERT_SPINLOCK(&obj->obj_spinlock); - ret = radix_tree_insert(&obj->tree_root, index, pgp, rtn_alloc, obj); + ret = radix_tree_insert(&obj->tree_root, index, pgp); if ( !ret ) obj->pgp_count++; return ret; @@ -795,7 +792,7 @@ ASSERT_SENTINEL(obj,OBJ); ASSERT(obj->pool != NULL); ASSERT_SENTINEL(obj->pool,POOL); - pgp = radix_tree_delete(&obj->tree_root, index, rtn_free); + pgp = radix_tree_delete(&obj->tree_root, index); if ( pgp != NULL ) obj->pgp_count--; ASSERT(obj->pgp_count >= 0); @@ -828,15 +825,12 @@ } /* called only indirectly from radix_tree_delete/destroy */ -static void rtn_free(rtn_t *rtn) +static void rtn_free(rtn_t *rtn, void *arg) { pool_t *pool; objnode_t *objnode; - int i; ASSERT(rtn != NULL); - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) - ASSERT(rtn->slots[i] == NULL); objnode = container_of(rtn,objnode_t,rtn); ASSERT_SENTINEL(objnode,OBJNODE); INVERT_SENTINEL(objnode,OBJNODE); @@ -943,7 +937,7 @@ ASSERT(pool->client != NULL); ASSERT_WRITELOCK(&pool->pool_rwlock); if ( obj->tree_root.rnode != NULL ) /* may be a "stump" with no leaves */ - radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free); + radix_tree_destroy(&obj->tree_root, pgp_destroy); ASSERT((long)obj->objnode_count == 0); ASSERT(obj->tree_root.rnode == NULL); pool->obj_count--; @@ -1003,7 +997,8 @@ if (pool->obj_count > pool->obj_count_max) pool->obj_count_max = pool->obj_count; atomic_inc_and_max(global_obj_count); - INIT_RADIX_TREE(&obj->tree_root,0); + radix_tree_init(&obj->tree_root); + radix_tree_set_alloc_callbacks(&obj->tree_root, rtn_alloc, rtn_free, obj); spin_lock_init(&obj->obj_spinlock); obj->pool = pool; obj->oid = *oidp; @@ -1022,7 +1017,7 @@ static NOINLINE void obj_destroy(obj_t *obj, int no_rebalance) { ASSERT_WRITELOCK(&obj->pool->pool_rwlock); - radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free); + radix_tree_destroy(&obj->tree_root, pgp_destroy); obj_free(obj,no_rebalance); } @@ -2925,7 +2920,6 @@ if ( !tmh_enabled() ) return 0; - radix_tree_init(); if ( tmh_dedup_enabled() ) for (i = 0; i < 256; i++ ) { diff -r 4b0692880dfa -r 44bfebf40b2b xen/include/xen/lib.h --- a/xen/include/xen/lib.h Thu May 05 17:40:34 2011 +0100 +++ b/xen/include/xen/lib.h Mon May 09 09:25:23 2011 +0100 @@ -48,7 +48,8 @@ #define SWAP(_a, _b) \ do { typeof(_a) _t = (_a); (_a) = (_b); (_b) = _t; } while ( 0 ) -#define DIV_ROUND(x, y) (((x) + (y) / 2) / (y)) +#define DIV_ROUND(n, d) (((n) + (d) / 2) / (d)) +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]) + __must_be_array(x)) diff -r 4b0692880dfa -r 44bfebf40b2b xen/include/xen/radix-tree.h --- a/xen/include/xen/radix-tree.h Thu May 05 17:40:34 2011 +0100 +++ b/xen/include/xen/radix-tree.h Mon May 09 09:25:23 2011 +0100 @@ -1,7 +1,7 @@ /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig - * Adapted for Xen by Dan Magenheimer, Oracle Corp. + * Copyright (C) 2006 Nick Piggin * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -20,59 +20,190 @@ #ifndef _XEN_RADIX_TREE_H #define _XEN_RADIX_TREE_H -/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ -struct radix_tree_root { - unsigned int height; - struct radix_tree_node *rnode; +#include <xen/config.h> +#include <xen/types.h> +#include <xen/lib.h> +#include <xen/rcupdate.h> + +/* + * An indirect pointer (root->rnode pointing to a radix_tree_node, rather + * than a data item) is signalled by the low bit set in the root->rnode + * pointer. + * + * In this case root->height is > 0, but the indirect pointer tests are + * needed for RCU lookups (because root->height is unreliable). The only + * time callers need worry about this is when doing a lookup_slot under + * RCU. + * + * Indirect pointer in fact is also used to tag the last pointer of a node + * when it is shrunk, before we rcu free the node. See shrink code for + * details. + */ +#define RADIX_TREE_INDIRECT_PTR 1 + +static inline int radix_tree_is_indirect_ptr(void *ptr) +{ + return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR); +} + +/* + *** Radix tree structure definitions. + *** These are public to allow users to allocate instances of them. + *** However all fields are absolutely private. + */ + +#define RADIX_TREE_MAP_SHIFT 6 +#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) +#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) + +struct radix_tree_node { + unsigned int height; /* Height from the bottom */ + unsigned int count; + void __rcu *slots[RADIX_TREE_MAP_SIZE]; }; -#define RADIX_TREE_MAP_SHIFT 6 +typedef struct radix_tree_node *radix_tree_alloc_fn_t(void *); +typedef void radix_tree_free_fn_t(struct radix_tree_node *, void *); -#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) -#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) +struct radix_tree_root { + unsigned int height; + struct radix_tree_node __rcu *rnode; -#define RADIX_TREE_TAG_LONGS \ - ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) - -struct radix_tree_node { - unsigned int count; - void *slots[RADIX_TREE_MAP_SIZE]; + /* Allow to specify custom node alloc/dealloc routines. */ + radix_tree_alloc_fn_t *node_alloc; + radix_tree_free_fn_t *node_free; + void *node_alloc_free_arg; }; -struct radix_tree_path { - struct radix_tree_node *node; - int offset; -}; +/* + *** radix-tree API starts here ** + */ -#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) -#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2) +void radix_tree_init(struct radix_tree_root *root); +void radix_tree_set_alloc_callbacks( + struct radix_tree_root *root, + radix_tree_alloc_fn_t *node_alloc, + radix_tree_free_fn_t *node_free, + void *node_alloc_free_arg); +void radix_tree_destroy( + struct radix_tree_root *root, + void (*slot_free)(void *)); -#define RADIX_TREE_INIT(mask) { \ - .height = 0, \ - .rnode = NULL, \ +/** + * Radix-tree synchronization + * + * The radix-tree API requires that users provide all synchronisation (with + * specific exceptions, noted below). + * + * Synchronization of access to the data items being stored in the tree, and + * management of their lifetimes must be completely managed by API users. + * + * For API usage, in general, + * - any function _modifying_ the tree (inserting or deleting items) must + * exclude other modifications, and exclude any functions reading the tree. + * - any function _reading_ the tree (looking up items) must exclude + * modifications to the tree, but may occur concurrently with other readers. + * + * The notable exceptions to this rule are the following functions: + * radix_tree_lookup + * radix_tree_lookup_slot + * radix_tree_gang_lookup + * radix_tree_gang_lookup_slot + * + * The first 7 functions are able to be called locklessly, using RCU. The + * caller must ensure calls to these functions are made within rcu_read_lock() + * regions. Other readers (lock-free or otherwise) and modifications may be + * running concurrently. + * + * It is still required that the caller manage the synchronization and lifetimes + * of the items. So if RCU lock-free lookups are used, typically this would mean + * that the items have their own locks, or are amenable to lock-free access; and + * that the items are freed by RCU (or only freed after having been deleted from + * the radix tree *and* a synchronize_rcu() grace period). + * + * (Note, rcu_assign_pointer and rcu_dereference are not needed to control + * access to data items when inserting into or looking up from the radix tree) + */ + +/** + * radix_tree_deref_slot - dereference a slot + * @pslot: pointer to slot, returned by radix_tree_lookup_slot + * Returns: item that was stored in that slot with any direct pointer flag + * removed. + * + * For use with radix_tree_lookup_slot(). Caller must hold tree at least read + * locked across slot lookup and dereference. Not required if write lock is + * held (ie. items cannot be concurrently inserted). + * + * radix_tree_deref_retry must be used to confirm validity of the pointer if + * only the read lock is held. + */ +static inline void *radix_tree_deref_slot(void **pslot) +{ + return rcu_dereference(*pslot); } -#define RADIX_TREE(name, mask) \ - struct radix_tree_root name = RADIX_TREE_INIT(mask) +/** + * radix_tree_deref_retry - check radix_tree_deref_slot + * @arg: pointer returned by radix_tree_deref_slot + * Returns: 0 if retry is not required, otherwise retry is required + * + * radix_tree_deref_retry must be used with radix_tree_deref_slot. + */ +static inline int radix_tree_deref_retry(void *arg) +{ + return unlikely((unsigned long)arg & RADIX_TREE_INDIRECT_PTR); +} -#define INIT_RADIX_TREE(root, mask) \ -do { \ - (root)->height = 0; \ - (root)->rnode = NULL; \ -} while (0) +/** + * radix_tree_replace_slot - replace item in a slot + * @pslot: pointer to slot, returned by radix_tree_lookup_slot + * @item: new item to store in the slot. + * + * For use with radix_tree_lookup_slot(). Caller must hold tree write locked + * across slot lookup and replacement. + */ +static inline void radix_tree_replace_slot(void **pslot, void *item) +{ + BUG_ON(radix_tree_is_indirect_ptr(item)); + rcu_assign_pointer(*pslot, item); +} -int radix_tree_insert(struct radix_tree_root *root, unsigned long index, - void *item, struct radix_tree_node *(*node_alloc)(void *), void *arg); + +/** + * radix_tree_{int_to_ptr,ptr_to_int}: + * + * Allow storage of signed integers in radix-tree slots. We use an encoding + * in which the bottom two bits of the slot pointer are reserved (bit 0 for + * the indirect-pointer tag; bit 1 always set to prevent an in-use + * integer-valued slot from being NULL and thus mistakenly being reaped). + */ +static inline void *radix_tree_int_to_ptr(int val) +{ + ASSERT((val <= (LONG_MAX >> 2)) && (val >= (LONG_MIN >> 2))); + return (void *)(((long)val << 2) | 0x2ul); +} + +static inline int radix_tree_ptr_to_int(void *ptr) +{ + ASSERT(((long)ptr & 0x3) == 0x2); + return (int)((long)ptr >> 2); +} + +int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); void *radix_tree_lookup(struct radix_tree_root *, unsigned long); void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); -void radix_tree_destroy(struct radix_tree_root *root, - void (*slot_free)(void *), void (*node_free)(struct radix_tree_node *)); -void *radix_tree_delete(struct radix_tree_root *root, unsigned long index, - void(*node_free)(struct radix_tree_node *)); +void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items); -void radix_tree_init(void); + unsigned long first_index, unsigned int max_items); +unsigned int +radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, + unsigned long first_index, unsigned int max_items); +unsigned long radix_tree_next_hole(struct radix_tree_root *root, + unsigned long index, unsigned long max_scan); +unsigned long radix_tree_prev_hole(struct radix_tree_root *root, + unsigned long index, unsigned long max_scan); #endif /* _XEN_RADIX_TREE_H */ diff -r 4b0692880dfa -r 44bfebf40b2b xen/include/xen/rcupdate.h --- a/xen/include/xen/rcupdate.h Thu May 05 17:40:34 2011 +0100 +++ b/xen/include/xen/rcupdate.h Mon May 09 09:25:23 2011 +0100 @@ -38,6 +38,8 @@ #include <xen/cpumask.h> #include <xen/preempt.h> +#define __rcu + /** * struct rcu_head - callback structure for use with RCU * @next: next update requests in a list _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |