[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Generic bi-directional map, and related initialisation functions. At the moment



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1261031276 0
# Node ID 7d28228db41de5020b4bf7ee621ec79a6f2b86bc
# Parent  fe42b16855aa727c36833f19df9ba0e7a163941e
Generic bi-directional map, and related initialisation functions. At the moment
a single map is used to store mappings between sharing handles and disk blocks.
This is used to share pages which store data read of the same blocks on
(virtual) disk.
Note that the map is stored in a shared memory region, as it needs to be
accessed by multiple tapdisk processes. This complicates memory allocation
(malloc cannot be used), prevents poniters to be stored directly (as the shared
memory region might and is mapped at different base address) and finally pthread
locks need to be multi-process aware.

Signed-off-by: Grzegorz Milos <Grzegorz.Milos@xxxxxxxxxx>
---
 tools/Makefile                      |    2 
 tools/blktap/drivers/Makefile       |    4 
 tools/blktap/drivers/blktapctrl.c   |    3 
 tools/blktap2/drivers/Makefile      |   11 
 tools/blktap2/drivers/tapdisk-vbd.c |    3 
 tools/memshr/Makefile               |   47 +
 tools/memshr/bidir-hash.c           | 1275 ++++++++++++++++++++++++++++++++++++
 tools/memshr/bidir-hash.h           |  113 +++
 tools/memshr/bidir-namedefs.h       |   80 ++
 tools/memshr/interface.c            |   92 ++
 tools/memshr/memshr-priv.h          |   34 
 tools/memshr/memshr.h               |   29 
 tools/memshr/shm.c                  |  182 +++++
 tools/memshr/shm.h                  |   35 
 14 files changed, 1905 insertions(+), 5 deletions(-)

diff -r fe42b16855aa -r 7d28228db41d tools/Makefile
--- a/tools/Makefile    Thu Dec 17 06:27:56 2009 +0000
+++ b/tools/Makefile    Thu Dec 17 06:27:56 2009 +0000
@@ -21,9 +21,11 @@ SUBDIRS-$(VTPM_TOOLS) += vtpm
 SUBDIRS-$(VTPM_TOOLS) += vtpm
 SUBDIRS-y += xenstat
 SUBDIRS-$(CONFIG_Linux) += libaio
+SUBDIRS-$(CONFIG_Linux) += memshr 
 SUBDIRS-$(CONFIG_Linux) += blktap
 SUBDIRS-$(CONFIG_Linux) += blktap2
 SUBDIRS-$(CONFIG_NetBSD) += libaio
+SUBDIRS-$(CONFIG_NetBSD) += memshr 
 SUBDIRS-$(CONFIG_NetBSD) += blktap2
 SUBDIRS-$(CONFIG_NetBSD) += xenbackendd
 SUBDIRS-y += libfsimage
diff -r fe42b16855aa -r 7d28228db41d tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile     Thu Dec 17 06:27:56 2009 +0000
+++ b/tools/blktap/drivers/Makefile     Thu Dec 17 06:27:56 2009 +0000
@@ -4,6 +4,7 @@ IBIN         = blktapctrl tapdisk
 IBIN         = blktapctrl tapdisk
 QCOW_UTIL    = img2qcow qcow2raw qcow-create
 LIBAIO_DIR   = ../../libaio/src
+MEMSHR_DIR   = ../../memshr
 
 CFLAGS   += -Werror
 CFLAGS   += -Wno-unused
@@ -11,6 +12,7 @@ CFLAGS   += $(CFLAGS_libxenctrl)
 CFLAGS   += $(CFLAGS_libxenctrl)
 CFLAGS   += $(CFLAGS_libxenstore)
 CFLAGS   += -I $(LIBAIO_DIR)
+CFLAGS   += -I $(MEMSHR_DIR)
 CFLAGS   += -D_GNU_SOURCE
 
 ifeq ($(shell . ./check_gcrypt $(CC)),yes)
@@ -21,7 +23,7 @@ CRYPT_LIB := -lcrypto
 $(warning === libgcrypt not installed: falling back to libcrypto ===)
 endif
 
-LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib 
-lblktap
+LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) 
$(MEMSHR_DIR)/libmemshr.a -L../lib -lblktap -lrt -lm
 LDFLAGS_img := $(LIBAIO_DIR)/libaio.a $(CRYPT_LIB) -lpthread -lz
 
 BLK-OBJS-y  := block-aio.o
diff -r fe42b16855aa -r 7d28228db41d tools/blktap/drivers/blktapctrl.c
--- a/tools/blktap/drivers/blktapctrl.c Thu Dec 17 06:27:56 2009 +0000
+++ b/tools/blktap/drivers/blktapctrl.c Thu Dec 17 06:27:56 2009 +0000
@@ -50,6 +50,7 @@
 #include <xs.h>
 #include <sys/time.h>
 #include <syslog.h>
+#include <memshr.h>
                                                                      
 #include "blktaplib.h"
 #include "blktapctrl.h"
@@ -858,6 +859,8 @@ int main(int argc, char *argv[])
                goto open_failed;
        }
 
+    memshr_daemon_initialize();
+
  retry:
        /* Set up store connection and watch. */
        h = xs_daemon_open();
diff -r fe42b16855aa -r 7d28228db41d tools/blktap2/drivers/Makefile
--- a/tools/blktap2/drivers/Makefile    Thu Dec 17 06:27:56 2009 +0000
+++ b/tools/blktap2/drivers/Makefile    Thu Dec 17 06:27:56 2009 +0000
@@ -15,6 +15,7 @@ CFLAGS    += -I../lib -I../../libxc
 CFLAGS    += -I../lib -I../../libxc
 CFLAGS    += -I../include -I../../include
 CFLAGS    += -I $(LIBAIO_DIR)
+CFLAGS    += -I $(MEMSHR_DIR)
 CFLAGS    += -D_GNU_SOURCE
 CFLAGS    += -DUSE_NFS_LOCKS
 
@@ -52,7 +53,9 @@ REMUS-OBJS  += hashtable_utility.o
 $(REMUS-OBJS): CFLAGS += -I$(XEN_XENSTORE)
 
 LIBAIO_DIR = $(XEN_ROOT)/tools/libaio/src
-tapdisk2 tapdisk-stream tapdisk-diff $(QCOW_UTIL): AIOLIBS := 
$(LIBAIO_DIR)/libaio.a
+MEMSHR_DIR = $(XEN_ROOT)/tools/memshr
+MEMSHRLIBS := $(MEMSHR_DIR)/libmemshr.a
+tapdisk2 tapdisk-stream tapdisk-diff $(QCOW_UTIL): AIOLIBS := 
$(LIBAIO_DIR)/libaio.a 
 tapdisk-client tapdisk-stream tapdisk-diff $(QCOW_UTIL): CFLAGS  += 
-I$(LIBAIO_DIR) -I$(XEN_LIBXC)
 
 ifeq ($(VHD_STATIC),y)
@@ -94,13 +97,13 @@ all: $(IBIN) lock-util qcow-util
 
 
 tapdisk2: $(TAP-OBJS-y) $(BLK-OBJS-y) $(MISC-OBJS-y) tapdisk2.c
-       $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(LDFLAGS_img)
+       $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(MEMSHRLIBS) 
$(LDFLAGS_img)
 
 tapdisk-client: tapdisk-client.o
        $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS)  $(LDFLAGS_img)
 
 tapdisk-stream tapdisk-diff: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y)
-       $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS)  $(LDFLAGS_img)
+       $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(MEMSHRLIBS) 
$(LDFLAGS_img)
 
 td-util: td.o tapdisk-utils.o tapdisk-log.o $(PORTABLE-OBJS-y)
        $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS)  $(LDFLAGS_img)
@@ -112,7 +115,7 @@ qcow-util: img2qcow qcow2raw qcow-create
 qcow-util: img2qcow qcow2raw qcow-create
 
 img2qcow qcow2raw qcow-create: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y)
-       $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(LDFLAGS_img)
+       $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(MEMSHRLIBS) 
$(LDFLAGS_img)
 
 install: all
        $(INSTALL_DIR) -p $(DESTDIR)$(INST_DIR)
diff -r fe42b16855aa -r 7d28228db41d tools/blktap2/drivers/tapdisk-vbd.c
--- a/tools/blktap2/drivers/tapdisk-vbd.c       Thu Dec 17 06:27:56 2009 +0000
+++ b/tools/blktap2/drivers/tapdisk-vbd.c       Thu Dec 17 06:27:56 2009 +0000
@@ -34,6 +34,7 @@
 #include <libgen.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
+#include <memshr.h>
 
 #include "libvhd.h"
 #include "tapdisk-image.h"
@@ -105,6 +106,8 @@ tapdisk_vbd_initialize(int rfd, int wfd,
        /* default blktap ring completion */
        vbd->callback = tapdisk_vbd_callback;
        vbd->argument = vbd;
+    
+    memshr_vbd_initialize();
 
        INIT_LIST_HEAD(&vbd->driver_stack);
        INIT_LIST_HEAD(&vbd->images);
diff -r fe42b16855aa -r 7d28228db41d tools/memshr/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/memshr/Makefile     Thu Dec 17 06:27:56 2009 +0000
@@ -0,0 +1,47 @@
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+LIBMEMSHR-BUILD := libmemshr.a
+
+CFLAGS          += -Werror
+CFLAGS          += -Wno-unused
+CFLAGS          += -I../include
+CFLAGS          += $(CFLAGS_libxenctrl)
+CFLAGS          += -D_GNU_SOURCE
+CFLAGS          += -fPIC
+CFLAGS          += -g
+
+# Get gcc to generate the dependencies for us.
+CFLAGS          += -Wp,-MD,.$(@F).d
+DEPS             = .*.d
+
+LIB-SRCS        := interface.c
+LIB-SRCS        += shm.c
+LIB-SRCS        += bidir-hash.c
+
+LIB-OBJS        := interface.o
+LIB-OBJS        += shm.o
+LIB-OBJS        += bidir-hash-fgprtshr.o
+LIB-OBJS        += bidir-hash-blockshr.o
+
+all: build
+
+build: $(LIBMEMSHR-BUILD)
+
+bidir-hash-fgprtshr.o: bidir-hash.c
+       $(CC) $(CFLAGS) -DFINGERPRINT_MAP -c -o $*.o bidir-hash.c 
+
+bidir-hash-blockshr.o: bidir-hash.c
+       $(CC) $(CFLAGS) -DBLOCK_MAP -c -o $*.o bidir-hash.c 
+
+libmemshr.a: $(LIB-OBJS)
+       $(AR) rc $@ $^
+
+install: all
+
+clean:
+       rm -rf *.a *.o *~ $(DEPS)
+
+.PHONY: all build clean install
+
+-include $(DEPS)
diff -r fe42b16855aa -r 7d28228db41d tools/memshr/bidir-hash.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/memshr/bidir-hash.c Thu Dec 17 06:27:56 2009 +0000
@@ -0,0 +1,1275 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <assert.h>
+#include <errno.h>
+#include <math.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "bidir-hash.h"
+
+static const uint32_t hash_sizes[] = {53, 97, 193, 389, 769, 1543, 3079, 6151,
+    12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739,
+    6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189,
+    805306457, 1610612741};
+static const uint16_t hash_sizes_len =
+            sizeof(hash_sizes)/sizeof(hash_sizes[0]);
+static const float hash_max_load_fact = 0.65;
+static const float hash_min_load_fact = 0.10;
+
+/* How many buckets will be covered by a single rw lock */
+#define BUCKETS_PER_LOCK    64
+#define nr_locks(_nr_buckets)   (1 + (_nr_buckets) / BUCKETS_PER_LOCK)
+
+
+#define HASH_LOCK                                                              
\
+    pthread_rwlock_t hash_lock
+
+#define BUCKET_LOCK                                                            
\
+    pthread_rwlock_t bucket_lock
+
+struct hash_entry
+{
+    __k_t key;
+    __v_t value;
+    /* This structure will belong to two buckets, one in each hash table */
+    struct hash_entry *key_next;
+    struct hash_entry *value_next;
+};
+
+struct bucket
+{
+    struct hash_entry *hash_entry;
+};
+
+struct bucket_lock
+{
+    BUCKET_LOCK;
+};
+
+struct __hash
+{
+    int lock_alive;
+    HASH_LOCK;                            /* protects:
+                                           * *_tab, tab_size, size_idx, *_load
+                                           * (all writes with wrlock)
+                                           */
+    volatile uint32_t nr_ent;             /* # entries held in hashtables */
+    struct bucket *key_tab;               /* forward mapping hashtable    */
+    struct bucket *value_tab;             /* backward mapping hashtable   */
+    struct bucket_lock *key_lock_tab;     /* key table bucket locks       */
+    struct bucket_lock *value_lock_tab;   /* value table bucket locks     */
+    uint32_t tab_size;                    /* # buckets is hashtables      */
+    uint16_t size_idx;                    /* table size index             */
+    uint32_t max_load;                    /* # entries before rehash      */
+    uint32_t min_load;                    /* # entries before rehash      */
+};
+
+struct __hash *__hash_init   (struct __hash *h, uint32_t min_size);
+int            __key_lookup  (struct __hash *h, __k_t k, __v_t *vp);
+int            __value_lookup(struct __hash *h, __v_t v, __k_t *kp);
+int            __insert      (struct __hash *h, __k_t k, __v_t v);
+int            __key_remove  (struct __hash *h, __k_t k, __v_t *vp);
+int            __value_remove(struct __hash *h, __v_t v, __k_t *kp);
+int            __hash_destroy(struct __hash *h,
+                    void (*entry_consumer)(__k_t k, __v_t v, void *p),
+                    void *d);
+int            __hash_iterator(struct __hash *h,
+                        int (*entry_consumer)(__k_t k, __v_t v, void *p),
+                        void *d);
+static void      hash_resize(struct __hash *h);
+
+
+#ifdef BIDIR_USE_STDMALLOC
+
+static void* alloc_entry(struct __hash *h, int size)
+{
+    return malloc(size);
+}
+
+static void alloc_buckets(struct __hash *h,
+                          int nr_buckets,
+                          struct bucket **bucket_tab,
+                          struct bucket_lock **bucket_locks_tab)
+{
+    *bucket_tab = (struct bucket*)
+        malloc(nr_buckets * sizeof(struct bucket));
+    *bucket_locks_tab = (struct bucket_lock*)
+        malloc(nr_locks(nr_buckets) * sizeof(struct bucket_lock));
+}
+
+static void free_entry(struct __hash *h, void *p)
+{
+    free(p);
+}
+
+static void free_buckets(struct __hash *h,
+                         struct bucket *buckets,
+                         struct bucket_lock *bucket_locks)
+{
+    if(buckets) free(buckets);
+    if(bucket_locks) free(bucket_locks);
+}
+
+static int max_entries(struct __hash *h)
+{
+    /* There are no explicit restrictions to how many entries we can store */
+    return -1;
+}
+
+#else
+
+/*****************************************************************************/
+/** Memory allocator for shared memory region **/
+/*****************************************************************************/
+#define SHM_TABLE_SLOTS 4
+
+struct shm_hdr
+{
+    int             hash_allocated;
+    pthread_mutex_t mutex;
+    int             free_tab_slots[SHM_TABLE_SLOTS];
+
+    unsigned long   freelist_offset;
+                    
+    unsigned long   entries_offset;
+    unsigned long   nr_entries;
+                    
+    unsigned long   tabs_offset;
+    unsigned long   max_tab_size;
+    unsigned long   max_lock_tab_size;
+
+    struct __hash   hash;
+};
+
+static unsigned long get_shm_baddr(void *hdr)
+{
+    return ((unsigned long)hdr - offsetof(struct shm_hdr, hash));
+}
+
+
+/** Locations of various structures/memory areas **/
+static struct shm_hdr* get_shm_hdr(struct __hash *h)
+{
+    return (struct shm_hdr *)
+            ((unsigned long)h - offsetof(struct shm_hdr, hash));
+}
+
+static uint32_t* get_shm_freelist(struct shm_hdr *hdr)
+{
+    unsigned long shm_baddr = (unsigned long)hdr;
+    return ((uint32_t *)(shm_baddr + hdr->freelist_offset));
+}
+
+static struct hash_entry* get_shm_entries(struct shm_hdr *hdr)
+{
+    unsigned long shm_baddr = (unsigned long)hdr;
+    return ((struct hash_entry *)(shm_baddr + hdr->entries_offset));
+}
+
+static struct bucket* get_shm_tab(struct shm_hdr *hdr, int i)
+{
+    unsigned long shm_baddr = (unsigned long)hdr;
+    return ((struct bucket *)
+               ((shm_baddr + hdr->tabs_offset) +
+                 i * (hdr->max_tab_size + hdr->max_lock_tab_size)));
+}
+
+static struct bucket_lock* get_shm_lock_tab(struct shm_hdr *hdr, int i)
+{
+    unsigned long shm_baddr = (unsigned long)hdr;
+    return ((struct bucket_lock *)
+               ((shm_baddr + hdr->tabs_offset) +
+                 i * (hdr->max_tab_size + hdr->max_lock_tab_size) +
+                 hdr->max_tab_size));
+}
+
+static int get_shm_slot(struct shm_hdr *hdr, void *p)
+{
+    unsigned long shm_baddr = (unsigned long)hdr;
+    return ((unsigned long)p - (shm_baddr + hdr->tabs_offset)) /
+              (hdr->max_tab_size + hdr->max_lock_tab_size);
+}
+
+/* Shared memory allocator locks */
+static int shm_mutex_init(struct shm_hdr *h)
+{
+    int ret;
+    pthread_mutexattr_t _attr;
+
+    ret = pthread_mutexattr_init(&_attr);
+    if(ret == 0)
+        ret = pthread_mutexattr_setpshared(&_attr, PTHREAD_PROCESS_SHARED);
+    if(ret == 0)
+        ret = pthread_mutex_init(&h->mutex, &_attr);
+    if(ret == 0)
+        ret = pthread_mutexattr_destroy(&_attr);
+
+    return ret;
+};
+
+static int shm_mutex_lock(struct shm_hdr *h)
+{
+    return pthread_mutex_lock(&h->mutex);
+}
+
+static int shm_mutex_unlock(struct shm_hdr *h)
+{
+    return pthread_mutex_unlock(&h->mutex);
+}
+
+
+/* Shared memory allocator freelist */
+static void shm_add_to_freelist(struct shm_hdr *hdr, uint32_t sl)
+{
+    uint32_t *freelist = get_shm_freelist(hdr);
+
+    shm_mutex_lock(hdr);
+    freelist[sl+1] = freelist[0];
+    freelist[0] = sl;
+    shm_mutex_unlock(hdr);
+}
+
+static uint32_t shm_get_from_freelist(struct shm_hdr *hdr)
+{
+    uint32_t *freelist = get_shm_freelist(hdr);
+    uint32_t slot;
+
+    shm_mutex_lock(hdr);
+    slot = freelist[0];
+    freelist[0] = freelist[slot+1];
+    shm_mutex_unlock(hdr);
+
+    return (slot == 0 ? -1 : slot);
+}
+
+
+#define SHM_ALLOC_MAIN(_n)
+
+static unsigned long shm_init_offsets(
+                                    struct shm_hdr *hdr, int nr_entries)
+{
+    hdr->freelist_offset = sizeof(struct shm_hdr);
+
+    /* Freelist needs one extra slot in the array for the freelist head */
+    hdr->entries_offset =
+        hdr->freelist_offset + (nr_entries + 1) * sizeof(uint32_t);
+    hdr->nr_entries = nr_entries;
+
+    hdr->tabs_offset = hdr->entries_offset +
+        nr_entries * sizeof(struct hash_entry);
+    /* We want to allocate table 1.5 larger than the number of entries
+       we want to hold in it */
+    hdr->max_tab_size =
+        (nr_entries * 3 / 2) * sizeof(struct bucket);
+    hdr->max_lock_tab_size =
+        nr_locks(hdr->max_tab_size) * sizeof(struct bucket_lock);
+
+    return hdr->tabs_offset + (hdr->max_tab_size + hdr->max_lock_tab_size) * 4;
+}
+
+struct __hash* __shm_hash_init(unsigned long shm_baddr, unsigned long shm_size)
+{
+    uint32_t i;
+    struct shm_hdr *hdr;
+
+    /* Some sanity checks */
+    hdr = (struct shm_hdr *)shm_baddr;
+    memset(hdr, 0, sizeof(struct shm_hdr));
+
+    /* Find the maximum number of entries we can store in the given shm_size */
+    for(i=1; shm_init_offsets(hdr, i) < shm_size; i++){};
+    shm_init_offsets(hdr, (i-1));
+
+    memset(get_shm_freelist(hdr), 0,
+           (hdr->nr_entries + 1) * sizeof(uint32_t));
+    if(shm_mutex_init(hdr) != 0)
+        return NULL;
+    for(i=0; i<hdr->nr_entries; i++)
+        shm_add_to_freelist(hdr, i);
+    for(i=0; i<SHM_TABLE_SLOTS; i++)
+        hdr->free_tab_slots[i] = 1;
+
+    shm_mutex_lock(hdr);
+    assert(!hdr->hash_allocated);
+    hdr->hash_allocated = 1;
+    shm_mutex_unlock(hdr);
+
+    return __hash_init(&hdr->hash, 1000);
+}
+
+struct __hash* __shm_hash_get(unsigned long shm_baddr)
+{
+    struct shm_hdr *hdr = (struct shm_hdr *)shm_baddr;
+
+    return (hdr->hash_allocated ? &hdr->hash : NULL);
+}
+
+static void* alloc_entry(struct __hash *h, int size)
+{
+    struct shm_hdr *hdr = get_shm_hdr(h);
+    uint32_t slot = shm_get_from_freelist(hdr);
+
+    assert(size == sizeof(struct hash_entry));
+    if(slot == -1)
+        return NULL;
+
+    return (get_shm_entries(hdr) + slot);
+}
+
+static void alloc_buckets(struct __hash *h,
+                          int nr_buckets,
+                          struct bucket **buckets_tab,
+                          struct bucket_lock **bucket_locks_tab)
+{
+    struct shm_hdr *hdr = get_shm_hdr(h);
+    int free_slot;
+
+    *buckets_tab = NULL;
+    *bucket_locks_tab = NULL;
+
+    if(((nr_buckets * sizeof(struct bucket)) > hdr->max_tab_size) ||
+       ((nr_locks(nr_buckets) * sizeof(struct bucket_lock)) >
+                                                hdr->max_lock_tab_size))
+        return;
+
+    shm_mutex_lock(hdr);
+    for(free_slot=0; free_slot<SHM_TABLE_SLOTS; free_slot++)
+        if(hdr->free_tab_slots[free_slot])
+            break;
+    if(free_slot == SHM_TABLE_SLOTS)
+    {
+        shm_mutex_unlock(hdr);
+        return;
+    }
+    hdr->free_tab_slots[free_slot] = 0;
+    shm_mutex_unlock(hdr);
+    *buckets_tab      = get_shm_tab(hdr, free_slot);
+    *bucket_locks_tab = get_shm_lock_tab(hdr, free_slot);
+}
+
+static void free_entry(struct __hash *h, void *p)
+{
+    struct shm_hdr *hdr = get_shm_hdr(h);
+    uint32_t slot;
+
+    slot = ((uint32_t)((struct hash_entry *)p -
+                get_shm_entries(hdr)));
+    shm_add_to_freelist(hdr, slot);
+}
+
+static void free_buckets(struct __hash *h,
+                         struct bucket *buckets,
+                         struct bucket_lock *bucket_locks)
+{
+    struct shm_hdr *hdr = get_shm_hdr(h);
+    int slot;
+
+    if(!buckets || !bucket_locks)
+    {
+        assert(!buckets && !bucket_locks);
+        return;
+    }
+    slot = get_shm_slot(hdr, buckets);
+    assert(slot < SHM_TABLE_SLOTS);
+    assert((char *)bucket_locks == (char *)buckets + hdr->max_tab_size);
+    shm_mutex_lock(hdr);
+    assert(hdr->free_tab_slots[slot] == 0);
+    hdr->free_tab_slots[slot] = 1;
+    shm_mutex_unlock(hdr);
+}
+
+static int max_entries(struct __hash *h)
+{
+    struct shm_hdr *hdr = get_shm_hdr(h);
+
+    return hdr->nr_entries;
+}
+
+#endif /* !BIDIR_USE_STDMALLOC */
+
+
+/* The structures may be stored in shared memory region, with base address */
+/* stored in shm_base_addr. All the pointers in the above structures need  */
+/* to be relative to this base address (otherwise they would not make      */
+/* sense to other processes). Bellow accessor functions are used to        */
+/* convert between canonical (base address relative) and local addresses.  */
+/* C2L stands for CANONICAL_TO_LOCAL, and vice versa                       */
+#define C2L(_h, _p) ((typeof(_p))((unsigned long)(_p) +                        
\
+            get_shm_baddr(_h)))
+#define L2C(_h, _p) ((typeof(_p))((unsigned long)(_p) -                        
\
+            get_shm_baddr(_h)))
+
+
+#define HASH_LOCK_INIT(_h) ({                                                  
\
+    int _ret;                                                                  
\
+    pthread_rwlockattr_t _attr;                                                
\
+                                                                               
\
+    h->lock_alive = 1;                                                         
\
+    _ret = pthread_rwlockattr_init(&_attr);                                    
\
+    if(_ret == 0)                                                              
\
+        _ret = pthread_rwlockattr_setpshared(&_attr,                           
\
+                                             PTHREAD_PROCESS_SHARED);          
\
+    if(_ret == 0)                                                              
\
+        _ret = pthread_rwlock_init(&(_h)->hash_lock, &_attr);                  
\
+    if(_ret == 0)                                                              
\
+        _ret = pthread_rwlockattr_destroy(&_attr);                             
\
+                                                                               
\
+    _ret;                                                                      
\
+})
+
+#define HASH_LOCK_RDLOCK(_h) ({                                                
\
+    int _ret;                                                                  
\
+                                                                               
\
+    if(!_h->lock_alive) _ret = ENOLCK;                                         
\
+    else                                                                       
\
+    {                                                                          
\
+        struct timespec _ts;                                                   
\
+        /* 10s timeout, long but ~matches disk spin-up times */                
\
+        _ts.tv_sec = time(NULL) + 10;                                          
\
+        _ts.tv_nsec = 0;                                                       
\
+        _ret = pthread_rwlock_timedrdlock(&(_h)->hash_lock, &_ts);             
\
+        if(_ret == ETIMEDOUT) _h->lock_alive = 0;                              
\
+    }                                                                          
\
+    _ret;                                                                      
\
+})
+
+#define HASH_LOCK_RDUNLOCK(_h)                                                 
\
+    pthread_rwlock_unlock(&(_h)->hash_lock)
+
+#define HASH_LOCK_WRLOCK(_h) ({                                                
\
+    int _ret;                                                                  
\
+                                                                               
\
+    if(!_h->lock_alive) _ret = ENOLCK;                                         
\
+    else                                                                       
\
+    {                                                                          
\
+        struct timespec _ts;                                                   
\
+        _ts.tv_sec = time(NULL) + 10;                                          
\
+        _ts.tv_nsec = 0UL;                                                     
\
+        _ret = pthread_rwlock_timedwrlock(&(_h)->hash_lock, &_ts);             
\
+        if(_ret == ETIMEDOUT) _h->lock_alive = 0;                              
\
+    }                                                                          
\
+    _ret;                                                                      
\
+})
+
+#define HASH_LOCK_TRYWRLOCK(_h) ({                                             
\
+    int _ret = (_h->lock_alive ?                                               
\
+                    pthread_rwlock_trywrlock(&(_h)->hash_lock) :               
\
+                    ENOLCK);                                                   
\
+    _ret;                                                                      
\
+})
+
+#define HASH_LOCK_WRUNLOCK(_h)                                                 
\
+    pthread_rwlock_unlock(&(_h)->hash_lock)
+
+
+#define BUCKET_LOCK_INIT(_h, _b) ({                                            
\
+    int _ret;                                                                  
\
+    pthread_rwlockattr_t _attr;                                                
\
+                                                                               
\
+    _ret = pthread_rwlockattr_init(&_attr);                                    
\
+    if(_ret == 0)                                                              
\
+        _ret = pthread_rwlockattr_setpshared(&_attr,                           
\
+                                             PTHREAD_PROCESS_SHARED);          
\
+    if(_ret == 0)                                                              
\
+        _ret = pthread_rwlock_init(&(_b)->bucket_lock, &_attr);                
\
+    if(_ret == 0)                                                              
\
+        _ret = pthread_rwlockattr_destroy(&_attr);                             
\
+                                                                               
\
+    _ret;                                                                      
\
+})
+
+
+#define BUCKET_LOCK_RDLOCK(_h, _lock_tab, _idx) ({                             
\
+    int _ret;                                                                  
\
+    struct timespec _ts;                                                       
\
+    struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK];       
\
+                                                                               
\
+    _ts.tv_sec = time(NULL) + 10;                                              
\
+    _ts.tv_nsec = 0;                                                           
\
+    _ret = pthread_rwlock_timedrdlock(&(_lock)->bucket_lock, &_ts);            
\
+    if(_ret == ETIMEDOUT) (_h)->lock_alive = 0;                                
\
+    _ret;                                                                      
\
+})
+
+
+#define BUCKET_LOCK_RDUNLOCK(_h, _lock_tab, _idx) ({                           
\
+    struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK];       
\
+    pthread_rwlock_unlock(&(_lock)->bucket_lock);                              
\
+})
+
+#define BUCKET_LOCK_WRLOCK(_h, _lock_tab, _idx) ({                             
\
+    int _ret;                                                                  
\
+    struct timespec _ts;                                                       
\
+    struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK];       
\
+                                                                               
\
+    _ts.tv_sec = time(NULL) + 10;                                              
\
+    _ts.tv_nsec = 0;                                                           
\
+    _ret = pthread_rwlock_timedwrlock(&(_lock)->bucket_lock, &_ts);            
\
+    if(_ret == ETIMEDOUT) (_h)->lock_alive = 0;                                
\
+    _ret;                                                                      
\
+})
+
+#define BUCKET_LOCK_WRUNLOCK(_h, _lock_tab, _idx) ({                           
\
+    struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK];       
\
+    pthread_rwlock_unlock(&(_lock)->bucket_lock);                              
\
+})
+
+#define TWO_BUCKETS_LOCK_WRLOCK(_h, _blt1, _idx1, _blt2, _idx2)  ({            
\
+    int _ret;                                                                  
\
+    pthread_rwlock_t *_l1, *_l2;                                               
\
+    struct timespec _ts;                                                       
\
+    struct bucket_lock *_bl1 = &(_blt1)[(_idx1) / BUCKETS_PER_LOCK];           
\
+    struct bucket_lock *_bl2 = &(_blt2)[(_idx2) / BUCKETS_PER_LOCK];           
\
+                                                                               
\
+    assert((_bl1) != (_bl2));                                                  
\
+    if((_bl1) < (_bl2))                                                        
\
+    {                                                                          
\
+        _l1 = &(_bl1)->bucket_lock;                                            
\
+        _l2 = &(_bl2)->bucket_lock;                                            
\
+    }                                                                          
\
+    else                                                                       
\
+    {                                                                          
\
+        _l1 = &(_bl2)->bucket_lock;                                            
\
+        _l2 = &(_bl1)->bucket_lock;                                            
\
+    }                                                                          
\
+    _ts.tv_sec = time(NULL) + 10;                                              
\
+    _ts.tv_nsec = 0;                                                           
\
+    _ret = pthread_rwlock_timedwrlock(_l1, &_ts);                              
\
+    _ts.tv_sec = time(NULL) + 10;                                              
\
+    _ts.tv_nsec = 0;                                                           
\
+    if(_ret == 0)                                                              
\
+        _ret = pthread_rwlock_timedwrlock(_l2, &_ts);                          
\
+    if(_ret == ETIMEDOUT) (_h)->lock_alive = 0;                                
\
+                                                                               
\
+    _ret;                                                                      
\
+})
+
+#define TWO_BUCKETS_LOCK_WRUNLOCK(_h, _blt1, _idx1, _blt2, _idx2) ({           
\
+    int _ret;                                                                  
\
+    struct bucket_lock *_bl1 = &(_blt1)[(_idx1) / BUCKETS_PER_LOCK];           
\
+    struct bucket_lock *_bl2 = &(_blt2)[(_idx2) / BUCKETS_PER_LOCK];           
\
+                                                                               
\
+    _ret = pthread_rwlock_unlock(&(_bl1)->bucket_lock);                        
\
+    if(_ret == 0)                                                              
\
+        _ret = pthread_rwlock_unlock(&(_bl2)->bucket_lock);                    
\
+                                                                               
\
+    _ret;                                                                      
\
+})
+
+
+
+
+static uint32_t hash_to_idx(struct __hash *h, uint32_t hash)
+{
+    return (hash % h->tab_size);
+}
+
+static void alloc_tab(struct __hash *h,
+                      int size,
+                      struct bucket **buckets_tab,
+                      struct bucket_lock **bucket_locks_tab)
+{
+    int i;
+
+    alloc_buckets(h, size, buckets_tab, bucket_locks_tab);
+    if(!(*buckets_tab) || !(*bucket_locks_tab))
+        goto error_out;
+    memset(*buckets_tab, 0, size * sizeof(struct bucket));
+    memset(*bucket_locks_tab, 0, nr_locks(size) * sizeof(struct bucket_lock));
+    for(i=0; i<nr_locks(size); i++)
+        if(BUCKET_LOCK_INIT(h, *bucket_locks_tab + i) != 0)
+            goto error_out;
+
+    return;
+error_out:
+    free_buckets(h, *buckets_tab, *bucket_locks_tab);
+    *buckets_tab = NULL;
+    *bucket_locks_tab = NULL;
+    return;
+}
+
+
+struct __hash *__hash_init(struct __hash *h, uint32_t min_size)
+{
+    uint32_t size;
+    uint16_t size_idx;
+    struct bucket *buckets;
+    struct bucket_lock *bucket_locks;
+
+    /* Sanity check on args */
+    if (min_size > hash_sizes[hash_sizes_len-1]) return NULL;
+    /* Find least size greater than init_size */
+    for(size_idx = 0; size_idx < hash_sizes_len; size_idx++)
+            if(hash_sizes[size_idx] >= min_size)
+                break;
+    size = hash_sizes[size_idx];
+
+    if(!h) return NULL;
+    alloc_tab(h, size, &buckets, &bucket_locks);
+    if(!buckets || !bucket_locks) goto alloc_fail;
+    h->key_tab         = L2C(h, buckets);
+    h->key_lock_tab    = L2C(h, bucket_locks);
+    alloc_tab(h, size, &buckets, &bucket_locks);
+    if(!buckets || !bucket_locks) goto alloc_fail;
+    h->value_tab       = L2C(h, buckets);
+    h->value_lock_tab  = L2C(h, bucket_locks);
+    /* Init all h variables */
+    if(HASH_LOCK_INIT(h) != 0) goto alloc_fail;
+    h->nr_ent = 0;
+    h->tab_size = size;
+    h->size_idx = size_idx;
+    h->max_load = (uint32_t)ceilf(hash_max_load_fact * size);
+    h->min_load = (uint32_t)ceilf(hash_min_load_fact * size);
+
+    return h;
+
+alloc_fail:
+    if(h->key_tab || h->key_lock_tab)
+        free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab));
+    return NULL;
+}
+
+#undef __prim
+#undef __prim_t
+#undef __prim_tab
+#undef __prim_lock_tab
+#undef __prim_hash
+#undef __prim_cmp
+#undef __prim_next
+#undef __sec
+#undef __sec_t
+
+#define __prim             key
+#define __prim_t         __k_t
+#define __prim_tab         key_tab
+#define __prim_lock_tab    key_lock_tab
+#define __prim_hash      __key_hash
+#define __prim_cmp       __key_cmp
+#define __prim_next        key_next
+#define __sec              value
+#define __sec_t          __v_t
+int __key_lookup(struct __hash *h, __prim_t k, __sec_t *vp)
+{
+    struct hash_entry *entry;
+    struct bucket *b;
+    struct bucket_lock *blt;
+    uint32_t idx;
+
+    if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+    idx = hash_to_idx(h, __prim_hash(k));
+    b = C2L(h, &h->__prim_tab[idx]);
+    blt = C2L(h, h->__prim_lock_tab);
+    if(BUCKET_LOCK_RDLOCK(h, blt, idx) != 0) return -ENOLCK;
+    entry = b->hash_entry;
+    while(entry != NULL)
+    {
+        entry = C2L(h, entry);
+        if(__prim_cmp(k, entry->__prim))
+        {
+            /* Unlock here */
+            *vp = entry->__sec;
+            BUCKET_LOCK_RDUNLOCK(h, blt, idx);
+            HASH_LOCK_RDUNLOCK(h);
+            return 1;
+        }
+        entry = entry->__prim_next;
+    }
+    BUCKET_LOCK_RDUNLOCK(h, blt, idx);
+    HASH_LOCK_RDUNLOCK(h);
+    return 0;
+}
+
+/* value lookup is an almost exact copy of key lookup */
+#undef __prim
+#undef __prim_t
+#undef __prim_tab
+#undef __prim_lock_tab
+#undef __prim_hash
+#undef __prim_cmp
+#undef __prim_next
+#undef __sec
+#undef __sec_t
+
+#define __prim             value
+#define __prim_t         __v_t
+#define __prim_tab         value_tab
+#define __prim_lock_tab    value_lock_tab
+#define __prim_hash      __value_hash
+#define __prim_cmp       __value_cmp
+#define __prim_next        value_next
+#define __sec              key
+#define __sec_t          __k_t
+int __value_lookup(struct __hash *h, __prim_t k, __sec_t *vp)
+{
+    struct hash_entry *entry;
+    struct bucket *b;
+    struct bucket_lock *blt;
+    uint32_t idx;
+
+    if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+    idx = hash_to_idx(h, __prim_hash(k));
+    b = C2L(h, &h->__prim_tab[idx]);
+    blt = C2L(h, h->__prim_lock_tab);
+    if(BUCKET_LOCK_RDLOCK(h, blt, idx) != 0) return -ENOLCK;
+    entry = b->hash_entry;
+    while(entry != NULL)
+    {
+        entry = C2L(h, entry);
+        if(__prim_cmp(k, entry->__prim))
+        {
+            /* Unlock here */
+            *vp = entry->__sec;
+            BUCKET_LOCK_RDUNLOCK(h, blt, idx);
+            HASH_LOCK_RDUNLOCK(h);
+            return 1;
+        }
+        entry = entry->__prim_next;
+    }
+    BUCKET_LOCK_RDUNLOCK(h, blt, idx);
+    HASH_LOCK_RDUNLOCK(h);
+    return 0;
+}
+
+int __insert(struct __hash *h, __k_t k, __v_t v)
+{
+    uint32_t k_idx, v_idx;
+    struct hash_entry *entry;
+    struct bucket *bk, *bv;
+    struct bucket_lock *bltk, *bltv;
+
+
+    /* Allocate new entry before any locks (in case it fails) */
+    entry = (struct hash_entry*)
+                    alloc_entry(h, sizeof(struct hash_entry));
+    if(!entry) return 0;
+
+    if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+    /* Read from nr_ent is atomic(TODO check), no need for fancy accessors */
+    if(h->nr_ent+1 > h->max_load)
+    {
+        /* Resize needs the write lock, drop read lock temporarily */
+        HASH_LOCK_RDUNLOCK(h);
+        hash_resize(h);
+        if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+    }
+
+    /* Init the entry */
+    entry->key = k;
+    entry->value = v;
+
+    /* Work out the indicies */
+    k_idx = hash_to_idx(h, __key_hash(k));
+    v_idx = hash_to_idx(h, __value_hash(v));
+
+    /* Insert */
+    bk   = C2L(h, &h->key_tab[k_idx]);
+    bv   = C2L(h, &h->value_tab[v_idx]);
+    bltk = C2L(h, h->key_lock_tab);
+    bltv = C2L(h, h->value_lock_tab);
+    if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, k_idx, bltv, v_idx) != 0)
+        return -ENOLCK;
+    entry->key_next = bk->hash_entry;
+    bk->hash_entry = L2C(h, entry);
+    entry->value_next = bv->hash_entry;
+    bv->hash_entry = L2C(h, entry);
+    TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, k_idx, bltv, v_idx);
+
+    /* Book keeping */
+    __sync_add_and_fetch(&h->nr_ent, 1);
+
+    HASH_LOCK_RDUNLOCK(h);
+
+    return 1;
+}
+
+
+#undef __prim
+#undef __prim_t
+#undef __prim_tab
+#undef __prim_lock_tab
+#undef __prim_hash
+#undef __prim_cmp
+#undef __prim_next
+#undef __sec
+#undef __sec_t
+#undef __sec_tab
+#undef __sec_lock_tab
+#undef __sec_hash
+#undef __sec_next
+
+#define __prim             key
+#define __prim_t         __k_t
+#define __prim_tab         key_tab
+#define __prim_lock_tab    key_lock_tab
+#define __prim_hash      __key_hash
+#define __prim_cmp       __key_cmp
+#define __prim_next        key_next
+#define __sec              value
+#define __sec_t          __v_t
+#define __sec_tab          value_tab
+#define __sec_lock_tab     value_lock_tab
+#define __sec_hash       __value_hash
+#define __sec_next         value_next
+
+int __key_remove(struct __hash *h, __prim_t k, __sec_t *vp)
+{
+    struct hash_entry *e, *es, **pek, **pev;
+    struct bucket *bk, *bv;
+    struct bucket_lock *bltk, *bltv;
+    uint32_t old_kidx, kidx, vidx, min_load, nr_ent;
+    __prim_t ks;
+    __sec_t vs;
+
+    if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+
+again:
+    old_kidx = kidx = hash_to_idx(h, __prim_hash(k));
+    bk = C2L(h, &h->__prim_tab[kidx]);
+    bltk = C2L(h, h->__prim_lock_tab);
+    if(BUCKET_LOCK_RDLOCK(h, bltk, kidx) != 0) return -ENOLCK;
+    pek = &(bk->hash_entry);
+    e = *pek;
+    while(e != NULL)
+    {
+        e = C2L(h, e);
+        if(__prim_cmp(k, e->__prim))
+        {
+            goto found;
+        }
+        pek = &(e->__prim_next);
+        e = *pek;
+    }
+
+    BUCKET_LOCK_RDUNLOCK(h, bltk, kidx);
+    HASH_LOCK_RDUNLOCK(h);
+
+    return 0;
+
+found:
+    /*
+     * Make local copy of key and value.
+     */
+    es = e;
+    ks = e->__prim;
+    vs = e->__sec;
+    kidx = hash_to_idx(h, __prim_hash(ks));
+    /* Being paranoid: check if kidx has not changed, so that we unlock the
+     * right bucket */
+    assert(old_kidx == kidx);
+    vidx = hash_to_idx(h, __sec_hash(vs));
+    bk   = C2L(h, &h->__prim_tab[kidx]);
+    bv   = C2L(h, &h->__sec_tab[vidx]);
+    bltk = C2L(h, h->__prim_lock_tab);
+    bltv = C2L(h, h->__sec_lock_tab);
+    BUCKET_LOCK_RDUNLOCK(h, bltk, kidx);
+    if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, kidx, bltv, vidx) != 0) return -ENOLCK;
+    pek = &(bk->hash_entry);
+    pev = &(bv->hash_entry);
+
+    /* Find the entry in both tables */
+    e = *pek;
+    while(e != NULL)
+    {
+        e = C2L(h, e);
+        if(e == es)
+        {
+            /* Being paranoid: make sure that the key and value are
+             * still the same. This is still not 100%, because, in
+             * principle, the entry could have got deleted, when we
+             * didn't hold the locks for a little while, and exactly
+             * the same entry reinserted. If the __k_t & __v_t are
+             * simple types than it probably doesn't matter, but if
+             * either is a pointer type, the actual structure might
+             * now be different. The chances that happens are very
+             * slim, but still, if that's a problem, the user needs to
+             * pay attention to the structure re-allocation */
+            if((memcmp(&(e->__prim), &ks, sizeof(__prim_t))) ||
+               (memcmp(&(e->__sec), &vs, sizeof(__sec_t))))
+                break;
+            goto found_again;
+        }
+        pek = &(e->__prim_next);
+        e = *pek;
+    }
+
+    TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx);
+
+    /* Entry got removed in the meantime, try again */
+    goto again;
+
+found_again:
+    /* We are now comitted to the removal */
+    e = *pev;
+    while(e != NULL)
+    {
+        e = C2L(h, e);
+        if(e == es)
+        {
+            /* Both pek and pev are pointing to the right place, remove */
+            *pek = e->__prim_next;
+            *pev = e->__sec_next;
+
+            nr_ent = __sync_sub_and_fetch(&h->nr_ent, 1);
+            /* read min_load still under the hash lock! */
+            min_load = h->min_load;
+
+            TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx);
+            HASH_LOCK_RDUNLOCK(h);
+
+            if(nr_ent < min_load)
+                hash_resize(h);
+            if(vp != NULL)
+                *vp = e->__sec;
+            free_entry(h, e);
+            return 1;
+        }
+        pev = &(e->__sec_next);
+        e = *pev;
+    }
+
+    /* We should never get here!, no need to unlock anything */
+    return -ENOLCK;
+}
+
+#undef __prim
+#undef __prim_t
+#undef __prim_tab
+#undef __prim_lock_tab
+#undef __prim_hash
+#undef __prim_cmp
+#undef __prim_next
+#undef __sec
+#undef __sec_t
+#undef __sec_tab
+#undef __sec_lock_tab
+#undef __sec_hash
+#undef __sec_next
+
+#define __prim             value
+#define __prim_t         __v_t
+#define __prim_tab         value_tab
+#define __prim_lock_tab    value_lock_tab
+#define __prim_hash      __value_hash
+#define __prim_cmp       __value_cmp
+#define __prim_next        value_next
+#define __sec              key
+#define __sec_t          __k_t
+#define __sec_tab          key_tab
+#define __sec_lock_tab     key_lock_tab
+#define __sec_hash       __key_hash
+#define __sec_next         key_next
+
+int __value_remove(struct __hash *h, __prim_t k, __sec_t *vp)
+{
+    struct hash_entry *e, *es, **pek, **pev;
+    struct bucket *bk, *bv;
+    struct bucket_lock *bltk, *bltv;
+    uint32_t old_kidx, kidx, vidx, min_load, nr_ent;
+    __prim_t ks;
+    __sec_t vs;
+
+    if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+
+again:
+    old_kidx = kidx = hash_to_idx(h, __prim_hash(k));
+    bk = C2L(h, &h->__prim_tab[kidx]);
+    bltk = C2L(h, h->__prim_lock_tab);
+    if(BUCKET_LOCK_RDLOCK(h, bltk, kidx) != 0) return -ENOLCK;
+    pek = &(bk->hash_entry);
+    e = *pek;
+    while(e != NULL)
+    {
+        e = C2L(h, e);
+        if(__prim_cmp(k, e->__prim))
+        {
+            goto found;
+        }
+        pek = &(e->__prim_next);
+        e = *pek;
+    }
+
+    BUCKET_LOCK_RDUNLOCK(h, bltk, kidx);
+    HASH_LOCK_RDUNLOCK(h);
+
+    return 0;
+
+found:
+    /*
+     * Make local copy of key and value.
+     */
+    es = e;
+    ks = e->__prim;
+    vs = e->__sec;
+    kidx = hash_to_idx(h, __prim_hash(ks));
+    /* Being paranoid: check if kidx has not changed, so that we unlock the
+     * right bucket */
+    assert(old_kidx == kidx);
+    vidx = hash_to_idx(h, __sec_hash(vs));
+    bk   = C2L(h, &h->__prim_tab[kidx]);
+    bv   = C2L(h, &h->__sec_tab[vidx]);
+    bltk = C2L(h, h->__prim_lock_tab);
+    bltv = C2L(h, h->__sec_lock_tab);
+    BUCKET_LOCK_RDUNLOCK(h, bltk, kidx);
+    if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, kidx, bltv, vidx) != 0) return -ENOLCK;
+    pek = &(bk->hash_entry);
+    pev = &(bv->hash_entry);
+
+    /* Find the entry in both tables */
+    e = *pek;
+    while(e != NULL)
+    {
+        e = C2L(h, e);
+        if(e == es)
+        {
+            /* Being paranoid: make sure that the key and value are
+             * still the same. This is still not 100%, because, in
+             * principle, the entry could have got deleted, when we
+             * didn't hold the locks for a little while, and exactly
+             * the same entry reinserted. If the __k_t & __v_t are
+             * simple types than it probably doesn't matter, but if
+             * either is a pointer type, the actual structure might
+             * now be different. The chances that happens are very
+             * slim, but still, if that's a problem, the user needs to
+             * pay attention to the structure re-allocation */
+            if((memcmp(&(e->__prim), &ks, sizeof(__prim_t))) ||
+               (memcmp(&(e->__sec), &vs, sizeof(__sec_t))))
+                break;
+            goto found_again;
+        }
+        pek = &(e->__prim_next);
+        e = *pek;
+    }
+
+    TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx);
+
+    /* Entry got removed in the meantime, try again */
+    goto again;
+
+found_again:
+    /* We are now comitted to the removal */
+    e = *pev;
+    while(e != NULL)
+    {
+        e = C2L(h, e);
+        if(e == es)
+        {
+            /* Both pek and pev are pointing to the right place, remove */
+            *pek = e->__prim_next;
+            *pev = e->__sec_next;
+
+            nr_ent = __sync_sub_and_fetch(&h->nr_ent, 1);
+            /* read min_load still under the hash lock! */
+            min_load = h->min_load;
+
+            TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx);
+            HASH_LOCK_RDUNLOCK(h);
+
+            if(nr_ent < min_load)
+                hash_resize(h);
+            if(vp != NULL)
+                *vp = e->__sec;
+            free_entry(h, e);
+            return 1;
+        }
+        pev = &(e->__sec_next);
+        e = *pev;
+    }
+
+    /* We should never get here!, no need to unlock anything */
+    return -ENOLCK;
+}
+
+
+int __hash_destroy(struct __hash *h,
+                   void (*entry_consumer)(__k_t k, __v_t v, void *p),
+                   void *d)
+{
+    struct hash_entry *e, *n;
+    struct bucket *b;
+    int i;
+
+    if(HASH_LOCK_WRLOCK(h) != 0) return -ENOLCK;
+
+    /* No need to lock individual buckets, with hash write lock  */
+    for(i=0; i < h->tab_size; i++)
+    {
+        b = C2L(h, &h->key_tab[i]);
+        e = b->hash_entry;
+        while(e != NULL)
+        {
+            e = C2L(h, e);
+            n = e->key_next;
+            if(entry_consumer)
+                entry_consumer(e->key, e->value, d);
+            free_entry(h, e);
+            e = n;
+        }
+    }
+    free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab));
+    free_buckets(h, C2L(h, h->value_tab), C2L(h, h->value_lock_tab));
+
+    HASH_LOCK_WRUNLOCK(h);
+    h->lock_alive = 0;
+
+    return 0;
+}
+
+static void hash_resize(struct __hash *h)
+{
+    int new_size_idx, i, lock_ret;
+    uint32_t size, old_size, kidx, vidx;
+    struct bucket *t1, *t2, *b;
+    struct bucket_lock *l1, *l2;
+    struct hash_entry *e, *n;
+
+    /* We may fail to allocate the lock, if the resize is triggered while
+       we are iterating (under read lock) */
+    lock_ret = HASH_LOCK_TRYWRLOCK(h);
+    if(lock_ret != 0) return;
+
+    new_size_idx = h->size_idx;
+    /* Work out the new size */
+    if(h->nr_ent >= h->max_load)
+        new_size_idx = h->size_idx+1;
+    if(h->nr_ent < h->min_load)
+        new_size_idx = h->size_idx-1;
+    if((new_size_idx == h->size_idx) ||
+       (new_size_idx >= hash_sizes_len) ||
+       (new_size_idx < 0))
+    {
+        HASH_LOCK_WRUNLOCK(h);
+        return;
+    }
+
+    size = hash_sizes[new_size_idx];
+
+    /* Allocate the new sizes */
+    t1 = t2 = NULL;
+    l1 = l2 = NULL;
+    alloc_tab(h, size, &t1, &l1);
+    if(!t1 || !l1) goto alloc_fail;
+    alloc_tab(h, size, &t2, &l2);
+    if(!t2 || !l2) goto alloc_fail;
+
+    old_size = h->tab_size;
+    h->tab_size = size;
+    h->size_idx = new_size_idx;
+    h->max_load = (uint32_t)ceilf(hash_max_load_fact * size);
+    h->min_load = (uint32_t)ceilf(hash_min_load_fact * size);
+
+    /* Move the entries */
+    for(i=0; i < old_size; i++)
+    {
+        b = C2L(h, &h->key_tab[i]);
+        e = b->hash_entry;
+        while(e != NULL)
+        {
+            e = C2L(h, e);
+            n = e->key_next;
+            kidx =hash_to_idx(h, __key_hash(e->key));
+            vidx =hash_to_idx(h, __value_hash(e->value));
+            /* Move to the correct bucket */
+            e->key_next = t1[kidx].hash_entry;
+            t1[kidx].hash_entry = L2C(h, e);
+            e->value_next = t2[vidx].hash_entry;
+            t2[vidx].hash_entry = L2C(h, e);
+            e = n;
+        }
+    }
+    free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab));
+    free_buckets(h, C2L(h, h->value_tab), C2L(h, h->value_lock_tab));
+    h->key_tab         = L2C(h, t1);
+    h->key_lock_tab    = L2C(h, l1);
+    h->value_tab       = L2C(h, t2);
+    h->value_lock_tab  = L2C(h, l2);
+
+    HASH_LOCK_WRUNLOCK(h);
+
+    return;
+
+alloc_fail:
+    /* If we failed to resize, adjust max/min load. This will stop us from
+     * retrying resize too frequently */ 
+    if(new_size_idx > h->size_idx)
+        h->max_load = (h->max_load + 2 * h->tab_size) / 2 + 1;
+    else 
+    if (new_size_idx < h->size_idx)
+        h->min_load = h->min_load / 2;
+    HASH_LOCK_WRUNLOCK(h);
+    if(t1 || l1) free_buckets(h, t1, l1);
+    if(t2 || l2) free_buckets(h, t2, l2);
+    return;
+}
+
+int __hash_iterator(struct __hash *h,
+                    int (*entry_consumer)(__k_t k, __v_t v, void *p),
+                    void *d)
+{
+    struct hash_entry *e, *n;
+    struct bucket *b;
+    struct bucket_lock *blt;
+    int i, brk_early;
+
+    if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+
+    for(i=0; i < h->tab_size; i++)
+    {
+        b = C2L(h, &h->key_tab[i]);
+        blt = C2L(h, h->key_lock_tab);
+        if(BUCKET_LOCK_RDLOCK(h, blt, i) != 0) return -ENOLCK;
+        e = b->hash_entry;
+        while(e != NULL)
+        {
+            e = C2L(h, e);
+            n = e->key_next;
+            brk_early = entry_consumer(e->key, e->value, d);
+            if(brk_early)
+            {
+                BUCKET_LOCK_RDUNLOCK(h, blt, i);
+                goto out;
+            }
+            e = n;
+        }
+        BUCKET_LOCK_RDUNLOCK(h, blt, i);
+    }
+out:
+    HASH_LOCK_RDUNLOCK(h);
+    return 0;
+}
+
+void __hash_sizes(struct __hash *h,
+                  uint32_t *nr_ent,
+                  uint32_t *max_nr_ent,
+                  uint32_t *tab_size,
+                  uint32_t *max_load,
+                  uint32_t *min_load)
+{
+    if(nr_ent     != NULL) *nr_ent     = h->nr_ent;
+    if(max_nr_ent != NULL) *max_nr_ent = max_entries(h); 
+    if(tab_size   != NULL) *tab_size   = h->tab_size;
+    if(max_load   != NULL) *max_load   = h->max_load;
+    if(min_load   != NULL) *min_load   = h->min_load;
+}
+
diff -r fe42b16855aa -r 7d28228db41d tools/memshr/bidir-hash.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/memshr/bidir-hash.h Thu Dec 17 06:27:56 2009 +0000
@@ -0,0 +1,113 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef __BIDIR_HASH_H__
+#define __BIDIR_HASH_H__
+
+#include <stdint.h>
+#include "memshr-priv.h"
+
+typedef struct vbdblk {
+    uint64_t sec;
+    uint16_t disk_id;
+} vbdblk_t;
+
+
+#if defined FINGERPRINT_MAP || BLOCK_MAP 
+#define DEFINE_SINGLE_MAP 
+#endif
+
+/*******************************************************/
+/* Fingerprint map                                     */
+/*******************************************************/
+#if defined FINGERPRINT_MAP || !defined DEFINE_SINGLE_MAP
+
+#undef BIDIR_NAME_PREFIX
+#undef BIDIR_KEY
+#undef BIDIR_VALUE
+#undef BIDIR_KEY_T
+#undef BIDIR_VALUE_T
+static uint32_t fgprtshr_fgprt_hash(uint32_t h)
+{
+    return h;
+}
+
+static uint32_t fgprtshr_mfn_hash(uint64_t m)
+{
+    return (uint32_t)m;
+}
+
+static int fgprtshr_fgprt_cmp(uint32_t h1, uint32_t h2)
+{
+    return (h1 == h2);
+}
+
+static int fgprtshr_mfn_cmp(uint32_t m1, uint32_t m2)
+{
+    return (m1 == m2);
+}
+#define BIDIR_NAME_PREFIX       fgprtshr 
+#define BIDIR_KEY               fgprt 
+#define BIDIR_VALUE             mfn 
+#define BIDIR_KEY_T             uint32_t
+#define BIDIR_VALUE_T           xen_mfn_t
+#include "bidir-namedefs.h"
+
+#endif /* FINGERPRINT_MAP */
+
+
+/*******************************************************/
+/* Block<->Memory sharing handles                      */
+/*******************************************************/
+#if defined BLOCK_MAP || !defined DEFINE_SINGLE_MAP
+
+#undef BIDIR_NAME_PREFIX
+#undef BIDIR_KEY
+#undef BIDIR_VALUE
+#undef BIDIR_KEY_T
+#undef BIDIR_VALUE_T
+/* TODO better hashes! */
+static inline uint32_t blockshr_block_hash(vbdblk_t block)
+{
+    return (uint32_t)(block.sec) ^ (uint32_t)(block.disk_id);
+}
+
+static inline uint32_t blockshr_shrhnd_hash(uint64_t shrhnd)
+{
+    return (uint32_t)shrhnd;
+}
+
+static inline int blockshr_block_cmp(vbdblk_t b1, vbdblk_t b2)
+{
+    return (b1.sec == b2.sec) && (b1.disk_id == b2.disk_id);
+}
+
+static inline int blockshr_shrhnd_cmp(uint64_t h1, uint64_t h2)
+{
+    return (h1 == h2);
+}
+#define BIDIR_NAME_PREFIX       blockshr
+#define BIDIR_KEY               block
+#define BIDIR_VALUE             shrhnd
+#define BIDIR_KEY_T             vbdblk_t
+#define BIDIR_VALUE_T           uint64_t
+#include "bidir-namedefs.h"
+
+#endif /* BLOCK_MAP */
+
+#endif /* __BIDIR_HASH_H__ */
diff -r fe42b16855aa -r 7d28228db41d tools/memshr/bidir-namedefs.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/memshr/bidir-namedefs.h     Thu Dec 17 06:27:56 2009 +0000
@@ -0,0 +1,80 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "memshr-priv.h"
+
+/* Macros used to assemble the names */
+#define BIDIR_NAME_ONE_INTERNAL(prefix, name) \
+                                prefix ## _ ## name
+#define BIDIR_NAME_TWO_INTERNAL(prefix, name1, name2) \
+                                prefix ## _ ## name1 ## _ ## name2
+
+#define BIDIR_NAME_ONE(prefix, name) \
+                                BIDIR_NAME_ONE_INTERNAL(prefix, name)
+#define BIDIR_NAME_TWO(prefix, name1, name2) \
+                                BIDIR_NAME_TWO_INTERNAL(prefix, name1, name2)
+
+#define INTERNAL_NAME_ONE(name) BIDIR_NAME_ONE(BIDIR_NAME_PREFIX, name)
+#define INTERNAL_NAME_TWO(name1, name2) \
+                                BIDIR_NAME_TWO(BIDIR_NAME_PREFIX, name1, name2)
+
+/* Function/type names */
+#define __k_t                   BIDIR_KEY_T
+#define __v_t                   BIDIR_VALUE_T
+
+#define __hash                  INTERNAL_NAME_ONE(hash)
+#define __shm_hash_init         INTERNAL_NAME_ONE(shm_hash_init)
+#define __shm_hash_get          INTERNAL_NAME_ONE(shm_hash_get)
+#define __hash_init             INTERNAL_NAME_ONE(hash_init)
+#define __key_lookup            INTERNAL_NAME_TWO(BIDIR_KEY, lookup)
+#define __value_lookup          INTERNAL_NAME_TWO(BIDIR_VALUE, lookup)
+#define __insert                INTERNAL_NAME_ONE(insert)
+#define __key_remove            INTERNAL_NAME_TWO(BIDIR_KEY, remove)
+#define __value_remove          INTERNAL_NAME_TWO(BIDIR_VALUE, remove)
+#define __hash_destroy          INTERNAL_NAME_ONE(hash_destroy)
+#define __hash_iterator         INTERNAL_NAME_ONE(hash_iterator)
+
+#define __key_hash              INTERNAL_NAME_TWO(BIDIR_KEY, hash)
+#define __key_cmp               INTERNAL_NAME_TWO(BIDIR_KEY, cmp)
+#define __value_hash            INTERNAL_NAME_TWO(BIDIR_VALUE, hash)
+#define __value_cmp             INTERNAL_NAME_TWO(BIDIR_VALUE, cmp)
+
+#define __hash_sizes            INTERNAL_NAME_ONE(hash_sizes)
+
+
+/* Final function exports */
+struct __hash* __shm_hash_init(unsigned long shm_baddr, unsigned long 
shm_size);
+struct __hash* __shm_hash_get(unsigned long shm_baddr);
+struct __hash *__hash_init   (struct __hash *h, uint32_t min_size);
+int            __key_lookup  (struct __hash *h, __k_t k, __v_t *vp);
+int            __value_lookup(struct __hash *h, __v_t v, __k_t *kp);
+int            __insert      (struct __hash *h, __k_t k, __v_t v);
+int            __key_remove  (struct __hash *h, __k_t k, __v_t *vp);
+int            __value_remove(struct __hash *h, __v_t v, __k_t *kp);
+int            __hash_destroy(struct __hash *h,
+                    void (*entry_consumer)(__k_t k, __v_t v, void *p),
+                    void *d);
+int            __hash_iterator(struct __hash *h,
+                        int (*entry_consumer)(__k_t k, __v_t v, void *p),
+                        void *d);
+void           __hash_sizes(struct __hash *h,
+                            uint32_t *nr_ent,
+                            uint32_t *max_nr_ent,
+                            uint32_t *tab_size,
+                            uint32_t *max_load,
+                            uint32_t *min_load);
diff -r fe42b16855aa -r 7d28228db41d tools/memshr/interface.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/memshr/interface.c  Thu Dec 17 06:27:56 2009 +0000
@@ -0,0 +1,92 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <string.h>
+
+#include "memshr-priv.h"
+#include "shm.h"
+
+
+typedef struct {
+    struct shared_memshr_info *shared_info;
+    struct fgprtshr_hash      *fgprts;
+    struct blockshr_hash      *blks;
+} private_memshr_info_t;
+
+private_memshr_info_t memshr;
+
+#define SHARED_INFO  (memshr.shared_info)
+
+void memshr_daemon_initialize(void)
+{
+    void *shm_base_addr;
+    struct fgprtshr_hash *h;
+
+    memset(&memshr, 0, sizeof(private_memshr_info_t));
+
+    if((SHARED_INFO = shm_shared_info_open(1)) == NULL)
+    {
+        DPRINTF("Failed to init shared info.\n");
+        return;
+    }
+
+    if((memshr.fgprts = shm_fgprtshr_hash_open(1)) == NULL) 
+    {
+        DPRINTF("Failed to init fgprtshr hash.\n");
+        return;
+    }
+    memshr.shared_info->fgprtshr_hash_inited = 1;
+
+    if((memshr.blks = shm_blockshr_hash_open(1)) == NULL) 
+    {
+        DPRINTF("Failed to init blockshr hash.\n");
+        return;
+    }
+    memshr.shared_info->blockshr_hash_inited = 1;
+}
+
+
+void memshr_vbd_initialize(void)
+{
+    memset(&memshr, 0, sizeof(private_memshr_info_t));
+
+    if((SHARED_INFO = shm_shared_info_open(0)) == NULL)
+    {
+        DPRINTF("Failed to open shared info.\n");
+        return;
+    }
+
+    if(!SHARED_INFO->fgprtshr_hash_inited)
+    {
+        DPRINTF("fgprtshr hash not inited.\n");
+        return;
+    }
+
+    if((memshr.fgprts = shm_fgprtshr_hash_open(0)) == NULL)
+    {
+        DPRINTF("Failed to open fgprtshr_hash.\n");
+        return;
+    }
+
+    if((memshr.blks = shm_blockshr_hash_open(0)) == NULL)
+    {
+        DPRINTF("Failed to open blockshr_hash.\n");
+        return;
+    }
+}
+
diff -r fe42b16855aa -r 7d28228db41d tools/memshr/memshr-priv.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/memshr/memshr-priv.h        Thu Dec 17 06:27:56 2009 +0000
@@ -0,0 +1,34 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef __MEMSHR_PRIV_H__
+#define __MEMSHR_PRIV_H__
+
+#include <syslog.h>
+#include <xenctrl.h>
+#include "memshr.h"
+
+#if 1
+#define DPRINTF(_f, _a...) syslog(LOG_INFO, _f, ##_a)
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+#define EPRINTF(_f, _a...) syslog(LOG_ERR, "memshr:%s: " _f, __func__, ##_a)
+
+#endif /* __MEMSHR_PRIV_H__ */
diff -r fe42b16855aa -r 7d28228db41d tools/memshr/memshr.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/memshr/memshr.h     Thu Dec 17 06:27:56 2009 +0000
@@ -0,0 +1,29 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef __MEMSHR_H__
+#define __MEMSHR_H__
+
+#include <stdint.h>
+
+typedef uint64_t xen_mfn_t;
+
+extern void memshr_daemon_initialize(void);
+extern void memshr_vbd_initialize(void);
+
+#endif /* __MEMSHR_H__ */
diff -r fe42b16855aa -r 7d28228db41d tools/memshr/shm.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/memshr/shm.c        Thu Dec 17 06:27:56 2009 +0000
@@ -0,0 +1,182 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "memshr-priv.h"
+#include "bidir-hash.h"
+#include "shm.h"
+
+#define MEMSHR_INFO_SHM_FILE  "/memshr-info"
+#define MEMSHR_INFO_MAGIC     0x15263748 
+
+#define FGPRT_HASH_SHM_FILE "/blktap-fgprts"
+#define FGPRT_HASH_PAGES    10000
+
+#define BLOCK_HASH_SHM_FILE "/blktap-blks"
+#define BLOCK_HASH_PAGES    10000
+
+typedef struct shm_area {
+    void*  base_addr;
+    size_t size;
+    int    fd;
+} shm_area_t;
+
+typedef struct {
+    struct shm_area shared_info_area;
+    struct shm_area fgprts_area;
+    struct shm_area blocks_area;
+} private_shm_info_t;
+
+private_shm_info_t shm_info;
+
+
+
+static int shm_area_open(const char *file, size_t size, int unlink, shm_area_t 
*shma)
+{ 
+    /* TODO: If blktapctrl can be restarted while system is running, this needs
+     * to be cleverer */
+    if(unlink) shm_unlink(file);
+
+    shma->size = size;
+    shma->fd = shm_open(file,
+                        (O_CREAT | O_RDWR),
+                        (S_IREAD | S_IWRITE));
+
+    if(shma->fd < 0) return -1;
+
+    if(ftruncate(shma->fd, size) < 0) return -2;
+
+    shma->base_addr = mmap(NULL, 
+                      size,
+                      PROT_READ | PROT_WRITE, 
+                      MAP_SHARED,
+                      shma->fd,
+                      0);
+    
+    if(shma->base_addr == MAP_FAILED) return -2;
+
+    return 0;
+}
+
+static void shm_area_close(shm_area_t *shma)
+{
+    munmap(shma->base_addr, shma->size);
+    close(shma->fd);
+}
+
+
+shared_memshr_info_t * shm_shared_info_open(int unlink)
+{
+    shared_memshr_info_t *shared_info;
+    pthread_mutexattr_t  lock_attr;
+
+    assert(sizeof(shared_memshr_info_t) < XC_PAGE_SIZE);
+    if(shm_area_open(MEMSHR_INFO_SHM_FILE, 
+                     XC_PAGE_SIZE,
+                     unlink, 
+                     &(shm_info.shared_info_area)) < 0)
+    {
+        DPRINTF("Failed to open shma for shared info.\n");
+        return NULL;
+    }
+    shared_info = (shared_memshr_info_t *)
+                             shm_info.shared_info_area.base_addr;
+    if(unlink)
+    {
+        memset(shared_info, 0, sizeof(shared_memshr_info_t));
+        if(pthread_mutexattr_init(&lock_attr) ||
+           pthread_mutexattr_setpshared(&lock_attr, PTHREAD_PROCESS_SHARED) ||
+           pthread_mutex_init(&shared_info->lock, &lock_attr) ||
+           pthread_mutexattr_destroy(&lock_attr))
+        {
+            DPRINTF("Failed to init shared info lock.\n");
+            return NULL;
+        }
+        shared_info->magic = MEMSHR_INFO_MAGIC;
+    } 
+    else
+    if(shared_info->magic != MEMSHR_INFO_MAGIC)
+    {
+        DPRINTF("Incorrect magic in shared info.\n");
+        return NULL;
+    }
+    
+    return shared_info;
+}
+
+
+struct fgprtshr_hash * shm_fgprtshr_hash_open(int unlink)
+{
+    struct fgprtshr_hash *h;
+    if(shm_area_open(FGPRT_HASH_SHM_FILE, 
+                     FGPRT_HASH_PAGES * XC_PAGE_SIZE,
+                     unlink, 
+                     &(shm_info.fgprts_area)) < 0)
+    {
+        DPRINTF("Failed to init shma for fgprtshr_hash.\n");
+        return NULL;
+    }
+
+    if(unlink)
+    {
+        h = fgprtshr_shm_hash_init(
+                     (unsigned long) shm_info.fgprts_area.base_addr, 
+                     FGPRT_HASH_PAGES * XC_PAGE_SIZE);
+    } else
+    {
+        h = fgprtshr_shm_hash_get(
+                     (unsigned long) shm_info.fgprts_area.base_addr); 
+    }
+        
+    return h;
+} 
+
+struct blockshr_hash * shm_blockshr_hash_open(int unlink)
+{
+    struct blockshr_hash *h;
+    if(shm_area_open(BLOCK_HASH_SHM_FILE, 
+                     BLOCK_HASH_PAGES * XC_PAGE_SIZE,
+                     unlink, 
+                     &(shm_info.blocks_area)) < 0)
+    {
+        DPRINTF("Failed to init shma for blockshr_hash.\n");
+        return NULL;
+    }
+
+    if(unlink)
+    {
+        h = blockshr_shm_hash_init(
+                     (unsigned long) shm_info.blocks_area.base_addr, 
+                     BLOCK_HASH_PAGES * XC_PAGE_SIZE);
+    } else
+    {
+        h = blockshr_shm_hash_get(
+                     (unsigned long) shm_info.blocks_area.base_addr); 
+    }
+        
+    return h;
+} 
+
diff -r fe42b16855aa -r 7d28228db41d tools/memshr/shm.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/memshr/shm.h        Thu Dec 17 06:27:56 2009 +0000
@@ -0,0 +1,35 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef __SHM_H__
+#define __SHM_H__
+
+#include <pthread.h>
+
+typedef struct shared_memshr_info {
+    unsigned long   magic;
+    pthread_mutex_t lock;
+    int             fgprtshr_hash_inited;
+    int             blockshr_hash_inited;
+} shared_memshr_info_t;
+
+shared_memshr_info_t * shm_shared_info_open(int unlink);
+struct fgprtshr_hash * shm_fgprtshr_hash_open(int unlink);
+struct blockshr_hash * shm_blockshr_hash_open(int unlink);
+
+#endif /* __SHM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.