[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Remove old blktap tools.



# HG changeset patch
# User akw@xxxxxxxxxxxxxxxxxxxxx
# Node ID 840f33e54054270e3f4b9704111ed52bd381653b
# Parent  533bad7c0883189e26c2a7f43011801c417b01fe
Remove old blktap tools.

Signed-off-by: Andrew Warfield <andrew.warfield@xxxxxxxxxxxx>
---
 tools/blktap/Makefile                   |   93 --
 tools/blktap/README                     |  137 ---
 tools/blktap/README.sept05              |   33 
 tools/blktap/blkdump.c                  |   62 -
 tools/blktap/blkif.c                    |  212 -----
 tools/blktap/blktaplib.c                |  453 ----------
 tools/blktap/blktaplib.h                |  171 ----
 tools/blktap/list.h                     |   55 -
 tools/blktap/parallax/Makefile          |   62 -
 tools/blktap/parallax/README            |  171 ----
 tools/blktap/parallax/block-async.c     |  393 ---------
 tools/blktap/parallax/block-async.h     |   69 -
 tools/blktap/parallax/blockstore.c      | 1348 --------------------------------
 tools/blktap/parallax/blockstore.h      |  134 ---
 tools/blktap/parallax/blockstored.c     |  275 ------
 tools/blktap/parallax/bstest.c          |  191 ----
 tools/blktap/parallax/parallax.c        |  608 --------------
 tools/blktap/parallax/radix.c           |  631 --------------
 tools/blktap/parallax/radix.h           |   45 -
 tools/blktap/parallax/requests-async.c  |  762 ------------------
 tools/blktap/parallax/requests-async.h  |   29 
 tools/blktap/parallax/snaplog.c         |  238 -----
 tools/blktap/parallax/snaplog.h         |   61 -
 tools/blktap/parallax/vdi.c             |  367 --------
 tools/blktap/parallax/vdi.h             |   55 -
 tools/blktap/parallax/vdi_create.c      |   52 -
 tools/blktap/parallax/vdi_fill.c        |   81 -
 tools/blktap/parallax/vdi_list.c        |   47 -
 tools/blktap/parallax/vdi_snap.c        |   43 -
 tools/blktap/parallax/vdi_snap_delete.c |   48 -
 tools/blktap/parallax/vdi_snap_list.c   |   82 -
 tools/blktap/parallax/vdi_tree.c        |  132 ---
 tools/blktap/parallax/vdi_unittest.c    |  184 ----
 tools/blktap/parallax/vdi_validate.c    |   97 --
 tools/blktap/ublkback/Makefile          |   40 
 tools/blktap/ublkback/ublkback.c        |   18 
 tools/blktap/ublkback/ublkbacklib.c     |  473 -----------
 tools/blktap/ublkback/ublkbacklib.h     |   16 
 tools/blktap/xenbus.c                   |  568 -------------
 39 files changed, 8536 deletions(-)

diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/Makefile
--- a/tools/blktap/Makefile     Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-MAJOR    = 3.0
-MINOR    = 0
-SONAME   = libblktap.so.$(MAJOR)
-
-XEN_ROOT = ../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-SUBDIRS :=
-SUBDIRS += ublkback
-#SUBDIRS += parallax
-
-BLKTAP_INSTALL_DIR = /usr/sbin
-
-INSTALL            = install
-INSTALL_PROG       = $(INSTALL) -m0755
-INSTALL_DIR        = $(INSTALL) -d -m0755
-
-INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
-
-LIBS     := -lpthread -lz
-
-SRCS     :=
-SRCS     += blktaplib.c xenbus.c blkif.c
-
-CFLAGS   += -Werror
-CFLAGS   += -Wno-unused
-CFLAGS   += -fno-strict-aliasing
-CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# get asprintf():
-CFLAGS   += -D _GNU_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-CFLAGS   += $(INCLUDES) 
-DEPS     = .*.d
-
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-IBINS   :=
-#IBINS   += blkdump
-
-LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
-
-.PHONY: all
-all: mk-symlinks libblktap.so #blkdump
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: install
-install: all
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/include
-       $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
-       #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: clean
-clean:
-       rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: rpm
-rpm: all
-       rm -rf staging
-       mkdir staging
-       mkdir staging/i386
-       rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
-               --define "_rpmdir$$PWD/staging" -bb rpm.spec
-       mv staging/i386/*.rpm .
-       rm -rf staging
-
-libblktap.so: $(OBJS) 
-       $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared         \
-             -L$(XEN_XENSTORE) -l xenstore                       \
-             -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
-       ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
-       ln -sf libblktap.so.$(MAJOR) $@
-
-blkdump: libblktap.so
-       $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \
-             -l blktap blkdump.c
-
-.PHONY: TAGS clean install mk-symlinks rpm
-
-.PHONY: TAGS
-TAGS:
-       etags -t $(SRCS) *.h
-
--include $(DEPS)
-
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/README
--- a/tools/blktap/README       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-Block Tap User-level Interfaces
-Andrew Warfield
-andrew.warfield@xxxxxxxxxxxx
-February 8, 2005
-
-NOTE #1: The blktap is _experimental_ code.  It works for me.  Your
-mileage may vary.  Don't use it for anything important.  Please. ;)
-
-NOTE #2: All of the interfaces here are likely to change.  This is all
-early code, and I am checking it in because others want to play with
-it.  If you use it for anything, please let me know!
-
-Overview:
----------
-
-This directory contains a library and set of example applications for
-the block tap device.  The block tap hooks into the split block device
-interfaces above Xen allowing them to be extended.  This extension can
-be done in userspace with the help of a library.
-
-The tap can be installed either as an interposition domain in between
-a frontend and backend driver pair, or as a terminating backend, in
-which case it is responsible for serving all requests itself.
-
-There are two reasons that you might want to use the tap,
-corresponding to these configurations:
-
- 1. To examine or modify a stream of block requests while they are
-    in-flight (e.g. to encrypt data, or add data-driven watchpoints)
-
- 2. To prototype a new backend driver, serving requests from the tap
-    rather than passing them along to the XenLinux blkback driver.
-    (e.g. to forward block requests to a remote host)
-
-
-Interface:
-----------
-
-At the moment, the tap interface is similar in spirit to that of the
-Linux netfilter.  Requests are messages from a client (frontend)
-domain to a disk (backend) domain.  Responses are messages travelling
-back, acknowledging the completion of a request.  the library allows
-chains of functions to be attached to these events.  In addition,
-hooks may be attached to handle control messages, which signify things
-like connections from new domains.
-
-At present the control messages especially expose a lot of the
-underlying driver interfaces.  This may change in the future in order
-to simplify writing hooks.
-
-Here are the public interfaces:
-
-These allow hook functions to be chained:
-
- void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
- void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
- void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
-
-This allows a response to be injected, in the case where a request has
-been removed using BLKTAP_STOLEN.
-
- void blktap_inject_response(blkif_response_t *);
-
-These let you add file descriptors and handlers to the main poll loop:
-
- int  blktap_attach_poll(int fd, short events, int (*func)(int));
- void blktap_detach_poll(int fd);
-
-This starts the main poll loop:
-
- int  blktap_listen(void);
-
-Example:
---------
-
-blkimage.c uses an image on the local file system to serve requests to
-a domain.  Here's what it looks like:
-
----[blkimg.c]---
-
-/* blkimg.c
- *
- * file-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkimglib.h"
-
-
-int main(int argc, char *argv[])
-{
-    image_init();
-    
-    blktap_register_ctrl_hook("image_control", image_control);
-    blktap_register_request_hook("image_request", image_request);
-    blktap_listen();
-    
-    return 0;
-}
-
-----------------
-
-All of the real work is in blkimglib.c, but this illustrates the
-actual tap interface well enough.  image_control() will be called with
-all control messages.  image_request() handles requests.  As it reads
-from an on-disk image file, no requests are ever passed on to a
-backend, and so there will be no responses to process -- so there is
-nothing registered as a response hook.
-
-Other examples:
----------------
-
-Here is a list of other examples in the directory:
-
-Things that terminate a block request stream:
-
-  blkimg    - Use a image file/device to serve requests
-  blkgnbd   - Use a remote gnbd server to serve requests
-  blkaio    - Use libaio... (DOES NOT WORK)
-  
-Things that don't:
-
-  blkdump   - Print in-flight requests.
-  blkcow    - Really inefficient copy-on-write disks using libdb to store
-              writes.
-
-There are examples of plugging these things together, for instance
-blkcowgnbd is a read-only gnbd device with copy-on-write to a local
-file.
-
-TODO:
------
-
-- Make session tracking work.  At the moment these generally just handle a 
-  single front-end client at a time.
-
-- Integrate with Xend.  Need to cleanly pass a image identifier in the connect
-  message.
-
-- Make an asynchronous file-io terminator.  The libaio attempt is
-  tragically stalled because mapped foreign pages make pfn_valid fail
-  (they are VM_IO), and so cannot be passed to aio as targets.  A
-  better solution may be to tear the disk interfaces out of the real
-  backend and expose them somehow.
-
-- Make CoW suck less.
-
-- Do something more along the lines of dynamic linking for the
-  plugins, so thatthey don't all need a new main().
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/README.sept05
--- a/tools/blktap/README.sept05        Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-The blktap has been rewritten substantially based on the current
-blkback driver.  I've removed passthrough support, as this is broken
-by the move to grant tables and the lack of transitive grants.  A
-blktap VM is now only capable of terminating block requests in
-userspace.
-
-ublkback/ contains a _very_ initial cut at a user-level version of the block
-backend driver.  It gives a working example of how the current tap
-interfaces are used, in particular w.r.t. the vbd directories in
-xenstore.
-
-parallax/ contains fairly recent parallax code.  This does not run on
-the changed blktap interface, but should only be a couple of hours
-work to get going again.
-
-All of the tricky bits are done, but there is plenty of cleaning to
-do, and the top-level functionality is not here yet.  At the moment,
-the daemon ignores the pdev requested by the tools and opens the file 
-or device specified by TMP_IMAGE_FILE_NAME in ublkback.c.
-
-TODO:
-1. Fix to allow pdev in the store to specify the device to open.
-2. Add support (to tools as well) to mount arbitrary files...
-   just write the filename to mount into the store, instead of pdev.
-3. Reeximine blkif refcounting, it is almost certainly broken at the moment.
-   - creating a blkif should take a reference.
-   - each inflight request should take a reference on dequeue in blktaplib
-   - sending responses should drop refs.
-   - blkif should be implicitly freed when refcounts fall to 0.
-4. Modify the parallax req/rsp code as per ublkback to use the new tap 
-   interfaces. 
-5. Write a front end that allows parallax and normal mounts to coexist
-6. Allow blkback and blktap to run at the same time.
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blkdump.c
--- a/tools/blktap/blkdump.c    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-/* blkdump.c
- *
- * show a running trace of block requests as they fly by.
- * 
- * (c) 2004 Andrew Warfield.
- */
- 
-#include <stdio.h>
-#include "blktaplib.h"
- 
-int request_print(blkif_request_t *req)
-{
-    int i;
-    
-    if ( (req->operation == BLKIF_OP_READ) ||
-         (req->operation == BLKIF_OP_WRITE) )
-    {
-        printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 
-                ID_TO_DOM(req->id), ID_TO_IDX(req->id), 
-                blkif_op_name[req->operation], 
-                req->nr_segments, req->handle, 
-                req->sector_number);
-        
-        
-        for (i=0; i < req->nr_segments; i++) {
-            printf("              (gref: 0x%8x start: %u stop: %u)\n",
-                   req->seg[i].gref,
-                   req->seg[i].first_sect,
-                   req->seg[i].last_sect);
-        }
-            
-    } else {
-        printf("Unknown request message type.\n");
-    }
-    
-    return BLKTAP_PASS;
-}
-
-int response_print(blkif_response_t *rsp)
-{   
-    if ( (rsp->operation == BLKIF_OP_READ) ||
-         (rsp->operation == BLKIF_OP_WRITE) )
-    {
-        printf("[%2u:%2u>%5s] (status: %d)\n", 
-                ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 
-                blkif_op_name[rsp->operation], 
-                rsp->status);
-            
-    } else {
-        printf("Unknown request message type.\n");
-    }
-    return BLKTAP_PASS;
-}
-
-int main(int argc, char *argv[])
-{
-    blktap_register_request_hook("request_print", request_print);
-    blktap_register_response_hook("response_print", response_print);
-    blktap_listen();
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blkif.c
--- a/tools/blktap/blkif.c      Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,212 +0,0 @@
-/*
- * blkif.c
- * 
- * The blkif interface for blktap.  A blkif describes an in-use virtual disk.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <err.h>
-
-#include "blktaplib.h"
-
-#if 1
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
-        blkif = blkif->hash_next;
-    return blkif;
-}
-
-blkif_t *alloc_blkif(domid_t domid)
-{
-    blkif_t *blkif;
-
-    blkif = (blkif_t *)malloc(sizeof(blkif_t));
-    if (!blkif)
-        return NULL;
-
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid = domid;
-
-    return blkif;
-}
-
-static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
-{
-    new_blkif_hook = fn;
-}
-
-int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
-               long int readonly)
-{
-    domid_t domid;
-    blkif_t **pblkif;
-    
-    if (blkif == NULL)
-        return -EINVAL;
-
-    domid = blkif->domid;
-    blkif->handle   = handle;
-    blkif->pdev     = pdev;
-    blkif->readonly = readonly;
-
-    /*
-     * Call out to the new_blkif_hook. The tap application should define this,
-     * and it should return having set blkif->ops
-     * 
-     */
-    if (new_blkif_hook == NULL)
-    {
-        warn("Probe detected a new blkif, but no new_blkif_hook!");
-        return -1;
-    }
-    new_blkif_hook(blkif);
-
-    /* Now wire it in. */
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTF("Could not create blkif: already exists\n");
-            return -1;
-        }
-        pblkif = &(*pblkif)->hash_next;
-    }
-    blkif->hash_next = NULL;
-    *pblkif = blkif;
-
-    return 0;
-}
-
-void free_blkif(blkif_t *blkif)
-{
-    blkif_t **pblkif, *curs;
-    
-    pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
-    while ( (curs = *pblkif) != NULL )
-    {
-        if ( blkif == curs )
-        {
-            *pblkif = curs->hash_next;
-        }
-        pblkif = &curs->hash_next;
-    }
-    free(blkif);
-}
-
-void blkif_register_request_hook(blkif_t *blkif, char *name, 
-                                 int (*rh)(blkif_t *, blkif_request_t *, int)) 
-{
-    request_hook_t *rh_ent, **c;
-    
-    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
-    if (!rh_ent) 
-    {
-        warn("couldn't allocate a new hook");
-        return;
-    }
-    
-    rh_ent->func  = rh;
-    rh_ent->next = NULL;
-    if (asprintf(&rh_ent->name, "%s", name) == -1)
-    {
-        free(rh_ent);
-        warn("couldn't allocate a new hook name");
-        return;
-    }
-    
-    c = &blkif->request_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = rh_ent;
-}
-
-void blkif_register_response_hook(blkif_t *blkif, char *name, 
-                                  int (*rh)(blkif_t *, blkif_response_t *, int)) 
-{
-    response_hook_t *rh_ent, **c;
-    
-    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
-    if (!rh_ent) 
-    { 
-        warn("couldn't allocate a new hook");
-        return;
-    }
-    
-    rh_ent->func  = rh;
-    rh_ent->next = NULL;
-    if (asprintf(&rh_ent->name, "%s", name) == -1)
-    {
-        free(rh_ent);
-        warn("couldn't allocate a new hook name");
-        return;
-    }
-    
-    c = &blkif->response_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = rh_ent;
-}
-
-void blkif_print_hooks(blkif_t *blkif)
-{
-    request_hook_t  *req_hook;
-    response_hook_t *rsp_hook;
-    
-    DPRINTF("Request Hooks:\n");
-    req_hook = blkif->request_hook_chain;
-    while (req_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", req_hook->func, req_hook->name);
-        req_hook = req_hook->next;
-    }
-    
-    DPRINTF("Response Hooks:\n");
-    rsp_hook = blkif->response_hook_chain;
-    while (rsp_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
-        rsp_hook = rsp_hook->next;
-    }
-}
-
-
-long int vbd_size(blkif_t *blkif)
-{
-    return 1000000000;
-}
-
-long int vbd_secsize(blkif_t *blkif)
-{
-    return 512;
-}
-
-unsigned vbd_info(blkif_t *blkif)
-{
-    return 0;
-}
-
-
-void __init_blkif(void)
-{    
-    memset(blkif_hash, 0, sizeof(blkif_hash));
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blktaplib.c
--- a/tools/blktap/blktaplib.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,453 +0,0 @@
-/*
- * blktaplib.c
- * 
- * userspace interface routines for the blktap driver.
- *
- * (threadsafe(r) version) 
- *
- * (c) 2004 Andrew Warfield.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/user.h>
-#include <err.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <linux/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/ioctl.h>
-#include <string.h>
-#include <unistd.h>
-#include <pthread.h>
-#include <xs.h>
-                                                                     
-#define __COMPILING_BLKTAP_LIB
-#include "blktaplib.h"
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-#define DEBUG_RING_IDXS 0
-
-#define POLLRDNORM     0x040 
-
-#define BLKTAP_IOCTL_KICK 1
-
-
-void got_sig_bus();
-void got_sig_int();
-
-/* in kernel these are opposite, but we are a consumer now. */
-blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
-blkif_front_ring_t be_ring; 
-
-unsigned long mmap_vstart = 0;
-char *blktap_mem;
-int fd = 0;
-
-#define BLKTAP_RING_PAGES       1 /* Front */
-#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
-    
-int bad_count = 0;
-void bad(void)
-{
-    bad_count ++;
-    if (bad_count > 50) exit(0);
-}
-/*-----[ ID Manipulation from tap driver code ]--------------------------*/
-
-#define ACTIVE_RING_IDX unsigned short
-
-inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
-{
-    return ( (fe_dom << 16) | idx );
-}
-
-inline unsigned int ID_TO_IDX(unsigned long id) 
-{ 
-        return ( id & 0x0000ffff );
-}
-
-inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
-
-static int (*request_hook)(blkif_request_t *req) = NULL;
-static int (*response_hook)(blkif_response_t *req) = NULL;
-        
-/*-----[ Data to/from Backend (server) VM ]------------------------------*/
-
-/*
-
-inline int write_req_to_be_ring(blkif_request_t *req)
-{
-    blkif_request_t *req_d;
-    static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-    pthread_mutex_lock(&be_prod_mutex);
-    req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
-    memcpy(req_d, req, sizeof(blkif_request_t));
-    wmb();
-    be_ring.req_prod_pvt++;
-    pthread_mutex_unlock(&be_prod_mutex);
-    
-    return 0;
-}
-*/
-
-inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
-{
-    blkif_response_t *rsp_d;
-    static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-    pthread_mutex_lock(&fe_prod_mutex);
-    rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
-    memcpy(rsp_d, rsp, sizeof(blkif_response_t));
-    wmb();
-    fe_ring.rsp_prod_pvt++;
-    pthread_mutex_unlock(&fe_prod_mutex);
-
-    return 0;
-}
-
-static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
-{
-    response_hook_t  *rsp_hook;
-    
-    rsp_hook = blkif->response_hook_chain;
-    while (rsp_hook != NULL)
-    {
-        switch(rsp_hook->func(blkif, rsp, 1))
-        {
-        case BLKTAP_PASS:
-            break;
-        default:
-            printf("Only PASS is supported for resp hooks!\n");
-        }
-        rsp_hook = rsp_hook->next;
-    }
-}
-
-
-static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
-{
-    
-    apply_rsp_hooks(blkif, rsp);
-  
-    write_rsp_to_fe_ring(rsp);
-}
-
-void blktap_kick_responses(void)
-{
-    pthread_mutex_lock(&push_mutex);
-    
-    RING_PUSH_RESPONSES(&fe_ring);
-    ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-    
-    pthread_mutex_unlock(&push_mutex);
-}
-
-/*-----[ Polling fd listeners ]------------------------------------------*/
-
-#define MAX_POLLFDS 64
-
-typedef struct {
-    int (*func)(int fd);
-    struct pollfd *pfd;
-    int fd;
-    short events;
-    int active;
-} pollhook_t;
-
-static struct pollfd  pfd[MAX_POLLFDS+2]; /* tap and store are extra */
-static pollhook_t     pollhooks[MAX_POLLFDS];
-static unsigned int   ph_freelist[MAX_POLLFDS];
-static unsigned int   ph_cons, ph_prod;
-#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
-#define PH_IDX(x) (x % MAX_POLLFDS)
-
-int blktap_attach_poll(int fd, short events, int (*func)(int fd))
-{
-    pollhook_t *ph;
-    
-    if (nr_pollhooks() == MAX_POLLFDS) {
-        printf("Too many pollhooks!\n");
-        return -1;
-    }
-    
-    ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
-    
-    ph->func        = func;
-    ph->fd          = fd;
-    ph->events      = events;
-    ph->active      = 1;
-    
-    DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1, 
-            nr_pollhooks());
-    
-    return 0;
-}
-
-void blktap_detach_poll(int fd)
-{
-    int i;
-    
-    for (i=0; i<MAX_POLLFDS; i++)
-        if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
-            ph_freelist[PH_IDX(ph_prod++)] = i;
-            pollhooks[i].pfd->fd = -1;
-            pollhooks[i].active = 0;
-            break;
-        }
-        
-    DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i, 
-            nr_pollhooks());
-}
-
-void pollhook_init(void)
-{
-    int i;
-    
-    for (i=0; i < MAX_POLLFDS; i++) {
-        ph_freelist[i] = (i+1) % MAX_POLLFDS;
-        pollhooks[i].active = 0;
-    }
-    
-    ph_cons = 0;
-    ph_prod = MAX_POLLFDS;
-}
-
-void __attribute__ ((constructor)) blktaplib_init(void)
-{
-    pollhook_init();
-}
-
-/*-----[ The main listen loop ]------------------------------------------*/
-
-int blktap_listen(void)
-{
-    int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
-    struct xs_handle *h;
-    blkif_t *blkif;
-
-    /* comms rings: */
-    blkif_request_t  *req;
-    blkif_response_t *rsp;
-    blkif_sring_t    *sring;
-    RING_IDX          rp, i, pfd_count; 
-    
-    /* pending rings */
-    blkif_request_t req_pending[BLK_RING_SIZE];
-    /* blkif_response_t rsp_pending[BLK_RING_SIZE] */;
-    
-    /* handler hooks: */
-    request_hook_t   *req_hook;
-    response_hook_t  *rsp_hook;
-    
-    signal (SIGBUS, got_sig_bus);
-    signal (SIGINT, got_sig_int);
-    
-    __init_blkif();
-
-    fd = open("/dev/blktap", O_RDWR);
-    if (fd == -1)
-        err(-1, "open failed!");
-
-    blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 
-             PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
-    if ((int)blktap_mem == -1) 
-        err(-1, "mmap failed!");
-
-    /* assign the rings to the mapped memory */
-/*
-    sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
-    FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
-*/  
-    sring = (blkif_sring_t *)((unsigned long)blktap_mem);
-    BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
-
-    mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
-
-
-    /* Set up store connection and watch. */
-    h = xs_daemon_open();
-    if (h == NULL) 
-        err(-1, "xs_daemon_open");
-    
-    ret = add_blockdevice_probe_watch(h, "Domain-0");
-    if (ret != 0)
-        err(0, "adding device probewatch");
-    
-    ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
-
-    while(1) {
-        int ret;
-        
-        /* build the poll list */
-        pfd_count = 0;
-        for ( i=0; i < MAX_POLLFDS; i++ ) {
-            pollhook_t *ph = &pollhooks[i];
-            
-            if (ph->active) {
-                pfd[pfd_count].fd     = ph->fd;
-                pfd[pfd_count].events = ph->events;
-                ph->pfd               = &pfd[pfd_count];
-                pfd_count++;
-            }
-        }
-
-        tap_pfd = pfd_count++;
-        pfd[tap_pfd].fd = fd;
-        pfd[tap_pfd].events = POLLIN;
-
-        store_pfd = pfd_count++;
-        pfd[store_pfd].fd = xs_fileno(h);
-        pfd[store_pfd].events = POLLIN;
-        
-        if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
-            if (DEBUG_RING_IDXS)
-                ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
-            continue;
-        }
-
-        for (i=0; i < MAX_POLLFDS; i++) {
-            if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
-                pollhooks[i].func(pollhooks[i].pfd->fd);
-        }
-        
-        if (pfd[store_pfd].revents) {
-            ret = xs_fire_next_watch(h);
-        }
-
-        if (pfd[tap_pfd].revents) 
-        {    
-            /* empty the fe_ring */
-            notify_fe = 0;
-            notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
-            rp = fe_ring.sring->req_prod;
-            rmb();
-            for (i = fe_ring.req_cons; i != rp; i++)
-            {
-                int done = 0; 
-
-                req = RING_GET_REQUEST(&fe_ring, i);
-                memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
-                req = &req_pending[ID_TO_IDX(req->id)];
-
-                blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
-
-                if (blkif != NULL)
-                {
-                    req_hook = blkif->request_hook_chain;
-                    while (req_hook != NULL)
-                    {
-                        switch(req_hook->func(blkif, req, ((i+1) == rp)))
-                        {
-                        case BLKTAP_RESPOND:
-                            apply_rsp_hooks(blkif, (blkif_response_t *)req);
-                            write_rsp_to_fe_ring((blkif_response_t *)req);
-                            notify_fe = 1;
-                            done = 1;
-                            break;
-                        case BLKTAP_STOLEN:
-                            done = 1;
-                            break;
-                        case BLKTAP_PASS:
-                            break;
-                        default:
-                            printf("Unknown request hook return value!\n");
-                        }
-                        if (done) break;
-                        req_hook = req_hook->next;
-                    }
-                }
-
-                if (done == 0) 
-                {
-                    /* this was:  */
-                    /* write_req_to_be_ring(req); */
-
-                    unsigned long id = req->id;
-                    unsigned short operation = req->operation;
-                    printf("Unterminated request!\n");
-                    rsp = (blkif_response_t *)req;
-                    rsp->id = id;
-                    rsp->operation = operation;
-                    rsp->status = BLKIF_RSP_ERROR;
-                    write_rsp_to_fe_ring(rsp);
-                    notify_fe = 1;
-                    done = 1;
-                }
-
-            }
-            fe_ring.req_cons = i;
-
-            /* empty the be_ring */
-/*
-            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
-            rp = be_ring.sring->rsp_prod;
-            rmb();
-            for (i = be_ring.rsp_cons; i != rp; i++)
-            {
-
-                rsp = RING_GET_RESPONSE(&be_ring, i);
-                memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
-                rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
-
-                DPRINTF("copying a be request\n");
-
-                apply_rsp_hooks(rsp);
-                write_rsp_to_fe_ring(rsp);
-            }
-            be_ring.rsp_cons = i;
-*/
-            /* notify the domains */
-/*
-            if (notify_be) {
-                DPRINTF("notifying be\n");
-pthread_mutex_lock(&push_mutex);
-                RING_PUSH_REQUESTS(&be_ring);
-                ioctl(fd, BLKTAP_IOCTL_KICK_BE);
-pthread_mutex_unlock(&push_mutex);
-            }
-*/
-            if (notify_fe) {
-                DPRINTF("notifying fe\n");
-                pthread_mutex_lock(&push_mutex);
-                RING_PUSH_RESPONSES(&fe_ring);
-                ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-                pthread_mutex_unlock(&push_mutex);
-            }
-        }        
-    }
-
-
-    munmap(blktap_mem, PAGE_SIZE);
-
- mmap_failed:
-    close(fd);
-
- open_failed:
-    return 0;
-}
-
-void got_sig_bus() {
-    printf("Attempted to access a page that isn't.\n");
-    exit(-1);
-}
-
-void got_sig_int() {
-    DPRINTF("quitting -- returning to passthrough mode.\n");
-    if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
-    close(fd);
-    fd = 0;
-    exit(0);
-} 
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blktaplib.h
--- a/tools/blktap/blktaplib.h  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,171 +0,0 @@
-/* blktaplib.h
- *
- * userland accessors to the block tap.
- *
- * Sept 2/05 -- I'm scaling this back to only support block remappings
- * to user in a backend domain.  Passthrough and interposition can be readded
- * once transitive grants are available.
- */
- 
-#ifndef __BLKTAPLIB_H__
-#define __BLKTAPLIB_H__
-
-#include <xenctrl.h>
-#include <sys/user.h>
-#include <xen/xen.h>
-#include <xen/io/blkif.h>
-#include <xen/io/ring.h>
-#include <xen/io/domain_controller.h>
-#include <xs.h>
-
-#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-
-/* /dev/xen/blktap resides at device number major=10, minor=202        */ 
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLK_RING_SIZE
-
-/* blktap IOCTLs:                                                      */
-#define BLKTAP_IOCTL_KICK_FE         1
-#define BLKTAP_IOCTL_KICK_BE         2
-#define BLKTAP_IOCTL_SETMODE         3
-#define BLKTAP_IOCTL_PRINT_IDXS      100   
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
-#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
-#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
-#define BLKTAP_MODE_COPY_FE          0x00000004
-#define BLKTAP_MODE_COPY_BE          0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
-           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
-           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
-           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) );
-/*
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
-        );
-*/
-}
-
-/* Return values for handling messages in hooks. */
-#define BLKTAP_PASS     0 /* Keep passing this request as normal. */
-#define BLKTAP_RESPOND  1 /* Request is now a reply.  Return it.  */
-#define BLKTAP_STOLEN   2 /* Hook has stolen request.             */
-
-//#define domid_t unsigned short
-
-inline unsigned int ID_TO_IDX(unsigned long id);
-inline domid_t ID_TO_DOM(unsigned long id);
-
-int  blktap_attach_poll(int fd, short events, int (*func)(int));
-void blktap_detach_poll(int fd);
-int  blktap_listen(void);
-
-struct blkif;
-
-typedef struct request_hook_st {
-    char *name;
-    int (*func)(struct blkif *, blkif_request_t *, int);
-    struct request_hook_st *next;
-} request_hook_t;
-
-typedef struct response_hook_st {
-    char *name;
-    int (*func)(struct blkif *, blkif_response_t *, int);
-    struct response_hook_st *next;
-} response_hook_t;
-
-struct blkif_ops {
-    long int (*get_size)(struct blkif *blkif);
-    long int (*get_secsize)(struct blkif *blkif);
-    unsigned (*get_info)(struct blkif *blkif);
-};
-
-typedef struct blkif {
-    domid_t domid;
-    long int handle;
-
-    long int pdev;
-    long int readonly;
-
-    enum { DISCONNECTED, CONNECTED } state;
-
-    struct blkif_ops *ops;
-    request_hook_t *request_hook_chain;
-    response_hook_t *response_hook_chain;
-
-    struct blkif *hash_next;
-
-    void *prv;  /* device-specific data */
-} blkif_t;
-
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-blkif_t *alloc_blkif(domid_t domid);
-int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
-               long int readonly);
-void free_blkif(blkif_t *blkif);
-void __init_blkif(void);
-
-
-/* xenstore/xenbus: */
-extern int add_blockdevice_probe_watch(struct xs_handle *h, 
-                                       const char *domname);
-int xs_fire_next_watch(struct xs_handle *h);
-
-
-void blkif_print_hooks(blkif_t *blkif);
-void blkif_register_request_hook(blkif_t *blkif, char *name, 
-                             int (*rh)(blkif_t *, blkif_request_t *, int));
-void blkif_register_response_hook(blkif_t *blkif, char *name, 
-                             int (*rh)(blkif_t *, blkif_response_t *, int));
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *);
-void blktap_kick_responses(void);
-
-/* this must match the underlying driver... */
-#define MAX_PENDING_REQS 64
-
-/* Accessing attached data page mappings */
-#define MMAP_PAGES                                              \
-    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg)                                   \
-    (mmap_vstart +                                              \
-     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
-     ((_seg) * PAGE_SIZE))
-
-extern unsigned long mmap_vstart;
-
-/* Defines that are only used by library clients */
-
-#ifndef __COMPILING_BLKTAP_LIB
-
-static char *blkif_op_name[] = {
-    [BLKIF_OP_READ]       = "READ",
-    [BLKIF_OP_WRITE]      = "WRITE",
-};
-
-#endif /* __COMPILING_BLKTAP_LIB */
-    
-#endif /* __BLKTAPLIB_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/list.h
--- a/tools/blktap/list.h       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-/*
- * list.h
- * 
- * This is a subset of linux's list.h intended to be used in user-space.
- * 
- */
-
-#ifndef __LIST_H__
-#define __LIST_H__
-
-#define LIST_POISON1  ((void *) 0x00100100)
-#define LIST_POISON2  ((void *) 0x00200200)
-
-struct list_head {
-        struct list_head *next, *prev;
-};
- 
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
- 
-#define LIST_HEAD(name) \
-        struct list_head name = LIST_HEAD_INIT(name)
-
-static inline void __list_add(struct list_head *new,
-                              struct list_head *prev,
-                              struct list_head *next)
-{
-        next->prev = new;
-        new->next = next;
-        new->prev = prev;
-        prev->next = new;
-}
-
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
-        __list_add(new, head, head->next);
-}
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
-        next->prev = prev;
-        prev->next = next;
-}
-static inline void list_del(struct list_head *entry)
-{
-        __list_del(entry->prev, entry->next);
-        entry->next = LIST_POISON1;
-        entry->prev = LIST_POISON2;
-}
-#define list_entry(ptr, type, member)                                   \
-        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-#define list_for_each_entry(pos, head, member)                          \
-        for (pos = list_entry((head)->next, typeof(*pos), member);      \
-             &pos->member != (head);                                    \
-             pos = list_entry(pos->member.next, typeof(*pos), member))
-
-#endif /* __LIST_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/Makefile
--- a/tools/blktap/parallax/Makefile    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-XEN_ROOT = ../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-PARALLAX_INSTALL_DIR   = /usr/sbin
-
-INSTALL         = install
-INSTALL_PROG    = $(INSTALL) -m0755
-INSTALL_DIR     = $(INSTALL) -d -m0755
-
-INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)
-
-LDFLAGS = -L.. -lpthread -lz -lblktap
-
-#PLX_SRCS := 
-PLX_SRCS := vdi.c 
-PLX_SRCS += radix.c 
-PLX_SRCS += snaplog.c
-PLX_SRCS += blockstore.c 
-PLX_SRCS += block-async.c
-PLX_SRCS += requests-async.c
-VDI_SRCS := $(PLX_SRCS)
-PLX_SRCS += parallax.c
-
-#VDI_TOOLS :=
-VDI_TOOLS := vdi_create
-VDI_TOOLS += vdi_list
-VDI_TOOLS += vdi_snap
-VDI_TOOLS += vdi_snap_list
-VDI_TOOLS += vdi_snap_delete
-VDI_TOOLS += vdi_fill
-VDI_TOOLS += vdi_tree
-VDI_TOOLS += vdi_validate
-
-CFLAGS   += -Werror
-CFLAGS   += -Wno-unused
-CFLAGS   += -fno-strict-aliasing
-CFLAGS   += $(INCLUDES)
-CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS     = .*.d
-
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-IBINS    = parallax $(VDI_TOOLS)
-
-.PHONY: all
-all: $(VDI_TOOLS) parallax blockstored
-
-.PHONY: install
-install: all
-       $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
-
-.PHONY: clean
-clean:
-       rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest
-
-parallax: $(PLX_SRCS)
-       $(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS)
-
-${VDI_TOOLS}: %: %.c $(VDI_SRCS)
-       $(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS)
-
--include $(DEPS)
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/README
--- a/tools/blktap/parallax/README      Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,177 +0,0 @@
-Parallax Quick Overview
-March 3, 2005
-
-This is intended to provide a quick set of instructions to let you
-guys play with the current parallax source.  In it's current form, the
-code will let you run an arbitrary number of VMs off of a single disk
-image, doing copy-on-write as they make updates.  Each domain is
-assigned a virtual disk image (VDI), which may be based on a snapshot
-of an existing image.  All of the VDI and snapshot management should
-currently work.
-
-The current implementation uses a single file as a blockstore for
-_everything_ this will soon be replaced by the fancier backend code
-and the local cache.  As it stands, Parallax will create
-"blockstore.dat" in the directory that you run it from, and use
-largefile support to make this grow to unfathomable girth.  So, you
-probably want to run the daemon off of a local disk, with a lot of
-free space.
-
-Here's how to get going:
-
-0. Setup:
----------
-
-Pick a local directory on a disk with lots of room.  You should be
-running from a privileged domain (e.g. dom0) with the blocktap
-configured in and block backend NOT.
-
-For convenience (for the moment) copy all of the vdi tools (vdi_*) and
-the parallax daemon from tools/blktap into this directory.
-
-1. Populate the blockstore:
----------------------------
-
-First you need to put at least one image into the blockstore.  You
-will need a disk image, either as a file or local partition.  My
-general approach has been to
-
-(a) make a really big sparse file with 
-
-        dd if=/dev/zero of=./image bs=4K count=1 seek=[big value]
-
-(b) put a filesystem into it
-
-        mkfs.ext3 ./image
-
-(c) mount it using loopback
-
-        mkdir ./mnt
-        mount -o loop ./image
-
-(d) cd into it and untar one of the image files from srg-roots.
-
-        cd mnt
-        tar ...
-
-NOTE: Beware if your system is FC3.  mkfs is not compatible with old
-versions of fedora, and so you don't have much choice but to install
-further fc3 images if you have used the fc3 version of mkfs.
-
-(e) unmount the image
-
-        cd ..
-        umount mnt
-
-(f) now, create a new VDI to hold the image 
-
-        ./vdi_create "My new FC3 VDI"
-
-(g) get the id of the new VDI.
-
-        ./vdi_list
-
-        |      0                     My new FC3 VDI
-
-(0 is the VDI id... create a few more if you want.)
-
-(h) hoover your image into the new VDI.
-
-        ./vdi_fill 0 ./image
-
-This will pull the entire image into the blockstore and set up a
-mapping tree for it for VDI 0.  Passing a device (i.e. /dev/sda3)
-should also work, but vdi_fill has NO notion of sparseness yet, so you
-are going to pump a block into the store for each block you read.
-
-vdi_fill will count up until it is done, and you should be ready to
-go.  If you want to be anal, you can use vdi_validate to test the VDI
-against the original image.
-
-2. Create some extra VDIs
--------------------------
-
-VDIs are actually a list of snapshots, and each snapshot is a full
-image of mappings.  So, to preserve an immutable copy of a current
-VDI, do this:
-
-(a) Snapshot your new VDI.
-
-        ./vdi_snap 0
-
-Snapshotting writes the current radix root to the VDI's snapshot log,
-and assigns it a new writable root.
-
-(b) look at the VDI's snapshot log.
-
-        ./vdi_snap_list 0
-
-        | 16   0      Thu Mar  3 19:27:48 2005 565111           31
-
-The first two columns constitute a snapshot id and represent the
-(block, offset) of the snapshot record.  The Date tells you when the
-snapshot was made, and 31 is the radix root node of the snapshot.
-
-(c) Create a new VDI, based on that snapshot, and look at the list.
-
-        ./vdi_create "FC3 - Copy 1" 16 0
-        ./vdi_list
-
-        |      0                     My new FC3 VDI
-        |      1                       FC3 - Copy 1
-
-NOTE: If you have Graphviz installed on your system, you can use
-vdi_tree to generate a postscript of your current set of VDIs and
-snapshots.
-
-
-Create as many VDIs as you need for the VMs that you want to run.
-
-3. Boot some VMs:
------------------
-
-Parallax currently uses a hack in xend to pass the VDI id, you need to
-modify the disk line of the VM config that is going to mount it.
-
-(a) set up your vm config, by using the following disk line:
-
-        disk = ['parallax:1,sda1,w,0' ]
-
-This example uses VDI 1 (from vdi_list above), presents it as sda1
-(writable), and uses dom 0 as the backend.  If you were running the
-daemon (and tap driver) in some domain other than 0, you would change
-this last parameter.
-
-NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:".
-
-(b) Run parallax in the backend domain.
-
-        ./parallax
-
-(c) create your new domain.
-
-        xm create ...
-
----
-
-That's pretty much all there is to it at the moment.  Hope this is
-clear enough to get you going.  Now, a few serious caveats that will
-be sorted out in the almost immediate future:
-
-WARNINGS:
----------
-
-1. There is NO locking in the VDI tools at the moment, so I'd avoid
-running them in parallel, or more importantly, running them while the
-daemon is running.
-
-2. I doubt that xend will be very happy about restarting if you have
-parallax-using domains.  So if it dies while there are active parallax
-doms, you may need to reboot.
-
-3. I've turned off write-in-place.  So at the moment, EVERY block
-write is a log append on the blockstore.  I've been having some probs
-with the radix tree's marking of writable blocks after snapshots and
-will sort this out very soon.
-
-
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/block-async.c
--- a/tools/blktap/parallax/block-async.c       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,393 +0,0 @@
-/* block-async.c
- * 
- * Asynchronous block wrappers for parallax.
- */
- 
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "block-async.h"
-#include "blockstore.h"
-#include "vdi.h"
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* We have a queue of outstanding I/O requests implemented as a 
- * circular producer-consumer ring with free-running buffers.
- * to allow reordering, this ring indirects to indexes in an 
- * ring of io_structs.
- * 
- * the block_* calls may either add an entry to this ring and return, 
- * or satisfy the request immediately and call the callback directly.
- * None of the io calls in parallax should be nested enough to worry 
- * about stack problems with this approach.
- */
-
-struct read_args {
-    uint64_t addr;
-};
-
-struct write_args {
-    uint64_t   addr;
-    char *block;
-};
-
-struct alloc_args {
-    char *block;
-};
- 
-struct pending_io_req {
-    enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;
-    union {
-        struct read_args  r;
-        struct write_args w;
-        struct alloc_args a;
-    } u;
-    io_cb_t cb;
-    void *param;
-};
-
-void radix_lock_init(struct radix_lock *r)
-{
-    int i;
-    
-    pthread_mutex_init(&r->lock, NULL);
-    for (i=0; i < 1024; i++) {
-        r->lines[i] = 0;
-        r->waiters[i] = NULL;
-        r->state[i] = ANY;
-    }
-}
-
-/* maximum outstanding I/O requests issued asynchronously */
-/* must be a power of 2.*/
-#define MAX_PENDING_IO 1024
-
-/* how many threads to concurrently issue I/O to the disk. */
-#define IO_POOL_SIZE   10
-
-static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];
-static int pending_io_list[MAX_PENDING_IO];
-static unsigned long io_prod = 0, io_cons = 0, io_free = 0;
-#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))
-#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)
-#define PENDING_IO_ENT(_x) \
-       (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])
-#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)
-#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)
-static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t  pending_io_cond = PTHREAD_COND_INITIALIZER;
-
-static void init_pending_io(void)
-{
-    int i;
-       
-    for (i=0; i<MAX_PENDING_IO; i++)
-        pending_io_list[i] = i;
-               
-} 
-
-void block_read(uint64_t addr, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-    
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);
-    req->op = IO_READ;
-    req->u.r.addr = addr;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-    
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);
-    req->op = IO_WRITE;
-    req->u.w.addr  = addr;
-    req->u.w.block = block;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-
-void block_alloc(char *block, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-       
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    req->op = IO_ALLOC;
-    req->u.a.block = block;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    pthread_mutex_lock(&r->lock);
-    
-    if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {
-        r->lines[row]++;
-        r->state[row] = READ;
-        DPRINTF("RLOCK  : %3d (row: %d)\n", r->lines[row], row);
-        pthread_mutex_unlock(&r->lock);
-        ret.type = IO_INT_T;
-        ret.u.i = 0;
-        cb(ret, param);
-    } else {
-        struct radix_wait **rwc;
-        struct radix_wait *rw = 
-            (struct radix_wait *) malloc (sizeof(struct radix_wait));
-        DPRINTF("RLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
-        rw->type  = RLOCK;
-        rw->param = param;
-        rw->cb    = cb;
-        rw->next  = NULL;
-        /* append to waiters list. */
-        rwc = &r->waiters[row];
-        while (*rwc != NULL) rwc = &(*rwc)->next;
-        *rwc = rw;
-        pthread_mutex_unlock(&r->lock);
-        return;
-    }
-}
-
-
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    pthread_mutex_lock(&r->lock);
-    
-    /* the second check here is redundant -- just here for debugging now. */
-    if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {
-        r->state[row] = STOP;
-        r->lines[row] = -1;
-        DPRINTF("WLOCK  : %3d (row: %d)\n", r->lines[row], row);
-        pthread_mutex_unlock(&r->lock);
-        ret.type = IO_INT_T;
-        ret.u.i = 0;
-        cb(ret, param);
-    } else {
-        struct radix_wait **rwc;
-        struct radix_wait *rw = 
-            (struct radix_wait *) malloc (sizeof(struct radix_wait));
-        DPRINTF("WLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
-        rw->type  = WLOCK;
-        rw->param = param;
-        rw->cb    = cb;
-        rw->next  = NULL;
-        /* append to waiters list. */
-        rwc = &r->waiters[row];
-        while (*rwc != NULL) rwc = &(*rwc)->next;
-        *rwc = rw;
-        pthread_mutex_unlock(&r->lock);
-        return;
-    }
-       
-}
-
-/* called with radix_lock locked and lock count of zero. */
-static void wake_waiters(struct radix_lock *r, int row)
-{
-    struct pending_io_req *req;
-    struct radix_wait *rw;
-    
-    if (r->lines[row] != 0) return;
-    if (r->waiters[row] == NULL) return; 
-    
-    if (r->waiters[row]->type == WLOCK) {
-
-        rw = r->waiters[row];
-        pthread_mutex_lock(&pending_io_lock);
-        assert(CAN_PRODUCE_PENDING_IO);
-        
-        req = PENDING_IO_ENT(io_prod++);
-        req->op    = IO_WWAKE;
-        req->cb    = rw->cb;
-        req->param = rw->param;
-        r->lines[row] = -1; /* write lock the row. */
-        r->state[row] = STOP;
-        r->waiters[row] = rw->next;
-        free(rw);
-        pthread_mutex_unlock(&pending_io_lock);
-    
-    } else /* RLOCK */ {
-
-        while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {
-            rw = r->waiters[row];
-            pthread_mutex_lock(&pending_io_lock);
-            assert(CAN_PRODUCE_PENDING_IO);
-            
-            req = PENDING_IO_ENT(io_prod++);
-            req->op    = IO_RWAKE;
-            req->cb    = rw->cb;
-            req->param = rw->param;
-            r->lines[row]++; /* read lock the row. */
-            r->state[row] = READ; 
-            r->waiters[row] = rw->next;
-            free(rw);
-            pthread_mutex_unlock(&pending_io_lock);
-        }
-
-        if (r->waiters[row] != NULL) /* There is a write queued still */
-            r->state[row] = STOP;
-    }  
-    
-    pthread_mutex_lock(&pending_io_lock);
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);
-}
-
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-       
-    pthread_mutex_lock(&r->lock);
-    assert(r->lines[row] > 0); /* try to catch misuse. */
-    r->lines[row]--;
-    if (r->lines[row] == 0) {
-        r->state[row] = ANY;
-        wake_waiters(r, row);
-    }
-    pthread_mutex_unlock(&r->lock);
-    cb(ret, param);
-}
-
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    
-    pthread_mutex_lock(&r->lock);
-    assert(r->lines[row] == -1); /* try to catch misuse. */
-    r->lines[row] = 0;
-    r->state[row] = ANY;
-    wake_waiters(r, row);
-    pthread_mutex_unlock(&r->lock);
-    cb(ret, param);
-}
-
-/* consumer calls */
-static void do_next_io_req(struct pending_io_req *req)
-{
-    struct io_ret          ret;
-    void  *param;
-    
-    switch (req->op) {
-    case IO_READ:
-        ret.type = IO_BLOCK_T;
-        ret.u.b  = readblock(req->u.r.addr);
-        break;
-    case IO_WRITE:
-        ret.type = IO_INT_T;
-        ret.u.i  = writeblock(req->u.w.addr, req->u.w.block);
-        DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);
-        break;
-    case IO_ALLOC:
-        ret.type = IO_ADDR_T;
-        ret.u.a  = allocblock(req->u.a.block);
-        break;
-    case IO_RWAKE:
-        DPRINTF("WAKE DEFERRED RLOCK!\n");
-        ret.type = IO_INT_T;
-        ret.u.i  = 0;
-        break;
-    case IO_WWAKE:
-        DPRINTF("WAKE DEFERRED WLOCK!\n");
-        ret.type = IO_INT_T;
-        ret.u.i  = 0;
-        break;
-    default:
-        DPRINTF("Unknown IO operation on pending list!\n");
-        return;
-    }
-    
-    param = req->param;
-    pthread_mutex_lock(&pending_io_lock);
-    pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);
-    pthread_mutex_unlock(&pending_io_lock);
-       
-    assert(req->cb != NULL);
-    req->cb(ret, param);
-    
-}
-
-void *io_thread(void *param) 
-{
-    int tid;
-    struct pending_io_req *req;
-    
-    /* Set this thread's tid. */
-    tid = *(int *)param;
-    free(param);
-    
-start:
-    pthread_mutex_lock(&pending_io_lock);
-    while (io_prod == io_cons) {
-        pthread_cond_wait(&pending_io_cond, &pending_io_lock);
-    }
-    
-    if (io_prod == io_cons) {
-        /* unnecessary wakeup. */
-        pthread_mutex_unlock(&pending_io_lock);
-        goto start;
-    }
-    
-    req = PENDING_IO_ENT(io_cons++);
-    pthread_mutex_unlock(&pending_io_lock);
-       
-    do_next_io_req(req);
-    
-    goto start;
-       
-}
-
-static pthread_t io_pool[IO_POOL_SIZE];
-void start_io_threads(void)
-
-{      
-    int i, tid=0;
-    
-    for (i=0; i < IO_POOL_SIZE; i++) {
-        int ret, *t;
-        t = (int *)malloc(sizeof(int));
-        *t = tid++;
-        ret = pthread_create(&io_pool[i], NULL, io_thread, t);
-        if (ret != 0) printf("Error starting thread %d\n", i);
-    }
-       
-}
-
-void init_block_async(void)
-{
-    init_pending_io();
-    start_io_threads();
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/block-async.h
--- a/tools/blktap/parallax/block-async.h       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-/* block-async.h
- * 
- * Asynchronous block wrappers for parallax.
- */
- 
-#ifndef _BLOCKASYNC_H_
-#define _BLOCKASYNC_H_
-
-#include <assert.h>
-#include <xenctrl.h>
-#include "vdi.h"
-
-struct io_ret
-{
-    enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;
-    union {
-        uint64_t   a;
-        char *b;
-        int   i;
-    } u;
-};
-
-typedef void (*io_cb_t)(struct io_ret r, void *param);
-
-/* per-vdi lock structures to make sure requests run in a safe order. */
-struct radix_wait {
-    enum {RLOCK, WLOCK} type;
-    io_cb_t  cb;
-    void    *param;
-    struct radix_wait *next;
-};
-
-struct radix_lock {
-    pthread_mutex_t lock;
-    int                    lines[1024];
-    struct radix_wait     *waiters[1024];
-    enum {ANY, READ, STOP} state[1024];
-};
-void radix_lock_init(struct radix_lock *r);
-
-void block_read(uint64_t addr, io_cb_t cb, void *param);
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param);
-void block_alloc(char *block, io_cb_t cb, void *param);
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void init_block_async(void);
-
-static inline uint64_t IO_ADDR(struct io_ret r)
-{
-    assert(r.type == IO_ADDR_T);
-    return r.u.a;
-}
-
-static inline char *IO_BLOCK(struct io_ret r)
-{
-    assert(r.type == IO_BLOCK_T);
-    return r.u.b;
-}
-
-static inline int IO_INT(struct io_ret r)
-{
-    assert(r.type == IO_INT_T);
-    return r.u.i;
-}
-
-
-#endif //_BLOCKASYNC_H_
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/blockstore.c
--- a/tools/blktap/parallax/blockstore.c        Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1348 +0,0 @@
-/**************************************************************************
- * 
- * blockstore.c
- *
- * Simple block store interface
- *
- */
- 
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <stdarg.h>
-#include "blockstore.h"
-#include <pthread.h>
-
-//#define BLOCKSTORE_REMOTE
-//#define BSDEBUG
-
-#define RETRY_TIMEOUT 1000000 /* microseconds */
-
-/*****************************************************************************
- * Debugging
- */
-#ifdef BSDEBUG
-void DB(char *format, ...)
-{
-    va_list args;
-    fprintf(stderr, "[%05u] ", (int)pthread_getspecific(tid_key));
-    va_start(args, format);
-    vfprintf(stderr, format, args);
-    va_end(args);
-}
-#else
-#define DB(format, ...) (void)0
-#endif
-
-#ifdef BLOCKSTORE_REMOTE
-
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-
-/*****************************************************************************
- * Network state                                                             *
- *****************************************************************************/
-
-/* The individual disk servers we talk to. These will be referenced by
- * an integer index into bsservers[].
- */
-bsserver_t bsservers[MAX_SERVERS];
-
-/* The cluster map. This is indexed by an integer cluster number.
- */
-bscluster_t bsclusters[MAX_CLUSTERS];
-
-/* Local socket.
- */
-struct sockaddr_in sin_local;
-int bssock = 0;
-
-/*****************************************************************************
- * Notification                                                              *
- *****************************************************************************/
-
-typedef struct pool_thread_t_struct {
-    pthread_mutex_t ptmutex;
-    pthread_cond_t ptcv;
-    int newdata;
-} pool_thread_t;
-
-pool_thread_t pool_thread[READ_POOL_SIZE+1];
-
-#define RECV_NOTIFY(tid) { \
-    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
-    pool_thread[tid].newdata = 1; \
-    DB("CV Waking %u", tid); \
-    pthread_cond_signal(&(pool_thread[tid].ptcv)); \
-    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-#define RECV_AWAIT(tid) { \
-    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
-    if (pool_thread[tid].newdata) { \
-        pool_thread[tid].newdata = 0; \
-        DB("CV Woken %u", tid); \
-    } \
-    else { \
-        DB("CV Waiting %u", tid); \
-        pthread_cond_wait(&(pool_thread[tid].ptcv), \
-                          &(pool_thread[tid].ptmutex)); \
-    } \
-    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-
-/*****************************************************************************
- * Message queue management                                                  *
- *****************************************************************************/
-
-/* Protects the queue manipulation critical regions.
- */
-pthread_mutex_t ptmutex_queue;
-#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue)
-#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue)
-
-pthread_mutex_t ptmutex_recv;
-#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv)
-#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv)
-
-/* A message queue entry. We allocate one of these for every request we send.
- * Asynchronous reply reception also uses one of these.
- */
-typedef struct bsq_t_struct {
-    struct bsq_t_struct *prev;
-    struct bsq_t_struct *next;
-    int status;
-    int server;
-    int length;
-    struct msghdr msghdr;
-    struct iovec iov[2];
-    int tid;
-    struct timeval tv_sent;
-    bshdr_t message;
-    void *block;
-} bsq_t;
-
-#define BSQ_STATUS_MATCHED 1
-
-pthread_mutex_t ptmutex_luid;
-#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid)
-#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid)
-
-static uint64_t luid_cnt = 0x1000ULL;
-uint64_t new_luid(void) {
-    uint64_t luid;
-    ENTER_LUID_CR;
-    luid = luid_cnt++;
-    LEAVE_LUID_CR;
-    return luid;
-}
-
-/* Queue of outstanding requests.
- */
-bsq_t *bs_head = NULL;
-bsq_t *bs_tail = NULL;
-int bs_qlen = 0;
-
-/*
- */
-void queuedebug(char *msg) {
-    bsq_t *q;
-    ENTER_QUEUE_CR;
-    fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen);
-    for (q = bs_head; q; q = q->next) {
-        fprintf(stderr, "  luid=%016llx server=%u\n",
-                q->message.luid, q->server);
-    }
-    LEAVE_QUEUE_CR;
-}
-
-int enqueue(bsq_t *qe) {
-    ENTER_QUEUE_CR;
-    qe->next = NULL;
-    qe->prev = bs_tail;
-    if (!bs_head)
-        bs_head = qe;
-    else
-        bs_tail->next = qe;
-    bs_tail = qe;
-    bs_qlen++;
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("enqueue");
-#endif
-    return 0;
-}
-
-/* Unlink qe from the outstanding-request queue (matched by pointer identity).
- * Returns 1 if qe was queued and has been removed, 0 if it was not found. */
-int dequeue(bsq_t *qe) {
-    bsq_t *q;
-    int found = 0;
-
-    ENTER_QUEUE_CR;
-    for (q = bs_head; q; q = q->next) {
-        if (q != qe)
-            continue;
-        /* splice q out of the doubly linked list, fixing head/tail */
-        if (q->prev)
-            q->prev->next = q->next;
-        else
-            bs_head = q->next;
-        if (q->next)
-            q->next->prev = q->prev;
-        else
-            bs_tail = q->prev;
-        bs_qlen--;
-        found = 1;
-        break;
-    }
-    LEAVE_QUEUE_CR;
-
-#ifdef BSDEBUG
-    /* Log the real outcome; the found path previously said "not found". */
-    queuedebug(found ? "dequeue found" : "dequeue not found");
-#endif
-    return found;
-}
-
-bsq_t *queuesearch(bsq_t *qe) {
-    bsq_t *q;
-    ENTER_QUEUE_CR;
-    for (q = bs_head; q; q = q->next) {
-        if ((qe->server == q->server) &&
-            (qe->message.operation == q->message.operation) &&
-            (qe->message.luid == q->message.luid)) {
-
-            if ((q->message.operation == BSOP_READBLOCK) &&
-                ((q->message.flags & BSOP_FLAG_ERROR) == 0)) {
-                q->block = qe->block;
-                qe->block = NULL;
-            }
-            q->length = qe->length;
-            q->message.flags = qe->message.flags;
-            q->message.id = qe->message.id;
-            q->status |= BSQ_STATUS_MATCHED;
-
-            if (q->prev)
-                q->prev->next = q->next;
-            else 
-                bs_head = q->next;
-            if (q->next)
-                q->next->prev = q->prev;
-            else
-                bs_tail = q->prev;
-            q->next = NULL;
-            q->prev = NULL;
-            bs_qlen--;
-            goto found;
-        }
-    }
-
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("queuesearch not found");
-#endif
-    return NULL;
-
-    found:
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("queuesearch found");
-#endif
-    return q;
-}
-
-/*****************************************************************************
- * Network communication                                                     *
- *****************************************************************************/
-
-/* Fill in qe's msghdr/iovec, tag it with a fresh luid, queue it and send it.
- * Returns the sendmsg() byte count, or a negative value on failure; on
- * failure qe is no longer on the queue, so the caller may free it. */
-int send_message(bsq_t *qe) {
-    int rc;
-
-    qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin);
-    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    qe->msghdr.msg_iov = qe->iov;
-    qe->msghdr.msg_iovlen = qe->block ? 2 : 1;
-    qe->msghdr.msg_control = NULL;
-    qe->msghdr.msg_controllen = 0;
-    qe->msghdr.msg_flags = 0;
-
-    qe->iov[0].iov_base = (void *)&(qe->message);
-    qe->iov[0].iov_len = MSGBUFSIZE_ID;
-    if (qe->block) {
-        qe->iov[1].iov_base = qe->block;
-        qe->iov[1].iov_len = BLOCK_SIZE;
-    }
-
-    qe->message.luid = new_luid();
-
-    qe->status = 0;
-    qe->tid = (int)pthread_getspecific(tid_key);
-    /* Enqueue before sendmsg(): receive_loop matches replies via the queue. */
-    if (enqueue(qe) < 0) {
-        fprintf(stderr, "Error enqueuing request.\n");
-        return -1;
-    }
-
-    gettimeofday(&(qe->tv_sent), NULL);
-    DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid);
-    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
-    if (rc < 0) {
-        /* Callers free qe on error: unlink it first so queue_runner and
-         * queuesearch never walk a freed entry. */
-        dequeue(qe);
-    }
-
-    return rc;
-}
-
-int recv_message(bsq_t *qe) {
-    struct sockaddr_in from;
-    //int flen = sizeof(from);
-    int rc;
-
-    qe->msghdr.msg_name = &from;
-    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    qe->msghdr.msg_iov = qe->iov;
-    if (qe->block)
-        qe->msghdr.msg_iovlen = 2;
-    else
-        qe->msghdr.msg_iovlen = 1;
-    qe->msghdr.msg_control = NULL;
-    qe->msghdr.msg_controllen = 0;
-    qe->msghdr.msg_flags = 0;
-
-    qe->iov[0].iov_base = (void *)&(qe->message);
-    qe->iov[0].iov_len = MSGBUFSIZE_ID;
-    if (qe->block) {
-        qe->iov[1].iov_base = qe->block;
-        qe->iov[1].iov_len = BLOCK_SIZE;
-    }
-
-    rc = recvmsg(bssock, &(qe->msghdr), 0);
-
-    //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0,
-    //               (struct sockaddr *)&from, &flen);
-    return rc;
-}
-
-/* Map a peer address to its index in bsservers[]; -1 if it is not listed. */
-int get_server_number(struct sockaddr_in *sin) {
-    int i;
-#ifdef BSDEBUG2
-    fprintf(stderr,
-            "get_server_number(%u.%u.%u.%u/%u)\n",
-            (unsigned int)sin->sin_addr.s_addr & 0xff,
-            ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff,
-            ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff,
-            ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff,
-            (unsigned int)sin->sin_port);
-#endif
-
-    for (i = 0; i < MAX_SERVERS; i++) {
-        if (bsservers[i].hostname) {
-#ifdef BSDEBUG2
-            fprintf(stderr,
-                    "get_server_number check %u.%u.%u.%u/%u\n",
-                    (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff,
-                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff,
-                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 16)&0xff,
-                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 24)&0xff,
-                    (unsigned int)bsservers[i].sin.sin_port);
-#endif
-            if ((sin->sin_family == bsservers[i].sin.sin_family) &&
-                (sin->sin_port == bsservers[i].sin.sin_port) &&
-                (memcmp((void *)&(sin->sin_addr),
-                        (void *)&(bsservers[i].sin.sin_addr),
-                        sizeof(struct in_addr)) == 0)) {
-                return i;
-            }
-        }
-    }
-
-    return -1;
-}
-
-void *rx_buffer = NULL;
-bsq_t rx_qe;
-/* Receive one datagram into the shared rx_qe (header plus one block) and
- * record its length and sending server. Returns &rx_qe, NULL on error. */
-bsq_t *recv_any(void) {
-    struct sockaddr_in from;
-    int rc;
-
-    DB("ENTER recv_any\n");
-    rx_qe.msghdr.msg_name = &from;
-    rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    rx_qe.msghdr.msg_iov = rx_qe.iov;
-    if (!rx_buffer) {
-        rx_buffer = malloc(BLOCK_SIZE);
-        if (!rx_buffer) {
-            perror("recv_any malloc");
-            return NULL;
-        }
-    }
-    rx_qe.block = rx_buffer;
-    rx_buffer = NULL;
-    rx_qe.msghdr.msg_iovlen = 2;
-    rx_qe.msghdr.msg_control = NULL;
-    rx_qe.msghdr.msg_controllen = 0;
-    rx_qe.msghdr.msg_flags = 0;
-    rx_qe.iov[0].iov_base = (void *)&(rx_qe.message);
-    rx_qe.iov[0].iov_len = MSGBUFSIZE_ID;
-    rx_qe.iov[1].iov_base = rx_qe.block;
-    rx_qe.iov[1].iov_len = BLOCK_SIZE;
-
-    rc = recvmsg(bssock, &(rx_qe.msghdr), 0);
-    if (rc < 0) {
-        perror("recv_any");
-        /* Keep the buffer for the next call; it used to leak here. */
-        rx_buffer = rx_qe.block;
-        rx_qe.block = NULL;
-        return NULL;
-    }
-    rx_qe.length = rc;
-    rx_qe.server = get_server_number(&from);
-    DB("recv_any from %d luid=%016llx len=%u\n",
-       rx_qe.server, rx_qe.message.luid, rx_qe.length);
-    return &rx_qe;
-}
-
-void recv_recycle_buffer(bsq_t *q) {
-    if (q->block) {
-        rx_buffer = q->block;
-        q->block = NULL;
-    }
-}
-
-// cycle through reading any incoming, searching for a match in the
-// queue, until we have all we need.
-int wait_recv(bsq_t **reqs, int numreqs) {
-    bsq_t *q, *m;
-    unsigned int x, i;
-    int tid = (int)pthread_getspecific(tid_key);
-
-    DB("ENTER wait_recv %u\n", numreqs);
-
-    checkmatch:
-    x = 0xffffffff;
-    for (i = 0; i < numreqs; i++) {
-        x &= reqs[i]->status;
-    }
-    if ((x & BSQ_STATUS_MATCHED)) {
-        DB("LEAVE wait_recv\n");
-        return numreqs;
-    }
-
-    RECV_AWAIT(tid);
-
-    /*
-    rxagain:
-    ENTER_RECV_CR;
-    q = recv_any();
-    LEAVE_RECV_CR;
-    if (!q)
-        return -1;
-
-    m = queuesearch(q);
-    recv_recycle_buffer(q);
-    if (!m) {
-        fprintf(stderr, "Unmatched RX\n");
-        goto rxagain;
-    }
-    */
-
-    goto checkmatch;
-
-}
-
-/* retry
- */
-static int retry_count = 0;
-int retry(bsq_t *qe)
-{
-    int rc;
-    gettimeofday(&(qe->tv_sent), NULL);
-    DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid);
-    retry_count++;
-    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
-    if (rc < 0)
-        return rc;
-    return 0;
-}
-
-/* queue runner: once a second, resend every queued request whose last
- * transmission is more than RETRY_TIMEOUT microseconds old. Never returns. */
-void *queue_runner(void *arg)
-{
-    for (;;) {
-        struct timeval now;
-        long long nowus, sus;
-        bsq_t *q;
-        int r;
-
-        sleep(1);
-        gettimeofday(&now, NULL);
-        /* Widen before multiplying: tv_sec * 1000000 overflows a 32-bit long. */
-        nowus = (long long)now.tv_sec * 1000000LL + now.tv_usec;
-        ENTER_QUEUE_CR;
-        r = retry_count;
-        for (q = bs_head; q; q = q->next) {
-            sus = (long long)q->tv_sent.tv_sec * 1000000LL + q->tv_sent.tv_usec;
-            if ((nowus - sus) > RETRY_TIMEOUT) {
-                if (retry(q) < 0) {
-                    fprintf(stderr, "Error on sendmsg retry.\n");
-                }
-            }
-        }
-        if (r != retry_count) {
-            fprintf(stderr, "RETRIES: %u %u\n", retry_count - r, retry_count);
-        }
-        LEAVE_QUEUE_CR;
-    }
-}
-
-/* receive loop
- */
-void *receive_loop(void *arg)
-{
-    bsq_t *q, *m;
-
-    for(;;) {
-        q = recv_any();
-        if (!q) {
-            fprintf(stderr, "recv_any error\n");
-        }
-        else {
-            m = queuesearch(q);
-            recv_recycle_buffer(q);
-            if (!m) {
-                fprintf(stderr, "Unmatched RX\n");
-            }
-            else {
-                DB("RX MATCH");
-                RECV_NOTIFY(m->tid);
-            }
-        }
-    }
-}
-pthread_t pthread_recv;
-
-/*****************************************************************************
- * Reading                                                                   *
- *****************************************************************************/
-
-/* Fetch block `id` from one server: send a BSOP_READBLOCK request and wait
- * (in wait_recv) until a reply has been matched to it.  The calling thread
- * blocks until receive_loop() signals the reply; there is no timeout here
- * (queue_runner() resends requests that stay unanswered).
- *   @server: index into bsservers[] of the server to ask
- *   @id: block id as understood by that server
- *
- *   @return: the block buffer attached to the request by queuesearch(), which
- *            the caller owns and must free; NULL on any error
- */
-void *readblock_indiv(int server, uint64_t id) {
-    void *block;
-    bsq_t *qe;
-    int rc;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("readblock qe malloc");
-        return NULL;
-    }
-    /* No receive buffer is supplied; queuesearch() hands over the block
-     * that arrived with the matching reply. */
-    qe->block = NULL;
-
-    qe->server = server;
-
-    qe->message.operation = BSOP_READBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = id;
-    qe->length = MSGBUFSIZE_ID;
-
-    if (send_message(qe) < 0) {
-        perror("readblock sendto");
-        goto err;
-    }
-
-    /* Sleep until the receive thread has matched a reply to qe. */
-    rc = wait_recv(&qe, 1);
-    if (rc < 0) {
-        perror("readblock recv");
-        goto err;
-    }
-
-    /* queuesearch() copied the reply's flags, id and length into qe. */
-    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "readblock server error\n");
-        goto err;
-    }
-    if (qe->length < MSGBUFSIZE_BLOCK) {
-        /* Report the received length; the old code printed an
-         * uninitialised local here. */
-        fprintf(stderr, "readblock recv short (%u)\n", (unsigned int)qe->length);
-        goto err;
-    }
-    /* The block now belongs to the caller; only the request is freed. */
-    block = qe->block;
-
-    free((void *)qe);
-    return block;
-
-    err:
-    /* The request may still be queued (e.g. after a failed send); unlink it
-     * before freeing so queue_runner never touches freed memory. dequeue()
-     * does nothing if the entry is no longer on the queue. */
-    dequeue(qe);
-    free(qe->block);
-    free((void *)qe);
-    return NULL;
-}
-
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *
- *   @return: pointer to block, NULL on error
- */
-void *readblock(uint64_t id) {
-    int map = (int)BSID_MAP(id);
-    uint64_t xid;
-    static int i = CLUSTER_MAX_REPLICAS - 1;
-    void *block = NULL;
-
-    /* special case for the "superblock" just use the first block on the
-     * first replica. (extend to blocks < 6 for vdi bug)
-     */
-    if (id < 6) {
-        block = readblock_indiv(bsclusters[map].servers[0], id);
-        goto out;
-    }
-
-    i++;
-    if (i >= CLUSTER_MAX_REPLICAS)
-        i = 0;
-    switch (i) {
-    case 0:
-        xid = BSID_REPLICA0(id);
-        break;
-    case 1:
-        xid = BSID_REPLICA1(id);
-        break;
-    case 2:
-        xid = BSID_REPLICA2(id);
-        break;
-    }
-    
-    block = readblock_indiv(bsclusters[map].servers[i], xid);
-
-    out:
-#ifdef BSDEBUG
-    if (block)
-        fprintf(stderr, "READ:  %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-                id,
-                (unsigned int)((unsigned char *)block)[0],
-                (unsigned int)((unsigned char *)block)[1],
-                (unsigned int)((unsigned char *)block)[2],
-                (unsigned int)((unsigned char *)block)[3],
-                (unsigned int)((unsigned char *)block)[4],
-                (unsigned int)((unsigned char *)block)[5],
-                (unsigned int)((unsigned char *)block)[6],
-                (unsigned int)((unsigned char *)block)[7]);
-    else
-        fprintf(stderr, "READ:  %016llx NULL\n", id);
-#endif
-    return block;
-}
-
-/*****************************************************************************
- * Writing                                                                   *
- *****************************************************************************/
-
-/* Queue and send a BSOP_WRITEBLOCK request for `block` to one server.
- * Returns the request, still awaiting its reply (see wait_recv); the caller
- * frees it. Returns NULL on failure. `block` is referenced, not copied. */
-bsq_t *writeblock_indiv(int server, uint64_t id, void *block) {
-    bsq_t *qe;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("writeblock qe malloc");
-        return NULL;
-    }
-    qe->server = server;
-
-    qe->message.operation = BSOP_WRITEBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = id;
-    qe->block = block;
-    qe->length = MSGBUFSIZE_BLOCK;
-
-    if (send_message(qe) < 0) {
-        perror("writeblock sendto");
-        /* send_message() queues qe before sending: unlink it before the
-         * free so the queue never holds a dangling entry. */
-        dequeue(qe);
-        free((void *)qe);
-        return NULL;
-    }
-
-    return qe;
-}
-    
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-    int map = (int)BSID_MAP(id);
-    int rep0 = bsclusters[map].servers[0];
-    int rep1 = bsclusters[map].servers[1];
-    int rep2 = bsclusters[map].servers[2];
-    bsq_t *reqs[3];
-    int rc, i;
-
-    reqs[0] = reqs[1] = reqs[2] = NULL;
-
-#ifdef BSDEBUG
-    fprintf(stderr,
-            "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-            id,
-            (unsigned int)((unsigned char *)block)[0],
-            (unsigned int)((unsigned char *)block)[1],
-            (unsigned int)((unsigned char *)block)[2],
-            (unsigned int)((unsigned char *)block)[3],
-            (unsigned int)((unsigned char *)block)[4],
-            (unsigned int)((unsigned char *)block)[5],
-            (unsigned int)((unsigned char *)block)[6],
-            (unsigned int)((unsigned char *)block)[7]);
-#endif
-
-    /* special case for the "superblock" just use the first block on the
-     * first replica. (extend to blocks < 6 for vdi bug)
-     */
-    if (id < 6) {
-        reqs[0] = writeblock_indiv(rep0, id, block);
-        if (!reqs[0])
-            return -1;
-        rc = wait_recv(reqs, 1);
-        if (rc < 0 || (reqs[0]->message.flags & BSOP_FLAG_ERROR))
-            goto err;
-        /* Return 0, not wait_recv()'s request count, and free the request
-         * (it used to leak on this path). */
-        free((void *)reqs[0]);
-        return 0;
-    }
-
-    /* Send the block to all three replicas, then wait for every reply. */
-    reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block);
-    if (!reqs[0])
-        goto err;
-    reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block);
-    if (!reqs[1])
-        goto err;
-    reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block);
-    if (!reqs[2])
-        goto err;
-
-    rc = wait_recv(reqs, 3);
-    if (rc < 0) {
-        perror("writeblock recv");
-        goto err;
-    }
-    if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "writeblock server0 error\n");
-        goto err;
-    }
-    if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "writeblock server1 error\n");
-        goto err;
-    }
-    if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "writeblock server2 error\n");
-        goto err;
-    }
-
-    free((void *)reqs[0]);
-    free((void *)reqs[1]);
-    free((void *)reqs[2]);
-    return 0;
-
-    err:
-    /* Unlink whatever was sent before freeing it: a request with no reply
-     * yet is still on the retry queue. */
-    for (i = 0; i < 3; i++) {
-        if (reqs[i]) {
-            dequeue(reqs[i]);
-            free((void *)reqs[i]);
-        }
-    }
-    return -1;
-}
-
-/*****************************************************************************
- * Allocation                                                                *
- *****************************************************************************/
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock(void *block) {
-    return allocblock_hint(block, 0);
-}
-
-/* Queue and send a BSOP_ALLOCBLOCK request carrying `block` to one server,
- * with `hint` in the id field. Returns the pending request (caller frees it
- * once wait_recv has matched its reply), or NULL on failure. */
-bsq_t *allocblock_hint_indiv(int server, void *block, uint64_t hint) {
-    bsq_t *qe;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("allocblock_hint qe malloc");
-        return NULL;
-    }
-    qe->server = server;
-
-    qe->message.operation = BSOP_ALLOCBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = hint;
-    qe->block = block;
-    qe->length = MSGBUFSIZE_BLOCK;
-
-    if (send_message(qe) < 0) {
-        perror("allocblock_hint sendto");
-        /* qe was queued by send_message(); unlink it before freeing. */
-        dequeue(qe);
-        free((void *)qe);
-        return NULL;
-    }
-
-    return qe;
-}
-
-/**
- * allocblock_hint: write a new block to disk
- *   @block: pointer to block
- *   @hint: allocation hint
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
-    int map = (int)hint;
-    int rep0 = bsclusters[map].servers[0];
-    int rep1 = bsclusters[map].servers[1];
-    int rep2 = bsclusters[map].servers[2];
-    bsq_t *reqs[3];
-    int rc;
-    uint64_t id0, id1, id2;
-
-    reqs[0] = reqs[1] = reqs[2] = NULL;
-
-    DB("ENTER allocblock\n");
-
-    reqs[0] = allocblock_hint_indiv(rep0, block, hint);
-    if (!reqs[0])
-        goto err;
-    reqs[1] = allocblock_hint_indiv(rep1, block, hint);
-    if (!reqs[1])
-        goto err;
-    reqs[2] = allocblock_hint_indiv(rep2, block, hint);
-    if (!reqs[2])
-        goto err;
-
-    rc = wait_recv(reqs, 3);
-    if (rc < 0) {
-        perror("allocblock recv");
-        goto err;
-    }
-    if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "allocblock server0 error\n");
-        goto err;
-    }
-    if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "allocblock server1 error\n");
-        goto err;
-    }
-    if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "allocblock server2 error\n");
-        goto err;
-    }
-
-    id0 = reqs[0]->message.id;
-    id1 = reqs[1]->message.id;
-    id2 = reqs[2]->message.id;
-
-#ifdef BSDEBUG
-    fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-            BSID(map, id0, id1, id2),
-            (unsigned int)((unsigned char *)block)[0],
-            (unsigned int)((unsigned char *)block)[1],
-            (unsigned int)((unsigned char *)block)[2],
-            (unsigned int)((unsigned char *)block)[3],
-            (unsigned int)((unsigned char *)block)[4],
-            (unsigned int)((unsigned char *)block)[5],
-            (unsigned int)((unsigned char *)block)[6],
-            (unsigned int)((unsigned char *)block)[7]);
-#endif
-    
-    free((void *)reqs[0]);
-    free((void *)reqs[1]);
-    free((void *)reqs[2]);
-    return BSID(map, id0, id1, id2);
-
-    err:
-    if (reqs[0]) {
-        dequeue(reqs[0]);
-        free((void *)reqs[0]);
-    }
-    if (reqs[1]) {
-        dequeue(reqs[1]);
-        free((void *)reqs[1]);
-    }
-    if (reqs[2]) {
-        dequeue(reqs[2]);
-        free((void *)reqs[2]);
-    }
-    return 0;
-}
-
-#else /* /BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Local storage version                                                     *
- *****************************************************************************/
- 
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *
- *   @return: pointer to block, NULL on error
- */
-
-void *readblock(uint64_t id) {
-    void *block;
-    int block_fp;
-   
-//printf("readblock(%llu)\n", id); 
-    block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return NULL;
-    }
-    
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        printf ("%Ld ", id);
-        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
-        perror("readblock lseek");
-        goto err;
-    }
-    if ((block = malloc(BLOCK_SIZE)) == NULL) {
-        perror("readblock malloc");
-        goto err;
-    }
-    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("readblock read");
-        free(block);
-        goto err;
-    }
-    close(block_fp);
-    return block;
-    
-err:
-    close(block_fp);
-    return NULL;
-}
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-    int block_fp;
-
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-    }
-
-    /* Block ids are 1-based: id 1 lives at file offset 0. */
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        perror("writeblock lseek");
-        goto err;
-    }
-    /* A short write is a failure too, as in allocblock(). */
-    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("writeblock write");
-        goto err;
-    }
-    close(block_fp);
-    return 0;
-
-err:
-    close(block_fp);
-    return -1;
-}
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-
-uint64_t allocblock(void *block) {
-    uint64_t lb;
-    off64_t pos;
-    int block_fp;
-    
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return 0;
-    }
-
-    pos = lseek64(block_fp, 0, SEEK_END);
-    if (pos == (off64_t)-1) {
-        perror("allocblock lseek");
-        goto err;
-    }
-    if (pos % BLOCK_SIZE != 0) {
-        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
-        goto err;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("allocblock write");
-        goto err;
-    }
-    lb = pos / BLOCK_SIZE + 1;
-//printf("alloc(%Ld)\n", lb);
-    close(block_fp);
-    return lb;
-    
-err:
-    close(block_fp);
-    return 0;
-    
-}
-
-/**
- * allocblock_hint: write a new block to disk
- *   @block: pointer to block
- *   @hint: allocation hint
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
-    return allocblock(block);
-}
-
-#endif /* BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Memory management                                                         *
- *****************************************************************************/
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- *   @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
-    void *block = malloc(BLOCK_SIZE);
-    if (block == NULL) {
-        perror("newblock");
-        return NULL;
-    }
-    memset(block, 0, BLOCK_SIZE);
-    return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- *   @id: block id (zero if this is only in-memory)
- *   @block: block to be freed
- */
-void freeblock(void *block) {
-        free(block);
-}
-
-static freeblock_t *new_freeblock(void)
-{
-    freeblock_t *fb;
-    
-    fb = newblock();
-    
-    if (fb == NULL) return NULL;
-    
-    fb->magic = FREEBLOCK_MAGIC;
-    fb->next  = 0ULL;
-    fb->count = 0ULL;
-    memset(fb->list, 0, sizeof fb->list);
-    
-    return fb;
-}
-
-/* releaseblock: record block `id` on the on-disk free list.  Gives up
- * silently if the superblock or a freelist block cannot be obtained. */
-void releaseblock(uint64_t id)
-{
-    blockstore_super_t *bs_super;
-    freeblock_t *fl_current = NULL;
-
-    /* get superblock */
-    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-    if (bs_super == NULL)
-        return;
-    /* get freeblock_current */
-    if (bs_super->freelist_current == 0ULL) {
-        if ((fl_current = new_freeblock()) == NULL)
-            goto out;
-        bs_super->freelist_current = allocblock(fl_current);
-        writeblock(BLOCKSTORE_SUPER, bs_super);
-    } else if ((fl_current = readblock(bs_super->freelist_current)) == NULL) {
-        goto out;
-    }
-
-    /* if full, chain to superblock and allocate new current */
-    if (fl_current->count == FREEBLOCK_SIZE) {
-        fl_current->next = bs_super->freelist_full;
-        writeblock(bs_super->freelist_current, fl_current);
-        bs_super->freelist_full = bs_super->freelist_current;
-        freeblock(fl_current);
-        if ((fl_current = new_freeblock()) == NULL)
-            goto out;
-        bs_super->freelist_current = allocblock(fl_current);
-        writeblock(BLOCKSTORE_SUPER, bs_super);
-    }
-    /* append id to current */
-    fl_current->list[fl_current->count++] = id;
-    writeblock(bs_super->freelist_current, fl_current);
-out:
-    freeblock(fl_current);
-    freeblock(bs_super);
-}
-
-/* freelist debug functions: */
-/* Debug aid: print how many block ids the on-disk free list holds. */
-void freelist_count(int print_each)
-{
-    blockstore_super_t *bs_super;
-    freeblock_t *fb;
-    uint64_t total = 0, next, i;
-
-    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-    if (bs_super == NULL)
-        return;
-
-    if (bs_super->freelist_current == 0ULL) {
-        printf("freelist is empty!\n");
-        goto out;
-    }
-
-    if ((fb = readblock(bs_super->freelist_current)) == NULL)
-        goto out;
-    printf("%Ld entires on current.\n", fb->count);
-    total += fb->count;
-    if (print_each == 1)
-        for (i = 0; i < fb->count; i++)
-            printf("  %Ld\n", fb->list[i]);
-    freeblock(fb);
-
-    if (bs_super->freelist_full == 0ULL) {
-        printf("freelist_full is empty!\n");
-        goto out;
-    }
-    /* walk the chain of full freelist blocks */
-    for (next = bs_super->freelist_full; next != 0ULL; ) {
-        if ((fb = readblock(next)) == NULL)
-            goto out;
-        total += fb->count;
-        if (print_each == 1)
-            for (i = 0; i < fb->count; i++)
-                printf("  %Ld\n", fb->list[i]);
-        next = fb->next;
-        freeblock(fb);
-    }
-    printf("Total of %Ld ids on freelist.\n", total);
-
-out:
-    /* the superblock copy used to leak on every path */
-    freeblock(bs_super);
-}
-
-/*****************************************************************************
- * Initialisation                                                            *
- *****************************************************************************/
-
-int __init_blockstore(void)
-{
-    int i;
-    blockstore_super_t *bs_super;
-    uint64_t ret;
-    int block_fp;
-    
-#ifdef BLOCKSTORE_REMOTE
-    struct hostent *addr;
-
-    pthread_mutex_init(&ptmutex_queue, NULL);
-    pthread_mutex_init(&ptmutex_luid, NULL);
-    pthread_mutex_init(&ptmutex_recv, NULL);
-    /*pthread_mutex_init(&ptmutex_notify, NULL);*/
-    for (i = 0; i <= READ_POOL_SIZE; i++) {
-        pool_thread[i].newdata = 0;
-        pthread_mutex_init(&(pool_thread[i].ptmutex), NULL);
-        pthread_cond_init(&(pool_thread[i].ptcv), NULL);
-    }
-
-    bsservers[0].hostname = "firebug.cl.cam.ac.uk";
-    bsservers[1].hostname = "planb.cl.cam.ac.uk";
-    bsservers[2].hostname = "simcity.cl.cam.ac.uk";
-    bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/;
-    bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/;
-    bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/;
-    bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/;
-    bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/;
-    bsservers[8].hostname = NULL;
-    bsservers[9].hostname = NULL;
-    bsservers[10].hostname = NULL;
-    bsservers[11].hostname = NULL;
-    bsservers[12].hostname = NULL;
-    bsservers[13].hostname = NULL;
-    bsservers[14].hostname = NULL;
-    bsservers[15].hostname = NULL;
-
-    for (i = 0; i < MAX_SERVERS; i++) {
-        if (!bsservers[i].hostname)
-            continue;
-        addr = gethostbyname(bsservers[i].hostname);
-        if (!addr) {
-            perror("bad hostname");
-            return -1;
-        }
-        bsservers[i].sin.sin_family = addr->h_addrtype;
-        bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT);
-        bsservers[i].sin.sin_addr.s_addr = 
-            ((struct in_addr *)(addr->h_addr))->s_addr;
-    }
-
-    /* Cluster map
-     */
-    bsclusters[0].servers[0] = 0;
-    bsclusters[0].servers[1] = 1;
-    bsclusters[0].servers[2] = 2;
-    bsclusters[1].servers[0] = 1;
-    bsclusters[1].servers[1] = 2;
-    bsclusters[1].servers[2] = 3;
-    bsclusters[2].servers[0] = 2;
-    bsclusters[2].servers[1] = 3;
-    bsclusters[2].servers[2] = 4;
-    bsclusters[3].servers[0] = 3;
-    bsclusters[3].servers[1] = 4;
-    bsclusters[3].servers[2] = 5;
-    bsclusters[4].servers[0] = 4;
-    bsclusters[4].servers[1] = 5;
-    bsclusters[4].servers[2] = 6;
-    bsclusters[5].servers[0] = 5;
-    bsclusters[5].servers[1] = 6;
-    bsclusters[5].servers[2] = 7;
-    bsclusters[6].servers[0] = 6;
-    bsclusters[6].servers[1] = 7;
-    bsclusters[6].servers[2] = 0;
-    bsclusters[7].servers[0] = 7;
-    bsclusters[7].servers[1] = 0;
-    bsclusters[7].servers[2] = 1;
-
-    /* Local socket set up
-     */
-    bssock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (bssock < 0) {
-        perror("Bad socket");
-        return -1;
-    }
-    memset(&sin_local, 0, sizeof(sin_local));
-    sin_local.sin_family = AF_INET;
-    sin_local.sin_port = htons(BLOCKSTORED_PORT);
-    sin_local.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) {
-        perror("bind");
-        close(bssock);
-        return -1;
-    }
-
-    pthread_create(&pthread_recv, NULL, receive_loop, NULL);
-    pthread_create(&pthread_recv, NULL, queue_runner, NULL);
-
-#else /* /BLOCKSTORE_REMOTE */
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-        exit(-1);
-    }
-    
-    if (lseek(block_fp, 0, SEEK_END) == 0) {
-        bs_super = newblock();
-        bs_super->magic            = BLOCKSTORE_MAGIC;
-        bs_super->freelist_full    = 0LL;
-        bs_super->freelist_current = 0LL;
-        
-        ret = allocblock(bs_super);
-        
-        freeblock(bs_super);
-    } else {
-        bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-        if (bs_super->magic != BLOCKSTORE_MAGIC)
-        {
-            printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
-            exit(-1);
-        }
-        freeblock(bs_super);
-    }
-        
-    close(block_fp);
-        
-#endif /*  BLOCKSTORE_REMOTE */   
-    return 0;
-}
-
-void __exit_blockstore(void)
-{
-    int i;
-#ifdef BLOCKSTORE_REMOTE
-    pthread_mutex_destroy(&ptmutex_recv);
-    pthread_mutex_destroy(&ptmutex_luid);
-    pthread_mutex_destroy(&ptmutex_queue);
-    /*pthread_mutex_destroy(&ptmutex_notify);
-      pthread_cond_destroy(&ptcv_notify);*/
-    for (i = 0; i <= READ_POOL_SIZE; i++) {
-        pthread_mutex_destroy(&(pool_thread[i].ptmutex));
-        pthread_cond_destroy(&(pool_thread[i].ptcv));
-    }
-#endif
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/blockstore.h
--- a/tools/blktap/parallax/blockstore.h        Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-/**************************************************************************
- * 
- * blockstore.h
- *
- * Simple block store interface
- *
- */
- 
-#ifndef __BLOCKSTORE_H__
-#define __BLOCKSTORE_H__
-
-#include <netinet/in.h>
-#include <xenctrl.h>
-
-#define BLOCK_SIZE  4096
-#define BLOCK_SHIFT   12
-#define BLOCK_MASK  0xfffffffffffff000LL
-
-/* XXX SMH: where is the below supposed to be defined???? */
-#ifndef SECTOR_SHIFT 
-#define SECTOR_SHIFT   9 
-#endif
-
-#define FREEBLOCK_SIZE  (BLOCK_SIZE / sizeof(uint64_t)) - (3 * sizeof(uint64_t))
-#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
-
-typedef struct {
-    uint64_t magic;
-    uint64_t next;
-    uint64_t count;
-    uint64_t list[FREEBLOCK_SIZE];
-} freeblock_t; 
-
-#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL
-#define BLOCKSTORE_SUPER 1ULL
-
-typedef struct {
-    uint64_t magic;
-    uint64_t freelist_full;
-    uint64_t freelist_current;
-} blockstore_super_t;
-
-extern void *newblock();
-extern void *readblock(uint64_t id);
-extern uint64_t allocblock(void *block);
-extern uint64_t allocblock_hint(void *block, uint64_t hint);
-extern int writeblock(uint64_t id, void *block);
-
-/* Add this blockid to a freelist, to be recycled by the allocator. */
-extern void releaseblock(uint64_t id);
-
-/* this is a memory free() operation for block-sized allocations */
-extern void freeblock(void *block);
-extern int __init_blockstore(void);
-
-/* debug for freelist. */
-void freelist_count(int print_each);
-#define ALLOCFAIL (((uint64_t)(-1)))
-
-/* Distribution
- */
-#define BLOCKSTORED_PORT 9346
-
-struct bshdr_t_struct {
-    uint32_t            operation;
-    uint32_t            flags;
-    uint64_t            id;
-    uint64_t            luid;
-} __attribute__ ((packed));
-typedef struct bshdr_t_struct bshdr_t;
-
-struct bsmsg_t_struct {
-    bshdr_t        hdr;
-    unsigned char  block[BLOCK_SIZE];
-} __attribute__ ((packed));
-
-typedef struct bsmsg_t_struct bsmsg_t;
-
-#define MSGBUFSIZE_OP    sizeof(uint32_t)
-#define MSGBUFSIZE_FLAGS (sizeof(uint32_t) + sizeof(uint32_t))
-#define MSGBUFSIZE_ID    (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint64_t))
-#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
-
-#define BSOP_READBLOCK  0x01
-#define BSOP_WRITEBLOCK 0x02
-#define BSOP_ALLOCBLOCK 0x03
-#define BSOP_FREEBLOCK  0x04
-
-#define BSOP_FLAG_ERROR 0x01
-
-#define BS_ALLOC_SKIP 10
-#define BS_ALLOC_HACK
-
-/* Remote hosts and cluster map - XXX need to generalise
- */
-
-/*
-
-  Interim ID format is
-
-  63 60 59                40 39                20 19                 0
-  +----+--------------------+--------------------+--------------------+
-  |map | replica 2          | replica 1          | replica 0          |
-  +----+--------------------+--------------------+--------------------+
-
-  The map is an index into a table detailing which machines form the
-  cluster.
-
- */
-
-#define BSID_REPLICA0(_id) ((_id)&0xfffffULL)
-#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL)
-#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL)
-#define BSID_MAP(_id)      (((_id)>>60)&0xfULL)
-
-#define BSID(_map, _rep0, _rep1, _rep2) ((((uint64_t)(_map))<<60) | \
-                                         (((uint64_t)(_rep2))<<40) | \
-                                         (((uint64_t)(_rep1))<<20) | ((uint64_t)(_rep0)))
-
-typedef struct bsserver_t_struct {
-    char              *hostname;
-    struct sockaddr_in sin;
-} bsserver_t;
-
-#define MAX_SERVERS 16
-
-#define CLUSTER_MAX_REPLICAS 3
-typedef struct bscluster_t_struct {
-    int servers[CLUSTER_MAX_REPLICAS];
-} bscluster_t;
-
-#define MAX_CLUSTERS 16
-
-#endif /* __BLOCKSTORE_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/blockstored.c
--- a/tools/blktap/parallax/blockstored.c       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,275 +0,0 @@
-/**************************************************************************
- * 
- * blockstored.c
- *
- * Block store daemon.
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <errno.h>
-#include "blockstore.h"
-
-//#define BSDEBUG
-
-int readblock_into(uint64_t id, void *block);
-
-int open_socket(uint16_t port) {
-    
-    struct sockaddr_in sn;
-    int sock;
-
-    sock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (sock < 0) {
-        perror("Bad socket");
-        return -1;
-    }
-    memset(&sn, 0, sizeof(sn));
-    sn.sin_family = AF_INET;
-    sn.sin_port = htons(port);
-    sn.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
-        perror("bind");
-        close(sock);
-        return -1;
-    }
-
-    return sock;
-}
-
-static int block_fp = -1;
-static int bssock = -1;
-
-int send_reply(struct sockaddr_in *peer, void *buffer, int len) {
-
-    int rc;
-    
-#ifdef BSDEBUG
-    fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n",
-            len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id);
-#endif
-    rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer));
-    if (rc < 0) {
-        perror("send_reply");
-        return 1;
-    }
-
-
-    return 0;
-}
-
-static bsmsg_t msgbuf;
-
-void service_loop(void) {
-
-    for (;;) {
-        int rc, len;
-        struct sockaddr_in from;
-        size_t slen = sizeof(from);
-        uint64_t bid;
-
-        len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0,
-                       (struct sockaddr *)&from, &slen);
-
-        if (len < 0) {
-            perror("recvfrom");
-            continue;
-        }
-
-        if (len < MSGBUFSIZE_OP) {
-            fprintf(stderr, "Short packet.\n");
-            continue;
-        }
-
-#ifdef BSDEBUG
-        fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n",
-                len, msgbuf.hdr.operation, msgbuf.hdr.id);
-#endif
-
-        switch (msgbuf.hdr.operation) {
-        case BSOP_READBLOCK:
-            if (len < MSGBUFSIZE_ID) {
-                fprintf(stderr, "Short packet (readblock %u).\n", len);
-                continue;
-            }
-            rc = readblock_into(msgbuf.hdr.id, msgbuf.block);
-            if (rc < 0) {
-                fprintf(stderr, "readblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK);
-            break;
-        case BSOP_WRITEBLOCK:
-            if (len < MSGBUFSIZE_BLOCK) {
-                fprintf(stderr, "Short packet (writeblock %u).\n", len);
-                continue;
-            }
-            rc = writeblock(msgbuf.hdr.id, msgbuf.block);
-            if (rc < 0) {
-                fprintf(stderr, "writeblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-            break;
-        case BSOP_ALLOCBLOCK:
-            if (len < MSGBUFSIZE_BLOCK) {
-                fprintf(stderr, "Short packet (allocblock %u).\n", len);
-                continue;
-            }
-            bid = allocblock(msgbuf.block);
-            if (bid == ALLOCFAIL) {
-                fprintf(stderr, "allocblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.id = bid;
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-            break;
-        }
-
-    }
-}
- 
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *   @block: pointer to buffer to receive block
- *
- *   @return: 0 if OK, other on error
- */
-
-int readblock_into(uint64_t id, void *block) {
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
-        perror("readblock lseek");
-        return -1;
-    }
-    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("readblock read");
-        return -1;
-    }
-    return 0;
-}
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        perror("writeblock lseek");
-        return -1;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) < 0) {
-        perror("writeblock write");
-        return -1;
-    }
-    return 0;
-}
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-static uint64_t lastblock = 0;
-
-uint64_t allocblock(void *block) {
-    uint64_t lb;
-    off64_t pos;
-
-    retry:
-    pos = lseek64(block_fp, 0, SEEK_END);
-    if (pos == (off64_t)-1) {
-        perror("allocblock lseek");
-        return ALLOCFAIL;
-    }
-    if (pos % BLOCK_SIZE != 0) {
-        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
-        return ALLOCFAIL;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("allocblock write");
-        return ALLOCFAIL;
-    }
-    lb = pos / BLOCK_SIZE + 1;
-
-#ifdef BS_ALLOC_HACK
-    if (lb < BS_ALLOC_SKIP)
-        goto retry;
-#endif
-    
-    if (lb <= lastblock)
-        printf("[*** %Ld alredy allocated! ***]\n", lb);
-    
-    lastblock = lb;
-    return lb;
-}
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- *   @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
-    void *block = malloc(BLOCK_SIZE);
-    if (block == NULL) {
-        perror("newblock");
-        return NULL;
-    }
-    memset(block, 0, BLOCK_SIZE);
-    return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- *   @id: block id (zero if this is only in-memory)
- *   @block: block to be freed
- */
-void freeblock(void *block) {
-        free(block);
-}
-
-
-int main(int argc, char **argv)
-{
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-    }
-
-    bssock = open_socket(BLOCKSTORED_PORT);
-    if (bssock < 0) {
-        return -1;
-    }
-
-    service_loop();
-    
-    close(bssock);
-
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/bstest.c
--- a/tools/blktap/parallax/bstest.c    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-/**************************************************************************
- * 
- * bstest.c
- *
- * Block store daemon test program.
- *
- * usage: bstest <host>|X {r|w|a} ID 
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-#include <errno.h>
-#include "blockstore.h"
-
-int direct(char *host, uint32_t op, uint64_t id, int len) {
-    struct sockaddr_in sn, peer;
-    int sock;
-    bsmsg_t msgbuf;
-    int rc, slen;
-    struct hostent *addr;
-
-    addr = gethostbyname(host);
-    if (!addr) {
-        perror("bad hostname");
-        exit(1);
-    }
-    peer.sin_family = addr->h_addrtype;
-    peer.sin_port = htons(BLOCKSTORED_PORT);
-    peer.sin_addr.s_addr =  ((struct in_addr *)(addr->h_addr))->s_addr;
-    fprintf(stderr, "Sending to: %u.%u.%u.%u\n",
-            (unsigned int)(unsigned char)addr->h_addr[0],
-            (unsigned int)(unsigned char)addr->h_addr[1],
-            (unsigned int)(unsigned char)addr->h_addr[2],
-            (unsigned int)(unsigned char)addr->h_addr[3]);
-
-    sock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (sock < 0) {
-        perror("Bad socket");
-        exit(1);
-    }
-    memset(&sn, 0, sizeof(sn));
-    sn.sin_family = AF_INET;
-    sn.sin_port = htons(BLOCKSTORED_PORT);
-    sn.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
-        perror("bind");
-        close(sock);
-        exit(1);
-    }
-
-    memset((void *)&msgbuf, 0, sizeof(msgbuf));
-    msgbuf.operation = op;
-    msgbuf.id = id;
-
-    rc = sendto(sock, (void *)&msgbuf, len, 0,
-                (struct sockaddr *)&peer, sizeof(peer));
-    if (rc < 0) {
-        perror("sendto");
-        exit(1);
-    }
-
-    slen = sizeof(peer);
-    len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0,
-                   (struct sockaddr *)&peer, &slen);
-    if (len < 0) {
-        perror("recvfrom");
-        exit(1);
-    }
-
-    printf("Reply %u bytes:\n", len);
-    if (len >= MSGBUFSIZE_OP)
-        printf("  operation: %u\n", msgbuf.operation);
-    if (len >= MSGBUFSIZE_FLAGS)
-        printf("  flags: 0x%x\n", msgbuf.flags);
-    if (len >= MSGBUFSIZE_ID)
-        printf("  id: %llu\n", msgbuf.id);
-    if (len >= (MSGBUFSIZE_ID + 4))
-        printf("  data: %02x %02x %02x %02x...\n",
-               (unsigned int)msgbuf.block[0],
-               (unsigned int)msgbuf.block[1],
-               (unsigned int)msgbuf.block[2],
-               (unsigned int)msgbuf.block[3]);
-    
-    if (sock > 0)
-        close(sock);
-   
-    return 0;
-}
-
-int main (int argc, char **argv) {
-
-    uint32_t op = 0;
-    uint64_t id = 0;
-    int len = 0, rc;
-    void *block;
-
-    if (argc < 3) {
-        fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n");
-        return 1;
-    }
-
-    switch (argv[2][0]) {
-    case 'r':
-    case 'R':
-        op = BSOP_READBLOCK;
-        len = MSGBUFSIZE_ID;
-        break;
-    case 'w':
-    case 'W':
-        op = BSOP_WRITEBLOCK;
-        len = MSGBUFSIZE_BLOCK;
-        break;
-    case 'a':
-    case 'A':
-        op = BSOP_ALLOCBLOCK;
-        len = MSGBUFSIZE_BLOCK;
-        break;
-    default:
-        fprintf(stderr, "Unknown action '%s'.\n", argv[2]);
-        return 1;
-    }
-
-    if (argc >= 4)
-        id = atoll(argv[3]);
-
-    if (strcmp(argv[1], "X") == 0) {
-        rc = __init_blockstore();
-        if (rc < 0) {
-            fprintf(stderr, "blockstore init failed.\n");
-            return 1;
-        }
-        switch(op) {
-        case BSOP_READBLOCK:
-            block = readblock(id);
-            if (block) {
-                printf("data: %02x %02x %02x %02x...\n",
-                       (unsigned int)((unsigned char*)block)[0],
-                       (unsigned int)((unsigned char*)block)[1],
-                       (unsigned int)((unsigned char*)block)[2],
-                       (unsigned int)((unsigned char*)block)[3]);
-            }
-            break;
-        case BSOP_WRITEBLOCK:
-            block = malloc(BLOCK_SIZE);
-            if (!block) {
-                perror("bstest malloc");
-                return 1;
-            }
-            memset(block, 0, BLOCK_SIZE);
-            rc = writeblock(id, block);
-            if (rc != 0) {
-                printf("error\n");
-            }
-            else {
-                printf("OK\n");
-            }
-            break;
-        case BSOP_ALLOCBLOCK:
-            block = malloc(BLOCK_SIZE);
-            if (!block) {
-                perror("bstest malloc");
-                return 1;
-            }
-            memset(block, 0, BLOCK_SIZE);
-            id = allocblock_hint(block, id);
-            if (id == 0) {
-                printf("error\n");
-            }
-            else {
-                printf("ID: %llu\n", id);
-            }
-            break;
-        }
-    }
-    else {
-        direct(argv[1], op, id, len);
-    }
-
-
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/parallax.c
--- a/tools/blktap/parallax/parallax.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,608 +0,0 @@
-/**************************************************************************
- * 
- * parallax.c
- *
- * The Parallax Storage Server
- *
- */
- 
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "blktaplib.h"
-#include "blockstore.h"
-#include "vdi.h"
-#include "block-async.h"
-#include "requests-async.h"
-
-#define PARALLAX_DEV     61440
-#define SECTS_PER_NODE   8
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* ------[ session records ]----------------------------------------------- */
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-#define VDI_HASHSZ 16
-#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
-
-typedef struct blkif {
-    domid_t       domid;
-    unsigned int  handle;
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    vdi_t        *vdi_hash[VDI_HASHSZ];
-    struct blkif *hash_next;
-} blkif_t;
-
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    if ( handle != 0 )
-        printf("blktap/parallax don't currently support non-0 dev handles!\n");
-    
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
-        blkif = blkif->hash_next;
-    return blkif;
-}
-
-vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
-{
-    vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)];
-    
-    while ((vdi != NULL) && (vdi->vdevice != device))
-        vdi = vdi->next;
-    
-    return vdi;
-}
-
-/* ------[ control message handling ]-------------------------------------- */
-
-void blkif_create(blkif_be_create_t *create)
-{
-    domid_t       domid  = create->domid;
-    unsigned int  handle = create->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    DPRINTF("parallax (blkif_create): create is %p\n", create); 
-    
-    if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL )
-    {
-        DPRINTF("Could not create blkif: out of memory\n");
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid  = domid;
-    blkif->handle = handle;
-    blkif->status = DISCONNECTED;
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTF("Could not create blkif: already exists (%d,%d)\n",
-                domid, handle);
-            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
-            free(blkif);
-            return;
-        }
-        pblkif = &(*pblkif)->hash_next;
-    }
-
-    blkif->hash_next = *pblkif;
-    *pblkif = blkif;
-
-    DPRINTF("Successfully created blkif\n");
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_destroy(blkif_be_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    unsigned int  handle = destroy->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); 
-    
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif = *pblkif) != NULL )
-    {
-        if ( (blkif->domid == domid) && (blkif->handle == handle) )
-        {
-            if ( blkif->status != DISCONNECTED )
-                goto still_connected;
-            goto destroy;
-        }
-        pblkif = &blkif->hash_next;
-    }
-
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- still_connected:
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-    return;
-
- destroy:
-    *pblkif = blkif->hash_next;
-    free(blkif);
-    destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_create(blkif_be_vbd_create_t *create)
-{
-    blkif_t            *blkif;
-    vdi_t              *vdi, **vdip;
-    blkif_vdev_t        vdevice = create->vdevice;
-
-    DPRINTF("parallax (vbd_create): create=%p\n", create); 
-    
-    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
-    if ( blkif == NULL )
-    {
-        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 
-                create->domid, create->blkif_handle); 
-        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    /* VDI identifier is in grow->extent.sector_start */
-    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 
-            (unsigned long)create->dev_handle);
-
-    vdi = vdi_get(create->dev_handle);
-    if (vdi == NULL)
-    {
-        printf("parallax (vbd_create): VDI %lx not found.\n",
-               (unsigned long)create->dev_handle);
-        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
-        return;
-    }
-    
-    vdi->next = NULL;
-    vdi->vdevice = vdevice;
-    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
-    while (*vdip != NULL)
-        vdip = &(*vdip)->next;
-    *vdip = vdi;
-    
-    DPRINTF("blkif_create succeeded\n"); 
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
-{
-    blkif_t            *blkif;
-    vdi_t              *vdi, **vdip;
-    blkif_vdev_t        vdevice = destroy->vdevice;
-    
-    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
-    if ( blkif == NULL )
-    {
-        DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 
-                destroy->domid, destroy->blkif_handle); 
-        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
-    while ((*vdip != NULL) && ((*vdip)->vdevice != vdevice))
-        vdip = &(*vdip)->next;
-
-    if (*vdip != NULL) 
-    {
-        vdi = *vdip;
-        *vdip = vdi->next;
-        vdi_put(vdi);
-    }
-        
-}
-
-int parallax_control(control_msg_t *msg)
-{
-    domid_t  domid;
-    int      ret;
-
-    DPRINTF("parallax_control: msg is %p\n", msg); 
-    
-    if (msg->type != CMSG_BLKIF_BE) 
-    {
-        printf("Unexpected control message (%d)\n", msg->type);
-        return 0;
-    }
-    
-    switch(msg->subtype)
-    {
-    case CMSG_BLKIF_BE_CREATE:
-        if ( msg->length != sizeof(blkif_be_create_t) )
-            goto parse_error;
-        blkif_create((blkif_be_create_t *)msg->msg);
-        break;   
-        
-    case CMSG_BLKIF_BE_DESTROY:
-        if ( msg->length != sizeof(blkif_be_destroy_t) )
-            goto parse_error;
-        blkif_destroy((blkif_be_destroy_t *)msg->msg);
-        break;  
-        
-    case CMSG_BLKIF_BE_VBD_CREATE:
-        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
-            goto parse_error;
-        vbd_create((blkif_be_vbd_create_t *)msg->msg);
-        break;
-        
-    case CMSG_BLKIF_BE_VBD_DESTROY:
-        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
-            goto parse_error;
-        vbd_destroy((blkif_be_vbd_destroy_t *)msg->msg);
-        break;
-
-    case CMSG_BLKIF_BE_CONNECT:
-    case CMSG_BLKIF_BE_DISCONNECT:
-        /* we don't manage the device channel, the tap does. */
-        break;
-
-    default:
-        goto parse_error;
-    }
-    return 0;
-parse_error:
-    printf("Bad control message!\n");
-    return 0;
-    
-}    
-
-int parallax_probe(blkif_request_t *req, blkif_t *blkif)
-{
-    blkif_response_t *rsp;
-    vdisk_t *img_info;
-    vdi_t *vdi;
-    int i, nr_vdis = 0; 
-
-    DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif); 
-
-    /* We expect one buffer only. */
-    if ( req->nr_segments != 1 )
-      goto err;
-
-    /* Make sure the buffer is page-sized. */
-    if ( (req->seg[0].first_sect != 0) || (req->seg[0].last_sect != 7) )
-      goto err;
-
-    /* fill the list of devices */
-    for (i=0; i<VDI_HASHSZ; i++) {
-        vdi = blkif->vdi_hash[i];
-        while (vdi) {
-            img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
-            img_info[nr_vdis].device   = vdi->vdevice;
-            img_info[nr_vdis].info     = 0;
-            /* The -1 here accounts for the LSB in the radix tree */
-            img_info[nr_vdis].capacity = 
-                    ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE);
-            nr_vdis++;
-            vdi = vdi->next;
-        }
-    }
-
-    
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_PROBE;
-    rsp->status = nr_vdis; /* number of disks */
-
-    DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis);
-    return  BLKTAP_RESPOND;
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_PROBE;
-    rsp->status = BLKIF_RSP_ERROR;
-    
-    DPRINTF("parallax_probe: send error response\n"); 
-    return BLKTAP_RESPOND;  
-}
-
-typedef struct {
-    blkif_request_t *req;
-    int              count;
-    int              error;
-    pthread_mutex_t  mutex;
-} pending_t;
-
-#define MAX_REQUESTS 64
-pending_t pending_list[MAX_REQUESTS];
-
-struct cb_param {
-    pending_t *pent;
-    int       segment;
-    uint64_t       sector; 
-    uint64_t       vblock; /* for debug printing -- can be removed. */
-};
-
-static void read_cb(struct io_ret r, void *in_param)
-{
-    struct cb_param *param = (struct cb_param *)in_param;
-    pending_t *p = param->pent;
-    int segment = param->segment;
-    blkif_request_t *req = p->req;
-    unsigned long size, offset, start;
-    char *dpage, *spage;
-       
-    spage  = IO_BLOCK(r);
-    if (spage == NULL) { p->error++; goto finish; }
-    dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment);
-    
-    /* Calculate read size and offset within the read block. */
-
-    offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE;
-    size = (req->seg[segment].last_sect - req->seg[segment].first_sect + 1) <<
-        SECTOR_SHIFT;
-    start = req->seg[segment].first_sect << SECTOR_SHIFT;
-
-    DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
-            "vblock %llx, "
-            "size %lx\n", 
-            param->sector,
-            p->req->seg[segment].first_sect,
-            p->req->seg[segment].last_sect,
-            param->vblock, size); 
-
-    memcpy(dpage + start, spage + offset, size);
-    freeblock(spage);
-    
-    /* Done the read.  Now update the pending record. */
- finish:
-    pthread_mutex_lock(&p->mutex);
-    p->count--;
-    
-    if (p->count == 0) {
-       blkif_response_t *rsp;
-       
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = BLKIF_OP_READ;
-       if (p->error == 0) {
-            rsp->status = BLKIF_RSP_OKAY;
-       } else {
-            rsp->status = BLKIF_RSP_ERROR;
-       }
-        blktap_inject_response(rsp);       
-    }
-    
-    pthread_mutex_unlock(&p->mutex);
-       
-    free(param); /* TODO: replace with cached alloc/dealloc */
-}      
-
-/*
- * parallax_read: dispatch a block read request, one async read per segment.
- *   @req:   the read request; reused as the response buffer on completion.
- *   @blkif: interface the request arrived on, used to find the target VDI.
- *
- *   @return: BLKTAP_STOLEN when completion is left to read_cb, or
- *            BLKTAP_RESPOND with an error response filled into @req.
- *
- * The pending record is fully reset for each request (including the error
- * count, which would otherwise leak from an earlier failed request that used
- * the same slot).  If a segment cannot be issued, that segment and all the
- * ones after it are written off as failed so the request still completes.
- */
-int parallax_read(blkif_request_t *req, blkif_t *blkif)
-{
-    blkif_response_t *rsp;
-    vdi_t *vdi;
-    pending_t *pent;
-    int i, remaining;
-
-    vdi = blkif_get_vdi(blkif, req->device);
-
-    if ( vdi == NULL )
-        goto err;
-
-    pent = &pending_list[ID_TO_IDX(req->id)];
-    pent->count = req->nr_segments;
-    pent->error = 0;
-    pent->req = req;
-    pthread_mutex_init(&pent->mutex, NULL);
-
-    for (i = 0; i < req->nr_segments; i++) {
-        struct cb_param *p;
-        uint64_t sector, vblock;
-
-        /* Round the requested segment to a block address. */
-        sector = req->sector_number + (8*i);
-        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
-
-        /* TODO: Replace this call to malloc with a cached allocation */
-        p = malloc(sizeof(*p));
-        if (p == NULL)
-            goto abandon;
-        p->pent = pent;
-        p->sector = sector;
-        p->segment = i;
-        p->vblock = vblock; /* dbg */
-
-        /* Get that block from the store.  A non-zero return means the */
-        /* read was rejected up front and read_cb will never run for it. */
-        if (vdi_read(vdi, vblock, read_cb, (void *)p) != 0) {
-            free(p);
-            goto abandon;
-        }
-    }
-
-    return BLKTAP_STOLEN;
-
-abandon:
-    /* Segments i..nr_segments-1 will never get a callback: account for */
-    /* them here.  If earlier segments are still in flight, the last of */
-    /* their callbacks sends the (error) response.                      */
-    pthread_mutex_lock(&pent->mutex);
-    pent->error++;
-    pent->count -= (req->nr_segments - i);
-    remaining = pent->count;
-    pthread_mutex_unlock(&pent->mutex);
-    if (remaining != 0)
-        return BLKTAP_STOLEN;
-
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_READ;
-    rsp->status = BLKIF_RSP_ERROR;
-
-    return BLKTAP_RESPOND;
-}
-
-/*
- * write_cb: completion callback for one segment of a parallax write.
- *   @r:        result from the block layer; IO_INT(r) < 0 signals failure.
- *   @in_param: heap-allocated struct cb_param for this segment (freed here).
- *
- * Decrements the request's outstanding-segment count; the callback that
- * drops it to zero builds and injects the response.  The error counter is
- * updated under the pending record's mutex because callbacks for sibling
- * segments update it as well.
- */
-static void write_cb(struct io_ret r, void *in_param)
-{
-    struct cb_param *param = (struct cb_param *)in_param;
-    pending_t *p = param->pent;
-    blkif_request_t *req = p->req;
-    /* catch errors from the block code. */
-    int failed = (IO_INT(r) < 0);
-
-    pthread_mutex_lock(&p->mutex);
-    if (failed)
-        p->error++;
-    p->count--;
-
-    if (p->count == 0) {
-        /* Last segment: the request buffer is reused as the response. */
-        blkif_response_t *rsp;
-
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = BLKIF_OP_WRITE;
-        if (p->error == 0) {
-            rsp->status = BLKIF_RSP_OKAY;
-        } else {
-            rsp->status = BLKIF_RSP_ERROR;
-        }
-        blktap_inject_response(rsp);
-    }
-
-    pthread_mutex_unlock(&p->mutex);
-
-    free(param); /* TODO: replace with cached alloc/dealloc */
-}
-
-/*
- * parallax_write: dispatch a block write request, one async write per segment.
- *   @req:   the write request; reused as the response buffer on completion.
- *   @blkif: interface the request arrived on, used to find the target VDI.
- *
- *   @return: BLKTAP_STOLEN when completion is left to write_cb, or
- *            BLKTAP_RESPOND with an error response filled into @req.
- *
- * Only whole, block-aligned segments are accepted.  The pending record is
- * fully reset per request (including the error count).  If a segment is
- * rejected or cannot be issued, it and all later segments are written off
- * as failed; the response is sent immediately only when no earlier segment
- * is still in flight, otherwise the last write_cb sends it.
- */
-int parallax_write(blkif_request_t *req, blkif_t *blkif)
-{
-    blkif_response_t *rsp;
-    uint64_t sector;
-    int i, remaining;
-    uint64_t vblock, gblock = 0; /* gblock is only ever printed below */
-    char *spage;
-    unsigned long size, offset, start;
-    vdi_t *vdi;
-    pending_t *pent;
-
-    vdi = blkif_get_vdi(blkif, req->device);
-
-    if ( vdi == NULL )
-        goto err;
-
-    pent = &pending_list[ID_TO_IDX(req->id)];
-    pent->count = req->nr_segments;
-    pent->error = 0;
-    pent->req = req;
-    pthread_mutex_init(&pent->mutex, NULL);
-
-    for (i = 0; i < req->nr_segments; i++) {
-        struct cb_param *p;
-
-        spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-
-        /* Round the requested segment to a block address. */
-
-        sector  = req->sector_number + (8*i);
-        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
-
-        /* Calculate write size and offset within the block. */
-
-        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
-        size = (req->seg[i].last_sect - req->seg[i].first_sect + 1) <<
-            SECTOR_SHIFT;
-        start = req->seg[i].first_sect << SECTOR_SHIFT;
-
-        DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld),  "
-                "vblock %llx, gblock %llx, "
-                "size %lx\n", 
-                sector, 
-                req->seg[i].first_sect, req->seg[i].last_sect,
-                vblock, gblock, size); 
-
-        /* XXX: For now we just freak out if they try to write a   */
-        /* non block-sized, block-aligned page.                    */
-
-        if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) {
-            printf("]\n] STRANGE WRITE!\n]\n");
-            goto abandon;
-        }
-
-        /* TODO: Replace this call to malloc with a cached allocation */
-        p = malloc(sizeof(*p));
-        if (p == NULL)
-            goto abandon;
-        p->pent = pent;
-        p->sector = sector;
-        p->segment = i;
-        p->vblock = vblock; /* dbg */
-
-        /* Issue the write to the store.  A non-zero return means the  */
-        /* write was rejected up front and write_cb will never run.    */
-        if (vdi_write(vdi, vblock, spage, write_cb, (void *)p) != 0) {
-            free(p);
-            goto abandon;
-        }
-    }
-
-    return BLKTAP_STOLEN;
-
-abandon:
-    /* Segments i..nr_segments-1 will never get a callback: account for */
-    /* them here.  If earlier segments are still in flight, the last of */
-    /* their callbacks sends the (error) response.                      */
-    pthread_mutex_lock(&pent->mutex);
-    pent->error++;
-    pent->count -= (req->nr_segments - i);
-    remaining = pent->count;
-    pthread_mutex_unlock(&pent->mutex);
-    if (remaining != 0)
-        return BLKTAP_STOLEN;
-
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_WRITE;
-    rsp->status = BLKIF_RSP_ERROR;
-
-    return BLKTAP_RESPOND;
-}
-
-/*
- * parallax_request: request hook -- route a request to the handler for its
- * operation.  Requests for an unknown interface or with an unknown operation
- * are answered in place with BLKIF_RSP_ERROR.
- */
-int parallax_request(blkif_request_t *req)
-{
-    domid_t  dom   = ID_TO_DOM(req->id);
-    blkif_t *blkif = blkif_find_by_handle(dom, 0);
-    blkif_response_t *rsp;
-
-    if (blkif != NULL) {
-        switch (req->operation) {
-        case BLKIF_OP_PROBE:
-            return parallax_probe(req, blkif);
-        case BLKIF_OP_READ:
-            return parallax_read(req, blkif);
-        case BLKIF_OP_WRITE:
-            return parallax_write(req, blkif);
-        default:
-            /* Unknown operation */
-            printf("Unknown request message type!\n");
-            break;
-        }
-    }
-
-    /* Error response, built on top of the request buffer. */
-    rsp = (blkif_response_t *)req;
-    rsp->operation = req->operation;
-    rsp->id = req->id;
-    rsp->status = BLKIF_RSP_ERROR;
-    return BLKTAP_RESPOND;
-}
-
-/* Zero the whole blkif_hash table before any request is handled. */
-void __init_parallax(void)
-{
-    memset(blkif_hash, 0, sizeof blkif_hash);
-}
-
-
-
-/*
- * Entry point: initialise each subsystem in order (blockstore, async block
- * layer, VDI registry, local state), register the control and request hooks,
- * then hand control to blktap_listen().
- */
-int main(int argc, char *argv[])
-{
-    DPRINTF("parallax: starting.\n"); 
-
-    __init_blockstore();
-    DPRINTF("parallax: initialized blockstore...\n"); 
-
-    init_block_async();
-    DPRINTF("parallax: initialized async blocks...\n"); 
-
-    __init_vdi();
-    DPRINTF("parallax: initialized vdi registry etc...\n"); 
-
-    __init_parallax();
-    DPRINTF("parallax: initialized local stuff..\n"); 
-
-    /* Hooks must be in place before the listen loop starts. */
-    blktap_register_ctrl_hook("parallax_control", parallax_control);
-    blktap_register_request_hook("parallax_request", parallax_request);
-    DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); 
-
-    blktap_listen();
-
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/radix.c
--- a/tools/blktap/parallax/radix.c     Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,631 +0,0 @@
-/*
- * Radix tree for mapping (up to) 63-bit virtual block IDs to
- * 63-bit global block IDs
- *
- * Pointers within the tree set aside the least significant bit to indicate
- * whether or not the target block is writable from this node.
- *
- * The block with ID 0 is assumed to be an empty block of all zeros
- */
-
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <string.h>
-#include <pthread.h>
-#include "blockstore.h"
-#include "radix.h"
-
-#define RADIX_TREE_MAP_SHIFT 9
-#define RADIX_TREE_MAP_MASK 0x1ff
-#define RADIX_TREE_MAP_ENTRIES 512
-
-/*
-#define DEBUG
-*/
-
-/* Experimental radix cache. */
-
-static  pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER; /* held by rcache_write/rcache_read around all cache state */
-static  int rcache_count = 0; /* number of entries currently allocated */
-#define RCACHE_MAX 1024 /* once reached, rcache_write evicts the list tail */
-
-typedef struct rcache_st {
-    radix_tree_node  *node; /* private BLOCK_SIZE copy of the cached block; NOTE(review): declared as pointer-to-radix_tree_node but used as a plain block pointer */
-    uint64_t               id; /* block id this entry caches */
-    struct rcache_st *hash_next; /* next entry in the same hash bucket */
-    struct rcache_st *cache_next; /* next entry towards the tail of the recency list */
-    struct rcache_st *cache_prev; /* previous entry towards the head of the recency list */
-} rcache_t;
-
-static rcache_t *rcache_head = NULL; /* most recently used entry */
-static rcache_t *rcache_tail = NULL; /* eviction candidate */
-
-#define RCHASH_SIZE 512ULL /* bucket count; must stay a power of two for RCACHE_HASH */
-rcache_t *rcache[RCHASH_SIZE]; /* hash buckets, chained through hash_next */
-#define RCACHE_HASH(_id) ((_id) & (RCHASH_SIZE - 1)) /* bucket index = low bits of the block id */
-
-/* Empty every hash bucket of the radix cache. */
-void __rcache_init(void)
-{
-    rcache_t **bucket = rcache;
-    rcache_t **end = rcache + RCHASH_SIZE;
-
-    while (bucket != end)
-        *bucket++ = NULL;
-}
-    
-
-/*
- * rcache_promote: move @r to the head of the recency list.
- * The caller must hold rcache_mutex.
- */
-static void rcache_promote(rcache_t *r)
-{
-    rcache_t *next;
-
-    if (r == rcache_head)
-        return;
-
-    if (r == rcache_tail) {
-        if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
-        rcache_tail->cache_next = NULL;
-    }
-
-    /* unlink from the current position */
-    next = r->cache_next;
-    if (next != NULL) next->cache_prev = r->cache_prev;
-    if (r->cache_prev != NULL) r->cache_prev->cache_next = next;
-
-    /* relink at the front */
-    r->cache_prev = NULL;
-    r->cache_next = rcache_head;
-    if (rcache_head != NULL) rcache_head->cache_prev = r;
-    rcache_head = r;
-}
-
-/*
- * rcache_write: store a private copy of a block in the radix cache.
- *   @id:   block id to cache under
- *   @node: BLOCK_SIZE bytes of block contents to copy
- *
- * An existing entry for @id is overwritten and moved to the front.  For a
- * new id, the tail entry is recycled once RCACHE_MAX entries exist;
- * otherwise a fresh entry is allocated.  The cache is best-effort: if
- * memory for the entry or its block copy cannot be obtained, the block is
- * simply left uncached.
- */
-void rcache_write(uint64_t id, radix_tree_node *node)
-{
-    rcache_t *r, **curs;
-
-    pthread_mutex_lock(&rcache_mutex);
-
-    /* Is it already in the cache? */
-    for (r = rcache[RCACHE_HASH(id)]; r != NULL; r = r->hash_next) {
-        if (r->id == id) {
-            memcpy(r->node, node, BLOCK_SIZE);
-            rcache_promote(r);
-            goto done;
-        }
-    }
-
-    if ( rcache_count == RCACHE_MAX )
-    {
-        /* Remove an entry: unlink the tail from the recency list ... */
-        r = rcache_tail;
-        if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
-        rcache_tail->cache_next = NULL;
-        freeblock(r->node);
-
-        /* ... and from its hash chain. */
-        curs = &rcache[RCACHE_HASH(r->id)];
-        while ((*curs) != r)
-            curs = &(*curs)->hash_next;
-        *curs = r->hash_next;
-
-    } else {
-
-        r = (rcache_t *)malloc(sizeof(rcache_t));
-        if (r == NULL)
-            goto done;
-        rcache_count++;
-    }
-
-    r->node = newblock();
-    if (r->node == NULL) {
-        /* r is on neither list at this point, so it can just be dropped. */
-        free(r);
-        rcache_count--;
-        goto done;
-    }
-    memcpy(r->node, node, BLOCK_SIZE);
-    r->id = id;
-
-    r->hash_next = rcache[RCACHE_HASH(id)];
-    rcache[RCACHE_HASH(id)] = r;
-
-    r->cache_prev = NULL;
-    r->cache_next = rcache_head;
-    if (rcache_head != NULL) rcache_head->cache_prev = r;
-    rcache_head = r;
-    if (rcache_tail == NULL) rcache_tail = r;
-
-done:
-    pthread_mutex_unlock(&rcache_mutex);
-}
-
-/*
- * rcache_read: look a block up in the radix cache.
- *   @id: block id to look up
- *
- *   @return: a freshly allocated copy of the cached block (the caller frees
- *            it with freeblock), or NULL on a miss.  NULL is also returned
- *            when the copy cannot be allocated, so callers fall back to the
- *            block store exactly as they would on a miss.
- *
- * A hit moves the entry to the front of the recency list.
- */
-radix_tree_node *rcache_read(uint64_t id)
-{
-    rcache_t *r, *tmp;
-    radix_tree_node *node = NULL;
-
-    pthread_mutex_lock(&rcache_mutex);
-
-    r = rcache[RCACHE_HASH(id)];
-
-    for (;;) {
-        if (r == NULL) {
-            goto done;
-        }
-        if (r->id == id) break;
-        r = r->hash_next;
-    }
-
-    /* bring to front. */
-    if (r != rcache_head)
-    {
-        if (r == rcache_tail) {
-            if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
-            rcache_tail->cache_next = NULL;
-        }
-        tmp = r->cache_next;
-        if (r->cache_next != NULL) r->cache_next->cache_prev = r->cache_prev;
-        if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
-
-        r->cache_prev = NULL;
-        r->cache_next = rcache_head;
-        if (rcache_head != NULL) rcache_head->cache_prev = r;
-        rcache_head = r;
-    }
-
-    /* Hand out a copy so the caller can free or modify it independently. */
-    node = newblock();
-    if (node != NULL)
-        memcpy(node, r->node, BLOCK_SIZE);
-
-done:
-    pthread_mutex_unlock(&rcache_mutex);
-
-    return(node);
-}
-
-
-/*
- * rc_readblock: read a block through the radix cache.
- * Returns the cached copy when there is one; otherwise reads the block from
- * the store and, if that succeeds, adds it to the cache.  NULL on failure.
- */
-void *rc_readblock(uint64_t id)
-{
-    void *block = (void *)rcache_read(id);
-
-    if (block == NULL) {
-        block = readblock(id);
-        if (block != NULL)
-            rcache_write(id, block);
-    }
-
-    return block;
-}
-
-/*
- * rc_allocblock: allocate a block in the store and cache its contents.
- * Returns the new block id, or ZERO if allocblock() returned ZERO (in which
- * case nothing is cached).
- */
-uint64_t rc_allocblock(void *block)
-{
-    uint64_t id = allocblock(block);
-
-    if (id == ZERO)
-        return id;
-
-    rcache_write(id, block);
-    return id;
-}
-
-/*
- * rc_writeblock: write a block to the store and refresh the cache.
- *   @id:    block id to write
- *   @block: BLOCK_SIZE bytes of new contents
- *
- *   @return: whatever writeblock() returned (negative on failure).
- *
- * The cache is only updated when the write succeeded, so it never holds
- * contents that did not reach the store.
- */
-int rc_writeblock(uint64_t id, void *block)
-{
-    int ret;
-
-    ret = writeblock(id, block);
-    if (ret >= 0)
-        rcache_write(id, block);
-
-    return(ret);
-}
-
-
-/*
- * Block-level helper: works on the raw contents of one node.
- * With this function a block id is just a 63-bit number, with
- * no special consideration for the LSB.
- */
-radix_tree_node cloneblock(radix_tree_node block);
-
-/*
- * Main API.
- * With these functions the LSB of root always indicates
- * whether or not the block is writable, including the return
- * values of update and snapshot.
- */
-uint64_t lookup(int height, uint64_t root, uint64_t key);
-uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val);
-uint64_t snapshot(uint64_t root);
-
-/**
- * cloneblock: clone an existing block in memory
- *   @block: the old block (RADIX_TREE_MAP_ENTRIES entries); may be NULL
- *
- *   @return: new block with every entry masked by ONEMASK (writable bit
- *            cleared), or NULL if @block is NULL or the allocation fails
- */
-radix_tree_node cloneblock(radix_tree_node block) {
-    radix_tree_node node;
-    int i;
-
-    /* Nothing to clone; lets callers pass a failed read straight through. */
-    if (block == NULL)
-        return NULL;
-
-    node = (radix_tree_node) malloc(BLOCK_SIZE);
-    if (node == NULL) {
-        perror("cloneblock malloc");
-        return NULL;
-    }
-    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
-        node[i] = block[i] & ONEMASK;
-    return node;
-}
-
-/**
- * lookup: walk the radix tree from @root and return the entry for @key
- *   @height: height in bits of the radix tree
- *   @root: root node id, with set LSB indicating writable node
- *   @key: key to lookup
- *
- *   @return: the entry found, with its LSB set only if every link followed
- *            on the way down was writable; ZERO if a link is ZERO or a
- *            node cannot be read
- */
-
-uint64_t lookup(int height, uint64_t root, uint64_t key) {
-    radix_tree_node level;
-    uint64_t wbit = ONE;
-
-    assert(key >> height == 0);
-
-    /* the root block may be smaller to ensure all leaves are full */
-    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
-
-    /* consume RADIX_TREE_MAP_SHIFT bits of the key per level */
-    for (;;) {
-#ifdef DEBUG
-        printf("lookup: height=%3d root=%3Ld offset=%3d%s\n", height, root,
-                (int) ((key >> height) & RADIX_TREE_MAP_MASK),
-                (iswritable(root) ? "" : " (readonly)"));
-#endif
-
-        if (getid(root) == ZERO)
-            break;
-
-        level = (radix_tree_node) rc_readblock(getid(root));
-        if (level == NULL)
-            break;
-
-        /* follow the link and fold its writable bit into the result */
-        root = level[(key >> height) & RADIX_TREE_MAP_MASK];
-        wbit &= root;
-        freeblock(level);
-
-        if (height == 0)
-            return ( root & ONEMASK ) | wbit;
-
-        height -= RADIX_TREE_MAP_SHIFT;
-    }
-
-    return ZERO;
-}
-
-/*
- * update: set a radix tree entry, doing copy-on-write as necessary
- *   @height: height in bits of the radix tree
- *   @root: root node id, with set LSB indicating writable node
- *   @key: key to set
- *   @val: value to set, s.t. radix(key)=val
- *
- *   @returns: (possibly new) root id on success (with LSB=1), 0 on failure
- *
- * A node that is not writable from @root is cloned and the clone is stored
- * as a newly allocated block.  A node that cannot be read is reported as a
- * failure before any clone is attempted.
- */
-
-uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val) {
-    int offset;
-    uint64_t child;
-    radix_tree_node node;
-
-    /* base case--return val */
-    if (height == 0)
-        return val;
-
-    /* the root block may be smaller to ensure all leaves are full */
-    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
-    offset = (key >> height) & RADIX_TREE_MAP_MASK;
-
-#ifdef DEBUG
-    printf("update: height=%3d root=%3Ld offset=%3d%s\n", height, root,
-            offset, (iswritable(root)?"":" (clone)"));
-#endif
-
-    /* load a block, or create a new one */
-    if (root == ZERO) {
-        node = (radix_tree_node) newblock();
-    } else {
-        node = (radix_tree_node) rc_readblock(getid(root));
-
-        /* Only clone a node that was actually read. */
-        if (node != NULL && !iswritable(root)) {
-            /* need to clone this node */
-            radix_tree_node oldnode = node;
-            node = cloneblock(oldnode);
-            freeblock(oldnode);
-            /* ZERO root marks the clone as needing a fresh block below */
-            root = ZERO;
-        }
-    }
-
-    if (node == NULL) {
-#ifdef DEBUG
-        printf("update: node is null!\n");
-#endif
-        return ZERO;
-    }
-
-    child = update(height, node[offset], key, val);
-
-    if (child == ZERO) {
-        freeblock(node);
-        return ZERO;
-    } else if (child == node[offset]) {
-        /* no change, so we already owned the child */
-        assert(iswritable(root));
-
-        freeblock(node);
-        return root;
-    }
-
-    node[offset] = child;
-
-    /* new/cloned blocks need to be saved */
-    if (root == ZERO) {
-        /* mark this as an owned block */
-        root = rc_allocblock(node);
-        if (root)
-            root = writable(root);
-    } else if (rc_writeblock(getid(root), node) < 0) {
-        freeblock(node);
-        return ZERO;
-    }
-
-    freeblock(node);
-    return root;
-}
-
-/**
- * snapshot: create a snapshot
- *   @root: old root node
- *
- *   @return: id of a newly allocated copy of the root node, marked
- *            writable; 0 on error
- */
-uint64_t snapshot(uint64_t root) {
-    radix_tree_node orig, copy;
-    uint64_t newroot;
-
-    orig = rc_readblock(getid(root));
-    if (orig == NULL)
-        return ZERO;
-
-    /* the copy has the writable bit of every entry cleared */
-    copy = cloneblock(orig);
-    freeblock(orig);
-    if (copy == NULL)
-        return ZERO;
-
-    newroot = rc_allocblock(copy);
-    freeblock(copy);
-
-    return (newroot == ZERO) ? ZERO : writable(newroot);
-}
-
-/**
- * collapse: collapse a parent onto a child.
- *   @height: height in bits of the subtree below these nodes
- *   @proot:  parent node id (LSB = writable)
- *   @croot:  child node id (LSB = writable)
- *
- *   @return: running count of released blocks, or -1 when either id is ZERO
- *            or a node cannot be read
- *
- * NOTE: This assumes that parent and child really are, and further that
- * there are no other children forked from this parent. (children of the
- * child are okay...)
- *
- * Both node buffers are released before returning, and the result of the
- * last recursive call starts out as 0 so it is never read uninitialised.
- */
-
-int collapse(int height, uint64_t proot, uint64_t croot)
-{
-    int i, numlinks, ret = 0, total = 0;
-    radix_tree_node pnode, cnode;
-
-    if (height == 0) {
-        height = -1; /* terminate recursion */
-    } else {
-        height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
-    }
-    numlinks = (1UL << RADIX_TREE_MAP_SHIFT);
-
-    /* Terminal cases: */
-
-    if ( (getid(proot) == ZERO) || (getid(croot) == ZERO) )
-        return -1;
-
-    /* get roots */
-    if ((pnode = readblock(getid(proot))) == NULL)
-        return -1;
-
-    if ((cnode = readblock(getid(croot))) == NULL)
-    {
-        freeblock(pnode);
-        return -1;
-    }
-
-    /* For each writable link in proot */
-    for (i=0; i<numlinks; i++)
-    {
-        /* identical links are shared with the child: nothing to reclaim */
-        if ( pnode[i] == cnode[i] ) continue;
-
-        /* collapse (next level) */
-        /* if height != 0 and writable... */
-        if (( height >= 0 ) && ( iswritable(pnode[i]) ) )
-        {
-            ret = collapse(height, pnode[i], cnode[i]);
-            if (ret == -1)
-            {
-                total = -1;
-            } else {
-                total += ret;
-            }
-        }
-    }
-
-    /* The node contents were only needed for the comparison above. */
-    freeblock(cnode);
-    freeblock(pnode);
-
-    /* if plink is writable, AND clink is writable -> free plink block */
-    if ( ( iswritable(proot) ) && ( iswritable(croot) ) )
-    {
-        releaseblock(getid(proot));
-        if (ret >=0) total++;
-    }
-    return total;
-
-}
-
-
-/*
- * print_root: dump the tree below @root in Graphviz dot format.
- *   @root:   node to print (LSB = writable)
- *   @height: height in bits of the tree below @root
- *   @dot_f:  stream to append to, or NULL for the top-level call, which
- *            creates "radix.dot" and writes the graph preamble/postamble
- *
- * Recursive calls write to the stream they are given.  Every exit path of
- * the top-level call writes the postamble and closes the file it opened.
- */
-void print_root(uint64_t root, int height, FILE *dot_f)
-{
-    FILE *f = dot_f;
-    int i;
-    radix_tree_node node;
-    char *style[2] = { "", "style=bold,color=blue," };
-
-    if (dot_f == NULL) {
-        f = fopen("radix.dot", "w");
-        if (f == NULL) {
-            perror("print_root: open");
-            return;
-        }
-
-        /* write graph preamble */
-        fprintf(f, "digraph G {\n");
-
-        /* add a node for this root. */
-        fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
-                getid(root), style[iswritable(root)], getid(root));
-    }
-
-    printf("print_root(%Ld)\n", getid(root));
-
-    /* base case */
-    if (height == 0) {
-        /* add a node and edge for each child root */
-        node = (radix_tree_node) readblock(getid(root));
-        if (node == NULL)
-            goto out;
-
-        for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) {
-            if (node[i] != ZERO) {
-                fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
-                        getid(node[i]), style[iswritable(node[i])], 
-                        getid(node[i]));
-                fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
-                        getid(node[i]), i);
-            }
-        }
-        freeblock(node);
-        goto out;
-    }
-
-    /* the root block may be smaller to ensure all leaves are full */
-    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
-
-    if (getid(root) == ZERO)
-        goto out;
-
-    node = (radix_tree_node) readblock(getid(root));
-    if (node == NULL)
-        goto out;
-
-    /* add a node and edge for each child root */
-    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
-        if (node[i] != ZERO) {
-            fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
-                    getid(node[i]), style[iswritable(node[i])], 
-                    getid(node[i]));
-
-            print_root(node[i], height-RADIX_TREE_MAP_SHIFT, f);
-            fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
-                    getid(node[i]), i);
-        }
-
-    freeblock(node);
-
-out:
-    /* write graph postamble (only the call that opened the file) */
-    if (dot_f == NULL) {
-        fprintf(f, "}\n");
-        fclose(f);
-    }
-}
-
-#ifdef RADIX_STANDALONE
-
-/*
- * Interactive test driver: reads one command per line from stdin and applies
- * it to the radix tree (see the command list printed at start-up).  Stops on
- * "quit", on end of input, or when reading stdin fails.
- */
-int main(int argc, char **argv) {
-    uint64_t key = ZERO, val = ZERO;
-    uint64_t root = writable(2ULL);
-    uint64_t p = ZERO, c = ZERO;
-    int v;
-    char buff[4096];
-
-    __init_blockstore();
-
-    /* allocate one all-zero block before taking commands */
-    memset(buff, 0, 4096);
-    allocblock(buff);
-
-    printf("Recognized commands:\n"
-           "Note: the LSB of a node number indicates if it is writable\n"
-           "  root <node>               set root to <node>\n"
-           "  snapshot                  take a snapshot of the root\n"
-           "  set <key> <val>           set key=val\n"
-           "  get <key>                 query key\n"
-           "  c <proot> <croot>         collapse\n"
-           "  pr                        print tree to dot\n"
-           "  pf <1=verbose>            print freelist\n"
-           "  quit\n"
-           "\nroot = %Ld\n", root);
-    for (;;) {
-        printf("> ");
-        fflush(stdout);
-        /* A NULL return leaves buff stale, so never parse it again. */
-        if (fgets(buff, 1024, stdin) == NULL || feof(stdin))
-            break;
-        if (sscanf(buff, " root %Ld", &root) == 1) {
-            printf("root set to %Ld\n", root);
-        } else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) {
-            root = update(34, root, key, val);
-            printf("root = %Ld\n", root);
-        } else if (sscanf(buff, " c %Ld %Ld", &p, &c) == 2) {
-            v = collapse(34, p, c);
-            printf("reclaimed %d blocks.\n", v);
-        } else if (sscanf(buff, " get %Ld", &key) == 1) {
-            val = lookup(34, root, key);
-            printf("value = %Ld\n", val);
-        } else if (!strcmp(buff, "quit\n")) {
-            break;
-        } else if (!strcmp(buff, "snapshot\n")) {
-            root = snapshot(root);
-            printf("new root = %Ld\n", root);
-        } else if (sscanf(buff, " pr %Ld", &root) == 1) {
-            /* NOTE(review): "pr" needs a node argument and replaces root */
-            print_root(root, 34, NULL);
-        } else if (sscanf(buff, " pf %d", &v) == 1) {
-            freelist_count(v);
-        } else if (!strcmp(buff, "pf\n")) {
-            freelist_count(0);
-        } else {
-            printf("command not recognized\n");
-        }
-    }
-    return 0;
-}
-
-#endif
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/radix.h
--- a/tools/blktap/parallax/radix.h     Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-/*
- * Radix tree for mapping (up to) 63-bit virtual block IDs to
- * 63-bit global block IDs
- *
- * Pointers within the tree set aside the least significant bit to indicate
- * whether or not the target block is writable from this node.
- *
- * The block with ID 0 is assumed to be an empty block of all zeros
- */
-
-#ifndef __RADIX_H__
-#define __RADIX_H__
-
-#include <stdint.h> /* uint64_t, used by every declaration below */
-
-/* I don't really like exposing these, but... */
-/* A link is a 63-bit block id shifted left by one; bit 0 is the writable flag. */
-#define getid(x) (((x)>>1)&0x7fffffffffffffffLL)  /* link -> block id */
-#define putid(x) ((x)<<1)                         /* block id -> read-only link */
-#define writable(x) (((x)<<1)|1LL)                /* block id -> writable link */
-#define iswritable(x) ((x)&1LL)                   /* writable flag of a link */
-#define ZERO 0LL
-#define ONE 1LL
-/* Everything except the writable flag.  Written as a complement so that */
-/* all 63 id bits are kept no matter how the literal would be spelled.   */
-#define ONEMASK (~1ULL)
-
-/* Each node holds 2^RADIX_TREE_MAP_SHIFT links. */
-#define RADIX_TREE_MAP_SHIFT 9
-#define RADIX_TREE_MAP_MASK 0x1ff
-#define RADIX_TREE_MAP_ENTRIES 512
-
-typedef uint64_t *radix_tree_node;
-
-
-/*
- * main api
- * with these functions, the LSB of root always indicates
- * whether or not the block is writable, including the return
- * values of update and snapshot
- */
-uint64_t lookup(int height, uint64_t root, uint64_t key);
-uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val);
-uint64_t snapshot(uint64_t root);
-int collapse(int height, uint64_t proot, uint64_t croot);
-int isprivate(int height, uint64_t root, uint64_t key);
-
-
-void __rcache_init(void);
-
-#endif /* __RADIX_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/requests-async.c
--- a/tools/blktap/parallax/requests-async.c    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,762 +0,0 @@
-/* requests-async.c
- *
- * asynchronous request dispatcher for radix access in parallax.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <assert.h>
-#include <pthread.h>
-#include <err.h>
-#include <zlib.h> /* for crc32() */
-#include "requests-async.h"
-#include "vdi.h"
-#include "radix.h"
-
-#define L1_IDX(_a) (((_a) & 0x0000000007fc0000ULL) >> 18) /* bits 18-26: slot in the root (L1) node */
-#define L2_IDX(_a) (((_a) & 0x000000000003fe00ULL) >> 9) /* bits 9-17: slot in the L2 node */
-#define L3_IDX(_a) (((_a) & 0x00000000000001ffULL)) /* bits 0-8: slot in the L3 node */
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-struct block_info {
-    uint32_t        crc; /* crc32 of the data block, computed in vdi_write and checked on read */
-    uint32_t        unused; /* filler; vdi_write stores 0xdeadbeef here */
-};
-
-struct io_req {
-    enum { IO_OP_READ, IO_OP_WRITE } op; /* set by vdi_read / vdi_write */
-    uint64_t        root; /* vdi->radix_root at submission time */
-    uint64_t        vaddr; /* virtual block address, already shifted left by one */
-    int        state; /* current enum states value; selects the callback's case */
-    io_cb_t    cb; /* caller's completion callback */
-    void      *param; /* opaque argument handed back to cb */
-    struct radix_lock *lock; /* vdi->radix_lock; locked per L1 index */
-
-    /* internal stuff: */
-    struct io_ret     retval;/* holds the return while we unlock. */
-    char             *block; /* the block to write */
-    radix_tree_node   radix[3]; /* NOTE(review): presumably the L1..L3 nodes in use; only NULL-initialised in the code visible here */
-    uint64_t               radix_addr[3]; /* block ids per level, indexed by enum radix_offsets */
-    struct block_info bi; /* crc record stored next to the L3 link */
-};
-
-/* Mask every entry of a radix node with ONEMASK (drops the writable bit). */
-void clear_w_bits(radix_tree_node node) 
-{
-    radix_tree_node entry = node;
-    radix_tree_node end = node + RADIX_TREE_MAP_ENTRIES;
-
-    for (; entry != end; entry++)
-        *entry &= ONEMASK;
-}
-
-/*
- * Mask every even-numbered entry of a bottom-level node with ONEMASK.
- * Odd entries are left untouched (they hold block_info records, not links).
- */
-void clear_L3_w_bits(radix_tree_node node) 
-{
-    int slot = 0;
-
-    while (slot < RADIX_TREE_MAP_ENTRIES) {
-        node[slot] &= ONEMASK;
-        slot += 2;
-    }
-}
-
-enum states {
-    /* both read and write paths: a radix node at this level was read */
-    READ_L1,
-    READ_L2,
-    READ_L3,
-
-    /* read path: lock taken, data read, lock dropped, hole => zero block */
-    READ_LOCKED,
-    READ_DATA,
-    READ_UNLOCKED,
-    RETURN_ZERO,
-
-    /* write path (vdi_write starts in WRITE_LOCKED) */
-    WRITE_LOCKED,
-    WRITE_DATA,
-    WRITE_L3,
-    WRITE_UNLOCKED,
-    
-    /* L3 Zero Path */
-    ALLOC_DATA_L3z,
-    WRITE_L3_L3z,
-    
-    /* L3 Fault Path */
-    ALLOC_DATA_L3f,
-    WRITE_L3_L3f,
-    
-    /* L2 Zero Path */
-    ALLOC_DATA_L2z,
-    WRITE_L2_L2z,
-    ALLOC_L3_L2z,
-    WRITE_L2_L3z,
-    
-    /* L2 Fault Path */
-    READ_L3_L2f,
-    ALLOC_DATA_L2f,
-    WRITE_L2_L2f,
-    ALLOC_L3_L2f,
-    WRITE_L2_L3f,
-
-    /* L1 Zero Path */
-    ALLOC_DATA_L1z,
-    ALLOC_L3_L1z,
-    ALLOC_L2_L1z,
-    WRITE_L1_L1z,
-
-    /* L1 Fault Path */
-    READ_L2_L1f,
-    READ_L3_L1f,
-    ALLOC_DATA_L1f,
-    ALLOC_L3_L1f,
-    ALLOC_L2_L1f,
-    WRITE_L1_L1f,
-    
-};
-
-enum radix_offsets {
-    L1 = 0, /* root level; vdi_write stores getid(root) in radix_addr[L1] */
-    L2 = 1, /* middle level */
-    L3 = 2 /* bottom level */
-};
-
-
-static void read_cb(struct io_ret ret, void *param);
-static void write_cb(struct io_ret ret, void *param);
-
-/*
- * vdi_read: start an asynchronous read of one virtual block.
- *   @vdi:   VDI supplying the radix root and radix lock
- *   @vaddr: virtual block address
- *   @cb:    completion callback
- *   @param: opaque argument passed to @cb
- *
- *   @return: 0 once the request has been queued, ERR_BAD_VADDR for an
- *            invalid address, ERR_NOMEM if the request cannot be allocated
- *            (in both error cases @cb is not queued)
- */
-int vdi_read(vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param)
-{
-    struct io_req *io;
-
-    if (!VALID_VADDR(vaddr))
-        return ERR_BAD_VADDR;
-
-    io = (struct io_req *)malloc(sizeof(*io));
-    if (io == NULL)
-        return ERR_NOMEM;
-
-    /* Every second slot in the bottom-level radix node stores crc32   */
-    /* values etc., so the address is doubled to land on a link slot.  */
-    vaddr <<= 1;
-
-    io->op       = IO_OP_READ;
-    io->state    = READ_LOCKED;
-    io->vaddr    = vaddr;
-    io->root     = vdi->radix_root;
-    io->lock     = vdi->radix_lock;
-    io->cb       = cb;
-    io->param    = param;
-    io->radix[0] = NULL;
-    io->radix[1] = NULL;
-    io->radix[2] = NULL;
-
-    /* read_cb resumes in READ_LOCKED once the read lock is granted */
-    block_rlock(io->lock, L1_IDX(vaddr), read_cb, io);
-
-    return 0;
-}
-
-
-/*
- * vdi_write: start an asynchronous write of one virtual block.
- *   @vdi:   VDI supplying the radix root and radix lock
- *   @vaddr: virtual block address
- *   @block: BLOCK_SIZE bytes of data to write
- *   @cb:    completion callback
- *   @param: opaque argument passed to @cb
- *
- *   @return: 0 once the request has been queued, ERR_BAD_VADDR for an
- *            invalid address, ERR_NOMEM if the request cannot be allocated
- *            (in both error cases @cb is not queued)
- */
-int   vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, 
-                io_cb_t cb, void *param)
-{
-    struct io_req *io;
-    uint32_t crc;
-
-    if (!VALID_VADDR(vaddr))
-        return ERR_BAD_VADDR;
-
-    io = (struct io_req *)malloc(sizeof(*io));
-    if (io == NULL)
-        return ERR_NOMEM;
-
-    /* Every second slot in the bottom-level radix node stores crc32   */
-    /* values etc., so the address is doubled to land on a link slot.  */
-    vaddr <<= 1;
-
-    /* Todo: add a pseudoheader to the block to include some location   */
-    /* information in the CRC as well.                                  */
-    crc = (uint32_t) crc32(0L, Z_NULL, 0);
-    crc = (uint32_t) crc32(crc, block, BLOCK_SIZE);
-
-    io->op        = IO_OP_WRITE;
-    io->state     = WRITE_LOCKED;
-    io->vaddr     = vaddr;
-    io->block     = block;
-    io->root      = vdi->radix_root;
-    io->lock      = vdi->radix_lock;
-    io->cb        = cb;
-    io->param     = param;
-    io->bi.crc    = crc;
-    io->bi.unused = 0xdeadbeef;
-    io->radix[0]  = NULL;
-    io->radix[1]  = NULL;
-    io->radix[2]  = NULL;
-    io->radix_addr[L1] = getid(io->root); /* for consistency */
-
-    /* write_cb resumes in WRITE_LOCKED once the write lock is granted */
-    block_wlock(io->lock, L1_IDX(vaddr), write_cb, io);
-
-    return 0;
-}
-
-static void read_cb(struct io_ret ret, void *param)
-{
-    struct io_req *req = (struct io_req *)param;
-    radix_tree_node node;
-    uint64_t idx;
-    char *block;
-    void *req_param;
-
-    DPRINTF("read_cb\n");
-    /* get record */
-    switch(req->state) {
-       
-    case READ_LOCKED: 
-    
-        DPRINTF("READ_LOCKED\n");
-       req->state = READ_L1;
-       block_read(getid(req->root), read_cb, req); 
-       break;
-       
-    case READ_L1: /* block is the radix root */
-
-        DPRINTF("READ_L1\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-        node = (radix_tree_node) block;
-        idx  = getid( node[L1_IDX(req->vaddr)] );
-        free(block);
-        if ( idx == ZERO ) {
-            req->state = RETURN_ZERO;
-            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        } else {
-            req->state = READ_L2;
-            block_read(idx, read_cb, req);
-        }
-        break;
-
-    case READ_L2:
-
-        DPRINTF("READ_L2\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-        node = (radix_tree_node) block;
-        idx  = getid( node[L2_IDX(req->vaddr)] );
-        free(block);
-        if ( idx == ZERO ) {
-            req->state = RETURN_ZERO;
-            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        } else {
-            req->state = READ_L3;
-            block_read(idx, read_cb, req);
-        }
-        break;
-
-    case READ_L3:
-    {
-        struct block_info *bi;
-
-        DPRINTF("READ_L3\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-        node = (radix_tree_node) block;
-        idx  = getid( node[L3_IDX(req->vaddr)] );
-        bi = (struct block_info *) &node[L3_IDX(req->vaddr) + 1];
-        req->bi = *bi;
-        free(block);
-        if ( idx == ZERO )  {
-            req->state = RETURN_ZERO;
-            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        } else {
-            req->state = READ_DATA;
-            block_read(idx, read_cb, req);
-        }
-        break;
-    }
-    case READ_DATA:
-    {
-        uint32_t crc;
-
-        DPRINTF("READ_DATA\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-
-        /* crc check */
-        crc = (uint32_t) crc32(0L, Z_NULL, 0); 
-        crc = (uint32_t) crc32(crc, block, BLOCK_SIZE); 
-        if (crc != req->bi.crc) {
-            /* TODO: add a retry loop here.                          */
-            /* Do this after the cache is added -- make sure to      */
-            /* invalidate the bad page before reissuing the read.    */
-
-            warn("Bad CRC on vaddr (%Lu:%d)\n", req->vaddr, req->bi.unused);
-#ifdef PRINT_BADCRC_PAGES
-            {
-                int j;
-                for (j=0; j<BLOCK_SIZE; j++) {
-                    if isprint(block[j]) {
-                        printf("%c", block[j]);
-                    } else {
-                        printf(".");
-                    }
-                    if ((j % 64) == 0) printf("\n");
-                }
-            }
-#endif /* PRINT_BADCRC_PAGES */
-
-            /* fast and loose for the moment. */
-            /* goto fail;                     */
-        }
-
-        req->retval = ret;
-        req->state = READ_UNLOCKED;
-        block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        break;
-    }
-    case READ_UNLOCKED:
-    {
-        struct io_ret r;
-        io_cb_t cb;
-        DPRINTF("READ_UNLOCKED\n");
-        req_param = req->param;
-        r         = req->retval;
-        cb        = req->cb;
-        free(req);
-        cb(r, req_param);
-        break;
-    }
-    
-    case RETURN_ZERO:
-    {
-        struct io_ret r;
-        io_cb_t cb;
-        DPRINTF("RETURN_ZERO\n");
-        req_param = req->param;
-        cb        = req->cb;
-        free(req);
-        r.type = IO_BLOCK_T;
-        r.u.b = newblock();
-        cb(r, req_param);
-        break;
-    }
-        
-    default:
-       DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
-       goto fail;
-    }
- 
-    return;
-
- fail:
-    {
-        struct io_ret r;
-        io_cb_t cb;
-        DPRINTF("asyn_read had a read error.\n");
-        req_param = req->param;
-        r         = ret;
-        cb        = req->cb;
-        free(req);
-        cb(r, req_param);
-    }
-
-
-}
-
-static void write_cb(struct io_ret r, void *param)
-{
-    struct io_req *req = (struct io_req *)param;
-    radix_tree_node node;
-    uint64_t a, addr;
-    void *req_param;
-    struct block_info *bi;
-
-    switch(req->state) {
-       
-    case WRITE_LOCKED:
-        
-        DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr));
-       req->state = READ_L1;
-       block_read(getid(req->root), write_cb, req); 
-       break;
-       
-    case READ_L1: /* block is the radix root */
-
-        DPRINTF("READ_L1\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        if (node == NULL) goto fail;
-        a    = node[L1_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix_addr[L2] = addr;
-        req->radix[L1] = node;
-
-        if ( addr == ZERO ) {
-            /* L1 empty subtree: */
-            req->state = ALLOC_DATA_L1z;
-            block_alloc( req->block, write_cb, req );
-        } else if ( !iswritable(a) ) {
-            /* L1 fault: */
-            req->state = READ_L2_L1f;
-            block_read( addr, write_cb, req );
-        } else {
-            req->state = READ_L2;
-            block_read( addr, write_cb, req );
-        }
-        break;
-    
-    case READ_L2:
-
-        DPRINTF("READ_L2\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        if (node == NULL) goto fail;
-        a    = node[L2_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix_addr[L3] = addr;
-        req->radix[L2] = node;
-
-        if ( addr == ZERO ) {
-            /* L2 empty subtree: */
-            req->state = ALLOC_DATA_L2z;
-            block_alloc( req->block, write_cb, req );
-        } else if ( !iswritable(a) ) {
-            /* L2 fault: */
-            req->state = READ_L3_L2f;
-            block_read( addr, write_cb, req );
-        } else {
-            req->state = READ_L3;
-            block_read( addr, write_cb, req );
-        }
-        break;
-    
-    case READ_L3:
-
-        DPRINTF("READ_L3\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        if (node == NULL) goto fail;
-        a    = node[L3_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix[L3] = node;
-
-        if ( addr == ZERO ) {
-            /* L3 fault: */
-            req->state = ALLOC_DATA_L3z;
-            block_alloc( req->block, write_cb, req );
-        } else if ( !iswritable(a) ) {
-            /* L3 fault: */
-            req->state = ALLOC_DATA_L3f;
-            block_alloc( req->block, write_cb, req );
-        } else {
-            req->state = WRITE_DATA;
-            block_write( addr, req->block, write_cb, req );
-        }
-        break;
-    
-    case WRITE_DATA:
-
-        DPRINTF("WRITE_DATA\n");
-        /* The L3 radix points to the correct block, we just need to  */
-        /* update the crc.                                            */
-        if (IO_INT(r) < 0) goto fail;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 101;
-        *bi = req->bi;
-        req->state = WRITE_L3;
-        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
-        break;
-    
-    /* L3 Zero Path: */
-
-    case ALLOC_DATA_L3z:
-
-        DPRINTF("ALLOC_DATA_L3z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 102;
-        *bi = req->bi;
-        req->state = WRITE_L3_L3z;
-        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
-        break;
-    
-    /* L3 Fault Path: */
-
-    case ALLOC_DATA_L3f:
-    
-        DPRINTF("ALLOC_DATA_L3f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 103;
-        *bi = req->bi;
-        req->state = WRITE_L3_L3f;
-        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
-        break;
-
-    /* L2 Zero Path: */
-        
-    case ALLOC_DATA_L2z:
-
-        DPRINTF("ALLOC_DATA_L2z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3] = newblock();
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 104;
-        *bi = req->bi;
-        req->state = ALLOC_L3_L2z;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L2z:
-
-        DPRINTF("ALLOC_L3_L2z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L2][L2_IDX(req->vaddr)] = a;
-        req->state = WRITE_L2_L2z;
-        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
-        break;
-        
-    /* L2 Fault Path: */
-        
-    case READ_L3_L2f:
-    
-       DPRINTF("READ_L3_L2f\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        clear_L3_w_bits(node);
-        if (node == NULL) goto fail;
-        a    = node[L2_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix[L3] = node;
-        req->state = ALLOC_DATA_L2f;
-        block_alloc( req->block, write_cb, req );
-        break;
-                
-    case ALLOC_DATA_L2f:
-
-        DPRINTF("ALLOC_DATA_L2f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 105;
-        *bi = req->bi;
-        req->state = ALLOC_L3_L2f;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L2f:
-
-        DPRINTF("ALLOC_L3_L2f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L2][L2_IDX(req->vaddr)] = a;
-        req->state = WRITE_L2_L2f;
-        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
-        break;
-        
-    /* L1 Zero Path: */
-    
-    case ALLOC_DATA_L1z:
-
-        DPRINTF("ALLOC_DATA_L1z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3] = newblock();
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 106;
-        *bi = req->bi;
-        req->state = ALLOC_L3_L1z;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-        
-    case ALLOC_L3_L1z:
-
-        DPRINTF("ALLOC_L3_L1z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L2] = newblock();
-        req->radix[L2][L2_IDX(req->vaddr)] = a;
-        req->state = ALLOC_L2_L1z;
-        block_alloc( (char*)req->radix[L2], write_cb, req );
-        break;
-
-    case ALLOC_L2_L1z:
-
-        DPRINTF("ALLOC_L2_L1z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L1][L1_IDX(req->vaddr)] = a;
-        req->state = WRITE_L1_L1z;
-        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
-        break;
-
-    /* L1 Fault Path: */
-        
-    case READ_L2_L1f:
-    
-       DPRINTF("READ_L2_L1f\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        clear_w_bits(node);
-        if (node == NULL) goto fail;
-        a    = node[L2_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix_addr[L3] = addr;
-        req->radix[L2] = node;
-        
-        if (addr == ZERO) {
-            /* nothing below L2, create an empty L3 and alloc data. */
-            /* (So skip READ_L3_L1f.) */
-            req->radix[L3] = newblock();
-            req->state = ALLOC_DATA_L1f;
-            block_alloc( req->block, write_cb, req );
-        } else {
-            req->state = READ_L3_L1f;
-            block_read( addr, write_cb, req );
-        }
-        break;
-        
-    case READ_L3_L1f:
-    
-       DPRINTF("READ_L3_L1f\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        clear_L3_w_bits(node);
-        if (node == NULL) goto fail;
-        a    = node[L2_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix[L3] = node;
-        req->state = ALLOC_DATA_L1f;
-        block_alloc( req->block, write_cb, req );
-        break;
-                
-    case ALLOC_DATA_L1f:
-
-        DPRINTF("ALLOC_DATA_L1f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 107;
-        *bi = req->bi;
-        req->state = ALLOC_L3_L1f;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L1f:
-
-        DPRINTF("ALLOC_L3_L1f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L2][L2_IDX(req->vaddr)] = a;
-        req->state = ALLOC_L2_L1f;
-        block_alloc( (char*)req->radix[L2], write_cb, req );
-        break;
-
-    case ALLOC_L2_L1f:
-
-        DPRINTF("ALLOC_L2_L1f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L1][L1_IDX(req->vaddr)] = a;
-        req->state = WRITE_L1_L1f;
-        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
-        break;
-
-    case WRITE_L3:
-    case WRITE_L3_L3z:
-    case WRITE_L3_L3f:
-    case WRITE_L2_L2z:
-    case WRITE_L2_L2f:
-    case WRITE_L1_L1z:
-    case WRITE_L1_L1f:
-    {
-       int i;
-        DPRINTF("DONE\n");
-        /* free any saved node vals. */
-        for (i=0; i<3; i++)
-            if (req->radix[i] != 0) free(req->radix[i]);
-        req->retval = r;
-        req->state = WRITE_UNLOCKED;
-        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);
-        break;
-    }
-    case WRITE_UNLOCKED:
-    {
-        struct io_ret r;
-        io_cb_t cb;
-        DPRINTF("WRITE_UNLOCKED!\n");
-        req_param = req->param;
-        r         = req->retval;
-        cb        = req->cb;
-        free(req);
-        cb(r, req_param);
-        break;
-    }
-        
-    default:
-       DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
-       goto fail;
-    }
-    
-    return;
-    
- fail:
-    {
-        struct io_ret r;
-        io_cb_t cb;
-        int i;
-
-        DPRINTF("asyn_write had a read error mid-way.\n");
-        req_param = req->param;
-        cb        = req->cb;
-        r.type = IO_INT_T;
-        r.u.i  = -1;
-        /* free any saved node vals. */
-        for (i=0; i<3; i++)
-            free(req->radix[i]);
-        free(req);
-        cb(r, req_param);
-    }
-}
-
-char *vdi_read_s(vdi_t *vdi, uint64_t vaddr)
-{
-    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
-    char *block = NULL;
-    int ret;
-
-    void reads_cb(struct io_ret r, void *param) 
-    {
-        block = IO_BLOCK(r);
-        pthread_mutex_unlock((pthread_mutex_t *)param);
-    }
-
-    pthread_mutex_lock(&m);
-    ret = vdi_read(vdi, vaddr, reads_cb, &m);
-
-    if (ret == 0) pthread_mutex_lock(&m);
-    
-    return block;
-}
-
-
-int vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block)
-{
-    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
-    int ret, result;
-
-    void writes_cb(struct io_ret r, void *param) 
-    {
-        result = IO_INT(r);
-        pthread_mutex_unlock((pthread_mutex_t *)param);
-    }
-
-    pthread_mutex_lock(&m);
-    ret = vdi_write(vdi, vaddr, block, writes_cb, &m);
-
-    if (ret == 0) pthread_mutex_lock(&m);
-    
-    return result;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/requests-async.h
--- a/tools/blktap/parallax/requests-async.h    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-#ifndef _REQUESTSASYNC_H_
-#define _REQUESTSASYNC_H_
-
-#include "block-async.h"
-#include "blockstore.h" /* for newblock etc. */
-
-/*
-#define BLOCK_SIZE 4096
-#define ZERO 0ULL
-#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU)
-#define iswritable(x) (((x) & 1LLU) != 0)
-#define writable(x) (((x) << 1) | 1LLU)
-#define readonly(x) ((uint64_t)((x) << 1))
-*/
-
-#define VADDR_MASK 0x0000000003ffffffLLU /* 26-bits = 256Gig */
-#define VALID_VADDR(x) (((x) & VADDR_MASK) == (x))
-
-int vdi_read (vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param);
-int vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, io_cb_t cb, void *param);
-             
-/* synchronous versions: */
-char *vdi_read_s (vdi_t *vdi, uint64_t vaddr);
-int   vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block);
-
-#define ERR_BAD_VADDR  -1
-#define ERR_NOMEM      -2
-
-#endif //_REQUESTSASYNC_H_
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/snaplog.c
--- a/tools/blktap/parallax/snaplog.c   Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,238 +0,0 @@
-/**************************************************************************
- * 
- * snaplog.c
- *
- * Snapshot log on-disk data structure.
- *
- */
- 
- /* VDI histories are made from chains of snapshot logs.  These logs record 
-  * the (radix) root and timestamp of individual snapshots.
-  *
-  * creation of a new VDI involves 'forking' a snapshot log, by creating a 
-  * new, empty log (in a new VDI) and parenting it off of a record in an 
-  * existing snapshot log.
-  *
-  * snapshot log blocks have at most one writer.
-  */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "snaplog.h"
-
-
-
-snap_block_t *snap_get_block(uint64_t block)
-{
-    snap_block_t *blk = (snap_block_t *)readblock(block);
-    
-    if ( blk == NULL)
-        return NULL;
-    if ( blk->hdr.magic != SNAP_MAGIC ) {
-        freeblock(blk);
-        return NULL;
-    }
-    
-    return blk;
-}
-    
-int snap_get_id(snap_id_t *id, snap_rec_t *target)
-{
-    snap_block_t *blk;
-    
-    if ( id == NULL )
-        return -1;
-    
-    blk = snap_get_block(id->block);
-    
-    if ( blk == NULL ) 
-        return -1;
-    
-    if ( id->index > blk->hdr.nr_entries ) {
-        freeblock(blk);
-        return -1;
-    }
-    
-    *target = blk->snaps[id->index];
-    freeblock(blk);
-    return 0;
-}
-
-int __snap_block_create(snap_id_t *parent_id, snap_id_t *fork_id,
-                                  snap_id_t *new_id)
-{
-    snap_rec_t parent_rec, fork_rec;
-    snap_block_t *blk, *pblk;
-    /*
-    if ( (parent_id != NULL) && (snap_get_id(parent_id, &parent_rec) != 0) )
-        return -1;    
-    
-    if ( (fork_id != NULL) && (snap_get_id(fork_id, &fork_rec) != 0) )
-        return -1;   
-*/
-    blk = (snap_block_t *)newblock();
-    blk->hdr.magic  = SNAP_MAGIC;
-    blk->hdr.nr_entries  = 0;
-    blk->hdr.log_entries = 0;
-    blk->hdr.immutable   = 0;
-    
-    if (   (parent_id  != NULL) 
-        && (parent_id->block != fork_id->block) 
-        && (parent_id->block != 0)) {
-        
-        pblk = snap_get_block(parent_id->block);
-        blk->hdr.log_entries = pblk->hdr.log_entries;
-        freeblock(pblk);
-    }
-    
-    if (parent_id != NULL) {
-        blk->hdr.parent_block = *parent_id;
-        blk->hdr.fork_block   = *fork_id;
-    } else {
-        blk->hdr.parent_block = null_snap_id;
-        blk->hdr.fork_block   = null_snap_id;
-    }
-    
-    new_id->index = 0;
-    new_id->block = allocblock(blk);
-    freeblock(blk);
-    if (new_id->block == 0)
-        return -1;
-    
-    return 0;
-}
-
-int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id)
-{
-    return __snap_block_create(parent_id, parent_id, new_id);
-}
-
-int snap_append(snap_id_t *old_id, snap_rec_t *rec, snap_id_t *new_id)
-{
-    snap_id_t id = *old_id;
-    snap_block_t *blk = snap_get_block(id.block);
-    
-    if ( rec->deleted == 1 ) {
-        printf("Attempt to append a deleted snapshot!\n");
-        return -1;
-    }
-    
-    if ( blk->hdr.immutable != 0 ) {
-        printf("Attempt to snap an immutable snap block!\n");
-        return -1;
-    }
-    
-    new_id->block = id.block;
-    
-    if (blk->hdr.nr_entries == SNAPS_PER_BLOCK) {
-        int ret;
-        
-        id.index--; /* make id point to the last full record */
-        
-        ret = __snap_block_create(&id, &blk->hdr.fork_block, new_id);
-        if ( ret != 0 ) {
-            freeblock(blk);
-            return -1;
-        }
-        
-        blk->hdr.immutable = 1;
-        writeblock(id.block, blk);
-        freeblock(blk);
-        blk = snap_get_block(new_id->block);
-        id = *new_id;
-    }
-    
-    blk->snaps[blk->hdr.nr_entries] = *rec;
-    blk->hdr.nr_entries++;
-    blk->hdr.log_entries++;
-    new_id->index = blk->hdr.nr_entries;
-    //printf("snap: %u %u\n", blk->hdr.nr_entries, blk->hdr.log_entries);
-    writeblock(id.block, blk);
-    freeblock(blk);
-    return 0;
-}
-
-int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id)
-{
-    snap_block_t *p_blk, *c_blk, *blk;
-    snap_rec_t   *p_rec, *c_rec;
-    int ret = -1;
-    
-    p_blk = snap_get_block(p_id->block);
-    
-    if (p_blk == NULL) return(-1);
-    
-    if (c_id->block == p_id->block)
-    {
-        c_blk = p_blk;
-    } else {
-         c_blk = snap_get_block(c_id->block);
-    }
-    
-    if (p_blk == NULL) {
-        freeblock(p_blk);
-        return(-1);
-    }
-     
-    /* parent and child must not be deleted. */
-    p_rec = &p_blk->snaps[p_id->index];
-    c_rec = &c_blk->snaps[c_id->index];
-    /*
-    if ( (p_rec->deleted == 1) || (c_rec->deleted == 1) ) {
-        printf("One of those snaps is already deleted.\n");
-        goto done;
-    }
-    */
-    /* first non-deleted thing in the log before child must be parent. */
-    
-    /* XXX todo: text the range here for delete (and eventually fork) bits) */
-    /* for now, snaps must be consecutive, on the same log page: */
-    
-    if ((p_id->block != c_id->block) || (p_id->index != c_id->index-1))
-    {
-        printf("Deleting non-consecutive snaps is not done yet.\n");
-        goto done;
-    }
-    
-    /* mark parent as deleted XXX: may need to lock parent block here.*/
-    p_rec->deleted = 1;
-    writeblock(p_id->block, p_blk);
-    
-    /* delete the parent */
-    printf("collapse(%Ld, %Ld)\n", p_rec->radix_root, c_rec->radix_root);
-    ret = collapse(height, p_rec->radix_root, c_rec->radix_root);
-    
-    /* return the number of blocks reclaimed. */
-    
-done:
-    if (c_blk != p_blk) freeblock(c_blk);
-    freeblock(p_blk);
-    
-    return(ret);
-}
-
-void snap_print_history(snap_id_t *snap_id)
-{
-    snap_id_t id = *snap_id;
-    unsigned int idx = id.index;
-    snap_block_t *new_blk, *blk = snap_get_block(id.block);
-    
-    while ( blk ) {
-        printf("[Snap block %Ld]:\n", id.block);
-        do {
-            printf("   %03u: root: %Ld ts: %ld.%ld\n", idx, 
-                    blk->snaps[idx].radix_root,
-                    blk->snaps[idx].timestamp.tv_sec,
-                    blk->snaps[idx].timestamp.tv_usec);
-        } while (idx-- != 0);
-        
-        id = blk->hdr.parent_block;
-        if (id.block != 0) {
-            new_blk = snap_get_block(id.block);
-        }
-        freeblock(blk);
-        blk = new_blk;
-    }
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/snaplog.h
--- a/tools/blktap/parallax/snaplog.h   Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-/**************************************************************************
- * 
- * snaplog.h
- *
- * Snapshot log on-disk data structure.
- *
- */
- 
-#include "radix.h"
-#include "blockstore.h"    /* for BLOCK_SIZE */
- 
-#ifndef __SNAPLOG_H__
-#define __SNAPLOG_H__
-
-typedef struct snap_id {
-    uint64_t            block;
-    unsigned int   index;
-} snap_id_t;
-
-typedef struct snap_rec {
-    uint64_t            radix_root;
-    struct timeval timestamp;
-    /* flags: */
-    unsigned       deleted:1;
-} snap_rec_t;
-
-
-int  snap_block_create(snap_id_t *parent_id, snap_id_t *new_id);
-int  snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id);
-int  snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id);
-void snap_print_history(snap_id_t *snap_id);
-int  snap_get_id(snap_id_t *id, snap_rec_t *target);
-
-
-/* exported for vdi debugging */
-#define SNAP_MAGIC 0xff00ff0aa0ff00ffLL
-
-static const snap_id_t null_snap_id = { 0, 0 }; 
-
-typedef struct snap_block_hdr {
-    uint64_t            magic;
-    snap_id_t      parent_block; /* parent block within this chain */
-    snap_id_t      fork_block;   /* where this log was forked */
-    unsigned       log_entries;  /* total entries since forking */
-    unsigned short nr_entries;   /* entries in snaps[] */
-    unsigned short immutable;    /* has this snap page become immutable? */
-} snap_block_hdr_t;
-
-
-#define SNAPS_PER_BLOCK \
-    ((BLOCK_SIZE - sizeof(snap_block_hdr_t)) / sizeof(snap_rec_t))
-
-typedef struct snap_block {
-    snap_block_hdr_t hdr;
-    snap_rec_t       snaps[SNAPS_PER_BLOCK];
-} snap_block_t;
-    
-
-snap_block_t *snap_get_block(uint64_t block);
-
-#endif /* __SNAPLOG_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi.c
--- a/tools/blktap/parallax/vdi.c       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,367 +0,0 @@
-/**************************************************************************
- * 
- * vdi.c
- *
- * Virtual Disk Image (VDI) Interfaces
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <string.h>
-#include <sys/time.h>
-#include <pthread.h>
-#include "blockstore.h"
-#include "block-async.h"
-#include "requests-async.h"
-#include "radix.h"
-#include "vdi.h"
-                    
-#define VDI_REG_BLOCK   2LL
-#define VDI_RADIX_ROOT  writable(3)
-                                                            
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* I haven't decided about this registry stuff, so this is just a really
- * quick lash-up so that there is some way to track VDIs.
- *
- * (Most vdi access should be with a direct handle to the block, so this
- *  registry is just for start-of-day lookup and other control operations.)
- */
-
-vdi_registry_t *create_vdi_registry(void)
-{
-    vdi_registry_t *reg = (vdi_registry_t *)newblock();
-    
-    if (reg == NULL)
-        return NULL;
-    
-    /* zero-fill the vdi radix root while we have an empty block. */
-    writeblock(VDI_RADIX_ROOT, (void *)reg);
-    
-    
-    DPRINTF("[vdi.c] Creating VDI registry!\n");
-    reg->magic      = VDI_REG_MAGIC;
-    reg->nr_vdis    = 0;
-    
-    writeblock(VDI_REG_BLOCK, (void *)reg);
-    
-    return reg;
-}
-    
-vdi_registry_t *get_vdi_registry(void)
-{
-    vdi_registry_t *vdi_reg = (vdi_registry_t *)readblock(VDI_REG_BLOCK);
-    
-    if ( vdi_reg == NULL )
-        vdi_reg = create_vdi_registry();
-    
-    if ( vdi_reg->magic != VDI_REG_MAGIC ) {
-        freeblock(vdi_reg);
-        return NULL;
-    }
-    
-    return vdi_reg;
-}
-
-
-vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
-{
-    int ret;
-    vdi_t *vdi;
-    vdi_registry_t *vdi_reg;
-    snap_rec_t snap_rec;
-    
-    /* create a vdi struct */
-    vdi = newblock();
-    if (vdi == NULL) 
-        return NULL;
-    
-    if ( snap_get_id(parent_snap, &snap_rec) == 0 ) {
-        vdi->radix_root = snapshot(snap_rec.radix_root);
-    } else {
-        vdi->radix_root = allocblock((void *)vdi); /* vdi is just zeros here */
-        vdi->radix_root = writable(vdi->radix_root); /* grr. */
-    }
-    
-    /* create a snapshot log, and add it to the vdi struct */
-    
-    ret = snap_block_create(parent_snap, &vdi->snap);
-    if ( ret != 0 ) {
-        DPRINTF("Error getting snap block in vdi_create.\n");
-        freeblock(vdi);
-        return NULL;
-    }
-            
-    /* append the vdi to the registry, fill block and id.             */
-    /* implicit allocation means we have to write the vdi twice here. */
-    vdi_reg    = get_vdi_registry();
-    if ( vdi_reg == NULL ) {
-        freeblock(vdi);
-        return NULL;
-    }
-    
-    vdi->block = allocblock((void *)vdi);
-    vdi->id    = vdi_reg->nr_vdis++;
-    strncpy(vdi->name, name, VDI_NAME_SZ);
-    vdi->name[VDI_NAME_SZ] = '\0';
-    vdi->radix_lock = NULL; /* for tidiness */
-    writeblock(vdi->block, (void *)vdi);
-    
-    update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block);
-    writeblock(VDI_REG_BLOCK, (void *)vdi_reg);
-    freeblock(vdi_reg);
-    
-    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
-    if (vdi->radix_lock == NULL) 
-    {
-       perror("couldn't malloc radix_lock for new vdi!");
-       freeblock(vdi);
-       return NULL;
-    }
-    radix_lock_init(vdi->radix_lock);
-    
-    return vdi;
-}
-
-/* vdi_get and vdi_put currently act more like alloc/free -- they don't 
- * do refcount-based allocation.  
- */
-vdi_t *vdi_get(uint64_t vdi_id)
-{
-    uint64_t vdi_blk;
-    vdi_t *vdi;
-    
-    vdi_blk = lookup(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi_id);
-    
-    if ( vdi_blk == 0 )
-        return NULL;
-    
-    vdi = (vdi_t *)readblock(vdi_blk);
-    
-    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
-    if (vdi->radix_lock == NULL) 
-    {
-       perror("couldn't malloc radix_lock for new vdi!");
-       freeblock(vdi);
-       return NULL;
-    }
-    radix_lock_init(vdi->radix_lock);
-    
-    return vdi;
-}
-
-void vdi_put(vdi_t *vdi)
-{
-    free(vdi->radix_lock);
-    freeblock(vdi);
-}
-
-void vdi_snapshot(vdi_t *vdi)
-{
-    snap_rec_t rec;
-    int ret;
-    
-    rec.radix_root = vdi->radix_root;
-    gettimeofday(&rec.timestamp, NULL);
-    rec.deleted = 0;
-    
-    vdi->radix_root = snapshot(vdi->radix_root);
-    ret = snap_append(&vdi->snap, &rec, &vdi->snap);
-    if ( ret != 0 ) {
-        printf("snap_append returned failure\n");
-        return;
-    }
-    writeblock(vdi->block, vdi);
-}
-    
-int __init_vdi()
-{
-    /* sneak this in here for the moment. */
-    __rcache_init();
-    
-    /* force the registry to be created if it doesn't exist. */
-    vdi_registry_t *vdi_reg = get_vdi_registry();
-    if (vdi_reg == NULL) {
-        printf("[vdi.c] Couldn't get/create a VDI registry!\n");
-        return -1;
-    }
-    freeblock(vdi_reg);
-    
-    
-    return 0;
-}
-    
-#ifdef VDI_STANDALONE
-
-#define TEST_VDIS      50
-#define NR_ITERS    50000
-#define FORK_POINTS   200
-#define INIT_VDIS       3
-#define INIT_SNAPS     40
-
-/* These must be of decreasing size: */
-#define NEW_FORK       (RAND_MAX-(RAND_MAX/1000))
-#define NEW_ROOT_VDI   (RAND_MAX-((RAND_MAX/1000)*2))
-#define NEW_FORK_VDI   (RAND_MAX-((RAND_MAX/1000)*3))
-
-#define GRAPH_DOT_FILE "vdi.dot"
-#define GRAPH_PS_FILE  "vdi.ps"
-
-
-typedef struct sh_st {
-    snap_id_t     id;
-    struct sh_st *next;
-} sh_t;
-
-#define SNAP_HASHSZ 1024
-sh_t *node_hash[SNAP_HASHSZ];
-#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
-
-#define SNAPID_EQUAL(_a,_b) \
-    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
-int sh_check_and_add(snap_id_t *id)
-{
-    sh_t **s = &node_hash[SNAP_HASH(id)];
-    
-    while (*s != NULL) {
-        if (SNAPID_EQUAL(&((*s)->id), id))
-            return 1;
-        *s = (*s)->next;
-    }
-    
-    *s = (sh_t *)malloc(sizeof(sh_t));
-    (*s)->id = *id;
-    (*s)->next = NULL;
-    
-    return 0;
-}
-
-int main(int argc, char *argv[])
-{
-    vdi_t *vdi_list[TEST_VDIS];
-    snap_id_t id, fork_points[FORK_POINTS];
-    int nr_vdis = 0, nr_forks = 0;
-    int i, j, r;
-    FILE *f;
-    char name[VDI_NAME_SZ];
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    printf("[o] Generating seed VDIs. (%d VDIs)\n", INIT_VDIS);
-    
-    for (i=0; i<INIT_VDIS; i++) {
-        r=rand();
-        
-        sprintf(name, "VDI Number %d", nr_vdis);
-        vdi_list[i] = vdi_create(NULL, name);
-        for (j=0; j<(r%INIT_SNAPS); j++)
-            vdi_snapshot(vdi_list[i]);
-        fork_points[i] = vdi_list[i]->snap;
-        nr_vdis++;
-        nr_forks++;
-    }
-    
-    printf("[o] Running a random workload. (%d iterations)\n", NR_ITERS);
-            
-    for (i=0; i<NR_ITERS; i++) {
-        r = rand();
-        
-        if ( r > NEW_FORK ) {
-            if ( nr_forks > FORK_POINTS )
-                continue;
-            id = vdi_list[r%nr_vdis]->snap;
-            if ( ( id.block == 0 ) || ( id.index == 0 ) )
-                continue;
-            id.index--;
-            fork_points[nr_forks++] = id;
-            
-        } else if ( r > NEW_ROOT_VDI ) {
-            
-            if ( nr_vdis == TEST_VDIS )
-                continue;
-            
-            sprintf(name, "VDI Number %d.", nr_vdis);
-            vdi_list[nr_vdis++] = vdi_create(NULL, name);
-            
-        } else if ( r > NEW_FORK_VDI ) {
-            
-            if ( nr_vdis == TEST_VDIS )
-                continue;
-            
-            sprintf(name, "VDI Number %d.", nr_vdis);
-            vdi_list[nr_vdis++] = vdi_create(&fork_points[r%nr_forks], name);
-            
-        } else /* SNAPSHOT */ {
-            
-            vdi_snapshot(vdi_list[r%nr_vdis]);
-            
-        }
-    }
-    
-    /* now dump it out to a dot file. */
-    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
-    
-    f = fopen(GRAPH_DOT_FILE, "w");
-    
-    /* write graph preamble */
-    fprintf(f, "digraph G {\n");
-    fprintf(f, "   rankdir=LR\n");
-    
-    for (i=0; i<nr_vdis; i++) {
-        char oldnode[255];
-        snap_block_t *blk;
-        snap_id_t id = vdi_list[i]->snap;
-        int nr_snaps, done=0;
-        
-        /* add a node for the id */
-printf("vdi: %d\n", i);
-        fprintf(f, "   n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 
-                id.block, id.index, vdi_list[i]->name,
-                id.block, id.index);
-        sprintf(oldnode, "n%Ld%d", id.block, id.index);
-        
-        while (id.block != 0) {
-            blk = snap_get_block(id.block);
-            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
-            id = blk->hdr.fork_block;
-            
-            done = sh_check_and_add(&id);
-            
-            /* add a node for the fork_id */
-            if (!done) {
-                fprintf(f, "   n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 
-                    id.block, id.index,
-                    id.block, id.index);
-            }
-            
-            /* add an edge between them */
-            fprintf(f, "   n%Ld%d -> %s [label=\"%u snapshots\"]\n",
-                    id.block, id.index, oldnode, nr_snaps);
-            sprintf(oldnode, "n%Ld%d", id.block, id.index);
-            freeblock(blk);
-            
-            if (done) break;
-        }
-    }
-    
-    /* write graph postamble */
-    fprintf(f, "}\n");
-    fclose(f);
-    
-    printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
-    {
-        char cmd[255];
-        sprintf(cmd, "dot %s -Tps -o %s", GRAPH_DOT_FILE, GRAPH_PS_FILE);
-        system(cmd);
-    }
-    return 0;
-}
-
-#endif
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi.h
--- a/tools/blktap/parallax/vdi.h       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#ifndef _VDI_H_
-#define _VDI_H_
-/**************************************************************************
- * 
- * vdi.h
- *
- * Virtual Disk Image (VDI) Interfaces
- *
- */
-
-#ifndef __VDI_H__
-#define __VDI_H__
-
-#include "blktaplib.h"
-#include "snaplog.h"
-
-#define VDI_HEIGHT     27 /* Note that these are now hard-coded */
-#define VDI_REG_HEIGHT 27 /* in the async lookup code           */
-
-#define VDI_NAME_SZ 256
-
-
-typedef struct vdi {
-    uint64_t         id;               /* unique vdi id -- used by the registry   */
-    uint64_t         block;            /* block where this vdi lives (also unique)*/
-    uint64_t         radix_root;       /* radix root node for block mappings      */
-    snap_id_t   snap;             /* next snapshot slot for this VDI         */
-    struct vdi *next;             /* used to hash-chain in blkif.            */
-    blkif_vdev_t vdevice;         /* currently mounted as...                 */
-    struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs   */
-    char        name[VDI_NAME_SZ];/* human readable vdi name                 */
-} vdi_t;
-
-#define VDI_REG_MAGIC   0xff00ff0bb0ff00ffLL
-
-typedef struct vdi_registry {
-    uint64_t     magic;
-    uint64_t     nr_vdis;
-} vdi_registry_t;
-
-
-int __init_vdi(void);
-
-vdi_t *vdi_get(uint64_t vdi_id);
-void vdi_put(vdi_t *vdi);
-vdi_registry_t *get_vdi_registry(void);
-vdi_t *vdi_create(snap_id_t *parent_snap, char *name);
-uint64_t vdi_lookup_block(vdi_t *vdi, uint64_t vdi_block, int *writable);
-void vdi_update_block(vdi_t *vdi, uint64_t vdi_block, uint64_t g_block);
-void vdi_snapshot(vdi_t *vdi);
-
-
-#endif /* __VDI_H__ */
-
-#endif //_VDI_H_
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_create.c
--- a/tools/blktap/parallax/vdi_create.c        Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-/**************************************************************************
- * 
- * vdi_create.c
- *
- * Create a new vdi.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_t       *vdi;
-    char         name[VDI_NAME_SZ] = "";
-    snap_id_t    id;
-    int          from_snap = 0;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    if ( argc == 1 ) {
-        printf("usage: %s <VDI Name> [<snap block> <snap idx>]\n", argv[0]);
-        exit(-1);
-    }
-    
-    strncpy( name, argv[1], VDI_NAME_SZ);
-    name[VDI_NAME_SZ] = '\0';    
-    
-    if ( argc > 3 ) {
-        id.block   = (uint64_t)          atoll(argv[2]);
-        id.index   = (unsigned int) atol (argv[3]);
-        from_snap  = 1;
-    }
-    
-    vdi = vdi_create( from_snap ? &id : NULL, name);
-    
-    if ( vdi == NULL ) {
-        printf("Failed to create VDI!\n");
-        freeblock(vdi);
-        exit(-1);
-    }
-    
-    freeblock(vdi);
-    
-    return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_fill.c
--- a/tools/blktap/parallax/vdi_fill.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-/**************************************************************************
- * 
- * vdi_fill.c
- *
- * Hoover a file or device into a vdi.
- * You must first create the vdi with vdi_create.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "requests-async.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_t       *vdi;
-    uint64_t          id;
-    int          fd;
-    struct stat  st;
-    uint64_t          tot_size;
-    char         spage[BLOCK_SIZE];
-    char        *dpage;
-    uint64_t          vblock = 0, count=0;
-    
-    __init_blockstore();
-    init_block_async();
-    __init_vdi();
-    
-    if ( argc < 3 ) {
-        printf("usage: %s <VDI id> <filename>\n", argv[0]);
-        exit(-1);
-    }
-        
-    id = (uint64_t) atoll(argv[1]);
-    
-    vdi = vdi_get( id );
-    
-    if ( vdi == NULL ) {
-        printf("Failed to retreive VDI %Ld!\n", id);
-        exit(-1);
-    }
-    
-    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
-    
-    if (fd < 0) {
-        printf("Couldn't open %s!\n", argv[2]);
-        exit(-1);
-    }
-    
-    if ( fstat(fd, &st) != 0 ) {
-        printf("Couldn't stat %s!\n", argv[2]);
-        exit(-1);
-    }
-    
-    tot_size = (uint64_t) st.st_size;
-    printf("Filling VDI %Ld with %Ld bytes.\n", id, tot_size);
-    
-    printf("%011Ld blocks total\n", tot_size / BLOCK_SIZE);    
-    printf("           ");
-    while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
-        vdi_write_s(vdi, vblock, spage);
-        
-        vblock++;
-        if ((vblock % 512) == 0)
-        printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
-        fflush(stdout);
-    }
-    printf("\n");
-    
-    freeblock(vdi);
-    
-    return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_list.c
--- a/tools/blktap/parallax/vdi_list.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-/**************************************************************************
- * 
- * vdi_list.c
- *
- * Print a list of VDIs on the block store.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_registry_t *reg;
-    vdi_t *vdi;
-    int i;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    reg = get_vdi_registry();
-    
-    if ( reg == NULL ) {
-        printf("couldn't get VDI registry.\n");
-        exit(-1);
-    }
-    
-    for (i=0; i < reg->nr_vdis; i++) {
-        vdi = vdi_get(i);
-        
-        if ( vdi != NULL ) {
-            
-            printf("%10Ld %60s\n", vdi->id, vdi->name);
-            freeblock(vdi);
-            
-        }
-    }
-    
-    freeblock(reg);
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_snap.c
--- a/tools/blktap/parallax/vdi_snap.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-/**************************************************************************
- * 
- * vdi_snap.c
- *
- * Snapshot a vdi.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_t  *vdi;
-    uint64_t     id;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    if ( argc == 1 ) {
-        printf("usage: %s <VDI id>\n", argv[0]);
-        exit(-1);
-    }
-    
-    id = (uint64_t) atoll(argv[1]);
-    
-    vdi = vdi_get(id);
-    
-    if ( vdi == NULL ) {
-        printf("couldn't find the requested VDI.\n");
-        freeblock(vdi);
-        exit(-1);
-    }
-    
-    vdi_snapshot(vdi);
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_snap_delete.c
--- a/tools/blktap/parallax/vdi_snap_delete.c   Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-/**************************************************************************
- * 
- * vdi_snap_delete.c
- *
- * Delete a snapshot.
- *
- * This is not finished:  right now it takes a snap n and calls 
- * snap_collapse(n,n+1).
- *
- * TODO: support for non-consecutive, non-same-block snaps
- *       Avoid forking probs.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "snaplog.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    snap_id_t    id, c_id;
-    int ret;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    if ( argc != 3 ) {
-        printf("usage: %s <snap block> <snap idx>\n", argv[0]);
-        exit(-1);
-    }
-    
-    id.block   = (uint64_t)          atoll(argv[1]);
-    id.index   = (unsigned int) atol (argv[2]);
-    
-    c_id = id;
-    c_id.index++;
-    
-    ret = snap_collapse(VDI_HEIGHT, &id, &c_id);
-    
-    printf("Freed %d blocks.\n", ret);
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_snap_list.c
--- a/tools/blktap/parallax/vdi_snap_list.c     Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-/**************************************************************************
- * 
- * vdi_snap_list.c
- *
- * Print a list of snapshots for the specified vdi.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_t        *vdi;
-    uint64_t           id;
-    int           i, max_snaps = -1;
-    snap_block_t *blk;
-    snap_id_t     sid;
-    char         *t;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    if ( argc == 1 ) {
-        printf("usage: %s <VDI id> [max snaps]\n", argv[0]);
-        exit(-1);
-    }
-    
-    id = (uint64_t) atoll(argv[1]);
-    
-    if ( argc > 2 ) {
-        max_snaps = atoi(argv[2]);
-    }
-    
-    vdi = vdi_get(id);
-    
-    if ( vdi == NULL ) {
-        printf("couldn't find the requested VDI.\n");
-        freeblock(vdi);
-        exit(-1);
-    }
-    
-    sid = vdi->snap;
-    sid.index--;
-    
-    //printf("%8s%4s%21s %12s %1s\n", "Block", "idx", "timestamp", 
-    //    "radix root", "d");
-    printf("%8s%4s%37s %12s %1s\n", "Block", "idx", "timestamp", 
-            "radix root", "d");
-     
-    while (sid.block != 0) {
-        blk = snap_get_block(sid.block);
-        for (i = sid.index; i >= 0; i--) {
-            if ( max_snaps == 0  ) {
-                freeblock(blk);
-                goto done;
-            }
-            t = ctime(&blk->snaps[i].timestamp.tv_sec);
-            t[strlen(t)-1] = '\0';
-            //printf("%8Ld%4u%14lu.%06lu %12Ld %1s\n",
-            printf("%8Ld%4u%30s %06lu %12Ld %1s\n",
-                    sid.block, i, 
-                    //blk->snaps[i].timestamp.tv_sec,
-                    t,
-                    blk->snaps[i].timestamp.tv_usec,
-                    blk->snaps[i].radix_root,
-                    blk->snaps[i].deleted ? "*" : " ");
-            if ( max_snaps != -1 ) 
-                max_snaps--;
-        }
-        sid = blk->hdr.parent_block;
-        freeblock(blk);
-    }
-done:            
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_tree.c
--- a/tools/blktap/parallax/vdi_tree.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,132 +0,0 @@
-/**************************************************************************
- * 
- * vdi_tree.c
- *
- * Output current vdi tree to dot and postscript.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-#define GRAPH_DOT_FILE "vdi.dot"
-#define GRAPH_PS_FILE  "vdi.ps"
-
-typedef struct sh_st {
-    snap_id_t     id;
-    struct sh_st *next;
-} sh_t;
-
-#define SNAP_HASHSZ 1024
-sh_t *node_hash[SNAP_HASHSZ];
-#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
-
-#define SNAPID_EQUAL(_a,_b) \
-    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
-int sh_check_and_add(snap_id_t *id)
-{
-    sh_t **s = &node_hash[SNAP_HASH(id)];
-    
-    while (*s != NULL) {
-        if (SNAPID_EQUAL(&((*s)->id), id))
-            return 1;
-        *s = (*s)->next;
-    }
-    
-    *s = (sh_t *)malloc(sizeof(sh_t));
-    (*s)->id = *id;
-    (*s)->next = NULL;
-    
-    return 0;
-}
-
-int main(int argc, char *argv[])
-{
-    FILE *f;
-    char dot_file[255] = GRAPH_DOT_FILE;
-    char  ps_file[255] = GRAPH_PS_FILE;
-    int nr_vdis = 0, nr_forks = 0;
-    vdi_registry_t *reg;
-    vdi_t *vdi;
-    int i;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    reg = get_vdi_registry();
-    
-    if ( reg == NULL ) {
-        printf("couldn't get VDI registry.\n");
-        exit(-1);
-    }
-    
-    if ( argc > 1 ) {
-        strncpy(ps_file, argv[1], 255);
-        ps_file[255] = '\0';
-    }
-    
-    /* now dump it out to a dot file. */
-    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
-    
-    f = fopen(dot_file, "w");
-    
-    /* write graph preamble */
-    fprintf(f, "digraph G {\n");
-    fprintf(f, "   rankdir=LR\n");
-    
-    for (i=0; i<reg->nr_vdis; i++) {
-        char oldnode[255];
-        snap_block_t *blk;
-        snap_id_t id;
-        int nr_snaps, done=0;
-        
-        vdi = vdi_get(i);
-        id = vdi->snap;
-        /* add a node for the id */
-printf("vdi: %d\n", i);
-        fprintf(f, "   n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 
-                id.block, id.index, vdi->name,
-                id.block, id.index);
-        sprintf(oldnode, "n%Ld%d", id.block, id.index);
-        
-        while (id.block != 0) {
-            blk = snap_get_block(id.block);
-            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
-            id = blk->hdr.fork_block;
-            
-            done = sh_check_and_add(&id);
-            
-            /* add a node for the fork_id */
-            if (!done) {
-                fprintf(f, "   n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 
-                    id.block, id.index,
-                    id.block, id.index);
-            }
-            
-            /* add an edge between them */
-            fprintf(f, "   n%Ld%d -> %s [label=\"%u snapshots\"]\n",
-                    id.block, id.index, oldnode, nr_snaps);
-            sprintf(oldnode, "n%Ld%d", id.block, id.index);
-            freeblock(blk);
-            
-            if (done) break;
-        }
-    }
-    
-    /* write graph postamble */
-    fprintf(f, "}\n");
-    fclose(f);
-    
-    printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
-    {
-        char cmd[255];
-        sprintf(cmd, "dot %s -Tps -o %s", dot_file, ps_file);
-        system(cmd);
-    }
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_unittest.c
--- a/tools/blktap/parallax/vdi_unittest.c      Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,184 +0,0 @@
-/**************************************************************************
- * 
- * vdi_unittest.c
- *
- * Run a small test workload to ensure that data access through a vdi
- * is (at least superficially) correct.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "requests-async.h"
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-#define TEST_PAGES  32
-static char *zero_page;
-static char pages[TEST_PAGES][BLOCK_SIZE];
-static int next_page = 0;
-
-void fill_test_pages(void)
-{
-    int i, j;
-    long *page;
-
-    for (i=0; i< TEST_PAGES; i++) {
-        page = (unsigned long *)pages[i];
-        for (j=0; j<(BLOCK_SIZE/4); j++) {
-            page[j] = random();
-        }
-    }
-
-    zero_page = newblock();
-}
-
-inline uint64_t make_vaddr(uint64_t L1, uint64_t L2, uint64_t L3)
-{
-    uint64_t ret = L1;
-
-    ret = (ret << 9) | L2;
-    ret = (ret << 9) | L3;
-
-    return ret;
-}
-
-void touch_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3)
-{
-    uint64_t vaddr;
-    char *page = pages[next_page++];
-    char *rpage = NULL;
-
-    printf("TOUCH (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
-
-    vaddr = make_vaddr(L1, L2, L3);
-    vdi_write_s(vdi, vaddr, page);
-    rpage = vdi_read_s(vdi, vaddr);
-
-    if (rpage == NULL) 
-    {
-        printf( "read %Lu returned NULL\n", vaddr); 
-        return; 
-    }
-
-    if (memcmp(page, rpage, BLOCK_SIZE) != 0)
-    {
-        printf( "read %Lu returned a different page\n", vaddr);
-        return;
-    }
-
-    freeblock(rpage);
-}
-
-void test_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3, char *page)
-{
-    uint64_t vaddr;
-    char *rpage = NULL;
-
-    printf("TEST  (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
-
-    vaddr = make_vaddr(L1, L2, L3);
-    rpage = vdi_read_s(vdi, vaddr);
-
-    if (rpage == NULL) 
-    {
-        printf( "read %Lu returned NULL\n", vaddr); 
-        return; 
-    }
-
-    if (memcmp(page, rpage, BLOCK_SIZE) != 0)
-    {
-        printf( "read %Lu returned a different page\n", vaddr);
-        return;
-    }
-
-    freeblock(rpage);
-}
-
-void coverage_test(vdi_t *vdi)
-{
-    uint64_t vaddr;
-    int i, j, k;
-
-    /* Do a series of writes and reads to test all paths through the 
-     * async radix code.  The radix request code will dump CRC warnings
-     * if there are data problems here as well.
-     */
-
-    /* L1 Zero */
-    touch_block(vdi, 0, 0, 0);
-
-    /* L2 Zero */
-    i = next_page;
-    touch_block(vdi, 0, 1, 0);
-
-    /* L3 Zero */
-    j = next_page;
-    touch_block(vdi, 0, 0, 1);
-    k = next_page;
-    touch_block(vdi, 0, 1, 1);
-
-    /* Direct write */
-    touch_block(vdi, 0, 0, 0);
-
-    vdi_snapshot(vdi);
-
-    /* L1 fault */
-    touch_block(vdi, 0, 0, 0);
-    /* test the read-only branches that should have been copied over. */
-    test_block(vdi, 0, 1, 0, pages[i]);
-    test_block(vdi, 0, 0, 1, pages[j]);
-
-    /* L2 fault */
-    touch_block(vdi, 0, 1, 0);
-    test_block(vdi, 0, 1, 1, pages[k]);
-
-    /* L3 fault */
-    touch_block(vdi, 0, 0, 1);
-    
-    /* read - L1 zero */
-    test_block(vdi, 1, 0, 0, zero_page);
-    
-    /* read - L2 zero */
-    test_block(vdi, 0, 2, 0, zero_page);
-
-    /* read - L3 zero */
-    test_block(vdi, 0, 0, 2, zero_page);
-}
-
-int main(int argc, char *argv[])
-{
-    vdi_t       *vdi;
-    uint64_t          id;
-    int          fd;
-    struct stat  st;
-    uint64_t          tot_size;
-    char         spage[BLOCK_SIZE];
-    char        *dpage;
-    uint64_t          vblock = 0, count=0;
-    
-    __init_blockstore();
-    init_block_async();
-    __init_vdi();
-        
-    vdi = vdi_create( NULL, "UNIT TEST VDI");
-    
-    if ( vdi == NULL ) {
-        printf("Failed to create VDI!\n");
-        freeblock(vdi);
-        exit(-1);
-    }
-
-    fill_test_pages();
-    coverage_test(vdi);
-    
-    freeblock(vdi);
-    
-    return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_validate.c
--- a/tools/blktap/parallax/vdi_validate.c      Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-/**************************************************************************
- * 
- * vdi_validate.c
- *
- * Intended to sanity-check vm_fill and the underlying vdi code.
- *
- * Block-by-block compare of a vdi with a file/device on the disk.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-#include "requests-async.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_t       *vdi;
-    uint64_t          id;
-    int          fd;
-    struct stat  st;
-    uint64_t          tot_size;
-    char         spage[BLOCK_SIZE], *dpage;
-    char        *vpage;
-    uint64_t          vblock = 0, count=0;
-    
-    __init_blockstore();
-    init_block_async();
-    __init_vdi();
-    
-    if ( argc < 3 ) {
-        printf("usage: %s <VDI id> <filename>\n", argv[0]);
-        exit(-1);
-    }
-        
-    id = (uint64_t) atoll(argv[1]);
-    
-    vdi = vdi_get( id );
-    
-    if ( vdi == NULL ) {
-        printf("Failed to retreive VDI %Ld!\n", id);
-        exit(-1);
-    }
-    
-    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
-    
-    if (fd < 0) {
-        printf("Couldn't open %s!\n", argv[2]);
-        exit(-1);
-    }
-    
-    if ( fstat(fd, &st) != 0 ) {
-        printf("Couldn't stat %s!\n", argv[2]);
-        exit(-1);
-    }
-    
-    tot_size = (uint64_t) st.st_size;
-    printf("Testing VDI %Ld (%Ld bytes).\n", id, tot_size);
-    
-    printf("           ");
-    while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
-
-        dpage = vdi_read_s(vdi, vblock);
-
-        if (dpage == NULL) {
-            printf("\n\nfound an unmapped VDI block (%Ld)\n", vblock);
-            exit(0);
-        }
-
-        if (memcmp(spage, dpage, BLOCK_SIZE) != 0) {
-            printf("\n\nblocks don't match! (%Ld)\n", vblock);
-            exit(0);
-        }
-        
-        freeblock(dpage);
-        
-        vblock++;
-        if ((vblock % 1024) == 0) {
-            printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
-            fflush(stdout);
-        }
-    }
-    printf("\n");
-    
-    printf("VDI %Ld looks good!\n", id);
-    
-    freeblock(vdi);
-    
-    return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/Makefile
--- a/tools/blktap/ublkback/Makefile    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-
-XEN_ROOT = ../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-INCLUDES += -I..
-
-INSTALL            = install
-INSTALL_PROG = $(INSTALL) -m0755
-IBIN         = ublkback
-INSTALL_DIR  = /usr/sbin
-
-CFLAGS   += -Werror
-CFLAGS   += -Wno-unused
-CFLAGS   += -fno-strict-aliasing
-CFLAGS   += -I $(XEN_LIBXC)
-CFLAGS   += $(INCLUDES) -I.
-CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS     = .*.d
-
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-
-.PHONY: all
-all: $(IBIN)
-
-LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
-
-.PHONY: install
-install:
-       $(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INSTALL_DIR)
-
-.PHONY: clean
-clean:
-       rm -rf *.o*~ $(DEPS) xen TAGS $(IBIN)
-
-ublkback: 
-       $(CC) $(CFLAGS) -o ublkback -L$(XEN_LIBXC) -L. -L..  \
-             -lblktap -laio ublkback.c ublkbacklib.c -pg
-
--include $(DEPS)
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/ublkback.c
--- a/tools/blktap/ublkback/ublkback.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-/* ublkback.c
- *
- * libaio-based userlevel backend.
- */
-
-#include "blktaplib.h"
-#include "ublkbacklib.h"
-
-
-int main(int argc, char *argv[])
-{
-    ublkback_init();
-    
-    register_new_blkif_hook(ublkback_new_blkif);
-    blktap_listen();
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/ublkbacklib.c
--- a/tools/blktap/ublkback/ublkbacklib.c       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,473 +0,0 @@
-/* ublkbacklib.c
- *
- * file/device image-backed block device -- using linux libaio.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- *
- * NOTE: This doesn't work.  Grrr.
- */
-
-#define _GNU_SOURCE
-#define __USE_LARGEFILE64
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <string.h>
-#include <db.h>       
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/poll.h>
-#include <unistd.h>
-#include <errno.h>
-#include <libaio.h>
-#include <pthread.h>
-#include <time.h>
-#include <err.h>
-#include "blktaplib.h"
-
-/* XXXX:  */
-/* Current code just mounts this file/device to any requests that come in. */
-//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
-#define TMP_IMAGE_FILE_NAME "fc3.image"
-
-#define MAX_REQUESTS            64 /* must be synced with the blkif drivers. */
-#define MAX_SEGMENTS_PER_REQ    11
-#define SECTOR_SHIFT             9
-#define MAX_AIO_REQS   (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-           
-#if 1                                                                        
-#define ASSERT(_p) \
-    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif                                                                     
-
-/* Note on pending_reqs: I assume all reqs are queued before they start to 
- * get filled.  so count of 0 is an unused record.
- */
-typedef struct {
-    blkif_request_t  req;
-    blkif_t         *blkif;
-    int              count;
-} pending_req_t;
-
-static pending_req_t    pending_list[MAX_REQUESTS];
-static io_context_t  ctx;
-static struct iocb  *iocb_free[MAX_AIO_REQS];
-static int           iocb_free_count;
-
-/* ---[ Notification mecahnism ]--------------------------------------- */
-
-enum { 
-    READ   = 0,
-    WRITE  = 1
-};
-
-static int aio_notify[2];
-static volatile int aio_listening = 0;
-static pthread_mutex_t notifier_sem = PTHREAD_MUTEX_INITIALIZER;
-
-static struct io_event aio_events[MAX_AIO_REQS];
-static int             aio_event_count = 0;
-
-/* this is commented out in libaio.h for some reason. */
-extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
-
-static void *notifier_thread(void *arg)
-{
-    int ret; 
-    int msg = 0x00feeb00;
-    
-    DPRINTF("Notifier thread started.\n");
-    for (;;) {
-        pthread_mutex_lock(&notifier_sem);
-        if ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0) {
-            aio_event_count = ret;
-            write(aio_notify[WRITE], &msg, sizeof(msg));
-        } else {
-                printf("[io_queue_wait error! %d]\n", errno);
-                pthread_mutex_unlock(&notifier_sem);
-        }
-    }
-}
-
-/* --- Talking to xenstore: ------------------------------------------- */
-
-int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done);
-int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done);
-
-typedef struct image {
-    /* These need to turn into an array/rbtree for multi-disk support. */
-    int  fd;
-    uint64_t  fsid;
-    blkif_vdev_t   vdevice;
-    long int size;
-    long int secsize;
-    long int info;
-} image_t;
-
-long int ublkback_get_size(blkif_t *blkif)
-{
-    image_t *img = (image_t *)blkif->prv;
-    return img->size;
-}
-
-long int ublkback_get_secsize(blkif_t *blkif)
-{
-    image_t *img = (image_t *)blkif->prv;
-    return img->secsize;
-}
-
-unsigned ublkback_get_info(blkif_t *blkif)
-{
-    image_t *img = (image_t *)blkif->prv;
-    return img->info;
-}
-
-static struct blkif_ops ublkback_ops = {
-    get_size:    ublkback_get_size,
-    get_secsize: ublkback_get_secsize,
-    get_info:    ublkback_get_info,
-};
-
-int ublkback_new_blkif(blkif_t *blkif)
-{
-    image_t *image;
-    struct stat stat;
-    int ret;
-
-    image = (image_t *)malloc(sizeof(image_t));
-    if (image == NULL) {
-        printf("error allocating image record.\n");
-        return -ENOMEM;
-    }
-
-    /* Open it. */
-    image->fd = open(TMP_IMAGE_FILE_NAME, 
-                     O_RDWR | O_DIRECT | O_LARGEFILE);
-
-    if ((image->fd < 0) && (errno == EINVAL)) {
-        /* Maybe O_DIRECT isn't supported. */
-        warn("open() failed on '%s', trying again without O_DIRECT",
-               TMP_IMAGE_FILE_NAME);
-        image->fd = open(TMP_IMAGE_FILE_NAME, O_RDWR | O_LARGEFILE);
-    }
-
-    if (image->fd < 0) {
-        warn("Couldn't open image file!");
-        free(image);
-        return -EINVAL;
-    }
-
-    /* Size it. */
-    ret = fstat(image->fd, &stat);
-    if (ret != 0) {
-        printf("Couldn't stat image in PROBE!");
-        return -EINVAL;
-    }
-    
-    image->size = (stat.st_size >> SECTOR_SHIFT);
-
-    /* TODO: IOCTL to get size of raw device. */
-/*
-  ret = ioctl(img->fd, BLKGETSIZE, &blksize);
-  if (ret != 0) {
-  printf("Couldn't ioctl image in PROBE!\n");
-  goto err;
-  }
-*/
-    if (image->size == 0)
-        image->size =((uint64_t) 16836057);
-    image->secsize = 512;
-    image->info = 0;
-
-    /* Register the hooks */
-    blkif_register_request_hook(blkif, "Ublkback req.", ublkback_request);
-    blkif_register_response_hook(blkif, "Ublkback resp.", ublkback_response);
-
-
-    printf(">X<Created a new blkif! pdev was %ld, but you got %s\n", 
-           blkif->pdev, TMP_IMAGE_FILE_NAME);
-
-    blkif->ops = &ublkback_ops;
-    blkif->prv = (void *)image;
-
-    return 0;
-}
-
-
-/* --- Moving the bits: ----------------------------------------------- */
-
-static int batch_count = 0;
-int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done)
-{
-    int fd;
-    uint64_t sector;
-    char *spage, *dpage;
-    int ret, i, idx;
-    blkif_response_t *rsp;
-    domid_t dom = ID_TO_DOM(req->id);
-    static struct iocb *ioq[MAX_SEGMENTS_PER_REQ*MAX_REQUESTS]; 
-    static int io_idx = 0;
-    struct iocb *io;
-    image_t *img;
-
-    img = (image_t *)blkif->prv;
-    fd = img->fd;
-
-    switch (req->operation) 
-    {
-    case BLKIF_OP_WRITE:
-    {
-        unsigned long size;
-
-        batch_count++;
-
-        idx = ID_TO_IDX(req->id);
-        ASSERT(pending_list[idx].count == 0);
-        memcpy(&pending_list[idx].req, req, sizeof(*req));
-        pending_list[idx].count = req->nr_segments;
-        pending_list[idx].blkif = blkif;
-        
-        for (i = 0; i < req->nr_segments; i++) {
-            
-            sector = req->sector_number + (8*i);
-            
-            size = req->seg[i].last_sect - req->seg[i].first_sect + 1;
-            
-            if (req->seg[i].first_sect != 0)
-                DPRINTF("iWR: sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
-                        "pos: %15lu\n",
-                        req->sector_number, sector, 
-                        req->seg[i].first_sect, req->seg[i].last_sect,
-                        (long)(sector << SECTOR_SHIFT));
-                        
-            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            spage += req->seg[i].first_sect << SECTOR_SHIFT;
-            
-            /*convert size and sector to byte offsets */
-            size   <<= SECTOR_SHIFT;
-            sector <<= SECTOR_SHIFT;
-            
-            io = iocb_free[--iocb_free_count];
-            io_prep_pwrite(io, fd, spage, size, sector);
-            io->data = (void *)idx;
-            //ioq[i] = io;
-            ioq[io_idx++] = io;
-        }
-
-        if (batch_done) {
-            ret = io_submit(ctx, io_idx, ioq);
-            batch_count = 0;
-            if (ret < 0)
-                printf("BADNESS: io_submit error! (%d)\n", errno);
-            io_idx = 0;
-        }
-        
-        return BLKTAP_STOLEN;
-        
-    }
-    case BLKIF_OP_READ:
-    {
-        unsigned long size;
-        
-        batch_count++;
-        idx = ID_TO_IDX(req->id);
-        ASSERT(pending_list[idx].count == 0);
-        memcpy(&pending_list[idx].req, req, sizeof(*req));
-        pending_list[idx].count = req->nr_segments;
-        pending_list[idx].blkif = blkif;
-        
-        for (i = 0; i < req->nr_segments; i++) {
-            
-            sector  = req->sector_number + (8*i);
-            
-            size = req->seg[i].last_sect - req->seg[i].first_sect + 1;
-
-            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            dpage += req->seg[i].first_sect << SECTOR_SHIFT;
-            
-            if (req->seg[i].first_sect != 0)
-                DPRINTF("iRD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
-                        "pos: %15lu dpage: %p\n", 
-                        req->sector_number, sector, 
-                        req->seg[i].first_sect, req->seg[i].last_sect,
-                        (long)(sector << SECTOR_SHIFT), dpage);
-            
-            /*convert size and sector to byte offsets */
-            size   <<= SECTOR_SHIFT;
-            sector <<= SECTOR_SHIFT;
-            
-
-            /*
-             * NB: Looks like AIO now has non-page aligned support, this path 
-             * can probably be removed...  Only really used for hunting
-             * superblocks anyway... ;)
-             */
-            if ( ((unsigned long)dpage % PAGE_SIZE) != 0 ) {
-                /* AIO to raw devices must be page aligned, so do this read
-                 * synchronously.  The OS is probably just looking for 
-                 * a superblock or something, so this won't hurt performance. 
-                 */
-                int ret;
-
-                printf("Slow path block read.\n");
-                /* Question: do in-progress aio ops modify the file cursor? */
-                ret = lseek(fd, sector, SEEK_SET);
-                if (ret == (off_t)-1)
-                    printf("lseek failed!\n");
-                ret = read(fd, dpage, size);
-                if (ret < 0)
-                    printf("read problem (%d)\n", ret);
-                printf("|\n|\n| read: %lld, %lu, %d\n|\n|\n", sector, size, ret);
-
-                /* not an async request any more... */
-                pending_list[idx].count--;
-
-                rsp = (blkif_response_t *)req;
-                rsp->id = req->id;
-                rsp->operation = BLKIF_OP_READ;
-                rsp->status = BLKIF_RSP_OKAY;
-                return BLKTAP_RESPOND;  
-                /* Doh -- need to flush aio if this is end-of-batch */
-            }
-
-            io = iocb_free[--iocb_free_count];
-            
-            io_prep_pread(io, fd, dpage, size, sector);
-            io->data = (void *)idx;
-            
-            ioq[io_idx++] = io;
-            //ioq[i] = io;
-        }
-        
-        if (batch_done) {
-            ret = io_submit(ctx, io_idx, ioq);
-            batch_count = 0;
-            if (ret < 0)
-                printf("BADNESS: io_submit error! (%d)\n", errno);
-            io_idx = 0;
-        }
-        
-        return BLKTAP_STOLEN;
-        
-    }
-    }
-    
-    printf("Unknown block operation!\n");
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = req->operation;
-    rsp->status = BLKIF_RSP_ERROR;
-    return BLKTAP_RESPOND;  
-}
-
-
-int ublkback_pollhook(int fd)
-{
-    struct io_event *ep;
-    int n, ret, idx;
-    blkif_request_t *req;
-    blkif_response_t *rsp;
-    int responses_queued = 0;
-    int pages=0;
-    
-    for (ep = aio_events; aio_event_count-- > 0; ep++) {
-        struct iocb *io = ep->obj;
-        idx = (int) ep->data;
-        
-        if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
-            printf("invalid index returned(%u)!\n", idx);
-            break;
-        }
-        
-        if ((int)ep->res < 0) 
-            printf("***\n***aio request error! (%d,%d)\n***\n", 
-                   (int)ep->res, (int)ep->res2);
-        
-        pending_list[idx].count--;
-        iocb_free[iocb_free_count++] = io;
-        pages++;
-
-        if (pending_list[idx].count == 0) {
-            blkif_request_t tmp = pending_list[idx].req;
-            rsp = (blkif_response_t *)&pending_list[idx].req;
-            rsp->id = tmp.id;
-            rsp->operation = tmp.operation;
-            rsp->status = BLKIF_RSP_OKAY;
-            blkif_inject_response(pending_list[idx].blkif, rsp);
-            responses_queued++;
-        }
-    }
-
-    if (responses_queued) {
-        blktap_kick_responses();
-    }
-    
-    read(aio_notify[READ], &idx, sizeof(idx));
-    aio_listening = 1;
-    pthread_mutex_unlock(&notifier_sem);
-    
-    return 0;
-}
-
-/* the image library terminates the request stream. _resp is a noop. */
-int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done)
-{   
-    return BLKTAP_PASS;
-}
-
-void ublkback_init(void)
-{
-    int i, rc;
-    pthread_t p;
-    
-    for (i = 0; i < MAX_REQUESTS; i++)
-        pending_list[i].count = 0; 
-    
-    memset(&ctx, 0, sizeof(ctx));
-    rc = io_queue_init(MAX_AIO_REQS, &ctx);
-    if (rc != 0) {
-        printf("queue_init failed! (%d)\n", rc);
-        exit(0);
-    }
-    
-    for (i=0; i<MAX_AIO_REQS; i++) {
-        if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) {
-            printf("error allocating iocb array\n");
-            exit(0);
-        }
-        iocb_free_count = i;
-    }
-    
-    rc = pipe(aio_notify);
-    if (rc != 0) {
-        printf("pipe failed! (%d)\n", errno);
-        exit(0);
-    }
-    
-    rc = pthread_create(&p, NULL, notifier_thread, NULL);
-    if (rc != 0) {
-        printf("pthread_create failed! (%d)\n", errno);
-        exit(0);
-    }
-    
-    aio_listening = 1;
-    
-    blktap_attach_poll(aio_notify[READ], POLLIN, ublkback_pollhook);
-}
-
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/ublkbacklib.h
--- a/tools/blktap/ublkback/ublkbacklib.h       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-/* blkaiolib.h
- *
- * aio image-backed block device.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-int  ublkback_request(blkif_request_t *req, int batch_done);
-int  ublkback_response(blkif_response_t *rsp); /* noop */
-int  ublkback_new_blkif(blkif_t *blkif);
-void ublkback_init(void);
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/xenbus.c
--- a/tools/blktap/xenbus.c     Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,568 +0,0 @@
-/*
- * xenbus.c
- * 
- * xenbus interface to the blocktap.
- * 
- * this handles the top-half of integration with block devices through the
- * store -- the tap driver negotiates the device channel etc, while the
- * userland tap clinet needs to sort out the disk parameters etc.
- * 
- * A. Warfield 2005 Based primarily on the blkback and xenbus driver code.  
- * Comments there apply here...
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <err.h>
-#include <stdarg.h>
-#include <errno.h>
-#include <xs.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <poll.h>
-#include "blktaplib.h"
-#include "list.h"
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* --- Xenstore / Xenbus helpers ---------------------------------------- */
-/*
- * These should all be pulled out into the xenstore API.  I'm faulting commands
- * in from the xenbus interface as i need them.
- */
-
-
-/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
-int xs_gather(struct xs_handle *xs, const char *dir, ...)
-{
-    va_list ap;
-    const char *name;
-    char *path;
-    int ret = 0;
-    
-    va_start(ap, dir);
-    while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
-        const char *fmt = va_arg(ap, char *);
-        void *result = va_arg(ap, void *);
-        char *p;
-        
-        if (asprintf(&path, "%s/%s", dir, name) == -1)
-        {
-            warn("allocation error in xs_gather!\n");
-            ret = ENOMEM;
-            break;
-        }
-        p = xs_read(xs, path, NULL);
-        free(path);
-        if (p == NULL) {
-            ret = ENOENT;
-            break;
-        }
-        if (fmt) {
-            if (sscanf(p, fmt, result) == 0)
-                ret = EINVAL;
-            free(p);
-        } else
-            *(char **)result = p;
-    }
-    va_end(ap);
-    return ret;
-}
-
-/* Single printf and write: returns -errno or 0. */
-int xs_printf(struct xs_handle *h, const char *dir, const char *node, 
-                  const char *fmt, ...)
-{
-        char *buf, *path;
-        va_list ap;
-        int ret;
- 
-        va_start(ap, fmt);
-        ret = vasprintf(&buf, fmt, ap);
-        va_end(ap);
- 
-        asprintf(&path, "%s/%s", dir, node);
-
-        if ((path == NULL) || (buf == NULL))
-            return 0;
-
-        ret = xs_write(h, path, buf, strlen(buf)+1);
-
-        free(buf);
-        free(path);
-
-        return ret;
-}
-
-
-int xs_exists(struct xs_handle *h, const char *path)
-{
-    char **d;
-    int num;
-
-    d = xs_directory(h, path, &num);
-    if (d == NULL)
-        return 0;
-    free(d);
-    return 1;
-}
-
-
-
-/* This assumes that the domain name we are looking for is unique! */
-char *get_dom_domid(struct xs_handle *h, const char *name)
-{
-    char **e, *val, *domid = NULL;
-    int num, i, len;
-    char *path;
-
-    e = xs_directory(h, "/local/domain", &num);
-
-    i=0;
-    while (i < num) {
-        asprintf(&path, "/local/domain/%s/name", e[i]);
-        val = xs_read(h, path, &len);
-        free(path);
-        if (val == NULL)
-            continue;
-        if (strcmp(val, name) == 0) {
-            /* match! */
-            asprintf(&path, "/local/domain/%s/domid", e[i]);
-            domid = xs_read(h, path, &len);
-            free(val);
-            free(path);
-            break;
-        }
-        free(val);
-        i++;
-    }
-
-    free(e);
-    return domid;
-}
-
-static int strsep_len(const char *str, char c, unsigned int len)
-{
-    unsigned int i;
-    
-    for (i = 0; str[i]; i++)
-        if (str[i] == c) {
-            if (len == 0)
-                return i;
-            len--;
-        }
-    return (len == 0) ? i : -ERANGE;
-}
-
-
-/* xenbus watches: */     
-/* Register callback to watch this node. */
-struct xenbus_watch
-{
-        struct list_head list;
-        char *node;
-        void (*callback)(struct xs_handle *h, 
-                         struct xenbus_watch *, 
-                         const  char *node);
-};
-
-static LIST_HEAD(watches);
-
-/* A little paranoia: we don't just trust token. */
-static struct xenbus_watch *find_watch(const char *token)
-{
-    struct xenbus_watch *i, *cmp;
-    
-    cmp = (void *)strtoul(token, NULL, 16);
-    
-    list_for_each_entry(i, &watches, list)
-        if (i == cmp)
-            return i;
-    return NULL;
-}
-
-/* Register callback to watch this node. like xs_watch, return 0 on failure */
-int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
-{
-    /* Pointer in ascii is the token. */
-    char token[sizeof(watch) * 2 + 1];
-    int er;
-    
-    sprintf(token, "%lX", (long)watch);
-    if (find_watch(token)) 
-    {
-        warn("watch collision!");
-        return -EINVAL;
-    }
-    
-    er = xs_watch(h, watch->node, token);
-    if (er != 0) {
-        list_add(&watch->list, &watches);
-    } 
-        
-    return er;
-}
-
-int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
-{
-    char token[sizeof(watch) * 2 + 1];
-    int er;
-    
-    sprintf(token, "%lX", (long)watch);
-    if (!find_watch(token))
-    {
-        warn("no such watch!");
-        return -EINVAL;
-    }
-    
-    
-    er = xs_unwatch(h, watch->node, token);
-    list_del(&watch->list);
-    
-    if (er == 0)
-        warn("XENBUS Failed to release watch %s: %i",
-             watch->node, er);
-    return 0;
-}
-
-/* Re-register callbacks to all watches. */
-void reregister_xenbus_watches(struct xs_handle *h)
-{
-    struct xenbus_watch *watch;
-    char token[sizeof(watch) * 2 + 1];
-    
-    list_for_each_entry(watch, &watches, list) {
-        sprintf(token, "%lX", (long)watch);
-        xs_watch(h, watch->node, token);
-    }
-}
-
-/* based on watch_thread() */
-int xs_fire_next_watch(struct xs_handle *h)
-{
-    char **res;
-    char *token;
-    char *node = NULL;
-    struct xenbus_watch *w;
-    int er;
-    unsigned int num;
-
-    res = xs_read_watch(h, &num);
-    if (res == NULL) 
-        return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */
-
-    node  = res[XS_WATCH_PATH];
-    token = res[XS_WATCH_TOKEN];
-
-    w = find_watch(token);
-    if (!w)
-    {
-        warn("unregistered watch fired");
-        goto done;
-    }
-    w->callback(h, w, node);
-
- done:
-    free(res);
-    return 1;
-}
-
-
-
-
-/* ---------------------------------------------------------------------- */
-
-struct backend_info
-{
-    /* our communications channel */
-    blkif_t *blkif;
-    
-    long int frontend_id;
-    long int pdev;
-    long int readonly;
-    
-    /* watch back end for changes */
-    struct xenbus_watch backend_watch;
-    char *backpath;
-
-    /* watch front end for changes */
-    struct xenbus_watch watch;
-    char *frontpath;
-
-    struct list_head list;
-};
-
-static LIST_HEAD(belist);
-
-static struct backend_info *be_lookup_be(const char *bepath)
-{
-    struct backend_info *be;
-
-    list_for_each_entry(be, &belist, list)
-        if (strcmp(bepath, be->backpath) == 0)
-            return be;
-    return (struct backend_info *)NULL;
-}
-
-static int be_exists_be(const char *bepath)
-{
-    return ( be_lookup_be(bepath) != NULL );
-}
-
-static struct backend_info *be_lookup_fe(const char *fepath)
-{
-    struct backend_info *be;
-
-    list_for_each_entry(be, &belist, list)
-        if (strcmp(fepath, be->frontpath) == 0)
-            return be;
-    return (struct backend_info *)NULL;
-}
-
-static int backend_remove(struct xs_handle *h, struct backend_info *be)
-{
-    /* Turn off watches. */
-    if (be->watch.node)
-        unregister_xenbus_watch(h, &be->watch);
-    if (be->backend_watch.node)
-        unregister_xenbus_watch(h, &be->backend_watch);
-
-    /* Unhook from be list. */
-    list_del(&be->list);
-
-    /* Free everything else. */
-    if (be->blkif)
-        free_blkif(be->blkif);
-    free(be->frontpath);
-    free(be->backpath);
-    free(be);
-    return 0;
-}
-
-static void frontend_changed(struct xs_handle *h, struct xenbus_watch *w, 
-                     const char *fepath_im)
-{
-    struct backend_info *be;
-    char *fepath = NULL;
-    int er;
-
-    be = be_lookup_fe(w->node);
-    if (be == NULL)
-    {
-        warn("frontend changed called for nonexistent backend! (%s)", fepath);
-        goto fail;
-    }
-    
-    /* If other end is gone, delete ourself. */
-    if (w->node && !xs_exists(h, be->frontpath)) {
-        DPRINTF("DELETING BE: %s\n", be->backpath);
-        backend_remove(h, be);
-        return;
-    }
-
-    if (be->blkif == NULL || (be->blkif->state == CONNECTED))
-        return;
-
-    /* Supply the information about the device the frontend needs */
-    er = xs_transaction_start(h, be->backpath);
-    if (er == 0) {
-        warn("starting transaction");
-        goto fail;
-    }
-    
-    er = xs_printf(h, be->backpath, "sectors", "%lu",
-                           be->blkif->ops->get_size(be->blkif));
-    if (er == 0) {
-        warn("writing sectors");
-        goto fail;
-    }
-    
-    er = xs_printf(h, be->backpath, "info", "%u",
-                           be->blkif->ops->get_info(be->blkif));
-    if (er == 0) {
-        warn("writing info");
-        goto fail;
-    }
-    
-    er = xs_printf(h, be->backpath, "sector-size", "%lu",
-                           be->blkif->ops->get_secsize(be->blkif));
-    if (er == 0) {
-        warn("writing sector-size");
-        goto fail;
-    }
-
-    be->blkif->state = CONNECTED;
-
-    xs_transaction_end(h, 0);
-
-    return;
-
- fail:
-    free(fepath);
-}
-
-
-static void backend_changed(struct xs_handle *h, struct xenbus_watch *w, 
-                     const char *bepath_im)
-{
-    struct backend_info *be;
-    char *path = NULL, *p;
-    int len, er;
-    long int pdev = 0, handle;
-
-    be = be_lookup_be(w->node);
-    if (be == NULL)
-    {
-        warn("backend changed called for nonexistent backend! (%s)", w->node);
-        goto fail;
-    }
-    
-    er = xs_gather(h, be->backpath, "physical-device", "%li", &pdev, NULL);
-    if (er != 0) 
-        goto fail;
-
-    if (be->pdev && be->pdev != pdev) {
-        warn("changing physical-device not supported");
-        goto fail;
-    }
-    be->pdev = pdev;
-
-    asprintf(&path, "%s/%s", w->node, "read-only");
-    if (xs_exists(h, path))
-        be->readonly = 1;
-
-    if (be->blkif == NULL) {
-        /* Front end dir is a number, which is used as the handle. */
-        p = strrchr(be->frontpath, '/') + 1;
-        handle = strtoul(p, NULL, 0);
-
-        be->blkif = alloc_blkif(be->frontend_id);
-        if (be->blkif == NULL) 
-            goto fail;
-
-        er = blkif_init(be->blkif, handle, be->pdev, be->readonly);
-        if (er) 
-            goto fail;
-
-        DPRINTF("[BECHG]: ADDED A NEW BLKIF (%s)\n", w->node);
-
-        /* Pass in NULL node to skip exist test. */
-        frontend_changed(h, &be->watch, NULL);
-    }
-
- fail:
-    free(path);
-}
-
-static void blkback_probe(struct xs_handle *h, struct xenbus_watch *w, 
-                         const char *bepath_im)
-{
-       struct backend_info *be = NULL;
-       char *frontend = NULL, *bepath = NULL;
-       int er, len;
-
-        bepath = strdup(bepath_im);
-        if (!bepath)
-            return;
-        len = strsep_len(bepath, '/', 6);
-        if (len < 0) 
-            goto free_be;
-        
-        bepath[len] = '\0'; /*truncate the passed-in string with predjudice. */
-
-       be = malloc(sizeof(*be));
-       if (!be) {
-               warn("allocating backend structure");
-               goto free_be;
-       }
-       memset(be, 0, sizeof(*be));
-
-       frontend = NULL;
-       er = xs_gather(h, bepath,
-                        "frontend-id", "%li", &be->frontend_id,
-                        "frontend", NULL, &frontend,
-                        NULL);
-       if (er)
-               goto free_be;
-
-       if (strlen(frontend) == 0 || !xs_exists(h, frontend)) {
-            /* If we can't get a frontend path and a frontend-id,
-             * then our bus-id is no longer valid and we need to
-             * destroy the backend device.
-             */
-            DPRINTF("No frontend (%s)\n", frontend);
-            goto free_be;
-       }
-
-        /* Are we already tracking this device? */
-        if (be_exists_be(bepath))
-            goto free_be;
-
-        be->backpath = bepath;
-       be->backend_watch.node = be->backpath;
-       be->backend_watch.callback = backend_changed;
-       er = register_xenbus_watch(h, &be->backend_watch);
-       if (er == 0) {
-               be->backend_watch.node = NULL;
-               warn("error adding backend watch on %s", bepath);
-               goto free_be;
-       }
-
-       be->frontpath = frontend;
-       be->watch.node = be->frontpath;
-       be->watch.callback = frontend_changed;
-       er = register_xenbus_watch(h, &be->watch);
-       if (er == 0) {
-               be->watch.node = NULL;
-               warn("adding frontend watch on %s", be->frontpath);
-               goto free_be;
-       }
-
-        list_add(&be->list, &belist);
-
-        DPRINTF("[PROBE]: ADDED NEW DEVICE (%s)\n", bepath_im);
-
-       backend_changed(h, &be->backend_watch, bepath);
-       return;
-
- free_be:
-       if (be && (be->backend_watch.node))
-            unregister_xenbus_watch(h, &be->backend_watch);
-        free(frontend);
-        free(bepath);
-       free(be);
-       return;
-}
-
-
-int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname)
-{
-    char *domid, *path;
-    struct xenbus_watch *vbd_watch;
-    int er;
-
-    domid = get_dom_domid(h, domname);
-
-    DPRINTF("%s: %s\n", domname, (domid != NULL) ? domid : "[ not found! ]");
-
-    asprintf(&path, "/local/domain/%s/backend/vbd", domid);
-    if (path == NULL) 
-        return -ENOMEM;
-
-    vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch));
-    vbd_watch->node     = path;
-    vbd_watch->callback = blkback_probe;
-    er = register_xenbus_watch(h, vbd_watch);
-    if (er == 0) {
-        warn("Error adding vbd probe watch %s", path);
-        return -EINVAL;
-    }
-
-    return 0;
-}

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.