[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Merged.



# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID c9772105fead52abf64213aa1eda6419871acf94
# Parent  76bff6c996b0250739229181e40bc9c349f80c15
# Parent  a08aef9f1c8e6f0350e89ee34ad6d3ba54027958
Merged.

diff -r 76bff6c996b0 -r c9772105fead buildconfigs/Rules.mk
--- a/buildconfigs/Rules.mk     Thu Dec  8 15:04:31 2005
+++ b/buildconfigs/Rules.mk     Thu Dec  8 15:04:41 2005
@@ -21,6 +21,7 @@
 
 # Expand Linux series to Linux version
 LINUX_SERIES   ?= 2.6
+LINUX_VER      ?= $(shell grep "^LINUX_VER" buildconfigs/mk.linux-2.6-xen | sed -e 's/.*=[ ]*//')
 
 # Setup Linux search path
 LINUX_SRC_PATH ?= .:..
@@ -109,6 +110,13 @@
 %-config:
        $(MAKE) -f buildconfigs/mk.$* config
 
+linux-2.6-xen.patch: ref-linux-$(LINUX_VER)/.valid-ref
+       rm -rf tmp-$@
+       cp -al $(<D) tmp-$@
+       ( cd linux-2.6-xen-sparse && ./mkbuildtree ../tmp-$@ )  
+       diff -Nurp $(<D) tmp-$@ > $@ || true
+       rm -rf tmp-$@
+
 %-xen.patch: ref-%/.valid-ref
        rm -rf tmp-$@
        cp -al $(<D) tmp-$@
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/events.c
--- a/extras/mini-os/events.c   Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/events.c   Thu Dec  8 15:04:41 2005
@@ -56,7 +56,7 @@
 
 }
 
-void bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) )
+int bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) )
 {
        if(ev_actions[port].handler)
         printk("WARN: Handler for port %d already registered, replacing\n",
@@ -67,6 +67,16 @@
  
        /* Finally unmask the port */
        unmask_evtchn(port);
+
+       return port;
+}
+
+void unbind_evtchn( u32 port )
+{
+       if (!ev_actions[port].handler)
+               printk("WARN: No handler for port %d when unbinding\n", port);
+       ev_actions[port].handler = NULL;
+       ev_actions[port].status |= EVS_DISABLED;
 }
 
 int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) )
@@ -90,6 +100,10 @@
        return ret;
 }
 
+void unbind_virq( u32 port )
+{
+       unbind_evtchn(port);
+}
 
 
 /*
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/events.h
--- a/extras/mini-os/include/events.h   Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/events.h   Thu Dec  8 15:04:41 2005
@@ -40,10 +40,12 @@
 /* prototypes */
 int do_event(u32 port, struct pt_regs *regs);
 int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) );
-void bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) );
+int bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) );
+void unbind_evtchn( u32 port );
 void init_events(void);
+void unbind_virq( u32 port );
 
-static inline int notify_via_evtchn(int port)
+static inline int notify_remote_via_evtchn(int port)
 {
     evtchn_op_t op;
     op.cmd = EVTCHNOP_send;
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/os.h
--- a/extras/mini-os/include/os.h       Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/os.h       Thu Dec  8 15:04:41 2005
@@ -131,9 +131,11 @@
 #if defined(__i386__)
 #define mb()    __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
 #define rmb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define wmb()  __asm__ __volatile__ ("": : :"memory")
 #elif defined(__x86_64__)
 #define mb()    __asm__ __volatile__ ("mfence":::"memory")
 #define rmb()   __asm__ __volatile__ ("lfence":::"memory")
+#define wmb()  __asm__ __volatile__ ("sfence" ::: "memory") /* From CONFIG_UNORDERED_IO (linux) */
 #endif
 
 
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/semaphore.h
--- a/extras/mini-os/include/semaphore.h        Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/semaphore.h        Thu Dec  8 15:04:41 2005
@@ -2,6 +2,7 @@
 #define _SEMAPHORE_H_
 
 #include <wait.h>
+#include <spinlock.h>
 
 /*
  * Implementation of semaphore in Mini-os is simple, because 
@@ -14,6 +15,15 @@
        struct wait_queue_head wait;
 };
 
+/*
+ * the semaphore definition
+ */
+struct rw_semaphore {
+       signed long             count;
+       spinlock_t              wait_lock;
+       struct list_head        wait_list;
+       int                     debug;
+};
 
 #define __SEMAPHORE_INITIALIZER(name, n)                            \
 {                                                                   \
@@ -31,6 +41,12 @@
 
 #define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
 
+static inline void init_MUTEX(struct semaphore *sem)
+{
+  sem->count = 1;
+  init_waitqueue_head(&sem->wait);
+}
+
 static void inline down(struct semaphore *sem)
 {
     wait_event(sem->wait, sem->count > 0);
@@ -43,4 +59,27 @@
     wake_up(&sem->wait);
 }
 
+/* FIXME! Thre read/write semaphores are unimplemented! */
+static inline void init_rwsem(struct rw_semaphore *sem)
+{
+  sem->count = 1;
+}
+
+static inline void down_read(struct rw_semaphore *sem)
+{
+}
+
+
+static inline void up_read(struct rw_semaphore *sem)
+{
+}
+
+static inline void up_write(struct rw_semaphore *sem)
+{
+}
+
+static inline void down_write(struct rw_semaphore *sem)
+{
+}
+
 #endif /* _SEMAPHORE_H */
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/wait.h
--- a/extras/mini-os/include/wait.h     Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/wait.h     Thu Dec  8 15:04:41 2005
@@ -33,6 +33,10 @@
 }
 
 
+static inline void init_waitqueue_head(struct wait_queue_head *h)
+{
+  INIT_LIST_HEAD(&h->thread_list);
+}
 
 static inline void init_waitqueue_entry(struct wait_queue *q, struct thread *thread)
 {
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/xenbus.h
--- a/extras/mini-os/include/xenbus.h   Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/xenbus.h   Thu Dec  8 15:04:41 2005
@@ -4,6 +4,7 @@
  * Talks to Xen Store to figure out what devices we have.
  *
  * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
  * 
  * This file may be distributed separately from the Linux kernel, or
  * incorporated into other software packages, subject to the following license:
@@ -30,45 +31,98 @@
 #ifndef _ASM_XEN_XENBUS_H
 #define _ASM_XEN_XENBUS_H
 
-
-/* Caller must hold this lock to call these functions: it's also held
- * across watch callbacks. */
-// TODO
-//extern struct semaphore xenbus_lock;
-
-char **xenbus_directory(const char *dir, const char *node, unsigned int *num);
-void *xenbus_read(const char *dir, const char *node, unsigned int *len);
-int xenbus_write(const char *dir, const char *node,
-                const char *string, int createflags);
-int xenbus_mkdir(const char *dir, const char *node);
-int xenbus_exists(const char *dir, const char *node);
-int xenbus_rm(const char *dir, const char *node);
-int xenbus_transaction_start(const char *subtree);
-int xenbus_transaction_end(int abort);
-
-/* Single read and scanf: returns -errno or num scanned if > 0. */
-int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
-       __attribute__((format(scanf, 3, 4)));
-
-/* Single printf and write: returns -errno or 0. */
-int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
-       __attribute__((format(printf, 3, 4)));
-
-/* Generic read function: NULL-terminated triples of name,
- * sprintf-style type string, and pointer. Returns 0 or errno.*/
-int xenbus_gather(const char *dir, ...);
+#include <errno.h>
+#include <xen/io/xenbus.h>
+#include <xen/io/xs_wire.h>
 
 /* Register callback to watch this node. */
 struct xenbus_watch
 {
        struct list_head list;
-       char *node;
-       void (*callback)(struct xenbus_watch *, const char *node);
-};
+
+       /* Path being watched. */
+       const char *node;
+
+       /* Callback (executed in a process context with no locks held). */
+       void (*callback)(struct xenbus_watch *,
+                        const char **vec, unsigned int len);
+};
+
+
+/* A xenbus device. */
+struct xenbus_device {
+       const char *devicetype;
+       const char *nodename;
+       const char *otherend;
+       int otherend_id;
+       struct xenbus_watch otherend_watch;
+       int has_error;
+       void *data;
+};
+
+struct xenbus_device_id
+{
+       /* .../device/<device_type>/<identifier> */
+       char devicetype[32];    /* General class of device. */
+};
+
+/* A xenbus driver. */
+struct xenbus_driver {
+       char *name;
+       struct module *owner;
+       const struct xenbus_device_id *ids;
+       int (*probe)(struct xenbus_device *dev,
+                    const struct xenbus_device_id *id);
+       void (*otherend_changed)(struct xenbus_device *dev,
+                                XenbusState backend_state);
+       int (*remove)(struct xenbus_device *dev);
+       int (*suspend)(struct xenbus_device *dev);
+       int (*resume)(struct xenbus_device *dev);
+       int (*hotplug)(struct xenbus_device *, char **, int, char *, int);
+       int (*read_otherend_details)(struct xenbus_device *dev);
+};
+
+int xenbus_register_frontend(struct xenbus_driver *drv);
+int xenbus_register_backend(struct xenbus_driver *drv);
+void xenbus_unregister_driver(struct xenbus_driver *drv);
+
+struct xenbus_transaction;
+
+char **xenbus_directory(struct xenbus_transaction *t,
+                       const char *dir, const char *node, unsigned int *num);
+void *xenbus_read(struct xenbus_transaction *t,
+                 const char *dir, const char *node, unsigned int *len);
+int xenbus_write(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *string);
+int xenbus_mkdir(struct xenbus_transaction *t,
+                const char *dir, const char *node);
+int xenbus_exists(struct xenbus_transaction *t,
+                 const char *dir, const char *node);
+int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node);
+struct xenbus_transaction *xenbus_transaction_start(void);
+int xenbus_transaction_end(struct xenbus_transaction *t, int abort);
+
+/* Single read and scanf: returns -errno or num scanned if > 0. */
+int xenbus_scanf(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *fmt, ...)
+       __attribute__((format(scanf, 4, 5)));
+
+/* Single printf and write: returns -errno or 0. */
+int xenbus_printf(struct xenbus_transaction *t,
+                 const char *dir, const char *node, const char *fmt, ...)
+       __attribute__((format(printf, 4, 5)));
+
+/* Generic read function: NULL-terminated triples of name,
+ * sprintf-style type string, and pointer. Returns 0 or errno.*/
+int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...);
 
 int register_xenbus_watch(struct xenbus_watch *watch);
 void unregister_xenbus_watch(struct xenbus_watch *watch);
-void reregister_xenbus_watches(void);
+void xs_suspend(void);
+void xs_resume(void);
+
+/* Used by xenbus_dev to borrow kernel's store connection. */
+void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg);
 
 /* Called from xen core code. */
 void xenbus_suspend(void);
@@ -84,6 +138,87 @@
 
 #define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE)
 
-int xs_init(void);
+
+/**
+ * Register a watch on the given path, using the given xenbus_watch structure
+ * for storage, and the given callback function as the callback.  Return 0 on
+ * success, or -errno on error.  On success, the given path will be saved as
+ * watch->node, and remains the caller's to free.  On error, watch->node will
+ * be NULL, the device will switch to XenbusStateClosing, and the error will
+ * be saved in the store.
+ */
+int xenbus_watch_path(struct xenbus_device *dev, const char *path,
+                     struct xenbus_watch *watch, 
+                     void (*callback)(struct xenbus_watch *,
+                                      const char **, unsigned int));
+
+
+/**
+ * Register a watch on the given path/path2, using the given xenbus_watch
+ * structure for storage, and the given callback function as the callback.
+ * Return 0 on success, or -errno on error.  On success, the watched path
+ * (path/path2) will be saved as watch->node, and becomes the caller's to
+ * kfree().  On error, watch->node will be NULL, so the caller has nothing to
+ * free, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
+                      const char *path2, struct xenbus_watch *watch, 
+                      void (*callback)(struct xenbus_watch *,
+                                       const char **, unsigned int));
+
+
+/**
+ * Advertise in the store a change of the given driver to the given new_state.
+ * Perform the change inside the given transaction xbt.  xbt may be NULL, in
+ * which case this is performed inside its own transaction.  Return 0 on
+ * success, or -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_switch_state(struct xenbus_device *dev,
+                       struct xenbus_transaction *xbt,
+                       XenbusState new_state);
+
+
+/**
+ * Grant access to the given ring_mfn to the peer of the given device.  Return
+ * 0 on success, or -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
+
+
+/**
+ * Allocate an event channel for the given xenbus_device, assigning the newly
+ * created local port to *port.  Return 0 on success, or -errno on error.  On
+ * error, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
+
+
+/**
+ * Return the state of the driver rooted at the given store path, or
+ * XenbusStateClosed if no state can be read.
+ */
+XenbusState xenbus_read_driver_state(const char *path);
+
+
+/***
+ * Report the given negative errno into the store, along with the given
+ * formatted message.
+ */
+void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,
+                     ...);
+
+
+/***
+ * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
+ * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly
+ * closedown of this driver and its peer.
+ */
+void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
+                     ...);
+
 
 #endif /* _ASM_XEN_XENBUS_H */
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/xmalloc.h
--- a/extras/mini-os/include/xmalloc.h  Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/xmalloc.h  Thu Dec  8 15:04:41 2005
@@ -6,6 +6,9 @@
 
 /* Allocate space for array of typed objects. */
 #define xmalloc_array(_type, _num) ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num))
+
+#define malloc(size) _xmalloc(size, 4)
+#define free(ptr) xfree(ptr)
 
 /* Free any of the above. */
 extern void xfree(const void *);
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/kernel.c   Thu Dec  8 15:04:41 2005
@@ -35,6 +35,7 @@
 #include <lib.h>
 #include <sched.h>
 #include <xenbus.h>
+#include "xenbus/xenbus_comms.h"
 
 /*
  * Shared page for communicating with the hypervisor.
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/xenbus/xenbus_comms.c
--- a/extras/mini-os/xenbus/xenbus_comms.c      Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/xenbus/xenbus_comms.c      Thu Dec  8 15:04:41 2005
@@ -33,35 +33,19 @@
 #include <events.h>
 #include <os.h>
 #include <lib.h>
+#include <xenbus.h>
+#include "xenbus_comms.h"
 
+static int xenbus_irq;
 
-#ifdef XENBUS_COMMS_DEBUG
-#define DEBUG(_f, _a...) \
-    printk("MINI_OS(file=xenbus_comms.c, line=%d) " _f "\n", __LINE__, ## _a)
-#else
-#define DEBUG(_f, _a...)    ((void)0)
-#endif
-
-
-#define RINGBUF_DATASIZE ((PAGE_SIZE / 2) - sizeof(struct ringbuf_head))
-struct ringbuf_head
-{
-       u32 write; /* Next place to write to */
-       u32 read; /* Next place to read from */
-       u8 flags;
-       char buf[0];
-} __attribute__((packed));
+extern void xenbus_probe(void *);
+extern int xenstored_ready;
 
 DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
 
-static inline struct ringbuf_head *outbuf(void)
+static inline struct xenstore_domain_interface *xenstore_domain_interface(void)
 {
        return mfn_to_virt(start_info.store_mfn);
-}
-
-static inline struct ringbuf_head *inbuf(void)
-{
-       return (struct ringbuf_head *)((char *)mfn_to_virt(start_info.store_mfn) + PAGE_SIZE/2);
 }
 
 static void wake_waiting(int port, struct pt_regs *regs)
@@ -69,138 +53,112 @@
        wake_up(&xb_waitq);
 }
 
-static int check_buffer(const struct ringbuf_head *h)
+static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
 {
-       return (h->write < RINGBUF_DATASIZE && h->read < RINGBUF_DATASIZE);
+       return ((prod - cons) <= XENSTORE_RING_SIZE);
 }
 
-/* We can't fill last byte: would look like empty buffer. */
-static void *get_output_chunk(const struct ringbuf_head *h,
-                             void *buf, u32 *len)
+static void *get_output_chunk(XENSTORE_RING_IDX cons,
+                             XENSTORE_RING_IDX prod,
+                             char *buf, uint32_t *len)
 {
-       u32 read_mark;
-
-       if (h->read == 0)
-               read_mark = RINGBUF_DATASIZE - 1;
-       else
-               read_mark = h->read - 1;
-
-       /* Here to the end of buffer, unless they haven't read some out. */
-       *len = RINGBUF_DATASIZE - h->write;
-       if (read_mark >= h->write)
-               *len = read_mark - h->write;
-       return (void *)((char *)buf + h->write);
+       *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
+       if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
+               *len = XENSTORE_RING_SIZE - (prod - cons);
+       return buf + MASK_XENSTORE_IDX(prod);
 }
 
-static const void *get_input_chunk(const struct ringbuf_head *h,
-                                  const void *buf, u32 *len)
+static const void *get_input_chunk(XENSTORE_RING_IDX cons,
+                                  XENSTORE_RING_IDX prod,
+                                  const char *buf, uint32_t *len)
 {
-       /* Here to the end of buffer, unless they haven't written some. */
-       *len = RINGBUF_DATASIZE - h->read;
-       if (h->write >= h->read)
-               *len = h->write - h->read;
-       return (void *)((char *)buf + h->read);
-}
-
-static void update_output_chunk(struct ringbuf_head *h, u32 len)
-{
-       h->write += len;
-       if (h->write == RINGBUF_DATASIZE)
-               h->write = 0;
-}
-
-static void update_input_chunk(struct ringbuf_head *h, u32 len)
-{
-       h->read += len;
-       if (h->read == RINGBUF_DATASIZE)
-               h->read = 0;
-}
-
-static int output_avail(struct ringbuf_head *out)
-{
-       unsigned int avail;
-
-       get_output_chunk(out, out->buf, &avail);
-       return avail != 0;
+       *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
+       if ((prod - cons) < *len)
+               *len = prod - cons;
+       return buf + MASK_XENSTORE_IDX(cons);
 }
 
 int xb_write(const void *data, unsigned len)
 {
-       struct ringbuf_head h;
-       struct ringbuf_head *out = outbuf();
+       struct xenstore_domain_interface *intf = xenstore_domain_interface();
+       XENSTORE_RING_IDX cons, prod;
 
-       do {
+       while (len != 0) {
                void *dst;
                unsigned int avail;
 
-               wait_event(xb_waitq, output_avail(out));
+               wait_event(xb_waitq, (intf->req_prod - intf->req_cons) !=
+                          XENSTORE_RING_SIZE);
 
-               /* Read, then check: not that we don't trust store.
-                * Hell, some of my best friends are daemons.  But,
-                * in this post-911 world... */
-               h = *out;
+               /* Read indexes, then verify. */
+               cons = intf->req_cons;
+               prod = intf->req_prod;
                mb();
-               if (!check_buffer(&h)) {
-                       return -1; /* ETERRORIST! */
-               }
+               if (!check_indexes(cons, prod))
+                       return -EIO;
 
-               dst = get_output_chunk(&h, out->buf, &avail);
+               dst = get_output_chunk(cons, prod, intf->req, &avail);
+               if (avail == 0)
+                       continue;
                if (avail > len)
                        avail = len;
+
                memcpy(dst, data, avail);
-               data = (void *)((char *)data + avail);
+               data = (void*) ( (unsigned long)data + avail );
                len -= avail;
-               update_output_chunk(out, avail);
-               notify_via_evtchn(start_info.store_evtchn);
-       } while (len != 0);
+
+               /* Other side must not see new header until data is there. */
+               wmb();
+               intf->req_prod += avail;
+
+               /* This implies mb() before other side sees interrupt. */
+               notify_remote_via_evtchn(start_info.store_evtchn);
+       }
 
        return 0;
 }
 
-int xs_input_avail(void)
-{
-       unsigned int avail;
-       struct ringbuf_head *in = inbuf();
-
-       get_input_chunk(in, in->buf, &avail);
-       return avail != 0;
-}
-
 int xb_read(void *data, unsigned len)
 {
-       struct ringbuf_head h;
-       struct ringbuf_head *in = inbuf();
-       int was_full;
+       struct xenstore_domain_interface *intf = xenstore_domain_interface();
+       XENSTORE_RING_IDX cons, prod;
 
        while (len != 0) {
                unsigned int avail;
                const char *src;
 
-               wait_event(xb_waitq, xs_input_avail());
-               h = *in;
+               wait_event(xb_waitq,
+                          intf->rsp_cons != intf->rsp_prod);
+
+               /* Read indexes, then verify. */
+               cons = intf->rsp_cons;
+               prod = intf->rsp_prod;
                mb();
-               if (!check_buffer(&h)) {
-                       return -1;
-               }
+               if (!check_indexes(cons, prod))
+                       return -EIO;
 
-               src = get_input_chunk(&h, in->buf, &avail);
+               src = get_input_chunk(cons, prod, intf->rsp, &avail);
+               if (avail == 0)
+                       continue;
                if (avail > len)
                        avail = len;
-               was_full = !output_avail(&h);
+
+               /* We must read header before we read data. */
+               rmb();
 
                memcpy(data, src, avail);
-               data = (void *)((char *)data + avail);
+               data = (void*) ( (unsigned long)data + avail );
                len -= avail;
-               update_input_chunk(in, avail);
-               DEBUG("Finished read of %i bytes (%i to go)\n", avail, len);
-               /* If it was full, tell them we've taken some. */
-               if (was_full)
-                       notify_via_evtchn(start_info.store_evtchn);
+
+               /* Other side must not see free space until we've copied out */
+               mb();
+               intf->rsp_cons += avail;
+
+               printk("Finished read of %i bytes (%i to go)\n", avail, len);
+
+               /* Implies mb(): they will see new header. */
+               notify_remote_via_evtchn(start_info.store_evtchn);
        }
-
-       /* If we left something, wake watch thread to deal with it. */
-       if (xs_input_avail())
-               wake_up(&xb_waitq);
 
        return 0;
 }
@@ -208,24 +166,19 @@
 /* Set up interrupt handler off store event channel. */
 int xb_init_comms(void)
 {
-    printk("Init xenbus comms, store event channel %d\n", start_info.store_evtchn);
-       if (!start_info.store_evtchn)
-               return 0;
-    printk("Binding virq\n");
-       bind_evtchn(start_info.store_evtchn, &wake_waiting);
+       int err;
 
-       /* FIXME zero out page -- domain builder should probably do this*/
-       memset(mfn_to_virt(start_info.store_mfn), 0, PAGE_SIZE);
-    notify_via_evtchn(start_info.store_evtchn);
+       if (xenbus_irq)
+               unbind_evtchn(xenbus_irq);
+
+       err = bind_evtchn(
+               start_info.store_evtchn, wake_waiting);
+       if (err <= 0) {
+               printk("XENBUS request irq failed %i\n", err);
+               return err;
+       }
+
+       xenbus_irq = err;
+
        return 0;
 }
-
-void xb_suspend_comms(void)
-{
-
-       if (!start_info.store_evtchn)
-               return;
-
-    // TODO
-       //unbind_evtchn_from_irqhandler(xen_start_info.store_evtchn, &xb_waitq);
-}
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/xenbus/xenbus_comms.h
--- a/extras/mini-os/xenbus/xenbus_comms.h      Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/xenbus/xenbus_comms.h      Thu Dec  8 15:04:41 2005
@@ -28,8 +28,8 @@
 #ifndef _XENBUS_COMMS_H
 #define _XENBUS_COMMS_H
 
+int xs_init(void);
 int xb_init_comms(void);
-void xb_suspend_comms(void);
 
 /* Low level routines. */
 int xb_write(const void *data, unsigned len);
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/xenbus/xenbus_xs.c
--- a/extras/mini-os/xenbus/xenbus_xs.c Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/xenbus/xenbus_xs.c Thu Dec  8 15:04:41 2005
@@ -39,15 +39,63 @@
 #include <wait.h>
 #include <sched.h>
 #include <semaphore.h>
+#include <spinlock.h>
 #include <xen/io/xs_wire.h>
 #include "xenbus_comms.h"
 
 #define streq(a, b) (strcmp((a), (b)) == 0)
 
-static char printf_buffer[4096];
+struct xs_stored_msg {
+       struct list_head list;
+
+       struct xsd_sockmsg hdr;
+
+       union {
+               /* Queued replies. */
+               struct {
+                       char *body;
+               } reply;
+
+               /* Queued watch events. */
+               struct {
+                       struct xenbus_watch *handle;
+                       char **vec;
+                       unsigned int vec_size;
+               } watch;
+       } u;
+};
+
+struct xs_handle {
+       /* A list of replies. Currently only one will ever be outstanding. */
+       struct list_head reply_list;
+       spinlock_t reply_lock;
+       struct wait_queue_head reply_waitq;
+
+       /* One request at a time. */
+       struct semaphore request_mutex;
+
+       /* Protect transactions against save/restore. */
+       struct rw_semaphore suspend_mutex;
+};
+
+static struct xs_handle xs_state;
+
+/* List of registered watches, and a lock to protect it. */
 static LIST_HEAD(watches);
-//TODO
-DECLARE_MUTEX(xenbus_lock);
+static DEFINE_SPINLOCK(watches_lock);
+
+/* List of pending watch callback events, and a lock to protect it. */
+static LIST_HEAD(watch_events);
+static DEFINE_SPINLOCK(watch_events_lock);
+
+/*
+ * Details of the xenwatch callback kernel thread. The thread waits on the
+ * watch_events_waitq for work to do (queued on watch_events list). When it
+ * wakes up it acquires the xenwatch_mutex before reading the list and
+ * carrying out work.
+ */
+/* static */ DECLARE_MUTEX(xenwatch_mutex);
+static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
 
 static int get_error(const char *errorstring)
 {
@@ -65,47 +113,82 @@
 
 static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
 {
-       struct xsd_sockmsg msg;
-       void *ret;
-       int err;
-
-       err = xb_read(&msg, sizeof(msg));
-       if (err)
-               return ERR_PTR(err);
-
-       ret = xmalloc_array(char, msg.len + 1);
-       if (!ret)
-               return ERR_PTR(-ENOMEM);
-
-       err = xb_read(ret, msg.len);
-       if (err) {
-               xfree(ret);
-               return ERR_PTR(err);
-       }
-       ((char*)ret)[msg.len] = '\0';
-
-       *type = msg.type;
+       struct xs_stored_msg *msg;
+       char *body;
+
+       spin_lock(&xs_state.reply_lock);
+
+       while (list_empty(&xs_state.reply_list)) {
+               spin_unlock(&xs_state.reply_lock);
+               wait_event(xs_state.reply_waitq,
+                          !list_empty(&xs_state.reply_list));
+               spin_lock(&xs_state.reply_lock);
+       }
+
+       msg = list_entry(xs_state.reply_list.next,
+                        struct xs_stored_msg, list);
+       list_del(&msg->list);
+
+       spin_unlock(&xs_state.reply_lock);
+
+       *type = msg->hdr.type;
        if (len)
-               *len = msg.len;
-       return ret;
+               *len = msg->hdr.len;
+       body = msg->u.reply.body;
+
+       free(msg);
+
+       return body;
 }
 
 /* Emergency write. */
 void xenbus_debug_write(const char *str, unsigned int count)
 {
-       struct xsd_sockmsg msg;
+       struct xsd_sockmsg msg = { 0 };
 
        msg.type = XS_DEBUG;
        msg.len = sizeof("print") + count + 1;
 
+       down(&xs_state.request_mutex);
        xb_write(&msg, sizeof(msg));
        xb_write("print", sizeof("print"));
        xb_write(str, count);
        xb_write("", 1);
+       up(&xs_state.request_mutex);
+}
+
+void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
+{
+       void *ret;
+       struct xsd_sockmsg req_msg = *msg;
+       int err;
+
+       if (req_msg.type == XS_TRANSACTION_START)
+               down_read(&xs_state.suspend_mutex);
+
+       down(&xs_state.request_mutex);
+
+       err = xb_write(msg, sizeof(*msg) + msg->len);
+       if (err) {
+               msg->type = XS_ERROR;
+               ret = ERR_PTR(err);
+       } else {
+               ret = read_reply(&msg->type, &msg->len);
+       }
+
+       up(&xs_state.request_mutex);
+
+       if ((msg->type == XS_TRANSACTION_END) ||
+           ((req_msg.type == XS_TRANSACTION_START) &&
+            (msg->type == XS_ERROR)))
+               up_read(&xs_state.suspend_mutex);
+
+       return ret;
 }
 
 /* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
-static void *xs_talkv(enum xsd_sockmsg_type type,
+static void *xs_talkv(struct xenbus_transaction *t,
+                     enum xsd_sockmsg_type type,
                      const struct kvec *iovec,
                      unsigned int num_vecs,
                      unsigned int *len)
@@ -115,51 +198,57 @@
        unsigned int i;
        int err;
 
-       //WARN_ON(down_trylock(&xenbus_lock) == 0);
-
+       msg.tx_id = (u32)(unsigned long)t;
+       msg.req_id = 0;
        msg.type = type;
        msg.len = 0;
        for (i = 0; i < num_vecs; i++)
                msg.len += iovec[i].iov_len;
 
+       down(&xs_state.request_mutex);
+
        err = xb_write(&msg, sizeof(msg));
-       if (err)
+       if (err) {
+               up(&xs_state.request_mutex);
                return ERR_PTR(err);
+       }
 
        for (i = 0; i < num_vecs; i++) {
-               err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
-               if (err)
+               err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
+               if (err) {
+                       up(&xs_state.request_mutex);
                        return ERR_PTR(err);
-       }
-
-       /* Watches can have fired before reply comes: daemon detects
-        * and re-transmits, so we can ignore this. */
-       do {
-               xfree(ret);
-               ret = read_reply(&msg.type, len);
-               if (IS_ERR(ret))
-                       return ret;
-       } while (msg.type == XS_WATCH_EVENT);
+               }
+       }
+
+       ret = read_reply(&msg.type, len);
+
+       up(&xs_state.request_mutex);
+
+       if (IS_ERR(ret))
+               return ret;
 
        if (msg.type == XS_ERROR) {
                err = get_error(ret);
-               xfree(ret);
+               free(ret);
                return ERR_PTR(-err);
        }
 
-       //BUG_ON(msg.type != type);
+       //      BUG_ON(msg.type != type);
        return ret;
 }
 
 /* Simplified version of xs_talkv: single message. */
-static void *xs_single(enum xsd_sockmsg_type type,
-                      const char *string, unsigned int *len)
+static void *xs_single(struct xenbus_transaction *t,
+                      enum xsd_sockmsg_type type,
+                      const char *string,
+                      unsigned int *len)
 {
        struct kvec iovec;
 
        iovec.iov_base = (void *)string;
        iovec.iov_len = strlen(string) + 1;
-       return xs_talkv(type, &iovec, 1, len);
+       return xs_talkv(t, type, &iovec, 1, len);
 }
 
 /* Many commands only need an ack, don't care what it says. */
@@ -167,7 +256,7 @@
 {
        if (IS_ERR(reply))
                return PTR_ERR(reply);
-       xfree(reply);
+       free(reply);
        return 0;
 }
 
@@ -182,60 +271,76 @@
        return num;
 }
 
-/* Return the path to dir with /name appended. */ 
+/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ 
 static char *join(const char *dir, const char *name)
 {
-       static char buffer[4096];
-
-       //BUG_ON(down_trylock(&xenbus_lock) == 0);
-       /* XXX FIXME: might not be correct if name == "" */
-       //BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer));
+       char *buffer;
+
+       buffer = malloc(strlen(dir) + strlen("/") + strlen(name) + 1);
+       if (buffer == NULL)
+               return ERR_PTR(-ENOMEM);
 
        strcpy(buffer, dir);
        if (!streq(name, "")) {
                strcat(buffer, "/");
                strcat(buffer, name);
        }
+
        return buffer;
 }
 
-char **xenbus_directory(const char *dir, const char *node, unsigned int *num)
-{
-       char *strings, *p, **ret;
-       unsigned int len;
-
-       strings = xs_single(XS_DIRECTORY, join(dir, node), &len);
-       if (IS_ERR(strings))
-               return (char **)strings;
+static char **split(char *strings, unsigned int len, unsigned int *num)
+{
+       char *p, **ret;
 
        /* Count the strings. */
        *num = count_strings(strings, len);
 
        /* Transfer to one big alloc for easy freeing. */
-       ret = (char **)xmalloc_array(char, *num * sizeof(char *) + len);
+       ret = malloc(*num * sizeof(char *) + len);
        if (!ret) {
-               xfree(strings);
+               free(strings);
                return ERR_PTR(-ENOMEM);
        }
        memcpy(&ret[*num], strings, len);
-       xfree(strings);
+       free(strings);
 
        strings = (char *)&ret[*num];
        for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
                ret[(*num)++] = p;
-       return ret;
+
+       return ret;
+}
+
+char **xenbus_directory(struct xenbus_transaction *t,
+                       const char *dir, const char *node, unsigned int *num)
+{
+       char *strings, *path;
+       unsigned int len;
+
+       path = join(dir, node);
+       if (IS_ERR(path))
+               return (char **)path;
+
+       strings = xs_single(t, XS_DIRECTORY, path, &len);
+       free(path);
+       if (IS_ERR(strings))
+               return (char **)strings;
+
+       return split(strings, len, num);
 }
 
 /* Check if a path exists. Return 1 if it does. */
-int xenbus_exists(const char *dir, const char *node)
+int xenbus_exists(struct xenbus_transaction *t,
+                 const char *dir, const char *node)
 {
        char **d;
        int dir_n;
 
-       d = xenbus_directory(dir, node, &dir_n);
+       d = xenbus_directory(t, dir, node, &dir_n);
        if (IS_ERR(d))
                return 0;
-       xfree(d);
+       free(d);
        return 1;
 }
 
@@ -243,92 +348,134 @@
  * Returns a kmalloced value: call free() on it after use.
  * len indicates length in bytes.
  */
-void *xenbus_read(const char *dir, const char *node, unsigned int *len)
-{
-       return xs_single(XS_READ, join(dir, node), len);
+void *xenbus_read(struct xenbus_transaction *t,
+                 const char *dir, const char *node, unsigned int *len)
+{
+       char *path;
+       void *ret;
+
+       path = join(dir, node);
+       if (IS_ERR(path))
+               return (void *)path;
+
+       ret = xs_single(t, XS_READ, path, len);
+       free(path);
+       return ret;
 }
 
 /* Write the value of a single file.
- * Returns -err on failure.  createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ * Returns -err on failure.
  */
-int xenbus_write(const char *dir, const char *node,
-                const char *string, int createflags)
-{
-       const char *flags, *path;
-       struct kvec iovec[3];
+int xenbus_write(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *string)
+{
+       const char *path;
+       struct kvec iovec[2];
+       int ret;
 
        path = join(dir, node);
-       /* Format: Flags (as string), path, data. */
-       if (createflags == 0)
-               flags = XS_WRITE_NONE;
-       else if (createflags == O_CREAT)
-               flags = XS_WRITE_CREATE;
-       else if (createflags == (O_CREAT|O_EXCL))
-               flags = XS_WRITE_CREATE_EXCL;
-       else
-               return -EINVAL;
+       if (IS_ERR(path))
+               return PTR_ERR(path);
 
        iovec[0].iov_base = (void *)path;
        iovec[0].iov_len = strlen(path) + 1;
-       iovec[1].iov_base = (void *)flags;
-       iovec[1].iov_len = strlen(flags) + 1;
-       iovec[2].iov_base = (void *)string;
-       iovec[2].iov_len = strlen(string);
-
-       return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+       iovec[1].iov_base = (void *)string;
+       iovec[1].iov_len = strlen(string);
+
+       ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+       free(path);
+       return ret;
 }
 
 /* Create a new directory. */
-int xenbus_mkdir(const char *dir, const char *node)
-{
-       return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL));
+int xenbus_mkdir(struct xenbus_transaction *t,
+                const char *dir, const char *node)
+{
+       char *path;
+       int ret;
+
+       path = join(dir, node);
+       if (IS_ERR(path))
+               return PTR_ERR(path);
+
+       ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
+       free(path);
+       return ret;
 }
 
 /* Destroy a file or directory (directories must be empty). */
-int xenbus_rm(const char *dir, const char *node)
-{
-       return xs_error(xs_single(XS_RM, join(dir, node), NULL));
+int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node)
+{
+       char *path;
+       int ret;
+
+       path = join(dir, node);
+       if (IS_ERR(path))
+               return PTR_ERR(path);
+
+       ret = xs_error(xs_single(t, XS_RM, path, NULL));
+       free(path);
+       return ret;
 }
 
 /* Start a transaction: changes by others will not be seen during this
  * transaction, and changes will not be visible to others until end.
- * Transaction only applies to the given subtree.
- * You can only have one transaction at any time.
  */
-int xenbus_transaction_start(const char *subtree)
-{
-       return xs_error(xs_single(XS_TRANSACTION_START, subtree, NULL));
+struct xenbus_transaction *xenbus_transaction_start(void)
+{
+       char *id_str;
+       unsigned long id;
+
+       down_read(&xs_state.suspend_mutex);
+
+       id_str = xs_single(NULL, XS_TRANSACTION_START, "", NULL);
+       if (IS_ERR(id_str)) {
+               up_read(&xs_state.suspend_mutex);
+               return (struct xenbus_transaction *)id_str;
+       }
+
+       id = simple_strtoul(id_str, NULL, 0);
+       free(id_str);
+
+       return (struct xenbus_transaction *)id;
 }
 
 /* End a transaction.
  * If abandon is true, transaction is discarded instead of committed.
  */
-int xenbus_transaction_end(int abort)
+int xenbus_transaction_end(struct xenbus_transaction *t, int abort)
 {
        char abortstr[2];
+       int err;
 
        if (abort)
                strcpy(abortstr, "F");
        else
                strcpy(abortstr, "T");
-       return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL));
+
+       err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
+
+       up_read(&xs_state.suspend_mutex);
+
+       return err;
 }
 
 /* Single read and scanf: returns -errno or num scanned. */
-int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
+int xenbus_scanf(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *fmt, ...)
 {
        va_list ap;
        int ret;
        char *val;
 
-       val = xenbus_read(dir, node, NULL);
+       val = xenbus_read(t, dir, node, NULL);
        if (IS_ERR(val))
                return PTR_ERR(val);
 
        va_start(ap, fmt);
        ret = vsscanf(val, fmt, ap);
        va_end(ap);
-       xfree(val);
+       free(val);
        /* Distinctive errno. */
        if (ret == 0)
                return -ERANGE;
@@ -336,23 +483,32 @@
 }
 
 /* Single printf and write: returns -errno or 0. */
-int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
+int xenbus_printf(struct xenbus_transaction *t,
+                 const char *dir, const char *node, const char *fmt, ...)
 {
        va_list ap;
        int ret;
-
-       //BUG_ON(down_trylock(&xenbus_lock) == 0);
+#define PRINTF_BUFFER_SIZE 4096
+       char *printf_buffer;
+
+       printf_buffer = malloc(PRINTF_BUFFER_SIZE);
+       if (printf_buffer == NULL)
+               return -ENOMEM;
+
        va_start(ap, fmt);
-       ret = vsnprintf(printf_buffer, sizeof(printf_buffer), fmt, ap);
+       ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
        va_end(ap);
 
-       //BUG_ON(ret > sizeof(printf_buffer)-1);
-       return xenbus_write(dir, node, printf_buffer, O_CREAT);
-}
-
-       
+       //      BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
+       ret = xenbus_write(t, dir, node, printf_buffer);
+
+       free(printf_buffer);
+
+       return ret;
+}
+
 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
-int xenbus_gather(const char *dir, ...)
+int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...)
 {
        va_list ap;
        const char *name;
@@ -364,7 +520,7 @@
                void *result = va_arg(ap, void *);
                char *p;
 
-               p = xenbus_read(dir, name, NULL);
+               p = xenbus_read(t, dir, name, NULL);
                if (IS_ERR(p)) {
                        ret = PTR_ERR(p);
                        break;
@@ -372,7 +528,7 @@
                if (fmt) {
                        if (sscanf(p, fmt, result) == 0)
                                ret = -EINVAL;
-                       xfree(p);
+                       free(p);
                } else
                        *(char **)result = p;
        }
@@ -389,31 +545,8 @@
        iov[1].iov_base = (void *)token;
        iov[1].iov_len = strlen(token) + 1;
 
-       return xs_error(xs_talkv(XS_WATCH, iov, ARRAY_SIZE(iov), NULL));
-}
-
-static char *xs_read_watch(char **token)
-{
-       enum xsd_sockmsg_type type;
-       char *ret;
-
-       ret = read_reply(&type, NULL);
-       if (IS_ERR(ret))
-               return ret;
-
-       //BUG_ON(type != XS_WATCH_EVENT);
-       *token = ret + strlen(ret) + 1;
-       return ret;
-}
-
-static int xs_acknowledge_watch(const char *token)
-{
-#if 0
-       return xs_error(xs_single(XS_WATCH_ACK, token, NULL));
-#else
-       /* XS_WATCH_ACK is no longer available */
-       return 0;
-#endif
+       return xs_error(xs_talkv(NULL, XS_WATCH, iov,
+                                ARRAY_SIZE(iov), NULL));
 }
 
 static int xs_unwatch(const char *path, const char *token)
@@ -425,10 +558,10 @@
        iov[1].iov_base = (char *)token;
        iov[1].iov_len = strlen(token) + 1;
 
-       return xs_error(xs_talkv(XS_UNWATCH, iov, ARRAY_SIZE(iov), NULL));
-}
-
-/* A little paranoia: we don't just trust token. */
+       return xs_error(xs_talkv(NULL, XS_UNWATCH, iov,
+                                ARRAY_SIZE(iov), NULL));
+}
+
 static struct xenbus_watch *find_watch(const char *token)
 {
        struct xenbus_watch *i, *cmp;
@@ -438,6 +571,7 @@
        list_for_each_entry(i, &watches, list)
                if (i == cmp)
                        return i;
+
        return NULL;
 }
 
@@ -449,111 +583,214 @@
        int err;
 
        sprintf(token, "%lX", (long)watch);
-       //BUG_ON(find_watch(token));
-printk("Registered watch for: %s\n", token);
+
+       down_read(&xs_state.suspend_mutex);
+
+       spin_lock(&watches_lock);
+       //      BUG_ON(find_watch(token));
+       list_add(&watch->list, &watches);
+       spin_unlock(&watches_lock);
+
        err = xs_watch(watch->node, token);
-       if (!err)
-               list_add(&watch->list, &watches);
+
+       /* Ignore errors due to multiple registration. */
+       if ((err != 0) && (err != -EEXIST)) {
+               spin_lock(&watches_lock);
+               list_del(&watch->list);
+               spin_unlock(&watches_lock);
+       }
+
+       up_read(&xs_state.suspend_mutex);
+
        return err;
 }
 
 void unregister_xenbus_watch(struct xenbus_watch *watch)
 {
+       struct xs_stored_msg *msg, *tmp;
        char token[sizeof(watch) * 2 + 1];
        int err;
 
        sprintf(token, "%lX", (long)watch);
-       //BUG_ON(!find_watch(token));
+
+       down_read(&xs_state.suspend_mutex);
+
+       spin_lock(&watches_lock);
+       //      BUG_ON(!find_watch(token));
+       list_del(&watch->list);
+       spin_unlock(&watches_lock);
 
        err = xs_unwatch(watch->node, token);
-       list_del(&watch->list);
-
        if (err)
                printk("XENBUS Failed to release watch %s: %i\n",
                       watch->node, err);
-}
-
-/* Re-register callbacks to all watches. */
-void reregister_xenbus_watches(void)
+
+       up_read(&xs_state.suspend_mutex);
+
+       /* Cancel pending watch events. */
+       spin_lock(&watch_events_lock);
+       list_for_each_entry_safe(msg, tmp, &watch_events, list) {
+               if (msg->u.watch.handle != watch)
+                       continue;
+               list_del(&msg->list);
+               free(msg->u.watch.vec);
+               free(msg);
+       }
+       spin_unlock(&watch_events_lock);
+}
+
+void xs_suspend(void)
+{
+       down_write(&xs_state.suspend_mutex);
+       down(&xs_state.request_mutex);
+}
+
+void xs_resume(void)
 {
        struct xenbus_watch *watch;
        char token[sizeof(watch) * 2 + 1];
 
+       up(&xs_state.request_mutex);
+
+       /* No need for watches_lock: the suspend_mutex is sufficient. */
        list_for_each_entry(watch, &watches, list) {
                sprintf(token, "%lX", (long)watch);
                xs_watch(watch->node, token);
        }
-}
-
-void watch_thread(void *unused)
-{
+
+       up_write(&xs_state.suspend_mutex);
+}
+
+static void xenwatch_thread(void *unused)
+{
+       struct list_head *ent;
+       struct xs_stored_msg *msg;
+
        for (;;) {
-               char *token;
-               char *node = NULL;
-
-               wait_event(xb_waitq, xs_input_avail());
-
-               /* If this is a spurious wakeup caused by someone
-                * doing an op, they'll hold the lock and the buffer
-                * will be empty by the time we get there.               
-                */
-               down(&xenbus_lock);
-               if (xs_input_avail())
-                       node = xs_read_watch(&token);
-
-               if (node && !IS_ERR(node)) {
-                       struct xenbus_watch *w;
-                       int err;
-
-                       err = xs_acknowledge_watch(token);
-                       if (err)
-                               printk("XENBUS ack %s fail %i\n", node, err);
-                       w = find_watch(token);
-                       //BUG_ON(!w);
-                       w->callback(w, node);
-                       xfree(node);
-               } else
-                       printk("XENBUS xs_read_watch: %li\n", PTR_ERR(node));
-               up(&xenbus_lock);
-       }
-}
-
-
-static void ballon_changed(struct xenbus_watch *watch, const char *node)
-{
-    unsigned long new_target;
-    int err;
-    err = xenbus_scanf("memory", "target", "%lu", &new_target);
-
-    if(err != 1)
-    {
-        printk("Unable to read memory/target\n");
-        return;
-    }
-
-    printk("Memory target changed to: %ld bytes, ignoring.\n", new_target);
-}
-
-
-static struct xenbus_watch ballon_watch = {
-    .node = "memory/target",
-    .callback = ballon_changed,
-};
-
-
+               wait_event(watch_events_waitq,
+                          !list_empty(&watch_events));
+
+               down(&xenwatch_mutex);
+
+               spin_lock(&watch_events_lock);
+               ent = watch_events.next;
+               if (ent != &watch_events)
+                       list_del(ent);
+               spin_unlock(&watch_events_lock);
+
+               if (ent != &watch_events) {
+                       msg = list_entry(ent, struct xs_stored_msg, list);
+                       msg->u.watch.handle->callback(
+                               msg->u.watch.handle,
+                               (const char **)msg->u.watch.vec,
+                               msg->u.watch.vec_size);
+                       free(msg->u.watch.vec);
+                       free(msg);
+               }
+
+               up(&xenwatch_mutex);
+       }
+}
+
+static int process_msg(void)
+{
+       struct xs_stored_msg *msg;
+       char *body;
+       int err;
+
+       msg = malloc(sizeof(*msg));
+       if (msg == NULL)
+               return -ENOMEM;
+
+       err = xb_read(&msg->hdr, sizeof(msg->hdr));
+       if (err) {
+               free(msg);
+               return err;
+       }
+
+       body = malloc(msg->hdr.len + 1);
+       if (body == NULL) {
+               free(msg);
+               return -ENOMEM;
+       }
+
+       err = xb_read(body, msg->hdr.len);
+       if (err) {
+               free(body);
+               free(msg);
+               return err;
+       }
+       body[msg->hdr.len] = '\0';
+
+       if (msg->hdr.type == XS_WATCH_EVENT) {
+               msg->u.watch.vec = split(body, msg->hdr.len,
+                                        &msg->u.watch.vec_size);
+               if (IS_ERR(msg->u.watch.vec)) {
+                       free(msg);
+                       return PTR_ERR(msg->u.watch.vec);
+               }
+
+               spin_lock(&watches_lock);
+               msg->u.watch.handle = find_watch(
+                       msg->u.watch.vec[XS_WATCH_TOKEN]);
+               if (msg->u.watch.handle != NULL) {
+                       spin_lock(&watch_events_lock);
+                       list_add_tail(&msg->list, &watch_events);
+                       wake_up(&watch_events_waitq);
+                       spin_unlock(&watch_events_lock);
+               } else {
+                       free(msg->u.watch.vec);
+                       free(msg);
+               }
+               spin_unlock(&watches_lock);
+       } else {
+               msg->u.reply.body = body;
+               spin_lock(&xs_state.reply_lock);
+               list_add_tail(&msg->list, &xs_state.reply_list);
+               spin_unlock(&xs_state.reply_lock);
+               wake_up(&xs_state.reply_waitq);
+       }
+
+       return 0;
+}
+
+static void xenbus_thread(void *unused)
+{
+       int err;
+
+       for (;;) {
+               err = process_msg();
+               if (err)
+                       printk("XENBUS error %d while reading "
+                              "message\n", err);
+       }
+}
 
 int xs_init(void)
 {
        int err;
-       struct thread *watcher;
-    printk("xb_init_comms\n");
+       struct thread *kxwatcher_thread;
+       struct thread *kxenbus_thread;
+
+       INIT_LIST_HEAD(&xs_state.reply_list);
+       spin_lock_init(&xs_state.reply_lock);
+       init_waitqueue_head(&xs_state.reply_waitq);
+
+       init_MUTEX(&xs_state.request_mutex);
+       init_rwsem(&xs_state.suspend_mutex);
+
+       /* Initialize the shared memory rings to talk to xenstored */
        err = xb_init_comms();
        if (err)
                return err;
-       
-       watcher = create_thread("kxwatch", watch_thread, NULL);
-    down(&xenbus_lock);
-    register_xenbus_watch(&ballon_watch);
-    up(&xenbus_lock);
+
+       kxwatcher_thread = create_thread("kxwatch", xenwatch_thread, NULL);
+       if (IS_ERR(kxwatcher_thread))
+               return PTR_ERR(kxwatcher_thread);
+
+       kxenbus_thread = create_thread("kxenbus", xenbus_thread, NULL);
+       if (IS_ERR(kxenbus_thread))
+               return PTR_ERR(kxenbus_thread);
+
        return 0;
 }
diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Thu Dec  8 15:04:31 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Thu Dec  8 15:04:41 2005
@@ -192,8 +192,8 @@
                page = balloon_retrieve();
                BUG_ON(page == NULL);
 
-               pfn = page - mem_map;
-               BUG_ON(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
+               pfn = page_to_pfn(page);
+               BUG_ON(phys_to_machine_mapping_valid(pfn));
 
                /* Update P->M and M->P tables. */
                set_phys_to_machine(pfn, mfn_list[i]);
@@ -253,8 +253,8 @@
                        break;
                }
 
-               pfn = page - mem_map;
-               mfn_list[i] = phys_to_machine_mapping[pfn];
+               pfn = page_to_pfn(page);
+               mfn_list[i] = pfn_to_mfn(pfn);
 
                if (!PageHighMem(page)) {
                        v = phys_to_virt(pfn << PAGE_SHIFT);
@@ -444,6 +444,9 @@
 
        IPRINTK("Initialising balloon driver.\n");
 
+       if (xen_init() < 0)
+               return -1;
+
        current_pages = min(xen_start_info->nr_pages, max_pfn);
        target_pages  = current_pages;
        balloon_low   = 0;
@@ -465,7 +468,7 @@
     
        /* Initialise the balloon with excess memory space. */
        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
-               page = &mem_map[pfn];
+               page = pfn_to_page(pfn);
                if (!PageReserved(page))
                        balloon_append(page);
        }
diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Thu Dec  8 15:04:31 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Thu Dec  8 15:04:41 2005
@@ -65,6 +65,8 @@
 extern unsigned long *phys_to_machine_mapping;
 #define pfn_to_mfn(pfn)        \
 (phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL<<31))
+#define        phys_to_machine_mapping_valid(pfn) \
+       (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
 {
        unsigned long pfn;
diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Thu Dec  8 15:04:31 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Thu Dec  8 15:04:41 2005
@@ -355,34 +355,27 @@
 #endif
     return 1;
 }
+#endif
 
 static inline int
 HYPERVISOR_update_va_mapping(
     unsigned long va, pte_t new_val, unsigned long flags)
 {
-#if 0
-    int ret;
-    unsigned long ign1, ign2, ign3;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
-       : "0" (__HYPERVISOR_update_va_mapping), 
-          "1" (va), "2" ((new_val).pte_low), "3" (flags)
-       : "memory" );
-
-    if ( unlikely(ret < 0) )
-    {
-        printk(KERN_ALERT "Failed update VA mapping: %08lx, %08lx, %08lx\n",
-               va, (new_val).pte_low, flags);
-        BUG();
-    }
-
-    return ret;
-#endif
-    return 1;
-}
-#endif
+    /* no-op */
+    return 1;
+}
+
+static inline int
+HYPERVISOR_memory_op(
+    unsigned int cmd, void *arg)
+{
+    int ret;
+    __asm__ __volatile__ ( ";; mov r14=%2 ; mov r15=%3 ; mov r2=%1 ; break 0x1000 ;; mov %0=r8 ;;"
+        : "=r" (ret)
+        : "i" (__HYPERVISOR_console_io), "r"(cmd), "r"(arg)
+        : "r14","r15","r2","r8","memory" );
+    return ret;
+}
 
 static inline int
 HYPERVISOR_event_channel_op(
diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h        Thu Dec  8 15:04:31 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h        Thu Dec  8 15:04:41 2005
@@ -52,4 +52,19 @@
 #define        mfn_to_pfn(x)   (x)
 #define machine_to_phys_mapping 0
 
+// for drivers/xen/balloon/balloon.c
+#ifdef CONFIG_XEN_SCRUB_PAGES
+#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
+#else
+#define scrub_pages(_p,_n) ((void)0)
+#endif
+#define        pte_mfn(_x)     pte_pfn(_x)
+#define INVALID_P2M_ENTRY      (~0UL)
+#define __pte_ma(_x)   ((pte_t) {(_x)})
+#define phys_to_machine_mapping_valid(_x)      (1)
+#define        kmap_flush_unused()     do {} while (0)
+#define set_phys_to_machine(_x,_y)     do {} while (0)
+#define xen_machphys_update(_x,_y)     do {} while (0)
+#define pfn_pte_ma(_x,_y)      __pte_ma(0)
+
 #endif /* __HYPERVISOR_H__ */
diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Thu Dec  8 15:04:31 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Thu Dec  8 15:04:41 2005
@@ -67,6 +67,8 @@
 extern unsigned long *phys_to_machine_mapping;
 #define pfn_to_mfn(pfn)        \
 (phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL << 63))
+#define        phys_to_machine_mapping_valid(pfn) \
+       (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
 {
        unsigned long pfn;
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/shadow.c     Thu Dec  8 15:04:41 2005
@@ -1450,6 +1450,7 @@
     int changed;
     u32 min_max_shadow, min_max_snapshot;
     int min_shadow, max_shadow, min_snapshot, max_snapshot;
+    struct vcpu *v;
 
     ASSERT(shadow_lock_is_acquired(d));
 
@@ -1739,6 +1740,9 @@
 
         if ( unlikely(unshadow) )
         {
+            for_each_vcpu(d, v)
+                if(smfn == pagetable_get_pfn(v->arch.shadow_table))
+                    return need_flush;
             perfc_incrc(unshadow_l2_count);
             shadow_unpin(smfn);
 #if CONFIG_PAGING_LEVELS == 2
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/shadow32.c   Thu Dec  8 15:04:41 2005
@@ -2326,6 +2326,7 @@
     int changed;
     u32 min_max_shadow, min_max_snapshot;
     int min_shadow, max_shadow, min_snapshot, max_snapshot;
+    struct vcpu *v;
 
     ASSERT(shadow_lock_is_acquired(d));
 
@@ -2527,6 +2528,9 @@
 
         if ( unlikely(unshadow) )
         {
+            for_each_vcpu(d, v)
+                if(smfn == pagetable_get_pfn(v->arch.shadow_table))
+                    return need_flush;
             perfc_incrc(unshadow_l2_count);
             shadow_unpin(smfn);
             if ( unlikely(shadow_mode_external(d)) )
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/vmx.c        Thu Dec  8 15:04:41 2005
@@ -108,7 +108,7 @@
     destroy_vmcs(&v->arch.arch_vmx);
     free_monitor_pagetable(v);
     vpit = &v->domain->arch.vmx_platform.vmx_pit;
-    if ( vpit->ticking && active_ac_timer(&(vpit->pit_timer)) )
+    if ( active_ac_timer(&(vpit->pit_timer)) )
         rem_ac_timer(&vpit->pit_timer);
     if ( active_ac_timer(&v->arch.arch_vmx.hlt_timer) ) {
         rem_ac_timer(&v->arch.arch_vmx.hlt_timer);
@@ -905,7 +905,7 @@
 int
 vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
 {
-    unsigned long mfn, old_cr4;
+    unsigned long mfn, old_cr4, old_base_mfn;
     int error = 0;
 
     error |= __vmwrite(GUEST_RIP, c->eip);
@@ -945,7 +945,12 @@
             return 0;
         }
         mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
+        if(!get_page(pfn_to_page(mfn), v->domain))
+                return 0;
+        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
         v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+        if (old_base_mfn)
+             put_page(pfn_to_page(old_base_mfn));
         update_pagetables(v);
         /*
          * arch.shadow_table should now hold the next CR3 for shadow
@@ -1174,9 +1179,11 @@
     }
 
     if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
-        if(v->arch.arch_vmx.cpu_cr3)
+        if(v->arch.arch_vmx.cpu_cr3){
             put_page(pfn_to_page(get_mfn_from_pfn(
                       v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)));
+            v->arch.guest_table = mk_pagetable(0);
+        }
 
     /*
      * VMX does not implement real-mode virtualization. We emulate
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c      Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/vmx_intercept.c      Thu Dec  8 15:04:41 2005
@@ -387,7 +387,6 @@
         }
         else {
             init_ac_timer(&vpit->pit_timer, pit_timer_fn, v, v->processor);
-            vpit->ticking = 1;
         }
 
         /* init count for this channel */
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/vmx_io.c     Thu Dec  8 15:04:41 2005
@@ -748,7 +748,7 @@
 {
     /* clear the event *before* checking for work. This should avoid
        the set-and-check races */
-    if (vmx_clear_pending_io_event(current))
+    if (vmx_clear_pending_io_event(v))
         vmx_io_assist(v);
 }
 
@@ -793,29 +793,39 @@
     return __fls(pintr[0]);
 }
 
+void set_tsc_shift(struct vcpu *v,struct vmx_virpit *vpit)
+{
+    u64   drift;
+
+    if ( vpit->first_injected )
+        drift = vpit->period_cycles * vpit->pending_intr_nr;
+    else 
+        drift = 0;
+    drift = v->arch.arch_vmx.tsc_offset - drift;
+    __vmwrite(TSC_OFFSET, drift);
+
+#if defined (__i386__)
+    __vmwrite(TSC_OFFSET_HIGH, (drift >> 32));
+#endif
+}
+
 #define BSP_CPU(v)    (!(v->vcpu_id))
 static inline void
 interrupt_post_injection(struct vcpu * v, int vector, int type)
 {
     struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
-    u64    drift;
 
     if ( is_pit_irq(v, vector, type) ) {
         if ( !vpit->first_injected ) {
+            vpit->pending_intr_nr = 0;
+            vpit->scheduled = NOW() + vpit->period;
+            set_ac_timer(&vpit->pit_timer, vpit->scheduled);
             vpit->first_injected = 1;
-            vpit->pending_intr_nr = 0;
         } else {
             vpit->pending_intr_nr--;
         }
         vpit->inject_point = NOW();
-        drift = vpit->period_cycles * vpit->pending_intr_nr;
-        drift = v->arch.arch_vmx.tsc_offset - drift;
-        __vmwrite(TSC_OFFSET, drift);
-
-#if defined (__i386__)
-        __vmwrite(TSC_OFFSET_HIGH, (drift >> 32));
-#endif
-
+        set_tsc_shift (v, vpit);
     }
 
     switch(type)
@@ -982,8 +992,10 @@
             vmx_wait_io();
     }
     /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( vpit->ticking )
+    if ( vpit->first_injected ) {
         pickup_deactive_ticks(vpit);
+    }
+    set_tsc_shift(v,vpit);
 
     /* We can't resume the guest if we're waiting on I/O */
     ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags));
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/vmx_vmcs.c   Thu Dec  8 15:04:41 2005
@@ -243,9 +243,6 @@
 {
     struct vmx_platform *platform;
 
-    if (!(VMX_DOMAIN(current) && (current->vcpu_id == 0)))
-        return;
-
     vmx_map_io_shared_page(d);
     vmx_set_vcpu_nr(d);
 
@@ -290,6 +287,7 @@
 /* Update CR3, GDT, LDT, TR */
     unsigned int  error = 0;
     unsigned long cr0, cr4;
+    u64     host_tsc;
 
     if (v->vcpu_id == 0)
         vmx_setup_platform(v->domain);
@@ -337,6 +335,10 @@
     __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
 
     v->arch.schedule_tail = arch_vmx_do_resume;
+    /* init guest tsc to start from 0 */
+    rdtscll(host_tsc);
+    v->arch.arch_vmx.tsc_offset = 0 - host_tsc;
+    set_tsc_shift (v, &v->domain->arch.vmx_platform.vmx_pit);
 }
 
 /*
@@ -366,7 +368,6 @@
     error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
 
     /* TSC */
-    error |= __vmwrite(TSC_OFFSET, 0);
     error |= __vmwrite(CR3_TARGET_COUNT, 0);
 
     /* Guest Selectors */
diff -r 76bff6c996b0 -r c9772105fead xen/include/asm-x86/vmx_vpit.h
--- a/xen/include/asm-x86/vmx_vpit.h    Thu Dec  8 15:04:31 2005
+++ b/xen/include/asm-x86/vmx_vpit.h    Thu Dec  8 15:04:41 2005
@@ -27,7 +27,6 @@
     unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
     u32 period;                /* pit frequency in ns */
     int first_injected;                 /* flag to prevent shadow window */
-    int ticking;    /* indicating it is ticking */
 
     /* virtual PIT state for handle related I/O */
     int read_state;
@@ -51,5 +50,6 @@
     else
         return -1;
 }
+extern void set_tsc_shift(struct vcpu *v,struct vmx_virpit *vpit);
 
 #endif /* _VMX_VIRPIT_H_ */
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/spinlock.h
--- /dev/null   Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/spinlock.h Thu Dec  8 15:04:41 2005
@@ -0,0 +1,121 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <lib.h>
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+       volatile unsigned int slock; /* 1 when free (see SPIN_LOCK_UNLOCKED); spin_is_locked() treats a first byte <= 0 as held */
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead /* not referenced anywhere else in this header */
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 } /* value of a free lock: slock == 1 */
+
+#define spin_lock_init(x)      do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) /* x: pointer to the spinlock_t to reset to the free state */
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x)      (*(volatile signed char *)(&(x)->slock) <= 0) /* reads only the first byte of slock, as a signed char; <= 0 means held */
+#define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x)) /* loops until the lock reads as free; barrier() is defined elsewhere (presumably a compiler barrier) */
+
+#define spin_lock_string /* asm template for the acquire loop; %0 is the lock byte */ \
+        "1:\n" /* (re)try point */ \
+       LOCK /* defined elsewhere; presumably the x86 lock prefix */ \
+       "decb %0\n\t" /* decrement the lock byte: 1 -> 0 when the lock was free */ \
+       "jns 3f\n" /* result not negative: the lock is ours */ \
+       "2:\t" \
+       "rep;nop\n\t" /* encodes PAUSE, a spin-wait hint */ \
+       "cmpb $0,%0\n\t" \
+       "jle 2b\n\t" /* byte still <= 0: keep waiting without writing */ \
+       "jmp 1b\n" /* byte went positive: retry the decrement */ \
+       "3:\n\t"
+
+#define spin_lock_string_flags /* like spin_lock_string, plus %1 = caller-supplied flags word */ \
+        "1:\n" /* (re)try point */ \
+       LOCK /* defined elsewhere; presumably the x86 lock prefix */ \
+       "decb %0\n\t" /* decrement the lock byte: 1 -> 0 when the lock was free */ \
+       "jns 4f\n\t" /* result not negative: the lock is ours */ \
+       "2:\t" \
+       "testl $0x200, %1\n\t" /* bit 0x200 of the flags word (EFLAGS.IF on x86) */ \
+       "jz 3f\n\t" /* bit clear: skip the (disabled) sti */ \
+       "#sti\n\t" /* '#' starts an assembler comment here, so no sti is emitted */ \
+       "3:\t" \
+       "rep;nop\n\t" /* encodes PAUSE, a spin-wait hint */ \
+       "cmpb $0, %0\n\t" \
+       "jle 3b\n\t" /* byte still <= 0: keep waiting without writing */ \
+       "#cli\n\t" /* likewise commented out at the assembler level */ \
+       "jmp 1b\n" /* byte went positive: retry the decrement */ \
+       "4:\n\t"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+
+/*
+ * Instruction and operand list for the unlock asm statement. It expects
+ * `oldval` and `lock` to be in scope where it is expanded.
+ *
+ * xchgb swaps the low byte of oldval with the lock byte. The lock is a
+ * read-write operand ("+m") because xchg loads it as well as storing it;
+ * declaring it output-only would tell the compiler the old contents are
+ * never read.
+ */
+#define spin_unlock_string \
+       "xchgb %b0, %1" \
+               :"=q" (oldval), "+m" (lock->slock) \
+               :"0" (oldval) : "memory"
+
+/*
+ * Release `lock` by exchanging 1 (the SPIN_LOCK_UNLOCKED value) into its
+ * low byte. The previous contents end up in oldval and are discarded.
+ */
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+       char oldval = 1;
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+/*
+ * Try to take `lock` without spinning.
+ *
+ * Swaps 0 into the lock byte and inspects what was there before.
+ * Returns 1 if the previous value was positive (the lock was free and is
+ * now held by the caller), 0 otherwise.
+ */
+static inline int _raw_spin_trylock(spinlock_t *lock)
+{
+       /*
+        * Explicitly signed: a contended lock byte is negative, so the
+        * comparison below must not depend on plain char's signedness.
+        */
+       signed char oldval;
+       __asm__ __volatile__(
+               "xchgb %b0,%1\n"
+               /* "+m": xchg reads the lock byte as well as writing it */
+               :"=q" (oldval), "+m" (lock->slock)
+               :"0" (0) : "memory");
+       return oldval > 0;
+}
+
+/*
+ * Acquire `lock`, spinning until it becomes available.
+ *
+ * The loop itself lives in spin_lock_string. The lock byte is passed as a
+ * read-write operand ("+m") because the template decrements and compares
+ * it; an output-only constraint would claim its prior value is unused.
+ */
+static inline void _raw_spin_lock(spinlock_t *lock)
+{
+       __asm__ __volatile__(
+               spin_lock_string
+               :"+m" (lock->slock) : : "memory");
+}
+
+/*
+ * Acquire `lock`, spinning until it becomes available.
+ *
+ * lock:  the spinlock to take.
+ * flags: flags word handed to spin_lock_string_flags as operand %1. The
+ *        template tests bit 0x200 of it, but its sti/cli instructions are
+ *        commented out, so the wait loop does not change interrupt state.
+ *
+ * The lock byte is a read-write operand ("+m") because the template
+ * decrements and compares it.
+ */
+static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
+{
+       __asm__ __volatile__(
+               spin_lock_string_flags
+               :"+m" (lock->slock) : "r" (flags) : "memory");
+}
+
+/*
+ * Non-blocking acquire: evaluates to 1 when _raw_spin_trylock() reports
+ * success and to 0 otherwise.
+ */
+#define _spin_trylock(lock)     (_raw_spin_trylock(lock) ? 1 : 0)
+
+/* Acquire: statement-style wrapper around _raw_spin_lock(). */
+#define _spin_lock(lock)        do { _raw_spin_lock(lock); } while (0)
+
+/* Release: statement-style wrapper around _raw_spin_unlock(). */
+#define _spin_unlock(lock)      do { _raw_spin_unlock(lock); } while (0)
+
+/* Names without the leading underscore, forwarding to the wrappers above. */
+#define spin_lock(lock)         _spin_lock(lock)
+#define spin_unlock(lock)       _spin_unlock(lock)
+
+/* Define a spinlock_t variable named x that starts out unlocked. */
+#define DEFINE_SPINLOCK(x)      spinlock_t x = SPIN_LOCK_UNLOCKED
+
+#endif

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.