[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Merged.



# HG changeset patch
# User emellor@ewan
# Node ID 1ac39c7a043541cfa94655f0e9ab98d4503c29a2
# Parent  0e7c48861e95b738fdf96d4a4df6b0ba90a8423d
# Parent  b7dce4fe2488bf354e5718a84fdb82bed3919761
Merged.

diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Mon Oct 10 
13:46:53 2005
@@ -1327,18 +1327,14 @@
        .callback = handle_vcpu_hotplug_event
 };
 
-/* NB: Assumes xenbus_lock is held! */
 static int setup_cpu_watcher(struct notifier_block *notifier,
                              unsigned long event, void *data)
 {
-       int err = 0;
-
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
+       int err;
+
        err = register_xenbus_watch(&cpu_watch);
-
-       if (err) {
+       if (err)
                printk("Failed to register watch on /cpu\n");
-       }
 
        return NOTIFY_DONE;
 }
@@ -1368,7 +1364,7 @@
                        return;
 
                /* get the state value */
-               err = xenbus_scanf(dir, "availability", "%s", state);
+               err = xenbus_scanf(NULL, dir, "availability", "%s", state);
 
                if (err != 1) {
                        printk(KERN_ERR
@@ -1578,7 +1574,7 @@
 void smp_resume(void)
 {
        smp_intr_init();
-       local_setup_timer_irq();
+       local_setup_timer();
 }
 
 void vcpu_prepare(int vcpu)
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Mon Oct 10 13:46:53 2005
@@ -122,7 +122,7 @@
 static u64 processed_system_time;   /* System time (ns) at last processing. */
 static DEFINE_PER_CPU(u64, processed_system_time);
 
-#define NS_PER_TICK (1000000000L/HZ)
+#define NS_PER_TICK (1000000000ULL/HZ)
 
 static inline void __normalize_time(time_t *sec, s64 *nsec)
 {
@@ -800,9 +800,9 @@
                delta = j - jiffies;
                /* NB. The next check can trigger in some wrap-around cases,
                 * but that's ok: we'll just end up with a shorter timeout. */
-               if (delta < 1)
+               if (delta < 1) 
                        delta = 1;
-               st = processed_system_time + (delta * NS_PER_TICK);
+               st = processed_system_time + ((u64)delta * NS_PER_TICK);
        } while (read_seqretry(&xtime_lock, seq));
 
        return st;
@@ -816,7 +816,7 @@
 {
        unsigned int cpu = smp_processor_id();
        unsigned long j;
-
+       
        /* s390 does this /before/ checking rcu_pending(). We copy them. */
        cpu_set(cpu, nohz_cpu_mask);
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Mon Oct 10 13:46:53 2005
@@ -275,22 +275,23 @@
 {
        static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
        char *str;
+       struct xenbus_transaction *xbt;
        int err;
 
  again:
-       err = xenbus_transaction_start();
-       if (err)
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt))
                return;
-       str = (char *)xenbus_read("control", "shutdown", NULL);
+       str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
        /* Ignore read errors and empty reads. */
        if (XENBUS_IS_ERR_READ(str)) {
-               xenbus_transaction_end(1);
+               xenbus_transaction_end(xbt, 1);
                return;
        }
 
-       xenbus_write("control", "shutdown", "");
-
-       err = xenbus_transaction_end(0);
+       xenbus_write(xbt, "control", "shutdown", "");
+
+       err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN) {
                kfree(str);
                goto again;
@@ -320,23 +321,24 @@
                          unsigned int len)
 {
        char sysrq_key = '\0';
+       struct xenbus_transaction *xbt;
        int err;
 
  again:
-       err = xenbus_transaction_start();
-       if (err)
+       xbt  = xenbus_transaction_start();
+       if (IS_ERR(xbt))
                return;
-       if (!xenbus_scanf("control", "sysrq", "%c", &sysrq_key)) {
+       if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
                printk(KERN_ERR "Unable to read sysrq code in "
                       "control/sysrq\n");
-               xenbus_transaction_end(1);
+               xenbus_transaction_end(xbt, 1);
                return;
        }
 
        if (sysrq_key != '\0')
-               xenbus_printf("control", "sysrq", "%c", '\0');
-
-       err = xenbus_transaction_end(0);
+               xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+
+       err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
                goto again;
 
@@ -360,9 +362,6 @@
 
 static struct notifier_block xenstore_notifier;
 
-/* Setup our watcher
-   NB: Assumes xenbus_lock is held!
-*/
 static int setup_shutdown_watcher(struct notifier_block *notifier,
                                   unsigned long event,
                                   void *data)
@@ -371,8 +370,6 @@
 #ifdef CONFIG_MAGIC_SYSRQ
        int err2 = 0;
 #endif
-
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
 
        err1 = register_xenbus_watch(&shutdown_watch);
 #ifdef CONFIG_MAGIC_SYSRQ
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Mon Oct 10 
13:46:53 2005
@@ -357,7 +357,7 @@
        unsigned long long new_target;
        int err;
 
-       err = xenbus_scanf("memory", "target", "%llu", &new_target);
+       err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target);
        if (err != 1) {
                printk(KERN_ERR "Unable to read memory/target\n");
                return;
@@ -370,16 +370,11 @@
     
 }
 
-/* Setup our watcher
-   NB: Assumes xenbus_lock is held!
-*/
 int balloon_init_watcher(struct notifier_block *notifier,
                          unsigned long event,
                          void *data)
 {
        int err;
-
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
 
        err = register_xenbus_watch(&target_watch);
        if (err)
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Oct 10 13:46:53 2005
@@ -61,18 +61,19 @@
        unsigned long ring_ref;
        unsigned int evtchn;
        int err;
+       struct xenbus_transaction *xbt;
        struct backend_info *be
                = container_of(watch, struct backend_info, watch);
 
        /* If other end is gone, delete ourself. */
-       if (vec && !xenbus_exists(be->frontpath, "")) {
+       if (vec && !xenbus_exists(NULL, be->frontpath, "")) {
                device_unregister(&be->dev->dev);
                return;
        }
        if (be->blkif == NULL || be->blkif->status == CONNECTED)
                return;
 
-       err = xenbus_gather(be->frontpath, "ring-ref", "%lu", &ring_ref,
+       err = xenbus_gather(NULL, be->frontpath, "ring-ref", "%lu", &ring_ref,
                            "event-channel", "%u", &evtchn, NULL);
        if (err) {
                xenbus_dev_error(be->dev, err,
@@ -84,7 +85,8 @@
        /* Map the shared frame, irq etc. */
        err = blkif_map(be->blkif, ring_ref, evtchn);
        if (err) {
-               xenbus_dev_error(be->dev, err, "mapping ring-ref %lu port %u",
+               xenbus_dev_error(be->dev, err,
+                                "mapping ring-ref %lu port %u",
                                 ring_ref, evtchn);
                return;
        }
@@ -92,13 +94,13 @@
 
 again:
        /* Supply the information about the device the frontend needs */
-       err = xenbus_transaction_start();
-       if (err) {
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt)) {
                xenbus_dev_error(be->dev, err, "starting transaction");
                return;
        }
 
-       err = xenbus_printf(be->dev->nodename, "sectors", "%lu",
+       err = xenbus_printf(xbt, be->dev->nodename, "sectors", "%lu",
                            vbd_size(&be->blkif->vbd));
        if (err) {
                xenbus_dev_error(be->dev, err, "writing %s/sectors",
@@ -107,14 +109,14 @@
        }
 
        /* FIXME: use a typename instead */
-       err = xenbus_printf(be->dev->nodename, "info", "%u",
+       err = xenbus_printf(xbt, be->dev->nodename, "info", "%u",
                            vbd_info(&be->blkif->vbd));
        if (err) {
                xenbus_dev_error(be->dev, err, "writing %s/info",
                                 be->dev->nodename);
                goto abort;
        }
-       err = xenbus_printf(be->dev->nodename, "sector-size", "%lu",
+       err = xenbus_printf(xbt, be->dev->nodename, "sector-size", "%lu",
                            vbd_secsize(&be->blkif->vbd));
        if (err) {
                xenbus_dev_error(be->dev, err, "writing %s/sector-size",
@@ -122,7 +124,7 @@
                goto abort;
        }
 
-       err = xenbus_transaction_end(0);
+       err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
                goto again;
        if (err) {
@@ -136,7 +138,7 @@
        return;
 
  abort:
-       xenbus_transaction_end(1);
+       xenbus_transaction_end(xbt, 1);
 }
 
 /* 
@@ -154,7 +156,8 @@
                = container_of(watch, struct backend_info, backend_watch);
        struct xenbus_device *dev = be->dev;
 
-       err = xenbus_scanf(dev->nodename, "physical-device", "%li", &pdev);
+       err = xenbus_scanf(NULL, dev->nodename,
+                          "physical-device", "%li", &pdev);
        if (XENBUS_EXIST_ERR(err))
                return;
        if (err < 0) {
@@ -169,7 +172,7 @@
        be->pdev = pdev;
 
        /* If there's a read-only node, we're read only. */
-       p = xenbus_read(dev->nodename, "read-only", NULL);
+       p = xenbus_read(NULL, dev->nodename, "read-only", NULL);
        if (!IS_ERR(p)) {
                be->readonly = 1;
                kfree(p);
@@ -184,7 +187,8 @@
                if (IS_ERR(be->blkif)) {
                        err = PTR_ERR(be->blkif);
                        be->blkif = NULL;
-                       xenbus_dev_error(dev, err, "creating block interface");
+                       xenbus_dev_error(dev, err,
+                                        "creating block interface");
                        return;
                }
 
@@ -192,7 +196,8 @@
                if (err) {
                        blkif_put(be->blkif);
                        be->blkif = NULL;
-                       xenbus_dev_error(dev, err, "creating vbd structure");
+                       xenbus_dev_error(dev, err,
+                                        "creating vbd structure");
                        return;
                }
 
@@ -210,13 +215,14 @@
 
        be = kmalloc(sizeof(*be), GFP_KERNEL);
        if (!be) {
-               xenbus_dev_error(dev, -ENOMEM, "allocating backend structure");
+               xenbus_dev_error(dev, -ENOMEM,
+                                "allocating backend structure");
                return -ENOMEM;
        }
        memset(be, 0, sizeof(*be));
 
        frontend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "frontend-id", "%li", &be->frontend_id,
                            "frontend", NULL, &frontend,
                            NULL);
@@ -228,7 +234,7 @@
                                 dev->nodename);
                goto free_be;
        }
-       if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
+       if (strlen(frontend) == 0 || !xenbus_exists(NULL, frontend, "")) {
                /* If we can't get a frontend path and a frontend-id,
                 * then our bus-id is no longer valid and we need to
                 * destroy the backend device.
@@ -244,7 +250,8 @@
        err = register_xenbus_watch(&be->backend_watch);
        if (err) {
                be->backend_watch.node = NULL;
-               xenbus_dev_error(dev, err, "adding backend watch on %s",
+               xenbus_dev_error(dev, err,
+                                "adding backend watch on %s",
                                 dev->nodename);
                goto free_be;
        }
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Mon Oct 10 
13:46:53 2005
@@ -460,7 +460,7 @@
        if (info->connected == BLKIF_STATE_CONNECTED)
                return;
 
-       err = xenbus_gather(watch->node,
+       err = xenbus_gather(NULL, watch->node,
                            "sectors", "%lu", &sectors,
                            "info", "%u", &binfo,
                            "sector-size", "%lu", &sector_size,
@@ -532,10 +532,11 @@
 {
        char *backend;
        const char *message;
+       struct xenbus_transaction *xbt;
        int err;
 
        backend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "backend-id", "%i", &info->backend_id,
                            "backend", NULL, &backend,
                            NULL);
@@ -559,25 +560,26 @@
        }
 
 again:
-       err = xenbus_transaction_start();
-       if (err) {
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt)) {
                xenbus_dev_error(dev, err, "starting transaction");
                goto destroy_blkring;
        }
 
-       err = xenbus_printf(dev->nodename, "ring-ref","%u", info->ring_ref);
+       err = xenbus_printf(xbt, dev->nodename,
+                           "ring-ref","%u", info->ring_ref);
        if (err) {
                message = "writing ring-ref";
                goto abort_transaction;
        }
-       err = xenbus_printf(dev->nodename,
+       err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }
 
-       err = xenbus_transaction_end(0);
+       err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == -EAGAIN)
                        goto again;
@@ -598,8 +600,7 @@
        return 0;
 
  abort_transaction:
-       xenbus_transaction_end(1);
-       /* Have to do this *outside* transaction.  */
+       xenbus_transaction_end(xbt, 1);
        xenbus_dev_error(dev, err, "%s", message);
  destroy_blkring:
        blkif_free(info);
@@ -620,7 +621,8 @@
        struct blkfront_info *info;
 
        /* FIXME: Use dynamic device id if this is not set. */
-       err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice);
+       err = xenbus_scanf(NULL, dev->nodename,
+                          "virtual-device", "%i", &vdevice);
        if (XENBUS_EXIST_ERR(err))
                return err;
        if (err < 0) {
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Mon Oct 10 13:46:53 2005
@@ -160,7 +160,8 @@
 
        mi = ((major_info[index] != NULL) ? major_info[index] :
              xlbd_alloc_major_info(major, minor, index));
-       mi->usage++;
+       if (mi)
+               mi->usage++;
        return mi;
 }
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Mon Oct 10 13:46:53 2005
@@ -69,15 +69,15 @@
        int i;
 
        /* If other end is gone, delete ourself. */
-       if (vec && !xenbus_exists(be->frontpath, "")) {
-               xenbus_rm(be->dev->nodename, "");
+       if (vec && !xenbus_exists(NULL, be->frontpath, "")) {
+               xenbus_rm(NULL, be->dev->nodename, "");
                device_unregister(&be->dev->dev);
                return;
        }
        if (be->netif == NULL || be->netif->status == CONNECTED)
                return;
 
-       mac = xenbus_read(be->frontpath, "mac", NULL);
+       mac = xenbus_read(NULL, be->frontpath, "mac", NULL);
        if (IS_ERR(mac)) {
                err = PTR_ERR(mac);
                xenbus_dev_error(be->dev, err, "reading %s/mac",
@@ -98,7 +98,8 @@
        }
        kfree(mac);
 
-       err = xenbus_gather(be->frontpath, "tx-ring-ref", "%lu", &tx_ring_ref,
+       err = xenbus_gather(NULL, be->frontpath,
+                           "tx-ring-ref", "%lu", &tx_ring_ref,
                            "rx-ring-ref", "%lu", &rx_ring_ref,
                            "event-channel", "%u", &evtchn, NULL);
        if (err) {
@@ -137,7 +138,7 @@
        struct xenbus_device *dev = be->dev;
        u8 be_mac[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 };
 
-       err = xenbus_scanf(dev->nodename, "handle", "%li", &handle);
+       err = xenbus_scanf(NULL, dev->nodename, "handle", "%li", &handle);
        if (XENBUS_EXIST_ERR(err))
                return;
        if (err < 0) {
@@ -188,7 +189,7 @@
 
        key = env_vars;
        while (*key != NULL) {
-               val = xenbus_read(xdev->nodename, *key, NULL);
+               val = xenbus_read(NULL, xdev->nodename, *key, NULL);
                if (!IS_ERR(val)) {
                        char buf[strlen(*key) + 4];
                        sprintf(buf, "%s=%%s", *key);
@@ -220,7 +221,7 @@
        memset(be, 0, sizeof(*be));
 
        frontend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "frontend-id", "%li", &be->frontend_id,
                            "frontend", NULL, &frontend,
                            NULL);
@@ -232,7 +233,7 @@
                                 dev->nodename);
                goto free_be;
        }
-       if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
+       if (strlen(frontend) == 0 || !xenbus_exists(NULL, frontend, "")) {
                /* If we can't get a frontend path and a frontend-id,
                 * then our bus-id is no longer valid and we need to
                 * destroy the backend device.
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Oct 10 
13:46:53 2005
@@ -1083,10 +1083,11 @@
 {
        char *backend, *mac, *e, *s;
        const char *message;
+       struct xenbus_transaction *xbt;
        int err, i;
 
        backend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "backend-id", "%i", &info->backend_id,
                            "backend", NULL, &backend,
                            NULL);
@@ -1102,7 +1103,7 @@
                goto out;
        }
 
-       mac = xenbus_read(dev->nodename, "mac", NULL);
+       mac = xenbus_read(NULL, dev->nodename, "mac", NULL);
        if (IS_ERR(mac)) {
                err = PTR_ERR(mac);
                xenbus_dev_error(dev, err, "reading %s/mac",
@@ -1131,32 +1132,32 @@
        }
 
 again:
-       err = xenbus_transaction_start();
-       if (err) {
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt)) {
                xenbus_dev_error(dev, err, "starting transaction");
                goto destroy_ring;
        }
 
-       err = xenbus_printf(dev->nodename, "tx-ring-ref","%u",
+       err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
                            info->tx_ring_ref);
        if (err) {
                message = "writing tx ring-ref";
                goto abort_transaction;
        }
-       err = xenbus_printf(dev->nodename, "rx-ring-ref","%u",
+       err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
                            info->rx_ring_ref);
        if (err) {
                message = "writing rx ring-ref";
                goto abort_transaction;
        }
-       err = xenbus_printf(dev->nodename,
+       err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }
 
-       err = xenbus_transaction_end(0);
+       err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == -EAGAIN)
                        goto again;
@@ -1177,8 +1178,7 @@
        return 0;
 
  abort_transaction:
-       xenbus_transaction_end(1);
-       /* Have to do this *outside* transaction.  */
+       xenbus_transaction_end(xbt, 1);
        xenbus_dev_error(dev, err, "%s", message);
  destroy_ring:
        shutdown_device(info);
@@ -1201,7 +1201,7 @@
        struct netfront_info *info;
        unsigned int handle;
 
-       err = xenbus_scanf(dev->nodename, "handle", "%u", &handle);
+       err = xenbus_scanf(NULL, dev->nodename, "handle", "%u", &handle);
        if (XENBUS_EXIST_ERR(err))
                return err;
        if (err < 0) {
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Mon Oct 10 
13:46:53 2005
@@ -246,7 +246,10 @@
                                   PAGE_SHIFT);
                ret = xen_start_info->store_mfn;
 
-               /* We'll return then this will wait for daemon to answer */
+               /* 
+               ** Complete initialization of xenbus (viz. set up the 
+               ** connection to xenstored now that it has started). 
+               */
                kthread_run(do_xenbus_probe, NULL, "xenbus_probe");
        }
        break;
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Mon Oct 10 13:46:53 2005
@@ -66,12 +66,13 @@
        unsigned int evtchn;
        unsigned long ready = 1;
        int err;
+       struct xenbus_transaction *xbt;
        struct backend_info *be
                = container_of(watch, struct backend_info, watch);
 
        /* If other end is gone, delete ourself. */
-       if (vec && !xenbus_exists(be->frontpath, "")) {
-               xenbus_rm(be->dev->nodename, "");
+       if (vec && !xenbus_exists(NULL, be->frontpath, "")) {
+               xenbus_rm(NULL, be->dev->nodename, "");
                device_unregister(&be->dev->dev);
                return;
        }
@@ -79,7 +80,7 @@
        if (be->tpmif == NULL || be->tpmif->status == CONNECTED)
                return;
 
-       err = xenbus_gather(be->frontpath,
+       err = xenbus_gather(NULL, be->frontpath,
                            "ring-ref", "%lu", &ringref,
                            "event-channel", "%u", &evtchn, NULL);
        if (err) {
@@ -115,20 +116,20 @@
         * unless something bad happens
         */
 again:
-       err = xenbus_transaction_start();
-       if (err) {
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt)) {
                xenbus_dev_error(be->dev, err, "starting transaction");
                return;
        }
 
-       err = xenbus_printf(be->dev->nodename,
+       err = xenbus_printf(xbt, be->dev->nodename,
                            "ready", "%lu", ready);
        if (err) {
                xenbus_dev_error(be->dev, err, "writing 'ready'");
                goto abort;
        }
 
-       err = xenbus_transaction_end(0);
+       err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
                goto again;
        if (err) {
@@ -139,7 +140,7 @@
        xenbus_dev_ok(be->dev);
        return;
 abort:
-       xenbus_transaction_end(1);
+       xenbus_transaction_end(xbt, 1);
 }
 
 
@@ -152,7 +153,7 @@
                = container_of(watch, struct backend_info, backend_watch);
        struct xenbus_device *dev = be->dev;
 
-       err = xenbus_scanf(dev->nodename, "instance", "%li", &instance);
+       err = xenbus_scanf(NULL, dev->nodename, "instance", "%li", &instance);
        if (XENBUS_EXIST_ERR(err))
                return;
        if (err < 0) {
@@ -205,7 +206,7 @@
        memset(be, 0, sizeof(*be));
 
        frontend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "frontend-id", "%li", &be->frontend_id,
                            "frontend", NULL, &frontend,
                            NULL);
@@ -217,7 +218,7 @@
                                 dev->nodename);
                goto free_be;
        }
-       if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
+       if (strlen(frontend) == 0 || !xenbus_exists(NULL, frontend, "")) {
                /* If we can't get a frontend path and a frontend-id,
                 * then our bus-id is no longer valid and we need to
                 * destroy the backend device.
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c      Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c      Mon Oct 10 
13:46:53 2005
@@ -226,7 +226,7 @@
        if (tp->connected)
                return;
 
-       err = xenbus_gather(watch->node,
+       err = xenbus_gather(NULL, watch->node,
                            "ready", "%lu", &ready,
                            NULL);
        if (err) {
@@ -311,9 +311,10 @@
        const char *message;
        int err;
        int backend_id;
+       struct xenbus_transaction *xbt;
 
        backend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "backend-id", "%i", &backend_id,
                            "backend", NULL, &backend,
                            NULL);
@@ -339,27 +340,27 @@
        }
 
 again:
-       err = xenbus_transaction_start();
-       if (err) {
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt)) {
                xenbus_dev_error(dev, err, "starting transaction");
                goto destroy_tpmring;
        }
 
-       err = xenbus_printf(dev->nodename,
+       err = xenbus_printf(xbt, dev->nodename,
                            "ring-ref","%u", info->ring_ref);
        if (err) {
                message = "writing ring-ref";
                goto abort_transaction;
        }
 
-       err = xenbus_printf(dev->nodename,
+       err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", my_private.evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }
 
-       err = xenbus_transaction_end(0);
+       err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
                goto again;
        if (err) {
@@ -380,8 +381,7 @@
        return 0;
 
 abort_transaction:
-       xenbus_transaction_end(1);
-       /* Have to do this *outside* transaction.  */
+       xenbus_transaction_end(xbt, 1);
        xenbus_dev_error(dev, err, "%s", message);
 destroy_tpmring:
        destroy_tpmring(info, &my_private);
@@ -399,7 +399,7 @@
        struct tpmfront_info *info;
        int handle;
 
-       err = xenbus_scanf(dev->nodename,
+       err = xenbus_scanf(NULL, dev->nodename,
                           "handle", "%i", &handle);
        if (XENBUS_EXIST_ERR(err))
                return err;
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Mon Oct 10 
13:46:53 2005
@@ -130,15 +130,10 @@
 
                wait_event(xb_waitq, output_avail(out));
 
-               /* Read, then check: not that we don't trust store.
-                * Hell, some of my best friends are daemons.  But,
-                * in this post-911 world... */
+               mb();
                h = *out;
-               mb();
-               if (!check_buffer(&h)) {
-                       set_current_state(TASK_RUNNING);
-                       return -EIO; /* ETERRORIST! */
-               }
+               if (!check_buffer(&h))
+                       return -EIO;
 
                dst = get_output_chunk(&h, out->buf, &avail);
                if (avail > len)
@@ -173,12 +168,11 @@
                const char *src;
 
                wait_event(xb_waitq, xs_input_avail());
+
+               mb();
                h = *in;
-               mb();
-               if (!check_buffer(&h)) {
-                       set_current_state(TASK_RUNNING);
+               if (!check_buffer(&h))
                        return -EIO;
-               }
 
                src = get_input_chunk(&h, in->buf, &avail);
                if (avail > len)
@@ -195,10 +189,6 @@
                        notify_remote_via_evtchn(xen_start_info->store_evtchn);
        }
 
-       /* If we left something, wake watch thread to deal with it. */
-       if (xs_input_avail())
-               wake_up(&xb_waitq);
-
        return 0;
 }
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Mon Oct 10 
13:46:53 2005
@@ -46,85 +46,113 @@
 #include <asm/hypervisor.h>
 
 struct xenbus_dev_data {
-       /* Are there bytes left to be read in this message? */
-       int bytes_left;
-       /* Are we still waiting for the reply to a message we wrote? */
-       int awaiting_reply;
-       /* Buffer for outgoing messages. */
+       int in_transaction;
+
+       /* Partial request. */
        unsigned int len;
        union {
                struct xsd_sockmsg msg;
                char buffer[PAGE_SIZE];
        } u;
+
+       /* Response queue. */
+#define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1))
+       char read_buffer[PAGE_SIZE];
+       unsigned int read_cons, read_prod;
+       wait_queue_head_t read_waitq;
 };
 
 static struct proc_dir_entry *xenbus_dev_intf;
 
-/* Reply can be long (dir, getperm): don't buffer, just examine
- * headers so we can discard rest if they die. */
 static ssize_t xenbus_dev_read(struct file *filp,
                               char __user *ubuf,
                               size_t len, loff_t *ppos)
 {
-       struct xenbus_dev_data *data = filp->private_data;
-       struct xsd_sockmsg msg;
-       int err;
-
-       /* Refill empty buffer? */
-       if (data->bytes_left == 0) {
-               if (len < sizeof(msg))
-                       return -EINVAL;
-
-               err = xb_read(&msg, sizeof(msg));
-               if (err)
-                       return err;
-               data->bytes_left = msg.len;
-               if (ubuf && copy_to_user(ubuf, &msg, sizeof(msg)) != 0)
-                       return -EFAULT;
-               /* We can receive spurious XS_WATCH_EVENT messages. */
-               if (msg.type != XS_WATCH_EVENT)
-                       data->awaiting_reply = 0;
-               return sizeof(msg);
+       struct xenbus_dev_data *u = filp->private_data;
+       int i;
+
+       if (wait_event_interruptible(u->read_waitq,
+                                    u->read_prod != u->read_cons))
+               return -EINTR;
+
+       for (i = 0; i < len; i++) {
+               if (u->read_cons == u->read_prod)
+                       break;
+               put_user(u->read_buffer[MASK_READ_IDX(u->read_cons)], ubuf+i);
+               u->read_cons++;
        }
 
-       /* Don't read over next header, or over temporary buffer. */
-       if (len > sizeof(data->u.buffer))
-               len = sizeof(data->u.buffer);
-       if (len > data->bytes_left)
-               len = data->bytes_left;
-
-       err = xb_read(data->u.buffer, len);
-       if (err)
-               return err;
-
-       data->bytes_left -= len;
-       if (ubuf && copy_to_user(ubuf, data->u.buffer, len) != 0)
-               return -EFAULT;
-       return len;
-}
-
-/* We do v. basic sanity checking so they don't screw up kernel later. */
+       return i;
+}
+
+static void queue_reply(struct xenbus_dev_data *u,
+                       char *data, unsigned int len)
+{
+       int i;
+
+       for (i = 0; i < len; i++, u->read_prod++)
+               u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];
+
+       BUG_ON((u->read_prod - u->read_cons) > sizeof(u->read_buffer));
+
+       wake_up(&u->read_waitq);
+}
+
 static ssize_t xenbus_dev_write(struct file *filp,
                                const char __user *ubuf,
                                size_t len, loff_t *ppos)
 {
-       struct xenbus_dev_data *data = filp->private_data;
-       int err;
-
-       /* We gather data in buffer until we're ready to send it. */
-       if (len > data->len + sizeof(data->u))
+       struct xenbus_dev_data *u = filp->private_data;
+       void *reply;
+       int err = 0;
+
+       if ((len + u->len) > sizeof(u->u.buffer))
                return -EINVAL;
-       if (copy_from_user(data->u.buffer + data->len, ubuf, len) != 0)
+
+       if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0)
                return -EFAULT;
-       data->len += len;
-       if (data->len >= sizeof(data->u.msg) + data->u.msg.len) {
-               err = xb_write(data->u.buffer, data->len);
-               if (err)
-                       return err;
-               data->len = 0;
-               data->awaiting_reply = 1;
+
+       u->len += len;
+       if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
+               return len;
+
+       switch (u->u.msg.type) {
+       case XS_TRANSACTION_START:
+       case XS_TRANSACTION_END:
+       case XS_DIRECTORY:
+       case XS_READ:
+       case XS_GET_PERMS:
+       case XS_RELEASE:
+       case XS_GET_DOMAIN_PATH:
+       case XS_WRITE:
+       case XS_MKDIR:
+       case XS_RM:
+       case XS_SET_PERMS:
+               reply = xenbus_dev_request_and_reply(&u->u.msg);
+               if (IS_ERR(reply))
+                       err = PTR_ERR(reply);
+               else {
+                       if (u->u.msg.type == XS_TRANSACTION_START)
+                               u->in_transaction = 1;
+                       if (u->u.msg.type == XS_TRANSACTION_END)
+                               u->in_transaction = 0;
+                       queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
+                       queue_reply(u, (char *)reply, u->u.msg.len);
+                       kfree(reply);
+               }
+               break;
+
+       default:
+               err = -EINVAL;
+               break;
        }
-       return len;
+
+       if (err == 0) {
+               u->len = 0;
+               err = len;
+       }
+
+       return err;
 }
 
 static int xenbus_dev_open(struct inode *inode, struct file *filp)
@@ -134,7 +162,6 @@
        if (xen_start_info->store_evtchn == 0)
                return -ENOENT;
 
-       /* Don't try seeking. */
        nonseekable_open(inode, filp);
 
        u = kmalloc(sizeof(*u), GFP_KERNEL);
@@ -142,28 +169,21 @@
                return -ENOMEM;
 
        memset(u, 0, sizeof(*u));
+       init_waitqueue_head(&u->read_waitq);
 
        filp->private_data = u;
 
-       down(&xenbus_lock);
-
        return 0;
 }
 
 static int xenbus_dev_release(struct inode *inode, struct file *filp)
 {
-       struct xenbus_dev_data *data = filp->private_data;
-
-       /* Discard any unread replies. */
-       while (data->bytes_left || data->awaiting_reply)
-               xenbus_dev_read(filp, NULL, sizeof(data->u.buffer), NULL);
-
-       /* Harmless if no transaction in progress. */
-       xenbus_transaction_end(1);
-
-       up(&xenbus_lock);
-
-       kfree(data);
+       struct xenbus_dev_data *u = filp->private_data;
+
+       if (u->in_transaction)
+               xenbus_transaction_end((struct xenbus_transaction *)1, 1);
+
+       kfree(u);
 
        return 0;
 }
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Mon Oct 10 
13:46:53 2005
@@ -43,6 +43,9 @@
 
 static struct notifier_block *xenstore_chain;
 
+/* Now used to protect xenbus probes against save/restore. */
+static DECLARE_MUTEX(xenbus_lock);
+
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -125,7 +128,7 @@
 
        devid = strrchr(nodename, '/') + 1;
 
-       err = xenbus_gather(nodename, "frontend-id", "%i", &domid,
+       err = xenbus_gather(NULL, nodename, "frontend-id", "%i", &domid,
                            "frontend", NULL, &frontend,
                            NULL);
        if (err)
@@ -133,7 +136,7 @@
        if (strlen(frontend) == 0)
                err = -ERANGE;
 
-       if (!err && !xenbus_exists(frontend, ""))
+       if (!err && !xenbus_exists(NULL, frontend, ""))
                err = -ENOENT;
 
        if (err) {
@@ -447,7 +450,7 @@
        if (!nodename)
                return -ENOMEM;
 
-       dir = xenbus_directory(nodename, "", &dir_n);
+       dir = xenbus_directory(NULL, nodename, "", &dir_n);
        if (IS_ERR(dir)) {
                kfree(nodename);
                return PTR_ERR(dir);
@@ -470,7 +473,7 @@
        unsigned int dir_n = 0;
        int i;
 
-       dir = xenbus_directory(bus->root, type, &dir_n);
+       dir = xenbus_directory(NULL, bus->root, type, &dir_n);
        if (IS_ERR(dir))
                return PTR_ERR(dir);
 
@@ -489,7 +492,7 @@
        char **dir;
        unsigned int i, dir_n;
 
-       dir = xenbus_directory(bus->root, "", &dir_n);
+       dir = xenbus_directory(NULL, bus->root, "", &dir_n);
        if (IS_ERR(dir))
                return PTR_ERR(dir);
 
@@ -535,7 +538,7 @@
        if (char_count(node, '/') < 2)
                return;
 
-       exists = xenbus_exists(node, "");
+       exists = xenbus_exists(NULL, node, "");
        if (!exists) {
                xenbus_cleanup_devices(node, &bus->bus);
                return;
@@ -625,12 +628,13 @@
        down(&xenbus_lock);
        bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
        bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev);
+       xs_suspend();
 }
 
 void xenbus_resume(void)
 {
        xb_init_comms();
-       reregister_xenbus_watches();
+       xs_resume();
        bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
        bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev);
        up(&xenbus_lock);
@@ -662,12 +666,16 @@
 }
 EXPORT_SYMBOL(unregister_xenstore_notifier);
 
-/* called from a thread in privcmd/privcmd.c */
+/* 
+** Called either from below xenbus_probe_init() initcall (for domUs) 
+** or, for dom0, from a thread created in privcmd/privcmd.c (after 
+** the user-space tools have invoked initDomainStore()) 
+*/
 int do_xenbus_probe(void *unused)
 {
        int err = 0;
 
-       /* Initialize xenstore comms unless already done. */
+       /* Initialize the interface to xenstore. */
        err = xs_init();
        if (err) {
                printk("XENBUS: Error initializing xenstore comms:"
@@ -685,6 +693,7 @@
        /* Notify others that xenstore is up */
        notifier_call_chain(&xenstore_chain, 0, 0);
        up(&xenbus_lock);
+
        return 0;
 }
 
@@ -698,6 +707,10 @@
        device_register(&xenbus_frontend.dev);
        device_register(&xenbus_backend.dev);
 
+       /* 
+       ** Domain0 doesn't have a store_evtchn yet - this will
+       ** be set up later by xend invoking initDomainStore() 
+       */
        if (!xen_start_info->store_evtchn)
                return 0;
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Mon Oct 10 
13:46:53 2005
@@ -42,11 +42,67 @@
 
 #define streq(a, b) (strcmp((a), (b)) == 0)
 
-static char printf_buffer[4096];
+struct xs_stored_msg {
+       struct xsd_sockmsg hdr;
+
+       union {
+               /* Stored replies. */
+               struct {
+                       struct list_head list;
+                       char *body;
+               } reply;
+
+               /* Queued watch callbacks. */
+               struct {
+                       struct work_struct work;
+                       struct xenbus_watch *handle;
+                       char **vec;
+                       unsigned int vec_size;
+               } watch;
+       } u;
+};
+
+struct xs_handle {
+       /* A list of replies. Currently only one will ever be outstanding. */
+       struct list_head reply_list;
+       spinlock_t reply_lock;
+       wait_queue_head_t reply_waitq;
+
+       /* One request at a time. */
+       struct semaphore request_mutex;
+
+       /* One transaction at a time. */
+       struct semaphore transaction_mutex;
+       int transaction_pid;
+};
+
+static struct xs_handle xs_state;
+
 static LIST_HEAD(watches);
-
-DECLARE_MUTEX(xenbus_lock);
-EXPORT_SYMBOL(xenbus_lock);
+static DEFINE_SPINLOCK(watches_lock);
+
+/* Can wait on !xs_resuming for suspend/resume cycle to complete. */
+static int xs_resuming;
+static DECLARE_WAIT_QUEUE_HEAD(xs_resuming_waitq);
+
+static void request_mutex_acquire(void)
+{
+       /*
+        * We can't distinguish non-transactional from transactional
+        * requests right now. So temporarily acquire the transaction mutex
+        * if this task is outside transaction context.
+        */
+       if (xs_state.transaction_pid != current->pid)
+               down(&xs_state.transaction_mutex);
+       down(&xs_state.request_mutex);
+}
+
+static void request_mutex_release(void)
+{
+       up(&xs_state.request_mutex);
+       if (xs_state.transaction_pid != current->pid)
+               up(&xs_state.transaction_mutex);
+}
 
 static int get_error(const char *errorstring)
 {
@@ -65,29 +121,32 @@
 
 static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
 {
-       struct xsd_sockmsg msg;
-       void *ret;
-       int err;
-
-       err = xb_read(&msg, sizeof(msg));
-       if (err)
-               return ERR_PTR(err);
-
-       ret = kmalloc(msg.len + 1, GFP_KERNEL);
-       if (!ret)
-               return ERR_PTR(-ENOMEM);
-
-       err = xb_read(ret, msg.len);
-       if (err) {
-               kfree(ret);
-               return ERR_PTR(err);
-       }
-       ((char*)ret)[msg.len] = '\0';
-
-       *type = msg.type;
+       struct xs_stored_msg *msg;
+       char *body;
+
+       spin_lock(&xs_state.reply_lock);
+
+       while (list_empty(&xs_state.reply_list)) {
+               spin_unlock(&xs_state.reply_lock);
+               wait_event(xs_state.reply_waitq,
+                          !list_empty(&xs_state.reply_list));
+               spin_lock(&xs_state.reply_lock);
+       }
+
+       msg = list_entry(xs_state.reply_list.next,
+                        struct xs_stored_msg, u.reply.list);
+       list_del(&msg->u.reply.list);
+
+       spin_unlock(&xs_state.reply_lock);
+
+       *type = msg->hdr.type;
        if (len)
-               *len = msg.len;
-       return ret;
+               *len = msg->hdr.len;
+       body = msg->u.reply.body;
+
+       kfree(msg);
+
+       return body;
 }
 
 /* Emergency write. */
@@ -98,10 +157,45 @@
        msg.type = XS_DEBUG;
        msg.len = sizeof("print") + count + 1;
 
+       request_mutex_acquire();
        xb_write(&msg, sizeof(msg));
        xb_write("print", sizeof("print"));
        xb_write(str, count);
        xb_write("", 1);
+       request_mutex_release();
+}
+
+void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
+{
+       void *ret;
+       struct xsd_sockmsg req_msg = *msg;
+       int err;
+
+       if (req_msg.type == XS_TRANSACTION_START) {
+               down(&xs_state.transaction_mutex);
+               xs_state.transaction_pid = current->pid;
+       }
+
+       request_mutex_acquire();
+
+       err = xb_write(msg, sizeof(*msg) + msg->len);
+       if (err) {
+               msg->type = XS_ERROR;
+               ret = ERR_PTR(err);
+       } else {
+               ret = read_reply(&msg->type, &msg->len);
+       }
+
+       request_mutex_release();
+
+       if ((msg->type == XS_TRANSACTION_END) ||
+           ((req_msg.type == XS_TRANSACTION_START) &&
+            (msg->type == XS_ERROR))) {
+               xs_state.transaction_pid = -1;
+               up(&xs_state.transaction_mutex);
+       }
+
+       return ret;
 }
 
 /* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
@@ -115,31 +209,33 @@
        unsigned int i;
        int err;
 
-       WARN_ON(down_trylock(&xenbus_lock) == 0);
-
        msg.type = type;
        msg.len = 0;
        for (i = 0; i < num_vecs; i++)
                msg.len += iovec[i].iov_len;
 
+       request_mutex_acquire();
+
        err = xb_write(&msg, sizeof(msg));
-       if (err)
+       if (err) {
+               up(&xs_state.request_mutex);
                return ERR_PTR(err);
+       }
 
        for (i = 0; i < num_vecs; i++) {
                err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
-               if (err)
+               if (err) {
+                       request_mutex_release();
                        return ERR_PTR(err);
-       }
-
-       /* Watches can have fired before reply comes: daemon detects
-        * and re-transmits, so we can ignore this. */
-       do {
-               kfree(ret);
-               ret = read_reply(&msg.type, len);
-               if (IS_ERR(ret))
-                       return ret;
-       } while (msg.type == XS_WATCH_EVENT);
+               }
+       }
+
+       ret = read_reply(&msg.type, len);
+
+       request_mutex_release();
+
+       if (IS_ERR(ret))
+               return ret;
 
        if (msg.type == XS_ERROR) {
                err = get_error(ret);
@@ -187,8 +283,6 @@
 {
        static char buffer[4096];
 
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
-       /* XXX FIXME: might not be correct if name == "" */
        BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer));
 
        strcpy(buffer, dir);
@@ -207,7 +301,7 @@
        *num = count_strings(strings, len);
 
        /* Transfer to one big alloc for easy freeing. */
-       ret = kmalloc(*num * sizeof(char *) + len, GFP_ATOMIC);
+       ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL);
        if (!ret) {
                kfree(strings);
                return ERR_PTR(-ENOMEM);
@@ -222,7 +316,8 @@
        return ret;
 }
 
-char **xenbus_directory(const char *dir, const char *node, unsigned int *num)
+char **xenbus_directory(struct xenbus_transaction *t,
+                       const char *dir, const char *node, unsigned int *num)
 {
        char *strings;
        unsigned int len;
@@ -236,12 +331,13 @@
 EXPORT_SYMBOL(xenbus_directory);
 
 /* Check if a path exists. Return 1 if it does. */
-int xenbus_exists(const char *dir, const char *node)
+int xenbus_exists(struct xenbus_transaction *t,
+                 const char *dir, const char *node)
 {
        char **d;
        int dir_n;
 
-       d = xenbus_directory(dir, node, &dir_n);
+       d = xenbus_directory(t, dir, node, &dir_n);
        if (IS_ERR(d))
                return 0;
        kfree(d);
@@ -253,7 +349,8 @@
  * Returns a kmalloced value: call free() on it after use.
  * len indicates length in bytes.
  */
-void *xenbus_read(const char *dir, const char *node, unsigned int *len)
+void *xenbus_read(struct xenbus_transaction *t,
+                 const char *dir, const char *node, unsigned int *len)
 {
        return xs_single(XS_READ, join(dir, node), len);
 }
@@ -262,7 +359,8 @@
 /* Write the value of a single file.
  * Returns -err on failure.
  */
-int xenbus_write(const char *dir, const char *node, const char *string)
+int xenbus_write(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *string)
 {
        const char *path;
        struct kvec iovec[2];
@@ -279,14 +377,15 @@
 EXPORT_SYMBOL(xenbus_write);
 
 /* Create a new directory. */
-int xenbus_mkdir(const char *dir, const char *node)
+int xenbus_mkdir(struct xenbus_transaction *t,
+                const char *dir, const char *node)
 {
        return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL));
 }
 EXPORT_SYMBOL(xenbus_mkdir);
 
 /* Destroy a file or directory (directories must be empty). */
-int xenbus_rm(const char *dir, const char *node)
+int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node)
 {
        return xs_error(xs_single(XS_RM, join(dir, node), NULL));
 }
@@ -294,37 +393,57 @@
 
 /* Start a transaction: changes by others will not be seen during this
  * transaction, and changes will not be visible to others until end.
- * You can only have one transaction at any time.
  */
-int xenbus_transaction_start(void)
-{
-       return xs_error(xs_single(XS_TRANSACTION_START, "", NULL));
+struct xenbus_transaction *xenbus_transaction_start(void)
+{
+       int err;
+
+       down(&xs_state.transaction_mutex);
+       xs_state.transaction_pid = current->pid;
+
+       err = xs_error(xs_single(XS_TRANSACTION_START, "", NULL));
+       if (err) {
+               xs_state.transaction_pid = -1;
+               up(&xs_state.transaction_mutex);
+       }
+
+       return err ? ERR_PTR(err) : (struct xenbus_transaction *)1;
 }
 EXPORT_SYMBOL(xenbus_transaction_start);
 
 /* End a transaction.
  * If abandon is true, transaction is discarded instead of committed.
  */
-int xenbus_transaction_end(int abort)
+int xenbus_transaction_end(struct xenbus_transaction *t, int abort)
 {
        char abortstr[2];
+       int err;
+
+       BUG_ON(t == NULL);
 
        if (abort)
                strcpy(abortstr, "F");
        else
                strcpy(abortstr, "T");
-       return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL));
+
+       err = xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL));
+
+       xs_state.transaction_pid = -1;
+       up(&xs_state.transaction_mutex);
+
+       return err;
 }
 EXPORT_SYMBOL(xenbus_transaction_end);
 
 /* Single read and scanf: returns -errno or num scanned. */
-int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
+int xenbus_scanf(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *fmt, ...)
 {
        va_list ap;
        int ret;
        char *val;
 
-       val = xenbus_read(dir, node, NULL);
+       val = xenbus_read(t, dir, node, NULL);
        if (IS_ERR(val))
                return PTR_ERR(val);
 
@@ -340,18 +459,28 @@
 EXPORT_SYMBOL(xenbus_scanf);
 
 /* Single printf and write: returns -errno or 0. */
-int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
+int xenbus_printf(struct xenbus_transaction *t,
+                 const char *dir, const char *node, const char *fmt, ...)
 {
        va_list ap;
        int ret;
-
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
+#define PRINTF_BUFFER_SIZE 4096
+       char *printf_buffer;
+
+       printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
+       if (printf_buffer == NULL)
+               return -ENOMEM;
+
        va_start(ap, fmt);
-       ret = vsnprintf(printf_buffer, sizeof(printf_buffer), fmt, ap);
+       ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
        va_end(ap);
 
-       BUG_ON(ret > sizeof(printf_buffer)-1);
-       return xenbus_write(dir, node, printf_buffer);
+       BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
+       ret = xenbus_write(t, dir, node, printf_buffer);
+
+       kfree(printf_buffer);
+
+       return ret;
 }
 EXPORT_SYMBOL(xenbus_printf);
 
@@ -361,19 +490,28 @@
        va_list ap;
        int ret;
        unsigned int len;
-
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
+       char *printf_buffer;
+
+       printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
+       if (printf_buffer == NULL)
+               goto fail;
 
        len = sprintf(printf_buffer, "%i ", -err);
        va_start(ap, fmt);
-       ret = vsnprintf(printf_buffer+len, sizeof(printf_buffer)-len, fmt, ap);
+       ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
        va_end(ap);
 
-       BUG_ON(len + ret > sizeof(printf_buffer)-1);
+       BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
        dev->has_error = 1;
-       if (xenbus_write(dev->nodename, "error", printf_buffer) != 0)
-               printk("xenbus: failed to write error node for %s (%s)\n",
-                      dev->nodename, printf_buffer);
+       if (xenbus_write(NULL, dev->nodename, "error", printf_buffer) != 0)
+               goto fail;
+
+       kfree(printf_buffer);
+       return;
+
+ fail:
+       printk("xenbus: failed to write error node for %s (%s)\n",
+              dev->nodename, printf_buffer);
 }
 EXPORT_SYMBOL(xenbus_dev_error);
 
@@ -381,7 +519,7 @@
 void xenbus_dev_ok(struct xenbus_device *dev)
 {
        if (dev->has_error) {
-               if (xenbus_rm(dev->nodename, "error") != 0)
+               if (xenbus_rm(NULL, dev->nodename, "error") != 0)
                        printk("xenbus: failed to clear error node for %s\n",
                               dev->nodename);
                else
@@ -391,7 +529,7 @@
 EXPORT_SYMBOL(xenbus_dev_ok);
        
 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
-int xenbus_gather(const char *dir, ...)
+int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...)
 {
        va_list ap;
        const char *name;
@@ -403,7 +541,7 @@
                void *result = va_arg(ap, void *);
                char *p;
 
-               p = xenbus_read(dir, name, NULL);
+               p = xenbus_read(t, dir, name, NULL);
                if (IS_ERR(p)) {
                        ret = PTR_ERR(p);
                        break;
@@ -432,26 +570,6 @@
        return xs_error(xs_talkv(XS_WATCH, iov, ARRAY_SIZE(iov), NULL));
 }
 
-static char **xs_read_watch(unsigned int *num)
-{
-       enum xsd_sockmsg_type type;
-       char *strings;
-       unsigned int len;
-
-       strings = read_reply(&type, &len);
-       if (IS_ERR(strings))
-               return (char **)strings;
-
-       BUG_ON(type != XS_WATCH_EVENT);
-
-       return split(strings, len, num);
-}
-
-static int xs_acknowledge_watch(const char *token)
-{
-       return xs_error(xs_single(XS_WATCH_ACK, token, NULL));
-}
-
 static int xs_unwatch(const char *path, const char *token)
 {
        struct kvec iov[2];
@@ -464,7 +582,6 @@
        return xs_error(xs_talkv(XS_UNWATCH, iov, ARRAY_SIZE(iov), NULL));
 }
 
-/* A little paranoia: we don't just trust token. */
 static struct xenbus_watch *find_watch(const char *token)
 {
        struct xenbus_watch *i, *cmp;
@@ -474,6 +591,7 @@
        list_for_each_entry(i, &watches, list)
                if (i == cmp)
                        return i;
+
        return NULL;
 }
 
@@ -485,11 +603,20 @@
        int err;
 
        sprintf(token, "%lX", (long)watch);
+
+       spin_lock(&watches_lock);
        BUG_ON(find_watch(token));
+       spin_unlock(&watches_lock);
 
        err = xs_watch(watch->node, token);
-       if (!err)
+
+       /* Ignore errors due to multiple registration. */
+       if ((err == 0) || (err == -EEXIST)) {
+               spin_lock(&watches_lock);
                list_add(&watch->list, &watches);
+               spin_unlock(&watches_lock);
+       }
+
        return err;
 }
 EXPORT_SYMBOL(register_xenbus_watch);
@@ -500,77 +627,192 @@
        int err;
 
        sprintf(token, "%lX", (long)watch);
+
+       spin_lock(&watches_lock);
        BUG_ON(!find_watch(token));
+       list_del(&watch->list);
+       spin_unlock(&watches_lock);
+
+       /* Ensure xs_resume() is not in progress (see comments there). */
+       wait_event(xs_resuming_waitq, !xs_resuming);
 
        err = xs_unwatch(watch->node, token);
-       list_del(&watch->list);
-
        if (err)
                printk(KERN_WARNING
                       "XENBUS Failed to release watch %s: %i\n",
                       watch->node, err);
+
+       /* Make sure watch is not in use. */
+       flush_scheduled_work();
 }
 EXPORT_SYMBOL(unregister_xenbus_watch);
 
-/* Re-register callbacks to all watches. */
-void reregister_xenbus_watches(void)
-{
+void xs_suspend(void)
+{
+       down(&xs_state.transaction_mutex);
+       down(&xs_state.request_mutex);
+}
+
+void xs_resume(void)
+{
+       struct list_head *ent, *prev_ent = &watches;
        struct xenbus_watch *watch;
        char token[sizeof(watch) * 2 + 1];
 
-       list_for_each_entry(watch, &watches, list) {
-               sprintf(token, "%lX", (long)watch);
-               xs_watch(watch->node, token);
-       }
-}
-
-static int watch_thread(void *unused)
-{
+       /* Protect against concurrent unregistration and freeing of watches. */
+       BUG_ON(xs_resuming);
+       xs_resuming = 1;
+
+       up(&xs_state.request_mutex);
+       up(&xs_state.transaction_mutex);
+
+       /*
+        * Iterate over the watch list re-registering each node. We must
+        * be careful about concurrent registrations and unregistrations.
+        * We search for the node immediately following the previously
+        * re-registered node. If we get no match then either we are done
+        * (previous node is last in list) or the node was unregistered, in
+        * which case we restart from the beginning of the list.
+        * register_xenbus_watch() + unregister_xenbus_watch() is safe because
+        * it will only ever move a watch node earlier in the list, so it
+        * cannot cause us to skip nodes.
+        */
        for (;;) {
-               char **vec = NULL;
-               unsigned int num;
-
-               wait_event(xb_waitq, xs_input_avail());
-
-               /* If this is a spurious wakeup caused by someone
-                * doing an op, they'll hold the lock and the buffer
-                * will be empty by the time we get there.               
-                */
-               down(&xenbus_lock);
-               if (xs_input_avail())
-                       vec = xs_read_watch(&num);
-
-               if (vec && !IS_ERR(vec)) {
-                       struct xenbus_watch *w;
-                       int err;
-
-                       err = xs_acknowledge_watch(vec[XS_WATCH_TOKEN]);
-                       if (err)
-                               printk(KERN_WARNING "XENBUS ack %s fail %i\n",
-                                      vec[XS_WATCH_TOKEN], err);
-                       w = find_watch(vec[XS_WATCH_TOKEN]);
-                       BUG_ON(!w);
-                       w->callback(w, (const char **)vec, num);
-                       kfree(vec);
-               } else if (vec)
-                       printk(KERN_WARNING "XENBUS xs_read_watch: %li\n",
-                              PTR_ERR(vec));
-               up(&xenbus_lock);
-       }
-}
-
+               spin_lock(&watches_lock);
+               list_for_each(ent, &watches)
+                       if (ent->prev == prev_ent)
+                               break;
+               spin_unlock(&watches_lock);
+
+               /* No match because prev_ent is at the end of the list? */
+               if ((ent == &watches) && (watches.prev == prev_ent))
+                        break; /* We're done! */
+
+               if ((prev_ent = ent) != &watches) {
+                       /*
+                        * Safe even with watch_lock not held. We are saved by
+                        * (xs_resumed==1) check in unregister_xenbus_watch.
+                        */
+                       watch = list_entry(ent, struct xenbus_watch, list);
+                       sprintf(token, "%lX", (long)watch);
+                       xs_watch(watch->node, token);
+               }
+       }
+
+       xs_resuming = 0;
+       wake_up(&xs_resuming_waitq);
+}
+
+static void xenbus_fire_watch(void *arg)
+{
+       struct xs_stored_msg *msg = arg;
+
+       msg->u.watch.handle->callback(msg->u.watch.handle,
+                                     (const char **)msg->u.watch.vec,
+                                     msg->u.watch.vec_size);
+
+       kfree(msg->u.watch.vec);
+       kfree(msg);
+}
+
+static int process_msg(void)
+{
+       struct xs_stored_msg *msg;
+       char *body;
+       int err;
+
+       msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+       if (msg == NULL)
+               return -ENOMEM;
+
+       err = xb_read(&msg->hdr, sizeof(msg->hdr));
+       if (err) {
+               kfree(msg);
+               return err;
+       }
+
+       body = kmalloc(msg->hdr.len + 1, GFP_KERNEL);
+       if (body == NULL) {
+               kfree(msg);
+               return -ENOMEM;
+       }
+
+       err = xb_read(body, msg->hdr.len);
+       if (err) {
+               kfree(body);
+               kfree(msg);
+               return err;
+       }
+       body[msg->hdr.len] = '\0';
+
+       if (msg->hdr.type == XS_WATCH_EVENT) {
+               INIT_WORK(&msg->u.watch.work, xenbus_fire_watch, msg);
+
+               msg->u.watch.vec = split(body, msg->hdr.len,
+                                        &msg->u.watch.vec_size);
+               if (IS_ERR(msg->u.watch.vec)) {
+                       kfree(msg);
+                       return PTR_ERR(msg->u.watch.vec);
+               }
+
+               spin_lock(&watches_lock);
+               msg->u.watch.handle = find_watch(
+                       msg->u.watch.vec[XS_WATCH_TOKEN]);
+               if (msg->u.watch.handle != NULL) {
+                       schedule_work(&msg->u.watch.work);
+               } else {
+                       kfree(msg->u.watch.vec);
+                       kfree(msg);
+               }
+               spin_unlock(&watches_lock);
+       } else {
+               msg->u.reply.body = body;
+               spin_lock(&xs_state.reply_lock);
+               list_add_tail(&msg->u.reply.list, &xs_state.reply_list);
+               spin_unlock(&xs_state.reply_lock);
+               wake_up(&xs_state.reply_waitq);
+       }
+
+       return 0;
+}
+
+static int read_thread(void *unused)
+{
+       int err;
+
+       for (;;) {
+               err = process_msg();
+               if (err)
+                       printk(KERN_WARNING "XENBUS error %d while reading "
+                              "message\n", err);
+       }
+}
+
+/*
+** Initialize the interface to xenstore. 
+*/
 int xs_init(void)
 {
        int err;
-       struct task_struct *watcher;
-
+       struct task_struct *reader;
+
+       INIT_LIST_HEAD(&xs_state.reply_list);
+       spin_lock_init(&xs_state.reply_lock);
+       init_waitqueue_head(&xs_state.reply_waitq);
+
+       init_MUTEX(&xs_state.request_mutex);
+       init_MUTEX(&xs_state.transaction_mutex);
+       xs_state.transaction_pid = -1;
+
+       /* Initialize the shared memory rings to talk to xenstored */
        err = xb_init_comms();
        if (err)
                return err;
        
-       watcher = kthread_run(watch_thread, NULL, "kxbwatch");
-       if (IS_ERR(watcher))
-               return PTR_ERR(watcher);
+       reader = kthread_run(read_thread, NULL, "xenbusd");
+       if (IS_ERR(reader))
+               return PTR_ERR(reader);
+
        return 0;
 }
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Mon Oct 10 13:46:53 2005
@@ -78,30 +78,35 @@
 int xenbus_register_backend(struct xenbus_driver *drv);
 void xenbus_unregister_driver(struct xenbus_driver *drv);
 
-/* Caller must hold this lock to call these functions: it's also held
- * across watch callbacks. */
-extern struct semaphore xenbus_lock;
+struct xenbus_transaction;
 
-char **xenbus_directory(const char *dir, const char *node, unsigned int *num);
-void *xenbus_read(const char *dir, const char *node, unsigned int *len);
-int xenbus_write(const char *dir, const char *node, const char *string);
-int xenbus_mkdir(const char *dir, const char *node);
-int xenbus_exists(const char *dir, const char *node);
-int xenbus_rm(const char *dir, const char *node);
-int xenbus_transaction_start(void);
-int xenbus_transaction_end(int abort);
+char **xenbus_directory(struct xenbus_transaction *t,
+                       const char *dir, const char *node, unsigned int *num);
+void *xenbus_read(struct xenbus_transaction *t,
+                 const char *dir, const char *node, unsigned int *len);
+int xenbus_write(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *string);
+int xenbus_mkdir(struct xenbus_transaction *t,
+                const char *dir, const char *node);
+int xenbus_exists(struct xenbus_transaction *t,
+                 const char *dir, const char *node);
+int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node);
+struct xenbus_transaction *xenbus_transaction_start(void);
+int xenbus_transaction_end(struct xenbus_transaction *t, int abort);
 
 /* Single read and scanf: returns -errno or num scanned if > 0. */
-int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
-       __attribute__((format(scanf, 3, 4)));
+int xenbus_scanf(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *fmt, ...)
+       __attribute__((format(scanf, 4, 5)));
 
 /* Single printf and write: returns -errno or 0. */
-int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
-       __attribute__((format(printf, 3, 4)));
+int xenbus_printf(struct xenbus_transaction *t,
+                 const char *dir, const char *node, const char *fmt, ...)
+       __attribute__((format(printf, 4, 5)));
 
 /* Generic read function: NULL-terminated triples of name,
  * sprintf-style type string, and pointer. Returns 0 or errno.*/
-int xenbus_gather(const char *dir, ...);
+int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...);
 
 /* Report a (negative) errno into the store, with explanation. */
 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,...);
@@ -113,7 +118,11 @@
 struct xenbus_watch
 {
        struct list_head list;
+
+       /* Path being watched. */
        char *node;
+
+       /* Callback (executed in a process context with no locks held). */
        void (*callback)(struct xenbus_watch *,
                         const char **vec, unsigned int len);
 };
@@ -124,7 +133,11 @@
 
 int register_xenbus_watch(struct xenbus_watch *watch);
 void unregister_xenbus_watch(struct xenbus_watch *watch);
-void reregister_xenbus_watches(void);
+void xs_suspend(void);
+void xs_resume(void);
+
+/* Used by xenbus_dev to borrow kernel's store connection. */
+void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg);
 
 /* Called from xen core code. */
 void xenbus_suspend(void);
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/blktap/xenbus.c
--- a/tools/blktap/xenbus.c     Mon Oct 10 13:42:38 2005
+++ b/tools/blktap/xenbus.c     Mon Oct 10 13:46:53 2005
@@ -260,10 +260,6 @@
     node  = res[XS_WATCH_PATH];
     token = res[XS_WATCH_TOKEN];
 
-    er = xs_acknowledge_watch(h, token);
-    if (er == 0)
-        warn("Couldn't acknowledge watch (%s)", token);
-
     w = find_watch(token);
     if (!w)
     {
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/console/client/main.c
--- a/tools/console/client/main.c       Mon Oct 10 13:42:38 2005
+++ b/tools/console/client/main.c       Mon Oct 10 13:46:53 2005
@@ -220,7 +220,7 @@
        if (path == NULL)
                err(ENOMEM, "realloc");
        strcat(path, "/console/tty");
-       str_pty = xs_read(xs, path, &len);
+       str_pty = xs_read(xs, NULL, path, &len);
 
        /* FIXME consoled currently does not assume domain-0 doesn't have a
           console which is good when we break domain-0 up.  To keep us
@@ -245,7 +245,7 @@
                struct timeval tv = { 0, 500 };
                select(0, NULL, NULL, NULL, &tv); /* pause briefly */
 
-               str_pty = xs_read(xs, path, &len);
+               str_pty = xs_read(xs, NULL, path, &len);
        }
 
        if (str_pty == NULL) {
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Mon Oct 10 13:42:38 2005
+++ b/tools/console/daemon/io.c Mon Oct 10 13:46:53 2005
@@ -179,7 +179,7 @@
                success = asprintf(&path, "%s/tty", dom->conspath) != -1;
                if (!success)
                        goto out;
-               success = xs_write(xs, path, slave, strlen(slave));
+               success = xs_write(xs, NULL, path, slave, strlen(slave));
                free(path);
                if (!success)
                        goto out;
@@ -187,7 +187,7 @@
                success = asprintf(&path, "%s/limit", dom->conspath) != -1;
                if (!success)
                        goto out;
-               data = xs_read(xs, path, &len);
+               data = xs_read(xs, NULL, path, &len);
                if (data) {
                        dom->buffer.max_capacity = strtoul(data, 0, 0);
                        free(data);
@@ -216,7 +216,7 @@
                char *p;
 
                asprintf(&path, "%s/%s", dir, name);
-               p = xs_read(xs, path, NULL);
+               p = xs_read(xs, NULL, path, NULL);
                free(path);
                if (p == NULL) {
                        ret = ENOENT;
@@ -505,7 +505,6 @@
                        domain_create_ring(dom);
        }
 
-       xs_acknowledge_watch(xs, vec[1]);
        free(vec);
 }
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Mon Oct 10 13:42:38 2005
+++ b/tools/python/xen/lowlevel/xs/xs.c Mon Oct 10 13:46:53 2005
@@ -80,8 +80,8 @@
 
 static PyObject *xspy_read(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", NULL };
-    static char *arg_spec = "s|";
+    static char *kwd_spec[] = { "transaction", "path", NULL };
+    static char *arg_spec = "ss";
     char *path = NULL;
 
     struct xs_handle *xh = xshandle(self);
@@ -89,13 +89,19 @@
     unsigned int xsval_n = 0;
     PyObject *val = NULL;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
-                                     &path))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_read(xh, path, &xsval_n);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_read(xh, th, path, &xsval_n);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         if (errno == ENOENT) {
@@ -123,8 +129,8 @@
 
 static PyObject *xspy_write(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", "data", NULL };
-    static char *arg_spec = "ss#";
+    static char *kwd_spec[] = { "transaction", "path", "data", NULL };
+    static char *arg_spec = "sss#";
     char *path = NULL;
     char *data = NULL;
     int data_n = 0;
@@ -133,13 +139,19 @@
     PyObject *val = NULL;
     int xsval = 0;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
-                                     &path, &data, &data_n))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_write(xh, path, data, data_n);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path, &data, &data_n))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_write(xh, th, path, data, data_n);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -162,8 +174,8 @@
 
 static PyObject *xspy_ls(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", NULL };
-    static char *arg_spec = "s|";
+    static char *kwd_spec[] = { "transaction", "path", NULL };
+    static char *arg_spec = "ss";
     char *path = NULL;
 
     struct xs_handle *xh = xshandle(self);
@@ -172,12 +184,20 @@
     unsigned int xsval_n = 0;
     int i;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_directory(xh, path, &xsval_n);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path))
+        goto exit;
+
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_directory(xh, th, path, &xsval_n);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         if (errno == ENOENT) {
@@ -205,20 +225,27 @@
 
 static PyObject *xspy_mkdir(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", NULL };
-    static char *arg_spec = "s|";
+    static char *kwd_spec[] = { "transaction", "path", NULL };
+    static char *arg_spec = "ss";
     char *path = NULL;
 
     struct xs_handle *xh = xshandle(self);
     PyObject *val = NULL;
     int xsval = 0;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_mkdir(xh, path);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_mkdir(xh, th, path);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -240,20 +267,27 @@
 
 static PyObject *xspy_rm(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", NULL };
-    static char *arg_spec = "s|";
+    static char *kwd_spec[] = { "transaction", "path", NULL };
+    static char *arg_spec = "ss";
     char *path = NULL;
 
     struct xs_handle *xh = xshandle(self);
     PyObject *val = NULL;
     int xsval = 0;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_rm(xh, path);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_rm(xh, th, path);
     Py_END_ALLOW_THREADS
     if (!xsval && errno != ENOENT) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -276,8 +310,8 @@
 static PyObject *xspy_get_permissions(PyObject *self, PyObject *args,
                                       PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", NULL };
-    static char *arg_spec = "s|";
+    static char *kwd_spec[] = { "transaction", "path", NULL };
+    static char *arg_spec = "ss";
     char *path = NULL;
 
     struct xs_handle *xh = xshandle(self);
@@ -286,12 +320,19 @@
     unsigned int perms_n = 0;
     int i;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    perms = xs_get_permissions(xh, path, &perms_n);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    perms = xs_get_permissions(xh, th, path, &perms_n);
     Py_END_ALLOW_THREADS
     if (!perms) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -321,8 +362,8 @@
 static PyObject *xspy_set_permissions(PyObject *self, PyObject *args,
                                       PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", "perms", NULL };
-    static char *arg_spec = "sO";
+    static char *kwd_spec[] = { "transaction", "path", "perms", NULL };
+    static char *arg_spec = "ssO";
     char *path = NULL;
     PyObject *perms = NULL;
     static char *perm_names[] = { "dom", "read", "write", NULL };
@@ -335,11 +376,17 @@
     PyObject *tuple0 = NULL;
     PyObject *val = NULL;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
-                                     &path, &perms))
-        goto exit;
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path, &perms))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
     if (!PyList_Check(perms)) {
         PyErr_SetString(PyExc_RuntimeError, "perms must be a list");
         goto exit;
@@ -369,7 +416,7 @@
             xsperms[i].perms |= XS_PERM_WRITE;
     }
     Py_BEGIN_ALLOW_THREADS
-    xsval = xs_set_permissions(xh, path, xsperms, xsperms_n);
+    xsval = xs_set_permissions(xh, th, path, xsperms, xsperms_n);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -442,9 +489,6 @@
 
 #define xspy_read_watch_doc "\n"                               \
        "Read a watch notification.\n"                          \
-       "The notification must be acknowledged by passing\n"    \
-       "the token to acknowledge_watch().\n"                   \
-       " path [string]: xenstore path.\n"                      \
        "\n"                                                    \
        "Returns: [tuple] (path, token).\n"                     \
        "Raises RuntimeError on error.\n"                       \
@@ -492,44 +536,6 @@
  exit:
     if (xsval)
         free(xsval);
-    return val;
-}
-
-#define xspy_acknowledge_watch_doc "\n"                                        
\
-       "Acknowledge a watch notification that has been read.\n"        \
-       " token [string] : from the watch notification\n"               \
-       "\n"                                                            \
-       "Returns None on success.\n"                                    \
-       "Raises RuntimeError on error.\n"                               \
-       "\n"
-
-static PyObject *xspy_acknowledge_watch(PyObject *self, PyObject *args,
-                                        PyObject *kwds)
-{
-    static char *kwd_spec[] = { "token", NULL };
-    static char *arg_spec = "O";
-    PyObject *token;
-    char token_str[MAX_STRLEN(unsigned long) + 1];
-
-    struct xs_handle *xh = xshandle(self);
-    PyObject *val = NULL;
-    int xsval = 0;
-
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &token))
-        goto exit;
-    sprintf(token_str, "%li", (unsigned long)token);
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_acknowledge_watch(xh, token_str);
-    Py_END_ALLOW_THREADS
-    if (!xsval) {
-        PyErr_SetFromErrno(PyExc_RuntimeError);
-        goto exit;
-    }
-    Py_INCREF(Py_None);
-    val = Py_None;
- exit:
     return val;
 }
 
@@ -584,9 +590,8 @@
 
 #define xspy_transaction_start_doc "\n"                                \
        "Start a transaction.\n"                                \
-       "Only one transaction can be active at a time.\n"       \
        "\n"                                                    \
-       "Returns None on success.\n"                            \
+       "Returns transaction handle on success.\n"              \
        "Raises RuntimeError on error.\n"                       \
        "\n"
 
@@ -599,21 +604,23 @@
 
     struct xs_handle *xh = xshandle(self);
     PyObject *val = NULL;
-    int xsval = 0;
+    struct xs_transaction_handle *th;
+    char thstr[20];
 
     if (!xh)
         goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
         goto exit;
     Py_BEGIN_ALLOW_THREADS
-    xsval = xs_transaction_start(xh);
-    Py_END_ALLOW_THREADS
-    if (!xsval) {
-        PyErr_SetFromErrno(PyExc_RuntimeError);
-        goto exit;
-    }
-    Py_INCREF(Py_None);
-    val = Py_None;
+    th = xs_transaction_start(xh);
+    Py_END_ALLOW_THREADS
+    if (th == NULL) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+
+    sprintf(thstr, "%lX", (unsigned long)th);
+    val = PyString_FromString(thstr);
  exit:
     return val;
 }
@@ -630,20 +637,27 @@
 static PyObject *xspy_transaction_end(PyObject *self, PyObject *args,
                                       PyObject *kwds)
 {
-    static char *kwd_spec[] = { "abort", NULL };
-    static char *arg_spec = "|i";
+    static char *kwd_spec[] = { "transaction", "abort", NULL };
+    static char *arg_spec = "s|i";
     int abort = 0;
 
     struct xs_handle *xh = xshandle(self);
     PyObject *val = NULL;
     int xsval = 0;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &abort))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_transaction_end(xh, abort);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &abort))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_transaction_end(xh, th, abort);
     Py_END_ALLOW_THREADS
     if (!xsval) {
        if (errno == EAGAIN) {
@@ -833,7 +847,6 @@
      XSPY_METH(set_permissions),
      XSPY_METH(watch),
      XSPY_METH(read_watch),
-     XSPY_METH(acknowledge_watch),
      XSPY_METH(unwatch),
      XSPY_METH(transaction_start),
      XSPY_METH(transaction_end),
diff -r 0e7c48861e95 -r 1ac39c7a0435 
tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py      Mon Oct 10 13:42:38 2005
+++ b/tools/python/xen/xend/xenstore/xstransact.py      Mon Oct 10 13:46:53 2005
@@ -14,29 +14,34 @@
 class xstransact:
 
     def __init__(self, path):
-        self.in_transaction = False
         self.path = path.rstrip("/")
-        xshandle().transaction_start()
+        self.transaction = xshandle().transaction_start()
         self.in_transaction = True
 
     def __del__(self):
         if self.in_transaction:
-            xshandle().transaction_end(True)
+            xshandle().transaction_end(self.transaction, True)
 
     def commit(self):
         if not self.in_transaction:
             raise RuntimeError
         self.in_transaction = False
-        return xshandle().transaction_end(False)
+        rc = xshandle().transaction_end(self.transaction, False)
+        self.transaction = "0"
+        return rc
 
     def abort(self):
+        if not self.in_transaction:
+            return True
         self.in_transaction = False
-        return xshandle().transaction_end(True)
+        rc = xshandle().transaction_end(self.transaction, True)
+        self.transaction = "0"
+        return rc
 
     def _read(self, key):
         path = "%s/%s" % (self.path, key)
         try:
-            return xshandle().read(path)
+            return xshandle().read(self.transaction, path)
         except RuntimeError, ex:
             raise RuntimeError(ex.args[0],
                                '%s, while reading %s' % (ex.args[1], path))
@@ -50,7 +55,7 @@
         instead.
         """
         if len(args) == 0:
-            return xshandle().read(self.path)
+            return xshandle().read(self.transaction, self.path)
         if len(args) == 1:
             return self._read(args[0])
         ret = []
@@ -61,7 +66,7 @@
     def _write(self, key, data):
         path = "%s/%s" % (self.path, key)
         try:
-            xshandle().write(path, data)
+            xshandle().write(self.transaction, path, data)
         except RuntimeError, ex:
             raise RuntimeError(ex.args[0],
                                ('%s, while writing %s : %s' %
@@ -93,7 +98,7 @@
 
     def _remove(self, key):
         path = "%s/%s" % (self.path, key)
-        return xshandle().rm(path)
+        return xshandle().rm(self.transaction, path)
 
     def remove(self, *args):
         """If no arguments are given, remove this transaction's path.
@@ -101,14 +106,14 @@
         path, and remove each of those instead.
         """
         if len(args) == 0:
-            xshandle().rm(self.path)
+            xshandle().rm(self.transaction, self.path)
         else:
             for key in args:
                 self._remove(key)
 
     def _list(self, key):
         path = "%s/%s" % (self.path, key)
-        l = xshandle().ls(path)
+        l = xshandle().ls(self.transaction, path)
         if l:
             return map(lambda x: key + "/" + x, l)
         return []
@@ -120,7 +125,7 @@
         path, and return the cumulative listing of each of those instead.
         """
         if len(args) == 0:
-            ret = xshandle().ls(self.path)
+            ret = xshandle().ls(self.transaction, self.path)
             if ret is None:
                 return []
             else:
@@ -136,11 +141,11 @@
         ret = []
         for key in keys:
             new_subdir = subdir + "/" + key
-            l = xshandle().ls(new_subdir)
+            l = xshandle().ls(self.transaction, new_subdir)
             if l:
                 ret.append([key, self.list_recursive_(new_subdir, l)])
             else:
-                ret.append([key, xshandle().read(new_subdir)])
+                ret.append([key, xshandle().read(self.transaction, 
new_subdir)])
         return ret
 
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/python/xen/xend/xenstore/xswatch.py
--- a/tools/python/xen/xend/xenstore/xswatch.py Mon Oct 10 13:42:38 2005
+++ b/tools/python/xen/xend/xenstore/xswatch.py Mon Oct 10 13:46:53 2005
@@ -8,6 +8,7 @@
 import select
 import threading
 from xen.lowlevel import xs
+from xen.xend.xenstore.xsutil import xshandle
 
 class xswatch:
 
@@ -27,10 +28,7 @@
         if cls.watchThread:
             cls.xslock.release()
             return
-        # XXX: When we fix xenstored to have better watch semantics,
-        # this can change to shared xshandle(). Currently that would result
-        # in duplicate watch firings, thus failed extra xs.acknowledge_watch.
-        cls.xs = xs.open()
+        cls.xs = xshandle()
         cls.watchThread = threading.Thread(name="Watcher",
                                            target=cls.watchMain)
         cls.watchThread.setDaemon(True)
@@ -43,11 +41,10 @@
         while True:
             try:
                 we = cls.xs.read_watch()
-                watch = we[1]
-                cls.xs.acknowledge_watch(watch)
             except RuntimeError, ex:
                 print ex
                 raise
+            watch = we[1]
             watch.fn(*watch.args, **watch.kwargs)
 
     watchMain = classmethod(watchMain)
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/Makefile   Mon Oct 10 13:46:53 2005
@@ -8,7 +8,7 @@
 INSTALL_DIR     = $(INSTALL) -d -m0755
 
 PROFILE=#-pg
-BASECFLAGS=-Wall -W -g -Werror
+BASECFLAGS=-Wall -g -Werror
 # Make gcc generate dependencies.
 BASECFLAGS += -Wp,-MD,.$(@F).d
 PROG_DEP = .*.d
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/testsuite/07watch.test
--- a/tools/xenstore/testsuite/07watch.test     Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/07watch.test     Mon Oct 10 13:46:53 2005
@@ -5,7 +5,6 @@
 2 write /test contents2
 expect 1:/test:token
 1 waitwatch
-1 ackwatch token
 1 close
 
 # Check that reads don't set it off.
@@ -22,15 +21,12 @@
 2 mkdir /dir/newdir
 expect 1:/dir/newdir:token
 1 waitwatch
-1 ackwatch token
 2 setperm /dir/newdir 0 READ
 expect 1:/dir/newdir:token
 1 waitwatch
-1 ackwatch token
 2 rm /dir/newdir
 expect 1:/dir/newdir:token
 1 waitwatch
-1 ackwatch token
 1 close
 2 close
 
@@ -49,7 +45,6 @@
 read /dir/test
 expect /dir/test:token
 waitwatch
-ackwatch token
 close
 
 # watch priority test: all simultaneous
@@ -59,13 +54,10 @@
 write /dir/test contents
 expect 3:/dir/test:token3
 3 waitwatch
-3 ackwatch token3
 expect 2:/dir/test:token2
 2 waitwatch
-2 ackwatch token2
 expect 1:/dir/test:token1
 1 waitwatch
-1 ackwatch token1
 1 close
 2 close
 3 close
@@ -79,7 +71,6 @@
 2 close
 expect 1:/dir/test:token1
 1 waitwatch
-1 ackwatch token1
 1 close
 
 # If one dies (without reading at all), the other should still get ack.
@@ -89,7 +80,6 @@
 2 close
 expect 1:/dir/test:token1
 1 waitwatch
-1 ackwatch token1
 1 close
 2 close
 
@@ -111,7 +101,6 @@
 2 unwatch /dir token2
 expect 1:/dir/test:token1
 1 waitwatch
-1 ackwatch token1
 1 close
 2 close
 
@@ -123,14 +112,12 @@
 write /dir/test contents2
 expect 1:/dir/test:token2
 1 waitwatch
-1 ackwatch token2
 
 # check we only get notified once.
 1 watch /test token
 2 write /test contents2
 expect 1:/test:token
 1 waitwatch
-1 ackwatch token
 expect 1: waitwatch failed: Connection timed out
 1 waitwatch
 1 close
@@ -142,13 +129,10 @@
 2 write /test3 contents
 expect 1:/test1:token
 1 waitwatch
-1 ackwatch token
 expect 1:/test2:token
 1 waitwatch
-1 ackwatch token
 expect 1:/test3:token
 1 waitwatch
-1 ackwatch token
 1 close
 
 # Creation of subpaths should be covered correctly.
@@ -157,10 +141,8 @@
 2 write /test/subnode/subnode contents2
 expect 1:/test/subnode:token
 1 waitwatch
-1 ackwatch token
 expect 1:/test/subnode/subnode:token
 1 waitwatch
-1 ackwatch token
 expect 1: waitwatch failed: Connection timed out
 1 waitwatch
 1 close
@@ -171,7 +153,6 @@
 1 watchnoack / token2 0
 expect 1:/test/subnode:token
 1 waitwatch
-1 ackwatch token
 expect 1:/:token2
 1 waitwatch
 expect 1: waitwatch failed: Connection timed out
@@ -183,7 +164,6 @@
 2 rm /test
 expect 1:/test/subnode:token
 1 waitwatch
-1 ackwatch token
 
 # Watch should not double-send after we ack, even if we did something in 
between.
 1 watch /test2 token
@@ -192,6 +172,5 @@
 1 waitwatch
 expect 1:contents2
 1 read /test2/foo
-1 ackwatch token
 expect 1: waitwatch failed: Connection timed out
 1 waitwatch
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/testsuite/08transaction.test
--- a/tools/xenstore/testsuite/08transaction.test       Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/08transaction.test       Mon Oct 10 13:46:53 2005
@@ -68,7 +68,6 @@
 2 commit
 expect 1:/test/dir/sub:token
 1 waitwatch
-1 ackwatch token
 1 close
 
 # Rm inside transaction works like rm outside: children get notified.
@@ -78,7 +77,6 @@
 2 commit
 expect 1:/test/dir/sub:token
 1 waitwatch
-1 ackwatch token
 1 close
 
 # Multiple events from single transaction don't trigger assert
@@ -89,8 +87,6 @@
 2 commit
 expect 1:/test/1:token
 1 waitwatch
-1 ackwatch token
 expect 1:/test/2:token
 1 waitwatch
-1 ackwatch token
 1 close
diff -r 0e7c48861e95 -r 1ac39c7a0435 
tools/xenstore/testsuite/10domain-homedir.test
--- a/tools/xenstore/testsuite/10domain-homedir.test    Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/10domain-homedir.test    Mon Oct 10 13:46:53 2005
@@ -16,4 +16,3 @@
 write /home/foo/bar contents
 expect 1:foo/bar:token
 1 waitwatch
-1 ackwatch token
diff -r 0e7c48861e95 -r 1ac39c7a0435 
tools/xenstore/testsuite/11domain-watch.test
--- a/tools/xenstore/testsuite/11domain-watch.test      Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/11domain-watch.test      Mon Oct 10 13:46:53 2005
@@ -10,7 +10,6 @@
 write /test contents2
 expect 1:/test:token
 1 waitwatch
-1 ackwatch token
 1 unwatch /test token
 release 1
 1 close
@@ -25,7 +24,6 @@
 1 write /dir/test4 contents4
 expect 1:/dir/test:token
 1 waitwatch
-1 ackwatch token
 release 1
 1 close
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/testsuite/12readonly.test
--- a/tools/xenstore/testsuite/12readonly.test  Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/12readonly.test  Mon Oct 10 13:46:53 2005
@@ -36,4 +36,3 @@
 1 write /test contents
 expect /test:token
 waitwatch
-ackwatch token
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/testsuite/13watch-ack.test
--- a/tools/xenstore/testsuite/13watch-ack.test Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/13watch-ack.test Mon Oct 10 13:46:53 2005
@@ -18,5 +18,4 @@
 1 waitwatch
 3 write /test/1 contents1
 4 write /test/3 contents3
-1 ackwatch token2
 1 close
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstore_client.c
--- a/tools/xenstore/xenstore_client.c  Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstore_client.c  Mon Oct 10 13:46:53 2005
@@ -32,6 +32,7 @@
 main(int argc, char **argv)
 {
     struct xs_handle *xsh;
+    struct xs_transaction_handle *xth;
     bool success;
     int ret = 0;
 #if defined(CLIENT_read) || defined(CLIENT_list)
@@ -84,13 +85,13 @@
 #endif
 
   again:
-    success = xs_transaction_start(xsh);
-    if (!success)
+    xth = xs_transaction_start(xsh);
+    if (xth == NULL)
        errx(1, "couldn't start transaction");
 
     while (optind < argc) {
 #if defined(CLIENT_read)
-       char *val = xs_read(xsh, argv[optind], NULL);
+       char *val = xs_read(xsh, xth, argv[optind], NULL);
        if (val == NULL) {
            warnx("couldn't read path %s", argv[optind]);
            ret = 1;
@@ -102,7 +103,7 @@
        free(val);
        optind++;
 #elif defined(CLIENT_write)
-       success = xs_write(xsh, argv[optind], argv[optind + 1],
+       success = xs_write(xsh, xth, argv[optind], argv[optind + 1],
                           strlen(argv[optind + 1]));
        if (!success) {
            warnx("could not write path %s", argv[optind]);
@@ -111,7 +112,7 @@
        }
        optind += 2;
 #elif defined(CLIENT_rm)
-       success = xs_rm(xsh, argv[optind]);
+       success = xs_rm(xsh, xth, argv[optind]);
        if (!success) {
            warnx("could not remove path %s", argv[optind]);
            ret = 1;
@@ -119,7 +120,7 @@
        }
        optind++;
 #elif defined(CLIENT_exists)
-       char *val = xs_read(xsh, argv[optind], NULL);
+       char *val = xs_read(xsh, xth, argv[optind], NULL);
        if (val == NULL) {
            ret = 1;
            goto out;
@@ -128,7 +129,7 @@
        optind++;
 #elif defined(CLIENT_list)
        unsigned int i, num;
-       char **list = xs_directory(xsh, argv[optind], &num);
+       char **list = xs_directory(xsh, xth, argv[optind], &num);
        if (list == NULL) {
            warnx("could not list path %s", argv[optind]);
            ret = 1;
@@ -145,7 +146,7 @@
     }
 
  out:
-    success = xs_transaction_end(xsh, ret ? true : false);
+    success = xs_transaction_end(xsh, xth, ret ? true : false);
     if (!success) {
        if (ret == 0 && errno == EAGAIN)
            goto again;
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_core.c   Mon Oct 10 13:46:53 2005
@@ -154,7 +154,6 @@
        case XS_READ: return "READ";
        case XS_GET_PERMS: return "GET_PERMS";
        case XS_WATCH: return "WATCH";
-       case XS_WATCH_ACK: return "WATCH_ACK";
        case XS_UNWATCH: return "UNWATCH";
        case XS_TRANSACTION_START: return "TRANSACTION_START";
        case XS_TRANSACTION_END: return "TRANSACTION_END";
@@ -236,52 +235,50 @@
        talloc_free(str);
 }
 
-static bool write_message(struct connection *conn)
+static bool write_messages(struct connection *conn)
 {
        int ret;
-       struct buffered_data *out = conn->out;
-
-       if (out->inhdr) {
-               if (verbose)
-                       xprintf("Writing msg %s (%s) out to %p\n",
-                               sockmsg_string(out->hdr.msg.type),
-                               out->buffer, conn);
-               ret = conn->write(conn, out->hdr.raw + out->used,
-                                 sizeof(out->hdr) - out->used);
+       struct buffered_data *out, *tmp;
+
+       list_for_each_entry_safe(out, tmp, &conn->out_list, list) {
+               if (out->inhdr) {
+                       if (verbose)
+                               xprintf("Writing msg %s (%s) out to %p\n",
+                                       sockmsg_string(out->hdr.msg.type),
+                                       out->buffer, conn);
+                       ret = conn->write(conn, out->hdr.raw + out->used,
+                                         sizeof(out->hdr) - out->used);
+                       if (ret < 0)
+                               return false;
+
+                       out->used += ret;
+                       if (out->used < sizeof(out->hdr))
+                               return true;
+
+                       out->inhdr = false;
+                       out->used = 0;
+
+                       /* Second write might block if non-zero. */
+                       if (out->hdr.msg.len && !conn->domain)
+                               return true;
+               }
+
+               ret = conn->write(conn, out->buffer + out->used,
+                                 out->hdr.msg.len - out->used);
+
                if (ret < 0)
                        return false;
 
                out->used += ret;
-               if (out->used < sizeof(out->hdr))
+               if (out->used != out->hdr.msg.len)
                        return true;
 
-               out->inhdr = false;
-               out->used = 0;
-
-               /* Second write might block if non-zero. */
-               if (out->hdr.msg.len && !conn->domain)
-                       return true;
-       }
-
-       ret = conn->write(conn, out->buffer + out->used,
-                         out->hdr.msg.len - out->used);
-
-       if (ret < 0)
-               return false;
-
-       out->used += ret;
-       if (out->used != out->hdr.msg.len)
-               return true;
-
-       trace_io(conn, "OUT", out);
-       conn->out = NULL;
-       talloc_free(out);
-
-       queue_next_event(conn);
-
-       /* No longer busy? */
-       if (!conn->out)
-               conn->state = OK;
+               trace_io(conn, "OUT", out);
+
+               list_del(&out->list);
+               talloc_free(out);
+       }
+
        return true;
 }
 
@@ -298,9 +295,9 @@
                FD_SET(conn->fd, &set);
                none.tv_sec = none.tv_usec = 0;
 
-               while (conn->out
+               while (!list_empty(&conn->out_list)
                       && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
-                       if (!write_message(conn))
+                       if (!write_messages(conn))
                                break;
                close(conn->fd);
        }
@@ -327,9 +324,8 @@
        list_for_each_entry(i, &connections, list) {
                if (i->domain)
                        continue;
-               if (i->state == OK)
-                       FD_SET(i->fd, inset);
-               if (i->out)
+               FD_SET(i->fd, inset);
+               if (!list_empty(&i->out_list))
                        FD_SET(i->fd, outset);
                if (i->fd > max)
                        max = i->fd;
@@ -595,14 +591,7 @@
        bdata->hdr.msg.len = len;
        memcpy(bdata->buffer, data, len);
 
-       /* There might be an event going out now.  Queue behind it. */
-       if (conn->out) {
-               assert(conn->out->hdr.msg.type == XS_WATCH_EVENT);
-               assert(!conn->waiting_reply);
-               conn->waiting_reply = bdata;
-       } else
-               conn->out = bdata;
-       conn->state = BUSY;
+       list_add_tail(&bdata->list, &conn->out_list);
 }
 
 /* Some routines (write, mkdir, etc) just need a non-error return */
@@ -1103,10 +1092,6 @@
                do_watch(conn, in);
                break;
 
-       case XS_WATCH_ACK:
-               do_watch_ack(conn, onearg(in));
-               break;
-
        case XS_UNWATCH:
                do_unwatch(conn, in);
                break;
@@ -1153,8 +1138,6 @@
        enum xsd_sockmsg_type volatile type = conn->in->hdr.msg.type;
        jmp_buf talloc_fail;
 
-       assert(conn->state == OK);
-
        /* For simplicity, we kill the connection on OOM. */
        talloc_set_fail_handler(out_of_mem, &talloc_fail);
        if (setjmp(talloc_fail)) {
@@ -1168,11 +1151,6 @@
                xprintf("Got message %s len %i from %p\n",
                        sockmsg_string(type), conn->in->hdr.msg.len, conn);
 
-       /* We might get a command while waiting for an ack: this means
-        * the other end discarded it: we will re-transmit. */
-       if (type != XS_WATCH_ACK)
-               conn->waiting_for_ack = NULL;
-
        /* Careful: process_message may free connection.  We detach
         * "in" beforehand and allocate the new buffer to avoid
         * touching conn after process_message.
@@ -1196,10 +1174,7 @@
 static void handle_input(struct connection *conn)
 {
        int bytes;
-       struct buffered_data *in;
-
-       assert(conn->state == OK);
-       in = conn->in;
+       struct buffered_data *in = conn->in;
 
        /* Not finished header yet? */
        if (in->inhdr) {
@@ -1247,7 +1222,7 @@
 
 static void handle_output(struct connection *conn)
 {
-       if (!write_message(conn))
+       if (!write_messages(conn))
                talloc_free(conn);
 }
 
@@ -1264,9 +1239,6 @@
        if (!new)
                return NULL;
 
-       new->state = OK;
-       new->out = new->waiting_reply = NULL;
-       new->waiting_for_ack = NULL;
        new->fd = -1;
        new->id = 0;
        new->domain = NULL;
@@ -1274,6 +1246,7 @@
        new->write = write;
        new->read = read;
        new->can_write = true;
+       INIT_LIST_HEAD(&new->out_list);
        INIT_LIST_HEAD(&new->watches);
 
        talloc_set_fail_handler(out_of_mem, &talloc_fail);
@@ -1328,23 +1301,17 @@
        list_for_each_entry(i, &connections, list) {
                printf("Connection %p:\n", i);
                printf("    state = %s\n",
-                      i->state == OK ? "OK"
-                      : i->state == BUSY ? "BUSY"
-                      : "INVALID");
+                      list_empty(&i->out_list) ? "OK" : "BUSY");
                if (i->id)
                        printf("    id = %i\n", i->id);
                if (!i->in->inhdr || i->in->used)
                        printf("    got %i bytes of %s\n",
                               i->in->used, i->in->inhdr ? "header" : "data");
+#if 0
                if (i->out)
                        printf("    sending message %s (%s) out\n",
                               sockmsg_string(i->out->hdr.msg.type),
                               i->out->buffer);
-               if (i->waiting_reply)
-                       printf("    ... and behind is queued %s (%s)\n",
-                              sockmsg_string(i->waiting_reply->hdr.msg.type),
-                              i->waiting_reply->buffer);
-#if 0
                if (i->transaction)
                        dump_transaction(i);
                if (i->domain)
@@ -1615,3 +1582,13 @@
                max = initialize_set(&inset, &outset, *sock, *ro_sock);
        }
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h   Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_core.h   Mon Oct 10 13:46:53 2005
@@ -31,14 +31,19 @@
 
 struct buffered_data
 {
+       struct list_head list;
+
        /* Are we still doing the header? */
        bool inhdr;
+
        /* How far are we? */
        unsigned int used;
+
        union {
                struct xsd_sockmsg msg;
                char raw[sizeof(struct xsd_sockmsg)];
        } hdr;
+
        /* The actual data. */
        char *buffer;
 };
@@ -47,14 +52,6 @@
 typedef int connwritefn_t(struct connection *, const void *, unsigned int);
 typedef int connreadfn_t(struct connection *, void *, unsigned int);
 
-enum state
-{
-       /* Doing action, not listening */
-       BUSY,
-       /* Completed */
-       OK,
-};
-
 struct connection
 {
        struct list_head list;
@@ -62,26 +59,17 @@
        /* The file descriptor we came in on. */
        int fd;
 
-       /* Who am I?  0 for socket connections. */
+       /* Who am I? 0 for socket connections. */
        domid_t id;
-
-       /* Blocked on transaction?  Busy? */
-       enum state state;
 
        /* Is this a read-only connection? */
        bool can_write;
-
-       /* Are we waiting for a watch event ack? */
-       struct watch *waiting_for_ack;
 
        /* Buffered incoming data. */
        struct buffered_data *in;
 
        /* Buffered output data */
-       struct buffered_data *out;
-
-       /* If we had a watch fire outgoing when we needed to reply... */
-       struct buffered_data *waiting_reply;
+       struct list_head out_list;
 
        /* My transaction, if any. */
        struct transaction *transaction;
@@ -175,3 +163,13 @@
 extern int event_fd;
 
 #endif /* _XENSTORED_CORE_H */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_domain.c Mon Oct 10 13:46:53 2005
@@ -276,12 +276,13 @@
 
 bool domain_can_read(struct connection *conn)
 {
-       return conn->state == OK && buffer_has_input(conn->domain->input);
+       return buffer_has_input(conn->domain->input);
 }
 
 bool domain_can_write(struct connection *conn)
 {
-       return conn->out && buffer_has_output_room(conn->domain->output);
+       return (!list_empty(&conn->out_list) &&
+                buffer_has_output_room(conn->domain->output));
 }
 
 static struct domain *new_domain(void *context, domid_t domid,
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_transaction.c
--- a/tools/xenstore/xenstored_transaction.c    Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_transaction.c    Mon Oct 10 13:46:53 2005
@@ -154,9 +154,9 @@
                return;
        }
 
-       /* Set to NULL so fire_watches sends events, tdb_context works. */
        trans = conn->transaction;
        conn->transaction = NULL;
+
        /* Attach transaction to arg for auto-cleanup */
        talloc_steal(arg, trans);
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_watch.c
--- a/tools/xenstore/xenstored_watch.c  Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_watch.c  Mon Oct 10 13:46:53 2005
@@ -32,17 +32,6 @@
 #include "xenstored_test.h"
 #include "xenstored_domain.h"
 
-/* FIXME: time out unacked watches. */
-struct watch_event
-{
-       /* The events on this watch. */
-       struct list_head list;
-
-       /* Data to send (node\0token\0). */
-       unsigned int len;
-       char *data;
-};
-
 struct watch
 {
        /* Watches on this connection */
@@ -58,54 +47,17 @@
        char *node;
 };
 
-/* Look through our watches: if any of them have an event, queue it. */
-void queue_next_event(struct connection *conn)
-{
-       struct watch_event *event;
-       struct watch *watch;
-
-       /* We had a reply queued already?  Send it: other end will
-        * discard watch. */
-       if (conn->waiting_reply) {
-               conn->out = conn->waiting_reply;
-               conn->waiting_reply = NULL;
-               conn->waiting_for_ack = NULL;
-               return;
-       }
-
-       /* If we're already waiting for ack, don't queue more. */
-       if (conn->waiting_for_ack)
-               return;
-
-       list_for_each_entry(watch, &conn->watches, list) {
-               event = list_top(&watch->events, struct watch_event, list);
-               if (event) {
-                       conn->waiting_for_ack = watch;
-                       send_reply(conn,XS_WATCH_EVENT,event->data,event->len);
-                       break;
-               }
-       }
-}
-
-static int destroy_watch_event(void *_event)
-{
-       struct watch_event *event = _event;
-
-       trace_destroy(event, "watch_event");
-       return 0;
-}
-
 static void add_event(struct connection *conn,
                      struct watch *watch,
                      const char *name)
 {
-       struct watch_event *event;
+       /* Data to send (node\0token\0). */
+       unsigned int len;
+       char *data;
 
        if (!check_event_node(name)) {
                /* Can this conn load node, or see that it doesn't exist? */
-               struct node *node;
-
-               node = get_node(conn, name, XS_PERM_READ);
+               struct node *node = get_node(conn, name, XS_PERM_READ);
                if (!node && errno != ENOENT)
                        return;
        }
@@ -116,14 +68,12 @@
                        name++;
        }
 
-       event = talloc(watch, struct watch_event);
-       event->len = strlen(name) + 1 + strlen(watch->token) + 1;
-       event->data = talloc_array(event, char, event->len);
-       strcpy(event->data, name);
-       strcpy(event->data + strlen(name) + 1, watch->token);
-       talloc_set_destructor(event, destroy_watch_event);
-       list_add_tail(&event->list, &watch->events);
-       trace_create(event, "watch_event");
+       len = strlen(name) + 1 + strlen(watch->token) + 1;
+       data = talloc_array(watch, char, len);
+       strcpy(data, name);
+       strcpy(data + strlen(name) + 1, watch->token);
+        send_reply(conn, XS_WATCH_EVENT, data, len);
+       talloc_free(data);
 }
 
 /* FIXME: we fail to fire on out of memory.  Should drop connections. */
@@ -143,11 +93,6 @@
                                add_event(i, watch, name);
                        else if (recurse && is_child(watch->node, name))
                                add_event(i, watch, watch->node);
-                       else
-                               continue;
-                       /* If connection not doing anything, queue this. */
-                       if (i->state == OK)
-                               queue_next_event(i);
                }
        }
 }
@@ -181,6 +126,15 @@
                }
        }
 
+       /* Check for duplicates. */
+       list_for_each_entry(watch, &conn->watches, list) {
+               if (streq(watch->node, vec[0]) &&
+                    streq(watch->token, vec[1])) {
+                       send_error(conn, EEXIST);
+                       return;
+               }
+       }
+
        watch = talloc(conn, struct watch);
        watch->node = talloc_strdup(watch, vec[0]);
        watch->token = talloc_strdup(watch, vec[1]);
@@ -200,37 +154,6 @@
        add_event(conn, watch, watch->node);
 }
 
-void do_watch_ack(struct connection *conn, const char *token)
-{
-       struct watch_event *event;
-
-       if (!token) {
-               send_error(conn, EINVAL);
-               return;
-       }
-
-       if (!conn->waiting_for_ack) {
-               send_error(conn, ENOENT);
-               return;
-       }
-
-       if (!streq(conn->waiting_for_ack->token, token)) {
-               /* They're confused: this will cause us to send event again */
-               conn->waiting_for_ack = NULL;
-               send_error(conn, EINVAL);
-               return;
-       }
-
-       /* Remove event: after ack sent, core will call queue_next_event */
-       event = list_top(&conn->waiting_for_ack->events, struct watch_event,
-                        list);
-       list_del(&event->list);
-       talloc_free(event);
-
-       conn->waiting_for_ack = NULL;
-       send_ack(conn, XS_WATCH_ACK);
-}
-
 void do_unwatch(struct connection *conn, struct buffered_data *in)
 {
        struct watch *watch;
@@ -241,9 +164,6 @@
                return;
        }
 
-       /* We don't need to worry if we're waiting for an ack for the
-        * watch we're deleting: conn->waiting_for_ack was reset by
-        * this command in consider_message anyway. */
        node = canonicalize(conn, vec[0]);
        list_for_each_entry(watch, &conn->watches, list) {
                if (streq(watch->node, node) && streq(watch->token, vec[1])) {
@@ -260,18 +180,19 @@
 void dump_watches(struct connection *conn)
 {
        struct watch *watch;
-       struct watch_event *event;
 
-       if (conn->waiting_for_ack)
-               printf("    waiting_for_ack for watch on %s token %s\n",
-                      conn->waiting_for_ack->node,
-                      conn->waiting_for_ack->token);
-
-       list_for_each_entry(watch, &conn->watches, list) {
+       list_for_each_entry(watch, &conn->watches, list)
                printf("    watch on %s token %s\n",
                       watch->node, watch->token);
-               list_for_each_entry(event, &watch->events, list)
-                       printf("        event: %s\n", event->data);
-       }
 }
 #endif
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_watch.h
--- a/tools/xenstore/xenstored_watch.h  Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_watch.h  Mon Oct 10 13:46:53 2005
@@ -23,17 +23,9 @@
 #include "xenstored_core.h"
 
 void do_watch(struct connection *conn, struct buffered_data *in);
-void do_watch_ack(struct connection *conn, const char *token);
 void do_unwatch(struct connection *conn, struct buffered_data *in);
 
-/* Is this a watch event message for this connection? */
-bool is_watch_event(struct connection *conn, struct buffered_data *out);
-
-/* Look through our watches: if any of them have an event, queue it. */
-void queue_next_event(struct connection *conn);
-
-/* Fire all watches: recurse means all the children are affected (ie. rm).
- */
+/* Fire all watches: recurse means all the children are affected (ie. rm). */
 void fire_watches(struct connection *conn, const char *name, bool recurse);
 
 void dump_watches(struct connection *conn);
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xs.c
--- a/tools/xenstore/xs.c       Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xs.c       Mon Oct 10 13:46:53 2005
@@ -78,9 +78,33 @@
 
        /* One transaction at a time. */
        pthread_mutex_t transaction_mutex;
+       pthread_t transaction_pthread;
 };
 
+struct xs_transaction_handle {
+       int id;
+};
+
 static void *read_thread(void *arg);
+
+static void request_mutex_acquire(struct xs_handle *h)
+{
+       /*
+        * We can't distinguish non-transactional from transactional
+        * requests right now. So temporarily acquire the transaction mutex
+        * if this task is outside transaction context.
+        */
+       if (h->transaction_pthread != pthread_self())
+               pthread_mutex_lock(&h->transaction_mutex);
+       pthread_mutex_lock(&h->request_mutex);
+}
+
+static void request_mutex_release(struct xs_handle *h)
+{
+       pthread_mutex_unlock(&h->request_mutex);
+       if (h->transaction_pthread != pthread_self())
+               pthread_mutex_unlock(&h->transaction_mutex);
+}
 
 int xs_fileno(struct xs_handle *h)
 {
@@ -163,6 +187,7 @@
 
        pthread_mutex_init(&h->request_mutex, NULL);
        pthread_mutex_init(&h->transaction_mutex, NULL);
+       h->transaction_pthread = -1;
 
        if (pthread_create(&h->read_thr, NULL, read_thread, h) != 0)
                goto error;
@@ -316,7 +341,7 @@
        ignorepipe.sa_flags = 0;
        sigaction(SIGPIPE, &ignorepipe, &oldact);
 
-       pthread_mutex_lock(&h->request_mutex);
+       request_mutex_acquire(h);
 
        if (!xs_write_all(h->fd, &msg, sizeof(msg)))
                goto fail;
@@ -329,7 +354,7 @@
        if (!ret)
                goto fail;
 
-       pthread_mutex_unlock(&h->request_mutex);
+       request_mutex_release(h);
 
        sigaction(SIGPIPE, &oldact, NULL);
        if (msg.type == XS_ERROR) {
@@ -350,7 +375,7 @@
 fail:
        /* We're in a bad state, so close fd. */
        saved_errno = errno;
-       pthread_mutex_unlock(&h->request_mutex);
+       request_mutex_release(h);
        sigaction(SIGPIPE, &oldact, NULL);
 close_fd:
        close(h->fd);
@@ -386,7 +411,8 @@
        return true;
 }
 
-char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num)
+char **xs_directory(struct xs_handle *h, struct xs_transaction_handle *t,
+                   const char *path, unsigned int *num)
 {
        char *strings, *p, **ret;
        unsigned int len;
@@ -417,7 +443,8 @@
  * Returns a malloced value: call free() on it after use.
  * len indicates length in bytes, not including the nul.
  */
-void *xs_read(struct xs_handle *h, const char *path, unsigned int *len)
+void *xs_read(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path, unsigned int *len)
 {
        return xs_single(h, XS_READ, path, len);
 }
@@ -425,8 +452,8 @@
 /* Write the value of a single file.
  * Returns false on failure.
  */
-bool xs_write(struct xs_handle *h, const char *path,
-             const void *data, unsigned int len)
+bool xs_write(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path, const void *data, unsigned int len)
 {
        struct iovec iovec[2];
 
@@ -441,7 +468,8 @@
 /* Create a new directory.
  * Returns false on failure, or success if it already exists.
  */
-bool xs_mkdir(struct xs_handle *h, const char *path)
+bool xs_mkdir(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path)
 {
        return xs_bool(xs_single(h, XS_MKDIR, path, NULL));
 }
@@ -449,7 +477,8 @@
 /* Destroy a file or directory (directories must be empty).
  * Returns false on failure, or success if it doesn't exist.
  */
-bool xs_rm(struct xs_handle *h, const char *path)
+bool xs_rm(struct xs_handle *h, struct xs_transaction_handle *t,
+          const char *path)
 {
        return xs_bool(xs_single(h, XS_RM, path, NULL));
 }
@@ -458,6 +487,7 @@
  * Returns malloced array, or NULL: call free() after use.
  */
 struct xs_permissions *xs_get_permissions(struct xs_handle *h,
+                                         struct xs_transaction_handle *t,
                                          const char *path, unsigned int *num)
 {
        char *strings;
@@ -490,7 +520,9 @@
 /* Set permissions of node (must be owner).
  * Returns false on failure.
  */
-bool xs_set_permissions(struct xs_handle *h, const char *path,
+bool xs_set_permissions(struct xs_handle *h,
+                       struct xs_transaction_handle *t,
+                       const char *path,
                        struct xs_permissions *perms,
                        unsigned int num_perms)
 {
@@ -593,15 +625,6 @@
        return ret;
 }
 
-/* Acknowledge watch on node.  Watches must be acknowledged before
- * any other watches can be read.
- * Returns false on failure.
- */
-bool xs_acknowledge_watch(struct xs_handle *h, const char *token)
-{
-       return xs_bool(xs_single(h, XS_WATCH_ACK, token, NULL));
-}
-
 /* Remove a watch on a node.
  * Returns false on failure (no watch on that node).
  */
@@ -620,12 +643,22 @@
 /* Start a transaction: changes by others will not be seen during this
  * transaction, and changes will not be visible to others until end.
  * You can only have one transaction at any time.
- * Returns false on failure.
- */
-bool xs_transaction_start(struct xs_handle *h)
-{
+ * Returns NULL on failure.
+ */
+struct xs_transaction_handle *xs_transaction_start(struct xs_handle *h)
+{
+       bool rc;
+
        pthread_mutex_lock(&h->transaction_mutex);
-       return xs_bool(xs_single(h, XS_TRANSACTION_START, "", NULL));
+       h->transaction_pthread = pthread_self();
+
+       rc = xs_bool(xs_single(h, XS_TRANSACTION_START, "", NULL));
+       if (!rc) {
+               h->transaction_pthread = -1;
+               pthread_mutex_unlock(&h->transaction_mutex);
+       }
+
+       return (struct xs_transaction_handle *)rc;
 }
 
 /* End a transaction.
@@ -633,10 +666,14 @@
  * Returns false on failure, which indicates an error: transactions will
  * not fail spuriously.
  */
-bool xs_transaction_end(struct xs_handle *h, bool abort)
+bool xs_transaction_end(struct xs_handle *h, struct xs_transaction_handle *t,
+                       bool abort)
 {
        char abortstr[2];
        bool rc;
+
+       if (t == NULL)
+               return -EINVAL;
 
        if (abort)
                strcpy(abortstr, "F");
@@ -645,6 +682,7 @@
        
        rc = xs_bool(xs_single(h, XS_TRANSACTION_END, abortstr, NULL));
 
+       h->transaction_pthread = -1;
        pthread_mutex_unlock(&h->transaction_mutex);
 
        return rc;
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xs.h
--- a/tools/xenstore/xs.h       Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xs.h       Mon Oct 10 13:46:53 2005
@@ -23,6 +23,7 @@
 #include <xs_lib.h>
 
 struct xs_handle;
+struct xs_transaction_handle;
 
 /* On failure, these routines set errno. */
 
@@ -44,41 +45,47 @@
  * Returns a malloced array: call free() on it after use.
  * Num indicates size.
  */
-char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num);
+char **xs_directory(struct xs_handle *h, struct xs_transaction_handle *t,
+                   const char *path, unsigned int *num);
 
 /* Get the value of a single file, nul terminated.
  * Returns a malloced value: call free() on it after use.
  * len indicates length in bytes, not including terminator.
  */
-void *xs_read(struct xs_handle *h, const char *path, unsigned int *len);
+void *xs_read(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path, unsigned int *len);
 
 /* Write the value of a single file.
  * Returns false on failure.
  */
-bool xs_write(struct xs_handle *h, const char *path, const void *data,
-             unsigned int len);
+bool xs_write(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path, const void *data, unsigned int len);
 
 /* Create a new directory.
  * Returns false on failure, or success if it already exists.
  */
-bool xs_mkdir(struct xs_handle *h, const char *path);
+bool xs_mkdir(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path);
 
 /* Destroy a file or directory (and children).
  * Returns false on failure, or success if it doesn't exist.
  */
-bool xs_rm(struct xs_handle *h, const char *path);
+bool xs_rm(struct xs_handle *h, struct xs_transaction_handle *t,
+          const char *path);
 
 /* Get permissions of node (first element is owner, first perms is "other").
  * Returns malloced array, or NULL: call free() after use.
  */
 struct xs_permissions *xs_get_permissions(struct xs_handle *h,
+                                         struct xs_transaction_handle *t,
                                          const char *path, unsigned int *num);
 
 /* Set permissions of node (must be owner).
  * Returns false on failure.
  */
-bool xs_set_permissions(struct xs_handle *h, const char *path,
-                       struct xs_permissions *perms, unsigned int num_perms);
+bool xs_set_permissions(struct xs_handle *h, struct xs_transaction_handle *t,
+                       const char *path, struct xs_permissions *perms,
+                       unsigned int num_perms);
 
 /* Watch a node for changes (poll on fd to detect, or call read_watch()).
  * When the node (or any child) changes, fd will become readable.
@@ -96,12 +103,6 @@
  */
 char **xs_read_watch(struct xs_handle *h, unsigned int *num);
 
-/* Acknowledge watch on node.  Watches must be acknowledged before
- * any other watches can be read.
- * Returns false on failure.
- */
-bool xs_acknowledge_watch(struct xs_handle *h, const char *token);
-
 /* Remove a watch on a node: implicitly acks any outstanding watch.
  * Returns false on failure (no watch on that node).
  */
@@ -110,16 +111,17 @@
 /* Start a transaction: changes by others will not be seen during this
  * transaction, and changes will not be visible to others until end.
  * You can only have one transaction at any time.
- * Returns false on failure.
+ * Returns NULL on failure.
  */
-bool xs_transaction_start(struct xs_handle *h);
+struct xs_transaction_handle *xs_transaction_start(struct xs_handle *h);
 
 /* End a transaction.
  * If abandon is true, transaction is discarded instead of committed.
  * Returns false on failure: if errno == EAGAIN, you have to restart
  * transaction.
  */
-bool xs_transaction_end(struct xs_handle *h, bool abort);
+bool xs_transaction_end(struct xs_handle *h, struct xs_transaction_handle *t,
+                       bool abort);
 
 /* Introduce a new domain.
  * This tells the store daemon about a shared memory page, event channel
@@ -142,3 +144,13 @@
                       void *data, unsigned int len);
 
 #endif /* _XS_H */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xs_test.c
--- a/tools/xenstore/xs_test.c  Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xs_test.c  Mon Oct 10 13:46:53 2005
@@ -42,6 +42,7 @@
 #define XSTEST
 
 static struct xs_handle *handles[10] = { NULL };
+static struct xs_transaction_handle *txh[10] = { NULL };
 
 static unsigned int timeout_ms = 500;
 static bool timeout_suppressed = true;
@@ -201,7 +202,6 @@
             "  watch <path> <token>\n"
             "  watchnoack <path> <token>\n"
             "  waitwatch\n"
-            "  ackwatch <token>\n"
             "  unwatch <path> <token>\n"
             "  close\n"
             "  start <node>\n"
@@ -313,7 +313,7 @@
        char **entries;
        unsigned int i, num;
 
-       entries = xs_directory(handles[handle], path, &num);
+       entries = xs_directory(handles[handle], txh[handle], path, &num);
        if (!entries) {
                failed(handle);
                return;
@@ -332,7 +332,7 @@
        char *value;
        unsigned int len;
 
-       value = xs_read(handles[handle], path, &len);
+       value = xs_read(handles[handle], txh[handle], path, &len);
        if (!value) {
                failed(handle);
                return;
@@ -348,7 +348,7 @@
 
 static void do_write(unsigned int handle, char *path, char *data)
 {
-       if (!xs_write(handles[handle], path, data, strlen(data)))
+       if (!xs_write(handles[handle], txh[handle], path, data, strlen(data)))
                failed(handle);
 }
 
@@ -361,13 +361,13 @@
 
 static void do_mkdir(unsigned int handle, char *path)
 {
-       if (!xs_mkdir(handles[handle], path))
+       if (!xs_mkdir(handles[handle], txh[handle], path))
                failed(handle);
 }
 
 static void do_rm(unsigned int handle, char *path)
 {
-       if (!xs_rm(handles[handle], path))
+       if (!xs_rm(handles[handle], txh[handle], path))
                failed(handle);
 }
 
@@ -376,7 +376,7 @@
        unsigned int i, num;
        struct xs_permissions *perms;
 
-       perms = xs_get_permissions(handles[handle], path, &num);
+       perms = xs_get_permissions(handles[handle], txh[handle], path, &num);
        if (!perms) {
                failed(handle);
                return;
@@ -437,7 +437,7 @@
                        barf("bad flags %s\n", arg);
        }
 
-       if (!xs_set_permissions(handles[handle], path, perms, i))
+       if (!xs_set_permissions(handles[handle], txh[handle], path, perms, i))
                failed(handle);
 }
 
@@ -454,8 +454,6 @@
                if (!vec ||
                    !streq(vec[XS_WATCH_PATH], node) ||
                    !streq(vec[XS_WATCH_TOKEN], token))
-                       failed(handle);
-               if (!xs_acknowledge_watch(handles[handle], token))
                        failed(handle);
        }
 }
@@ -515,12 +513,6 @@
        free(vec);
 }
 
-static void do_ackwatch(unsigned int handle, const char *token)
-{
-       if (!xs_acknowledge_watch(handles[handle], token))
-               failed(handle);
-}
-
 static void do_unwatch(unsigned int handle, const char *node, const char 
*token)
 {
        if (!xs_unwatch(handles[handle], node, token))
@@ -529,14 +521,16 @@
 
 static void do_start(unsigned int handle)
 {
-       if (!xs_transaction_start(handles[handle]))
+       txh[handle] = xs_transaction_start(handles[handle]);
+       if (txh[handle] == NULL)
                failed(handle);
 }
 
 static void do_end(unsigned int handle, bool abort)
 {
-       if (!xs_transaction_end(handles[handle], abort))
-               failed(handle);
+       if (!xs_transaction_end(handles[handle], txh[handle], abort))
+               failed(handle);
+       txh[handle] = NULL;
 }
 
 static void do_introduce(unsigned int handle,
@@ -626,7 +620,8 @@
 
                sprintf(subnode, "%s/%s", node, dir[i]);
 
-               perms = xs_get_permissions(handles[handle], subnode,&numperms);
+               perms = xs_get_permissions(handles[handle], txh[handle],
+                                          subnode,&numperms);
                if (!perms) {
                        failed(handle);
                        return;
@@ -643,7 +638,8 @@
                output("\n");
 
                /* Even directories can have contents. */
-               contents = xs_read(handles[handle], subnode, &len);
+               contents = xs_read(handles[handle], txh[handle], 
+                                  subnode, &len);
                if (!contents) {
                        if (errno != EISDIR)
                                failed(handle);
@@ -653,7 +649,8 @@
                }                       
 
                /* Every node is a directory. */
-               subdirs = xs_directory(handles[handle], subnode, &subnum);
+               subdirs = xs_directory(handles[handle], txh[handle], 
+                                      subnode, &subnum);
                if (!subdirs) {
                        failed(handle);
                        return;
@@ -668,7 +665,7 @@
        char **subdirs;
        unsigned int subnum;
 
-       subdirs = xs_directory(handles[handle], "/", &subnum);
+       subdirs = xs_directory(handles[handle], txh[handle], "/", &subnum);
        if (!subdirs) {
                failed(handle);
                return;
@@ -746,13 +743,12 @@
                do_watch(handle, arg(line, 1), arg(line, 2), false);
        else if (streq(command, "waitwatch"))
                do_waitwatch(handle);
-       else if (streq(command, "ackwatch"))
-               do_ackwatch(handle, arg(line, 1));
        else if (streq(command, "unwatch"))
                do_unwatch(handle, arg(line, 1), arg(line, 2));
        else if (streq(command, "close")) {
                xs_daemon_close(handles[handle]);
                handles[handle] = NULL;
+               txh[handle] = NULL;
        } else if (streq(command, "start"))
                do_start(handle);
        else if (streq(command, "commit"))
@@ -836,3 +832,13 @@
 
        return 0;
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 0e7c48861e95 -r 1ac39c7a0435 xen/include/public/io/xs_wire.h
--- a/xen/include/public/io/xs_wire.h   Mon Oct 10 13:42:38 2005
+++ b/xen/include/public/io/xs_wire.h   Mon Oct 10 13:46:53 2005
@@ -30,25 +30,23 @@
 
 enum xsd_sockmsg_type
 {
-       XS_DEBUG,
-       XS_DIRECTORY,
-       XS_READ,
-       XS_GET_PERMS,
-       XS_WATCH,
-       XS_WATCH_ACK,
-       XS_UNWATCH,
-       XS_TRANSACTION_START,
-       XS_TRANSACTION_END,
-       XS_OP_READ_ONLY = XS_TRANSACTION_END,
-       XS_INTRODUCE,
-       XS_RELEASE,
-       XS_GET_DOMAIN_PATH,
-       XS_WRITE,
-       XS_MKDIR,
-       XS_RM,
-       XS_SET_PERMS,
-       XS_WATCH_EVENT,
-       XS_ERROR,
+    XS_DEBUG,
+    XS_DIRECTORY,
+    XS_READ,
+    XS_GET_PERMS,
+    XS_WATCH,
+    XS_UNWATCH,
+    XS_TRANSACTION_START,
+    XS_TRANSACTION_END,
+    XS_INTRODUCE,
+    XS_RELEASE,
+    XS_GET_DOMAIN_PATH,
+    XS_WRITE,
+    XS_MKDIR,
+    XS_RM,
+    XS_SET_PERMS,
+    XS_WATCH_EVENT,
+    XS_ERROR,
 };
 
 #define XS_WRITE_NONE "NONE"
@@ -58,38 +56,40 @@
 /* We hand errors as strings, for portability. */
 struct xsd_errors
 {
-       int errnum;
-       const char *errstring;
+    int errnum;
+    const char *errstring;
 };
 #define XSD_ERROR(x) { x, #x }
 static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
-       XSD_ERROR(EINVAL),
-       XSD_ERROR(EACCES),
-       XSD_ERROR(EEXIST),
-       XSD_ERROR(EISDIR),
-       XSD_ERROR(ENOENT),
-       XSD_ERROR(ENOMEM),
-       XSD_ERROR(ENOSPC),
-       XSD_ERROR(EIO),
-       XSD_ERROR(ENOTEMPTY),
-       XSD_ERROR(ENOSYS),
-       XSD_ERROR(EROFS),
-       XSD_ERROR(EBUSY),
-       XSD_ERROR(EAGAIN),
-       XSD_ERROR(EISCONN),
+    XSD_ERROR(EINVAL),
+    XSD_ERROR(EACCES),
+    XSD_ERROR(EEXIST),
+    XSD_ERROR(EISDIR),
+    XSD_ERROR(ENOENT),
+    XSD_ERROR(ENOMEM),
+    XSD_ERROR(ENOSPC),
+    XSD_ERROR(EIO),
+    XSD_ERROR(ENOTEMPTY),
+    XSD_ERROR(ENOSYS),
+    XSD_ERROR(EROFS),
+    XSD_ERROR(EBUSY),
+    XSD_ERROR(EAGAIN),
+    XSD_ERROR(EISCONN),
 };
 struct xsd_sockmsg
 {
-       u32 type;
-       u32 len;                /* Length of data following this. */
+    u32 type;  /* XS_??? */
+    u32 req_id;/* Request identifier, echoed in daemon's response.  */
+    u32 tx_id; /* Transaction id (0 if not related to a transaction). */
+    u32 len;   /* Length of data following this. */
 
-       /* Generally followed by nul-terminated string(s). */
+    /* Generally followed by nul-terminated string(s). */
 };
 
 enum xs_watch_type
 {
-       XS_WATCH_PATH = 0,
-       XS_WATCH_TOKEN,
+    XS_WATCH_PATH = 0,
+    XS_WATCH_TOKEN,
 };
 
 #endif /* _XS_WIRE_H */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.