[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[XenPPC] [linux-ppc-2.6] [LINUX][XEN][POWERPC] update with the latest Xen VIO



# HG changeset patch
# User Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
# Node ID 933b1d114a89abe409b50b948c39d3b28dd3e02f
# Parent  f0be2cc05103e19788416719e2b9ec38b38bd26e
[LINUX][XEN][POWERPC] update with the latest Xen VIO

Signed-off-by: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
---
 arch/powerpc/platforms/xen/balloon.c       |   46 ++++-
 arch/powerpc/platforms/xen/gnttab.c        |   20 +-
 arch/powerpc/xmon/xmon.c                   |    3 
 drivers/xen/blkback/blkback.c              |   77 ++++----
 drivers/xen/blkback/common.h               |    9 -
 drivers/xen/blkback/interface.c            |   28 +--
 drivers/xen/blkback/xenbus.c               |  126 ++++++++++----
 drivers/xen/blkfront/blkfront.c            |   13 +
 drivers/xen/netback/interface.c            |   29 ++-
 drivers/xen/netback/loopback.c             |   64 +++++++
 drivers/xen/netback/netback.c              |  169 +++++++++----------
 drivers/xen/netback/xenbus.c               |   24 +-
 drivers/xen/netfront/netfront.c            |  129 +++++++++++---
 drivers/xen/xenbus/xenbus_backend_client.c |   13 +
 drivers/xen/xenbus/xenbus_client.c         |   23 ++
 drivers/xen/xenbus/xenbus_comms.c          |   15 -
 drivers/xen/xenbus/xenbus_comms.h          |    2 
 drivers/xen/xenbus/xenbus_dev.c            |  120 +++++++++++++
 drivers/xen/xenbus/xenbus_probe.c          |  252 +++++++++++++++++++++--------
 drivers/xen/xenbus/xenbus_xs.c             |   10 +
 include/asm-powerpc/page.h                 |    1 
 include/asm-powerpc/xen/asm/hypervisor.h   |   11 +
 include/xen/balloon.h                      |    2 
 include/xen/foreign_page.h                 |   31 +++
 include/xen/hvm.h                          |   13 +
 include/xen/xenbus.h                       |    9 -
 26 files changed, 908 insertions(+), 331 deletions(-)

diff -r f0be2cc05103 -r 933b1d114a89 arch/powerpc/platforms/xen/balloon.c
--- a/arch/powerpc/platforms/xen/balloon.c      Fri Oct 13 12:36:39 2006 -0400
+++ b/arch/powerpc/platforms/xen/balloon.c      Mon Oct 16 09:31:03 2006 -0400
@@ -1,20 +1,47 @@
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <asm/hypervisor.h>
 
 /*
  * FIXME: Port balloon driver, if ever
  */
 
-struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
+struct page **alloc_empty_pages_and_pagevec(int nr_pages)
 {
-       unsigned long vstart;
-       unsigned int  order = get_order(nr_pages * PAGE_SIZE);
+       struct page *page, **pagevec;
+       void *vaddr;
+       int i;
 
-       vstart = __get_free_pages(GFP_KERNEL, order);
-       if (vstart == 0)
-               return NULL;
+       pagevec = kmalloc(sizeof(*pagevec) * nr_pages, GFP_KERNEL);
+       if (pagevec == NULL)
+               return  NULL;
 
-       return virt_to_page(vstart);
+       for (i = 0; i < nr_pages; i++) {
+               page = alloc_page(GFP_KERNEL);
+               pagevec[i] = page;
+               vaddr = page_address(page);
+               scrub_pages(vaddr, 1);
+       }
+
+       return pagevec;
+}
+
+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
+{
+       int arch_is_foreign_page(struct page *page);
+       struct page *page;
+       int i;
+
+       if (pagevec == NULL)
+               return;
+
+       for (i = 0; i < nr_pages; i++) {
+               page = pagevec[i];
+               if (!arch_is_foreign_page(page))
+                       __free_page(page);
+       }
+       
+       kfree(pagevec);
 }
 
 void balloon_dealloc_empty_page_range(
@@ -32,6 +59,7 @@ void balloon_release_driver_page(struct 
        BUG();
 }
 
-EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
-EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);
+EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
+EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
+EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
 EXPORT_SYMBOL_GPL(balloon_release_driver_page);
diff -r f0be2cc05103 -r 933b1d114a89 arch/powerpc/platforms/xen/gnttab.c
--- a/arch/powerpc/platforms/xen/gnttab.c       Fri Oct 13 12:36:39 2006 -0400
+++ b/arch/powerpc/platforms/xen/gnttab.c       Mon Oct 16 09:31:03 2006 -0400
@@ -39,8 +39,6 @@ static long map_to_linear(ulong paddr)
        mode = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
        vaddr = (ulong)__va(paddr);
 
-       DBG("%s: 0x%lx: 0x%x\n",
-           __func__, paddr, page_count(virt_to_page(vaddr)));
        {
                unsigned long vpn, hash, hpteg;
                unsigned long vsid = get_kernel_vsid(vaddr);
@@ -162,12 +160,9 @@ static void gnttab_pre_unmap_grant_ref(
                        continue;
                }
 
-               DBG("%s: 0x%lx: 0x%x\n",
+               DBG("%s: 0x%lx: page count: 0x%x\n",
                       __func__, ea, page_count(virt_to_page(ea)));
                plpar_pte_remove(0, slot, 0, &dummy1, &dummy2);
-
-               DBG("%s: remove_pages(0x%lx, 0x%lx)\n",
-                   __func__, unmap[i].host_addr, unmap[i].dev_bus_addr);
        }
 }
 
@@ -186,6 +181,9 @@ static void gnttab_post_map_grant_ref(
                /* ??? store the slot somewhere ??? */
                map[i].host_addr = (ulong)__va(pa);
                page = virt_to_page(map[i].host_addr);
+
+               DBG("%s: 0x%lx: 0x%x\n",
+                   __func__, pa, page_count(page));
 
                if (page_count(page) == 1) {
 #ifdef DEBUG                   
@@ -258,11 +256,6 @@ int HYPERVISOR_grant_table_op(unsigned i
        return ret;
 }
 EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
-
-ulong foreign_alloc_empty_page_range(unsigned long nr_pages)
-{
-       return (ulong)__va(foreign_map_base);
-}
 
 static ulong setup_grant_maps(void)
 {
@@ -350,3 +343,8 @@ void *arch_gnttab_map(unsigned long *fra
 
        return shared;
 }
+
+int arch_is_foreign_page(struct page *page)
+{
+       return ((page_to_pfn(page) << PAGE_SHIFT) >= foreign_map_base);
+}
diff -r f0be2cc05103 -r 933b1d114a89 arch/powerpc/xmon/xmon.c
--- a/arch/powerpc/xmon/xmon.c  Fri Oct 13 12:36:39 2006 -0400
+++ b/arch/powerpc/xmon/xmon.c  Mon Oct 16 09:31:03 2006 -0400
@@ -753,6 +753,9 @@ cmds(struct pt_regs *excp)
                        cmd = inchar();
                }
                switch (cmd) {
+               case 'A':
+                       asm volatile(".long 0x200;nop");
+                       break;
                case 'm':
                        cmd = inchar();
                        switch (cmd) {
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/blkback.c
--- a/drivers/xen/blkback/blkback.c     Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/blkback.c     Mon Oct 16 09:31:03 2006 -0400
@@ -55,8 +55,6 @@ static int blkif_reqs = 64;
 static int blkif_reqs = 64;
 module_param_named(reqs, blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
-
-static int mmap_pages;
 
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats = 0;
@@ -87,8 +85,7 @@ static DECLARE_WAIT_QUEUE_HEAD(pending_f
 
 #define BLKBACK_INVALID_HANDLE (~0)
 
-static unsigned long mmap_vstart;
-static unsigned long *pending_vaddrs;
+static struct page **pending_pages;
 static grant_handle_t *pending_grant_handles;
 
 static inline int vaddr_pagenr(pending_req_t *req, int seg)
@@ -98,8 +95,23 @@ static inline int vaddr_pagenr(pending_r
 
 static inline unsigned long vaddr(pending_req_t *req, int seg)
 {
-       return pending_vaddrs[vaddr_pagenr(req, seg)];
-}
+       unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
+       return (unsigned long)pfn_to_kaddr(pfn);
+}
+
+#ifdef CONFIG_PPC_XEN
+static inline void update_pending_pages(
+       unsigned int idx, gnttab_map_grant_ref_t *mop)
+{
+#ifdef PPC_NOT_YET
+       extern int arch_is_foreign_page(struct page *page);
+
+       if (!arch_is_foreign_page(pending_pages[idx]))
+               __free_page(pending_pages[idx]);
+#endif
+       pending_pages[idx] = pfn_to_page(mop->dev_bus_addr >> PAGE_SHIFT);
+}
+#endif
 
 #define pending_handle(_req, _seg) \
        (pending_grant_handles[vaddr_pagenr(_req, _seg)])
@@ -399,8 +411,7 @@ static void dispatch_rw_block_io(blkif_t
 
                pending_handle(pending_req, i) = map[i].handle;
 #ifdef CONFIG_PPC_XEN
-               pending_vaddrs[vaddr_pagenr(pending_req, i)] =
-                       (unsigned long)gnttab_map_vaddr(map[i]);
+               update_pending_pages(vaddr_pagenr(pending_req, i), &map[i]);
 #else
                set_phys_to_machine(__pa(vaddr(
                        pending_req, i)) >> PAGE_SHIFT,
@@ -511,57 +522,43 @@ static void make_response(blkif_t *blkif
 
 static int __init blkif_init(void)
 {
-       struct page *page;
-       int i;
+       int i, mmap_pages;
 
        if (!is_running_on_xen())
                return -ENODEV;
-
-       mmap_pages            = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
-#ifdef CONFIG_PPC_XEN
-       (void)page;
-       mmap_vstart = foreign_alloc_empty_page_range(mmap_pages);
-#else
-       page = balloon_alloc_empty_page_range(mmap_pages);
-       if (page == NULL)
-               return -ENOMEM;
-       mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-#endif
+       
+       mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
        pending_reqs          = kmalloc(sizeof(pending_reqs[0]) *
                                        blkif_reqs, GFP_KERNEL);
        pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
                                        mmap_pages, GFP_KERNEL);
-       pending_vaddrs        = kmalloc(sizeof(pending_vaddrs[0]) *
-                                       mmap_pages, GFP_KERNEL);
-       if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
-               kfree(pending_reqs);
-               kfree(pending_grant_handles);
-               kfree(pending_vaddrs);
-               printk("%s: out of memory\n", __FUNCTION__);
-               return -ENOMEM;
-       }
+       pending_pages         = alloc_empty_pages_and_pagevec(mmap_pages);
+
+       if (!pending_reqs || !pending_grant_handles || !pending_pages)
+               goto out_of_memory;
+
+       for (i = 0; i < mmap_pages; i++)
+               pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
 
        blkif_interface_init();
-       
-       printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
-              __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
-       BUG_ON(mmap_vstart == 0);
-       for (i = 0; i < mmap_pages; i++) {
-               pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
-               pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-       }
 
        memset(pending_reqs, 0, sizeof(pending_reqs));
        INIT_LIST_HEAD(&pending_free);
 
        for (i = 0; i < blkif_reqs; i++)
                list_add_tail(&pending_reqs[i].free_list, &pending_free);
-    
+
        blkif_xenbus_init();
 
        return 0;
+
+ out_of_memory:
+       kfree(pending_reqs);
+       kfree(pending_grant_handles);
+       free_empty_pages_and_pagevec(pending_pages, mmap_pages);
+       printk("%s: out of memory\n", __FUNCTION__);
+       return -ENOMEM;
 }
 
 module_init(blkif_init);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/common.h
--- a/drivers/xen/blkback/common.h      Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/common.h      Mon Oct 16 09:31:03 2006 -0400
@@ -55,9 +55,9 @@ struct vbd {
        unsigned char  type;        /* VDISK_xxx */
        u32            pdevice;     /* phys device that this vbd maps to */
        struct block_device *bdev;
-}; 
+};
 
-struct backend_info; 
+struct backend_info;
 
 typedef struct blkif_st {
        /* Unique identifier for this interface. */
@@ -72,7 +72,7 @@ typedef struct blkif_st {
        /* The VBD attached to this interface. */
        struct vbd        vbd;
        /* Back pointer to the backend_info. */
-       struct backend_info *be; 
+       struct backend_info *be;
        /* Private fields. */
        spinlock_t       blk_ring_lock;
        atomic_t         refcnt;
@@ -95,6 +95,7 @@ typedef struct blkif_st {
 } blkif_t;
 
 blkif_t *blkif_alloc(domid_t domid);
+void blkif_disconnect(blkif_t *blkif);
 void blkif_free(blkif_t *blkif);
 int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
 
@@ -121,7 +122,7 @@ struct phys_req {
        blkif_sector_t       sector_number;
 };
 
-int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
 
 void blkif_interface_init(void);
 
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/interface.c
--- a/drivers/xen/blkback/interface.c   Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/interface.c   Mon Oct 16 09:31:03 2006 -0400
@@ -32,6 +32,7 @@
 
 #include "common.h"
 #include <xen/evtchn.h>
+#include <linux/kthread.h>
 
 static kmem_cache_t *blkif_cachep;
 
@@ -75,12 +76,6 @@ static int map_frontend_page(blkif_t *bl
 
        blkif->shmem_ref = shared_page;
        blkif->shmem_handle = op.handle;
-
-#ifdef CONFIG_XEN_IA64_DOM0_NON_VP
-       /* on some arch's, map_grant_ref behaves like mmap, in that the
-        * passed address is a hint and a different address may be returned */
-       blkif->blk_ring_area->addr = gnttab_map_vaddr(op);
-#endif
 
        return 0;
 }
@@ -140,22 +135,33 @@ int blkif_map(blkif_t *blkif, unsigned l
        return 0;
 }
 
-void blkif_free(blkif_t *blkif)
+void blkif_disconnect(blkif_t *blkif)
 {
+       if (blkif->xenblkd) {
+               kthread_stop(blkif->xenblkd);
+               blkif->xenblkd = NULL;
+       }
+
        atomic_dec(&blkif->refcnt);
        wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
+       atomic_inc(&blkif->refcnt);
 
-       /* Already disconnected? */
-       if (blkif->irq)
+       if (blkif->irq) {
                unbind_from_irqhandler(blkif->irq, blkif);
-
-       vbd_free(&blkif->vbd);
+               blkif->irq = 0;
+       }
 
        if (blkif->blk_ring.sring) {
                unmap_frontend_page(blkif);
                free_vm_area(blkif->blk_ring_area);
+               blkif->blk_ring.sring = NULL;
        }
+}
 
+void blkif_free(blkif_t *blkif)
+{
+       if (!atomic_dec_and_test(&blkif->refcnt))
+               BUG();
        kmem_cache_free(blkif_cachep, blkif);
 }
 
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/xenbus.c
--- a/drivers/xen/blkback/xenbus.c      Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/xenbus.c      Mon Oct 16 09:31:03 2006 -0400
@@ -42,7 +42,6 @@ static int connect_ring(struct backend_i
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
                            unsigned int);
-
 
 static void update_blkif_status(blkif_t *blkif)
 { 
@@ -73,26 +72,71 @@ static void update_blkif_status(blkif_t 
 }
 
 
-static ssize_t show_physical_device(struct device *_dev,
-                                   struct device_attribute *attr, char *buf)
-{
-       struct xenbus_device *dev = to_xenbus_device(_dev);
-       struct backend_info *be = dev->dev.driver_data;
-       return sprintf(buf, "%x:%x\n", be->major, be->minor);
-}
-DEVICE_ATTR(physical_device, S_IRUSR | S_IRGRP | S_IROTH,
-           show_physical_device, NULL);
-
-
-static ssize_t show_mode(struct device *_dev, struct device_attribute *attr,
-                        char *buf)
-{
-       struct xenbus_device *dev = to_xenbus_device(_dev);
-       struct backend_info *be = dev->dev.driver_data;
-       return sprintf(buf, "%s\n", be->mode);
-}
-DEVICE_ATTR(mode, S_IRUSR | S_IRGRP | S_IROTH, show_mode, NULL);
-
+/****************************************************************
+ *  sysfs interface for VBD I/O requests
+ */
+
+#define VBD_SHOW(name, format, args...)                                 \
+       static ssize_t show_##name(struct device *_dev,                 \
+                                  struct device_attribute *attr,       \
+                                  char *buf)                           \
+       {                                                               \
+               struct xenbus_device *dev = to_xenbus_device(_dev);     \
+               struct backend_info *be = dev->dev.driver_data;         \
+                                                                       \
+               return sprintf(buf, format, ##args);                    \
+       }                                                               \
+       DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
+VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
+VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
+
+static struct attribute *vbdstat_attrs[] = {
+       &dev_attr_oo_req.attr,
+       &dev_attr_rd_req.attr,
+       &dev_attr_wr_req.attr,
+       NULL
+};
+
+static struct attribute_group vbdstat_group = {
+       .name = "statistics",
+       .attrs = vbdstat_attrs,
+};
+
+VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
+VBD_SHOW(mode, "%s\n", be->mode);
+
+int xenvbd_sysfs_addif(struct xenbus_device *dev)
+{
+       int error;
+       
+       error = device_create_file(&dev->dev, &dev_attr_physical_device);
+       if (error)
+               goto fail1;
+
+       error = device_create_file(&dev->dev, &dev_attr_mode);
+       if (error)
+               goto fail2;
+
+       error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
+       if (error)
+               goto fail3;
+
+       return 0;
+
+fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
+fail2: device_remove_file(&dev->dev, &dev_attr_mode);
+fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
+       return error;
+}
+
+void xenvbd_sysfs_delif(struct xenbus_device *dev)
+{
+       sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
+       device_remove_file(&dev->dev, &dev_attr_mode);
+       device_remove_file(&dev->dev, &dev_attr_physical_device);
+}
 
 static int blkback_remove(struct xenbus_device *dev)
 {
@@ -105,15 +149,16 @@ static int blkback_remove(struct xenbus_
                kfree(be->backend_watch.node);
                be->backend_watch.node = NULL;
        }
+
        if (be->blkif) {
-               if (be->blkif->xenblkd)
-                       kthread_stop(be->blkif->xenblkd);
+               blkif_disconnect(be->blkif);
+               vbd_free(&be->blkif->vbd);
                blkif_free(be->blkif);
                be->blkif = NULL;
        }
 
-       device_remove_file(&dev->dev, &dev_attr_physical_device);
-       device_remove_file(&dev->dev, &dev_attr_mode);
+       if (be->major || be->minor)
+               xenvbd_sysfs_delif(dev);
 
        kfree(be);
        dev->dev.driver_data = NULL;
@@ -149,7 +194,7 @@ static int blkback_probe(struct xenbus_d
        }
 
        /* setup back pointer */
-       be->blkif->be = be; 
+       be->blkif->be = be;
 
        err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
                                 &be->backend_watch, backend_changed);
@@ -228,17 +273,21 @@ static void backend_changed(struct xenbu
                err = vbd_create(be->blkif, handle, major, minor,
                                 (NULL == strchr(be->mode, 'w')));
                if (err) {
-                       be->major = 0;
-                       be->minor = 0;
+                       be->major = be->minor = 0;
                        xenbus_dev_fatal(dev, err, "creating vbd structure");
                        return;
                }
 
-               device_create_file(&dev->dev, &dev_attr_physical_device);
-               device_create_file(&dev->dev, &dev_attr_mode);
+               err = xenvbd_sysfs_addif(dev);
+               if (err) {
+                       vbd_free(&be->blkif->vbd);
+                       be->major = be->minor = 0;
+                       xenbus_dev_fatal(dev, err, "creating sysfs entries");
+                       return;
+               }
 
                /* We're potentially connected now */
-               update_blkif_status(be->blkif); 
+               update_blkif_status(be->blkif);
        }
 }
 
@@ -252,10 +301,15 @@ static void frontend_changed(struct xenb
        struct backend_info *be = dev->dev.driver_data;
        int err;
 
-       DPRINTK("");
+       DPRINTK("%s", xenbus_strstate(frontend_state));
 
        switch (frontend_state) {
        case XenbusStateInitialising:
+               if (dev->state == XenbusStateClosed) {
+                       printk("%s: %s: prepare for reconnect\n",
+                              __FUNCTION__, dev->nodename);
+                       xenbus_switch_state(dev, XenbusStateInitWait);
+               }
                break;
 
        case XenbusStateInitialised:
@@ -273,15 +327,19 @@ static void frontend_changed(struct xenb
                break;
 
        case XenbusStateClosing:
+               blkif_disconnect(be->blkif);
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
        case XenbusStateClosed:
+               xenbus_switch_state(dev, XenbusStateClosed);
+               if (xenbus_dev_is_online(dev))
+                       break;
+               /* fall through if not online */
+       case XenbusStateUnknown:
                device_unregister(&dev->dev);
                break;
 
-       case XenbusStateUnknown:
-       case XenbusStateInitWait:
        default:
                xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
                                 frontend_state);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkfront/blkfront.c
--- a/drivers/xen/blkfront/blkfront.c   Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkfront/blkfront.c   Mon Oct 16 09:31:03 2006 -0400
@@ -46,6 +46,7 @@
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
 #include <asm/hypervisor.h>
+#include <asm/maddr.h>
 
 #define BLKIF_STATE_DISCONNECTED 0
 #define BLKIF_STATE_CONNECTED    1
@@ -255,10 +256,10 @@ static void backend_changed(struct xenbu
        DPRINTK("blkfront:backend_changed.\n");
 
        switch (backend_state) {
-       case XenbusStateUnknown:
        case XenbusStateInitialising:
        case XenbusStateInitWait:
        case XenbusStateInitialised:
+       case XenbusStateUnknown:
        case XenbusStateClosed:
                break;
 
@@ -354,12 +355,14 @@ static void blkfront_closing(struct xenb
        blk_stop_queue(info->rq);
        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
+       spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+       /* Flush gnttab callback work. Must be done with no locks held. */
        flush_scheduled_work();
-       spin_unlock_irqrestore(&blkif_io_lock, flags);
 
        xlvbd_del(info);
 
-       xenbus_switch_state(dev, XenbusStateClosed);
+       xenbus_frontend_closed(dev);
 }
 
 
@@ -713,8 +716,10 @@ static void blkif_free(struct blkfront_i
                blk_stop_queue(info->rq);
        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
+       spin_unlock_irq(&blkif_io_lock);
+
+       /* Flush gnttab callback work. Must be done with no locks held. */
        flush_scheduled_work();
-       spin_unlock_irq(&blkif_io_lock);
 
        /* Free resources associated with old device channel. */
        if (info->ring_ref != GRANT_INVALID_REF) {
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/interface.c
--- a/drivers/xen/netback/interface.c   Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/interface.c   Mon Oct 16 09:31:03 2006 -0400
@@ -34,6 +34,24 @@
 #include <linux/ethtool.h>
 #include <linux/rtnetlink.h>
 
+/*
+ * Module parameter 'queue_length':
+ * 
+ * Enables queuing in the network stack when a client has run out of receive
+ * descriptors. Although this feature can improve receive bandwidth by avoiding
+ * packet loss, it can also result in packets sitting in the 'tx_queue' for
+ * unbounded time. This is bad if those packets hold onto foreign resources.
+ * For example, consider a packet that holds onto resources belonging to the
+ * guest for which it is queued (e.g., packet received on vif1.0, destined for
+ * vif1.1 which is not activated in the guest): in this situation the guest
+ * will never be destroyed, unless vif1.1 is taken down (which flushes the
+ * 'tx_queue').
+ * 
+ * Only set this parameter to non-zero value if you know what you are doing!
+ */
+static unsigned long netbk_queue_length = 0;
+module_param_named(queue_length, netbk_queue_length, ulong, 0);
+
 static void __netif_up(netif_t *netif)
 {
        enable_irq(netif->irq);
@@ -44,6 +62,7 @@ static void __netif_down(netif_t *netif)
 {
        disable_irq(netif->irq);
        netif_deschedule_work(netif);
+       del_timer_sync(&netif->credit_timeout);
 }
 
 static int net_open(struct net_device *dev)
@@ -134,6 +153,7 @@ netif_t *netif_alloc(domid_t domid, unsi
        netif->credit_bytes = netif->remaining_credit = ~0UL;
        netif->credit_usec  = 0UL;
        init_timer(&netif->credit_timeout);
+       netif->credit_timeout.expires = jiffies;
 
        dev->hard_start_xmit = netif_be_start_xmit;
        dev->get_stats       = netif_be_get_stats;
@@ -144,11 +164,10 @@ netif_t *netif_alloc(domid_t domid, unsi
 
        SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
 
-       /*
-        * Reduce default TX queuelen so that each guest interface only
-        * allows it to eat around 6.4MB of host memory.
-        */
-       dev->tx_queue_len = 100;
+       dev->tx_queue_len = netbk_queue_length;
+       if (dev->tx_queue_len != 0)
+               printk(KERN_WARNING "netbk: WARNING: device '%s' has non-zero "
+                      "queue length (%lu)!\n", dev->name, dev->tx_queue_len);
 
        for (i = 0; i < ETH_ALEN; i++)
                if (be_mac[i] != 0)
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/loopback.c
--- a/drivers/xen/netback/loopback.c    Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/loopback.c    Mon Oct 16 09:31:03 2006 -0400
@@ -53,8 +53,10 @@
 #include <linux/skbuff.h>
 #include <linux/ethtool.h>
 #include <net/dst.h>
-
-static int nloopbacks = 8;
+#include <net/xfrm.h>          /* secpath_reset() */
+#include <asm/hypervisor.h>    /* is_initial_xendomain() */
+
+static int nloopbacks = -1;
 module_param(nloopbacks, int, 0);
 MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
 
@@ -77,9 +79,59 @@ static int loopback_close(struct net_dev
        return 0;
 }
 
+#ifdef CONFIG_X86
+static int is_foreign(unsigned long pfn)
+{
+       /* NB. Play it safe for auto-translation mode. */
+       return (xen_feature(XENFEAT_auto_translated_physmap) ||
+               (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT));
+}
+#else
+/* How to detect a foreign mapping? Play it safe. */
+#define is_foreign(pfn)        (1)
+#endif
+
+static int skb_remove_foreign_references(struct sk_buff *skb)
+{
+       struct page *page;
+       unsigned long pfn;
+       int i, off;
+       char *vaddr;
+
+       BUG_ON(skb_shinfo(skb)->frag_list);
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page);
+               if (!is_foreign(pfn))
+                       continue;
+               
+               page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+               if (unlikely(!page))
+                       return 0;
+
+               vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+               off = skb_shinfo(skb)->frags[i].page_offset;
+               memcpy(page_address(page) + off,
+                      vaddr + off,
+                      skb_shinfo(skb)->frags[i].size);
+               kunmap_skb_frag(vaddr);
+
+               put_page(skb_shinfo(skb)->frags[i].page);
+               skb_shinfo(skb)->frags[i].page = page;
+       }
+
+       return 1;
+}
+
 static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct net_private *np = netdev_priv(dev);
+
+       if (!skb_remove_foreign_references(skb)) {
+               np->stats.tx_dropped++;
+               dev_kfree_skb(skb);
+               return 0;
+       }
 
        dst_release(skb->dst);
        skb->dst = NULL;
@@ -110,6 +162,11 @@ static int loopback_start_xmit(struct sk
        skb->protocol = eth_type_trans(skb, dev);
        skb->dev      = dev;
        dev->last_rx  = jiffies;
+
+       /* Flush netfilter context: rx'ed skbuffs not expected to have any. */
+       nf_reset(skb);
+       secpath_reset(skb);
+
        netif_rx(skb);
 
        return 0;
@@ -239,6 +296,9 @@ static int __init loopback_init(void)
 {
        int i, err = 0;
 
+       if (nloopbacks == -1)
+               nloopbacks = is_initial_xendomain() ? 4 : 0;
+
        for (i = 0; i < nloopbacks; i++)
                if ((err = make_loopback(i)) != 0)
                        break;
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/netback.c
--- a/drivers/xen/netback/netback.c     Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/netback.c     Mon Oct 16 09:31:03 2006 -0400
@@ -70,35 +70,43 @@ static struct timer_list net_timer;
 
 static struct sk_buff_head rx_queue;
 
-static unsigned long mmap_vstart;
+static struct page **mmap_pages;
+static inline unsigned long idx_to_kaddr(unsigned int idx)
+{
+       return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
+}
+
 #ifdef CONFIG_PPC_XEN
 
-
-static ulong mmap_vaddrs[MAX_PENDING_REQS];
-#define MMAP_VADDR(_req) (mmap_vaddrs[(_req)])
-
-static inline void PPC_map_vaddrs(int idx, gnttab_map_grant_ref_t *mop)
-{
-       struct page *page;
-       ulong virt = mop->host_addr;
-
-       page = virt_to_page(virt);
-       get_page(page);
-
-#if 0
-       SetPageForeign(page, netif_page_release);
-#else
-       (void)netif_page_release;
+struct address_space xen_foreign_dummy_mapping;
+
+static inline void update_mmap_pages(
+       unsigned int idx, gnttab_map_grant_ref_t *mop)
+{
+       struct page *p;
+#ifdef PPC_NOT_YET
+       struct page *cp = mmap_pages[idx];
+       extern int arch_is_foreign_page(struct page *page);
+
+       if (arch_is_foreign_page(cp)) {
+               printk(KERN_EMERG "%s foreign: %p, 0x%x\n",
+                      __func__, page_address(cp), page_count(cp));
+       } else {
+               printk(KERN_EMERG "%s local:  %p, 0x%x\n",
+                      __func__, page_address(cp), page_count(cp));
+       //              __free_page(mmap_pages[idx]);
+       }
+       
 #endif
-       mmap_vaddrs[idx] = virt;
-}
-#define _mmap_vaddrs(i,op) do { mmap_vaddrs[(i)] = mop->host_addr;
-#else
-#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
-#endif
-
-#ifndef CONFIG_PPC_XEN
-static void *rx_mmap_area;
+       p = pfn_to_page(mop->dev_bus_addr >> PAGE_SHIFT);
+
+       DPRINTK(KERN_EMERG "%s insert[%d]:  0x%lx, 0x%x\n",
+              __func__, idx, __va(mop->dev_bus_addr), page_count(p));
+
+       SetPageForeign(p, netif_page_release);
+       p->index = idx;
+       mmap_pages[idx] = p;
+}
 #endif
 
 #define PKT_PROT_LEN 64
@@ -243,7 +251,7 @@ static struct sk_buff *netbk_copy_skb(st
                copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
                zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
 
-               page = alloc_page(GFP_ATOMIC | zero);
+               page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
                if (unlikely(!page))
                        goto err_free;
 
@@ -818,10 +826,27 @@ void netif_deschedule_work(netif_t *neti
 }
 
 
+static void tx_add_credit(netif_t *netif)
+{
+       unsigned long max_burst;
+
+       /*
+        * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+        * Otherwise the interface can seize up due to insufficient credit.
+        */
+       max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
+       max_burst = min(max_burst, 131072UL);
+       max_burst = max(max_burst, netif->credit_bytes);
+
+       netif->remaining_credit = min(netif->remaining_credit +
+                                     netif->credit_bytes,
+                                     max_burst);
+}
+
 static void tx_credit_callback(unsigned long data)
 {
        netif_t *netif = (netif_t *)data;
-       netif->remaining_credit = netif->credit_bytes;
+       tx_add_credit(netif);
        netif_schedule_work(netif);
 }
 
@@ -845,7 +870,7 @@ inline static void net_tx_action_dealloc
        gop = tx_unmap_ops;
        while (dc != dp) {
                pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
-               gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx),
+               gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
                                    GNTMAP_host_map,
                                    grant_tx_handle[pending_idx]);
                gop++;
@@ -933,7 +958,7 @@ static gnttab_map_grant_ref_t *netbk_get
                txp = RING_GET_REQUEST(&netif->tx, cons++);
                pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
 
-               gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+               gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
                                  GNTMAP_host_map | GNTMAP_readonly,
                                  txp->gref, netif->domid);
 
@@ -966,10 +991,10 @@ static int netbk_tx_check_mop(struct sk_
                netif_put(netif);
        } else {
 #ifdef CONFIG_PPC_XEN
-               PPC_map_vaddrs(pending_idx, mop);
+               update_mmap_pages(pending_idx, mop);
 #else
                set_phys_to_machine(
-                       __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+                       __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
                        FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
 #endif
                grant_tx_handle[pending_idx] = mop->handle;
@@ -987,10 +1012,10 @@ static int netbk_tx_check_mop(struct sk_
                newerr = (++mop)->status;
                if (likely(!newerr)) {
 #ifdef CONFIG_PPC_XEN
-                       PPC_map_vaddrs(pending_idx, mop);
+                       update_mmap_pages(pending_idx, mop);
 #else
                        set_phys_to_machine(
-                               __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
+                               __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
                                FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
 #endif
                        grant_tx_handle[pending_idx] = mop->handle;
@@ -1039,7 +1064,7 @@ static void netbk_fill_frags(struct sk_b
 
                pending_idx = (unsigned long)frag->page;
                txp = &pending_tx_info[pending_idx].req;
-               frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+               frag->page = virt_to_page(idx_to_kaddr(pending_idx));
                frag->size = txp->size;
                frag->page_offset = txp->offset;
 
@@ -1135,6 +1160,7 @@ static void net_tx_action(unsigned long 
                i = netif->tx.req_cons;
                rmb(); /* Ensure that we see the request before we copy it. */
                memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
                /* Credit-based scheduling. */
                if (txreq.size > netif->remaining_credit) {
                        unsigned long now = jiffies;
@@ -1143,25 +1169,27 @@ static void net_tx_action(unsigned long 
                                msecs_to_jiffies(netif->credit_usec / 1000);
 
                        /* Timer could already be pending in rare cases. */
-                       if (timer_pending(&netif->credit_timeout))
-                               break;
+                       if (timer_pending(&netif->credit_timeout)) {
+                               netif_put(netif);
+                               continue;
+                       }
 
                        /* Passed the point where we can replenish credit? */
                        if (time_after_eq(now, next_credit)) {
                                netif->credit_timeout.expires = now;
-                               netif->remaining_credit = netif->credit_bytes;
+                               tx_add_credit(netif);
                        }
 
                        /* Still too big to send right now? Set a callback. */
                        if (txreq.size > netif->remaining_credit) {
-                               netif->remaining_credit = 0;
                                netif->credit_timeout.data     =
                                        (unsigned long)netif;
                                netif->credit_timeout.function =
                                        tx_credit_callback;
                                __mod_timer(&netif->credit_timeout,
                                            next_credit);
-                               break;
+                               netif_put(netif);
+                               continue;
                        }
                }
                netif->remaining_credit -= txreq.size;
@@ -1235,7 +1263,7 @@ static void net_tx_action(unsigned long 
                        }
                }
 
-               gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
+               gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
                                  GNTMAP_host_map | GNTMAP_readonly,
                                  txreq.gref, netif->domid);
                mop++;
@@ -1294,8 +1322,8 @@ static void net_tx_action(unsigned long 
                }
 
                data_len = skb->len;
-               memcpy(skb->data, 
-                      (void *)(MMAP_VADDR(pending_idx)|txp->offset),
+               memcpy(skb->data,
+                      (void *)(idx_to_kaddr(pending_idx)|txp->offset),
                       data_len);
                if (data_len < txp->size) {
                        /* Append the packet payload as a fragment. */
@@ -1349,22 +1377,14 @@ static void netif_idx_release(u16 pendin
 
 static void netif_page_release(struct page *page)
 {
-       u16 pending_idx = page - virt_to_page(mmap_vstart);
-
-#ifdef CONFIG_PPC_XEN  /* Ready for next use. */
-       BUG();
-#endif 
-       init_page_count(page);
-       netif_idx_release(pending_idx);
-}
-
-#ifndef CONFIG_PPC_XEN
-static void netif_rx_page_release(struct page *page)
-{
        /* Ready for next use. */
        init_page_count(page);
-}
+#ifdef CONFIG_PPC_XEN
+       /* we need the count to be 2 so the unmap occurs */
+       get_page(page);
 #endif
+       netif_idx_release(page->index);
+}
 
 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
 {
@@ -1474,10 +1494,6 @@ static int __init netback_init(void)
        if (!is_running_on_xen())
                return -ENODEV;
 
-#ifdef CONFIG_PPC_XEN
-       if (!(xen_start_info->flags & SIF_INITDOMAIN))
-               return -ENODEV;
-#endif
        /* We can increase reservation by this much in net_rx_action(). */
        balloon_update_driver_allowance(NET_RX_RING_SIZE);
 
@@ -1488,35 +1504,16 @@ static int __init netback_init(void)
        net_timer.data = 0;
        net_timer.function = net_alarm;
 
-#ifdef CONFIG_PPC_XEN
-       (void)page;
-       mmap_vstart = foreign_alloc_empty_page_range(MAX_PENDING_REQS);
-#else
-       page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
-       if (page == NULL)
+       mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+       if (mmap_pages == NULL) {
+               printk("%s: out of memory\n", __FUNCTION__);
                return -ENOMEM;
-
-       mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
+       }
        for (i = 0; i < MAX_PENDING_REQS; i++) {
-               page = virt_to_page(MMAP_VADDR(i));
-               init_page_count(page);
+               page = mmap_pages[i];
                SetPageForeign(page, netif_page_release);
-       }
-#endif
-
-#ifndef CONFIG_PPC_XEN
-       page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE);
-       BUG_ON(page == NULL);
-       rx_mmap_area = pfn_to_kaddr(page_to_pfn(page));
-
-       for (i = 0; i < NET_RX_RING_SIZE; i++) {
-               page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE));
-               init_page_count(page);
-               SetPageForeign(page, netif_rx_page_release);
-       }
-#endif
-
+               page->index = i;
+       }
        pending_cons = 0;
        pending_prod = MAX_PENDING_REQS;
        for (i = 0; i < MAX_PENDING_REQS; i++)
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/xenbus.c
--- a/drivers/xen/netback/xenbus.c      Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/xenbus.c      Mon Oct 16 09:31:03 2006 -0400
@@ -262,10 +262,8 @@ static void frontend_changed(struct xenb
 
        case XenbusStateClosed:
                xenbus_switch_state(dev, XenbusStateClosed);
-#ifdef JX
                if (xenbus_dev_is_online(dev))
                        break;
-#endif
                /* fall through if not online */
        case XenbusStateUnknown:
                if (be->netif != NULL)
@@ -368,6 +366,10 @@ static void connect(struct backend_info 
        be->netif->remaining_credit = be->netif->credit_bytes;
 
        xenbus_switch_state(dev, XenbusStateConnected);
+
+       /* May not get a kick from the frontend, so start the tx_queue now. */
+       if (!netbk_can_queue(be->netif->dev))
+               netif_start_queue(be->netif->dev);
 }
 
 
@@ -405,14 +407,16 @@ static int connect_rings(struct backend_
        }
        be->netif->copying_receiver = !!rx_copy;
 
-       if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d",
-                        &val) < 0)
-               val = 0;
-       if (val)
-               be->netif->can_queue = 1;
-       else
-               /* Must be non-zero for pfifo_fast to work. */
-               be->netif->dev->tx_queue_len = 1;
+       if (be->netif->dev->tx_queue_len != 0) {
+               if (xenbus_scanf(XBT_NIL, dev->otherend,
+                                "feature-rx-notify", "%d", &val) < 0)
+                       val = 0;
+               if (val)
+                       be->netif->can_queue = 1;
+               else
+                       /* Must be non-zero for pfifo_fast to work. */
+                       be->netif->dev->tx_queue_len = 1;
+       }
 
        if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
                val = 0;
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netfront/netfront.c
--- a/drivers/xen/netfront/netfront.c   Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netfront/netfront.c   Mon Oct 16 09:31:03 2006 -0400
@@ -47,6 +47,7 @@
 #include <linux/in.h>
 #include <linux/if_ether.h>
 #include <linux/io.h>
+#include <linux/moduleparam.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/arp.h>
@@ -63,20 +64,64 @@
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
 
+/*
+ * Mutually-exclusive module options to select receive data path:
+ *  rx_copy : Packets are copied by network backend into local memory
+ *  rx_flip : Page containing packet data is transferred to our ownership
+ * For fully-virtualised guests there is no option - copying must be used.
+ * For paravirtualised guests, flipping is the default.
+ */
+#ifdef CONFIG_XEN
+static int MODPARM_rx_copy = 0;
+module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
+MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
+static int MODPARM_rx_flip = 0;
+module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
+MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
+#else
+static const int MODPARM_rx_copy = 1;
+static const int MODPARM_rx_flip = 0;
+#endif
+
 #define RX_COPY_THRESHOLD 256
 
 /* If we don't have GSO, fake things up so that we never try to use it. */
-#ifndef NETIF_F_GSO
-#define netif_needs_gso(dev, skb)      0
-#define dev_disable_gso_features(dev)  ((void)0)
-#else
+#if defined(NETIF_F_GSO)
 #define HAVE_GSO                       1
+#define HAVE_TSO                       1 /* TSO is a subset of GSO */
 static inline void dev_disable_gso_features(struct net_device *dev)
 {
        /* Turn off all GSO bits except ROBUST. */
        dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
        dev->features |= NETIF_F_GSO_ROBUST;
 }
+#elif defined(NETIF_F_TSO)
+#define HAVE_TSO                       1
+#define gso_size tso_size
+#define gso_segs tso_segs
+static inline void dev_disable_gso_features(struct net_device *dev)
+{
+       /* Turn off all TSO bits. */
+       dev->features &= ~NETIF_F_TSO;
+}
+static inline int skb_is_gso(const struct sk_buff *skb)
+{
+        return skb_shinfo(skb)->tso_size;
+}
+static inline int skb_gso_ok(struct sk_buff *skb, int features)
+{
+        return (features & NETIF_F_TSO);
+}
+
+static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+{
+        return skb_is_gso(skb) &&
+               (!skb_gso_ok(skb, dev->features) ||
+                unlikely(skb->ip_summed != CHECKSUM_HW));
+}
+#else
+#define netif_needs_gso(dev, skb)      0
+#define dev_disable_gso_features(dev)  ((void)0)
 #endif
 
 #define GRANT_INVALID_REF      0
@@ -120,7 +165,7 @@ struct netfront_info {
        grant_ref_t gref_tx_head;
        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
        grant_ref_t gref_rx_head;
-       grant_ref_t grant_rx_ref[NET_TX_RING_SIZE];
+       grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 
        struct xenbus_device *xbdev;
        int tx_ring_ref;
@@ -229,8 +274,7 @@ static int __devinit netfront_probe(stru
        int err;
        struct net_device *netdev;
        struct netfront_info *info;
-       unsigned int handle;
-       unsigned feature_rx_copy;
+       unsigned int handle, feature_rx_copy, feature_rx_flip, use_copy;
 
        err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
        if (err != 1) {
@@ -238,24 +282,28 @@ static int __devinit netfront_probe(stru
                return err;
        }
 
-#ifdef CONFIG_PPC_XEN
        err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u",
                           &feature_rx_copy);
-       BUG_ON(err != 1);
-       if (err != 1) {
-               xenbus_dev_fatal(dev, err, "reading feature-rx-copy");
-               return err;
-       }
-       BUG_ON(!feature_rx_copy);
-       if (!feature_rx_copy) {
-               xenbus_dev_fatal(dev, 0, "need a copy-capable backend");
-               return -EINVAL;
-       }
-#else
-       feature_rx_copy = 0;
+       if (err != 1)
+               feature_rx_copy = 0;
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-flip", "%u",
+                          &feature_rx_flip);
+       if (err != 1)
+               feature_rx_flip = 1;
+
+       /*
+        * Copy packets on receive path if:
+        *  (a) This was requested by user, and the backend supports it; or
+        *  (b) Flipping was requested, but this is unsupported by the backend.
+        */
+       use_copy = (MODPARM_rx_copy && feature_rx_copy) ||
+               (MODPARM_rx_flip && !feature_rx_flip);
+
+#ifdef CONFIG_PPC_XEN
+       if (!use_copy)
+               panic("NetFront _must_ use rx copy feature on PowerPC\n");
 #endif
-
-       netdev = create_netdev(handle, feature_rx_copy, dev);
+       netdev = create_netdev(handle, use_copy, dev);
        if (IS_ERR(netdev)) {
                err = PTR_ERR(netdev);
                xenbus_dev_fatal(dev, err, "creating netdev");
@@ -272,6 +320,9 @@ static int __devinit netfront_probe(stru
        err = open_netdev(info);
        if (err)
                goto fail_open;
+
+       IPRINTK("Created netdev %s with %sing receive path.\n",
+               netdev->name, info->copying_receiver ? "copy" : "flipp");
 
        return 0;
 
@@ -387,7 +438,7 @@ again:
                goto abort_transaction;
        }
 
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
        if (err) {
                message = "writing feature-gso-tcpv4";
@@ -744,7 +795,7 @@ no_skb:
                } else {
                        gnttab_grant_foreign_access_ref(ref,
                                                        np->xbdev->otherend_id,
-                                                       pfn,
+                                                       pfn_to_mfn(pfn),
                                                        0);
                }
 
@@ -919,7 +970,7 @@ static int network_start_xmit(struct sk_
                tx->flags |= NETTXF_data_validated;
 #endif
 
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
        if (skb_shinfo(skb)->gso_size) {
                struct netif_extra_info *gso = (struct netif_extra_info *)
                        RING_GET_REQUEST(&np->tx, ++i);
@@ -1207,12 +1258,14 @@ static int xennet_set_skb_gso(struct sk_
                return -EINVAL;
        }
 
+#ifdef HAVE_TSO
+       skb_shinfo(skb)->gso_size = gso->u.gso.size;
 #ifdef HAVE_GSO
-       skb_shinfo(skb)->gso_size = gso->u.gso.size;
        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 
        /* Header must be checked, and gso_segs computed. */
        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+#endif
        skb_shinfo(skb)->gso_segs = 0;
 
        return 0;
@@ -1563,7 +1616,7 @@ static int xennet_set_sg(struct net_devi
 
 static int xennet_set_tso(struct net_device *dev, u32 data)
 {
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
        if (data) {
                struct netfront_info *np = netdev_priv(dev);
                int val;
@@ -1634,7 +1687,8 @@ static void network_connect(struct net_d
                } else {
                        gnttab_grant_foreign_access_ref(
                                ref, np->xbdev->otherend_id,
-                               page_to_pfn(skb_shinfo(skb)->frags->page),
+                               pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
+                                                      frags->page)),
                                0);
                }
                req->gref = ref;
@@ -1941,7 +1995,7 @@ static void netfront_closing(struct xenb
        DPRINTK("%s\n", dev->nodename);
 
        close_netdev(info);
-       xenbus_switch_state(dev, XenbusStateClosed);
+       xenbus_frontend_closed(dev);
 }
 
 
@@ -2055,6 +2109,23 @@ static int __init netif_init(void)
        if (!is_running_on_xen())
                return -ENODEV;
 
+#ifdef CONFIG_PPC_XEN
+       if (MODPARM_rx_flip || !MODPARM_rx_copy) {
+               WPRINTK("PowerPC forcing rx_copy.\n");
+               MODPARM_rx_flip = 0;
+               MODPARM_rx_copy = 1;
+       }
+#endif
+#ifdef CONFIG_XEN
+       if (MODPARM_rx_flip && MODPARM_rx_copy) {
+               WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
+               return -EINVAL;
+       }
+
+       if (!MODPARM_rx_flip && !MODPARM_rx_copy)
+               MODPARM_rx_flip = 1; /* Default is to flip. */
+#endif
+
        if (is_initial_xendomain())
                return 0;
 
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_backend_client.c
--- a/drivers/xen/xenbus/xenbus_backend_client.c        Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_backend_client.c        Mon Oct 16 09:31:03 2006 -0400
@@ -50,7 +50,6 @@ struct vm_struct *xenbus_map_ring_valloc
        
        lock_vm_area(area);
        BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
-       update_vm_area(area, &op);
        unlock_vm_area(area);
 
        if (op.status != GNTST_okay) {
@@ -133,4 +132,16 @@ int xenbus_unmap_ring(struct xenbus_devi
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
 
+int xenbus_dev_is_online(struct xenbus_device *dev)
+{
+       int rc, val;
+
+       rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
+       if (rc != 1)
+               val = 0; /* no online node present */
+
+       return val;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
+
 MODULE_LICENSE("Dual BSD/GPL");
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_client.c
--- a/drivers/xen/xenbus/xenbus_client.c        Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_client.c        Mon Oct 16 09:31:03 2006 -0400
@@ -38,6 +38,20 @@
 #define DPRINTK(fmt, args...) \
     pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
 
+char *xenbus_strstate(enum xenbus_state state)
+{
+       static char *name[] = {
+               [ XenbusStateUnknown      ] = "Unknown",
+               [ XenbusStateInitialising ] = "Initialising",
+               [ XenbusStateInitWait     ] = "InitWait",
+               [ XenbusStateInitialised  ] = "Initialised",
+               [ XenbusStateConnected    ] = "Connected",
+               [ XenbusStateClosing      ] = "Closing",
+               [ XenbusStateClosed       ] = "Closed",
+       };
+       return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
+}
+
 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
                      struct xenbus_watch *watch,
                      void (*callback)(struct xenbus_watch *,
@@ -121,6 +135,13 @@ int xenbus_switch_state(struct xenbus_de
 }
 EXPORT_SYMBOL_GPL(xenbus_switch_state);
 
+int xenbus_frontend_closed(struct xenbus_device *dev)
+{
+       xenbus_switch_state(dev, XenbusStateClosed);
+       complete(&dev->down);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
 
 /**
  * Return the path to the error node for the given device, or NULL on failure.
@@ -271,7 +292,7 @@ enum xenbus_state xenbus_read_driver_sta
        enum xenbus_state result;
        int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
        if (err)
-               result = XenbusStateClosed;
+               result = XenbusStateUnknown;
 
        return result;
 }
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_comms.c
--- a/drivers/xen/xenbus/xenbus_comms.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_comms.c Mon Oct 16 09:31:03 2006 -0400
@@ -47,11 +47,6 @@ static DECLARE_WORK(probe_work, xenbus_p
 
 DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
 
-static inline struct xenstore_domain_interface *xenstore_domain_interface(void)
-{
-       return mfn_to_virt(xen_start_info->store_mfn);
-}
-
 static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
 {
        if (unlikely(xenstored_ready == 0)) {
@@ -90,7 +85,7 @@ static const void *get_input_chunk(XENST
 
 int xb_write(const void *data, unsigned len)
 {
-       struct xenstore_domain_interface *intf = xenstore_domain_interface();
+       struct xenstore_domain_interface *intf = xen_store_interface;
        XENSTORE_RING_IDX cons, prod;
        int rc;
 
@@ -129,7 +124,7 @@ int xb_write(const void *data, unsigned 
                intf->req_prod += avail;
 
                /* This implies mb() before other side sees interrupt. */
-               notify_remote_via_evtchn(xen_start_info->store_evtchn);
+               notify_remote_via_evtchn(xen_store_evtchn);
        }
 
        return 0;
@@ -137,7 +132,7 @@ int xb_write(const void *data, unsigned 
 
 int xb_read(void *data, unsigned len)
 {
-       struct xenstore_domain_interface *intf = xenstore_domain_interface();
+       struct xenstore_domain_interface *intf = xen_store_interface;
        XENSTORE_RING_IDX cons, prod;
        int rc;
 
@@ -180,7 +175,7 @@ int xb_read(void *data, unsigned len)
                pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
 
                /* Implies mb(): they will see new header. */
-               notify_remote_via_evtchn(xen_start_info->store_evtchn);
+               notify_remote_via_evtchn(xen_store_evtchn);
        }
 
        return 0;
@@ -195,7 +190,7 @@ int xb_init_comms(void)
                unbind_from_irqhandler(xenbus_irq, &xb_waitq);
 
        err = bind_evtchn_to_irqhandler(
-               xen_start_info->store_evtchn, wake_waiting,
+               xen_store_evtchn, wake_waiting,
                0, "xenbus", &xb_waitq);
        if (err <= 0) {
                printk(KERN_ERR "XENBUS request irq failed %i\n", err);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_comms.h
--- a/drivers/xen/xenbus/xenbus_comms.h Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_comms.h Mon Oct 16 09:31:03 2006 -0400
@@ -39,5 +39,7 @@ int xb_read(void *data, unsigned len);
 int xb_read(void *data, unsigned len);
 int xs_input_avail(void);
 extern wait_queue_head_t xb_waitq;
+extern struct xenstore_domain_interface *xen_store_interface;
+extern int xen_store_evtchn;
 
 #endif /* _XENBUS_COMMS_H */
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_dev.c
--- a/drivers/xen/xenbus/xenbus_dev.c   Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_dev.c   Mon Oct 16 09:31:03 2006 -0400
@@ -58,6 +58,9 @@ struct xenbus_dev_data {
        /* In-progress transaction. */
        struct list_head transactions;
 
+       /* Active watches. */
+       struct list_head watches;
+
        /* Partial request. */
        unsigned int len;
        union {
@@ -70,6 +73,8 @@ struct xenbus_dev_data {
        char read_buffer[PAGE_SIZE];
        unsigned int read_cons, read_prod;
        wait_queue_head_t read_waitq;
+
+       struct mutex reply_mutex;
 };
 
 static struct proc_dir_entry *xenbus_dev_intf;
@@ -100,13 +105,59 @@ static void queue_reply(struct xenbus_de
 {
        int i;
 
+       mutex_lock(&u->reply_mutex);
+
        for (i = 0; i < len; i++, u->read_prod++)
                u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];
 
        BUG_ON((u->read_prod - u->read_cons) > sizeof(u->read_buffer));
 
+       mutex_unlock(&u->reply_mutex);
+
        wake_up(&u->read_waitq);
 }
+
+struct watch_adapter
+{
+       struct list_head list;
+       struct xenbus_watch watch;
+       struct xenbus_dev_data *dev_data;
+       char *token;
+};
+
+static void free_watch_adapter (struct watch_adapter *watch)
+{
+       kfree(watch->watch.node);
+       kfree(watch->token);
+       kfree(watch);
+}
+
+static void watch_fired(struct xenbus_watch *watch,
+                       const char **vec,
+                       unsigned int len)
+{
+       struct watch_adapter *adap =
+            container_of(watch, struct watch_adapter, watch);
+       struct xsd_sockmsg hdr;
+       const char *path, *token;
+       int path_len, tok_len, body_len;
+
+       path = vec[XS_WATCH_PATH];
+       token = adap->token;
+
+       path_len = strlen(path) + 1;
+       tok_len = strlen(token) + 1;
+       body_len = path_len + tok_len;
+
+       hdr.type = XS_WATCH_EVENT;
+       hdr.len = body_len;
+       
+       queue_reply(adap->dev_data, (char *)&hdr, sizeof(hdr));
+       queue_reply(adap->dev_data, (char *)path, path_len);
+       queue_reply(adap->dev_data, (char *)token, tok_len);
+}
+
+static LIST_HEAD(watch_list);
 
 static ssize_t xenbus_dev_write(struct file *filp,
                                const char __user *ubuf,
@@ -116,6 +167,9 @@ static ssize_t xenbus_dev_write(struct f
        struct xenbus_dev_transaction *trans = NULL;
        uint32_t msg_type;
        void *reply;
+       char *path, *token;
+       struct watch_adapter *watch, *tmp_watch;
+       int err;
 
        if ((len + u->len) > sizeof(u->u.buffer))
                return -EINVAL;
@@ -169,6 +223,56 @@ static ssize_t xenbus_dev_write(struct f
                kfree(reply);
                break;
 
+       case XS_WATCH:
+       case XS_UNWATCH:
+               path = u->u.buffer + sizeof(u->u.msg);
+               token = memchr(path, 0, u->u.msg.len);
+               if (token == NULL)
+                       return -EILSEQ;
+               token++;
+
+               if (msg_type == XS_WATCH) {
+                       static const char * XS_WATCH_RESP = "OK";
+                       struct xsd_sockmsg hdr;
+
+                       watch = kmalloc(sizeof(*watch), GFP_KERNEL);
+                       if (watch == NULL)
+                               return -ENOMEM;
+                       /* Either copy may fail; both are checked below. */
+                       watch->watch.node = kstrdup(path, GFP_KERNEL);
+                       watch->token = kstrdup(token, GFP_KERNEL);
+                       watch->watch.callback = watch_fired;
+                       watch->dev_data = u;
+
+                       err = (watch->watch.node && watch->token) ?
+                               register_xenbus_watch(&watch->watch) : -ENOMEM;
+                       if (err) {
+                               free_watch_adapter(watch);
+                               return err;
+                       }
+                       list_add(&watch->list, &u->watches);
+
+                       hdr.type = XS_WATCH;
+                       hdr.len = strlen(XS_WATCH_RESP) + 1;
+                       queue_reply(u, (char *)&hdr, sizeof(hdr));
+                       queue_reply(u, (char *)XS_WATCH_RESP, hdr.len);
+               } else {
+                       list_for_each_entry_safe(watch, tmp_watch,
+                                                 &u->watches, list) {
+                               /* Remove only the watch being cancelled. */
+                               if (!strcmp(watch->token, token) &&
+                                   !strcmp(watch->watch.node, path))
+                               {
+                                       unregister_xenbus_watch(&watch->watch);
+                                       list_del(&watch->list);
+                                       free_watch_adapter(watch);
+                                       break;
+                               }
+                       }
+               }
+
+               break;
+
        default:
                return -EINVAL;
        }
@@ -181,7 +285,7 @@ static int xenbus_dev_open(struct inode 
 {
        struct xenbus_dev_data *u;
 
-       if (xen_start_info->store_evtchn == 0)
+       if (xen_store_evtchn == 0)
                return -ENOENT;
 
        nonseekable_open(inode, filp);
@@ -191,7 +295,10 @@ static int xenbus_dev_open(struct inode 
                return -ENOMEM;
 
        INIT_LIST_HEAD(&u->transactions);
+       INIT_LIST_HEAD(&u->watches);
        init_waitqueue_head(&u->read_waitq);
+
+       mutex_init(&u->reply_mutex);
 
        filp->private_data = u;
 
@@ -202,11 +309,18 @@ static int xenbus_dev_release(struct ino
 {
        struct xenbus_dev_data *u = filp->private_data;
        struct xenbus_dev_transaction *trans, *tmp;
+       struct watch_adapter *watch, *tmp_watch;
 
        list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
                xenbus_transaction_end(trans->handle, 1);
                list_del(&trans->list);
                kfree(trans);
+       }
+
+       list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
+               unregister_xenbus_watch(&watch->watch);
+               list_del(&watch->list);
+               free_watch_adapter(watch);
        }
 
        kfree(u);
@@ -232,7 +346,7 @@ static struct file_operations xenbus_dev
        .poll = xenbus_dev_poll,
 };
 
-static int __init
+int __init
 xenbus_dev_init(void)
 {
        xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400);
@@ -241,5 +355,3 @@ xenbus_dev_init(void)
 
        return 0;
 }
-
-__initcall(xenbus_dev_init);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_probe.c
--- a/drivers/xen/xenbus/xenbus_probe.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_probe.c Mon Oct 16 09:31:03 2006 -0400
@@ -45,18 +45,35 @@
 
 #include <asm/io.h>
 #include <asm/page.h>
+#include <asm/maddr.h>
 #include <asm/pgtable.h>
 #include <asm/hypervisor.h>
 #include <xen/xenbus.h>
 #include <xen/xen_proc.h>
 #include <xen/evtchn.h>
 #include <xen/features.h>
+#include <xen/hvm.h>
 
 #include "xenbus_comms.h"
 
+int xen_store_evtchn;
+struct xenstore_domain_interface *xen_store_interface;
+static unsigned long xen_store_mfn;
+
 extern struct mutex xenwatch_mutex;
 
 static BLOCKING_NOTIFIER_HEAD(xenstore_notifier_list);
+
+static void wait_for_devices(struct xenbus_driver *xendrv);
+
+static int xenbus_probe_frontend(const char *type, const char *name);
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+                                int num_envp, char *buffer, int buffer_size);
+static int xenbus_probe_backend(const char *type, const char *domid);
+
+static int xenbus_dev_probe(struct device *_dev);
+static int xenbus_dev_remove(struct device *_dev);
+static void xenbus_dev_shutdown(struct device *_dev);
 
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
@@ -141,7 +158,9 @@ static int read_otherend_details(struct 
        }
        if (strlen(xendev->otherend) == 0 ||
            !xenbus_exists(XBT_NIL, xendev->otherend, "")) {
-               xenbus_dev_fatal(xendev, -ENOENT, "missing other end from %s",
+               xenbus_dev_fatal(xendev, -ENOENT,
+                                "unable to read other end from %s.  "
+                                "missing or inaccessible.",
                                 xendev->nodename);
                free_otherend_details(xendev);
                return -ENOENT;
@@ -164,15 +183,17 @@ static int read_frontend_details(struct 
 
 
 /* Bus type for frontend drivers. */
-static int xenbus_probe_frontend(const char *type, const char *name);
 static struct xen_bus_type xenbus_frontend = {
        .root = "device",
        .levels = 2,            /* device/type/<id> */
        .get_bus_id = frontend_bus_id,
        .probe = xenbus_probe_frontend,
        .bus = {
-               .name  = "xen",
-               .match = xenbus_match,
+               .name     = "xen",
+               .match    = xenbus_match,
+               .probe    = xenbus_dev_probe,
+               .remove   = xenbus_dev_remove,
+               .shutdown = xenbus_dev_shutdown,
        },
        .dev = {
                .bus_id = "xen",
@@ -217,18 +238,18 @@ static int backend_bus_id(char bus_id[BU
        return 0;
 }
 
-static int xenbus_uevent_backend(struct device *dev, char **envp,
-                                int num_envp, char *buffer, int buffer_size);
-static int xenbus_probe_backend(const char *type, const char *domid);
 static struct xen_bus_type xenbus_backend = {
        .root = "backend",
        .levels = 3,            /* backend/type/<frontend>/<id> */
        .get_bus_id = backend_bus_id,
        .probe = xenbus_probe_backend,
        .bus = {
-               .name  = "xen-backend",
-               .match = xenbus_match,
-               .uevent = xenbus_uevent_backend,
+               .name     = "xen-backend",
+               .match    = xenbus_match,
+               .probe    = xenbus_dev_probe,
+               .remove   = xenbus_dev_remove,
+//             .shutdown = xenbus_dev_shutdown,
+               .uevent   = xenbus_uevent_backend,
        },
        .dev = {
                .bus_id = "xen-backend",
@@ -298,8 +319,23 @@ static void otherend_changed(struct xenb
 
        state = xenbus_read_driver_state(dev->otherend);
 
-       DPRINTK("state is %d, %s, %s",
-               state, dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+       DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
+               dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+
+       /*
+        * Ignore xenbus transitions during shutdown. This prevents us doing
+        * work that can fail e.g., when the rootfs is gone.
+        */
+       if (system_state > SYSTEM_RUNNING) {
+               struct xen_bus_type *bus = bus;
+               bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
+               /* If we're frontend, drive the state machine to Closed. */
+               /* This should cause the backend to release our resources. */
+               if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
+                       xenbus_frontend_closed(dev);
+               return;
+       }
+
        if (drv->otherend_changed)
                drv->otherend_changed(dev, state);
 }
@@ -330,7 +366,7 @@ static int xenbus_dev_probe(struct devic
        const struct xenbus_device_id *id;
        int err;
 
-       DPRINTK("");
+       DPRINTK("%s", dev->nodename);
 
        if (!drv->probe) {
                err = -ENODEV;
@@ -375,7 +411,7 @@ static int xenbus_dev_remove(struct devi
        struct xenbus_device *dev = to_xenbus_device(_dev);
        struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
 
-       DPRINTK("");
+       DPRINTK("%s", dev->nodename);
 
        free_otherend_watch(dev);
        free_otherend_details(dev);
@@ -385,6 +421,27 @@ static int xenbus_dev_remove(struct devi
 
        xenbus_switch_state(dev, XenbusStateClosed);
        return 0;
+}
+
+static void xenbus_dev_shutdown(struct device *_dev)
+{
+       struct xenbus_device *dev = to_xenbus_device(_dev);
+       unsigned long timeout = 5*HZ;
+
+       DPRINTK("%s", dev->nodename);
+
+       get_device(&dev->dev);
+       if (dev->state != XenbusStateConnected) {
+               printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
+                      dev->nodename, xenbus_strstate(dev->state));
+               goto out;
+       }
+       xenbus_switch_state(dev, XenbusStateClosing);
+       timeout = wait_for_completion_timeout(&dev->down, timeout);
+       if (!timeout)
+               printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename);
+ out:
+       put_device(&dev->dev);
 }
 
 static int xenbus_register_driver_common(struct xenbus_driver *drv,
@@ -395,8 +452,6 @@ static int xenbus_register_driver_common
        drv->driver.name = drv->name;
        drv->driver.bus = &bus->bus;
        drv->driver.owner = drv->owner;
-       drv->driver.probe = xenbus_dev_probe;
-       drv->driver.remove = xenbus_dev_remove;
 
        mutex_lock(&xenwatch_mutex);
        ret = driver_register(&drv->driver);
@@ -406,9 +461,18 @@ static int xenbus_register_driver_common
 
 int xenbus_register_frontend(struct xenbus_driver *drv)
 {
+       int ret;
+
        drv->read_otherend_details = read_backend_details;
 
-       return xenbus_register_driver_common(drv, &xenbus_frontend);
+       ret = xenbus_register_driver_common(drv, &xenbus_frontend);
+       if (ret)
+               return ret;
+
+       /* If this driver is loaded as a module wait for devices to attach. */
+       wait_for_devices(drv);
+
+       return 0;
 }
 EXPORT_SYMBOL_GPL(xenbus_register_frontend);
 
@@ -541,6 +605,7 @@ static int xenbus_probe_node(struct xen_
        tmpstring += strlen(tmpstring) + 1;
        strcpy(tmpstring, type);
        xendev->devicetype = tmpstring;
+       init_completion(&xendev->down);
 
        xendev->dev.parent = &bus->dev;
        xendev->dev.bus = &bus->bus;
@@ -806,7 +871,7 @@ static int resume_dev(struct device *dev
                        printk(KERN_WARNING
                               "xenbus: resume %s failed: %i\n", 
                               dev->bus_id, err);
-                       return err; 
+                       return err;
                }
        }
 
@@ -818,7 +883,7 @@ static int resume_dev(struct device *dev
                return err;
        }
 
-       return 0; 
+       return 0;
 }
 
 void xenbus_suspend(void)
@@ -865,29 +930,6 @@ EXPORT_SYMBOL_GPL(unregister_xenstore_no
 EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
 
 
-static int all_devices_ready_(struct device *dev, void *data)
-{
-       struct xenbus_device *xendev = to_xenbus_device(dev);
-       int *result = data;
-
-       if (xendev->state != XenbusStateConnected) {
-               *result = 0;
-               return 1;
-       }
-
-       return 0;
-}
-
-
-static int all_devices_ready(void)
-{
-       int ready = 1;
-       bus_for_each_dev(&xenbus_frontend.bus, NULL, &ready,
-                        all_devices_ready_);
-       return ready;
-}
-
-
 void xenbus_probe(void *unused)
 {
        BUG_ON((xenstored_ready <= 0));
@@ -917,8 +959,7 @@ static int xsd_kva_mmap(struct file *fil
        if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
                return -EINVAL;
 
-       if (remap_pfn_range(vma, vma->vm_start,
-                           mfn_to_pfn(xen_start_info->store_mfn),
+       if (remap_pfn_range(vma, vma->vm_start, mfn_to_pfn(xen_store_mfn),
                            size, vma->vm_page_prot))
                return -EAGAIN;
 
@@ -930,7 +971,7 @@ static int xsd_kva_read(char *page, char
 {
        int len;
 
-       len  = sprintf(page, "0x%p", mfn_to_virt(xen_start_info->store_mfn));
+       len  = sprintf(page, "0x%p", xen_store_interface);
        *eof = 1;
        return len;
 }
@@ -940,16 +981,15 @@ static int xsd_port_read(char *page, cha
 {
        int len;
 
-       len  = sprintf(page, "%d", xen_start_info->store_evtchn);
+       len  = sprintf(page, "%d", xen_store_evtchn);
        *eof = 1;
        return len;
 }
 #endif
 
-
 static int __init xenbus_probe_init(void)
 {
-       int err = 0, dom0;
+       int err = 0;
        unsigned long page = 0;
 
        DPRINTK("");
@@ -964,9 +1004,7 @@ static int __init xenbus_probe_init(void
        /*
         * Domain0 doesn't have a store_evtchn or store_mfn yet.
         */
-       dom0 = (xen_start_info->store_evtchn == 0);
-
-       if (dom0) {
+       if (is_initial_xendomain()) {
                struct evtchn_alloc_unbound alloc_unbound;
 
                /* Allocate page. */
@@ -974,7 +1012,7 @@ static int __init xenbus_probe_init(void
                if (!page)
                        return -ENOMEM;
 
-               xen_start_info->store_mfn =
+               xen_store_mfn = xen_start_info->store_mfn =
                        pfn_to_mfn(virt_to_phys((void *)page) >>
                                   PAGE_SHIFT);
 
@@ -987,7 +1025,8 @@ static int __init xenbus_probe_init(void
                if (err == -ENOSYS)
                        goto err;
                BUG_ON(err);
-               xen_start_info->store_evtchn = alloc_unbound.port;
+               xen_store_evtchn = xen_start_info->store_evtchn =
+                       alloc_unbound.port;
 
 #ifdef CONFIG_PROC_FS
                /* And finally publish the above info in /proc/xen */
@@ -1003,8 +1042,23 @@ static int __init xenbus_probe_init(void
                if (xsd_port_intf)
                        xsd_port_intf->read_proc = xsd_port_read;
 #endif
-       } else
+               xen_store_interface = mfn_to_virt(xen_store_mfn);
+       } else {
                xenstored_ready = 1;
+#ifdef CONFIG_XEN
+               xen_store_evtchn = xen_start_info->store_evtchn;
+               xen_store_mfn = xen_start_info->store_mfn;
+               xen_store_interface = mfn_to_virt(xen_store_mfn);
+#else
+               xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
+               xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
+               xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
+                                             PAGE_SIZE);
+#endif
+       }
+
+
+       xenbus_dev_init();
 
        /* Initialize the interface to xenstore. */
        err = xs_init();
@@ -1018,7 +1072,7 @@ static int __init xenbus_probe_init(void
        device_register(&xenbus_frontend.dev);
        device_register(&xenbus_backend.dev);
 
-       if (!dom0)
+       if (!is_initial_xendomain())
                xenbus_probe(NULL);
 
        return 0;
@@ -1038,6 +1092,58 @@ static int __init xenbus_probe_init(void
 
 postcore_initcall(xenbus_probe_init);
 
+MODULE_LICENSE("Dual BSD/GPL");
+
+
+static int is_disconnected_device(struct device *dev, void *data)
+{
+       struct xenbus_device *xendev = to_xenbus_device(dev);
+       struct device_driver *drv = data;
+
+       /*
+        * A device with no driver will never connect. We care only about
+        * devices which should currently be in the process of connecting.
+        */
+       if (!dev->driver)
+               return 0;
+
+       /* Is this search limited to a particular driver? */
+       if (drv && (dev->driver != drv))
+               return 0;
+
+       return (xendev->state != XenbusStateConnected);
+}
+
+static int exists_disconnected_device(struct device_driver *drv)
+{
+       return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+                               is_disconnected_device);
+}
+
+static int print_device_status(struct device *dev, void *data)
+{
+       struct xenbus_device *xendev = to_xenbus_device(dev);
+       struct device_driver *drv = data;
+
+       /* Is this operation limited to a particular driver? */
+       if (drv && (dev->driver != drv))
+               return 0;
+
+       if (!dev->driver) {
+               /* Information only: is this too noisy? */
+               printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
+                      xendev->nodename);
+       } else if (xendev->state != XenbusStateConnected) {
+               printk(KERN_WARNING "XENBUS: Timeout connecting "
+                      "to device: %s (state %d)\n",
+                      xendev->nodename, xendev->state);
+       }
+
+       return 0;
+}
+
+/* We only wait for device setup after most initcalls have run. */
+static int ready_to_wait_for_devices;
 
 /*
  * On a 10 second timeout, wait for all devices currently configured.  We need
@@ -1053,21 +1159,31 @@ postcore_initcall(xenbus_probe_init);
  * boot slightly, but of course needs tools or manual intervention to set up
  * those flags correctly.
  */
-static int __init wait_for_devices(void)
+static void wait_for_devices(struct xenbus_driver *xendrv)
 {
        unsigned long timeout = jiffies + 10*HZ;
-
-       if (!is_running_on_xen())
-               return -ENODEV;
-
-       while (time_before(jiffies, timeout)) {
-               if (all_devices_ready())
-                       return 0;
+       struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
+
+       if (!ready_to_wait_for_devices || !is_running_on_xen())
+               return;
+
+       while (exists_disconnected_device(drv)) {
+               if (time_after(jiffies, timeout))
+                       break;
                schedule_timeout_interruptible(HZ/10);
        }
 
-       printk(KERN_WARNING "XENBUS: Timeout connecting to devices!\n");
-       return 0;
-}
-
-late_initcall(wait_for_devices);
+       bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+                        print_device_status);
+}
+
+#ifndef MODULE
+static int __init boot_wait_for_devices(void)
+{
+       ready_to_wait_for_devices = 1;
+       wait_for_devices(NULL);
+       return 0;
+}
+
+late_initcall(boot_wait_for_devices);
+#endif
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_xs.c
--- a/drivers/xen/xenbus/xenbus_xs.c    Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_xs.c    Mon Oct 16 09:31:03 2006 -0400
@@ -662,7 +662,17 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watc
 
 void xs_suspend(void)
 {
+       struct xenbus_watch *watch;
+       char token[sizeof(watch) * 2 + 1];
+
        down_write(&xs_state.suspend_mutex);
+
+       /* No need for watches_lock: the suspend_mutex is sufficient. */
+       list_for_each_entry(watch, &watches, list) {
+               sprintf(token, "%lX", (long)watch);
+               xs_unwatch(watch->node, token);
+       }
+
        mutex_lock(&xs_state.request_mutex);
 }
 
diff -r f0be2cc05103 -r 933b1d114a89 include/asm-powerpc/page.h
--- a/include/asm-powerpc/page.h        Fri Oct 13 12:36:39 2006 -0400
+++ b/include/asm-powerpc/page.h        Mon Oct 16 09:31:03 2006 -0400
@@ -195,6 +195,7 @@ extern const char *arch_vma_name(struct 
 extern const char *arch_vma_name(struct vm_area_struct *vma);
 
 #include <asm-generic/memory_model.h>
+#include <xen/foreign_page.h>
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff -r f0be2cc05103 -r 933b1d114a89 include/asm-powerpc/xen/asm/hypervisor.h
--- a/include/asm-powerpc/xen/asm/hypervisor.h  Fri Oct 13 12:36:39 2006 -0400
+++ b/include/asm-powerpc/xen/asm/hypervisor.h  Mon Oct 16 09:31:03 2006 -0400
@@ -239,7 +239,16 @@ typedef unsigned long maddr_t;
 typedef unsigned long maddr_t;
 
 #ifdef CONFIG_XEN_SCRUB_PAGES
-#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
+
+static inline void scrub_pages(void *p, unsigned n)
+{
+       unsigned i;
+
+       for (i = 0; i < n; i++) {
+               clear_page(p);
+               p += PAGE_SIZE;
+       }
+}
 #else
 #define scrub_pages(_p,_n) ((void)0)
 #endif
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/balloon.h
--- a/include/xen/balloon.h     Fri Oct 13 12:36:39 2006 -0400
+++ b/include/xen/balloon.h     Mon Oct 16 09:31:03 2006 -0400
@@ -41,6 +41,8 @@ extern void
 extern void
 balloon_update_driver_allowance(
        long delta);
+struct page **alloc_empty_pages_and_pagevec(int nr_pages);
+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages);
 
 /* Allocate an empty low-memory page range. */
 extern struct page *
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/foreign_page.h
--- a/include/xen/foreign_page.h        Fri Oct 13 12:36:39 2006 -0400
+++ b/include/xen/foreign_page.h        Mon Oct 16 09:31:03 2006 -0400
@@ -10,6 +10,7 @@
 #ifndef __ASM_XEN_FOREIGN_PAGE_H__
 #define __ASM_XEN_FOREIGN_PAGE_H__
 
+#ifndef CONFIG_PPC_XEN
 #define PG_foreign             PG_arch_1
 
 #define PageForeign(page)      test_bit(PG_foreign, &(page)->flags)
@@ -27,4 +28,34 @@
 #define PageForeignDestructor(page)    \
        ( (void (*) (struct page *)) (page)->mapping )
 
+#else
+
+extern struct address_space xen_foreign_dummy_mapping;
+
+#define PageForeign(page)      \
+       ((page)->mapping == &xen_foreign_dummy_mapping)
+
+#define SetPageForeign(page, dtor) do {                                \
+       set_page_private((page), (unsigned long)(dtor));        \
+       (page)->mapping = &xen_foreign_dummy_mapping;   \
+       smp_rmb();                                              \
+} while (0)
+
+#define ClearPageForeign(page) do {    \
+       (page)->mapping = NULL;         \
+       smp_rmb();                      \
+       set_page_private((page), 0);    \
+} while (0)
+
+#define PageForeignDestructor(page)    \
+       ( (void (*) (struct page *)) page_private(page) )
+
+#define HAVE_ARCH_FREE_PAGE
+#define arch_free_page(_page,_order)                   \
+({      int foreign = PageForeign(_page);               \
+       if (foreign)                                    \
+               (PageForeignDestructor(_page))(_page);  \
+       foreign;                                        \
+})
+#endif
 #endif /* __ASM_XEN_FOREIGN_PAGE_H__ */
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/xenbus.h
--- a/include/xen/xenbus.h      Fri Oct 13 12:36:39 2006 -0400
+++ b/include/xen/xenbus.h      Mon Oct 16 09:31:03 2006 -0400
@@ -37,6 +37,7 @@
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/mutex.h>
+#include <linux/completion.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/grant_table.h>
 #include <xen/interface/io/xenbus.h>
@@ -74,6 +75,7 @@ struct xenbus_device {
        struct xenbus_watch otherend_watch;
        struct device dev;
        enum xenbus_state state;
+       struct completion down;
 };
 
 static inline struct xenbus_device *to_xenbus_device(struct device *dev)
@@ -274,7 +276,7 @@ int xenbus_free_evtchn(struct xenbus_dev
 
 /**
  * Return the state of the driver rooted at the given store path, or
- * XenbusStateClosed if no state can be read.
+ * XenbusStateUnknown if no state can be read.
  */
 enum xenbus_state xenbus_read_driver_state(const char *path);
 
@@ -295,5 +297,10 @@ void xenbus_dev_fatal(struct xenbus_devi
 void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
                      ...);
 
+int __init xenbus_dev_init(void);
+
+char *xenbus_strstate(enum xenbus_state state);
+int xenbus_dev_is_online(struct xenbus_device *dev);
+int xenbus_frontend_closed(struct xenbus_device *dev);
 
 #endif /* _XEN_XENBUS_H */
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/hvm.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/include/xen/hvm.h Mon Oct 16 09:31:03 2006 -0400
@@ -0,0 +1,13 @@
+/* Simple wrappers around HVM functions */
+#ifndef XEN_HVM_H__
+#define XEN_HVM_H__
+
+#include <xen/interface/hvm/params.h>
+#include <asm/hypercall.h>
+
+static inline unsigned long hvm_get_parameter(int idx)
+{
+       return 0;
+}
+
+#endif /* XEN_HVM_H__ */

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.