
[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID 1bab7d65171b762bb3cf1ae426bc6c403f847ebf
# Parent  4ba0982264290acfa208304b4e3343ec8c3ec903
# Parent  3e6325b73474b3764573178152503af27a914ab8
merge with xen-unstable.hg
---
 xen/arch/powerpc/htab.c                                         |   68 --
 .hgignore                                                       |    2 
 extras/mini-os/Makefile                                         |    3 
 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c               |    7 
 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c                 |   42 -
 linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c              |   56 +
 linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c              |    2 
 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c             |   11 
 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c               |   10 
 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c            |    4 
 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c                |    8 
 linux-2.6-xen-sparse/drivers/xen/blktap/common.h                |    1 
 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c             |   23 
 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c                |   16 
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c               |   10 
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c            |   67 +-
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c |   12 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c         |   21 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c          |   34 -
 linux-2.6-xen-sparse/include/xen/xenbus.h                       |    6 
 tools/blktap/drivers/block-aio.c                                |   19 
 tools/blktap/drivers/block-qcow.c                               |   19 
 tools/blktap/drivers/tapdisk.c                                  |    1 
 tools/blktap/lib/xs_api.c                                       |   23 
 tools/libxc/ia64/xc_ia64_stubs.c                                |   16 
 tools/libxc/powerpc64/xc_linux_build.c                          |    4 
 tools/libxc/xenctrl.h                                           |    5 
 tools/python/xen/xend/FlatDeviceTree.py                         |  323 ++++++++++
 tools/python/xen/xend/XendCheckpoint.py                         |    6 
 tools/python/xen/xend/XendDomain.py                             |    3 
 tools/python/xen/xend/XendDomainInfo.py                         |   63 +
 tools/python/xen/xend/arch.py                                   |   32 
 tools/python/xen/xend/image.py                                  |  143 ++--
 tools/python/xen/xend/server/DevController.py                   |    6 
 tools/python/xen/xend/server/XMLRPCServer.py                    |    3 
 tools/python/xen/xend/server/blkif.py                           |   16 
 tools/python/xen/xm/migrate.py                                  |    3 
 tools/python/xen/xm/shutdown.py                                 |   49 +
 xen/arch/ia64/xen/dom0_ops.c                                    |    4 
 xen/arch/powerpc/Makefile                                       |   31 
 xen/arch/powerpc/Rules.mk                                       |    2 
 xen/arch/powerpc/backtrace.c                                    |  193 +++++
 xen/arch/powerpc/boot_of.c                                      |  208 ++++--
 xen/arch/powerpc/dart_u3.c                                      |    8 
 xen/arch/powerpc/dom0_ops.c                                     |   20 
 xen/arch/powerpc/domain.c                                       |   58 -
 xen/arch/powerpc/domain_build.c                                 |   60 +
 xen/arch/powerpc/exceptions.c                                   |    2 
 xen/arch/powerpc/exceptions.h                                   |    3 
 xen/arch/powerpc/external.c                                     |    3 
 xen/arch/powerpc/iommu.c                                        |   17 
 xen/arch/powerpc/memory.c                                       |  206 ++++++
 xen/arch/powerpc/mm.c                                           |  298 ++++++++-
 xen/arch/powerpc/mpic.c                                         |    6 
 xen/arch/powerpc/ofd_fixup.c                                    |  101 ---
 xen/arch/powerpc/ofd_fixup_memory.c                             |  107 +++
 xen/arch/powerpc/oftree.h                                       |    8 
 xen/arch/powerpc/papr/tce.c                                     |    6 
 xen/arch/powerpc/papr/xlate.c                                   |   46 +
 xen/arch/powerpc/powerpc64/exceptions.S                         |   37 +
 xen/arch/powerpc/powerpc64/ppc970.c                             |  112 ++-
 xen/arch/powerpc/setup.c                                        |  207 +++---
 xen/arch/powerpc/shadow.c                                       |  159 ++++
 xen/arch/powerpc/xen.lds.S                                      |   10 
 xen/arch/x86/hvm/io.c                                           |   10 
 xen/arch/x86/hvm/platform.c                                     |   32 
 xen/arch/x86/hvm/svm/intr.c                                     |   43 -
 xen/arch/x86/hvm/svm/svm.c                                      |    5 
 xen/arch/x86/hvm/vlapic.c                                       |   14 
 xen/arch/x86/hvm/vmx/io.c                                       |   13 
 xen/arch/x86/hvm/vmx/vmx.c                                      |   29 
 xen/arch/x86/mm/shadow/multi.c                                  |   66 --
 xen/arch/x86/physdev.c                                          |    5 
 xen/arch/x86/time.c                                             |    4 
 xen/arch/x86/traps.c                                            |    5 
 xen/arch/x86/x86_32/traps.c                                     |   46 +
 xen/arch/x86/x86_64/traps.c                                     |   43 -
 xen/common/perfc.c                                              |    4 
 xen/include/asm-ia64/mm.h                                       |    1 
 xen/include/asm-powerpc/config.h                                |    4 
 xen/include/asm-powerpc/current.h                               |    4 
 xen/include/asm-powerpc/domain.h                                |    7 
 xen/include/asm-powerpc/htab.h                                  |    4 
 xen/include/asm-powerpc/mm.h                                    |  183 +++--
 xen/include/asm-powerpc/powerpc64/procarea.h                    |    1 
 xen/include/asm-powerpc/processor.h                             |    7 
 xen/include/asm-powerpc/shadow.h                                |   16 
 xen/include/asm-powerpc/smp.h                                   |    4 
 xen/include/asm-powerpc/types.h                                 |   19 
 xen/include/asm-x86/mm.h                                        |    1 
 xen/include/asm-x86/page.h                                      |    7 
 xen/include/asm-x86/processor.h                                 |    7 
 xen/include/asm-x86/x86_32/page-2level.h                        |    3 
 xen/include/asm-x86/x86_32/page-3level.h                        |    2 
 xen/include/asm-x86/x86_32/page.h                               |    9 
 xen/include/asm-x86/x86_64/page.h                               |   11 
 xen/include/public/arch-ia64.h                                  |    3 
 xen/include/public/arch-powerpc.h                               |    3 
 xen/include/public/arch-x86_32.h                                |   17 
 xen/include/public/arch-x86_64.h                                |    3 
 xen/include/public/domctl.h                                     |   21 
 xen/include/public/sysctl.h                                     |   16 
 xen/include/public/xen.h                                        |    1 
 103 files changed, 2690 insertions(+), 1032 deletions(-)

diff -r 4ba098226429 -r 1bab7d65171b .hgignore
--- a/.hgignore Fri Sep 01 12:52:12 2006 -0600
+++ b/.hgignore Fri Sep 01 13:04:02 2006 -0600
@@ -203,6 +203,8 @@
 ^xen/arch/powerpc/firmware$
 ^xen/arch/powerpc/firmware_image$
 ^xen/arch/powerpc/xen\.lds$
+^xen/arch/powerpc/.xen-syms$
+^xen/arch/powerpc/xen-syms.S$
 ^unmodified_drivers/linux-2.6/\.tmp_versions
 ^unmodified_drivers/linux-2.6/.*\.cmd$
 ^unmodified_drivers/linux-2.6/.*\.ko$
diff -r 4ba098226429 -r 1bab7d65171b extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Fri Sep 01 12:52:12 2006 -0600
+++ b/extras/mini-os/Makefile   Fri Sep 01 13:04:02 2006 -0600
@@ -7,9 +7,12 @@ include $(XEN_ROOT)/Config.mk
 # Set TARGET_ARCH
 override TARGET_ARCH     := $(XEN_TARGET_ARCH)
 
+XEN_INTERFACE_VERSION := 0x00030203
+
 # NB. '-Wcast-qual' is nasty, so I omitted it.
 CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format
 CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline
+CFLAGS += -D__XEN_INTERFACE_VERSION__=$(XEN_INTERFACE_VERSION)
 
 ASFLAGS = -D__ASSEMBLY__
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Sep 01 13:04:02 2006 -0600
@@ -1380,8 +1380,10 @@ legacy_init_iomem_resources(struct e820e
                         *  so we try it repeatedly and let the resource manager
                         *  test it.
                         */
+#ifndef CONFIG_XEN
                        request_resource(res, code_resource);
                        request_resource(res, data_resource);
+#endif
 #ifdef CONFIG_KEXEC
                        request_resource(res, &crashk_res);
 #endif
@@ -1454,11 +1456,8 @@ static void __init register_memory(void)
        int           i;
 
        /* Nothing to do if not running in dom0. */
-       if (!is_initial_xendomain()) {
-               legacy_init_iomem_resources(e820.map, e820.nr_map,
-                                           &code_resource, &data_resource);
+       if (!is_initial_xendomain())
                return;
-       }
 
 #ifdef CONFIG_XEN
        machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Fri Sep 01 13:04:02 2006 -0600
@@ -22,15 +22,6 @@
 #define ISA_START_ADDRESS      0x0
 #define ISA_END_ADDRESS                0x100000
 
-#if 0 /* not PAE safe */
-/* These hacky macros avoid phys->machine translations. */
-#define __direct_pte(x) ((pte_t) { (x) } )
-#define __direct_mk_pte(page_nr,pgprot) \
-  __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
-#define direct_mk_pte_phys(physpage, pgprot) \
-  __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-#endif
-
 static int direct_remap_area_pte_fn(pte_t *pte, 
                                    struct page *pmd_page,
                                    unsigned long address, 
@@ -66,17 +57,16 @@ static int __direct_remap_pfn_range(stru
 
        for (i = 0; i < size; i += PAGE_SIZE) {
                if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) {
-                       /* Fill in the PTE pointers. */
+                       /* Flush a full batch after filling in the PTE ptrs. */
                        rc = apply_to_page_range(mm, start_address, 
                                                 address - start_address,
                                                 direct_remap_area_pte_fn, &w);
                        if (rc)
                                goto out;
-                       w = u;
                        rc = -EFAULT;
                        if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
                                goto out;
-                       v = u;
+                       v = w = u;
                        start_address = address;
                }
 
@@ -92,7 +82,7 @@ static int __direct_remap_pfn_range(stru
        }
 
        if (v != u) {
-               /* get the ptep's filled in */
+               /* Final batch. */
                rc = apply_to_page_range(mm, start_address,
                                         address - start_address,
                                         direct_remap_area_pte_fn, &w);
@@ -178,32 +168,6 @@ int touch_pte_range(struct mm_struct *mm
 } 
 
 EXPORT_SYMBOL(touch_pte_range);
-
-void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot)
-{
-       int error;
-       
-       struct vm_struct *vma;
-       vma = get_vm_area (vm_size, VM_IOREMAP);
-      
-       if (vma == NULL) {
-               printk ("ioremap.c,vm_map_xen_pages(): "
-                       "Failed to get VMA area\n");
-               return NULL;
-       }
-
-       error = direct_kernel_remap_pfn_range((unsigned long) vma->addr,
-                                             maddr >> PAGE_SHIFT, vm_size,
-                                             prot, DOMID_SELF );
-       if (error == 0) {
-               return vma->addr;
-       } else {
-               printk ("ioremap.c,vm_map_xen_pages(): "
-                       "Failed to map xen shared pages into kernel space\n");
-               return NULL;
-       }
-}
-EXPORT_SYMBOL(vm_map_xen_pages);
 
 /*
  * Does @address reside within a non-highmem page that is local to this virtual
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c
--- a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c        Fri Sep 01 13:04:02 2006 -0600
@@ -26,6 +26,7 @@
 #include <xen/evtchn.h>
 #include "op_counter.h"
 
+#include <xen/driver_util.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/xenoprof.h>
 #include <../../../drivers/oprofile/cpu_buffer.h>
@@ -33,8 +34,6 @@
 
 static int xenoprof_start(void);
 static void xenoprof_stop(void);
-
-void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot);
 
 static int xenoprof_enabled = 0;
 static unsigned int num_events = 0;
@@ -373,9 +372,9 @@ static int xenoprof_set_passive(int * p_
 {
        int ret;
        int i, j;
-       int vm_size;
        int npages;
        struct xenoprof_buf *buf;
+       struct vm_struct *area;
        pgprot_t prot = __pgprot(_KERNPG_TABLE);
 
        if (!is_primary)
@@ -391,19 +390,29 @@ static int xenoprof_set_passive(int * p_
        for (i = 0; i < pdoms; i++) {
                passive_domains[i].domain_id = p_domains[i];
                passive_domains[i].max_samples = 2048;
-       ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, &passive_domains[i]);
+               ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive,
+                                            &passive_domains[i]);
                if (ret)
-                       return ret;
+                       goto out;
 
                npages = (passive_domains[i].bufsize * passive_domains[i].nbuf - 1) / PAGE_SIZE + 1;
-               vm_size = npages * PAGE_SIZE;
-
-               p_shared_buffer[i] = (char *)vm_map_xen_pages(passive_domains[i].buf_maddr,
-                                                             vm_size, prot);
-               if (!p_shared_buffer[i]) {
+
+               area = alloc_vm_area(npages * PAGE_SIZE);
+               if (area == NULL) {
                        ret = -ENOMEM;
                        goto out;
                }
+
+               ret = direct_kernel_remap_pfn_range(
+                       (unsigned long)area->addr,
+                       passive_domains[i].buf_maddr >> PAGE_SHIFT,
+                       npages * PAGE_SIZE, prot, DOMID_SELF);
+               if (ret) {
+                       vunmap(area->addr);
+                       goto out;
+               }
+
+               p_shared_buffer[i] = area->addr;
 
                for (j = 0; j < passive_domains[i].nbuf; j++) {
                        buf = (struct xenoprof_buf *)
@@ -473,11 +482,9 @@ int __init oprofile_arch_init(struct opr
 int __init oprofile_arch_init(struct oprofile_operations * ops)
 {
        struct xenoprof_init init;
-       struct xenoprof_buf * buf;
-       int vm_size;
-       int npages;
-       int ret;
-       int i;
+       struct xenoprof_buf *buf;
+       int npages, ret, i;
+       struct vm_struct *area;
 
        init.max_samples = 16;
        ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
@@ -495,14 +502,23 @@ int __init oprofile_arch_init(struct opr
                        num_events = OP_MAX_COUNTER;
 
                npages = (init.bufsize * nbuf - 1) / PAGE_SIZE + 1;
-               vm_size = npages * PAGE_SIZE;
-
-               shared_buffer = (char *)vm_map_xen_pages(init.buf_maddr,
-                                                        vm_size, prot);
-               if (!shared_buffer) {
+
+               area = alloc_vm_area(npages * PAGE_SIZE);
+               if (area == NULL) {
                        ret = -ENOMEM;
                        goto out;
                }
+
+               ret = direct_kernel_remap_pfn_range(
+                       (unsigned long)area->addr,
+                       init.buf_maddr >> PAGE_SHIFT,
+                       npages * PAGE_SIZE, prot, DOMID_SELF);
+               if (ret) {
+                       vunmap(area->addr);
+                       goto out;
+               }
+
+               shared_buffer = area->addr;
 
                for (i=0; i< nbuf; i++) {
                        buf = (struct xenoprof_buf*) 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c        Fri Sep 01 13:04:02 2006 -0600
@@ -255,8 +255,10 @@ void __init e820_reserve_resources(struc
                         *  so we try it repeatedly and let the resource manager
                         *  test it.
                         */
+#ifndef CONFIG_XEN
                        request_resource(res, &code_resource);
                        request_resource(res, &data_resource);
+#endif
 #ifdef CONFIG_KEXEC
                        request_resource(res, &crashk_res);
 #endif
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Fri Sep 01 13:04:02 2006 -0600
@@ -846,7 +846,7 @@ void __init setup_arch(char **cmdline_p)
 
                if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                        /* Make sure we have a large enough P->M table. */
-                       phys_to_machine_mapping = alloc_bootmem(
+                       phys_to_machine_mapping = alloc_bootmem_pages(
                                end_pfn * sizeof(unsigned long));
                        memset(phys_to_machine_mapping, ~0,
                               end_pfn * sizeof(unsigned long));
@@ -863,7 +863,7 @@ void __init setup_arch(char **cmdline_p)
                         * list of frames that make up the p2m table. Used by
                          * save/restore.
                         */
-                       pfn_to_mfn_frame_list_list = alloc_bootmem(PAGE_SIZE);
+                       pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE);
                        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
                                virt_to_mfn(pfn_to_mfn_frame_list_list);
 
@@ -873,7 +873,7 @@ void __init setup_arch(char **cmdline_p)
                                        k++;
                                        BUG_ON(k>=fpp);
                                        pfn_to_mfn_frame_list[k] =
-                                               alloc_bootmem(PAGE_SIZE);
+                                               alloc_bootmem_pages(PAGE_SIZE);
                                        pfn_to_mfn_frame_list_list[k] =
                                                virt_to_mfn(pfn_to_mfn_frame_list[k]);
                                        j=0;
@@ -944,9 +944,10 @@ void __init setup_arch(char **cmdline_p)
                BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
 
                e820_reserve_resources(machine_e820, memmap.nr_entries);
-       } else
-#endif
+       }
+#else
        e820_reserve_resources(e820.map, e820.nr_map);
+#endif
 
        request_resource(&iomem_resource, &video_ram_resource);
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri Sep 01 13:04:02 2006 -0600
@@ -301,11 +301,11 @@ static void frontend_changed(struct xenb
        struct backend_info *be = dev->dev.driver_data;
        int err;
 
-       DPRINTK("");
+       DPRINTK("%s", xenbus_strstate(frontend_state));
 
        switch (frontend_state) {
        case XenbusStateInitialising:
-               if (dev->state == XenbusStateClosing) {
+               if (dev->state == XenbusStateClosed) {
                        printk("%s: %s: prepare for reconnect\n",
                               __FUNCTION__, dev->nodename);
                        xenbus_switch_state(dev, XenbusStateInitWait);
@@ -331,8 +331,12 @@ static void frontend_changed(struct xenb
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
+       case XenbusStateClosed:
+               xenbus_switch_state(dev, XenbusStateClosed);
+               if (xenbus_dev_is_online(dev))
+                       break;
+               /* fall through if not online */
        case XenbusStateUnknown:
-       case XenbusStateClosed:
                device_unregister(&dev->dev);
                break;
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Fri Sep 01 13:04:02 2006 -0600
@@ -273,7 +273,7 @@ static void backend_changed(struct xenbu
                        xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
 
                down(&bd->bd_sem);
-               if (info->users > 0)
+               if (info->users > 0 && system_state == SYSTEM_RUNNING)
                        xenbus_dev_error(dev, -EBUSY,
                                         "Device in use; refusing to close");
                else
@@ -360,7 +360,7 @@ static void blkfront_closing(struct xenb
 
        xlvbd_del(info);
 
-       xenbus_switch_state(dev, XenbusStateClosed);
+       xenbus_frontend_closed(dev);
 }
 
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Fri Sep 01 13:04:02 2006 -0600
@@ -114,8 +114,8 @@ typedef struct domid_translate {
 } domid_translate_t ;
 
 
-domid_translate_t  translate_domid[MAX_TAP_DEV];
-tap_blkif_t *tapfds[MAX_TAP_DEV];
+static domid_translate_t  translate_domid[MAX_TAP_DEV];
+static tap_blkif_t *tapfds[MAX_TAP_DEV];
 
 static int __init set_blkif_reqs(char *str)
 {
@@ -1118,7 +1118,7 @@ static int do_block_io_op(blkif_t *blkif
                               "ring does not exist!\n");
                        print_dbug = 0; /*We only print this message once*/
                }
-               return 1;
+               return 0;
        }
 
        info = tapfds[blkif->dev_num];
@@ -1127,7 +1127,7 @@ static int do_block_io_op(blkif_t *blkif
                        WPRINTK("Can't get UE info!\n");
                        print_dbug = 0;
                }
-               return 1;
+               return 0;
        }
 
        while (rc != rp) {
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h  Fri Sep 01 13:04:02 2006 -0600
@@ -91,6 +91,7 @@ void tap_blkif_free(blkif_t *blkif);
 void tap_blkif_free(blkif_t *blkif);
 int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, 
                  unsigned int evtchn);
+void tap_blkif_unmap(blkif_t *blkif);
 
 #define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
 #define blkif_put(_b)                                  \
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c       Fri Sep 01 13:04:02 2006 -0600
@@ -135,20 +135,25 @@ int tap_blkif_map(blkif_t *blkif, unsign
        return 0;
 }
 
+void tap_blkif_unmap(blkif_t *blkif)
+{
+       if (blkif->irq) {
+               unbind_from_irqhandler(blkif->irq, blkif);
+               blkif->irq = 0;
+       }
+       if (blkif->blk_ring.sring) {
+               unmap_frontend_page(blkif);
+               free_vm_area(blkif->blk_ring_area);
+               blkif->blk_ring.sring = NULL;
+       }
+}
+
 void tap_blkif_free(blkif_t *blkif)
 {
        atomic_dec(&blkif->refcnt);
        wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
 
-       /* Already disconnected? */
-       if (blkif->irq)
-               unbind_from_irqhandler(blkif->irq, blkif);
-
-       if (blkif->blk_ring.sring) {
-               unmap_frontend_page(blkif);
-               free_vm_area(blkif->blk_ring_area);
-       }
-
+       tap_blkif_unmap(blkif);
        kmem_cache_free(blkif_cachep, blkif);
 }
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c  Fri Sep 01 13:04:02 2006 -0600
@@ -247,6 +247,11 @@ static void tap_frontend_changed(struct 
 
        switch (frontend_state) {
        case XenbusStateInitialising:
+               if (dev->state == XenbusStateClosed) {
+                       printk("%s: %s: prepare for reconnect\n",
+                              __FUNCTION__, dev->nodename);
+                       xenbus_switch_state(dev, XenbusStateInitWait);
+               }
                break;
 
        case XenbusStateInitialised:
@@ -264,11 +269,20 @@ static void tap_frontend_changed(struct 
                break;
 
        case XenbusStateClosing:
+               if (be->blkif->xenblkd) {
+                       kthread_stop(be->blkif->xenblkd);
+                       be->blkif->xenblkd = NULL;
+               }
+               tap_blkif_unmap(be->blkif);
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
+       case XenbusStateClosed:
+               xenbus_switch_state(dev, XenbusStateClosed);
+               if (xenbus_dev_is_online(dev))
+                       break;
+               /* fall through if not online */
        case XenbusStateUnknown:
-       case XenbusStateClosed:
                device_unregister(&dev->dev);
                break;
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Sep 01 13:04:02 2006 -0600
@@ -228,13 +228,13 @@ static void frontend_changed(struct xenb
 {
        struct backend_info *be = dev->dev.driver_data;
 
-       DPRINTK("");
+       DPRINTK("%s", xenbus_strstate(frontend_state));
 
        be->frontend_state = frontend_state;
 
        switch (frontend_state) {
        case XenbusStateInitialising:
-               if (dev->state == XenbusStateClosing) {
+               if (dev->state == XenbusStateClosed) {
                        printk("%s: %s: prepare for reconnect\n",
                               __FUNCTION__, dev->nodename);
                        if (be->netif) {
@@ -260,8 +260,12 @@ static void frontend_changed(struct xenb
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
+       case XenbusStateClosed:
+               xenbus_switch_state(dev, XenbusStateClosed);
+               if (xenbus_dev_is_online(dev))
+                       break;
+               /* fall through if not online */
        case XenbusStateUnknown:
-       case XenbusStateClosed:
                if (be->netif != NULL)
                        kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
                device_unregister(&dev->dev);
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Fri Sep 01 13:04:02 2006 -0600
@@ -193,6 +193,7 @@ static void netfront_closing(struct xenb
 
 static void end_access(int, void *);
 static void netif_disconnect_backend(struct netfront_info *);
+static int open_netdev(struct netfront_info *);
 static void close_netdev(struct netfront_info *);
 static void netif_free(struct netfront_info *);
 
@@ -263,15 +264,22 @@ static int __devinit netfront_probe(stru
        dev->dev.driver_data = info;
 
        err = talk_to_backend(dev, info);
-       if (err) {
-               xennet_sysfs_delif(info->netdev);
-               unregister_netdev(netdev);
-               free_netdev(netdev);
-               dev->dev.driver_data = NULL;
-               return err;
-       }
+       if (err)
+               goto fail_backend;
+
+       err = open_netdev(info);
+       if (err)
+               goto fail_open;
 
        return 0;
+
+ fail_open:
+       xennet_sysfs_delif(info->netdev);
+       unregister_netdev(netdev);
+ fail_backend:
+       free_netdev(netdev);
+       dev->dev.driver_data = NULL;
+       return err;
 }
 
 
@@ -478,7 +486,7 @@ static void backend_changed(struct xenbu
        struct netfront_info *np = dev->dev.driver_data;
        struct net_device *netdev = np->netdev;
 
-       DPRINTK("\n");
+       DPRINTK("%s\n", xenbus_strstate(backend_state));
 
        switch (backend_state) {
        case XenbusStateInitialising:
@@ -1887,27 +1895,9 @@ create_netdev(int handle, int copying_re
        SET_MODULE_OWNER(netdev);
        SET_NETDEV_DEV(netdev, &dev->dev);
 
-       err = register_netdev(netdev);
-       if (err) {
-               printk(KERN_WARNING "%s> register_netdev err=%d\n",
-                      __FUNCTION__, err);
-               goto exit_free_rx;
-       }
-
-       err = xennet_sysfs_addif(netdev);
-       if (err) {
-               /* This can be non-fatal: it only means no tuning parameters */
-               printk(KERN_WARNING "%s> add sysfs failed err=%d\n",
-                      __FUNCTION__, err);
-       }
-
        np->netdev = netdev;
-
        return netdev;
 
-
- exit_free_rx:
-       gnttab_free_grant_references(np->gref_rx_head);
  exit_free_tx:
        gnttab_free_grant_references(np->gref_tx_head);
  exit:
@@ -1946,11 +1936,10 @@ static void netfront_closing(struct xenb
 {
        struct netfront_info *info = dev->dev.driver_data;
 
-       DPRINTK("netfront_closing: %s removed\n", dev->nodename);
+       DPRINTK("%s\n", dev->nodename);
 
        close_netdev(info);
-
-       xenbus_switch_state(dev, XenbusStateClosed);
+       xenbus_frontend_closed(dev);
 }
 
 
@@ -1966,6 +1955,26 @@ static int __devexit netfront_remove(str
        return 0;
 }
 
+
+static int open_netdev(struct netfront_info *info)
+{
+       int err;
+       
+       err = register_netdev(info->netdev);
+       if (err) {
+               printk(KERN_WARNING "%s: register_netdev err=%d\n",
+                      __FUNCTION__, err);
+               return err;
+       }
+
+       err = xennet_sysfs_addif(info->netdev);
+       if (err) {
+               /* This can be non-fatal: it only means no tuning parameters */
+               printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
+                      __FUNCTION__, err);
+       }
+       return 0;
+}
 
 static void close_netdev(struct netfront_info *info)
 {
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c   Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c   Fri Sep 01 13:04:02 2006 -0600
@@ -132,4 +132,16 @@ int xenbus_unmap_ring(struct xenbus_devi
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
 
+int xenbus_dev_is_online(struct xenbus_device *dev)
+{
+       int rc, val;
+
+       rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
+       if (rc != 1)
+               val = 0; /* no online node present */
+
+       return val;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
+
 MODULE_LICENSE("Dual BSD/GPL");
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Fri Sep 01 13:04:02 2006 -0600
@@ -41,6 +41,20 @@ extern char *kasprintf(const char *fmt, 
 #define DPRINTK(fmt, args...) \
     pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, 
##args)
 
+char *xenbus_strstate(enum xenbus_state state)
+{
+       static char *name[] = {
+               [ XenbusStateUnknown      ] = "Unknown",
+               [ XenbusStateInitialising ] = "Initialising",
+               [ XenbusStateInitWait     ] = "InitWait",
+               [ XenbusStateInitialised  ] = "Initialised",
+               [ XenbusStateConnected    ] = "Connected",
+               [ XenbusStateClosing      ] = "Closing",
+               [ XenbusStateClosed       ] = "Closed",
+       };
+       return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
+}
+
 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
                      struct xenbus_watch *watch,
                      void (*callback)(struct xenbus_watch *,
@@ -124,6 +138,13 @@ int xenbus_switch_state(struct xenbus_de
 }
 EXPORT_SYMBOL_GPL(xenbus_switch_state);
 
+int xenbus_frontend_closed(struct xenbus_device *dev)
+{
+       xenbus_switch_state(dev, XenbusStateClosed);
+       complete(&dev->down);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
 
 /**
  * Return the path to the error node for the given device, or NULL on failure.
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri Sep 01 13:04:02 2006 -0600
@@ -73,6 +73,7 @@ static int xenbus_probe_backend(const ch
 
 static int xenbus_dev_probe(struct device *_dev);
 static int xenbus_dev_remove(struct device *_dev);
+static void xenbus_dev_shutdown(struct device *_dev);
 
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
@@ -192,6 +193,7 @@ static struct xen_bus_type xenbus_fronte
                .match    = xenbus_match,
                .probe    = xenbus_dev_probe,
                .remove   = xenbus_dev_remove,
+               .shutdown = xenbus_dev_shutdown,
        },
        .dev = {
                .bus_id = "xen",
@@ -246,6 +248,7 @@ static struct xen_bus_type xenbus_backen
                .match    = xenbus_match,
                .probe    = xenbus_dev_probe,
                .remove   = xenbus_dev_remove,
+//             .shutdown = xenbus_dev_shutdown,
                .uevent   = xenbus_uevent_backend,
        },
        .dev = {
@@ -316,8 +319,9 @@ static void otherend_changed(struct xenb
 
        state = xenbus_read_driver_state(dev->otherend);
 
-       DPRINTK("state is %d, %s, %s",
-               state, dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+       DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
+               dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+
        if (drv->otherend_changed)
                drv->otherend_changed(dev, state);
 }
@@ -348,7 +352,7 @@ static int xenbus_dev_probe(struct devic
        const struct xenbus_device_id *id;
        int err;
 
-       DPRINTK("");
+       DPRINTK("%s", dev->nodename);
 
        if (!drv->probe) {
                err = -ENODEV;
@@ -393,7 +397,7 @@ static int xenbus_dev_remove(struct devi
        struct xenbus_device *dev = to_xenbus_device(_dev);
        struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
 
-       DPRINTK("");
+       DPRINTK("%s", dev->nodename);
 
        free_otherend_watch(dev);
        free_otherend_details(dev);
@@ -403,6 +407,27 @@ static int xenbus_dev_remove(struct devi
 
        xenbus_switch_state(dev, XenbusStateClosed);
        return 0;
+}
+
+static void xenbus_dev_shutdown(struct device *_dev)
+{
+       struct xenbus_device *dev = to_xenbus_device(_dev);
+       unsigned long timeout = 5*HZ;
+
+       DPRINTK("%s", dev->nodename);
+
+       get_device(&dev->dev);
+       if (dev->state != XenbusStateConnected) {
+               printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
+                      dev->nodename, xenbus_strstate(dev->state));
+               goto out;
+       }
+       xenbus_switch_state(dev, XenbusStateClosing);
+       timeout = wait_for_completion_timeout(&dev->down, timeout);
+       if (!timeout)
+               printk("%s: %s timeout closing device\n", __FUNCTION__, 
dev->nodename);
+ out:
+       put_device(&dev->dev);
 }
 
 static int xenbus_register_driver_common(struct xenbus_driver *drv,
@@ -587,6 +612,7 @@ static int xenbus_probe_node(struct xen_
        tmpstring += strlen(tmpstring) + 1;
        strcpy(tmpstring, type);
        xendev->devicetype = tmpstring;
+       init_completion(&xendev->down);
 
        xendev->dev.parent = &bus->dev;
        xendev->dev.bus = &bus->bus;
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Fri Sep 01 13:04:02 2006 -0600
@@ -37,6 +37,7 @@
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/mutex.h>
+#include <linux/completion.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/grant_table.h>
 #include <xen/interface/io/xenbus.h>
@@ -74,6 +75,7 @@ struct xenbus_device {
        struct xenbus_watch otherend_watch;
        struct device dev;
        enum xenbus_state state;
+       struct completion down;
 };
 
 static inline struct xenbus_device *to_xenbus_device(struct device *dev)
@@ -297,4 +299,8 @@ void xenbus_dev_fatal(struct xenbus_devi
 
 int __init xenbus_dev_init(void);
 
+char *xenbus_strstate(enum xenbus_state state);
+int xenbus_dev_is_online(struct xenbus_device *dev);
+int xenbus_frontend_closed(struct xenbus_device *dev);
+
 #endif /* _XEN_XENBUS_H */
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/block-aio.c
--- a/tools/blktap/drivers/block-aio.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/drivers/block-aio.c  Fri Sep 01 13:04:02 2006 -0600
@@ -52,7 +52,7 @@
  */
 #define REQUEST_ASYNC_FD 1
 
-#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
 
 struct pending_aio {
        td_callback_t cb;
@@ -146,7 +146,7 @@ int tdaio_open (struct td_state *s, cons
        struct tdaio_state *prv = (struct tdaio_state *)s->private;
        s->private = prv;
 
-       DPRINTF("XXX: block-aio open('%s')", name);
+       DPRINTF("block-aio open('%s')", name);
        /* Initialize AIO */
        prv->iocb_free_count = MAX_AIO_REQS;
        prv->iocb_queued     = 0;
@@ -156,9 +156,18 @@ int tdaio_open (struct td_state *s, cons
 
        if (prv->poll_fd < 0) {
                ret = prv->poll_fd;
-               DPRINTF("Couldn't get fd for AIO poll support.  This is "
-                       "probably because your kernel does not have the "
-                       "aio-poll patch applied.\n");
+                if (ret == -EAGAIN) {
+                        DPRINTF("Couldn't setup AIO context.  If you are "
+                                "trying to concurrently use a large number "
+                                "of blktap-based disks, you may need to "
+                                "increase the system-wide aio request limit. "
+                                "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
+                                "aio-max-nr')\n");
+                } else {
+                        DPRINTF("Couldn't get fd for AIO poll support.  This "
+                                "is probably because your kernel does not "
+                                "have the aio-poll patch applied.\n");
+                }
                goto done;
        }
 
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/drivers/block-qcow.c Fri Sep 01 13:04:02 2006 -0600
@@ -51,7 +51,7 @@
 /******AIO DEFINES******/
 #define REQUEST_ASYNC_FD 1
 #define MAX_QCOW_IDS  0xFFFF
-#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
 
 struct pending_aio {
         td_callback_t cb;
@@ -176,10 +176,21 @@ static int init_aio_state(struct td_stat
         s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;   
         s->poll_fd = io_setup(MAX_AIO_REQS, &s->aio_ctx);
 
-        if (s->poll_fd < 0) {
-                DPRINTF("Retrieving Async poll fd failed\n");
+       if (s->poll_fd < 0) {
+                if (s->poll_fd == -EAGAIN) {
+                        DPRINTF("Couldn't setup AIO context.  If you are "
+                                "trying to concurrently use a large number "
+                                "of blktap-based disks, you may need to "
+                                "increase the system-wide aio request limit. "
+                                "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
+                                "aio-max-nr')\n");
+                } else {
+                        DPRINTF("Couldn't get fd for AIO poll support.  This "
+                                "is probably because your kernel does not "
+                                "have the aio-poll patch applied.\n");
+                }
                goto fail;
-        }
+       }
 
         for (i=0;i<MAX_AIO_REQS;i++)
                 s->iocb_free[i] = &s->iocb_list[i];
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/tapdisk.c
--- a/tools/blktap/drivers/tapdisk.c    Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/drivers/tapdisk.c    Fri Sep 01 13:04:02 2006 -0600
@@ -110,6 +110,7 @@ static void unmap_disk(struct td_state *
        free(s->fd_entry);
        free(s->blkif);
        free(s->ring_info);
+        free(s->private);
        free(s);
 
        return;
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/lib/xs_api.c
--- a/tools/blktap/lib/xs_api.c Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/lib/xs_api.c Fri Sep 01 13:04:02 2006 -0600
@@ -204,7 +204,7 @@ int convert_dev_name_to_num(char *name) 
 int convert_dev_name_to_num(char *name) {
        char *p_sd, *p_hd, *p_xvd, *p_plx, *p, *alpha,*ptr;
        int majors[10] = {3,22,33,34,56,57,88,89,90,91};
-       int maj,i;
+       int maj,i,ret = 0;
 
        asprintf(&p_sd,"/dev/sd");
        asprintf(&p_hd,"/dev/hd");
@@ -221,7 +221,7 @@ int convert_dev_name_to_num(char *name) 
                        *ptr++;
                }
                *p++;
-               return BASE_DEV_VAL + (16*i) + atoi(p);
+               ret = BASE_DEV_VAL + (16*i) + atoi(p);
        } else if (strstr(name, p_hd) != NULL) {
                p = name + strlen(p_hd);
                for (i = 0, ptr = alpha; i < strlen(alpha); i++) {
@@ -229,7 +229,7 @@ int convert_dev_name_to_num(char *name) 
                        *ptr++;
                }
                *p++;
-               return (majors[i/2]*256) + atoi(p);
+               ret = (majors[i/2]*256) + atoi(p);
 
        } else if (strstr(name, p_xvd) != NULL) {
                p = name + strlen(p_xvd);
@@ -238,17 +238,24 @@ int convert_dev_name_to_num(char *name) 
                        *ptr++;
                }
                *p++;
-               return (202*256) + (16*i) + atoi(p);
+               ret = (202*256) + (16*i) + atoi(p);
 
        } else if (strstr(name, p_plx) != NULL) {
                p = name + strlen(p_plx);
-               return atoi(p);
+               ret = atoi(p);
 
        } else {
                DPRINTF("Unknown device type, setting to default.\n");
-               return BASE_DEV_VAL;
-       }
-       return 0;
+               ret = BASE_DEV_VAL;
+       }
+
+        free(p_sd);
+        free(p_hd);
+        free(p_xvd);
+        free(p_plx);
+        free(alpha);
+        
+       return ret;
 }
 
 /**
diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/ia64/xc_ia64_stubs.c
--- a/tools/libxc/ia64/xc_ia64_stubs.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/libxc/ia64/xc_ia64_stubs.c  Fri Sep 01 13:04:02 2006 -0600
@@ -36,7 +36,6 @@ xc_ia64_get_pfn_list(int xc_handle, uint
     struct xen_domctl domctl;
     int num_pfns,ret;
     unsigned int __start_page, __nr_pages;
-    unsigned long max_pfns;
     xen_pfn_t *__pfn_buf;
 
     __start_page = start_page;
@@ -44,27 +43,22 @@ xc_ia64_get_pfn_list(int xc_handle, uint
     __pfn_buf = pfn_buf;
   
     while (__nr_pages) {
-        max_pfns = ((unsigned long)__start_page << 32) | __nr_pages;
         domctl.cmd = XEN_DOMCTL_getmemlist;
-        domctl.domain   = (domid_t)domid;
-        domctl.u.getmemlist.max_pfns = max_pfns;
+        domctl.domain = (domid_t)domid;
+        domctl.u.getmemlist.max_pfns = __nr_pages;
+        domctl.u.getmemlist.start_pfn =__start_page;
         domctl.u.getmemlist.num_pfns = 0;
         set_xen_guest_handle(domctl.u.getmemlist.buffer, __pfn_buf);
 
-        if ((max_pfns != -1UL)
-            && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0) {
+        if (mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0) {
             PERROR("Could not lock pfn list buffer");
             return -1;
         }
 
         ret = do_domctl(xc_handle, &domctl);
 
-        if (max_pfns != -1UL)
-            (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
+        (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
 
-        if (max_pfns == -1UL)
-            return 0;
-        
         num_pfns = domctl.u.getmemlist.num_pfns;
         __start_page += num_pfns;
         __nr_pages -= num_pfns;
diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/powerpc64/xc_linux_build.c
--- a/tools/libxc/powerpc64/xc_linux_build.c    Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/libxc/powerpc64/xc_linux_build.c    Fri Sep 01 13:04:02 2006 -0600
@@ -309,7 +309,7 @@ static unsigned long create_start_info(s
     si->store_evtchn = store_evtchn;
     si->console.domU.mfn = si->nr_pages - 3;
     si->console.domU.evtchn = console_evtchn;
-    si_addr = eomem - (PAGE_SIZE * 4);
+    si_addr = (si->nr_pages - 4) << PAGE_SHIFT;
 
     return si_addr;
 }
@@ -388,7 +388,7 @@ int xc_linux_build(int xc_handle,
     }
 
     si_addr = create_start_info(&si, console_evtchn, store_evtchn);
-    *console_mfn = page_array[si.console_mfn];
+    *console_mfn = page_array[si.console.domU.mfn];
     *store_mfn = page_array[si.store_mfn];
     
     if (install_image(xc_handle, domid, page_array, &si, si_addr,
diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/libxc/xenctrl.h     Fri Sep 01 13:04:02 2006 -0600
@@ -8,6 +8,11 @@
 
 #ifndef XENCTRL_H
 #define XENCTRL_H
+
+/* Tell the Xen public headers we are a user-space tools build. */
+#ifndef __XEN_TOOLS__
+#define __XEN_TOOLS__ 1
+#endif
 
 #include <stddef.h>
 #include <stdint.h>
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/XendCheckpoint.py   Fri Sep 01 13:04:02 2006 -0600
@@ -161,10 +161,12 @@ def restore(xd, fd):
         if handler.store_mfn is None or handler.console_mfn is None:
             raise XendError('Could not read store/console MFN')
 
+        #Block until src closes connection
+        os.read(fd, 1)
         dominfo.unpause()
-
+        
         dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
-
+        
         return dominfo
     except:
         dominfo.destroy()
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/XendDomain.py       Fri Sep 01 13:04:02 2006 -0600
@@ -431,7 +431,8 @@ class XendDomain:
         sock.send("receive\n")
         sock.recv(80)
         XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst)
-
+        dominfo.testDeviceComplete()
+        sock.close()
 
     def domain_save(self, domid, dst):
         """Start saving a domain to file.
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri Sep 01 13:04:02 2006 -0600
@@ -30,7 +30,6 @@ import time
 import time
 import threading
 import os
-import math
 
 import xen.lowlevel.xc
 from xen.util import asserts
@@ -703,6 +702,9 @@ class XendDomainInfo:
                 if security[idx][0] == 'ssidref':
                     to_store['security/ssidref'] = str(security[idx][1])
 
+        if not self.readVm('xend/restart_count'):
+            to_store['xend/restart_count'] = str(0)
+
         log.debug("Storing VM details: %s", to_store)
 
         self.writeVm(to_store)
@@ -823,6 +825,9 @@ class XendDomainInfo:
 
     def setResume(self, state):
         self.info['resume'] = state
+
+    def getRestartCount(self):
+        return self.readVm('xend/restart_count')
 
     def refreshShutdown(self, xeninfo = None):
         # If set at the end of this method, a restart is required, with the
@@ -1280,34 +1285,28 @@ class XendDomainInfo:
                 for v in range(0, self.info['max_vcpu_id']+1):
                     xc.vcpu_setaffinity(self.domid, v, self.info['cpus'])
 
-            # set domain maxmem in KiB
-            xc.domain_setmaxmem(self.domid, self.info['maxmem'] * 1024)
-
-            m = self.image.getDomainMemory(self.info['memory'] * 1024)
+            # set memory limit
+            maxmem = self.image.getRequiredMemory(self.info['maxmem'] * 1024)
+            xc.domain_setmaxmem(self.domid, maxmem)
+
+            mem_kb = self.image.getRequiredMemory(self.info['memory'] * 1024)
 
             # get the domain's shadow memory requirement
-            sm = int(math.ceil(self.image.getDomainShadowMemory(m) / 1024.0))
-            if self.info['shadow_memory'] > sm:
-                sm = self.info['shadow_memory']
+            shadow_kb = self.image.getRequiredShadowMemory(mem_kb)
+            shadow_kb_req = self.info['shadow_memory'] * 1024
+            if shadow_kb_req > shadow_kb:
+                shadow_kb = shadow_kb_req
+            shadow_mb = (shadow_kb + 1023) / 1024
 
             # Make sure there's enough RAM available for the domain
-            balloon.free(m + sm * 1024)
+            balloon.free(mem_kb + shadow_mb * 1024)
 
             # Set up the shadow memory
-            sm = xc.shadow_mem_control(self.domid, mb=sm)
-            self.info['shadow_memory'] = sm
-
-            init_reservation = self.info['memory'] * 1024
-            if os.uname()[4] in ('ia64', 'ppc64'):
-                # Workaround for architectures that don't yet support
-                # ballooning.
-                init_reservation = m
-                # Following line from xiantao.zhang@xxxxxxxxx
-                # Needed for IA64 until supports ballooning -- okay for PPC64?
-                xc.domain_setmaxmem(self.domid, m)
-
-            xc.domain_memory_increase_reservation(self.domid, init_reservation,
-                                                  0, 0)
+            shadow_cur = xc.shadow_mem_control(self.domid, shadow_mb)
+            self.info['shadow_memory'] = shadow_cur
+
+            # initial memory allocation
+            xc.domain_memory_increase_reservation(self.domid, mem_kb, 0, 0)
 
             self.createChannels()
 
@@ -1495,6 +1494,21 @@ class XendDomainInfo:
             if rc != 0:
                 raise XendError("Device of type '%s' refuses migration." % n)
 
+    def testDeviceComplete(self):
+        """ For Block IO migration safety we must ensure that
+        the device has shutdown correctly, i.e. all blocks are
+        flushed to disk
+        """
+        while True:
+            test = 0
+            for i in self.getDeviceController('vbd').deviceIDs():
+                test = 1
+                log.info("Dev %s still active, looping...", i)
+                time.sleep(0.1)
+                
+            if test == 0:
+                break
+
     def migrateDevices(self, network, dst, step, domName=''):
         """Notify the devices about migration
         """
@@ -1615,6 +1629,9 @@ class XendDomainInfo:
             try:
                 new_dom = XendDomain.instance().domain_create(config)
                 new_dom.unpause()
+                rst_cnt = self.readVm('xend/restart_count')
+                rst_cnt = int(rst_cnt) + 1
+                self.writeVm('xend/restart_count', str(rst_cnt))
                 new_dom.removeVm(RESTART_IN_PROGRESS)
             except:
                 if new_dom:
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/image.py    Fri Sep 01 13:04:02 2006 -0600
@@ -27,6 +27,8 @@ from xen.xend.XendLogging import log
 from xen.xend.XendLogging import log
 from xen.xend.server.netif import randomMAC
 from xen.xend.xenstore.xswatch import xswatch
+from xen.xend import arch
+from xen.xend import FlatDeviceTree
 
 
 xc = xen.lowlevel.xc.xc()
@@ -141,19 +143,10 @@ class ImageHandler:
             raise VmError('Building domain failed: ostype=%s dom=%d err=%s'
                           % (self.ostype, self.vm.getDomid(), str(result)))
 
-
-    def getDomainMemory(self, mem_kb):
-        """@return The memory required, in KiB, by the domain to store the
-        given amount, also in KiB."""
-        if os.uname()[4] != 'ia64':
-            # A little extra because auto-ballooning is broken w.r.t. HVM
-            # guests. Also, slack is necessary for live migration since that
-            # uses shadow page tables.
-            if 'hvm' in xc.xeninfo()['xen_caps']:
-                mem_kb += 4*1024;
+    def getRequiredMemory(self, mem_kb):
         return mem_kb
 
-    def getDomainShadowMemory(self, mem_kb):
+    def getRequiredShadowMemory(self, mem_kb):
         """@return The minimum shadow memory required, in KiB, for a domain 
         with mem_kb KiB of RAM."""
         # PV domains don't need any shadow memory
@@ -197,9 +190,39 @@ class LinuxImageHandler(ImageHandler):
                               ramdisk        = self.ramdisk,
                               features       = self.vm.getFeatures())
 
+class PPC_LinuxImageHandler(LinuxImageHandler):
+
+    ostype = "linux"
+
+    def configure(self, imageConfig, deviceConfig):
+        LinuxImageHandler.configure(self, imageConfig, deviceConfig)
+        self.imageConfig = imageConfig
+
+    def buildDomain(self):
+        store_evtchn = self.vm.getStorePort()
+        console_evtchn = self.vm.getConsolePort()
+
+        log.debug("dom            = %d", self.vm.getDomid())
+        log.debug("image          = %s", self.kernel)
+        log.debug("store_evtchn   = %d", store_evtchn)
+        log.debug("console_evtchn = %d", console_evtchn)
+        log.debug("cmdline        = %s", self.cmdline)
+        log.debug("ramdisk        = %s", self.ramdisk)
+        log.debug("vcpus          = %d", self.vm.getVCpuCount())
+        log.debug("features       = %s", self.vm.getFeatures())
+
+        devtree = FlatDeviceTree.build(self)
+
+        return xc.linux_build(dom            = self.vm.getDomid(),
+                              image          = self.kernel,
+                              store_evtchn   = store_evtchn,
+                              console_evtchn = console_evtchn,
+                              cmdline        = self.cmdline,
+                              ramdisk        = self.ramdisk,
+                              features       = self.vm.getFeatures(),
+                              arch_args      = devtree.to_bin())
+
 class HVMImageHandler(ImageHandler):
-
-    ostype = "hvm"
 
     def configure(self, imageConfig, deviceConfig):
         ImageHandler.configure(self, imageConfig, deviceConfig)
@@ -282,7 +305,7 @@ class HVMImageHandler(ImageHandler):
         for (name, info) in deviceConfig:
             if name == 'vbd':
                 uname = sxp.child_value(info, 'uname')
-                if 'file:' in uname:
+                if uname is not None and 'file:' in uname:
                     (_, vbdparam) = string.split(uname, ':', 1)
                     if not os.path.isfile(vbdparam):
                         raise VmError('Disk image does not exist: %s' %
@@ -355,32 +378,6 @@ class HVMImageHandler(ImageHandler):
         os.waitpid(self.pid, 0)
         self.pid = 0
 
-    def getDomainMemory(self, mem_kb):
-        """@see ImageHandler.getDomainMemory"""
-        if os.uname()[4] == 'ia64':
-            page_kb = 16
-            # ROM size for guest firmware, ioreq page and xenstore page
-            extra_pages = 1024 + 2
-        else:
-            page_kb = 4
-            # This was derived emperically:
-            #   2.4 MB overhead per 1024 MB RAM + 8 MB constant
-            #   + 4 to avoid low-memory condition
-            extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
-            extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
-        return mem_kb + extra_pages * page_kb
-
-    def getDomainShadowMemory(self, mem_kb):
-        """@return The minimum shadow memory required, in KiB, for a domain 
-        with mem_kb KiB of RAM."""
-        if os.uname()[4] in ('ia64', 'ppc64'):
-            # Explicit shadow memory is not a concept 
-            return 0
-        else:
-            # 1MB per vcpu plus 4Kib/Mib of RAM.  This is higher than 
-            # the minimum that Xen would allocate if no value were given.
-            return 1024 * self.vm.getVCpuCount() + mem_kb / 256
-
     def register_shutdown_watch(self):
         """ add xen store watch on control/shutdown """
         self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown", \
@@ -417,15 +414,51 @@ class HVMImageHandler(ImageHandler):
 
         return 1 # Keep watching
 
-"""Table of image handler classes for virtual machine images.  Indexed by
-image type.
-"""
-imageHandlerClasses = {}
-
-
-for h in LinuxImageHandler, HVMImageHandler:
-    imageHandlerClasses[h.ostype] = h
-
+class IA64_HVM_ImageHandler(HVMImageHandler):
+
+    ostype = "hvm"
+
+    def getRequiredMemory(self, mem_kb):
+        page_kb = 16
+        # ROM size for guest firmware, ioreq page and xenstore page
+        extra_pages = 1024 + 2
+        return mem_kb + extra_pages * page_kb
+
+    def getRequiredShadowMemory(self, mem_kb):
+        # Explicit shadow memory is not a concept 
+        return 0
+
+class X86_HVM_ImageHandler(HVMImageHandler):
+
+    ostype = "hvm"
+
+    def getRequiredMemory(self, mem_kb):
+        page_kb = 4
+        # This was derived empirically:
+        #   2.4 MB overhead per 1024 MB RAM + 8 MB constant
+        #   + 4 to avoid low-memory condition
+        extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
+        extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
+        return mem_kb + extra_pages * page_kb
+
+    def getRequiredShadowMemory(self, mem_kb):
+        # 1MB per vcpu plus 4KiB per MiB of RAM.  This is higher than 
+        # the minimum that Xen would allocate if no value were given.
+        return 1024 * self.vm.getVCpuCount() + mem_kb / 256
+
+_handlers = {
+    "powerpc": {
+        "linux": PPC_LinuxImageHandler,
+    },
+    "ia64": {
+        "linux": LinuxImageHandler,
+        "hvm": IA64_HVM_ImageHandler,
+    },
+    "x86": {
+        "linux": LinuxImageHandler,
+        "hvm": X86_HVM_ImageHandler,
+    },
+}
 
 def findImageHandlerClass(image):
     """Find the image handler class for an image config.
@@ -433,10 +466,10 @@ def findImageHandlerClass(image):
     @param image config
     @return ImageHandler subclass or None
     """
-    ty = sxp.name(image)
-    if ty is None:
+    type = sxp.name(image)
+    if type is None:
         raise VmError('missing image type')
-    imageClass = imageHandlerClasses.get(ty)
-    if imageClass is None:
-        raise VmError('unknown image type: ' + ty)
-    return imageClass
+    try:
+        return _handlers[arch.type][type]
+    except KeyError:
+        raise VmError('unknown image type: ' + type)
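The empirically derived x86 HVM overhead above is easier to sanity-check in isolation. The sketch below (not part of the patch) reproduces the X86_HVM_ImageHandler.getRequiredMemory arithmetic as a standalone function and evaluates it for a 512 MB guest, which works out to 13,520 KiB of extra allocation.

import math

def x86_hvm_required_memory_kb(mem_kb):
    # Mirrors X86_HVM_ImageHandler.getRequiredMemory: 2.4 MB per 1024 MB of
    # RAM, plus a 12 MB constant (8 MB overhead + 4 MB low-memory slack),
    # rounded up to whole 4 KiB pages.
    page_kb = 4
    extra_mb = (2.4 / 1024) * (mem_kb / 1024.0) + 12
    extra_pages = int(math.ceil(extra_mb * 1024 / page_kb))
    return mem_kb + extra_pages * page_kb

mem_kb = 512 * 1024
print(x86_hvm_required_memory_kb(mem_kb) - mem_kb)   # 13520 KiB of overhead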
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py     Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/server/DevController.py     Fri Sep 01 13:04:02 2006 -0600
@@ -207,6 +207,9 @@ class DevController:
 
         devid = int(devid)
 
+        # Modify online status /before/ updating state (latter is watched by
+        # drivers, so this ordering avoids a race).
+        self.writeBackend(devid, 'online', "0")
         self.writeBackend(devid, 'state', str(xenbusState['Closing']))
 
 
@@ -406,7 +409,8 @@ class DevController:
             'domain' : self.vm.getName(),
             'frontend' : frontpath,
             'frontend-id' : "%i" % self.vm.getDomid(),
-            'state' : str(xenbusState['Initialising'])
+            'state' : str(xenbusState['Initialising']),
+            'online' : "1"
             })
 
         return (backpath, frontpath)
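The DevController change writes the backend's 'online' node before flipping 'state' to Closing because backend drivers watch 'state'; ordering the writes this way means that by the time the driver's watch fires it already observes online == 0. A small sketch of that ordering, with write_backend as a hypothetical stand-in for DevController.writeBackend and state values mirroring the xenbusState table:

xenbus_state = {'Initialising': 1, 'Closing': 5}   # assumed to mirror xenbusState
backend_store = {}

def write_backend(devid, key, value):
    # Hypothetical stand-in for DevController.writeBackend.
    backend_store[(devid, key)] = value

def destroy_device(devid):
    # 'online' goes to 0 first; the driver watches 'state', so when its
    # watch fires on Closing it is guaranteed to read online == "0".
    write_backend(devid, 'online', "0")
    write_backend(devid, 'state', str(xenbus_state['Closing']))

destroy_device(0)
assert backend_store[(0, 'online')] == "0"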
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/server/XMLRPCServer.py
--- a/tools/python/xen/xend/server/XMLRPCServer.py      Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/server/XMLRPCServer.py      Fri Sep 01 13:04:02 2006 -0600
@@ -78,7 +78,8 @@ methods = ['device_create', 'device_conf
 methods = ['device_create', 'device_configure', 'destroyDevice',
            'getDeviceSxprs',
            'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown',
-           'send_sysrq', 'getVCPUInfo', 'waitForDevices']
+           'send_sysrq', 'getVCPUInfo', 'waitForDevices',
+           'getRestartCount']
 
 exclude = ['domain_create', 'domain_restore']
 
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/server/blkif.py     Fri Sep 01 13:04:02 2006 -0600
@@ -52,10 +52,18 @@ class BlkifController(DevController):
         except ValueError:
             dev_type = "disk"
 
-        try:
-            (typ, params) = string.split(uname, ':', 1)
-        except ValueError:
-            (typ, params) = ("", "")
+        if uname is None:
+            if dev_type == 'cdrom':
+                (typ, params) = ("", "")
+            else:
+                raise VmError(
+                    'Block device must have physical details specified')
+        else:
+            try:
+                (typ, params) = string.split(uname, ':', 1)
+            except ValueError:
+                (typ, params) = ("", "")
+
         back = { 'dev'    : dev,
                  'type'   : typ,
                  'params' : params,
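The blkif change tightens uname handling: only a CD-ROM may omit the physical device (an empty drive), while any other block device must specify 'type:params'. A standalone sketch of that rule (not part of the patch), using ValueError in place of xend's VmError:

def parse_block_uname(uname, dev_type):
    # A CD-ROM may have no physical device (empty drive); anything else
    # must specify one.  ValueError stands in for xend's VmError here.
    if uname is None:
        if dev_type == 'cdrom':
            return ('', '')
        raise ValueError('Block device must have physical details specified')
    try:
        (typ, params) = uname.split(':', 1)
    except ValueError:
        (typ, params) = ('', '')
    return (typ, params)

assert parse_block_uname('phy:/dev/sda1', 'disk') == ('phy', '/dev/sda1')
assert parse_block_uname(None, 'cdrom') == ('', '')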
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xm/migrate.py
--- a/tools/python/xen/xm/migrate.py    Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xm/migrate.py    Fri Sep 01 13:04:02 2006 -0600
@@ -57,7 +57,8 @@ def main(argv):
         opts.usage()
         return
     if len(args) != 2:
-        opts.err('Invalid arguments: ' + str(args))
+        opts.usage()
+        sys.exit(1)
     dom = args[0]
     dst = args[1]
     server.xend.domain.migrate(dom, dst, opts.vals.live, opts.vals.resource, opts.vals.port)
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xm/shutdown.py
--- a/tools/python/xen/xm/shutdown.py   Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xm/shutdown.py   Fri Sep 01 13:04:02 2006 -0600
@@ -48,21 +48,48 @@ gopts.opt('reboot', short='R',
           fn=set_true, default=0,
           use='Shutdown and reboot.')
 
+def wait_reboot(opts, doms, rcs):
+    while doms:
+        alive = server.xend.domains(0)
+        reboot = []
+        for d in doms:
+            if d in alive:
+                rc = server.xend.domain.getRestartCount(d)
+                if rc == rcs[d]: continue
+                reboot.append(d)
+            else:
+                opts.info("Domain %s destroyed: failed to reboot" % d)
+                doms.remove(d)
+        for d in reboot:
+            opts.info("Domain %s rebooted" % d)
+            doms.remove(d)
+        time.sleep(1)
+    opts.info("All domains rebooted")
+
+def wait_shutdown(opts, doms):
+    while doms:
+        alive = server.xend.domains(0)
+        dead = []
+        for d in doms:
+            if d in alive: continue
+            dead.append(d)
+        for d in dead:
+            opts.info("Domain %s terminated" % d)
+            doms.remove(d)
+        time.sleep(1)
+    opts.info("All domains terminated")
+
 def shutdown(opts, doms, mode, wait):
+    rcs = {}
     for d in doms:
+        rcs[d] = server.xend.domain.getRestartCount(d)
         server.xend.domain.shutdown(d, mode)
+
     if wait:
-        while doms:
-            alive = server.xend.domains(0)
-            dead = []
-            for d in doms:
-                if d in alive: continue
-                dead.append(d)
-            for d in dead:
-                opts.info("Domain %s terminated" % d)
-                doms.remove(d)
-            time.sleep(1)
-        opts.info("All domains terminated")
+        if mode == 'reboot':
+            wait_reboot(opts, doms, rcs)
+        else:
+            wait_shutdown(opts, doms)
 
 def shutdown_mode(opts):
     if opts.vals.halt and opts.vals.reboot:
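With the restart counter in place, 'xm shutdown --reboot --wait' can distinguish a domain that has actually rebooted (its restart count grew) from one that is merely still running. A sketch of that polling idea follows (not part of the patch); get_restart_count stands in for server.xend.domain.getRestartCount, and the timeout is an addition for the sketch only.

import time

def wait_for_reboot(get_restart_count, doms, poll=1.0, timeout=300):
    # Snapshot each domain's restart count, then treat a domain as rebooted
    # once the count it reports has changed.  Returns the set of domains
    # that never came back within the timeout.
    before = dict((d, get_restart_count(d)) for d in doms)
    pending = set(doms)
    deadline = time.time() + timeout
    while pending and time.time() < deadline:
        for d in list(pending):
            if get_restart_count(d) != before[d]:
                pending.discard(d)
        if pending:
            time.sleep(poll)
    return pending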
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c      Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/ia64/xen/dom0_ops.c      Fri Sep 01 13:04:02 2006 -0600
@@ -40,8 +40,8 @@ long arch_do_domctl(xen_domctl_t *op, XE
     {
         unsigned long i;
         struct domain *d = find_domain_by_id(op->domain);
-        unsigned long start_page = op->u.getmemlist.max_pfns >> 32;
-        unsigned long nr_pages = op->u.getmemlist.max_pfns & 0xffffffff;
+        unsigned long start_page = op->u.getmemlist.start_pfn;
+        unsigned long nr_pages = op->u.getmemlist.max_pfns;
         unsigned long mfn;
 
         if ( d == NULL ) {
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/Makefile
--- a/xen/arch/powerpc/Makefile Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/Makefile Fri Sep 01 13:04:02 2006 -0600
@@ -6,6 +6,7 @@ subdir-y += papr
 subdir-y += papr
 
 obj-y += audit.o
+obj-y += backtrace.o
 obj-y += bitops.o
 obj-y += boot_of.o
 obj-y += dart.o
@@ -19,19 +20,21 @@ obj-y += external.o
 obj-y += external.o
 obj-y += float.o
 obj-y += hcalls.o
-obj-y += htab.o
 obj-y += iommu.o
 obj-y += irq.o
 obj-y += mambo.o
+obj-y += memory.o
 obj-y += mm.o
 obj-y += mpic.o
 obj-y += mpic_init.o
 obj-y += of-devtree.o
 obj-y += of-devwalk.o
 obj-y += ofd_fixup.o
+obj-y += ofd_fixup_memory.o
 obj-y += physdev.o
 obj-y += rtas.o
 obj-y += setup.o
+obj-y += shadow.o
 obj-y += smp.o
 obj-y += time.o
 obj-y += usercopy.o
@@ -47,6 +50,7 @@ obj-y += elf32.o
 # These are extra warnings like for the arch/ppc directory but may not
 # allow the rest of the tree to build.
 PPC_C_WARNINGS += -Wundef -Wmissing-prototypes -Wmissing-declarations
+PPC_C_WARNINGS += -Wshadow
 CFLAGS += $(PPC_C_WARNINGS)
 
 LINK=0x400000
@@ -91,8 +95,27 @@ start.o: boot/start.S
 start.o: boot/start.S
        $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
 
-$(TARGET)-syms: start.o $(ALL_OBJS) xen.lds
-       $(CC) $(CFLAGS) $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds start.o $(ALL_OBJS) -o $@
+TARGET_OPTS = $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds
+TARGET_OPTS += start.o $(ALL_OBJS)
+
+.xen-syms: start.o $(ALL_OBJS) xen.lds
+       $(CC) $(CFLAGS) $(TARGET_OPTS) -o $@
+
+NM=$(CROSS_COMPILE)nm
+new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
+
+ifeq ($(new_nm),y)
+NM             := $(NM) --synthetic
+endif
+
+xen-syms.S: .xen-syms
+       $(NM) -n $^ | $(BASEDIR)/tools/symbols > $@
+
+xen-syms.o: xen-syms.S
+       $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
+
+$(TARGET)-syms: start.o $(ALL_OBJS) xen-syms.o xen.lds
+       $(CC) $(CFLAGS) $(TARGET_OPTS) xen-syms.o -o $@
 
 $(TARGET).bin: $(TARGET)-syms
        $(CROSS_COMPILE)objcopy --output-target=binary $< $@
@@ -122,4 +145,4 @@ dom0.bin: $(DOM0_IMAGE)
 
 clean::
        $(MAKE) -f $(BASEDIR)/Rules.mk -C of_handler clean
-       rm -f firmware firmware_image dom0.bin
+       rm -f firmware firmware_image dom0.bin .xen-syms
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/Rules.mk
--- a/xen/arch/powerpc/Rules.mk Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/Rules.mk Fri Sep 01 13:04:02 2006 -0600
@@ -4,7 +4,7 @@ LD := $(CROSS_COMPILE)ld
 LD := $(CROSS_COMPILE)ld
 
 # These are goodness flags that apply to all source.
-C_WARNINGS := -Wpointer-arith -Wredundant-decls
+C_WARNINGS := -Wredundant-decls
 
 # _no_ common code can have packed data structures or we are in trouble.
 C_WARNINGS += -Wpacked
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/boot_of.c
--- a/xen/arch/powerpc/boot_of.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/boot_of.c        Fri Sep 01 13:04:02 2006 -0600
@@ -26,10 +26,14 @@
 #include <xen/spinlock.h>
 #include <xen/serial.h>
 #include <xen/time.h>
+#include <xen/sched.h>
 #include <asm/page.h>
 #include <asm/io.h>
 #include "exceptions.h"
 #include "of-devtree.h"
+
+/* Secondary processors use this for handshaking with main processor.  */
+volatile unsigned int __spin_ack;
 
 static ulong of_vec;
 static ulong of_msr;
@@ -322,17 +326,18 @@ static void __init of_test(const char *o
     }
 }
 
-static int __init of_claim(void * virt, u32 size)
+static int __init of_claim(u32 virt, u32 size, u32 align)
 {
     int rets[1] = { OF_FAILURE };
     
-    of_call("claim", 3, 1, rets, virt, size, 0/*align*/);
+    of_call("claim", 3, 1, rets, virt, size, align);
     if (rets[0] == OF_FAILURE) {
-        DBG("%s 0x%p 0x%08x -> FAIL\n", __func__, virt, size);
+        DBG("%s 0x%08x 0x%08x  0x%08x -> FAIL\n", __func__, virt, size, align);
         return OF_FAILURE;
     }
 
-    DBG("%s 0x%p 0x%08x -> 0x%x\n", __func__, virt, size, rets[0]);
+    DBG("%s 0x%08x 0x%08x  0x%08x -> 0x%08x\n", __func__, virt, size, align,
+        rets[0]);
     return rets[0];
 }
 
@@ -683,32 +688,53 @@ static int boot_of_fixup_chosen(void *me
 }
 
 static ulong space_base;
-static ulong find_space(u32 size, ulong align, multiboot_info_t *mbi)
+
+/*
+ * The following function is necessary because we cannot depend on all
+ * FW to actually allocate us any space, so we look for it _hoping_
+ * that at least it will fail if we try to claim something that
+ * belongs to FW.  This hope does not seem to hold on some versions
+ * of PIBS.
+ */
+static ulong find_space(u32 size, u32 align, multiboot_info_t *mbi)
 {
     memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr);
     ulong eomem = ((u64)map->length_high << 32) | (u64)map->length_low;
     ulong base;
 
-    of_printf("%s base=0x%016lx  eomem=0x%016lx  size=0x%08x  align=0x%lx\n",
+    if (size == 0)
+        return 0;
+
+    if (align == 0)
+        of_panic("cannot call %s() with align of 0\n", __func__);
+
+#ifdef BROKEN_CLAIM_WORKAROUND
+    {
+        static int broken_claim;
+        if (!broken_claim) {
+            /* just try and claim it to the FW chosen address */
+            base = of_claim(0, size, align);
+            if (base != OF_FAILURE)
+                return base;
+            of_printf("%s: Firmware does not allocate memory for you\n",
+                      __func__);
+            broken_claim = 1;
+        }
+    }
+#endif
+
+    of_printf("%s base=0x%016lx  eomem=0x%016lx  size=0x%08x  align=0x%x\n",
                     __func__, space_base, eomem, size, align);
     base = ALIGN_UP(space_base, PAGE_SIZE);
-    if ((base + size) >= 0x4000000) return 0;
-    if (base + size > eomem) of_panic("not enough RAM\n");
-
-    if (size == 0) return base;
-    if (of_claim((void*)base, size) != OF_FAILURE) {
-        space_base = base + size;
-        return base;
-    } else {
-        for(base += 0x100000; (base+size) < 0x4000000; base += 0x100000) {
-            of_printf("Trying 0x%016lx\n", base);
-            if (of_claim((void*)base, size) != OF_FAILURE) {
-                space_base = base + size;
-                return base;
-            }
-        }
-        return 0;
-    }
+
+    while ((base + size) < rma_size(cpu_default_rma_order_pages())) {
+        if (of_claim(base, size, 0) != OF_FAILURE) {
+            space_base = base + size;
+            return base;
+        }
+        base += (PAGE_SIZE >  align) ? PAGE_SIZE : align;
+    }
+    of_panic("Cannot find memory in the RMA\n");
 }
 
 /* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges
@@ -834,9 +860,8 @@ static void boot_of_module(ulong r3, ulo
     static module_t mods[3];
     void *oftree;
     ulong oftree_sz = 48 * PAGE_SIZE;
-    char *mod0_start;
+    ulong mod0_start;
     ulong mod0_size;
-    ulong mod0;
     static const char sepr[] = " -- ";
     extern char dom0_start[] __attribute__ ((weak));
     extern char dom0_size[] __attribute__ ((weak));
@@ -844,59 +869,48 @@ static void boot_of_module(ulong r3, ulo
 
     if ((r3 > 0) && (r4 > 0)) {
         /* was it handed to us in registers ? */
-        mod0_start = (void *)r3;
+        mod0_start = r3;
         mod0_size = r4;
+        of_printf("%s: Dom0 was loaded and found using r3/r4:"
+                  "0x%lx[size 0x%lx]\n",
+                  __func__, mod0_start, mod0_size);
     } else {
         /* see if it is in the boot params */
         p = strstr((char *)((ulong)mbi->cmdline), "dom0_start=");
         if ( p != NULL) {
             p += 11;
-            mod0_start = (char *)simple_strtoul(p, NULL, 0);
+            mod0_start = simple_strtoul(p, NULL, 0);
 
             p = strstr((char *)((ulong)mbi->cmdline), "dom0_size=");
             p += 10;
             mod0_size = simple_strtoul(p, NULL, 0);
-
-            of_printf("mod0: %o %c %c %c\n",
-                      mod0_start[0],
-                      mod0_start[1],
-                      mod0_start[2],
-                      mod0_start[3]);
-
+            of_printf("%s: Dom0 was loaded and found using cmdline:"
+                      "0x%lx[size 0x%lx]\n",
+                      __func__, mod0_start, mod0_size);
         } else if ( ((ulong)dom0_start != 0) && ((ulong)dom0_size != 0) ) {
             /* was it linked in ? */
         
-            mod0_start = dom0_start;
+            mod0_start = (ulong)dom0_start;
             mod0_size = (ulong)dom0_size;
-            of_printf("%s: linked in module copied after _end "
-                      "(start 0x%p size 0x%lx)\n",
+            of_printf("%s: Dom0 is linked in: 0x%lx[size 0x%lx]\n",
                       __func__, mod0_start, mod0_size);
         } else {
-            mod0_start = _end;
+            mod0_start = (ulong)_end;
             mod0_size = 0;
-        }
+            of_printf("%s: FYI Dom0 is unknown, will be caught later\n",
+                      __func__);
+        }
+    }
+
+    if (mod0_size > 0) {
+        const char *c = (const char *)mod0_start;
+
+        of_printf("mod0: %o %c %c %c\n", c[0], c[1], c[2], c[3]);
     }
 
     space_base = (ulong)_end;
-    mod0 = find_space(mod0_size, PAGE_SIZE, mbi);
-
-    /* three cases
-     * 1) mod0_size is not 0 and the image can be copied
-     * 2) mod0_size is not 0 and the image cannot be copied
-     * 3) mod0_size is 0
-     */
-    if (mod0_size > 0) {
-        if (mod0 != 0) {
-            memcpy((void *)mod0, mod0_start, mod0_size);
-            mods[0].mod_start = mod0;
-            mods[0].mod_end = mod0 + mod0_size;
-        } else {
-            of_panic("No space to copy mod0\n");
-        }
-    } else {
-        mods[0].mod_start = mod0;
-        mods[0].mod_end = mod0;
-    }
+    mods[0].mod_start = mod0_start;
+    mods[0].mod_end = mod0_start + mod0_size;
 
     of_printf("%s: mod[0] @ 0x%016x[0x%x]\n", __func__,
               mods[0].mod_start, mods[0].mod_end);
@@ -909,15 +923,22 @@ static void boot_of_module(ulong r3, ulo
 
     /* snapshot the tree */
     oftree = (void*)find_space(oftree_sz, PAGE_SIZE, mbi);
-    if (oftree == 0) of_panic("Could not allocate OFD tree\n");
+    if (oftree == 0)
+        of_panic("Could not allocate OFD tree\n");
 
     of_printf("creating oftree\n");
     of_test("package-to-path");
-    ofd_create(oftree, oftree_sz);
+    oftree = ofd_create(oftree, oftree_sz);
     pkg_save(oftree);
+
+    if (ofd_size(oftree) > oftree_sz)
+         of_panic("Could not fit all of native devtree\n");
 
     boot_of_fixup_refs(oftree);
     boot_of_fixup_chosen(oftree);
+
+    if (ofd_size(oftree) > oftree_sz)
+         of_panic("Could not fit all devtree fixups\n");
 
     ofd_walk(oftree, OFD_ROOT, /* add_hype_props */ NULL, 2);
 
@@ -937,7 +958,7 @@ static int __init boot_of_cpus(void)
 static int __init boot_of_cpus(void)
 {
     int cpus;
-    int cpu;
+    int cpu, bootcpu, logical;
     int result;
     u32 cpu_clock[2];
 
@@ -962,10 +983,68 @@ static int __init boot_of_cpus(void)
     cpu_khz /= 1000;
     of_printf("OF: clock-frequency = %ld KHz\n", cpu_khz);
 
-    /* FIXME: should not depend on the boot CPU bring the first child */
+    /* Look up which CPU we are running on right now.  */
+    result = of_getprop(bof_chosen, "cpu", &bootcpu, sizeof (bootcpu));
+    if (result == OF_FAILURE)
+        of_panic("Failed to look up boot cpu\n");
+
     cpu = of_getpeer(cpu);
-    while (cpu > 0) {
-        of_start_cpu(cpu, (ulong)spin_start, 0);
+
+    /* We want a continuous logical cpu number space.  */
+    cpu_set(0, cpu_present_map);
+    cpu_set(0, cpu_online_map);
+    cpu_set(0, cpu_possible_map);
+
+    /* Spin up all CPUS, even if there are more than NR_CPUS, because
+     * Open Firmware has them spinning on cache lines which will
+     * eventually be scrubbed, which could lead to random CPU activation.
+     */
+    for (logical = 1; cpu > 0; logical++) {
+        unsigned int cpuid, ping, pong;
+        unsigned long now, then, timeout;
+
+        if (cpu == bootcpu) {
+            of_printf("skipping boot cpu!\n");
+            continue;
+        }
+
+        result = of_getprop(cpu, "reg", &cpuid, sizeof(cpuid));
+        if (result == OF_FAILURE)
+            of_panic("cpuid lookup failed\n");
+
+        of_printf("spinning up secondary processor #%d: ", logical);
+
+        __spin_ack = ~0x0;
+        ping = __spin_ack;
+        pong = __spin_ack;
+        of_printf("ping = 0x%x: ", ping);
+
+        mb();
+        result = of_start_cpu(cpu, (ulong)spin_start, logical);
+        if (result == OF_FAILURE)
+            of_panic("start cpu failed\n");
+
+        /* We will give the secondary processor five seconds to reply.  */
+        then = mftb();
+        timeout = then + (5 * timebase_freq);
+
+        do {
+            now = mftb();
+            if (now >= timeout) {
+                of_printf("BROKEN: ");
+                break;
+            }
+
+            mb();
+            pong = __spin_ack;
+        } while (pong == ping);
+        of_printf("pong = 0x%x\n", pong);
+
+        if (pong != ping) {
+            cpu_set(logical, cpu_present_map);
+            cpu_set(logical, cpu_possible_map);
+        }
+
         cpu = of_getpeer(cpu);
     }
     return 1;
@@ -1013,6 +1092,7 @@ multiboot_info_t __init *boot_of_init(
     boot_of_rtas();
 
     /* end of OF */
+    of_printf("Quiescing Open Firmware ...\n");
     of_call("quiesce", 0, 0, NULL);
 
     return &mbi;
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/dart_u3.c
--- a/xen/arch/powerpc/dart_u3.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/dart_u3.c        Fri Sep 01 13:04:02 2006 -0600
@@ -55,10 +55,10 @@ static void u3_inv_all(void)
         dc.reg.dc_invtlb = 1;
         out_32(dart_ctl_reg, dc.dc_word);
 
-    do {
-        dc.dc_word = in_32(dart_ctl_reg);
-        r++;
-    } while ((dc.reg.dc_invtlb == 1) && (r < (1 << l)));
+        do {
+            dc.dc_word = in_32(dart_ctl_reg);
+            r++;
+        } while ((dc.reg.dc_invtlb == 1) && (r < (1 << l)));
 
         if (r == (1 << l)) {
             if (l < 4) {
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/dom0_ops.c
--- a/xen/arch/powerpc/dom0_ops.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/dom0_ops.c       Fri Sep 01 13:04:02 2006 -0600
@@ -23,16 +23,20 @@
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <xen/guest_access.h>
+#include <xen/shadow.h>
 #include <public/xen.h>
 #include <public/domctl.h>
 #include <public/sysctl.h>
 
+void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *);
 void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c)
 { 
     memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs));
     /* XXX fill in rest of vcpu_guest_context_t */
 }
 
+long arch_do_domctl(struct xen_domctl *domctl,
+                    XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
 long arch_do_domctl(struct xen_domctl *domctl,
                     XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
 {
@@ -75,6 +79,19 @@ long arch_do_domctl(struct xen_domctl *d
         }
     }
     break;
+    case XEN_DOMCTL_shadow_op:
+    {
+        struct domain *d;
+        ret = -ESRCH;
+        d = find_domain_by_id(domctl->domain);
+        if ( d != NULL )
+        {
+            ret = shadow_domctl(d, &domctl->u.shadow_op, u_domctl);
+            put_domain(d);
+            copy_to_guest(u_domctl, domctl, 1);
+        } 
+    }
+    break;
 
     default:
         ret = -ENOSYS;
@@ -84,6 +101,8 @@ long arch_do_domctl(struct xen_domctl *d
     return ret;
 }
 
+long arch_do_sysctl(struct xen_sysctl *sysctl,
+                    XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl);
 long arch_do_sysctl(struct xen_sysctl *sysctl,
                     XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
 {
@@ -109,6 +128,7 @@ long arch_do_sysctl(struct xen_sysctl *s
     break;
 
     default:
+        printk("%s: unsupported sysctl: 0x%x\n", __func__, (sysctl->cmd));
         ret = -ENOSYS;
         break;
     }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/domain.c Fri Sep 01 13:04:02 2006 -0600
@@ -27,6 +27,8 @@
 #include <xen/domain.h>
 #include <xen/console.h>
 #include <xen/shutdown.h>
+#include <xen/shadow.h>
+#include <xen/mm.h>
 #include <asm/htab.h>
 #include <asm/current.h>
 #include <asm/hcalls.h>
@@ -75,7 +77,8 @@ int arch_domain_create(struct domain *d)
 {
     unsigned long rma_base;
     unsigned long rma_sz;
-    uint htab_order;
+    uint rma_order_pages;
+    int rc;
 
     if (d->domain_id == IDLE_DOMAIN_ID) {
         d->shared_info = (void *)alloc_xenheap_page();
@@ -84,44 +87,31 @@ int arch_domain_create(struct domain *d)
         return 0;
     }
 
-    d->arch.rma_order = cpu_rma_order();
-    rma_sz = rma_size(d->arch.rma_order);
-
     /* allocate the real mode area */
-    d->max_pages = 1UL << d->arch.rma_order;
+    rma_order_pages = cpu_default_rma_order_pages();
+    d->max_pages = 1UL << rma_order_pages;
     d->tot_pages = 0;
-    d->arch.rma_page = alloc_domheap_pages(d, d->arch.rma_order, 0);
-    if (NULL == d->arch.rma_page)
-        return 1;
+
+    rc = allocate_rma(d, rma_order_pages);
+    if (rc)
+        return rc;
     rma_base = page_to_maddr(d->arch.rma_page);
-
-    BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */
-
-    printk("clearing RMO: 0x%lx[0x%lx]\n", rma_base, rma_sz);
-    memset((void *)rma_base, 0, rma_sz);
+    rma_sz = rma_size(rma_order_pages);
 
     d->shared_info = (shared_info_t *)
         (rma_addr(&d->arch, RMA_SHARED_INFO) + rma_base);
 
-    d->arch.large_page_sizes = 1;
-    d->arch.large_page_shift[0] = 24; /* 16 M for 970s */
-
-    /* FIXME: we need to the the maximum addressible memory for this
-     * domain to calculate this correctly. It should probably be set
-     * by the managment tools */
-    htab_order = d->arch.rma_order - 6; /* (1/64) */
-    if (test_bit(_DOMF_privileged, &d->domain_flags)) {
-        /* bump the htab size of privleged domains */
-        ++htab_order;
-    }
-    htab_alloc(d, htab_order);
+    d->arch.large_page_sizes = cpu_large_page_orders(
+        d->arch.large_page_order, ARRAY_SIZE(d->arch.large_page_order));
+
+    INIT_LIST_HEAD(&d->arch.extent_list);
 
     return 0;
 }
 
 void arch_domain_destroy(struct domain *d)
 {
-    htab_free(d);
+    shadow_teardown(d);
 }
 
 void machine_halt(void)
@@ -162,6 +152,16 @@ int arch_set_info_guest(struct vcpu *v, 
 int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c)
 { 
     memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs));
+
+    printf("Domain[%d].%d: initializing\n",
+           v->domain->domain_id, v->vcpu_id);
+
+    if (v->domain->arch.htab.order == 0)
+        panic("Page table never allocated for Domain: %d\n",
+              v->domain->domain_id);
+    if (v->domain->arch.rma_order == 0)
+        panic("RMA never allocated for Domain: %d\n",
+              v->domain->domain_id);
 
     set_bit(_VCPUF_initialised, &v->vcpu_flags);
 
@@ -253,17 +253,19 @@ void continue_running(struct vcpu *same)
 void continue_running(struct vcpu *same)
 {
     /* nothing to do */
+    return;
 }
 
 void sync_vcpu_execstate(struct vcpu *v)
 {
-    /* XXX for now, for domain destruction, make this non-fatal */
-    printf("%s: called\n", __func__);
+    /* do nothing */
+    return;
 }
 
 void domain_relinquish_resources(struct domain *d)
 {
     free_domheap_pages(d->arch.rma_page, d->arch.rma_order);
+    free_extents(d);
 }
 
 void arch_dump_domain_info(struct domain *d)
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/domain_build.c
--- a/xen/arch/powerpc/domain_build.c   Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/domain_build.c   Fri Sep 01 13:04:02 2006 -0600
@@ -25,6 +25,7 @@
 #include <xen/init.h>
 #include <xen/ctype.h>
 #include <xen/iocap.h>
+#include <xen/shadow.h>
 #include <xen/version.h>
 #include <asm/processor.h>
 #include <asm/papr.h>
@@ -34,17 +35,21 @@ extern int loadelfimage_32(struct domain
 extern int loadelfimage_32(struct domain_setup_info *dsi);
 
 /* opt_dom0_mem: memory allocated to domain 0. */
-static unsigned int opt_dom0_mem;
+static unsigned int dom0_nrpages;
 static void parse_dom0_mem(char *s)
 {
-    unsigned long long bytes = parse_size_and_unit(s);
-    /* If no unit is specified we default to kB units, not bytes. */
-    if (isdigit(s[strlen(s)-1]))
-        opt_dom0_mem = (unsigned int)bytes;
-    else
-        opt_dom0_mem = (unsigned int)(bytes >> 10);
+    unsigned long long bytes;
+
+    bytes = parse_size_and_unit(s);
+    dom0_nrpages = bytes >> PAGE_SHIFT;
 }
 custom_param("dom0_mem", parse_dom0_mem);
+
+static unsigned int opt_dom0_max_vcpus;
+integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
+
+static unsigned int opt_dom0_shadow;
+boolean_param("dom0_shadow", opt_dom0_shadow);
 
 int elf_sanity_check(Elf_Ehdr *ehdr)
 {
@@ -105,11 +110,13 @@ int construct_dom0(struct domain *d,
     struct domain_setup_info dsi;
     ulong dst;
     u64 *ofh_tree;
+    uint rma_nrpages = 1 << d->arch.rma_order;
     ulong rma_sz = rma_size(d->arch.rma_order);
     ulong rma = page_to_maddr(d->arch.rma_page);
     start_info_t *si;
     ulong eomem;
     int am64 = 1;
+    int preempt = 0;
     ulong msr;
     ulong pc;
     ulong r2;
@@ -118,13 +125,18 @@ int construct_dom0(struct domain *d,
     BUG_ON(d->domain_id != 0);
     BUG_ON(d->vcpu[0] == NULL);
 
+    if (image_len == 0)
+        panic("No Dom0 image supplied\n");
+
     cpu_init_vcpu(v);
 
     memset(&dsi, 0, sizeof(struct domain_setup_info));
     dsi.image_addr = image_start;
     dsi.image_len  = image_len;
 
+    printk("Trying Dom0 as 64bit ELF\n");
     if ((rc = parseelfimage(&dsi)) != 0) {
+        printk("Trying Dom0 as 32bit ELF\n");
         if ((rc = parseelfimage_32(&dsi)) != 0)
             return rc;
         am64 = 0;
@@ -141,7 +153,33 @@ int construct_dom0(struct domain *d,
 
     /* By default DOM0 is allocated all available memory. */
     d->max_pages = ~0U;
-    d->tot_pages = 1UL << d->arch.rma_order;
+
+    /* default is the max(1/16th of memory, CONFIG_MIN_DOM0_PAGES) */
+    if (dom0_nrpages == 0) {
+        dom0_nrpages = total_pages >> 4;
+
+        if (dom0_nrpages < CONFIG_MIN_DOM0_PAGES)
+            dom0_nrpages = CONFIG_MIN_DOM0_PAGES;
+    }
+
+    /* make sure we are at least as big as the RMA */
+    if (dom0_nrpages > rma_nrpages)
+        dom0_nrpages = allocate_extents(d, dom0_nrpages, rma_nrpages);
+
+    ASSERT(d->tot_pages == dom0_nrpages);
+    ASSERT(d->tot_pages >= rma_nrpages);
+
+    if (opt_dom0_shadow == 0) {
+        /* 1/64 of memory  */
+        opt_dom0_shadow = (d->tot_pages >> 6) >> (20 - PAGE_SHIFT);
+    }
+
+    do {
+        shadow_set_allocation(d, opt_dom0_shadow, &preempt);
+    } while (preempt);
+    if (shadow_get_allocation(d) == 0)
+        panic("shadow allocation failed 0x%x < 0x%x\n",
+              shadow_get_allocation(d), opt_dom0_shadow);
 
     ASSERT( image_len < rma_sz );
 
@@ -156,10 +194,6 @@ int construct_dom0(struct domain *d,
     printk("shared_info: 0x%lx,%p\n", si->shared_info, d->shared_info);
 
     eomem = si->shared_info;
-
-    /* allow dom0 to access all of system RAM */
-    d->arch.logical_base_pfn = 128 << (20 - PAGE_SHIFT); /* 128 MB */
-    d->arch.logical_end_pfn = max_page;
 
     /* number of pages accessible */
     si->nr_pages = rma_sz >> PAGE_SHIFT;
@@ -265,7 +299,7 @@ int construct_dom0(struct domain *d,
 
     printk("DOM: pc = 0x%lx, r2 = 0x%lx\n", pc, r2);
 
-    ofd_dom0_fixup(d, *ofh_tree + rma, si, dst - rma);
+    ofd_dom0_fixup(d, *ofh_tree + rma, si);
 
     set_bit(_VCPUF_initialised, &v->vcpu_flags);
 
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/exceptions.c
--- a/xen/arch/powerpc/exceptions.c     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/exceptions.c     Fri Sep 01 13:04:02 2006 -0600
@@ -82,6 +82,8 @@ void program_exception(struct cpu_user_r
     show_registers(regs);
     printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr());
     printk("hid4 0x%016lx\n", regs->hid4);
+    printk("---[ backtrace ]---\n");
+    show_backtrace(regs->gprs[1], regs->lr, regs->pc);
     panic("%s: 0x%lx\n", __func__, cookie);
 #endif /* CRASH_DEBUG */
 }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/exceptions.h
--- a/xen/arch/powerpc/exceptions.h     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/exceptions.h     Fri Sep 01 13:04:02 2006 -0600
@@ -51,7 +51,4 @@ extern char exception_vectors[];
 extern char exception_vectors[];
 extern char exception_vectors_end[];
 extern int spin_start[];
-extern int firmware_image_start[0];
-extern int firmware_image_size[0];
-
 #endif
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/external.c       Fri Sep 01 13:04:02 2006 -0600
@@ -175,8 +175,7 @@ void init_IRQ(void)
 
 void ack_APIC_irq(void)
 {
-    printk("%s: EOI the whole MPIC?\n", __func__);
-    for (;;);
+    panic("%s: EOI the whole MPIC?\n", __func__);
 }
 
 void ack_bad_irq(unsigned int irq)
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/iommu.c
--- a/xen/arch/powerpc/iommu.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/iommu.c  Fri Sep 01 13:04:02 2006 -0600
@@ -52,17 +52,14 @@ int iommu_put(u32 buid, ulong ioba, unio
 
         pfn = tce.tce_bits.tce_rpn;
         mfn = pfn2mfn(d, pfn, &mtype);
-        if (mtype != 0) {
-            panic("we don't do non-RMO memory yet\n");
+        if (mfn > 0) {
+#ifdef DEBUG
+            printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
+                   ioba, pfn, mfn);
+#endif
+            tce.tce_bits.tce_rpn = mfn;
+            return iommu_phbs[buid].iommu_put(ioba, tce);
         }
-
-#ifdef DEBUG
-        printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
-               ioba, pfn, mfn);
-#endif
-        tce.tce_bits.tce_rpn = mfn;
-
-        return iommu_phbs[buid].iommu_put(ioba, tce);
     }
     return -1;
 }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/mm.c     Fri Sep 01 13:04:02 2006 -0600
@@ -13,9 +13,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #include <xen/config.h>
@@ -23,9 +24,18 @@
 #include <xen/shadow.h>
 #include <xen/kernel.h>
 #include <xen/sched.h>
+#include <xen/perfc.h>
 #include <asm/misc.h>
 #include <asm/init.h>
 #include <asm/page.h>
+
+#ifdef VERBOSE
+#define MEM_LOG(_f, _a...)                                  \
+  printk("DOM%u: (file=mm.c, line=%d) " _f "\n",            \
+         current->domain->domain_id , __LINE__ , ## _a )
+#else
+#define MEM_LOG(_f, _a...) ((void)0)
+#endif
 
 /* Frame table and its size in pages. */
 struct page_info *frame_table;
@@ -53,16 +63,128 @@ int steal_page(struct domain *d, struct 
     return 1;
 }
 
-
-int get_page_type(struct page_info *page, u32 type)
-{
-    panic("%s called\n", __func__);
-    return 1;
-}
-
 void put_page_type(struct page_info *page)
 {
-    panic("%s called\n", __func__);
+    unsigned long nx, x, y = page->u.inuse.type_info;
+
+    do {
+        x  = y;
+        nx = x - 1;
+
+        ASSERT((x & PGT_count_mask) != 0);
+
+        /*
+         * The page should always be validated while a reference is held. The 
+         * exception is during domain destruction, when we forcibly invalidate 
+         * page-table pages if we detect a referential loop.
+         * See domain.c:relinquish_list().
+         */
+        ASSERT((x & PGT_validated) || 
+               test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
+
+        if ( unlikely((nx & PGT_count_mask) == 0) )
+        {
+            /* Record TLB information for flush later. */
+            page->tlbflush_timestamp = tlbflush_current_time();
+        }
+        else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == 
+                           (PGT_pinned | 1)) )
+        {
+            /* Page is now only pinned. Make the back pointer mutable again. */
+            nx |= PGT_va_mutable;
+        }
+    }
+    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+}
+
+
+int get_page_type(struct page_info *page, unsigned long type)
+{
+    unsigned long nx, x, y = page->u.inuse.type_info;
+
+ again:
+    do {
+        x  = y;
+        nx = x + 1;
+        if ( unlikely((nx & PGT_count_mask) == 0) )
+        {
+            MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
+            return 0;
+        }
+        else if ( unlikely((x & PGT_count_mask) == 0) )
+        {
+            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
+            {
+                if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
+                {
+                    /*
+                     * On type change we check to flush stale TLB
+                     * entries. This may be unnecessary (e.g., page
+                     * was GDT/LDT) but those circumstances should be
+                     * very rare.
+                     */
+                    cpumask_t mask =
+                        page_get_owner(page)->domain_dirty_cpumask;
+                    tlbflush_filter(mask, page->tlbflush_timestamp);
+
+                    if ( unlikely(!cpus_empty(mask)) )
+                    {
+                        perfc_incrc(need_flush_tlb_flush);
+                        flush_tlb_mask(mask);
+                    }
+                }
+
+                /* We lose existing type, back pointer, and validity. */
+                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
+                nx |= type;
+
+                /* No special validation needed for writable pages. */
+                /* Page tables and GDT/LDT need to be scanned for validity. */
+                if ( type == PGT_writable_page )
+                    nx |= PGT_validated;
+            }
+        }
+        else
+        {
+            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+            {
+                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+                {
+                    return 0;
+                }
+                else if ( (x & PGT_va_mask) == PGT_va_mutable )
+                {
+                    /* The va backpointer is mutable, hence we update it. */
+                    nx &= ~PGT_va_mask;
+                    nx |= type; /* we know the actual type is correct */
+                }
+                else if ( (type & PGT_va_mask) != PGT_va_mutable )
+                {
+                    ASSERT((type & PGT_va_mask) != (x & PGT_va_mask));
+
+                    /* This table is possibly mapped at multiple locations. */
+                    nx &= ~PGT_va_mask;
+                    nx |= PGT_va_unknown;
+                }
+            }
+            if ( unlikely(!(x & PGT_validated)) )
+            {
+                /* Someone else is updating validation of this page. Wait... */
+                while ( (y = page->u.inuse.type_info) == x )
+                    cpu_relax();
+                goto again;
+            }
+        }
+    }
+    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+
+    if ( unlikely(!(nx & PGT_validated)) )
+    {
+        /* No one else is updating simultaneously. */
+        __set_bit(_PGT_validated, &page->u.inuse.type_info);
+    }
+
+    return 1;
 }
 
 void __init init_frametable(void)
@@ -107,44 +229,148 @@ extern void copy_page(void *dp, void *sp
     }
 }
 
+static int mfn_in_hole(ulong mfn)
+{
+    /* totally cheating */
+    if (mfn >= (0xf0000000UL >> PAGE_SHIFT) &&
+        mfn < (((1UL << 32) - 1) >> PAGE_SHIFT))
+        return 1;
+
+    return 0;
+}
+
+static uint add_extent(struct domain *d, struct page_info *pg, uint order)
+{
+    struct page_extents *pe;
+
+    pe = xmalloc(struct page_extents);
+    if (pe == NULL)
+        return 0;
+
+    pe->pg = pg;
+    pe->order = order;
+    pe->pfn = page_to_mfn(pg);
+
+    list_add_tail(&pe->pe_list, &d->arch.extent_list);
+
+    return pe->pfn;
+}
+
+void free_extents(struct domain *d)
+{
+    /* we just need to free the memory behind list */
+    struct list_head *list;
+    struct list_head *ent;
+    struct list_head *next;
+
+    list = &d->arch.extent_list;
+    ent = list->next;
+
+    while (ent != list) {
+        next = ent->next;
+        xfree(ent);
+        ent = next;
+    }
+}
+
+uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages)
+{
+    uint ext_order;
+    uint ext_nrpages;
+    uint total_nrpages;
+    struct page_info *pg;
+
+    ext_order = cpu_extent_order();
+    ext_nrpages = 1 << ext_order;
+
+    total_nrpages = rma_nrpages;
+
+    /* We only allocate in extent-order chunks, so if the request is not
+     * a multiple of the chunk size you get more than you asked for */
+    while (total_nrpages < nrpages) {
+        pg = alloc_domheap_pages(d, ext_order, 0);
+        if (pg == NULL)
+            return total_nrpages;
+
+        if (add_extent(d, pg, ext_order) == 0) {
+            free_domheap_pages(pg, ext_order);
+            return total_nrpages;
+        }
+        total_nrpages += ext_nrpages;
+    }
+
+    return total_nrpages;
+}
+        
+int allocate_rma(struct domain *d, unsigned int order_pages)
+{
+    ulong rma_base;
+    ulong rma_sz = rma_size(order_pages);
+
+    d->arch.rma_page = alloc_domheap_pages(d, order_pages, 0);
+    if (d->arch.rma_page == NULL) {
+        DPRINTK("Could not allocate order_pages=%d RMA for domain %u\n",
+                order_pages, d->domain_id);
+        return -ENOMEM;
+    }
+    d->arch.rma_order = order_pages;
+
+    rma_base = page_to_maddr(d->arch.rma_page);
+    BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */
+
+    /* XXX */
+    printk("clearing RMA: 0x%lx[0x%lx]\n", rma_base, rma_sz);
+    memset((void *)rma_base, 0, rma_sz);
+
+    return 0;
+}
+
 ulong pfn2mfn(struct domain *d, long pfn, int *type)
 {
     ulong rma_base_mfn = page_to_mfn(d->arch.rma_page);
     ulong rma_size_mfn = 1UL << d->arch.rma_order;
-    ulong mfn;
-    int t;
+    struct page_extents *pe;
 
     if (pfn < rma_size_mfn) {
-        mfn = pfn + rma_base_mfn;
-        t = PFN_TYPE_RMA;
-    } else if (pfn >= d->arch.logical_base_pfn &&
-               pfn < d->arch.logical_end_pfn) {
-        if (test_bit(_DOMF_privileged, &d->domain_flags)) {
-            /* This hack allows dom0 to map all memory, necessary to
-             * initialize domU state. */
-            mfn = pfn;
-        } else {
-            panic("we do not handle the logical area yet\n");
-            mfn = 0;
-        }
-
-        t = PFN_TYPE_LOGICAL;
-    } else {
-        /* don't know */
-        mfn = pfn;
-        t = PFN_TYPE_IO;
-    }
-
-    if (type != NULL)
-        *type = t;
-
-    return mfn;
+        if (type)
+            *type = PFN_TYPE_RMA;
+        return pfn + rma_base_mfn;
+    }
+
+    if (test_bit(_DOMF_privileged, &d->domain_flags) &&
+        mfn_in_hole(pfn)) {
+        if (type)
+            *type = PFN_TYPE_IO;
+        return pfn;
+    }
+
+    /* quick tests first */
+    list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
+        uint end_pfn = pe->pfn + (1 << pe->order);
+
+        if (pfn >= pe->pfn && pfn < end_pfn) {
+            if (type)
+                *type = PFN_TYPE_LOGICAL;
+            return page_to_mfn(pe->pg) + (pfn - pe->pfn);
+        }
+    }
+
+    /* This hack allows dom0 to map all memory, necessary to
+     * initialize domU state. */
+    if (test_bit(_DOMF_privileged, &d->domain_flags)) {
+        if (type)
+            *type = PFN_TYPE_REMOTE;
+        return pfn;
+    }
+
+    BUG();
+    return 0;
 }
 
 void guest_physmap_add_page(
     struct domain *d, unsigned long gpfn, unsigned long mfn)
 {
-    panic("%s\n", __func__);
+    printk("%s(%d, 0x%lx, 0x%lx)\n", __func__, d->domain_id, gpfn, mfn);
 }
 void guest_physmap_remove_page(
     struct domain *d, unsigned long gpfn, unsigned long mfn)
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/mpic.c
--- a/xen/arch/powerpc/mpic.c   Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/mpic.c   Fri Sep 01 13:04:02 2006 -0600
@@ -498,10 +498,10 @@ static void mpic_enable_irq(unsigned int
 
 #ifdef CONFIG_MPIC_BROKEN_U3
        if (mpic->flags & MPIC_BROKEN_U3) {
-               unsigned int src = irq - mpic->irq_offset;
-               if (mpic_is_ht_interrupt(mpic, src) &&
+               unsigned int bsrc = irq - mpic->irq_offset;
+               if (mpic_is_ht_interrupt(mpic, bsrc) &&
                    (irq_desc[irq].status & IRQ_LEVEL))
-                       mpic_ht_end_irq(mpic, src);
+                       mpic_ht_end_irq(mpic, bsrc);
        }
 #endif /* CONFIG_MPIC_BROKEN_U3 */
 }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/ofd_fixup.c
--- a/xen/arch/powerpc/ofd_fixup.c      Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/ofd_fixup.c      Fri Sep 01 13:04:02 2006 -0600
@@ -13,7 +13,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
@@ -24,6 +24,7 @@
 #include <xen/version.h>
 #include <public/xen.h>
 #include "of-devtree.h"
+#include "oftree.h"
 
 #undef RTAS
 
@@ -316,91 +317,6 @@ static ofdn_t ofd_rtas_props(void *m)
 }
 #endif
 
-struct mem_reg {
-    u64 addr;
-    u64 sz;
-};
-
-static ofdn_t ofd_memory_chunk_create(void *m, ofdn_t p,
-        const char *ppath,
-        const char *name,
-        const char *dt,
-        ulong start, ulong size)
-{
-    struct mem_reg reg;
-    char path[128];
-    ulong l;
-    u32 v;
-    ofdn_t n;
-    ulong nl = strlen(name) + 1;
-    ulong dtl = strlen(dt) + 1;
-
-    l = snprintf(path, sizeof (path), "%s/%s@%lx", ppath, name, start);
-    n = ofd_node_add(m, p, path, l + 1);
-    ofd_prop_add(m, n, "name", name, nl);
-
-    v = 1;
-    ofd_prop_add(m, n, "#address-cells", &v, sizeof (v));
-    v = 0;
-    ofd_prop_add(m, n, "#size-cells", &v, sizeof (v));
-
-    ofd_prop_add(m, n, "device_type", dt, dtl);
-
-    /* physical addresses usable without regard to OF */
-    reg.addr = start;
-    reg.sz = size;
-    ofd_prop_add(m, n, "reg", &reg, sizeof (reg));
-
-    return n;
-}
-
-static ofdn_t ofd_memory_props(void *m, struct domain *d, ulong eoload)
-{
-    ofdn_t n = -1;
-    ulong start = 0;
-    static char name[] = "memory";
-    ulong mem_size = rma_size(d->arch.rma_order);
-    ulong chunk_size = rma_size(d->arch.rma_order);
-
-    /* Remove all old memory props */
-    do {
-        ofdn_t old;
-
-        old = ofd_node_find_by_prop(m, OFD_ROOT, "device_type",
-                                    name, sizeof(name));
-        if (old <= 0) break;
-
-        ofd_node_prune(m, old);
-    } while (1);
-
-    while (start < mem_size) {
-        ulong size = (mem_size < chunk_size) ? mem_size : chunk_size;
-
-        n = ofd_memory_chunk_create(m, OFD_ROOT, "", "memory", "memory",
-                start, size);
-
-        if (start == 0) {
-            /* We are processing the first and RMA chunk */
-
-            /* free list of physical addresses available after OF and
-             * client program have been accounted for */
-            struct mem_reg avail[] = {
-                /* 0 til OF @ 32MiB - 16KiB stack */
-                { .addr = 0, .sz = ((32 << 20) - (16 << 10)) },
-                /* end of loaded material to the end the chunk - 1 page */
-                { .addr = eoload, .sz = chunk_size - eoload - PAGE_SIZE },
-                /* the last page is reserved for xen_start_info */
-            };
-            ofd_prop_add(m, n, "available", &avail,
-                    sizeof (avail));
-        }
-
-        start += size;
-        mem_size -= size;
-    }
-    return n;
-}
-
 static ofdn_t ofd_xen_props(void *m, struct domain *d, start_info_t *si)
 {
     ofdn_t n;
@@ -440,9 +356,8 @@ static ofdn_t ofd_xen_props(void *m, str
     }
     return n;
 }
-extern int ofd_dom0_fixup(
-    struct domain *d, ulong oftree, start_info_t *si, ulong dst);
-int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si, ulong eoload)
+
+int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si)
 {
     void *m;
     const ofdn_t n = OFD_ROOT;
@@ -470,8 +385,8 @@ int ofd_dom0_fixup(struct domain *d, ulo
     printk("Add /chosen props\n");
     ofd_chosen_props(m, (char *)si->cmd_line);
 
-    printk("fix /memory@0 props\n");
-    ofd_memory_props(m, d, eoload);
+    printk("fix /memory props\n");
+    ofd_memory_props(m, d);
 
     printk("fix /xen props\n");
     ofd_xen_props(m, d, si);
@@ -497,8 +412,8 @@ int ofd_dom0_fixup(struct domain *d, ulo
     r = ofd_prop_add(m, n, "ibm,partition-no", &did, sizeof(did));
     ASSERT( r > 0 );
 
-    const char dom0[] = "dom0";
-    r = ofd_prop_add(m, n, "ibm,partition-name", dom0, sizeof (dom0));
+    const char d0[] = "dom0";
+    r = ofd_prop_add(m, n, "ibm,partition-name", d0, sizeof (d0));
     ASSERT( r > 0 );
 
 
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/oftree.h
--- a/xen/arch/powerpc/oftree.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/oftree.h Fri Sep 01 13:04:02 2006 -0600
@@ -20,14 +20,18 @@
 
 #ifndef _OFTREE_H
 #define _OFTREE_H
+#include <xen/multiboot.h>
 
 extern ulong oftree;
 extern ulong oftree_len;
+extern ulong oftree_end;
 
-extern int ofd_dom0_fixup(
-    struct domain *d, ulong oftree, start_info_t *si, ulong dst);
+extern int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si);
+extern void ofd_memory_props(void *m, struct domain *d);
 
 extern int firmware_image_start[0];
 extern int firmware_image_size[0];
 
+extern void memory_init(module_t *mod, int mcount);
+
 #endif  /* #ifndef _OFTREE_H */
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/papr/tce.c
--- a/xen/arch/powerpc/papr/tce.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/papr/tce.c       Fri Sep 01 13:04:02 2006 -0600
@@ -47,7 +47,7 @@ static void h_put_tce(struct cpu_user_re
         regs->gprs[3] = H_Success;
     }
 }
-    
+
 static void h_get_tce(struct cpu_user_regs *regs)
 {
     u32 liobn = regs->gprs[4];
@@ -57,7 +57,7 @@ static void h_get_tce(struct cpu_user_re
     printk("%s: liobn: 0x%x ioba: 0x%lx \n", __func__, liobn, ioba);
 #endif
     regs->gprs[3] = H_Function;
-    for(;;) ;
+    BUG();
 }
 
 static void h_stuff_tce(struct cpu_user_regs *regs)
@@ -76,7 +76,7 @@ static void h_stuff_tce(struct cpu_user_
             count);
 #endif
     regs->gprs[3] = H_Function;
-    for(;;);
+    BUG();
 }
    
 __init_papr_hcall(H_PUT_TCE, h_put_tce);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/papr/xlate.c
--- a/xen/arch/powerpc/papr/xlate.c     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/papr/xlate.c     Fri Sep 01 13:04:02 2006 -0600
@@ -30,12 +30,6 @@
 #include <asm/papr.h>
 #include <asm/hcalls.h>
 
-static void not_yet(struct cpu_user_regs *regs)
-{
-    printk("not implemented yet: 0x%lx\n", regs->gprs[3]);
-    for (;;);
-}
-
 #ifdef USE_PTE_INSERT
 static inline void pte_insert(union pte volatile *pte,
         ulong vsid, ulong rpn, ulong lrpn)
@@ -160,13 +154,13 @@ static void h_enter(struct cpu_user_regs
         }
 
         /* get correct pgshift value */
-        pgshift = d->arch.large_page_shift[lp_size];
+        pgshift = d->arch.large_page_order[lp_size] + PAGE_SHIFT;
     }
 
     /* get the correct logical RPN in terms of 4K pages need to mask
      * off lp bits and unused arpn bits if this is a large page */
 
-    lpn = ~0ULL << (pgshift - 12);
+    lpn = ~0ULL << (pgshift - PAGE_SHIFT);
     lpn = pte.bits.rpn & lpn;
 
     rpn = pfn2mfn(d, lpn, &mtype);
@@ -493,8 +487,42 @@ static void h_remove(struct cpu_user_reg
     pte_tlbie(&lpte, ptex);
 }
 
+static void h_read(struct cpu_user_regs *regs)
+{
+    ulong flags = regs->gprs[4];
+    ulong ptex = regs->gprs[5];
+    struct vcpu *v = get_current();
+    struct domain *d = v->domain;
+    struct domain_htab *htab = &d->arch.htab;
+    union pte volatile *pte;
+
+    if (flags & H_READ_4)
+        ptex &= ~0x3UL;
+
+    if (ptex > (1UL << htab->log_num_ptes)) {
+        regs->gprs[3] = H_Parameter;
+        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
+        return;
+    }
+    pte = &htab->map[ptex];
+    regs->gprs[4] = pte[0].words.vsid;
+    regs->gprs[5] = pte[0].words.rpn;
+
+    if (!(flags & H_READ_4)) {
+        /* dump another 3 PTEs */
+        regs->gprs[6] = pte[1].words.vsid;
+        regs->gprs[7] = pte[1].words.rpn;
+        regs->gprs[8] = pte[2].words.vsid;
+        regs->gprs[9] = pte[2].words.rpn;
+        regs->gprs[10] = pte[3].words.vsid;
+        regs->gprs[11] = pte[3].words.rpn;
+    }
+
+    regs->gprs[3] = H_Success;
+}
+
 __init_papr_hcall(H_ENTER, h_enter);
-__init_papr_hcall(H_READ, not_yet);
+__init_papr_hcall(H_READ, h_read);
 __init_papr_hcall(H_REMOVE, h_remove);
 __init_papr_hcall(H_CLEAR_MOD, h_clear_mod);
 __init_papr_hcall(H_CLEAR_REF, h_clear_ref);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/powerpc64/exceptions.S
--- a/xen/arch/powerpc/powerpc64/exceptions.S   Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/powerpc64/exceptions.S   Fri Sep 01 13:04:02 2006 -0600
@@ -514,6 +514,43 @@ _GLOBAL(sleep)
     mtmsrd r3
     blr
 
+/* The primary processor issues a firmware call to spin us up at this
+ * address, passing our CPU number in r3.  We only need a function
+ * entry point instead of a descriptor since this is never called from
+ * C code.
+ */    
     .globl spin_start
 spin_start:
+    /* Write our processor number as an acknowledgment that we're alive.  */
+    LOADADDR(r14, __spin_ack)
+    stw r3, 0(r14)
+    sync
+    /* If NR_CPUS is too small, we should just spin forever.  */
+    LOADADDR(r15, NR_CPUS)
+    cmpd r3, r15
+    blt 2f     
     b .
+    /* Find our index in the array of processor_area struct pointers.  */
+2:  LOADADDR(r14, global_cpu_table)
+    muli r15, r3, 8
+    add r14, r14, r15
+    /* Spin until the pointer for our processor goes valid.  */
+1:  ld r15, 0(r14)
+    cmpldi r15, 0
+    beq 1b
+    /* Dereference the pointer and load our stack pointer.  */
+    isync
+    ld r1, PAREA_stack(r15)
+    li r14, STACK_FRAME_OVERHEAD
+    sub r1, r1, r14
+    /* Load up the TOC and entry point for the C function to be called.  */
+    LOADADDR(r14, secondary_cpu_init)
+    ld r2, 8(r14)
+    ld r11, 0(r14)
+    mtctr r11
+    /* Warning: why do we need this synchronizing instruction on 970FX?  */
+    isync
+    /* Jump into C code now.  */
+    bctrl
+    nop
+    b .
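
The assembly above is one half of a handshake: the primary publishes a processor_area pointer in global_cpu_table (see init_parea() in the setup.c hunk further down), and the secondary spins until that pointer goes non-NULL, then picks up its stack and calls into C. A minimal single-threaded model of the idea; all toy_* names are invented, and the real spinning of course happens on another processor, not inline like this.

    #include <stdio.h>
    #include <stddef.h>

    #define TOY_NR_CPUS 4

    struct toy_parea { int whoami; void *stack_top; };

    static struct toy_parea *volatile toy_cpu_table[TOY_NR_CPUS];

    /* Primary side: publish the pointer; this store is what releases the
     * secondary from its spin loop. */
    static void toy_kick(int cpu, struct toy_parea *pa)
    {
        pa->whoami = cpu;
        toy_cpu_table[cpu] = pa;
    }

    /* Secondary side: wait for a non-NULL pointer, then pick up the stack
     * (the ld/cmpldi/beq loop in the assembly above). */
    static void toy_secondary(int cpu)
    {
        while (toy_cpu_table[cpu] == NULL)
            ;
        printf("cpu %d alive, stack top %p\n", cpu, toy_cpu_table[cpu]->stack_top);
    }

    int main(void)
    {
        static char stack[4096];
        struct toy_parea pa = { 0, stack + sizeof(stack) };
        toy_kick(1, &pa);       /* publish first so the demo does not spin */
        toy_secondary(1);
        return 0;
    }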
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/powerpc64/ppc970.c
--- a/xen/arch/powerpc/powerpc64/ppc970.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/powerpc64/ppc970.c       Fri Sep 01 13:04:02 2006 -0600
@@ -13,9 +13,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #include <xen/config.h>
@@ -31,25 +32,68 @@
 
 #undef SERIALIZE
 
-unsigned int cpu_rma_order(void)
+extern volatile struct processor_area * volatile global_cpu_table[];
+
+struct rma_settings {
+    int order;
+    int rmlr0;
+    int rmlr12;
+};
+
+static struct rma_settings rma_orders[] = {
+    { .order = 26, .rmlr0 = 0, .rmlr12 = 3, }, /*  64 MB */
+    { .order = 27, .rmlr0 = 1, .rmlr12 = 3, }, /* 128 MB */
+    { .order = 28, .rmlr0 = 1, .rmlr12 = 0, }, /* 256 MB */
+    { .order = 30, .rmlr0 = 0, .rmlr12 = 2, }, /*   1 GB */
+    { .order = 34, .rmlr0 = 0, .rmlr12 = 1, }, /*  16 GB */
+    { .order = 38, .rmlr0 = 0, .rmlr12 = 0, }, /* 256 GB */
+};
+
+static uint log_large_page_sizes[] = {
+    4 + 20, /* 1 << (4 + 20) == 16 MB */
+};
+
+static struct rma_settings *cpu_find_rma(unsigned int order)
 {
-    /* XXX what about non-HV mode? */
-    uint rma_log_size = 6 + 20; /* 64M */
-    return rma_log_size - PAGE_SHIFT;
+    int i;
+    for (i = 0; i < ARRAY_SIZE(rma_orders); i++) {
+        if (rma_orders[i].order == order)
+            return &rma_orders[i];
+    }
+    return NULL;
 }
 
-void cpu_initialize(void)
+unsigned int cpu_default_rma_order_pages(void)
 {
-    ulong stack;
+    return rma_orders[0].order - PAGE_SHIFT;
+}
 
-    parea = xmalloc(struct processor_area);
+unsigned int cpu_large_page_orders(uint *sizes, uint max)
+{
+    uint i = 0;
+
+    while (i < max && i < ARRAY_SIZE(log_large_page_sizes)) {
+        sizes[i] = log_large_page_sizes[i] - PAGE_SHIFT;
+        ++i;
+    }
+
+    return i;
+}
+
+unsigned int cpu_extent_order(void)
+{
+    return log_large_page_sizes[0] - PAGE_SHIFT;
+}
+
+void cpu_initialize(int cpuid)
+{
+    ulong r1, r2;
+    __asm__ __volatile__ ("mr %0, 1" : "=r" (r1));
+    __asm__ __volatile__ ("mr %0, 2" : "=r" (r2));
+
+    /* This is SMP safe because the compiler must use r13 for it.  */
+    parea = global_cpu_table[cpuid];
     ASSERT(parea != NULL);
-
-    stack = (ulong)alloc_xenheap_pages(STACK_ORDER);
-
-    ASSERT(stack != 0);
-    parea->hyp_stack_base = (void *)(stack + STACK_SIZE);
-    printk("stack is here: %p\n", parea->hyp_stack_base);
 
     mthsprg0((ulong)parea); /* now ready for exceptions */
 
@@ -79,7 +123,10 @@ void cpu_initialize(void)
     s |= 1UL << (63-3);     /* ser-gp */
     hid0.word |= s;
 #endif
-    printk("hid0: 0x%016lx\n", hid0.word);
+
+    printk("CPU #%d: Hello World! SP = %lx TOC = %lx HID0 = %lx\n", 
+           smp_processor_id(), r1, r2, hid0.word);
+
     mthid0(hid0.word);
 
     union hid1 hid1;
@@ -115,45 +162,22 @@ void cpu_init_vcpu(struct vcpu *v)
 {
     struct domain *d = v->domain;
     union hid4 hid4;
-    ulong rma_base = page_to_maddr(d->arch.rma_page);
-    ulong rma_size = rma_size(d->arch.rma_order);
+    struct rma_settings *rma_settings;
 
     hid4.word = mfhid4();
 
     hid4.bits.lpes0 = 0; /* exceptions set MSR_HV=1 */
     hid4.bits.lpes1 = 1; /* RMA applies */
 
-    hid4.bits.rmor = rma_base >> 26;
+    hid4.bits.rmor = page_to_maddr(d->arch.rma_page) >> 26;
 
     hid4.bits.lpid01 = d->domain_id & 3;
     hid4.bits.lpid25 = (d->domain_id >> 2) & 0xf;
 
-    switch (rma_size) {
-        case 256ULL << 30:  /* 256 GB */
-            hid4.bits.rmlr0 = 0;
-            hid4.bits.rmlr12 = 0;
-            break;
-        case 16ULL << 30:   /* 16 GB */
-            hid4.bits.rmlr0 = 0;
-            hid4.bits.rmlr12 = 1;
-            break;
-        case 1ULL << 30:    /* 1 GB */
-            hid4.bits.rmlr0 = 0;
-            hid4.bits.rmlr12 = 2;
-            break;
-        case 64ULL << 20:   /* 64 MB */
-            hid4.bits.rmlr0 = 0;
-            hid4.bits.rmlr12 = 3;
-            break;
-        case 256ULL << 20:  /* 256 MB */
-            hid4.bits.rmlr0 = 1;
-            hid4.bits.rmlr12 = 0;
-            break;
-        case 128ULL << 20:  /* 128 MB */
-            hid4.bits.rmlr0 = 1;
-            hid4.bits.rmlr12 = 3;
-            break;
-    }
+    rma_settings = cpu_find_rma(d->arch.rma_order + PAGE_SHIFT);
+    ASSERT(rma_settings != NULL);
+    hid4.bits.rmlr0 = rma_settings->rmlr0;
+    hid4.bits.rmlr12 = rma_settings->rmlr12;
 
     v->arch.cpu.hid4.word = hid4.word;
 }
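
The table lookup above replaces the old switch on raw RMA byte sizes: d->arch.rma_order counts pages, so cpu_find_rma() is handed rma_order + PAGE_SHIFT and returns the RMLR bit pair for that size. A quick standalone check of the mapping, with the table copied from the hunk and PAGE_SHIFT assumed to be 12:

    #include <stdio.h>
    #include <stddef.h>

    #define PAGE_SHIFT 12
    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    struct rma_settings { int order; int rmlr0; int rmlr12; };

    static struct rma_settings rma_orders[] = {
        { 26, 0, 3 }, /*  64 MB */
        { 27, 1, 3 }, /* 128 MB */
        { 28, 1, 0 }, /* 256 MB */
        { 30, 0, 2 }, /*   1 GB */
        { 34, 0, 1 }, /*  16 GB */
        { 38, 0, 0 }, /* 256 GB */
    };

    static struct rma_settings *cpu_find_rma(unsigned int order)
    {
        size_t i;
        for (i = 0; i < ARRAY_SIZE(rma_orders); i++)
            if (rma_orders[i].order == (int)order)
                return &rma_orders[i];
        return NULL;
    }

    int main(void)
    {
        unsigned int rma_order_pages = 26 - PAGE_SHIFT;   /* default 64 MB RMA */
        struct rma_settings *s = cpu_find_rma(rma_order_pages + PAGE_SHIFT);
        if (s)
            printf("64 MB RMA -> rmlr0=%d rmlr12=%d\n", s->rmlr0, s->rmlr12);
        return 0;
    }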
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/setup.c
--- a/xen/arch/powerpc/setup.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/setup.c  Fri Sep 01 13:04:02 2006 -0600
@@ -43,9 +43,9 @@
 #include <asm/percpu.h>
 #include "exceptions.h"
 #include "of-devtree.h"
+#include "oftree.h"
 
 #define DEBUG
-unsigned long xenheap_phys_end;
 
 /* opt_noht: If true, Hyperthreading is ignored. */
 int opt_noht = 0;
@@ -53,6 +53,14 @@ boolean_param("noht", opt_noht);
 
 int opt_earlygdb = 0;
 boolean_param("earlygdb", opt_earlygdb);
+
+/* opt_nosmp: If true, secondary processors are ignored. */
+static int opt_nosmp = 0;
+boolean_param("nosmp", opt_nosmp);
+
+/* maxcpus: maximum number of CPUs to activate. */
+static unsigned int max_cpus = NR_CPUS;
+integer_param("maxcpus", max_cpus);
 
 u32 tlbflush_clock = 1U;
 DEFINE_PER_CPU(u32, tlbflush_time);
@@ -61,9 +69,12 @@ unsigned long wait_init_idle;
 unsigned long wait_init_idle;
 ulong oftree;
 ulong oftree_len;
+ulong oftree_end;
 
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 cpumask_t cpu_online_map; /* missing ifdef in schedule.c */
+cpumask_t cpu_present_map;
+cpumask_t cpu_possible_map;
 
 /* XXX get this from ISA node in device tree */
 ulong isa_io_base;
@@ -74,6 +85,8 @@ extern void idle_loop(void);
 
 /* move us to a header file */
 extern void initialize_keytable(void);
+
+volatile struct processor_area * volatile global_cpu_table[NR_CPUS];
 
 int is_kernel_text(unsigned long addr)
 {
@@ -169,6 +182,21 @@ static void __init start_of_day(void)
 
     percpu_free_unused_areas();
 
+    {
+        /* FIXME: Xen assumes that an online CPU is a schedulable
+         * CPU, but we just are not there yet. Remove this fragment when
+         * scheduling processors actually works. */
+        int cpuid;
+
+        printk("WARNING!: Taking all secondary CPUs offline\n");
+
+        for_each_online_cpu(cpuid) {
+            if (cpuid == 0)
+                continue;
+            cpu_clear(cpuid, cpu_online_map);
+        }
+    }
+
     initialize_keytable();
     /* Register another key that will allow for the Hardware Probe
      * to be contacted; this works with RiscWatch probes and should
@@ -193,17 +221,60 @@ void startup_cpu_idle_loop(void)
     reset_stack_and_jump(idle_loop);
 }
 
+static void init_parea(int cpuid)
+{
+    /* Be careful not to shadow the global variable.  */
+    volatile struct processor_area *pa;
+    void *stack;
+
+    pa = xmalloc(struct processor_area);
+    if (pa == NULL)
+        panic("%s: failed to allocate parea for cpu #%d\n", __func__, cpuid);
+
+    stack = alloc_xenheap_pages(STACK_ORDER);
+    if (stack == NULL)
+        panic("%s: failed to allocate stack (order %d) for cpu #%d\n", 
+              __func__, STACK_ORDER, cpuid);
+
+    pa->whoami = cpuid;
+    pa->hyp_stack_base = (void *)((ulong)stack + STACK_SIZE);
+
+    /* This store has the effect of invoking secondary_cpu_init.  */
+    global_cpu_table[cpuid] = pa;
+    mb();
+}
+
+static int kick_secondary_cpus(int maxcpus)
+{
+    int cpuid;
+
+    for_each_present_cpu(cpuid) {
+        if (cpuid == 0)
+            continue;
+        if (cpuid >= maxcpus)
+            break;
+        init_parea(cpuid);
+        cpu_set(cpuid, cpu_online_map);
+        cpu_set(cpuid, cpu_possible_map);
+    }
+
+    return 0;
+}
+
+/* This is the first C code that secondary processors invoke.  */
+int secondary_cpu_init(int cpuid, unsigned long r4);
+int secondary_cpu_init(int cpuid, unsigned long r4)
+{
+    cpu_initialize(cpuid);
+    while(1);
+}
+
 static void __init __start_xen(multiboot_info_t *mbi)
 {
     char *cmdline;
     module_t *mod = (module_t *)((ulong)mbi->mods_addr);
-    ulong heap_start;
-    ulong modules_start, modules_size;
-    ulong eomem = 0;
-    ulong heap_size = 0;
-    ulong bytes = 0;
-    ulong freemem = (ulong)_end;
-    ulong oftree_end;
+    ulong dom0_start, dom0_len;
+    ulong initrd_start, initrd_len;
 
     memcpy(0, exception_vectors, exception_vectors_end - exception_vectors);
     synchronize_caches(0, exception_vectors_end - exception_vectors);
@@ -226,6 +297,9 @@ static void __init __start_xen(multiboot
     console_start_sync();
 #endif
 
+    /* we give the first RMA to the hypervisor */
+    xenheap_phys_end = rma_size(cpu_default_rma_order_pages());
+
     /* Check that we have at least one Multiboot module. */
     if (!(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0)) {
         panic("FATAL ERROR: Require at least one Multiboot module.\n");
@@ -234,10 +308,6 @@ static void __init __start_xen(multiboot
     if (!(mbi->flags & MBI_MEMMAP)) {
         panic("FATAL ERROR: Bootloader provided no memory information.\n");
     }
-
-    /* mark the begining of images */
-    modules_start = mod[0].mod_start;
-    modules_size = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
 
     /* OF dev tree is the last module */
     oftree = mod[mbi->mods_count-1].mod_start;
@@ -249,71 +319,7 @@ static void __init __start_xen(multiboot
     mod[mbi->mods_count-1].mod_end = 0;
     --mbi->mods_count;
 
-    printk("Physical RAM map:\n");
-
-    /* lets find out how much memory there is */
-    while (bytes < mbi->mmap_length) {
-        u64 end;
-        u64 addr;
-        u64 size;
-
-        memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr + bytes);
-        addr = ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
-        size = ((u64)map->length_high << 32) | (u64)map->length_low;
-        end = addr + size;
-
-        printk(" %016lx - %016lx (usable)\n", addr, end);
-
-        if (addr > eomem) {
-            printk("found a hole skipping remainder of memory at:\n"
-                   " %016lx and beyond\n", addr);
-            break;
-        }
-        if (end > eomem) {
-            eomem = end;
-        }
-        bytes += map->size + 4;
-    }
-
-    printk("System RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10);
-
-    /* top of memory */
-    max_page = PFN_DOWN(ALIGN_DOWN(eomem, PAGE_SIZE));
-    total_pages = max_page;
-
-    /* Architecturally the first 4 pages are exception hendlers, we
-     * will also be copying down some code there */
-    heap_start = init_boot_allocator(4 << PAGE_SHIFT);
-
-    /* we give the first RMA to the hypervisor */
-    xenheap_phys_end = rma_size(cpu_rma_order());
-
-    /* allow everything else to be allocated */
-    init_boot_pages(xenheap_phys_end, eomem);
-    init_frametable();
-    end_boot_allocator();
-
-    /* Add memory between the beginning of the heap and the beginning
-     * of out text */
-    init_xenheap_pages(heap_start, (ulong)_start);
-
-    /* move the modules to just after _end */
-    if (modules_start) {
-        printk("modules at: %016lx - %016lx\n", modules_start,
-                modules_start + modules_size);
-        freemem = ALIGN_UP(freemem, PAGE_SIZE);
-        memmove((void *)freemem, (void *)modules_start, modules_size);
-
-        oftree -= modules_start - freemem;
-        modules_start = freemem;
-        freemem += modules_size;
-        printk("  moved to: %016lx - %016lx\n", modules_start,
-                modules_start + modules_size);
-    }
-
-    /* the rest of the xenheap, starting at the end of modules */
-    init_xenheap_pages(freemem, xenheap_phys_end);
-
+    memory_init(mod, mbi->mods_count);
 
 #ifdef OF_DEBUG
     printk("ofdump:\n");
@@ -321,13 +327,10 @@ static void __init __start_xen(multiboot
     ofd_walk((void *)oftree, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL);
 #endif
 
-    heap_size = xenheap_phys_end - heap_start;
-
-    printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10);
-
     percpu_init_areas();
 
-    cpu_initialize();
+    init_parea(0);
+    cpu_initialize(0);
 
 #ifdef CONFIG_GDB
     initialise_gdb();
@@ -335,6 +338,14 @@ static void __init __start_xen(multiboot
         debugger_trap_immediate();
 #endif
 
+    /* Deal with secondary processors.  */
+    if (opt_nosmp) {
+        printk("nosmp: leaving secondary processors spinning forever\n");
+    } else {
+        printk("spinning up at most %d total processors ...\n", max_cpus);
+        kick_secondary_cpus(max_cpus);
+    }
+
     start_of_day();
 
     /* Create initial domain 0. */
@@ -353,22 +364,26 @@ static void __init __start_xen(multiboot
     /* Scrub RAM that is still free and so may go to an unprivileged domain. */
     scrub_heap_pages();
 
-    /*
-     * We're going to setup domain0 using the module(s) that we
-     * stashed safely above our heap. The second module, if present,
-     * is an initrd ramdisk.  The last module is the OF devtree.
-     */
-    if (construct_dom0(dom0,
-                       modules_start, 
-                       mod[0].mod_end-mod[0].mod_start,
-                       (mbi->mods_count == 1) ? 0 :
-                       modules_start + 
-                       (mod[1].mod_start-mod[0].mod_start),
-                       (mbi->mods_count == 1) ? 0 :
-                       mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
+    dom0_start = mod[0].mod_start;
+    dom0_len = mod[0].mod_end - mod[0].mod_start;
+    if (mbi->mods_count > 1) {
+        initrd_start = mod[1].mod_start;
+        initrd_len = mod[1].mod_end - mod[1].mod_start;
+    } else {
+        initrd_start = 0;
+        initrd_len = 0;
+    }
+    if (construct_dom0(dom0, dom0_start, dom0_len,
+                       initrd_start, initrd_len,
                        cmdline) != 0) {
         panic("Could not set up DOM0 guest OS\n");
     }
+
+    init_xenheap_pages(ALIGN_UP(dom0_start, PAGE_SIZE),
+                 ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE));
+    if (initrd_start)
+        init_xenheap_pages(ALIGN_UP(initrd_start, PAGE_SIZE),
+                     ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE));
 
     init_trace_bufs();
 
@@ -407,6 +422,8 @@ void arch_get_xen_caps(xen_capabilities_
 void arch_get_xen_caps(xen_capabilities_info_t info)
 {
 }
+
+
 
 /*
  * Local variables:
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/xen.lds.S
--- a/xen/arch/powerpc/xen.lds.S        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/xen.lds.S        Fri Sep 01 13:04:02 2006 -0600
@@ -10,11 +10,15 @@ SEARCH_DIR("=/usr/local/lib64"); SEARCH_
 SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib");
 /* Do we need any of these for elf?
    __DYNAMIC = 0;    */
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS;
+}   
 SECTIONS
 {
   /* Read-only sections, merged into text segment: */
   PROVIDE (__executable_start = 0x10000000); . = 0x10000000 + SIZEOF_HEADERS;
-  .interp         : { *(.interp) }
+  .interp         : { *(.interp) } :text
   .hash           : { *(.hash) }
   .dynsym         : { *(.dynsym) }
   .dynstr         : { *(.dynstr) }
@@ -103,7 +107,7 @@ SECTIONS
   PROVIDE (__fini_array_end = .);
   .data           :
   {
-    *(.data .data.* .gnu.linkonce.d.*)
+    *(.data .gnu.linkonce.d.*)
     SORT(CONSTRUCTORS)
   }
 
@@ -121,7 +125,7 @@ SECTIONS
   __inithcall_end = .;
 
   __per_cpu_start = .;
-  .data.percpu : { *(.data.percpu) } :text
+  .data.percpu : { *(.data.percpu) }
   __per_cpu_data_end = .;
   . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT);
   . = ALIGN(STACK_SIZE);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/io.c     Fri Sep 01 13:04:02 2006 -0600
@@ -646,9 +646,13 @@ static void hvm_mmio_assist(struct cpu_u
         break;
 
     case INSTR_BT:
-        index = operand_index(src);
-        value = get_reg_value(size, index, 0, regs);
-
+        if ( src & REGISTER )
+        {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+        }
+        else if ( src & IMMEDIATE )
+            value = mmio_opp->immediate;
         if (p->u.data & (1 << (value & ((1 << 5) - 1))))
             regs->eflags |= X86_EFLAGS_CF;
         else
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/platform.c       Fri Sep 01 13:04:02 2006 -0600
@@ -652,6 +652,23 @@ static int hvm_decode(int realmode, unsi
         instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
         return DECODE_success;
 
+    case 0xBA:
+        if (((opcode[1] >> 3) & 7) == 4) /* BT $imm8, m16/32/64 */
+        {
+            instr->instr = INSTR_BT;
+            GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+            instr->immediate =
+                    (signed char)get_immediate(realmode, opcode+1, BYTE);
+            instr->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
+            instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
+            return DECODE_success;
+        }
+        else
+        {
+            printf("0f %x, This opcode subtype isn't handled yet\n", *opcode);
+            return DECODE_failure;
+        }
+
     default:
         printf("0f %x, This opcode isn't handled yet\n", *opcode);
         return DECODE_failure;
@@ -1002,10 +1019,17 @@ void handle_mmio(unsigned long va, unsig
             mmio_opp->operand[0] = mmio_inst.operand[0]; /* bit offset */
             mmio_opp->operand[1] = mmio_inst.operand[1]; /* bit base */
 
-            index = operand_index(mmio_inst.operand[0]);
-            size = operand_size(mmio_inst.operand[0]);
-            value = get_reg_value(size, index, 0, regs);
-
+            if ( mmio_inst.operand[0] & REGISTER )
+            { 
+                index = operand_index(mmio_inst.operand[0]);
+                size = operand_size(mmio_inst.operand[0]);
+                value = get_reg_value(size, index, 0, regs);
+            }
+            else if ( mmio_inst.operand[0] & IMMEDIATE )
+            {
+                mmio_opp->immediate = mmio_inst.immediate;
+                value = mmio_inst.immediate;
+            } 
             send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1,
                           mmio_inst.op_size, 0, IOREQ_READ, 0);
             break;
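
For the new BT-with-immediate path, the bit offset drives both the MMIO request address (gpa + (value >> 5)) and the bit tested in the returned data (value & 31), which then lands in EFLAGS.CF. The sketch below only restates that arithmetic from the hunks for a couple of inputs; the names are invented.

    #include <stdio.h>
    #include <stdint.h>

    static void toy_bt(long value, uint64_t gpa, uint32_t word_read_back)
    {
        uint64_t req_addr = gpa + (value >> 5);          /* as in the hunk */
        unsigned bit      = value & ((1 << 5) - 1);
        int cf = (word_read_back & (1u << bit)) != 0;    /* what sets EFLAGS.CF */

        printf("bt %ld: request at %#llx, bit %u, CF=%d\n",
               value, (unsigned long long)req_addr, bit, cf);
    }

    int main(void)
    {
        toy_bt(0,  0x1000, 0x1);    /* bit 0 of the fetched word is set  -> CF=1 */
        toy_bt(37, 0x1000, 0x0);    /* bit 5 of the fetched word is clear -> CF=0 */
        return 0;
    }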
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c       Fri Sep 01 13:04:02 2006 -0600
@@ -79,22 +79,22 @@ asmlinkage void svm_intr_assist(void)
     ASSERT(vmcb);
 
     /* Check if an Injection is active */
-       /* Previous Interrupt delivery caused this Intercept? */
-       if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) {
-           v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector;
+    /* Previous Interrupt delivery caused this Intercept? */
+    if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) {
+        v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector;
 //           printk("Injecting PF#: saving IRQ from ExitInfo\n");
-           vmcb->exitintinfo.bytes = 0;
-           re_injecting = 1;
-       }
+        vmcb->exitintinfo.bytes = 0;
+        re_injecting = 1;
+    }
 
     /* Guest's interrupts masked? */
     rflags = vmcb->rflags;
     if (irq_masked(rflags)) {
         HVM_DBG_LOG(DBG_LEVEL_1, "Guest IRQs masked: rflags: %lx", rflags);
-       /* bail out, we won't be injecting an interrupt this time */
-       return;
+        /* bail out, we won't be injecting an interrupt this time */
+        return;
     }
-  
+    
     /* Previous interrupt still pending? */
     if (vmcb->vintr.fields.irq) {
 //        printk("Re-injecting IRQ from Vintr\n");
@@ -115,27 +115,24 @@ asmlinkage void svm_intr_assist(void)
       if ( v->vcpu_id == 0 )
          hvm_pic_assist(v);
 
+
+      if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+          pic_set_irq(pic, pt->irq, 0);
+          pic_set_irq(pic, pt->irq, 1);
+      }
+
       callback_irq = v->domain->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ];
-
-      /* Before we deal with PIT interrupts, let's check for
-         interrupts set by the device model or paravirtualised event
-         channel interrupts.
-      */
-      if ( cpu_has_pending_irq(v) ) {
-           intr_vector = cpu_get_interrupt(v, &intr_type);
-      }
-      else  if ( callback_irq != 0 && local_events_need_delivery() ) {
+      if ( callback_irq != 0 &&
+           local_events_need_delivery() ) {
           /*inject para-device call back irq*/
           v->vcpu_info->evtchn_upcall_mask = 1;
           pic_set_irq(pic, callback_irq, 0);
           pic_set_irq(pic, callback_irq, 1);
-          intr_vector = callback_irq;
       }
-      else  if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
-          pic_set_irq(pic, pt->irq, 0);
-          pic_set_irq(pic, pt->irq, 1);
+
+      if ( cpu_has_pending_irq(v) )
           intr_vector = cpu_get_interrupt(v, &intr_type);
-      }
+
     }
 
     /* have we got an interrupt to inject? */
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Fri Sep 01 13:04:02 2006 -0600
@@ -243,6 +243,7 @@ static void svm_store_cpu_guest_regs(
     {
         /* Returning the guest's regs */
         crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
+        crs[2] = v->arch.hvm_svm.cpu_cr2;
         crs[3] = v->arch.hvm_svm.cpu_cr3;
         crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
     }
@@ -2793,9 +2794,7 @@ asmlinkage void svm_vmexit_handler(struc
         break;
 
     case VMEXIT_INTR:
-        raise_softirq(SCHEDULE_SOFTIRQ);
-        break;
-
+        break;
 
     case VMEXIT_INVD:
         svm_vmexit_do_invd(vmcb);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/vlapic.c Fri Sep 01 13:04:02 2006 -0600
@@ -919,6 +919,20 @@ int cpu_has_apic_interrupt(struct vcpu* 
     return 0;
 }
 
+/* check to see if there is a pending interrupt */
+int cpu_has_pending_irq(struct vcpu *v)
+{
+    struct hvm_domain *plat = &v->domain->arch.hvm_domain;
+
+    /* APIC */
+    if ( cpu_has_apic_interrupt(v) ) return 1;
+    
+    /* PIC */
+    if ( !vlapic_accept_pic_intr(v) ) return 0;
+
+    return plat->interrupt_request;
+}
+
 void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode)
 {
     struct vlapic *vlapic = VLAPIC(v);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vmx/io.c
--- a/xen/arch/x86/hvm/vmx/io.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/io.c Fri Sep 01 13:04:02 2006 -0600
@@ -68,19 +68,6 @@ static inline int is_interruptibility_st
     return interruptibility;
 }
 
-/* check to see if there is pending interrupt  */
-int cpu_has_pending_irq(struct vcpu *v)
-{
-    struct hvm_domain *plat = &v->domain->arch.hvm_domain;
-
-    /* APIC */
-    if ( cpu_has_apic_interrupt(v) ) return 1;
-    
-    /* PIC */
-    if ( !vlapic_accept_pic_intr(v) ) return 0;
-
-    return plat->interrupt_request;
-}
 
 asmlinkage void vmx_intr_assist(void)
 {
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri Sep 01 13:04:02 2006 -0600
@@ -46,6 +46,8 @@
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
 
+extern uint32_t vlapic_update_ppr(struct vlapic *vlapic);
+
 static DEFINE_PER_CPU(unsigned long, trace_values[5]);
 #define TRACE_VMEXIT(index,value) this_cpu(trace_values)[index]=value
 
@@ -518,6 +520,7 @@ static void vmx_store_cpu_guest_regs(
     if ( crs != NULL )
     {
         __vmread(CR0_READ_SHADOW, &crs[0]);
+        crs[2] = v->arch.hvm_vmx.cpu_cr2;
         __vmread(GUEST_CR3, &crs[3]);
         __vmread(CR4_READ_SHADOW, &crs[4]);
     }
@@ -953,8 +956,6 @@ static void vmx_vmexit_do_cpuid(struct c
                      bitmaskof(X86_FEATURE_MWAIT) );
 
             edx &= ~( bitmaskof(X86_FEATURE_HT)   |
-                     bitmaskof(X86_FEATURE_MCA)   |
-                     bitmaskof(X86_FEATURE_MCE)   |
                      bitmaskof(X86_FEATURE_ACPI)  |
                      bitmaskof(X86_FEATURE_ACC) );
         }
@@ -1615,6 +1616,7 @@ static int mov_to_cr(int gp, int cr, str
     unsigned long value;
     unsigned long old_cr;
     struct vcpu *v = current;
+    struct vlapic *vlapic = VLAPIC(v);
 
     switch ( gp ) {
     CASE_GET_REG(EAX, eax);
@@ -1758,6 +1760,12 @@ static int mov_to_cr(int gp, int cr, str
             shadow_update_paging_modes(v);
         break;
     }
+    case 8:
+    {
+        vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
+        vlapic_update_ppr(vlapic);
+        break;
+    }
     default:
         printk("invalid cr: %d\n", gp);
         __hvm_bug(regs);
@@ -1771,13 +1779,20 @@ static int mov_to_cr(int gp, int cr, str
  */
 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
 {
-    unsigned long value;
+    unsigned long value = 0;
     struct vcpu *v = current;
-
-    if ( cr != 3 )
+    struct vlapic *vlapic = VLAPIC(v);
+
+    if ( cr != 3 && cr != 8)
         __hvm_bug(regs);
 
-    value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
+    if ( cr == 3 )
+        value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
+    else if ( cr == 8 )
+    {
+        value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
+        value = (value & 0xF0) >> 4;
+    }
 
     switch ( gp ) {
     CASE_SET_REG(EAX, eax);
@@ -1888,7 +1903,7 @@ static inline void vmx_do_msr_read(struc
         }
 
         rdmsr_safe(regs->ecx, regs->eax, regs->edx);
-        break;
+        return;
     }
 
     regs->eax = msr_content & 0xFFFFFFFF;
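
The new CR8 cases convert between the architectural CR8 value (low four bits) and the APIC task-priority register, which keeps the priority in bits 7:4. A tiny self-contained round-trip check of that mapping; the helper names are invented and only the bit arithmetic from the hunk is reused.

    #include <stdio.h>
    #include <assert.h>

    static unsigned long cr8_to_tpr(unsigned long cr8) { return (cr8 & 0x0F) << 4; }
    static unsigned long tpr_to_cr8(unsigned long tpr) { return (tpr & 0xF0) >> 4; }

    int main(void)
    {
        unsigned long cr8;
        for (cr8 = 0; cr8 < 16; cr8++)
            assert(tpr_to_cr8(cr8_to_tpr(cr8)) == cr8);
        printf("CR8 values 0..15 round-trip through TPR bits 7:4\n");
        return 0;
    }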
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c    Fri Sep 01 13:04:02 2006 -0600
@@ -2861,11 +2861,11 @@ static int sh_page_fault(struct vcpu *v,
     //      bunch of 4K maps.
     //
 
+    shadow_lock(d);
+
     SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
                    v->domain->domain_id, v->vcpu_id, va, regs->error_code);
     
-    shadow_lock(d);
-
     shadow_audit_tables(v);
                    
     if ( guest_walk_tables(v, va, &gw, 1) != 0 )
@@ -3291,12 +3291,6 @@ sh_update_linear_entries(struct vcpu *v)
         {
             ml3e = __linear_l3_table;
             l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0]));
-#if GUEST_PAGING_LEVELS == 2
-            /* Shadow l3 tables are made up by update_cr3 */
-            sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
-#else
-            sl3e = v->arch.shadow_vtable;
-#endif
         }
         else 
         {   
@@ -3306,13 +3300,15 @@ sh_update_linear_entries(struct vcpu *v)
             l3mfn = _mfn(l4e_get_pfn(ml4e[0]));
             ml3e = sh_map_domain_page(l3mfn);
             sh_unmap_domain_page(ml4e);
+        }
+
 #if GUEST_PAGING_LEVELS == 2
-            /* Shadow l3 tables are made up by update_cr3 */
-            sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+        /* Shadow l3 tables are made up by update_cr3 */
+        sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
 #else
-            sl3e = sh_map_domain_page(pagetable_get_mfn(v->arch.shadow_table));
-#endif
-        }
+        /* Always safe to use shadow_vtable, because it's globally mapped */
+        sl3e = v->arch.shadow_vtable;
+#endif
 
         for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
         {
@@ -3324,12 +3320,7 @@ sh_update_linear_entries(struct vcpu *v)
         }
 
         if ( v != current ) 
-        {
             sh_unmap_domain_page(ml3e);
-#if GUEST_PAGING_LEVELS != 2
-            sh_unmap_domain_page(sl3e);
-#endif
-        }
     }
 
 #elif CONFIG_PAGING_LEVELS == 3
@@ -3361,31 +3352,10 @@ sh_update_linear_entries(struct vcpu *v)
         
 #else /* GUEST_PAGING_LEVELS == 3 */
         
-        /* Use local vcpu's mappings if we can; otherwise make new mappings */
-        if ( v == current ) 
-        {
-            shadow_l3e = v->arch.shadow_vtable;
-            if ( !shadow_mode_external(d) )
-                guest_l3e = v->arch.guest_vtable;
-        }
-        else 
-        {
-            mfn_t smfn;
-            int idx;
-            
-            /* Map the shadow l3 */
-            smfn = pagetable_get_mfn(v->arch.shadow_table);
-            idx = shadow_l3_index(&smfn, guest_index(v->arch.shadow_vtable));
-            shadow_l3e = sh_map_domain_page(smfn);
-            shadow_l3e += idx;
-            if ( !shadow_mode_external(d) )
-            {
-                /* Also the guest l3 */
-                mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table); 
-                guest_l3e = sh_map_domain_page(gmfn);
-                guest_l3e += guest_index(v->arch.guest_vtable);
-            }
-        }
+        /* Always safe to use *_vtable, because they're globally mapped */
+        shadow_l3e = v->arch.shadow_vtable;
+        guest_l3e = v->arch.guest_vtable;
+
 #endif /* GUEST_PAGING_LEVELS */
         
         /* Choose where to write the entries, using linear maps if possible */
@@ -3443,14 +3413,6 @@ sh_update_linear_entries(struct vcpu *v)
         if ( v != current || !shadow_mode_external(d) )
             sh_unmap_domain_page(l2e);
         
-#if GUEST_PAGING_LEVELS == 3
-        if ( v != current) 
-        {
-            sh_unmap_domain_page(shadow_l3e);
-            if ( !shadow_mode_external(d) )
-                sh_unmap_domain_page(guest_l3e);
-        }
-#endif
     }
 
 #elif CONFIG_PAGING_LEVELS == 2
@@ -3601,7 +3563,7 @@ sh_detach_old_tables(struct vcpu *v)
          v->arch.shadow_vtable )
     {
         // Q: why does this need to use (un)map_domain_page_*global* ?
-        //
+        /* A: so sh_update_linear_entries can operate on other vcpus */
         sh_unmap_domain_page_global(v->arch.shadow_vtable);
         v->arch.shadow_vtable = NULL;
     }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/physdev.c    Fri Sep 01 13:04:02 2006 -0600
@@ -96,10 +96,11 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
         if ( !IS_PRIV(current->domain) )
             break;
 
+        irq = irq_op.irq;
         ret = -EINVAL;
-        if ( (irq = irq_op.irq) >= NR_IRQS )
+        if ( (irq < 0) || (irq >= NR_IRQS) )
             break;
-        
+
         irq_op.vector = assign_irq_vector(irq);
         ret = copy_to_guest(arg, &irq_op, 1) ? -EFAULT : 0;
         break;
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/time.c       Fri Sep 01 13:04:02 2006 -0600
@@ -676,7 +676,7 @@ static inline void __update_vcpu_system_
     struct vcpu_time_info *u;
 
     t = &this_cpu(cpu_time);
-    u = &v->domain->shared_info->vcpu_info[v->vcpu_id].time;
+    u = &v->vcpu_info->time;
 
     version_update_begin(&u->version);
 
@@ -690,7 +690,7 @@ static inline void __update_vcpu_system_
 
 void update_vcpu_system_time(struct vcpu *v)
 {
-    if ( v->domain->shared_info->vcpu_info[v->vcpu_id].time.tsc_timestamp != 
+    if ( v->vcpu_info->time.tsc_timestamp !=
          this_cpu(cpu_time).local_tsc_stamp )
         __update_vcpu_system_time(v);
 }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/traps.c      Fri Sep 01 13:04:02 2006 -0600
@@ -339,7 +339,6 @@ asmlinkage void fatal_trap(int trapnr, s
 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
 {
     int cpu = smp_processor_id();
-    unsigned long cr2;
     static char *trapstr[] = { 
         "divide error", "debug", "nmi", "bkpt", "overflow", "bounds", 
         "invalid opcode", "device not available", "double fault", 
@@ -356,7 +355,7 @@ asmlinkage void fatal_trap(int trapnr, s
 
     if ( trapnr == TRAP_page_fault )
     {
-        __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
+        unsigned long cr2 = read_cr2();
         printk("Faulting linear address: %p\n", _p(cr2));
         show_page_walk(cr2);
     }
@@ -911,7 +910,7 @@ asmlinkage int do_page_fault(struct cpu_
 
     ASSERT(!in_irq());
 
-    __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
+    addr = read_cr2();
 
     DEBUGGER_trap_entry(TRAP_page_fault, regs);
 
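
Both call sites now go through a read_cr2() helper instead of open-coded inline assembly. The helper itself is not part of this patch; assuming it wraps the same instruction the removed lines used, it would look roughly like this (x86 only, a sketch rather than the actual header definition):

    /* Hedged sketch of read_cr2(): move %cr2 into a general-purpose register. */
    static inline unsigned long read_cr2(void)
    {
        unsigned long cr2;
        __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2));
        return cr2;
    }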
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/x86_32/traps.c       Fri Sep 01 13:04:02 2006 -0600
@@ -21,11 +21,28 @@
 /* All CPUs have their own IDT to allow int80 direct trap. */
 idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
 
+static void print_xen_info(void)
+{
+    char taint_str[TAINT_STRING_MAX_LEN];
+    char debug = 'n', *arch = "x86_32";
+
+#ifndef NDEBUG
+    debug = 'y';
+#endif
+
+#ifdef CONFIG_X86_PAE
+    arch = "x86_32p";
+#endif
+
+    printk("----[ Xen-%d.%d%s  %s  debug=%c  %s ]----\n",
+           xen_major_version(), xen_minor_version(), xen_extra_version(),
+           arch, debug, print_tainted(taint_str));
+}
+
 void show_registers(struct cpu_user_regs *regs)
 {
     struct cpu_user_regs fault_regs = *regs;
     unsigned long fault_crs[8];
-    char taint_str[TAINT_STRING_MAX_LEN];
     const char *context;
 
     if ( hvm_guest(current) && guest_mode(regs) )
@@ -35,25 +52,29 @@ void show_registers(struct cpu_user_regs
     }
     else
     {
-        context = guest_mode(regs) ? "guest" : "hypervisor";
-
         if ( !guest_mode(regs) )
         {
+            context = "hypervisor";
             fault_regs.esp = (unsigned long)&regs->esp;
             fault_regs.ss = read_segment_register(ss);
             fault_regs.ds = read_segment_register(ds);
             fault_regs.es = read_segment_register(es);
             fault_regs.fs = read_segment_register(fs);
             fault_regs.gs = read_segment_register(gs);
+            fault_crs[2] = read_cr2();
+        }
+        else
+        {
+            context = "guest";
+            fault_crs[2] = current->vcpu_info->arch.cr2;
         }
 
         fault_crs[0] = read_cr0();
         fault_crs[3] = read_cr3();
-    }
-
-    printk("----[ Xen-%d.%d%s    %s ]----\n",
-           xen_major_version(), xen_minor_version(), xen_extra_version(),
-           print_tainted(taint_str));
+        fault_crs[4] = read_cr4();
+    }
+
+    print_xen_info();
     printk("CPU:    %d\nEIP:    %04x:[<%08x>]",
            smp_processor_id(), fault_regs.cs, fault_regs.eip);
     if ( !guest_mode(regs) )
@@ -63,7 +84,8 @@ void show_registers(struct cpu_user_regs
            fault_regs.eax, fault_regs.ebx, fault_regs.ecx, fault_regs.edx);
     printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08x\n",
            fault_regs.esi, fault_regs.edi, fault_regs.ebp, fault_regs.esp);
-    printk("cr0: %08lx   cr3: %08lx\n", fault_crs[0], fault_crs[3]);
+    printk("cr0: %08lx   cr4: %08lx   cr3: %08lx   cr2: %08lx\n",
+           fault_crs[0], fault_crs[4], fault_crs[3], fault_crs[2]);
     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
            "ss: %04x   cs: %04x\n",
            fault_regs.ds, fault_regs.es, fault_regs.fs,
@@ -125,7 +147,6 @@ asmlinkage void do_double_fault(void)
 {
     struct tss_struct *tss = &doublefault_tss;
     unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
-    char taint_str[TAINT_STRING_MAX_LEN];
 
     watchdog_disable();
 
@@ -133,9 +154,8 @@ asmlinkage void do_double_fault(void)
 
     /* Find information saved during fault and dump it to the console. */
     tss = &init_tss[cpu];
-    printk("*** DOUBLE FAULT: Xen-%d.%d%s    %s\n",
-           xen_major_version(), xen_minor_version(), xen_extra_version(),
-           print_tainted(taint_str));
+    printk("*** DOUBLE FAULT ***\n");
+    print_xen_info();
     printk("CPU:    %d\nEIP:    %04x:[<%08x>]",
            cpu, tss->cs, tss->eip);
     print_symbol(" %s\n", tss->eip);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/x86_64/traps.c       Fri Sep 01 13:04:02 2006 -0600
@@ -21,11 +21,24 @@
 
 #include <public/callback.h>
 
+static void print_xen_info(void)
+{
+    char taint_str[TAINT_STRING_MAX_LEN];
+    char debug = 'n';
+
+#ifndef NDEBUG
+    debug = 'y';
+#endif
+
+    printk("----[ Xen-%d.%d%s  x86_64  debug=%c  %s ]----\n",
+           xen_major_version(), xen_minor_version(), xen_extra_version(),
+           debug, print_tainted(taint_str));
+}
+
 void show_registers(struct cpu_user_regs *regs)
 {
     struct cpu_user_regs fault_regs = *regs;
     unsigned long fault_crs[8];
-    char taint_str[TAINT_STRING_MAX_LEN];
     const char *context;
 
     if ( hvm_guest(current) && guest_mode(regs) )
@@ -35,18 +48,27 @@ void show_registers(struct cpu_user_regs
     }
     else
     {
-        context = guest_mode(regs) ? "guest" : "hypervisor";
+        if ( guest_mode(regs) )
+        {
+            context = "guest";
+            fault_crs[2] = current->vcpu_info->arch.cr2;
+        }
+        else
+        {
+            context = "hypervisor";
+            fault_crs[2] = read_cr2();
+        }
+
         fault_crs[0] = read_cr0();
         fault_crs[3] = read_cr3();
+        fault_crs[4] = read_cr4();
         fault_regs.ds = read_segment_register(ds);
         fault_regs.es = read_segment_register(es);
         fault_regs.fs = read_segment_register(fs);
         fault_regs.gs = read_segment_register(gs);
     }
 
-    printk("----[ Xen-%d.%d%s    %s ]----\n",
-           xen_major_version(), xen_minor_version(), xen_extra_version(),
-           print_tainted(taint_str));
+    print_xen_info();
     printk("CPU:    %d\nRIP:    %04x:[<%016lx>]",
            smp_processor_id(), fault_regs.cs, fault_regs.rip);
     if ( !guest_mode(regs) )
@@ -62,8 +84,9 @@ void show_registers(struct cpu_user_regs
            fault_regs.r9,  fault_regs.r10, fault_regs.r11);
     printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
            fault_regs.r12, fault_regs.r13, fault_regs.r14);
-    printk("r15: %016lx   cr0: %016lx   cr3: %016lx\n",
-           fault_regs.r15, fault_crs[0], fault_crs[3]);
+    printk("r15: %016lx   cr0: %016lx   cr4: %016lx\n",
+           fault_regs.r15, fault_crs[0], fault_crs[4]);
+    printk("cr3: %016lx   cr2: %016lx\n", fault_crs[3], fault_crs[2]);
     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
            "ss: %04x   cs: %04x\n",
            fault_regs.ds, fault_regs.es, fault_regs.fs,
@@ -121,7 +144,6 @@ asmlinkage void do_double_fault(struct c
 asmlinkage void do_double_fault(struct cpu_user_regs *regs)
 {
     unsigned int cpu, tr;
-    char taint_str[TAINT_STRING_MAX_LEN];
 
     asm ( "str %0" : "=r" (tr) );
     cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2;
@@ -131,9 +153,8 @@ asmlinkage void do_double_fault(struct c
     console_force_unlock();
 
     /* Find information saved during fault and dump it to the console. */
-    printk("*** DOUBLE FAULT: Xen-%d.%d%s    %s\n",
-           xen_major_version(), xen_minor_version(), xen_extra_version(),
-           print_tainted(taint_str));
+    printk("*** DOUBLE FAULT ***\n");
+    print_xen_info();
     printk("CPU:    %d\nRIP:    %04x:[<%016lx>]",
            cpu, regs->cs, regs->rip);
     print_symbol(" %s", regs->rip);
diff -r 4ba098226429 -r 1bab7d65171b xen/common/perfc.c
--- a/xen/common/perfc.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/common/perfc.c        Fri Sep 01 13:04:02 2006 -0600
@@ -136,8 +136,8 @@ static xen_sysctl_perfc_val_t *perfc_val
 static xen_sysctl_perfc_val_t *perfc_vals;
 static int               perfc_nbr_vals;
 static int               perfc_init = 0;
-static int perfc_copy_info(XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc,
-                           XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val)
+static int perfc_copy_info(XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc,
+                           XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val)
 {
     unsigned int i, j;
     unsigned int v = 0;
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-ia64/mm.h Fri Sep 01 13:04:02 2006 -0600
@@ -451,7 +451,6 @@ extern u64 translate_domain_pte(u64 ptev
 
 #define INVALID_M2P_ENTRY        (~0UL)
 #define VALID_M2P(_e)            (!((_e) & (1UL<<63)))
-#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
 
 #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
 #define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/config.h
--- a/xen/include/asm-powerpc/config.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/config.h  Fri Sep 01 13:04:02 2006 -0600
@@ -47,11 +47,13 @@ extern char __bss_start[];
 /* this should be per processor, but for now */
 #define CACHE_LINE_SIZE 128
 
+/* 256M - 64M of Xen space seems like a nice number */
+#define CONFIG_MIN_DOM0_PAGES (192 << (20 - PAGE_SHIFT))
 #define CONFIG_SHADOW 1
 #define CONFIG_GDB 1
 #define CONFIG_SMP 1
 #define CONFIG_PCI 1
-#define NR_CPUS 1
+#define NR_CPUS 16
 
 #ifndef ELFSIZE
 #define ELFSIZE 64
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/current.h
--- a/xen/include/asm-powerpc/current.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/current.h Fri Sep 01 13:04:02 2006 -0600
@@ -27,7 +27,7 @@
 
 struct vcpu;
 
-register struct processor_area *parea asm("r13");
+register volatile struct processor_area *parea asm("r13");
 
 static inline struct vcpu *get_current(void)
 {
@@ -66,7 +66,7 @@ static inline struct cpu_user_regs *gues
 
 static inline void reset_stack_and_jump(void (*f)(void))
 {
-    void _reset_stack_and_jump(void (*f)(void), struct cpu_user_regs *regs);
+    void _reset_stack_and_jump(void (*)(void), struct cpu_user_regs *);
     struct cpu_user_regs *regs = guest_cpu_user_regs();
 
 #ifdef TRACK_RESUME
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/domain.h
--- a/xen/include/asm-powerpc/domain.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/domain.h  Fri Sep 01 13:04:02 2006 -0600
@@ -38,15 +38,14 @@ struct arch_domain {
     struct page_info *rma_page;
     uint rma_order;
 
-    /* This is regular memory, only available thru translataion */
-    ulong logical_base_pfn;
-    ulong logical_end_pfn;
+    /* list of extents beyond RMA */
+    struct list_head extent_list;
 
     /* I/O-port access bitmap mask. */
     u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.      */
 
     uint large_page_sizes;
-    char large_page_shift[4];
+    uint large_page_order[4];
 } __cacheline_aligned;
 
 struct slb_entry {
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/htab.h
--- a/xen/include/asm-powerpc/htab.h    Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/htab.h    Fri Sep 01 13:04:02 2006 -0600
@@ -133,8 +133,4 @@ struct domain_htab {
     union pte *map;     /* access the htab like an array */
     ulong *shadow;      /* idx -> logical translation array */
 };
-
-struct domain;
-extern void htab_alloc(struct domain *d, uint order);
-extern void htab_free(struct domain *d);
 #endif
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h      Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/mm.h      Fri Sep 01 13:04:02 2006 -0600
@@ -24,6 +24,7 @@
 #include <public/xen.h>
 #include <xen/list.h>
 #include <xen/types.h>
+#include <xen/mm.h>
 #include <asm/misc.h>
 #include <asm/system.h>
 #include <asm/flushtlb.h>
@@ -33,7 +34,6 @@
 #define memguard_unguard_range(_p,_l)    ((void)0)
 
 extern unsigned long xenheap_phys_end;
-#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
 
 /*
  * Per-page-frame information.
@@ -43,7 +43,6 @@ extern unsigned long xenheap_phys_end;
  *  2. Provide a PFN_ORDER() macro for accessing the order of a free page.
  */
 #define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
-#define PRtype_info "016lx"
 
 /* XXX copy-and-paste job; re-examine me */
 struct page_info
@@ -63,7 +62,7 @@ struct page_info
         /* Page is in use: ((count_info & PGC_count_mask) != 0). */
         struct {
             /* Owner of this page (NULL if page is anonymous). */
-            struct domain *_domain;
+            u32 _domain;
             /* Type reference count and various PGT_xxx flags and fields. */
             unsigned long type_info;
         } inuse;
@@ -80,80 +79,132 @@ struct page_info
 
 };
 
+struct page_extents {
+    /* Each frame can be threaded onto a doubly-linked list. */
+    struct list_head pe_list;
+
+    /* page extent */
+    struct page_info *pg;
+    uint order;
+    ulong pfn;
+};
+
  /* The following page types are MUTUALLY EXCLUSIVE. */
 #define PGT_none            (0<<29) /* no special uses of this page */
-#define PGT_l1_page_table   (1<<29) /* using this page as an L1 page table? */
-#define PGT_l2_page_table   (2<<29) /* using this page as an L2 page table? */
-#define PGT_l3_page_table   (3<<29) /* using this page as an L3 page table? */
-#define PGT_l4_page_table   (4<<29) /* using this page as an L4 page table? */
-#define PGT_gdt_page        (5<<29) /* using this page in a GDT? */
-#define PGT_ldt_page        (6<<29) /* using this page in an LDT? */
+#define PGT_RMA             (1<<29) /* This page is an RMA page? */
 #define PGT_writable_page   (7<<29) /* has writable mappings of this page? */
 #define PGT_type_mask       (7<<29) /* Bits 29-31. */
+
+ /* Owning guest has pinned this page to its current type? */
+#define _PGT_pinned         28
+#define PGT_pinned          (1U<<_PGT_pinned)
  /* Has this page been validated for use as its current type? */
-#define _PGT_validated      28
+#define _PGT_validated      27
 #define PGT_validated       (1U<<_PGT_validated)
- /* Owning guest has pinned this page to its current type? */
-#define _PGT_pinned         27
-#define PGT_pinned          (1U<<_PGT_pinned)
- /* The 10 most significant bits of virt address if this is a page table. */
-#define PGT_va_shift        17
-#define PGT_va_mask         (((1U<<10)-1)<<PGT_va_shift)
+
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift        32
+#define PGT_va_mask         ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
  /* Is the back pointer still mutable (i.e. not fixed yet)? */
-#define PGT_va_mutable      (((1U<<10)-1)<<PGT_va_shift)
+#define PGT_va_mutable      ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
  /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
-#define PGT_va_unknown      (((1U<<10)-2)<<PGT_va_shift)
- /* 17-bit count of uses of this frame as its current type. */
-#define PGT_count_mask      ((1U<<17)-1)
+#define PGT_va_unknown      ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
+
+ /* 16-bit count of uses of this frame as its current type. */
+#define PGT_count_mask      ((1U<<16)-1)
 
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
 #define PGC_allocated       (1U<<_PGC_allocated)
- /* 31-bit count of references to this frame. */
-#define PGC_count_mask      ((1U<<31)-1)
+ /* Set on a *guest* page to mark it out-of-sync with its shadow */
+#define _PGC_out_of_sync     30
+#define PGC_out_of_sync     (1U<<_PGC_out_of_sync)
+ /* Set when the page is being used as a page table */
+#define _PGC_page_table      29
+#define PGC_page_table      (1U<<_PGC_page_table)
+ /* 29-bit count of references to this frame. */
+#define PGC_count_mask      ((1U<<29)-1)
+
+#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
+
+static inline struct domain *unpickle_domptr(u32 _domain)
+{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
+
+static inline u32 pickle_domptr(struct domain *domain)
+{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
+
+#define PRtype_info "016lx"/* should only be used for printk's */
+
+#define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
+#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+
+extern struct page_info *frame_table;
+extern unsigned long max_page;
+extern unsigned long total_pages;
+void init_frametable(void);
 
 static inline void put_page(struct page_info *page)
 {
-#if 0
-    int count;
-
-    count = atomic_dec_return(&page->count_info);
-
-    if ( unlikely((count & PGC_count_mask) == 0) )
+    u32 nx, x, y = page->count_info;
+
+    do {
+        x  = y;
+        nx = x - 1;
+    }
+    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );
+
+    if ( unlikely((nx & PGC_count_mask) == 0) ) {
+        panic("about to free page\n");
         free_domheap_page(page);
-#else
-    trap();
-#endif
+    }
 }
 
 static inline int get_page(struct page_info *page,
                            struct domain *domain)
 {
-#if 0
-    int count;
-
-    count = atomic_inc_return(&page->count_info);
-
-    if (((count & PGC_count_mask) == 0) ||      /* Count overflow? */
-            ((count & PGC_count_mask) == 1) ||  /* Wasn't allocated? */
-            ((page->domain != domain)))         /* Wrong owner? */
-    {
-        atomic_dec(&page->count_info);
-        return 0;
-    }
-
-#else
-    trap();
-#endif
+    u32 x, nx, y = page->count_info;
+    u32 d, nd = page->u.inuse._domain;
+    u32 _domain = pickle_domptr(domain);
+
+    do {
+        x  = y;
+        nx = x + 1;
+        d  = nd;
+        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
+             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
+             unlikely(d != _domain) )                /* Wrong owner? */
+        {
+            return 0;
+        }
+        y = cmpxchg(&page->count_info, x, nx);
+    }
+    while ( unlikely(y != x) );
+
     return 1;
+}
+
+extern void put_page_type(struct page_info *page);
+extern int  get_page_type(struct page_info *page, unsigned long type);
+
+static inline void put_page_and_type(struct page_info *page)
+{
+    put_page_type(page);
+    put_page(page);
 }
 
 static inline int get_page_and_type(struct page_info *page,
                                     struct domain *domain,
-                                    u32 type)
-{
-    trap();
-    return 1;
+                                    unsigned long type)
+{
+    int rc = get_page(page, domain);
+
+    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
+    {
+        put_page(page);
+        rc = 0;
+    }
+
+    return rc;
 }
 
 static inline int page_is_removable(struct page_info *page)
@@ -161,16 +212,9 @@ static inline int page_is_removable(stru
     return ((page->count_info & PGC_count_mask) == 1);
 }
 
-int get_page_type(struct page_info *page, u32 type);
-
 #define set_machinetophys(_mfn, _pfn) (trap(), 0)
 
 extern void synchronise_pagetables(unsigned long cpu_mask);
-
-static inline void put_page_and_type(struct page_info *page)
-{
-    trap();
-}
 
 /* XXX don't know what this is for */
 typedef struct {
@@ -179,17 +223,10 @@ typedef struct {
 } vm_assist_info_t;
 extern vm_assist_info_t vm_assist_info[];
 
-#define page_get_owner(_p)    ((_p)->u.inuse._domain)
-#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = _d)
-
 #define share_xen_page_with_guest(p, d, r) do { } while (0)
 #define share_xen_page_with_privileged_guests(p, r) do { } while (0)
 
-extern struct page_info *frame_table;
 extern unsigned long frame_table_size;
-extern unsigned long max_page;
-extern unsigned long total_pages;
-void init_frametable(void);
 
 /* hope that accesses to this will fail spectacularly */
 #define machine_to_phys_mapping ((u32 *)-1UL)
@@ -199,12 +236,12 @@ extern int update_grant_va_mapping(unsig
                                    struct domain *,
                                    struct vcpu *);
 
-extern void put_page_type(struct page_info *page);
-
-#define PFN_TYPE_RMA 0
-#define PFN_TYPE_LOGICAL 1
-#define PFN_TYPE_IO 2
-extern ulong pfn2mfn(struct domain *d, long mfn, int *type);
+#define PFN_TYPE_RMA 1
+#define PFN_TYPE_LOGICAL 2
+#define PFN_TYPE_IO 3
+#define PFN_TYPE_REMOTE 4
+
+extern ulong pfn2mfn(struct domain *d, long pfn, int *type);
 
 /* Arch-specific portion of memory_op hypercall. */
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
@@ -221,6 +258,10 @@ static inline unsigned long gmfn_to_mfn(
 
 #define mfn_to_gmfn(_d, mfn) (mfn)
 
+extern int allocate_rma(struct domain *d, unsigned int order_pages);
+extern uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages);
+extern void free_extents(struct domain *d);
+
 extern int steal_page(struct domain *d, struct page_info *page,
                         unsigned int memflags);
 
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/powerpc64/procarea.h
--- a/xen/include/asm-powerpc/powerpc64/procarea.h      Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/powerpc64/procarea.h      Fri Sep 01 13:04:02 2006 -0600
@@ -28,6 +28,7 @@ struct gdb_state;
 
 struct processor_area
 {
+    unsigned int whoami;
     struct vcpu *cur_vcpu;
     void *hyp_stack_base;
     ulong saved_regs[2];
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/processor.h
--- a/xen/include/asm-powerpc/processor.h       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/processor.h       Fri Sep 01 13:04:02 2006 -0600
@@ -39,8 +39,11 @@ struct cpu_user_regs;
 struct cpu_user_regs;
 extern void show_registers(struct cpu_user_regs *);
 extern void show_execution_state(struct cpu_user_regs *);
-extern unsigned int cpu_rma_order(void);
-extern void cpu_initialize(void);
+extern void show_backtrace(ulong sp, ulong lr, ulong pc);
+extern unsigned int cpu_extent_order(void);
+extern unsigned int cpu_default_rma_order_pages(void);
+extern uint cpu_large_page_orders(uint *sizes, uint max);
+extern void cpu_initialize(int cpuid);
 extern void cpu_init_vcpu(struct vcpu *);
 extern void save_cpu_sprs(struct vcpu *);
 extern void load_cpu_sprs(struct vcpu *);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/shadow.h
--- a/xen/include/asm-powerpc/shadow.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/shadow.h  Fri Sep 01 13:04:02 2006 -0600
@@ -13,7 +13,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
  */
@@ -55,4 +55,18 @@ static inline void mark_dirty(struct dom
 {
     return;
 }
+#define gnttab_mark_dirty(d, f) mark_dirty((d), (f))
+
+extern int shadow_domctl(struct domain *d, 
+                   xen_domctl_shadow_op_t *sc,
+                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
+extern unsigned int shadow_teardown(struct domain *d);
+extern unsigned int shadow_set_allocation(
+    struct domain *d, unsigned int megabytes, int *preempted);
+
+/* Return the size of the shadow pool, rounded up to the nearest MB */
+static inline unsigned int shadow_get_allocation(struct domain *d)
+{
+    return (1ULL << (d->arch.htab.order + PAGE_SHIFT)) >> 20;
+}
 #endif
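
shadow_get_allocation() above recovers the pool size in MiB from the hash table order; the new shadow_set_allocation() in xen/arch/powerpc/shadow.c (added later in this changeset) performs the inverse conversion. A small worked sketch of that round trip, with illustrative numbers in the comment:

    /* Sketch: MiB -> order as done by shadow_set_allocation().  For a
     * 16 MiB request with 4 KiB pages: pages = 16 << 8 = 4096,
     * order = fls(4096) - 1 = 12, and shadow_get_allocation() maps it
     * back: (1ULL << (12 + PAGE_SHIFT)) >> 20 = 16. */
    static unsigned int example_mb_to_order(unsigned int megabytes)
    {
        unsigned int pages = megabytes << (20 - PAGE_SHIFT);
        unsigned int order = fls(pages) - 1;     /* log2, truncated */

        if (pages & ((1u << order) - 1))
            order++;                             /* round up to a power of two */
        return order;
    }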
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/smp.h     Fri Sep 01 13:04:02 2006 -0600
@@ -28,8 +28,8 @@ extern int smp_num_siblings;
 
 /* revisit when we support SMP */
 #define get_hard_smp_processor_id(i) i
-#define hard_smp_processor_id() 0
-#define raw_smp_processor_id() 0
+#define raw_smp_processor_id() (parea->whoami)
+#define hard_smp_processor_id() raw_smp_processor_id()
 extern cpumask_t cpu_sibling_map[];
 extern cpumask_t cpu_core_map[];
 
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/types.h
--- a/xen/include/asm-powerpc/types.h   Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/types.h   Fri Sep 01 13:04:02 2006 -0600
@@ -3,8 +3,18 @@
 #ifndef _PPC_TYPES_H
 #define _PPC_TYPES_H
 
+#include <xen/config.h>
+
+#if defined(__ppc__)
+#define BYTES_PER_LONG 4
+#define BITS_PER_LONG 32
+#elif defined(__PPC64__)
+#define BYTES_PER_LONG 8
+#define BITS_PER_LONG 64
+#endif
+
+#ifndef __ASSEMBLY__
 typedef unsigned short umode_t;
-
 
 /*
  * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
@@ -31,8 +41,6 @@ typedef unsigned long __u64;
 #endif
 #endif
 
-#include <xen/config.h>
-
 typedef signed char s8;
 typedef unsigned char u8;
 
@@ -45,14 +53,10 @@ typedef unsigned int u32;
 #if defined(__ppc__)
 typedef signed long long s64;
 typedef unsigned long long u64;
-#define BYTES_PER_LONG 4
-#define BITS_PER_LONG 32
 typedef unsigned int size_t;
 #elif defined(__PPC64__)
 typedef signed long s64;
 typedef unsigned long u64;
-#define BYTES_PER_LONG 8
-#define BITS_PER_LONG 64
 typedef unsigned long size_t;
 #endif
 
@@ -66,4 +70,5 @@ typedef u64 dma64_addr_t;
 
 typedef unsigned short xmem_bufctl_t;
 
+#endif  /* __ASSEMBLY__ */
 #endif
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/mm.h  Fri Sep 01 13:04:02 2006 -0600
@@ -338,7 +338,6 @@ int check_descriptor(struct desc_struct 
 #define machine_to_phys_mapping  ((unsigned long *)RDWR_MPT_VIRT_START)
 #define INVALID_M2P_ENTRY        (~0UL)
 #define VALID_M2P(_e)            (!((_e) & (1UL<<(BITS_PER_LONG-1))))
-#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
 
 #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
 #define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/page.h        Fri Sep 01 13:04:02 2006 -0600
@@ -300,13 +300,6 @@ void setup_idle_pagetable(void);
 #define _PAGE_GNTTAB   0
 #endif
 
-/*
- * Disallow unused flag bits plus PAT, PSE and GLOBAL.
- * Also disallow GNTTAB if we are using it for grant-table debugging.
- * Permit the NX bit if the hardware supports it.
- */
-#define BASE_DISALLOW_MASK ((0xFFFFF180U | _PAGE_GNTTAB) & ~_PAGE_NX)
-
 #define __PAGE_HYPERVISOR \
     (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define __PAGE_HYPERVISOR_NOCACHE \
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/processor.h   Fri Sep 01 13:04:02 2006 -0600
@@ -288,6 +288,13 @@ static inline void write_cr0(unsigned lo
 static inline void write_cr0(unsigned long val)
 {
        __asm__("mov %0,%%cr0": :"r" ((unsigned long)val));
+}
+
+static inline unsigned long read_cr2(void)
+{
+    unsigned long __cr2;
+    __asm__("mov %%cr2,%0\n\t" :"=r" (__cr2));
+    return __cr2;
 }
 
 static inline unsigned long read_cr4(void)
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page-2level.h
--- a/xen/include/asm-x86/x86_32/page-2level.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_32/page-2level.h  Fri Sep 01 13:04:02 2006 -0600
@@ -53,7 +53,4 @@ typedef l2_pgentry_t root_pgentry_t;
 #define get_pte_flags(x) ((int)(x) & 0xFFF)
 #define put_pte_flags(x) ((intpte_t)((x) & 0xFFF))
 
-#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
-#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
-
 #endif /* __X86_32_PAGE_2LEVEL_H__ */
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_32/page-3level.h  Fri Sep 01 13:04:02 2006 -0600
@@ -66,8 +66,6 @@ typedef l3_pgentry_t root_pgentry_t;
 #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
 #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
 
-#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
-#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
 #define L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */
 
 #endif /* __X86_32_PAGE_3LEVEL_H__ */
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page.h
--- a/xen/include/asm-x86/x86_32/page.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_32/page.h Fri Sep 01 13:04:02 2006 -0600
@@ -26,6 +26,15 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
 #define GRANT_PTE_FLAGS \
     (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB)
 
+/*
+ * Disallow unused flag bits plus PAT, PSE and GLOBAL.
+ * Permit the NX bit if the hardware supports it.
+ */
+#define BASE_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX)
+
+#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+
 #endif /* __X86_32_PAGE_H__ */
 
 /*
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_64/page.h Fri Sep 01 13:04:02 2006 -0600
@@ -75,8 +75,15 @@ typedef l4_pgentry_t root_pgentry_t;
 #define _PAGE_NX_BIT (1U<<23)
 #define _PAGE_NX     (cpu_has_nx ? _PAGE_NX_BIT : 0U)
 
-#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
-#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
+/*
+ * Disallow unused flag bits plus PAT, PSE and GLOBAL.
+ * Permit the NX bit if the hardware supports it.
+ * Note that range [62:52] is available for software use on x86/64.
+ */
+#define BASE_DISALLOW_MASK (0xFF000180U & ~_PAGE_NX)
+
+#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
 #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)
 #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)
 
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h    Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-ia64.h    Fri Sep 01 13:04:02 2006 -0600
@@ -18,15 +18,12 @@
 
 #define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
 #define XEN_GUEST_HANDLE(name)          __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name)       __guest_handle_ ## name
 #define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
 #ifdef __XEN_TOOLS__
 #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
 #endif
 
 #ifndef __ASSEMBLY__
-typedef uint64_t uint64_aligned_t;
-
 /* Guest handles for primitive C types. */
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-powerpc.h
--- a/xen/include/public/arch-powerpc.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-powerpc.h Fri Sep 01 13:04:02 2006 -0600
@@ -29,7 +29,6 @@
 
 #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
 #define XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name)     __guest_handle_ ## name
 #define set_xen_guest_handle(hnd, val) \
     do { \
         if (sizeof ((hnd).__pad)) \
@@ -42,8 +41,6 @@
 #endif
 
 #ifndef __ASSEMBLY__
-typedef uint64_t uint64_aligned_t;
-
 /* Guest handles for primitive C types. */
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-x86_32.h  Fri Sep 01 13:04:02 2006 -0600
@@ -28,14 +28,7 @@
 #endif
 
 /* Structural guest handles introduced in 0x00030201. */
-#if (defined(__XEN__) || defined(__XEN_TOOLS__)) && !defined(__ASSEMBLY__)
-typedef uint64_t __attribute__((aligned(8))) uint64_aligned_t;
-#define __DEFINE_XEN_GUEST_HANDLE(name, type)                   \
-    typedef struct { type *p; }                                 \
-        __guest_handle_ ## name;                                \
-    typedef struct { union { type *p; uint64_aligned_t q; }; }  \
-        __guest_handle_64_ ## name
-#elif __XEN_INTERFACE_VERSION__ >= 0x00030201
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
 #define __DEFINE_XEN_GUEST_HANDLE(name, type) \
     typedef struct { type *p; } __guest_handle_ ## name
 #else
@@ -45,15 +38,9 @@ typedef uint64_t __attribute__((aligned(
 
 #define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
 #define XEN_GUEST_HANDLE(name)          __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name)       __guest_handle_64_ ## name
+#define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
 #ifdef __XEN_TOOLS__
 #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
-#define set_xen_guest_handle(hnd, val)                      \
-    do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0;   \
-         (hnd).p = val;                                     \
-    } while ( 0 )
-#else
-#define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
 #endif
 
 #ifndef __ASSEMBLY__
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-x86_64.h  Fri Sep 01 13:04:02 2006 -0600
@@ -39,15 +39,12 @@
 
 #define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
 #define XEN_GUEST_HANDLE(name)          __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name)       __guest_handle_ ## name
 #define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
 #ifdef __XEN_TOOLS__
 #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
 #endif
 
 #ifndef __ASSEMBLY__
-typedef uint64_t uint64_aligned_t;
-
 /* Guest handles for primitive C types. */
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/domctl.h       Fri Sep 01 13:04:02 2006 -0600
@@ -16,12 +16,10 @@
 
 #include "xen.h"
 
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000001
-
-#define uint64_t uint64_aligned_t
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000003
 
 struct xenctl_cpumap {
-    XEN_GUEST_HANDLE_64(uint8_t) bitmap;
+    XEN_GUEST_HANDLE(uint8_t) bitmap;
     uint32_t nr_cpus;
 };
 
@@ -72,8 +70,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdo
 #define XEN_DOMCTL_getmemlist         6
 struct xen_domctl_getmemlist {
     /* IN variables. */
+    /* Max entries to write to output buffer. */
     uint64_t max_pfns;
-    XEN_GUEST_HANDLE_64(ulong) buffer;
+    /* Start index in guest's page list. */
+    uint64_t start_pfn;
+    XEN_GUEST_HANDLE(xen_pfn_t) buffer;
     /* OUT variables. */
     uint64_t num_pfns;
 };
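
The new start_pfn field lets the tools read a long page list in bounded batches rather than in a single huge buffer. A rough caller-side sketch; the do_domctl()/xc_handle plumbing and the batch size are assumptions for illustration only:

    #define CHUNK 1024                            /* illustrative batch size */

    static int example_walk_pages(int xc_handle, domid_t domid)
    {
        xen_pfn_t pfns[CHUNK];
        struct xen_domctl domctl = { 0 };
        uint64_t start = 0;

        domctl.cmd = XEN_DOMCTL_getmemlist;
        domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        domctl.domain = domid;

        for (;;) {
            domctl.u.getmemlist.max_pfns  = CHUNK;
            domctl.u.getmemlist.start_pfn = start;
            set_xen_guest_handle(domctl.u.getmemlist.buffer, pfns);
            if (do_domctl(xc_handle, &domctl) != 0)
                return -1;                        /* hypercall failed */
            if (domctl.u.getmemlist.num_pfns == 0)
                break;                            /* end of the page list */
            /* ... consume num_pfns entries of pfns[] here ... */
            start += domctl.u.getmemlist.num_pfns;
        }
        return 0;
    }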
@@ -110,7 +111,7 @@ struct xen_domctl_getpageframeinfo2 {
     /* IN variables. */
     uint64_t num;
     /* IN/OUT variables. */
-    XEN_GUEST_HANDLE_64(ulong) array;
+    XEN_GUEST_HANDLE(ulong) array;
 };
 typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
@@ -184,7 +185,7 @@ struct xen_domctl_shadow_op {
     uint32_t       mb;       /* Shadow memory allocation in MB */
 
     /* OP_PEEK / OP_CLEAN */
-    XEN_GUEST_HANDLE_64(ulong) dirty_bitmap;
+    XEN_GUEST_HANDLE(ulong) dirty_bitmap;
     uint64_t       pages;    /* Size of buffer. Updated with actual size. */
     struct xen_domctl_shadow_op_stats stats;
 };
@@ -204,8 +205,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_m
 #define XEN_DOMCTL_setvcpucontext    12
 #define XEN_DOMCTL_getvcpucontext    13
 struct xen_domctl_vcpucontext {
-    uint32_t              vcpu;                     /* IN */
-    XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
+    uint32_t              vcpu;                  /* IN */
+    XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt; /* IN/OUT */
 };
 typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
@@ -378,8 +379,6 @@ typedef struct xen_domctl xen_domctl_t;
 typedef struct xen_domctl xen_domctl_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
 
-#undef uint64_t
-
 #endif /* __XEN_PUBLIC_DOMCTL_H__ */
 
 /*
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/sysctl.h       Fri Sep 01 13:04:02 2006 -0600
@@ -16,9 +16,7 @@
 #include "xen.h"
 #include "domctl.h"
 
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000001
-
-#define uint64_t uint64_aligned_t
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000002
 
 /*
  * Read console content from Xen buffer ring.
@@ -26,8 +24,8 @@
 #define XEN_SYSCTL_readconsole       1
 struct xen_sysctl_readconsole {
     /* IN variables. */
-    uint32_t clear;                   /* Non-zero -> clear after reading. */
-    XEN_GUEST_HANDLE_64(char) buffer; /* Buffer start */
+    uint32_t clear;                /* Non-zero -> clear after reading. */
+    XEN_GUEST_HANDLE(char) buffer; /* Buffer start */
     /* IN/OUT variables. */
     uint32_t count;            /* In: Buffer size;  Out: Used buffer size  */
 };
@@ -105,9 +103,9 @@ struct xen_sysctl_perfc_op {
     uint32_t       nr_counters;       /*  number of counters description  */
     uint32_t       nr_vals;                      /*  number of values  */
     /* counter information (or NULL) */
-    XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc;
+    XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc;
     /* counter values (or NULL) */
-    XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val;
+    XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val;
 };
 typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
@@ -117,7 +115,7 @@ struct xen_sysctl_getdomaininfolist {
     /* IN variables. */
     domid_t               first_domain;
     uint32_t              max_domains;
-    XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer;
+    XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t) buffer;
     /* OUT variables. */
     uint32_t              num_domains;
 };
@@ -140,8 +138,6 @@ typedef struct xen_sysctl xen_sysctl_t;
 typedef struct xen_sysctl xen_sysctl_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t);
 
-#undef uint64_t
-
 #endif /* __XEN_PUBLIC_SYSCTL_H__ */
 
 /*
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/xen.h
--- a/xen/include/public/xen.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/xen.h  Fri Sep 01 13:04:02 2006 -0600
@@ -63,6 +63,7 @@
 #define __HYPERVISOR_hvm_op               34
 #define __HYPERVISOR_sysctl               35
 #define __HYPERVISOR_domctl               36
+#define __HYPERVISOR_kexec_op             37
 
 /* Architecture-specific hypercall definitions. */
 #define __HYPERVISOR_arch_0               48
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/FlatDeviceTree.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/FlatDeviceTree.py   Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,323 @@
+#!/usr/bin/env python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Copyright (C) IBM Corp. 2006
+#
+# Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+
+import os
+import sys
+import struct
+import stat
+import re
+
+_OF_DT_HEADER = int("d00dfeed", 16) # avoid signed/unsigned FutureWarning
+_OF_DT_BEGIN_NODE = 0x1
+_OF_DT_END_NODE = 0x2
+_OF_DT_PROP = 0x3
+_OF_DT_END = 0x9
+
+def _bincat(seq, separator=''):
+    '''Concatenate the contents of seq into a bytestream.'''
+    strs = []
+    for item in seq:
+        if type(item) == type(0):
+            strs.append(struct.pack(">I", item))
+        else:
+            try:
+                strs.append(item.to_bin())
+            except AttributeError, e:
+                strs.append(item)
+    return separator.join(strs)
+
+def _alignup(val, alignment):
+    return (val + alignment - 1) & ~(alignment - 1)
+
+def _pad(buf, alignment):
+    '''Pad bytestream with NULLs to specified alignment.'''
+    padlen = _alignup(len(buf), alignment)
+    return buf + '\0' * (padlen - len(buf))
+    # not present in Python 2.3:
+    #return buf.ljust(_padlen, '\0')
+
+def _indent(item):
+    indented = []
+    for line in str(item).splitlines(True):
+        indented.append('    ' + line)
+    return ''.join(indented)
+
+class _Property:
+    _nonprint = re.compile('[\000-\037\200-\377]')
+    def __init__(self, node, name, value):
+        self.node = node
+        self.value = value
+        self.name = name
+        self.node.tree.stradd(name)
+
+    def __str__(self):
+        result = self.name
+        if self.value:
+            searchtext = self.value
+            # it's ok for a string to end in NULL
+            if searchtext.find('\000') == len(searchtext)-1:
+                searchtext = searchtext[:-1]
+            m = self._nonprint.search(searchtext)
+            if m:
+                bytes = struct.unpack("B" * len(self.value), self.value)
+                hexbytes = [ '%02x' % b for b in bytes ]
+                words = []
+                for i in range(0, len(self.value), 4):
+                    words.append(''.join(hexbytes[i:i+4]))
+                v = '<' + ' '.join(words) + '>'
+            else:
+                v = '"%s"' % self.value
+            result += ': ' + v
+        return result
+
+    def to_bin(self):
+        offset = self.node.tree.stroffset(self.name)
+        return struct.pack('>III', _OF_DT_PROP, len(self.value), offset) \
+            + _pad(self.value, 4)
+
+class _Node:
+    def __init__(self, tree, name):
+        self.tree = tree
+        self.name = name
+        self.props = {}
+        self.children = {}
+        self.phandle = 0
+
+    def __str__(self):
+        propstrs = [ _indent(prop) for prop in self.props.values() ]
+        childstrs = [ _indent(child) for child in self.children.values() ]
+        return '%s:\n%s\n%s' % (self.name, '\n'.join(propstrs),
+            '\n'.join(childstrs))
+
+    def to_bin(self):
+        name = _pad(self.name + '\0', 4)
+        return struct.pack('>I', _OF_DT_BEGIN_NODE) + \
+                name + \
+                _bincat(self.props.values()) + \
+                _bincat(self.children.values()) + \
+                struct.pack('>I', _OF_DT_END_NODE)
+
+    def addprop(self, propname, *cells):
+        '''setprop with duplicate error-checking.'''
+        if propname in self.props:
+            raise AttributeError('%s/%s already exists' % (self.name, propname))
+        self.setprop(propname, *cells)
+
+    def setprop(self, propname, *cells):
+        self.props[propname] = _Property(self, propname, _bincat(cells))
+
+    def addnode(self, nodename):
+        '''newnode with duplicate error-checking.'''
+        if nodename in self.children:
+            raise AttributeError('%s/%s already exists' % (self.name, nodename))
+        return self.newnode(nodename)
+
+    def newnode(self, nodename):
+        node = _Node(self.tree, nodename)
+        self.children[nodename] = node
+        return node
+
+    def getprop(self, propname):
+        return self.props[propname]
+
+    def getchild(self, nodename):
+        return self.children[nodename]
+
+    def get_phandle(self):
+        if self.phandle:
+            return self.phandle
+        self.phandle = self.tree.alloc_phandle()
+        self.addprop('linux,phandle', self.phandle)
+        return self.phandle
+
+class _Header:
+    def __init__(self):
+        self.magic = 0
+        self.totalsize = 0
+        self.off_dt_struct = 0
+        self.off_dt_strings = 0
+        self.off_mem_rsvmap = 0
+        self.version = 0
+        self.last_comp_version = 0
+        self.boot_cpuid_phys = 0
+        self.size_dt_strings = 0
+    def to_bin(self):
+        return struct.pack('>9I',
+            self.magic,
+            self.totalsize,
+            self.off_dt_struct,
+            self.off_dt_strings,
+            self.off_mem_rsvmap,
+            self.version,
+            self.last_comp_version,
+            self.boot_cpuid_phys,
+            self.size_dt_strings)
+
+class _StringBlock:
+    def __init__(self):
+        self.table = []
+    def to_bin(self):
+        return _bincat(self.table, '\0') + '\0'
+    def add(self, str):
+        self.table.append(str)
+    def getoffset(self, str):
+        return self.to_bin().index(str + '\0')
+
+class Tree(_Node):
+    def __init__(self):
+        self.last_phandle = 0
+        self.strings = _StringBlock()
+        self.reserved = [(0, 0)]
+        _Node.__init__(self, self, '\0')
+
+    def alloc_phandle(self):
+        self.last_phandle += 1
+        return self.last_phandle
+
+    def stradd(self, str):
+        return self.strings.add(str)
+
+    def stroffset(self, str):
+        return self.strings.getoffset(str)
+
+    def reserve(self, start, len):
+        self.reserved.insert(0, (start, len))
+
+    def to_bin(self):
+        # layout:
+        #   header
+        #   reservation map
+        #   string block
+        #   data block
+
+        datablock = _Node.to_bin(self)
+
+        r = [ struct.pack('>QQ', rsrv[0], rsrv[1]) for rsrv in self.reserved ]
+        reserved = _bincat(r)
+
+        strblock = _pad(self.strings.to_bin(), 4)
+        strblocklen = len(strblock)
+
+        header = _Header()
+        header.magic = _OF_DT_HEADER
+        header.off_mem_rsvmap = _alignup(len(header.to_bin()), 8)
+        header.off_dt_strings = header.off_mem_rsvmap + len(reserved)
+        header.off_dt_struct = header.off_dt_strings + strblocklen
+        header.version = 0x10
+        header.last_comp_version = 0x10
+        header.boot_cpuid_phys = 0
+        header.size_dt_strings = strblocklen
+
+        payload = reserved + \
+                strblock + \
+                datablock + \
+                struct.pack('>I', _OF_DT_END)
+        header.totalsize = len(payload) + _alignup(len(header.to_bin()), 8)
+        return _pad(header.to_bin(), 8) + payload
+
+_host_devtree_root = '/proc/device-tree'
+def _getprop(propname):
+    '''Extract a property from the system's device tree.'''
+    f = file(os.path.join(_host_devtree_root, propname), 'r')
+    data = f.read()
+    f.close()
+    return data
+
+def _copynode(node, dirpath, propfilter):
+    '''Extract all properties from a node in the system's device tree.'''
+    dirents = os.listdir(dirpath)
+    for dirent in dirents:
+        fullpath = os.path.join(dirpath, dirent)
+        st = os.lstat(fullpath)
+        if stat.S_ISDIR(st.st_mode):
+            child = node.addnode(dirent)
+            _copytree(child, fullpath, propfilter)
+        elif stat.S_ISREG(st.st_mode) and propfilter(fullpath):
+            node.addprop(dirent, _getprop(fullpath))
+
+def _copytree(node, dirpath, propfilter):
+    path = os.path.join(_host_devtree_root, dirpath)
+    _copynode(node, path, propfilter)
+
+def build(imghandler):
+    '''Construct a device tree by combining the domain's configuration and
+    the host's device tree.'''
+    root = Tree()
+
+    # 4 pages: start_info, console, store, shared_info
+    root.reserve(0x3ffc000, 0x4000)
+
+    root.addprop('device_type', 'chrp-but-not-really\0')
+    root.addprop('#size-cells', 2)
+    root.addprop('#address-cells', 2)
+    root.addprop('model', 'Momentum,Maple-D\0')
+    root.addprop('compatible', 'Momentum,Maple\0')
+
+    xen = root.addnode('xen')
+    xen.addprop('start-info', 0, 0x3ffc000, 0, 0x1000)
+    xen.addprop('version', 'Xen-3.0-unstable\0')
+    xen.addprop('reg', 0, imghandler.vm.domid, 0, 0)
+    xen.addprop('domain-name', imghandler.vm.getName() + '\0')
+    xencons = xen.addnode('console')
+    xencons.addprop('interrupts', 1, 0)
+
+    # XXX split out RMA node
+    mem = root.addnode('memory@0')
+    totalmem = imghandler.vm.getMemoryTarget() * 1024
+    mem.addprop('reg', 0, 0, 0, totalmem)
+    mem.addprop('device_type', 'memory\0')
+
+    cpus = root.addnode('cpus')
+    cpus.addprop('smp-enabled')
+    cpus.addprop('#size-cells', 0)
+    cpus.addprop('#address-cells', 1)
+
+    # Copy all properties the system firmware gave us, except for 'linux,'
+    # properties, from 'cpus/PowerPC,970@0', once for every vcpu. Hopefully all cpus are
+    # identical...
+    cpu0 = None
+    def _nolinuxprops(fullpath):
+        return not os.path.basename(fullpath).startswith('linux,')
+    for i in range(imghandler.vm.getVCpuCount()):
+        cpu = cpus.addnode('PowerPC,970@0')
+        _copytree(cpu, 'cpus/PowerPC,970@0', _nolinuxprops)
+        # and then overwrite what we need to
+        pft_size = imghandler.vm.info.get('pft-size', 0x14)
+        cpu.setprop('ibm,pft-size', 0, pft_size)
+
+        # set default CPU
+        if cpu0 == None:
+            cpu0 = cpu
+
+    chosen = root.addnode('chosen')
+    chosen.addprop('cpu', cpu0.get_phandle())
+    chosen.addprop('memory', mem.get_phandle())
+    chosen.addprop('linux,stdout-path', '/xen/console\0')
+    chosen.addprop('interrupt-controller', xen.get_phandle())
+    chosen.addprop('bootargs', imghandler.cmdline + '\0')
+    # xc_linux_load.c will overwrite these 64-bit properties later
+    chosen.addprop('linux,initrd-start', 0, 0)
+    chosen.addprop('linux,initrd-end', 0, 0)
+
+    if 1:
+        f = file('/tmp/domU.dtb', 'w')
+        f.write(root.to_bin())
+        f.close()
+
+    return root
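
For reference, the blob returned by Tree.to_bin() is laid out as a fixed header, the memory reservation map, the string block, and finally the structure block terminated by _OF_DT_END; note that this builder places the strings ahead of the structure data within the payload. The nine big-endian words packed by _Header.to_bin() ('>9I') correspond to this C view (a sketch for orientation, not part of the patch):

    #include <stdint.h>

    /* Reference layout of the 36-byte header emitted by _Header.to_bin();
     * every field is a big-endian 32-bit word. */
    struct fdt_header_v16 {
        uint32_t magic;              /* _OF_DT_HEADER, 0xd00dfeed */
        uint32_t totalsize;          /* header + reservation map + strings + structure */
        uint32_t off_dt_struct;      /* offset of the structure (node/property) block */
        uint32_t off_dt_strings;     /* offset of the string block */
        uint32_t off_mem_rsvmap;     /* offset of the reservation map */
        uint32_t version;            /* 0x10 */
        uint32_t last_comp_version;  /* 0x10 */
        uint32_t boot_cpuid_phys;    /* physical id of the boot cpu */
        uint32_t size_dt_strings;    /* length of the string block */
    };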
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/arch.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/arch.py     Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Copyright (C) IBM Corp. 2006
+#
+# Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+
+import os
+
+_types = {
+    "i386": "x86",
+    "i486": "x86",
+    "i586": "x86",
+    "i686": "x86",
+    "x86_64": "x86",
+    "ia64": "ia64",
+    "ppc": "powerpc",
+    "ppc64": "powerpc",
+}
+type = _types.get(os.uname()[4], "unknown")
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/backtrace.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/backtrace.c      Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,193 @@
+/*
+ * Routines providing a simple monitor for use on the PowerMac.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/console.h>
+#include <xen/sched.h>
+#include <xen/symbols.h>
+
+static char namebuf[KSYM_NAME_LEN+1];
+
+/* Shamelessly lifted from Linux xmon; try to keep it pristine. */
+#ifdef __powerpc64__
+#define LRSAVE_OFFSET          0x10
+#define REG_FRAME_MARKER       0x7265677368657265ul    /* "regshere" */
+#define MARKER_OFFSET          0x60
+#define REGS_OFFSET            0x70
+#define REG "%016lX"
+#else
+#define LRSAVE_OFFSET          4
+#define REG_FRAME_MARKER       0x72656773
+#define MARKER_OFFSET          8
+#define REGS_OFFSET            16
+#define REG "%08lX"
+#endif
+
+#define TRAP(regs) ((regs)->entry_vector & ~0xF)
+static int xmon_depth_to_print = 64;
+
+/* Very cheap human name for vector lookup. */
+static
+const char *getvecname(unsigned long vec)
+{
+       char *ret;
+
+       switch (vec) {
+       case 0x100:     ret = "(System Reset)"; break;
+       case 0x200:     ret = "(Machine Check)"; break;
+       case 0x300:     ret = "(Data Access)"; break;
+       case 0x380:     ret = "(Data SLB Access)"; break;
+       case 0x400:     ret = "(Instruction Access)"; break;
+       case 0x480:     ret = "(Instruction SLB Access)"; break;
+       case 0x500:     ret = "(Hardware Interrupt)"; break;
+       case 0x600:     ret = "(Alignment)"; break;
+       case 0x700:     ret = "(Program Check)"; break;
+       case 0x800:     ret = "(FPU Unavailable)"; break;
+       case 0x900:     ret = "(Decrementer)"; break;
+       case 0xc00:     ret = "(System Call)"; break;
+       case 0xd00:     ret = "(Single Step)"; break;
+       case 0xf00:     ret = "(Performance Monitor)"; break;
+       case 0xf20:     ret = "(Altivec Unavailable)"; break;
+       case 0x1300:    ret = "(Instruction Breakpoint)"; break;
+       default: ret = "";
+       }
+       return ret;
+}
+
+static int mread(unsigned long adrs, void *buf, int size)
+{
+    memcpy(buf, (void *)adrs, size);
+    return size;
+}
+
+static void get_function_bounds(unsigned long pc, unsigned long *startp,
+                               unsigned long *endp)
+{
+    unsigned long size, offset;
+       const char *name;
+
+    *startp = *endp = 0;
+       if (pc == 0)
+               return;
+
+    name = symbols_lookup(pc, &size, &offset, namebuf);
+    if (name != NULL) {
+                       *startp = pc - offset;
+                       *endp = pc - offset + size;
+    }
+}
+    
+/* Print an address in numeric and symbolic form (if possible) */
+static void xmon_print_symbol(unsigned long address, const char *mid,
+                              const char *after)
+{
+       const char *name = NULL;
+       unsigned long offset, size;
+
+       printf(REG, address);
+
+    name = symbols_lookup(address, &size, &offset, namebuf);
+       if (name) {
+               printf("%s%s+%#lx/%#lx", mid, name, offset, size);
+       }
+       printf("%s", after);
+}
+
+static void backtrace(
+    unsigned long sp, unsigned long lr, unsigned long pc)
+{
+       unsigned long ip;
+       unsigned long newsp;
+       unsigned long marker;
+       int count = 0;
+       struct cpu_user_regs regs;
+
+       do {
+               if (sp > xenheap_phys_end) {
+                       if (sp != 0)
+                               printf("SP (%lx) is not in xen space\n", sp);
+                       break;
+               }
+
+               if (!mread(sp + LRSAVE_OFFSET, &ip, sizeof(unsigned long))
+                   || !mread(sp, &newsp, sizeof(unsigned long))) {
+                       printf("Couldn't read stack frame at %lx\n", sp);
+                       break;
+               }
+
+               /*
+                * For the first stack frame, try to work out if
+                * LR and/or the saved LR value in the bottommost
+                * stack frame are valid.
+                */
+               if ((pc | lr) != 0) {
+                       unsigned long fnstart, fnend;
+                       unsigned long nextip;
+                       int printip = 1;
+
+                       get_function_bounds(pc, &fnstart, &fnend);
+                       nextip = 0;
+                       if (newsp > sp)
+                               mread(newsp + LRSAVE_OFFSET, &nextip,
+                                     sizeof(unsigned long));
+                       if (lr == ip) {
+                               if (lr >= xenheap_phys_end
+                                   || (fnstart <= lr && lr < fnend))
+                                       printip = 0;
+                       } else if (lr == nextip) {
+                               printip = 0;
+                       } else if (lr < xenheap_phys_end
+                       && !(fnstart <= lr && lr < fnend)) {
+                               printf("[link register   ] ");
+                               xmon_print_symbol(lr, " ", "\n");
+                       }
+                       if (printip) {
+                               printf("["REG"] ", sp);
+                               xmon_print_symbol(ip, " ", " (unreliable)\n");
+                       }
+                       pc = lr = 0;
+
+               } else {
+                       printf("["REG"] ", sp);
+                       xmon_print_symbol(ip, " ", "\n");
+               }
+
+               /* Look for "regshere" marker to see if this is
+                  an exception frame. */
+               if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long))
+                   && marker == REG_FRAME_MARKER) {
+                       if (mread(sp + REGS_OFFSET, &regs, sizeof(regs))
+                           != sizeof(regs)) {
+                               printf("Couldn't read registers at %lx\n",
+                                      sp + REGS_OFFSET);
+                               break;
+                       }
+            printf("--- Exception: %x %s at ", regs.entry_vector,
+                              getvecname(TRAP(&regs)));
+                       pc = regs.pc;
+                       lr = regs.lr;
+                       xmon_print_symbol(pc, " ", "\n");
+               }
+
+               if (newsp == 0)
+                       break;
+        
+               sp = newsp;
+       } while (count++ < xmon_depth_to_print);
+}
+
+void show_backtrace(ulong sp, ulong lr, ulong pc)
+{
+    console_start_sync();
+    backtrace(sp, lr, pc);
+    console_end_sync();
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/memory.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/memory.c Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,206 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Dan Poff <poff@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+#include <xen/sched.h>
+#include <xen/mm.h>
+#include "of-devtree.h"
+#include "oftree.h"
+
+unsigned long xenheap_phys_end;
+struct membuf {
+    ulong start;
+    ulong size;
+};
+
+typedef void (*walk_mem_fn)(struct membuf *, uint);
+
+static ulong free_xenheap(ulong start, ulong end)
+{
+    start = ALIGN_UP(start, PAGE_SIZE);
+    end = ALIGN_DOWN(end, PAGE_SIZE);
+
+    printk("%s: 0x%lx - 0x%lx\n", __func__, start, end);
+
+    if (oftree <= end && oftree >= start) {
+        printk("%s:     Go around the devtree: 0x%lx - 0x%lx\n",
+               __func__, oftree, oftree_end);
+        init_xenheap_pages(start, ALIGN_DOWN(oftree, PAGE_SIZE));
+        init_xenheap_pages(ALIGN_UP(oftree_end, PAGE_SIZE), end);
+    } else {
+        init_xenheap_pages(start, end);
+    }
+
+    return ALIGN_UP(end, PAGE_SIZE);
+}
+
+static void set_max_page(struct membuf *mb, uint entries)
+{
+    int i;
+
+    for (i = 0; i < entries; i++) {
+        ulong end_page;
+
+        end_page = (mb[i].start + mb[i].size) >> PAGE_SHIFT;
+
+        if (end_page > max_page)
+            max_page = end_page;
+    }
+}
+
+/* mark all memory from modules onward as unused */
+static void heap_init(struct membuf *mb, uint entries)
+{
+    int i;
+    ulong start_blk;
+    ulong end_blk = 0;
+
+       for (i = 0; i < entries; i++) {
+           start_blk = mb[i].start;
+           end_blk = start_blk + mb[i].size;
+
+           if (start_blk < xenheap_phys_end) {
+            if (xenheap_phys_end > end_blk) {
+                panic("xenheap spans LMB\n");
+            }
+            if (xenheap_phys_end == end_blk)
+                continue;
+
+            start_blk = xenheap_phys_end;
+        }
+
+        init_boot_pages(start_blk, end_blk);
+        total_pages += (end_blk - start_blk) >> PAGE_SHIFT;
+       }
+}
+
+static void ofd_walk_mem(void *m, walk_mem_fn fn)
+{
+    ofdn_t n;
+    uint p_len;
+    struct membuf mb[8];
+    static char name[] = "memory";
+
+    n = ofd_node_find_by_prop(m, OFD_ROOT, "device_type", name, sizeof(name));
+    while (n > 0) {
+
+        p_len = ofd_getprop(m, n, "reg", mb, sizeof (mb));
+        if (p_len <= 0) {
+            panic("ofd_getprop(): failed\n");
+        }
+        if (p_len > sizeof(mb))
+            panic("%s: buffer is not big enuff for this firmware: "
+                  "0x%lx < 0x%x\n", __func__, sizeof(mb), p_len);
+
+        fn(mb, p_len / sizeof(mb[0]));
+        n = ofd_node_find_next(m, n);
+    }
+}
+
+static void setup_xenheap(module_t *mod, int mcount)
+{
+    int i;
+    ulong freemem;
+
+    freemem = ALIGN_UP((ulong)_end, PAGE_SIZE);
+
+    for (i = 0; i < mcount; i++) {
+        u32 s;
+
+        if(mod[i].mod_end == mod[i].mod_start)
+            continue;
+
+        s = ALIGN_DOWN(mod[i].mod_start, PAGE_SIZE);
+
+        if (mod[i].mod_start > (ulong)_start &&
+            mod[i].mod_start < (ulong)_end) {
+            /* mod was linked in */
+            continue;
+        }
+
+        if (s < freemem) 
+            panic("module addresses must assend\n");
+
+        free_xenheap(freemem, s);
+        freemem = ALIGN_UP(mod[i].mod_end, PAGE_SIZE);
+        
+    }
+
+    /* the rest of the xenheap, starting at the end of modules */
+    free_xenheap(freemem, xenheap_phys_end);
+}
+
+void memory_init(module_t *mod, int mcount)
+{
+    ulong eomem;
+    ulong heap_start, heap_size;
+
+    printk("Physical RAM map:\n");
+
+    /* lets find out how much memory there is and set max_page */
+    max_page = 0;
+    ofd_walk_mem((void *)oftree, set_max_page);
+    eomem = max_page << PAGE_SHIFT;
+
+    if (eomem == 0){
+        panic("ofd_walk_mem() failed\n");
+    }
+    printk("End of RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10);
+
+    /* Architecturally the first 4 pages are exception handlers; we
+     * will also be copying down some code there */
+    heap_start = 4 << PAGE_SHIFT;
+    if (oftree < (ulong)_start)
+        heap_start = ALIGN_UP(oftree_end, PAGE_SIZE);
+
+    heap_start = init_boot_allocator(heap_start);
+    if (heap_start > (ulong)_start) {
+        panic("space below _start (%p) is not enough memory "
+              "for heap (0x%lx)\n", _start, heap_start);
+    }
+
+    /* allow everything else to be allocated */
+    total_pages = 0;
+    ofd_walk_mem((void *)oftree, heap_init);
+    if (total_pages == 0)
+        panic("heap_init: failed");
+
+    if (total_pages > max_page)
+        panic("total_pages > max_page: 0x%lx > 0x%lx\n",
+              total_pages, max_page);
+
+    printk("total_pages: 0x%016lx\n", total_pages);
+
+    init_frametable();
+    end_boot_allocator();
+
+    /* Add memory between the beginning of the heap and the beginning
+     * of our text */
+    free_xenheap(heap_start, (ulong)_start);
+
+    heap_size = xenheap_phys_end - heap_start;
+    printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10);
+
+    setup_xenheap(mod, mcount);
+
+    eomem = avail_domheap_pages();
+    printk("Domheap pages: 0x%lx %luMB (%lukB)\n", eomem,
+           (eomem << PAGE_SHIFT) >> 20,
+           (eomem << PAGE_SHIFT) >> 10);
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/ofd_fixup_memory.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/ofd_fixup_memory.c       Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,107 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <public/xen.h>
+#include "of-devtree.h"
+#include "oftree.h"
+
+static char memory[] = "memory";
+
+struct mem_reg {
+    u64 addr;
+    u64 sz;
+};
+
+static void ofd_memory_clean(void *m)
+{
+    ofdn_t old;
+
+    /* Remove all old memory props */
+    do {
+        old = ofd_node_find_by_prop(m, OFD_ROOT, "device_type",
+                                    memory, sizeof(memory));
+        if (old <= 0)
+            break;
+
+        ofd_node_prune(m, old);
+    } while (1);
+}
+
+static ofdn_t ofd_memory_node_create(
+    void *m, ofdn_t p, const char *ppath, const char *name,
+    const char *dt, ulong start, ulong size)
+{
+    struct mem_reg reg;
+    char path[128];
+    ulong l;
+    ofdn_t n;
+    ulong nl = strlen(name) + 1;
+    ulong dtl = strlen(dt) + 1;
+
+    l = snprintf(path, sizeof (path), "%s/%s@%lx", ppath, name, start);
+    n = ofd_node_add(m, p, path, l + 1);
+    ofd_prop_add(m, n, "name", name, nl);
+    ofd_prop_add(m, n, "device_type", dt, dtl);
+
+    /* physical addresses usable without regard to OF */
+    reg.addr = start;
+    reg.sz = size;
+    ofd_prop_add(m, n, "reg", &reg, sizeof (reg));
+
+    return n;
+}
+
+static void ofd_memory_rma_node(void *m, struct domain *d)
+{
+    ulong size = rma_size(d->arch.rma_order);
+    ofdn_t n;
+
+    n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory, 0, size);
+    BUG_ON(n <= 0);
+}
+
+static void ofd_memory_extent_nodes(void *m, struct domain *d)
+{
+    ulong start;
+    ulong size;
+    ofdn_t n;
+    struct page_extents *pe;
+
+    list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
+
+        start = pe->pfn << PAGE_SHIFT;
+        size = 1UL << (pe->order + PAGE_SHIFT);
+
+        n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory,
+                                    start, size);
+
+        BUG_ON(n <= 0);
+    }
+}
+
+void ofd_memory_props(void *m, struct domain *d)
+{
+    ofd_memory_clean(m);
+    ofd_memory_rma_node(m, d);
+    ofd_memory_extent_nodes(m,d);
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/shadow.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/shadow.c Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,159 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/shadow.h>
+
+static ulong htab_calc_sdr1(ulong htab_addr, ulong log_htab_size)
+{
+    ulong sdr1_htabsize;
+
+    ASSERT((htab_addr & ((1UL << log_htab_size) - 1)) == 0);
+    ASSERT(log_htab_size <= SDR1_HTABSIZE_MAX);
+    ASSERT(log_htab_size >= HTAB_MIN_LOG_SIZE);
+
+    sdr1_htabsize = log_htab_size - LOG_PTEG_SIZE - SDR1_HTABSIZE_BASEBITS;
+
+    return (htab_addr | (sdr1_htabsize & SDR1_HTABSIZE_MASK));
+}
+
+static ulong htab_alloc(struct domain *d, uint order)
+{
+    ulong htab_raddr;
+    uint log_htab_bytes = order + PAGE_SHIFT;
+    uint htab_bytes = 1UL << log_htab_bytes;
+
+    /* we use xenheap pages to keep domheap pages useful for domains */
+
+    if (order < 6)
+        order = 6;              /* architectural minimum is 2^18 */
+    if (order > 34)
+        order = 34;             /* architectural maximum is 2^46 */
+
+    htab_raddr = (ulong)alloc_xenheap_pages(order);
+    if (htab_raddr > 0) {
+        ASSERT((htab_raddr & (htab_bytes - 1)) == 0);
+
+        d->arch.htab.order = order;
+        d->arch.htab.log_num_ptes = log_htab_bytes - LOG_PTE_SIZE;
+        d->arch.htab.sdr1 = htab_calc_sdr1(htab_raddr, log_htab_bytes);
+        d->arch.htab.map = (union pte *)htab_raddr;
+    }
+    return htab_raddr;
+}
+
+static void htab_free(struct domain *d)
+{
+    ulong htab_raddr = GET_HTAB(d);
+
+    free_xenheap_pages((void *)htab_raddr, d->arch.htab.order);
+}
+
+
+unsigned int shadow_teardown(struct domain *d)
+{
+    htab_free(d);
+    return 0;
+}
+
+unsigned int shadow_set_allocation(struct domain *d, 
+                                    unsigned int megabytes,
+                                    int *preempted)
+{
+    unsigned int rc = 0;
+    uint pages;
+    uint p;
+    uint order;
+    ulong addr;
+    
+
+    if (d->arch.htab.order)
+        return -EBUSY;
+
+    if (megabytes == 0) {
+        /* old management tools */
+        megabytes = 1;          /* 1/64th of 64M */
+        printk("%s: Fix management tools to set and get shadow/htab values\n"
+               "    using %d MiB htab\n",
+               __func__, megabytes);
+    }
+    pages = megabytes << (20 - PAGE_SHIFT);
+    order = fls(pages) - 1;     /* log2 truncated */
+    if (pages & ((1 << order) - 1))
+        ++order;                /* round up */
+
+    addr = htab_alloc(d, order);
+
+    printk("%s: ibm,fpt-size should be: 0x%x\n", __func__,
+           d->arch.htab.log_num_ptes + LOG_PTE_SIZE);
+
+    if (addr == 0)
+        return -ENOMEM;
+
+    /* XXX make this a continuation */
+    for (p = 0; p < (1 << order); p++)
+        clear_page((void *)(addr + (p << PAGE_SHIFT)));
+
+    return rc;
+}
+
+int shadow_domctl(struct domain *d, 
+                                 xen_domctl_shadow_op_t *sc,
+                                 XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+{
+    if ( unlikely(d == current->domain) )
+    {
+        DPRINTK("Don't try to do a shadow op on yourself!\n");
+        return -EINVAL;
+    }
+
+    switch ( sc->op )
+    {
+    case XEN_DOMCTL_SHADOW_OP_OFF:
+         DPRINTK("Shadow is mandatory!\n");
+         return -EINVAL;
+
+    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
+        sc->mb = shadow_get_allocation(d);
+        return 0;
+
+    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: {
+        int rc;
+        int preempted = 0;
+
+        rc = shadow_set_allocation(d, sc->mb, &preempted);
+
+        if (preempted)
+            /* Not finished.  Set up to re-run the call. */
+            rc = hypercall_create_continuation(
+                __HYPERVISOR_domctl, "h", u_domctl);
+        else 
+            /* Finished.  Return the new allocation */
+            sc->mb = shadow_get_allocation(d);
+        return rc;
+    }
+
+    default:
+        printk("Bad shadow op %u\n", sc->op);
+        BUG();
+        return -EINVAL;
+    }
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/htab.c
--- a/xen/arch/powerpc/htab.c   Fri Sep 01 12:52:12 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright (C) IBM Corp. 2005
- *
- * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-
-static ulong htab_calc_sdr1(ulong htab_addr, ulong log_htab_size)
-{
-    ulong sdr1_htabsize;
-
-    ASSERT((htab_addr & ((1UL << log_htab_size) - 1)) == 0);
-    ASSERT(log_htab_size <= SDR1_HTABSIZE_MAX);
-    ASSERT(log_htab_size >= HTAB_MIN_LOG_SIZE);
-
-    sdr1_htabsize = log_htab_size - LOG_PTEG_SIZE - SDR1_HTABSIZE_BASEBITS;
-
-    return (htab_addr | (sdr1_htabsize & SDR1_HTABSIZE_MASK));
-}
-
-void htab_alloc(struct domain *d, uint order)
-{
-    ulong htab_raddr;
-    ulong log_htab_bytes = order + PAGE_SHIFT;
-    ulong htab_bytes = 1UL << log_htab_bytes;
-
-    /* XXX use alloc_domheap_pages instead? */
-    htab_raddr = (ulong)alloc_xenheap_pages(order);
-    ASSERT(htab_raddr != 0);
-    /* XXX check alignment guarantees */
-    ASSERT((htab_raddr & (htab_bytes - 1)) == 0);
-
-    /* XXX slow. move memset out to service partition? */
-    memset((void *)htab_raddr, 0, htab_bytes);
-
-    d->arch.htab.order = order;
-    d->arch.htab.log_num_ptes = log_htab_bytes - LOG_PTE_SIZE;
-    d->arch.htab.sdr1 = htab_calc_sdr1(htab_raddr, log_htab_bytes);
-    d->arch.htab.map = (union pte *)htab_raddr;
-    d->arch.htab.shadow = xmalloc_array(ulong,
-                                        1UL << d->arch.htab.log_num_ptes);
-    ASSERT(d->arch.htab.shadow != NULL);
-}
-
-void htab_free(struct domain *d)
-{
-    ulong htab_raddr = GET_HTAB(d);
-
-    free_xenheap_pages((void *)htab_raddr, d->arch.htab.order);
-    xfree(d->arch.htab.shadow);
-}
-

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

