
[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User awilliam@xxxxxxxxxxxx
# Node ID 4762d73ced42da37b957cd465b191b4f9c8ea3b7
# Parent  ed56ef3e9716c407351918424e2c1054a249c4f9
# Parent  35c724302bdd1339e17dad43085c841917a5dd88
merge with xen-unstable.hg
---
 xen/arch/powerpc/boot/boot32.S                     |   75 --
 xen/arch/powerpc/boot/start.S                      |   51 -
 xen/arch/powerpc/delay.c                           |   37 -
 xen/arch/powerpc/mambo.S                           |   64 --
 xen/include/asm-powerpc/misc.h                     |   33 -
 xen/include/asm-powerpc/uaccess.h                  |   38 -
 .hgignore                                          |   13 
 config/powerpc64.mk                                |    2 
 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c      |   10 
 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c    |   25 
 tools/blktap/drivers/blktapctrl.c                  |   49 +
 tools/examples/external-device-migrate             |    4 
 tools/ioemu/target-i386-dm/exec-dm.c               |   11 
 tools/ioemu/vl.c                                   |    7 
 tools/libaio/src/syscall-ppc.h                     |    6 
 tools/libxc/powerpc64/Makefile                     |    4 
 tools/libxc/powerpc64/flatdevtree.c                |   23 
 tools/libxc/powerpc64/flatdevtree.h                |    2 
 tools/libxc/powerpc64/utils.c                      |  211 ++++++
 tools/libxc/powerpc64/utils.h                      |   38 +
 tools/libxc/powerpc64/xc_linux_build.c             |  292 ++-------
 tools/libxc/powerpc64/xc_prose_build.c             |  323 ++++++++++
 tools/libxc/xc_linux_build.c                       |   16 
 tools/libxc/xc_load_elf.c                          |   28 
 tools/libxc/xenctrl.h                              |    4 
 tools/libxc/xenguest.h                             |   15 
 tools/libxc/xg_private.h                           |    1 
 tools/libxen/include/xen_console.h                 |    4 
 tools/libxen/include/xen_host.h                    |    4 
 tools/libxen/include/xen_host_cpu.h                |    4 
 tools/libxen/include/xen_network.h                 |    4 
 tools/libxen/include/xen_pif.h                     |    4 
 tools/libxen/include/xen_sr.h                      |    4 
 tools/libxen/include/xen_user.h                    |    4 
 tools/libxen/include/xen_vdi.h                     |    4 
 tools/libxen/include/xen_vif.h                     |    4 
 tools/libxen/include/xen_vm.h                      |   66 +-
 tools/libxen/include/xen_vtpm.h                    |    4 
 tools/libxen/src/xen_vm.c                          |  119 +++
 tools/python/xen/lowlevel/xc/xc.c                  |   83 ++
 tools/python/xen/xend/FlatDeviceTree.py            |   94 ++
 tools/python/xen/xend/XendDomain.py                |    4 
 tools/python/xen/xend/XendDomainInfo.py            |   30 
 tools/python/xen/xend/image.py                     |   68 ++
 tools/python/xen/xend/server/DevController.py      |   35 +
 tools/python/xen/xend/server/blkif.py              |    6 
 tools/python/xen/xm/main.py                        |   18 
 tools/xenstore/xenstored_domain.c                  |    2 
 xen/arch/powerpc/Makefile                          |   69 --
 xen/arch/powerpc/backtrace.c                       |   34 -
 xen/arch/powerpc/bitops.c                          |  124 +--
 xen/arch/powerpc/boot_of.c                         |  621 +++++++++++++------
 xen/arch/powerpc/cmdline.c                         |   24 
 xen/arch/powerpc/crash.c                           |    1 
 xen/arch/powerpc/dart.c                            |   13 
 xen/arch/powerpc/dart_u4.c                         |    7 
 xen/arch/powerpc/domain.c                          |   33 -
 xen/arch/powerpc/domain_build.c                    |    3 
 xen/arch/powerpc/domctl.c                          |    6 
 xen/arch/powerpc/exceptions.c                      |   34 -
 xen/arch/powerpc/exceptions.h                      |    7 
 xen/arch/powerpc/external.c                        |   30 
 xen/arch/powerpc/gdbstub.c                         |    1 
 xen/arch/powerpc/iommu.c                           |   34 -
 xen/arch/powerpc/machine_kexec.c                   |    6 
 xen/arch/powerpc/memory.c                          |  104 ++-
 xen/arch/powerpc/mm.c                              |  235 ++++++-
 xen/arch/powerpc/mpic.c                            |  127 +---
 xen/arch/powerpc/mpic_init.c                       |   54 +
 xen/arch/powerpc/numa.c                            |    1 
 xen/arch/powerpc/of-devtree.h                      |   40 -
 xen/arch/powerpc/of-devwalk.c                      |   14 
 xen/arch/powerpc/of_handler/console.c              |   12 
 xen/arch/powerpc/ofd_fixup.c                       |   12 
 xen/arch/powerpc/ofd_fixup_memory.c                |   18 
 xen/arch/powerpc/papr/xlate.c                      |  259 ++++----
 xen/arch/powerpc/powerpc64/exceptions.S            |   18 
 xen/arch/powerpc/powerpc64/io.S                    |   65 +-
 xen/arch/powerpc/powerpc64/ppc970.c                |   71 +-
 xen/arch/powerpc/powerpc64/ppc970_machinecheck.c   |    7 
 xen/arch/powerpc/powerpc64/ppc970_scom.c           |  175 +++--
 xen/arch/powerpc/powerpc64/scom.h                  |   39 +
 xen/arch/powerpc/powerpc64/traps.c                 |    4 
 xen/arch/powerpc/rtas.c                            |   84 ++
 xen/arch/powerpc/rtas.h                            |   34 +
 xen/arch/powerpc/setup.c                           |  144 ++--
 xen/arch/powerpc/shadow.c                          |    7 
 xen/arch/powerpc/smp.c                             |  192 +++++-
 xen/arch/powerpc/smpboot.c                         |   29 
 xen/arch/powerpc/start.S                           |   62 +
 xen/arch/powerpc/systemsim.S                       |   64 ++
 xen/arch/powerpc/time.c                            |    3 
 xen/arch/powerpc/usercopy.c                        |  248 -------
 xen/arch/powerpc/xen.lds.S                         |    8 
 xen/arch/x86/crash.c                               |    4 
 xen/arch/x86/domain_build.c                        |    8 
 xen/arch/x86/mm.c                                  |   12 
 xen/arch/x86/mm/shadow/common.c                    |    4 
 xen/arch/x86/mm/shadow/multi.c                     |    3 
 xen/arch/x86/numa.c                                |    2 
 xen/common/Makefile                                |    2 
 xen/common/domain.c                                |   25 
 xen/common/elf.c                                   |   27 
 xen/common/gdbstub.c                               |    1 
 xen/common/kexec.c                                 |   14 
 xen/common/sched_credit.c                          |  663 +++++++++------------
 xen/common/xencomm.c                               |  316 ++++++++++
 xen/include/asm-powerpc/acpi.h                     |    2 
 xen/include/asm-powerpc/cache.h                    |    1 
 xen/include/asm-powerpc/config.h                   |    4 
 xen/include/asm-powerpc/debugger.h                 |   70 +-
 xen/include/asm-powerpc/delay.h                    |   16 
 xen/include/asm-powerpc/domain.h                   |    5 
 xen/include/asm-powerpc/flushtlb.h                 |    1 
 xen/include/asm-powerpc/grant_table.h              |   12 
 xen/include/asm-powerpc/guest_access.h             |   78 --
 xen/include/asm-powerpc/mach-default/irq_vectors.h |   22 
 xen/include/asm-powerpc/mm.h                       |  100 ++-
 xen/include/asm-powerpc/msr.h                      |    4 
 xen/include/asm-powerpc/numa.h                     |    2 
 xen/include/asm-powerpc/page.h                     |    5 
 xen/include/asm-powerpc/powerpc64/string.h         |    3 
 xen/include/asm-powerpc/processor.h                |  108 ++-
 xen/include/asm-powerpc/smp.h                      |   22 
 xen/include/asm-powerpc/spinlock.h                 |   33 -
 xen/include/asm-powerpc/xenoprof.h                 |   26 
 xen/include/asm-x86/numa.h                         |    2 
 xen/include/asm-x86/page.h                         |   36 -
 xen/include/asm-x86/shadow.h                       |    3 
 xen/include/asm-x86/x86_32/page-2level.h           |    6 
 xen/include/asm-x86/x86_32/page-3level.h           |   29 
 xen/include/asm-x86/x86_64/page.h                  |    6 
 xen/include/public/arch-powerpc.h                  |    2 
 xen/include/public/domctl.h                        |    8 
 xen/include/public/io/fbif.h                       |   88 +-
 xen/include/public/io/kbdif.h                      |   70 +-
 xen/include/public/io/pciif.h                      |   44 -
 xen/include/public/io/xenbus.h                     |   12 
 xen/include/public/memory.h                        |    2 
 xen/include/public/sysctl.h                        |    2 
 xen/include/public/trace.h                         |    2 
 xen/include/public/xenoprof.h                      |    2 
 xen/include/xen/elfcore.h                          |    4 
 xen/include/xen/sched.h                            |    2 
 xen/include/xen/xencomm.h                          |  115 +++
 145 files changed, 4717 insertions(+), 2437 deletions(-)

diff -r ed56ef3e9716 -r 4762d73ced42 .hgignore
--- a/.hgignore Thu Dec 14 08:54:54 2006 -0700
+++ b/.hgignore Thu Dec 14 08:57:36 2006 -0700
@@ -53,6 +53,8 @@
 ^docs/user/labels\.pl$
 ^docs/user/user\.css$
 ^docs/user/user\.html$
+^docs/xen-api/vm_lifecycle.eps$
+^docs/xen-api/xenapi-datamodel-graph.eps$
 ^extras/mini-os/h/hypervisor-ifs$
 ^extras/mini-os/h/xen-public$
 ^extras/mini-os/mini-os\..*$
@@ -98,17 +100,15 @@
 ^tools/firmware/.*\.bin$
 ^tools/firmware/.*\.sym$
 ^tools/firmware/.*bios/.*bios.*\.txt$
+^tools/firmware/hvmloader/acpi/acpigen$
 ^tools/firmware/hvmloader/hvmloader$
 ^tools/firmware/hvmloader/roms\.h$
 ^tools/firmware/rombios/BIOS-bochs-[^/]*$
 ^tools/firmware/rombios/_rombios[^/]*_\.c$
 ^tools/firmware/rombios/rombios[^/]*\.s$
-^tools/firmware/vmxassist/acpi\.h$
 ^tools/firmware/vmxassist/gen$
 ^tools/firmware/vmxassist/offsets\.h$
-^tools/firmware/vmxassist/roms\.h$
 ^tools/firmware/vmxassist/vmxassist$
-^tools/firmware/vmxassist/vmxloader$
 ^tools/ioemu/\.pc/.*$
 ^tools/ioemu/config-host\.h$
 ^tools/ioemu/config-host\.mak$
@@ -220,10 +220,11 @@
 ^xen/arch/powerpc/dom0\.bin$
 ^xen/arch/powerpc/asm-offsets\.s$
 ^xen/arch/powerpc/firmware$
-^xen/arch/powerpc/firmware_image$
+^xen/arch/powerpc/firmware_image.bin$
 ^xen/arch/powerpc/xen\.lds$
-^xen/arch/powerpc/.xen-syms$
-^xen/arch/powerpc/xen-syms.S$
+^xen/arch/powerpc/\.xen-syms$
+^xen/arch/powerpc/xen-syms\.S$
+^xen/arch/powerpc/cmdline.dep$
 ^unmodified_drivers/linux-2.6/\.tmp_versions
 ^unmodified_drivers/linux-2.6/.*\.cmd$
 ^unmodified_drivers/linux-2.6/.*\.ko$
diff -r ed56ef3e9716 -r 4762d73ced42 config/powerpc64.mk
--- a/config/powerpc64.mk       Thu Dec 14 08:54:54 2006 -0700
+++ b/config/powerpc64.mk       Thu Dec 14 08:57:36 2006 -0700
@@ -1,5 +1,7 @@ CONFIG_POWERPC := y
 CONFIG_POWERPC := y
 CONFIG_POWERPC_$(XEN_OS) := y
 
+CONFIG_XENCOMM := y
+
 CFLAGS += -DELFSIZE=64
 LIBDIR := lib
diff -r ed56ef3e9716 -r 4762d73ced42 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c     Thu Dec 14 08:57:36 2006 -0700
@@ -43,17 +43,17 @@ fastcall void do_fixup_4gb_segment(struc
        char info[100];
        int i;
 
-       if (test_and_set_bit(0, &printed))
+       /* Ignore statically-linked init. */
+       if (current->tgid == 1)
                return;
-
-        if (current->tgid == 1) /* Ignore statically linked init */
-                return; 
             
        HYPERVISOR_vm_assist(
                VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify);
 
+       if (test_and_set_bit(0, &printed))
+               return;
+
        sprintf(info, "%s (pid=%d)", current->comm, current->tgid);
-
 
        DP("");
        DP("***************************************************************");
diff -r ed56ef3e9716 -r 4762d73ced42 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Thu Dec 14 08:57:36 2006 -0700
@@ -110,6 +110,18 @@ set_cpu_sibling_map(int cpu)
        cpu_data[cpu].booted_cores = 1;
 }
 
+static void
+remove_siblinginfo(int cpu)
+{
+       phys_proc_id[cpu] = BAD_APICID;
+       cpu_core_id[cpu]  = BAD_APICID;
+
+       cpus_clear(cpu_sibling_map[cpu]);
+       cpus_clear(cpu_core_map[cpu]);
+
+       cpu_data[cpu].booted_cores = 0;
+}
+
 static int xen_smp_intr_init(unsigned int cpu)
 {
        int rc;
@@ -358,18 +370,6 @@ static int __init initialize_cpu_present
 }
 core_initcall(initialize_cpu_present_map);
 
-static void
-remove_siblinginfo(int cpu)
-{
-       phys_proc_id[cpu] = BAD_APICID;
-       cpu_core_id[cpu]  = BAD_APICID;
-
-       cpus_clear(cpu_sibling_map[cpu]);
-       cpus_clear(cpu_core_map[cpu]);
-
-       cpu_data[cpu].booted_cores = 0;
-}
-
 int __cpu_disable(void)
 {
        cpumask_t map = cpu_online_map;
@@ -432,7 +432,6 @@ int __devinit __cpu_up(unsigned int cpu)
        /* This must be done before setting cpu_online_map */
        set_cpu_sibling_map(cpu);
        wmb();
-
 
        rc = xen_smp_intr_init(cpu);
        if (rc) {
diff -r ed56ef3e9716 -r 4762d73ced42 tools/blktap/drivers/blktapctrl.c
--- a/tools/blktap/drivers/blktapctrl.c Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/blktap/drivers/blktapctrl.c Thu Dec 14 08:57:36 2006 -0700
@@ -57,6 +57,8 @@
 #include "blktapctrl.h"
 #include "tapdisk.h"
 
+#define PIDFILE "/var/run/blktapctrl.pid"
+
 #define NUM_POLL_FDS 2
 #define MSG_SIZE 4096
 #define MAX_TIMEOUT 10
@@ -622,6 +624,42 @@ static void print_drivers(void)
                DPRINTF("Found driver: [%s]\n",dtypes[i]->name);
 } 
 
+static void write_pidfile(long pid)
+{
+       char buf[100];
+       int len;
+       int fd;
+       int flags;
+
+       fd = open(PIDFILE, O_RDWR | O_CREAT, 0600);
+       if (fd == -1) {
+               DPRINTF("Opening pid file failed (%d)\n", errno);
+               exit(1);
+       }
+
+       /* We exit silently if daemon already running. */
+       if (lockf(fd, F_TLOCK, 0) == -1)
+               exit(0);
+
+       /* Set FD_CLOEXEC, so that tapdisk doesn't get this file
+          descriptor. */
+       if ((flags = fcntl(fd, F_GETFD)) == -1) {
+               DPRINTF("F_GETFD failed (%d)\n", errno);
+               exit(1);
+       }
+       flags |= FD_CLOEXEC;
+       if (fcntl(fd, F_SETFD, flags) == -1) {
+               DPRINTF("F_SETFD failed (%d)\n", errno);
+               exit(1);
+       }
+
+       len = sprintf(buf, "%ld\n", pid);
+       if (write(fd, buf, len) != len) {
+               DPRINTF("Writing pid file failed (%d)\n", errno);
+               exit(1);
+       }
+}
+
 int main(int argc, char *argv[])
 {
        char *devname;
@@ -681,6 +719,7 @@ int main(int argc, char *argv[])
        ioctl(ctlfd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
 
        process = getpid();
+       write_pidfile(process);
        ret = ioctl(ctlfd, BLKTAP_IOCTL_SENDPID, process );
 
        /*Static pollhooks*/
@@ -716,3 +755,13 @@ int main(int argc, char *argv[])
        closelog();
        return -1;
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r ed56ef3e9716 -r 4762d73ced42 tools/examples/external-device-migrate
--- a/tools/examples/external-device-migrate    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/examples/external-device-migrate    Thu Dec 14 08:57:36 2006 -0700
@@ -60,8 +60,8 @@ function evaluate_params()
                -step)          step=$2; shift 2;;
                -host)          host=$2; shift 2;;
                -domname)       domname=$2; shift 2;;
-               -type)          type=$2; shift 2;;
-               -subtype)       subtype=$2; shift 2;;
+               -type)          typ=$2; shift 2;;
+               -subtype)       stype=$2; shift 2;;
                -recover)       recover=1; shift;;
                -help)          ext_dev_migrate_usage; exit 0;;
                *)              break;;
diff -r ed56ef3e9716 -r 4762d73ced42 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/ioemu/target-i386-dm/exec-dm.c      Thu Dec 14 08:57:36 2006 -0700
@@ -439,7 +439,12 @@ void cpu_physical_memory_rw(target_phys_
     int l, io_index;
     uint8_t *ptr;
     uint32_t val;
-    
+
+#if defined(__i386__) || defined(__x86_64__)
+    static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+    pthread_mutex_lock(&mutex);
+#endif
+
     while (len > 0) {
         /* How much can we copy before the next page boundary? */
         l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK); 
@@ -504,6 +509,10 @@ void cpu_physical_memory_rw(target_phys_
         buf += l;
         addr += l;
     }
+
+#if defined(__i386__) || defined(__x86_64__)
+    pthread_mutex_unlock(&mutex);
+#endif
 }
 #endif
 
diff -r ed56ef3e9716 -r 4762d73ced42 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/ioemu/vl.c  Thu Dec 14 08:57:36 2006 -0700
@@ -5820,8 +5820,8 @@ static int qemu_map_cache_init(unsigned 
     if (nr_pages < max_pages)
         max_pages = nr_pages;
 
-    nr_buckets = (max_pages << PAGE_SHIFT) >> MCACHE_BUCKET_SHIFT;
-
+    nr_buckets   = max_pages + (1UL << (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)) - 1;
+    nr_buckets >>= (MCACHE_BUCKET_SHIFT - PAGE_SHIFT);
     fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets);
 
     mapcache_entry = malloc(nr_buckets * sizeof(struct map_cache));
@@ -5857,8 +5857,7 @@ uint8_t *qemu_map_cache(target_phys_addr
 
     entry = &mapcache_entry[address_index % nr_buckets];
 
-    if (entry->vaddr_base == NULL || entry->paddr_index != address_index)
-    { 
+    if (entry->vaddr_base == NULL || entry->paddr_index != address_index) {
         /* We need to remap a bucket. */
         uint8_t *vaddr_base;
         unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT];
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libaio/src/syscall-ppc.h
--- a/tools/libaio/src/syscall-ppc.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libaio/src/syscall-ppc.h    Thu Dec 14 08:57:36 2006 -0700
@@ -1,3 +1,6 @@
+#include <asm/unistd.h>
+#include <errno.h>
+
 #define __NR_io_setup          227
 #define __NR_io_destroy                228
 #define __NR_io_getevents      229
@@ -9,7 +12,7 @@
  * "sc; bnslr" sequence) and CR (where only CR0.SO is clobbered to signal
  * an error return status).
  */
-
+#ifndef __syscall_nr
 #define __syscall_nr(nr, type, name, args...)                          \
        unsigned long __sc_ret, __sc_err;                               \
        {                                                               \
@@ -37,6 +40,7 @@
        }                                                               \
        if (__sc_err & 0x10000000) return -((int)__sc_ret);             \
        return (type) __sc_ret
+#endif
 
 #define __sc_loadargs_0(name, dummy...)                                        \
        __sc_0 = __NR_##name
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/Makefile
--- a/tools/libxc/powerpc64/Makefile    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/powerpc64/Makefile    Thu Dec 14 08:57:36 2006 -0700
@@ -1,4 +1,6 @@ GUEST_SRCS-y += powerpc64/xc_linux_build
+GUEST_SRCS-y += powerpc64/flatdevtree.c
 GUEST_SRCS-y += powerpc64/xc_linux_build.c
-GUEST_SRCS-y += powerpc64/flatdevtree.c
+GUEST_SRCS-y += powerpc64/xc_prose_build.c
+GUEST_SRCS-y += powerpc64/utils.c
 
 CTRL_SRCS-y += powerpc64/xc_memory.c
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/flatdevtree.c
--- a/tools/libxc/powerpc64/flatdevtree.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/powerpc64/flatdevtree.c       Thu Dec 14 08:57:36 2006 -0700
@@ -220,6 +220,29 @@ void ft_add_rsvmap(struct ft_cxt *cxt, u
        cxt->p_anchor = cxt->pres + 16; /* over the terminator */
 }
 
+int ft_set_rsvmap(void *bphp, int m, u64 physaddr, u64 size)
+{
+       const struct boot_param_header *bph = bphp;
+       u64 *p_rsvmap = (u64 *)
+               ((char *)bph + be32_to_cpu(bph->off_mem_rsvmap));
+       u32 i;
+
+       for (i = 0;; i++) {
+               u64 addr, sz;
+
+               addr = be64_to_cpu(p_rsvmap[i * 2]);
+               sz = be64_to_cpu(p_rsvmap[i * 2 + 1]);
+               if (addr == 0 && size == 0)
+                       break;
+               if (m == i) {
+                       p_rsvmap[i * 2] = cpu_to_be64(physaddr);
+                       p_rsvmap[i * 2 + 1] = cpu_to_be64(size);
+                       return 0;
+               }
+       }
+       return -1;
+}
+
 void ft_begin_tree(struct ft_cxt *cxt)
 {
        cxt->p_begin = cxt->p_anchor;
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/flatdevtree.h
--- a/tools/libxc/powerpc64/flatdevtree.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/powerpc64/flatdevtree.h       Thu Dec 14 08:57:36 2006 -0700
@@ -66,8 +66,10 @@ void ft_prop_int(struct ft_cxt *cxt, con
 void ft_prop_int(struct ft_cxt *cxt, const char *name, unsigned int val);
 void ft_begin(struct ft_cxt *cxt, void *blob, unsigned int max_size);
 void ft_add_rsvmap(struct ft_cxt *cxt, u64 physaddr, u64 size);
+int ft_set_rsvmap(void *bphp, int m, u64 physaddr, u64 size);
 
 void ft_dump_blob(const void *bphp);
+void ft_backtrack_node(struct ft_cxt *cxt);
 void ft_merge_blob(struct ft_cxt *cxt, void *blob);
 
 void *ft_find_node(const void *bphp, const char *srch_path);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/xc_linux_build.c
--- a/tools/libxc/powerpc64/xc_linux_build.c    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/powerpc64/xc_linux_build.c    Thu Dec 14 08:57:36 2006 -0700
@@ -35,60 +35,10 @@
 
 #include "flatdevtree_env.h"
 #include "flatdevtree.h"
+#include "utils.h"
 
 #define INITRD_ADDR (24UL << 20)
 #define DEVTREE_ADDR (16UL << 20)
-
-#define ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1)))
-
-#define max(x,y) ({ \
-        const typeof(x) _x = (x);       \
-        const typeof(y) _y = (y);       \
-        (void) (&_x == &_y);            \
-        _x > _y ? _x : _y; })
-
-static void *load_file(const char *path, unsigned long *filesize)
-{
-    void *img;
-    ssize_t size;
-    int fd;
-
-    DPRINTF("load_file(%s)\n", path);
-
-    fd = open(path, O_RDONLY);
-    if (fd < 0) {
-        perror(path);
-        return NULL;
-    }
-
-    size = lseek(fd, 0, SEEK_END);
-    if (size < 0) {
-        perror(path);
-        close(fd);
-        return NULL;
-    }
-    lseek(fd, 0, SEEK_SET);
-
-    img = malloc(size);
-    if (img == NULL) {
-        perror(path);
-        close(fd);
-        return NULL;
-    }
-
-    size = read(fd, img, size);
-    if (size <= 0) {
-        perror(path);
-        close(fd);
-        free(img);
-        return NULL;
-    }
-
-    if (filesize)
-        *filesize = size;
-    close(fd);
-    return img;
-}
 
 static int init_boot_vcpu(
     int xc_handle,
@@ -128,37 +78,6 @@ static int init_boot_vcpu(
     return rc;
 }
 
-static int install_image(
-        int xc_handle,
-        int domid,
-        xen_pfn_t *page_array,
-        void *image,
-        unsigned long paddr,
-        unsigned long size)
-{
-    uint8_t *img = image;
-    int i;
-    int rc = 0;
-
-    if (paddr & ~PAGE_MASK) {
-        printf("*** unaligned address\n");
-        return -1;
-    }
-
-    for (i = 0; i < size; i += PAGE_SIZE) {
-        void *page = img + i;
-        xen_pfn_t pfn = (paddr + i) >> PAGE_SHIFT;
-        xen_pfn_t mfn = page_array[pfn];
-
-        rc = xc_copy_to_domain_page(xc_handle, domid, mfn, page);
-        if (rc < 0) {
-            perror("xc_copy_to_domain_page");
-            break;
-        }
-    }
-    return rc;
-}
-
 static int load_devtree(
     int xc_handle,
     int domid,
@@ -167,10 +86,10 @@ static int load_devtree(
     unsigned long devtree_addr,
     uint64_t initrd_base,
     unsigned long initrd_len,
-    start_info_t *si,
-    unsigned long si_addr)
-{
-    uint32_t start_info[4] = {0, si_addr, 0, 0x1000};
+    start_info_t *start_info __attribute__((unused)),
+    unsigned long start_info_addr)
+{
+    uint32_t si[4] = {0, start_info_addr, 0, 0x1000};
     struct boot_param_header *header;
     void *chosen;
     void *xen;
@@ -208,9 +127,14 @@ static int load_devtree(
         return rc;
     }
 
+    rc = ft_set_rsvmap(devtree, 1, initrd_base, initrd_len);
+    if (rc < 0) {
+        DPRINTF("couldn't set initrd reservation\n");
+        return ~0UL;
+    }
+
     /* start-info (XXX being removed soon) */
-    rc = ft_set_prop(&devtree, xen, "start-info",
-            start_info, sizeof(start_info));
+    rc = ft_set_prop(&devtree, xen, "start-info", si, sizeof(si));
     if (rc < 0) {
         DPRINTF("couldn't set /xen/start-info\n");
         return rc;
@@ -218,91 +142,19 @@ static int load_devtree(
 
     header = devtree;
     devtree_size = header->totalsize;
+    {
+        static const char dtb[] = "/tmp/xc_domU.dtb";
+        int dfd = creat(dtb, 0666);
+        if (dfd != -1) {
+            write(dfd, devtree, devtree_size);
+            close(dfd);
+        } else
+            DPRINTF("could not open(\"%s\")\n", dtb);
+    }
 
     DPRINTF("copying device tree to 0x%lx[0x%x]\n", DEVTREE_ADDR, devtree_size);
     return install_image(xc_handle, domid, page_array, devtree, DEVTREE_ADDR,
                        devtree_size);
-}
-
-unsigned long spin_list[] = {
-#if 0
-    0x100,
-    0x200,
-    0x300,
-    0x380,
-    0x400,
-    0x480,
-    0x500,
-    0x700,
-    0x900,
-    0xc00,
-#endif
-    0
-};
-
-/* XXX yes, this is a hack */
-static void hack_kernel_img(char *img)
-{
-    const off_t file_offset = 0x10000;
-    unsigned long *addr = spin_list;
-
-    while (*addr) {
-        uint32_t *instruction = (uint32_t *)(img + *addr + file_offset);
-        printf("installing spin loop at %lx (%x)\n", *addr, *instruction);
-        *instruction = 0x48000000;
-        addr++;
-    }
-}
-
-static int load_kernel(
-    int xc_handle,
-    int domid,
-    const char *kernel_path,
-    struct domain_setup_info *dsi,
-    xen_pfn_t *page_array)
-{
-    struct load_funcs load_funcs;
-    char *kernel_img;
-    unsigned long kernel_size;
-    int rc;
-
-    /* load the kernel ELF file */
-    kernel_img = load_file(kernel_path, &kernel_size);
-    if (kernel_img == NULL) {
-        rc = -1;
-        goto out;
-    }
-
-    hack_kernel_img(kernel_img);
-
-    DPRINTF("probe_elf\n");
-    rc = probe_elf(kernel_img, kernel_size, &load_funcs);
-    if (rc < 0) {
-        rc = -1;
-        printf("%s is not an ELF file\n", kernel_path);
-        goto out;
-    }
-
-    DPRINTF("parseimage\n");
-    rc = (load_funcs.parseimage)(kernel_img, kernel_size, dsi);
-    if (rc < 0) {
-        rc = -1;
-        goto out;
-    }
-
-    DPRINTF("loadimage\n");
-    (load_funcs.loadimage)(kernel_img, kernel_size, xc_handle, domid,
-            page_array, dsi);
-
-    DPRINTF("  v_start     %016"PRIx64"\n", dsi->v_start);
-    DPRINTF("  v_end       %016"PRIx64"\n", dsi->v_end);
-    DPRINTF("  v_kernstart %016"PRIx64"\n", dsi->v_kernstart);
-    DPRINTF("  v_kernend   %016"PRIx64"\n", dsi->v_kernend);
-    DPRINTF("  v_kernentry %016"PRIx64"\n", dsi->v_kernentry);
-
-out:
-    free(kernel_img);
-    return rc;
 }
 
 static int load_initrd(
@@ -334,49 +186,38 @@ out:
     return rc;
 }
 
-static unsigned long create_start_info(start_info_t *si,
+static unsigned long create_start_info(
+       void *devtree, start_info_t *start_info,
         unsigned int console_evtchn, unsigned int store_evtchn,
-        unsigned long nr_pages)
-{
-    unsigned long si_addr;
-
-    memset(si, 0, sizeof(*si));
-    snprintf(si->magic, sizeof(si->magic), "xen-%d.%d-powerpc64HV", 3, 0);
-
-    si->nr_pages = nr_pages;
-    si->shared_info = (nr_pages - 1) << PAGE_SHIFT;
-    si->store_mfn = si->nr_pages - 2;
-    si->store_evtchn = store_evtchn;
-    si->console.domU.mfn = si->nr_pages - 3;
-    si->console.domU.evtchn = console_evtchn;
-    si_addr = (si->nr_pages - 4) << PAGE_SHIFT;
-
-    return si_addr;
-}
-
-static int get_page_array(int xc_handle, int domid, xen_pfn_t **page_array,
-                          unsigned long *nr_pages)
-{
+       unsigned long nr_pages, unsigned long rma_pages)
+{
+    unsigned long start_info_addr;
+    uint64_t rma_top;
     int rc;
 
-    DPRINTF("xc_get_tot_pages\n");
-    *nr_pages = xc_get_tot_pages(xc_handle, domid);
-    DPRINTF("  0x%lx\n", *nr_pages);
-
-    *page_array = malloc(*nr_pages * sizeof(xen_pfn_t));
-    if (*page_array == NULL) {
-        perror("malloc");
-        return -1;
-    }
-
-    DPRINTF("xc_get_pfn_list\n");
-    rc = xc_get_pfn_list(xc_handle, domid, *page_array, *nr_pages);
-    if (rc != *nr_pages) {
-        perror("Could not get the page frame list");
-        return -1;
-    }
-
-    return 0;
+    memset(start_info, 0, sizeof(*start_info));
+    snprintf(start_info->magic, sizeof(start_info->magic),
+             "xen-%d.%d-powerpc64HV", 3, 0);
+
+    rma_top = rma_pages << PAGE_SHIFT;
+    DPRINTF("RMA top = 0x%"PRIX64"\n", rma_top);
+
+    start_info->nr_pages = nr_pages;
+    start_info->shared_info = rma_top - PAGE_SIZE;
+    start_info->store_mfn = (rma_top >> PAGE_SHIFT) - 2;
+    start_info->store_evtchn = store_evtchn;
+    start_info->console.domU.mfn = (rma_top >> PAGE_SHIFT) - 3;
+    start_info->console.domU.evtchn = console_evtchn;
+    start_info_addr = rma_top - 4*PAGE_SIZE;
+
+    rc = ft_set_rsvmap(devtree, 0, start_info_addr, 4*PAGE_SIZE);
+    if (rc < 0) {
+        DPRINTF("couldn't set start_info reservation\n");
+        return ~0UL;
+    }
+
+
+    return start_info_addr;
 }
 
 static void free_page_array(xen_pfn_t *page_array)
@@ -388,6 +229,7 @@ static void free_page_array(xen_pfn_t *p
 
 int xc_linux_build(int xc_handle,
                    uint32_t domid,
+                   unsigned int mem_mb,
                    const char *image_name,
                    const char *initrd_name,
                    const char *cmdline,
@@ -399,7 +241,7 @@ int xc_linux_build(int xc_handle,
                    unsigned long *console_mfn,
                    void *devtree)
 {
-    start_info_t si;
+    start_info_t start_info;
     struct domain_setup_info dsi;
     xen_pfn_t *page_array = NULL;
     unsigned long nr_pages;
@@ -407,18 +249,28 @@ int xc_linux_build(int xc_handle,
     unsigned long kern_addr;
     unsigned long initrd_base = 0;
     unsigned long initrd_len = 0;
-    unsigned long si_addr;
+    unsigned long start_info_addr;
+    unsigned long rma_pages;
     int rc = 0;
 
     DPRINTF("%s\n", __func__);
 
-    if (get_page_array(xc_handle, domid, &page_array, &nr_pages)) {
+    nr_pages = mem_mb << (20 - PAGE_SHIFT);
+    DPRINTF("nr_pages 0x%lx\n", nr_pages);
+
+    rma_pages = get_rma_pages(devtree);
+    if (rma_pages == 0) {
+           rc = -1;
+           goto out;
+    }
+
+    if (get_rma_page_array(xc_handle, domid, &page_array, rma_pages)) {
         rc = -1;
         goto out;
     }
 
     DPRINTF("loading image '%s'\n", image_name);
-    if (load_kernel(xc_handle, domid, image_name, &dsi, page_array)) {
+    if (load_elf_kernel(xc_handle, domid, image_name, &dsi, page_array)) {
         rc = -1;
         goto out;
     }
@@ -434,11 +286,12 @@ int xc_linux_build(int xc_handle,
     }
 
     /* start_info stuff: about to be removed  */
-    si_addr = create_start_info(&si, console_evtchn, store_evtchn, nr_pages);
-    *console_mfn = page_array[si.console.domU.mfn];
-    *store_mfn = page_array[si.store_mfn];
-    if (install_image(xc_handle, domid, page_array, &si, si_addr,
-                sizeof(start_info_t))) {
+    start_info_addr = create_start_info(devtree, &start_info, console_evtchn,
+                                        store_evtchn, nr_pages, rma_pages);
+    *console_mfn = page_array[start_info.console.domU.mfn];
+    *store_mfn = page_array[start_info.store_mfn];
+    if (install_image(xc_handle, domid, page_array, &start_info,
+                      start_info_addr, sizeof(start_info_t))) {
         rc = -1;
         goto out;
     }
@@ -447,7 +300,8 @@ int xc_linux_build(int xc_handle,
         DPRINTF("loading flattened device tree\n");
         devtree_addr = DEVTREE_ADDR;
         if (load_devtree(xc_handle, domid, page_array, devtree, devtree_addr,
-                     initrd_base, initrd_len, &si, si_addr)) {
+                         initrd_base, initrd_len, &start_info,
+                         start_info_addr)) {
             DPRINTF("couldn't load flattened device tree.\n");
             rc = -1;
             goto out;
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/xc_linux_build.c      Thu Dec 14 08:57:36 2006 -0700
@@ -596,15 +596,21 @@ static int compat_check(int xc_handle, s
     }
 
     if (strstr(xen_caps, "xen-3.0-x86_32p")) {
-        if (dsi->pae_kernel == PAEKERN_no) {
+        if (dsi->pae_kernel == PAEKERN_bimodal) {
+            dsi->pae_kernel = PAEKERN_extended_cr3;
+        } else if (dsi->pae_kernel == PAEKERN_no) {
             xc_set_error(XC_INVALID_KERNEL,
                          "Non PAE-kernel on PAE host.");
             return 0;
         }
-    } else if (dsi->pae_kernel != PAEKERN_no) {
-        xc_set_error(XC_INVALID_KERNEL,
-                     "PAE-kernel on non-PAE host.");
-        return 0;
+    } else {
+        if (dsi->pae_kernel == PAEKERN_bimodal) {
+            dsi->pae_kernel = PAEKERN_no;
+        } else if (dsi->pae_kernel != PAEKERN_no) {
+            xc_set_error(XC_INVALID_KERNEL,
+                         "PAE-kernel on non-PAE host.");
+            return 0;
+        }
     }
 
     return 1;
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/xc_load_elf.c Thu Dec 14 08:57:36 2006 -0700
@@ -325,17 +325,6 @@ static int parseelfimage(const char *ima
         return -EINVAL;
     }
 
-    /* Find the section-header strings table. */
-    if ( ehdr->e_shstrndx == SHN_UNDEF )
-    {
-        xc_set_error(XC_INVALID_KERNEL,
-                     "ELF image has no section-header strings table (shstrtab).");
-        return -EINVAL;
-    }
-    shdr = (Elf_Shdr *)(image + ehdr->e_shoff +
-                        (ehdr->e_shstrndx*ehdr->e_shentsize));
-    shstrtab = image + shdr->sh_offset;
-
     dsi->__elfnote_section = NULL;
     dsi->__xen_guest_string = NULL;
 
@@ -354,6 +343,17 @@ static int parseelfimage(const char *ima
     /* Fall back to looking for the special '__xen_guest' section. */
     if ( dsi->__elfnote_section == NULL )
     {
+        /* Find the section-header strings table. */
+        if ( ehdr->e_shstrndx == SHN_UNDEF )
+        {
+            xc_set_error(XC_INVALID_KERNEL,
+                         "ELF image has no section-header strings table.");
+            return -EINVAL;
+        }
+        shdr = (Elf_Shdr *)(image + ehdr->e_shoff +
+                            (ehdr->e_shstrndx*ehdr->e_shentsize));
+        shstrtab = image + shdr->sh_offset;
+
         for ( h = 0; h < ehdr->e_shnum; h++ )
         {
             shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize));
@@ -400,6 +400,8 @@ static int parseelfimage(const char *ima
     }
 
     /*
+     * A "bimodal" ELF note indicates the kernel will adjust to the
+     * current paging mode, including handling extended cr3 syntax.
      * If we have ELF notes then PAE=yes implies that we must support
      * the extended cr3 syntax. Otherwise we need to find the
      * [extended-cr3] syntax in the __xen_guest string.
@@ -408,7 +410,9 @@ static int parseelfimage(const char *ima
     if ( dsi->__elfnote_section )
     {
         p = xen_elfnote_string(dsi, XEN_ELFNOTE_PAE_MODE);
-        if ( p != NULL && strncmp(p, "yes", 3) == 0 )
+        if ( p != NULL && strncmp(p, "bimodal", 7) == 0 )
+            dsi->pae_kernel = PAEKERN_bimodal;
+        else if ( p != NULL && strncmp(p, "yes", 3) == 0 )
             dsi->pae_kernel = PAEKERN_extended_cr3;
 
     }
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/xenctrl.h     Thu Dec 14 08:57:36 2006 -0700
@@ -728,4 +728,8 @@ const char *xc_error_code_to_desc(int co
  */
 xc_error_handler xc_set_error_handler(xc_error_handler handler);
 
+/* PowerPC specific. */
+int xc_alloc_real_mode_area(int xc_handle,
+                            uint32_t domid,
+                            unsigned int log);
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/xenguest.h    Thu Dec 14 08:57:36 2006 -0700
@@ -122,4 +122,19 @@ int xc_get_hvm_param(
 int xc_get_hvm_param(
     int handle, domid_t dom, int param, unsigned long *value);
 
+/* PowerPC specific. */
+int xc_prose_build(int xc_handle,
+                   uint32_t domid,
+                   unsigned int mem_mb,
+                   const char *image_name,
+                   const char *ramdisk_name,
+                   const char *cmdline,
+                   const char *features,
+                   unsigned long flags,
+                   unsigned int store_evtchn,
+                   unsigned long *store_mfn,
+                   unsigned int console_evtchn,
+                   unsigned long *console_mfn,
+                   void *arch_args);
+
 #endif /* XENGUEST_H */
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/xg_private.h  Thu Dec 14 08:57:36 2006 -0700
@@ -132,6 +132,7 @@ struct domain_setup_info
 #define PAEKERN_no           0
 #define PAEKERN_yes          1
 #define PAEKERN_extended_cr3 2
+#define PAEKERN_bimodal      3
     unsigned int  pae_kernel;
 
     unsigned int  load_symtab;
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_console.h
--- a/tools/libxen/include/xen_console.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_console.h        Thu Dec 14 08:57:36 2006 -0700
@@ -149,14 +149,14 @@ xen_console_record_opt_set_free(xen_cons
 
 
 /**
- * Get the current state of the given console.
+ * Get a record containing the current state of the given console.
  */
 extern bool
 xen_console_get_record(xen_session *session, xen_console_record **result, xen_console console);
 
 
 /**
- * Get a reference to the object with the specified UUID.
+ * Get a reference to the console instance with the specified UUID.
  */
 extern bool
 xen_console_get_by_uuid(xen_session *session, xen_console *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_host.h
--- a/tools/libxen/include/xen_host.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_host.h   Thu Dec 14 08:57:36 2006 -0700
@@ -154,14 +154,14 @@ xen_host_record_opt_set_free(xen_host_re
 
 
 /**
- * Get the current state of the given host.  !!!
+ * Get a record containing the current state of the given host.
  */
 extern bool
 xen_host_get_record(xen_session *session, xen_host_record **result, xen_host host);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the host instance with the specified UUID.
  */
 extern bool
 xen_host_get_by_uuid(xen_session *session, xen_host *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_host_cpu.h
--- a/tools/libxen/include/xen_host_cpu.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_host_cpu.h       Thu Dec 14 08:57:36 2006 -0700
@@ -153,14 +153,14 @@ xen_host_cpu_record_opt_set_free(xen_hos
 
 
 /**
- * Get the current state of the given host_cpu.  !!!
+ * Get a record containing the current state of the given host_cpu.
  */
 extern bool
 xen_host_cpu_get_record(xen_session *session, xen_host_cpu_record **result, xen_host_cpu host_cpu);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the host_cpu instance with the specified UUID.
  */
 extern bool
 xen_host_cpu_get_by_uuid(xen_session *session, xen_host_cpu *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_network.h
--- a/tools/libxen/include/xen_network.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_network.h        Thu Dec 14 08:57:36 2006 -0700
@@ -152,14 +152,14 @@ xen_network_record_opt_set_free(xen_netw
 
 
 /**
- * Get the current state of the given network.  !!!
+ * Get a record containing the current state of the given network.
  */
 extern bool
 xen_network_get_record(xen_session *session, xen_network_record **result, xen_network network);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the network instance with the specified UUID.
  */
 extern bool
 xen_network_get_by_uuid(xen_session *session, xen_network *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_pif.h
--- a/tools/libxen/include/xen_pif.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_pif.h    Thu Dec 14 08:57:36 2006 -0700
@@ -155,14 +155,14 @@ xen_pif_record_opt_set_free(xen_pif_reco
 
 
 /**
- * Get the current state of the given PIF.  !!!
+ * Get a record containing the current state of the given PIF.
  */
 extern bool
 xen_pif_get_record(xen_session *session, xen_pif_record **result, xen_pif pif);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the PIF instance with the specified UUID.
  */
 extern bool
 xen_pif_get_by_uuid(xen_session *session, xen_pif *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_sr.h
--- a/tools/libxen/include/xen_sr.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_sr.h     Thu Dec 14 08:57:36 2006 -0700
@@ -153,14 +153,14 @@ xen_sr_record_opt_set_free(xen_sr_record
 
 
 /**
- * Get the current state of the given SR.  !!!
+ * Get a record containing the current state of the given SR.
  */
 extern bool
 xen_sr_get_record(xen_session *session, xen_sr_record **result, xen_sr sr);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the SR instance with the specified UUID.
  */
 extern bool
 xen_sr_get_by_uuid(xen_session *session, xen_sr *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_user.h
--- a/tools/libxen/include/xen_user.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_user.h   Thu Dec 14 08:57:36 2006 -0700
@@ -146,14 +146,14 @@ xen_user_record_opt_set_free(xen_user_re
 
 
 /**
- * Get the current state of the given user.  !!!
+ * Get a record containing the current state of the given user.
  */
 extern bool
 xen_user_get_record(xen_session *session, xen_user_record **result, xen_user user);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the user instance with the specified UUID.
  */
 extern bool
 xen_user_get_by_uuid(xen_session *session, xen_user *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vdi.h
--- a/tools/libxen/include/xen_vdi.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_vdi.h    Thu Dec 14 08:57:36 2006 -0700
@@ -159,14 +159,14 @@ xen_vdi_record_opt_set_free(xen_vdi_reco
 
 
 /**
- * Get the current state of the given VDI.  !!!
+ * Get a record containing the current state of the given VDI.
  */
 extern bool
 xen_vdi_get_record(xen_session *session, xen_vdi_record **result, xen_vdi vdi);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the VDI instance with the specified UUID.
  */
 extern bool
 xen_vdi_get_by_uuid(xen_session *session, xen_vdi *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vif.h
--- a/tools/libxen/include/xen_vif.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_vif.h    Thu Dec 14 08:57:36 2006 -0700
@@ -156,14 +156,14 @@ xen_vif_record_opt_set_free(xen_vif_reco
 
 
 /**
- * Get the current state of the given VIF.  !!!
+ * Get a record containing the current state of the given VIF.
  */
 extern bool
 xen_vif_get_record(xen_session *session, xen_vif_record **result, xen_vif vif);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the VIF instance with the specified UUID.
  */
 extern bool
 xen_vif_get_by_uuid(xen_session *session, xen_vif *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vm.h
--- a/tools/libxen/include/xen_vm.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_vm.h     Thu Dec 14 08:57:36 2006 -0700
@@ -79,6 +79,7 @@ typedef struct xen_vm_record
     char *name_description;
     int64_t user_version;
     bool is_a_template;
+    bool auto_power_on;
     struct xen_host_record_opt *resident_on;
     int64_t memory_static_max;
     int64_t memory_dynamic_max;
@@ -198,14 +199,14 @@ xen_vm_record_opt_set_free(xen_vm_record
 
 
 /**
- * Get the current state of the given VM.  !!!
+ * Get a record containing the current state of the given VM.
  */
 extern bool
 xen_vm_get_record(xen_session *session, xen_vm_record **result, xen_vm vm);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the VM instance with the specified UUID.
  */
 extern bool
 xen_vm_get_by_uuid(xen_session *session, xen_vm *result, char *uuid);
@@ -277,6 +278,13 @@ xen_vm_get_is_a_template(xen_session *se
 
 
 /**
+ * Get the auto_power_on field of the given VM.
+ */
+extern bool
+xen_vm_get_auto_power_on(xen_session *session, bool *result, xen_vm vm);
+
+
+/**
  * Get the resident_on field of the given VM.
  */
 extern bool
@@ -564,6 +572,13 @@ xen_vm_set_is_a_template(xen_session *se
 
 
 /**
+ * Set the auto_power_on field of the given VM.
+ */
+extern bool
+xen_vm_set_auto_power_on(xen_session *session, xen_vm vm, bool auto_power_on);
+
+
+/**
  * Set the memory/dynamic_max field of the given VM.
  */
 extern bool
@@ -592,6 +607,13 @@ xen_vm_set_vcpus_params(xen_session *ses
 
 
 /**
+ * Set the VCPUs/number field of the given VM.
+ */
+extern bool
+xen_vm_set_vcpus_number(xen_session *session, xen_vm vm, int64_t number);
+
+
+/**
  * Set the VCPUs/features/force_on field of the given VM.
  */
 extern bool
@@ -599,10 +621,42 @@ xen_vm_set_vcpus_features_force_on(xen_s
 
 
 /**
+ * Add the given value to the VCPUs/features/force_on field of the
+ * given VM.  If the value is already in that Set, then do nothing.
+ */
+extern bool
+xen_vm_add_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value);
+
+
+/**
+ * Remove the given value from the VCPUs/features/force_on field of the
+ * given VM.  If the value is not in that Set, then do nothing.
+ */
+extern bool
+xen_vm_remove_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value);
+
+
+/**
  * Set the VCPUs/features/force_off field of the given VM.
  */
 extern bool
 xen_vm_set_vcpus_features_force_off(xen_session *session, xen_vm vm, struct xen_cpu_feature_set *force_off);
+
+
+/**
+ * Add the given value to the VCPUs/features/force_off field of the
+ * given VM.  If the value is already in that Set, then do nothing.
+ */
+extern bool
+xen_vm_add_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value);
+
+
+/**
+ * Remove the given value from the VCPUs/features/force_off field of
+ * the given VM.  If the value is not in that Set, then do nothing.
+ */
+extern bool
+xen_vm_remove_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value);
 
 
 /**
@@ -817,12 +871,4 @@ xen_vm_get_all(xen_session *session, str
 xen_vm_get_all(xen_session *session, struct xen_vm_set **result);
 
 
-/**
- * Destroy the specified VM.  The VM is completely removed from the system.
- * This function can only be called when the VM is in the Halted State.
- */
-extern bool
-xen_vm_destroy(xen_session *session, xen_vm vm);
-
-
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vtpm.h
--- a/tools/libxen/include/xen_vtpm.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_vtpm.h   Thu Dec 14 08:57:36 2006 -0700
@@ -151,14 +151,14 @@ xen_vtpm_record_opt_set_free(xen_vtpm_re
 
 
 /**
- * Get the current state of the given VTPM.  !!!
+ * Get a record containing the current state of the given VTPM.
  */
 extern bool
 xen_vtpm_get_record(xen_session *session, xen_vtpm_record **result, xen_vtpm vtpm);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the VTPM instance with the specified UUID.
  */
 extern bool
 xen_vtpm_get_by_uuid(xen_session *session, xen_vtpm *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/src/xen_vm.c
--- a/tools/libxen/src/xen_vm.c Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/src/xen_vm.c Thu Dec 14 08:57:36 2006 -0700
@@ -67,6 +67,9 @@ static const struct_member xen_vm_record
         { .key = "is_a_template",
           .type = &abstract_type_bool,
           .offset = offsetof(xen_vm_record, is_a_template) },
+        { .key = "auto_power_on",
+          .type = &abstract_type_bool,
+          .offset = offsetof(xen_vm_record, auto_power_on) },
         { .key = "resident_on",
           .type = &abstract_type_ref,
           .offset = offsetof(xen_vm_record, resident_on) },
@@ -399,6 +402,22 @@ xen_vm_get_is_a_template(xen_session *se
 
 
 bool
+xen_vm_get_auto_power_on(xen_session *session, bool *result, xen_vm vm)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm }
+        };
+
+    abstract_type result_type = abstract_type_bool;
+
+    XEN_CALL_("VM.get_auto_power_on");
+    return session->ok;
+}
+
+
+bool
 xen_vm_get_resident_on(xen_session *session, xen_host *result, xen_vm vm)
 {
     abstract_value param_values[] =
@@ -1082,6 +1101,22 @@ xen_vm_set_is_a_template(xen_session *se
 
 
 bool
+xen_vm_set_auto_power_on(xen_session *session, xen_vm vm, bool auto_power_on)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &abstract_type_bool,
+              .u.bool_val = auto_power_on }
+        };
+
+    xen_call_(session, "VM.set_auto_power_on", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
 xen_vm_set_memory_dynamic_max(xen_session *session, xen_vm vm, int64_t dynamic_max)
 {
     abstract_value param_values[] =
@@ -1146,6 +1181,22 @@ xen_vm_set_vcpus_params(xen_session *ses
 
 
 bool
+xen_vm_set_vcpus_number(xen_session *session, xen_vm vm, int64_t number)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &abstract_type_int,
+              .u.int_val = number }
+        };
+
+    xen_call_(session, "VM.set_VCPUs_number", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
 xen_vm_set_vcpus_features_force_on(xen_session *session, xen_vm vm, struct xen_cpu_feature_set *force_on)
 {
     abstract_value param_values[] =
@@ -1162,6 +1213,38 @@ xen_vm_set_vcpus_features_force_on(xen_s
 
 
 bool
+xen_vm_add_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &xen_cpu_feature_abstract_type_,
+              .u.string_val = xen_cpu_feature_to_string(value) }
+        };
+
+    xen_call_(session, "VM.add_VCPUs_features_force_on", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
+xen_vm_remove_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &xen_cpu_feature_abstract_type_,
+              .u.string_val = xen_cpu_feature_to_string(value) }
+        };
+
+    xen_call_(session, "VM.remove_VCPUs_features_force_on", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
 xen_vm_set_vcpus_features_force_off(xen_session *session, xen_vm vm, struct xen_cpu_feature_set *force_off)
 {
     abstract_value param_values[] =
@@ -1178,6 +1261,38 @@ xen_vm_set_vcpus_features_force_off(xen_
 
 
 bool
+xen_vm_add_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &xen_cpu_feature_abstract_type_,
+              .u.string_val = xen_cpu_feature_to_string(value) }
+        };
+
+    xen_call_(session, "VM.add_VCPUs_features_force_off", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
+xen_vm_remove_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &xen_cpu_feature_abstract_type_,
+              .u.string_val = xen_cpu_feature_to_string(value) }
+        };
+
+    xen_call_(session, "VM.remove_VCPUs_features_force_off", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
 xen_vm_set_actions_after_shutdown(xen_session *session, xen_vm vm, enum xen_on_normal_exit after_shutdown)
 {
     abstract_value param_values[] =
@@ -1268,7 +1383,7 @@ xen_vm_set_platform_std_vga(xen_session 
               .u.bool_val = std_vga }
         };
 
-    xen_call_(session, "VM.set_platform_std_vga", param_values, 2, NULL, NULL);
+    xen_call_(session, "VM.set_platform_std_VGA", param_values, 2, NULL, NULL);
     return session->ok;
 }
 
@@ -1444,7 +1559,7 @@ xen_vm_set_otherconfig(xen_session *sess
               .u.set_val = (arbitrary_set *)otherconfig }
         };
 
-    xen_call_(session, "VM.set_otherconfig", param_values, 2, NULL, NULL);
+    xen_call_(session, "VM.set_otherConfig", param_values, 2, NULL, NULL);
     return session->ok;
 }
 
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/lowlevel/xc/xc.c Thu Dec 14 08:57:36 2006 -0700
@@ -919,6 +919,68 @@ static PyObject *dom_op(XcObject *self, 
     return zero;
 }
 
+#ifdef __powerpc__
+static PyObject *pyxc_alloc_real_mode_area(XcObject *self,
+                                           PyObject *args,
+                                           PyObject *kwds)
+{
+    uint32_t dom;
+    unsigned int log;
+
+    static char *kwd_list[] = { "dom", "log", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "ii", kwd_list, 
+                                      &dom, &log) )
+        return NULL;
+
+    if ( xc_alloc_real_mode_area(self->xc_handle, dom, log) )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_prose_build(XcObject *self,
+                                  PyObject *args,
+                                  PyObject *kwds)
+{
+    uint32_t dom;
+    char *image, *ramdisk = NULL, *cmdline = "", *features = NULL;
+    int flags = 0;
+    int store_evtchn, console_evtchn;
+    unsigned long store_mfn = 0;
+    unsigned long console_mfn = 0;
+    void *arch_args = NULL;
+    int unused;
+
+    static char *kwd_list[] = { "dom", "store_evtchn",
+                                "console_evtchn", "image",
+                                /* optional */
+                                "ramdisk", "cmdline", "flags",
+                                "features", "arch_args", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssiss#", kwd_list,
+                                      &dom, &store_evtchn,
+                                      &console_evtchn, &image,
+                                      /* optional */
+                                      &ramdisk, &cmdline, &flags,
+                                      &features, &arch_args, &unused) )
+        return NULL;
+
+    if ( xc_prose_build(self->xc_handle, dom, image,
+                        ramdisk, cmdline, features, flags,
+                        store_evtchn, &store_mfn,
+                        console_evtchn, &console_mfn,
+                        arch_args) != 0 ) {
+        if (!errno)
+             errno = EINVAL;
+        return PyErr_SetFromErrno(xc_error);
+    }
+    return Py_BuildValue("{s:i,s:i}", 
+                         "store_mfn", store_mfn,
+                         "console_mfn", console_mfn);
+}
+#endif /* __powerpc__ */
 
 static PyMethodDef pyxc_methods[] = {
     { "handle",
@@ -1224,6 +1286,27 @@ static PyMethodDef pyxc_methods[] = {
       "Set a domain's time offset to Dom0's localtime\n"
       " dom        [int]: Domain whose time offset is being set.\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
+
+#ifdef __powerpc__
+    { "arch_alloc_real_mode_area", 
+      (PyCFunction)pyxc_alloc_real_mode_area, 
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Allocate a domain's real mode area.\n"
+      " dom [int]: Identifier of domain.\n"
+      " log [int]: Specifies the area's size.\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "arch_prose_build", 
+      (PyCFunction)pyxc_prose_build, 
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Build a new Linux guest OS.\n"
+      " dom     [int]:      Identifier of domain to build into.\n"
+      " image   [str]:      Name of kernel image file. May be gzipped.\n"
+      " ramdisk [str, n/a]: Name of ramdisk file, if any.\n"
+      " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
+      " vcpus   [int, 1]:   Number of Virtual CPUS in domain.\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+#endif /* __powerpc__ */
 
     { NULL, NULL, 0, NULL }
 };
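
For illustration, a minimal sketch of how the two new PowerPC bindings might be called from Python; the domain ID, event channels, kernel path and command line below are assumed values, and the keyword names simply mirror the kwd_list arrays above:

    import xen.lowlevel.xc

    xc = xen.lowlevel.xc.xc()

    # Reserve a 2^26-byte (64MB) real mode area for an assumed domain 7.
    xc.arch_alloc_real_mode_area(dom=7, log=26)

    # Build a PROSE guest; the result dict carries store_mfn and console_mfn.
    result = xc.arch_prose_build(dom=7,
                                 store_evtchn=1,
                                 console_evtchn=2,
                                 image='/boot/prose-kernel',
                                 cmdline='console=hvc0')
    print result['store_mfn'], result['console_mfn']
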
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/FlatDeviceTree.py
--- a/tools/python/xen/xend/FlatDeviceTree.py   Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/FlatDeviceTree.py   Thu Dec 14 08:57:36 2006 -0700
@@ -22,6 +22,10 @@ import struct
 import struct
 import stat
 import re
+import glob
+import math
+
+_host_devtree_root = '/proc/device-tree'
 
 _OF_DT_HEADER = int("d00dfeed", 16) # avoid signed/unsigned FutureWarning
 _OF_DT_BEGIN_NODE = 0x1
@@ -33,8 +37,10 @@ def _bincat(seq, separator=''):
     '''Concatenate the contents of seq into a bytestream.'''
     strs = []
     for item in seq:
-        if type(item) == type(0):
+        if isinstance(item, int):
             strs.append(struct.pack(">I", item))
+        elif isinstance(item, long):
+            strs.append(struct.pack(">Q", item))
         else:
             try:
                 strs.append(item.to_bin())
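
To make the new cell encoding concrete, a tiny sketch (the 0x3ffc000 value is just an example): a Python int is packed as one big-endian 32-bit cell, a Python long as one 64-bit cell:

    import struct

    struct.pack(">I", 0x3ffc000)        # 4 bytes:  '\x03\xff\xc0\x00'
    struct.pack(">Q", long(0x3ffc000))  # 8 bytes:  '\x00\x00\x00\x00\x03\xff\xc0\x00'
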
@@ -231,37 +237,50 @@ class Tree(_Node):
         header.totalsize = len(payload) + _alignup(len(header.to_bin()), 8)
         return _pad(header.to_bin(), 8) + payload
 
-_host_devtree_root = '/proc/device-tree'
-def _getprop(propname):
-    '''Extract a property from the system's device tree.'''
-    f = file(os.path.join(_host_devtree_root, propname), 'r')
+def _readfile(fullpath):
+    '''Return full contents of a file.'''
+    f = file(fullpath, 'r')
     data = f.read()
     f.close()
     return data
 
+def _find_first_cpu(dirpath):
+    '''Find the first node of type 'cpu' in a directory tree.'''
+    cpulist = glob.glob(os.path.join(dirpath, 'cpus', '*'))
+    for node in cpulist:
+        try:
+            data = _readfile(os.path.join(node, 'device_type'))
+        except IOError:
+            continue
+        if 'cpu' in data:
+            return node
+    raise IOError("couldn't find any CPU nodes under " + dirpath)
+
 def _copynode(node, dirpath, propfilter):
-    '''Extract all properties from a node in the system's device tree.'''
+    '''Copy all properties and children nodes from a directory tree.'''
     dirents = os.listdir(dirpath)
     for dirent in dirents:
         fullpath = os.path.join(dirpath, dirent)
         st = os.lstat(fullpath)
         if stat.S_ISDIR(st.st_mode):
             child = node.addnode(dirent)
-            _copytree(child, fullpath, propfilter)
+            _copynode(child, fullpath, propfilter)
         elif stat.S_ISREG(st.st_mode) and propfilter(fullpath):
-            node.addprop(dirent, _getprop(fullpath))
-
-def _copytree(node, dirpath, propfilter):
-    path = os.path.join(_host_devtree_root, dirpath)
-    _copynode(node, path, propfilter)
+            node.addprop(dirent, _readfile(fullpath))
 
 def build(imghandler):
     '''Construct a device tree by combining the domain's configuration and
     the host's device tree.'''
     root = Tree()
 
-    # 4 pages: start_info, console, store, shared_info
+    # 1st reservation entry used for start_info, console, store, shared_info
     root.reserve(0x3ffc000, 0x4000)
+
+    # 2nd reservation entry is used for the initrd. Later, when we load the
+    # initrd, we may fill this in with zeroes, which would signify the end
+    # of the reservation map.  To avoid adding a zero entry now, we
+    # put some bogus yet sensible numbers here.
+    root.reserve(0x1000000, 0x1000)
 
     root.addprop('device_type', 'chrp-but-not-really\0')
     root.addprop('#size-cells', 2)
@@ -270,35 +289,52 @@ def build(imghandler):
     root.addprop('compatible', 'Momentum,Maple\0')
 
     xen = root.addnode('xen')
-    xen.addprop('start-info', 0, 0x3ffc000, 0, 0x1000)
+    xen.addprop('start-info', long(0x3ffc000), long(0x1000))
     xen.addprop('version', 'Xen-3.0-unstable\0')
-    xen.addprop('reg', 0, imghandler.vm.domid, 0, 0)
+    xen.addprop('reg', long(imghandler.vm.domid), long(0))
     xen.addprop('domain-name', imghandler.vm.getName() + '\0')
     xencons = xen.addnode('console')
     xencons.addprop('interrupts', 1, 0)
 
-    # XXX split out RMA node
-    mem = root.addnode('memory@0')
+    # add memory nodes
     totalmem = imghandler.vm.getMemoryTarget() * 1024
-    mem.addprop('reg', 0, 0, 0, totalmem)
-    mem.addprop('device_type', 'memory\0')
-
+    rma_log = 26 ### imghandler.vm.info.get('powerpc_rma_log')
+    rma_bytes = 1 << rma_log
+
+    # RMA node
+    rma = root.addnode('memory@0')
+    rma.addprop('reg', long(0), long(rma_bytes))
+    rma.addprop('device_type', 'memory\0')
+
+    # all the rest in a single node
+    remaining = totalmem - rma_bytes
+    if remaining > 0:
+        mem = root.addnode('memory@1')
+        mem.addprop('reg', long(rma_bytes), long(remaining))
+        mem.addprop('device_type', 'memory\0')
+
+    # add CPU nodes
     cpus = root.addnode('cpus')
     cpus.addprop('smp-enabled')
     cpus.addprop('#size-cells', 0)
     cpus.addprop('#address-cells', 1)
 
     # Copy all properties the system firmware gave us, except for 'linux,'
-    # properties, from 'cpus/@0', once for every vcpu. Hopefully all cpus are
-    # identical...
+    # properties, from the first CPU node in the device tree. Do this once for
+    # every vcpu. Hopefully all cpus are identical...
     cpu0 = None
+    cpu0path = _find_first_cpu(_host_devtree_root)
     def _nolinuxprops(fullpath):
         return not os.path.basename(fullpath).startswith('linux,')
     for i in range(imghandler.vm.getVCpuCount()):
-        cpu = cpus.addnode('PowerPC,970@0')
-        _copytree(cpu, 'cpus/PowerPC,970@0', _nolinuxprops)
-        # and then overwrite what we need to
-        pft_size = imghandler.vm.info.get('pft-size', 0x14)
+        # create new node and copy all properties
+        cpu = cpus.addnode('PowerPC,970@%d' % i)
+        _copynode(cpu, cpu0path, _nolinuxprops)
+
+        # overwrite what we need to
+        shadow_mb = imghandler.vm.info.get('shadow_memory', 1)
+        shadow_mb_log = int(math.log(shadow_mb, 2))
+        pft_size = shadow_mb_log + 20
         cpu.setprop('ibm,pft-size', 0, pft_size)
 
         # set default CPU
@@ -307,13 +343,13 @@ def build(imghandler):
 
     chosen = root.addnode('chosen')
     chosen.addprop('cpu', cpu0.get_phandle())
-    chosen.addprop('memory', mem.get_phandle())
+    chosen.addprop('memory', rma.get_phandle())
     chosen.addprop('linux,stdout-path', '/xen/console\0')
     chosen.addprop('interrupt-controller', xen.get_phandle())
     chosen.addprop('bootargs', imghandler.cmdline + '\0')
     # xc_linux_load.c will overwrite these 64-bit properties later
-    chosen.addprop('linux,initrd-start', 0, 0)
-    chosen.addprop('linux,initrd-end', 0, 0)
+    chosen.addprop('linux,initrd-start', long(0))
+    chosen.addprop('linux,initrd-end', long(0))
 
     if 1:
         f = file('/tmp/domU.dtb', 'w')
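
As a rough worked example of the memory-node and pft-size arithmetic above (the 256MB total and 4MB shadow figures are assumptions, not values from the patch):

    import math

    rma_log = 26                          # as hard-coded above
    rma_bytes = 1 << rma_log              # 64MB RMA -> memory@0
    totalmem = 256 * 1024 * 1024          # assumed 256MB guest
    remaining = totalmem - rma_bytes      # 192MB -> memory@1

    shadow_mb = 4                         # assumed shadow_memory setting
    pft_size = int(math.log(shadow_mb, 2)) + 20   # 2 + 20 = 22, i.e. a 4MB hash table
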
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/XendDomain.py       Thu Dec 14 08:57:36 2006 -0700
@@ -591,7 +591,9 @@ class XendDomain:
         try:
             self.domains_lock.acquire()
             result = [d.get_uuid() for d in self.domains.values()]
-            result += self.managed_domains.keys()
+            for d in self.managed_domains.keys():
+                if d not in result:
+                    result.append(d)
             return result
         finally:
             self.domains_lock.release()
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/XendDomainInfo.py   Thu Dec 14 08:57:36 2006 -0700
@@ -167,7 +167,7 @@ def recreate(info, priv):
 
     @param xeninfo: Parsed configuration
     @type  xeninfo: Dictionary
-    @param priv: TODO, unknown, something to do with memory
+    @param priv: Is a privileged domain (Dom 0)
     @type  priv: bool
 
     @rtype:  XendDomainInfo
@@ -381,7 +381,7 @@ class XendDomainInfo:
         @type    dompath: string
         @keyword augment: Augment given info with xenstored VM info
         @type    augment: bool
-        @keyword priv: Is a privledged domain (Dom 0) (TODO: really?)
+        @keyword priv: Is a privileged domain (Dom 0)
         @type    priv: bool
         @keyword resume: Is this domain being resumed?
         @type    resume: bool
@@ -563,7 +563,7 @@ class XendDomainInfo:
         for devclass in XendDevices.valid_devices():
             self.getDeviceController(devclass).waitForDevices()
 
-    def destroyDevice(self, deviceClass, devid):
+    def destroyDevice(self, deviceClass, devid, force=None):
         try:
             devid = int(devid)
         except ValueError:
@@ -578,7 +578,7 @@ class XendDomainInfo:
                     devid = entry
                     break
                 
-        return self.getDeviceController(deviceClass).destroyDevice(devid)
+        return self.getDeviceController(deviceClass).destroyDevice(devid, force)
 
 
 
@@ -647,6 +647,8 @@ class XendDomainInfo:
         if priv:
             augment_entries.remove('memory')
             augment_entries.remove('maxmem')
+            augment_entries.remove('vcpus')
+            augment_entries.remove('vcpu_avail')
 
         vm_config = self._readVMDetails([(k, XendConfig.LEGACY_CFG_TYPES[k])
                                          for k in augment_entries])
@@ -663,6 +665,14 @@ class XendDomainInfo:
                     self.info[xapiarg] = val
                 else:
                     self.info[arg] = val
+
+        # For dom0, we ignore any stored value for the vcpus fields, and
+        # read the current value from Xen instead.  This allows boot-time
+        # settings to take precedence over any entries in the store.
+        if priv:
+            xeninfo = dom_get(self.domid)
+            self.info['vcpus_number'] = xeninfo['online_vcpus']
+            self.info['vcpu_avail'] = (1 << xeninfo['online_vcpus']) - 1
 
         # read image value
         image_sxp = self._readVm('image')
@@ -895,6 +905,10 @@ class XendDomainInfo:
     def getMemoryTarget(self):
         """Get this domain's target memory size, in KB."""
         return self.info['memory_static_min'] * 1024
+
+    def getMemoryMaximum(self):
+        """Get this domain's maximum memory size, in KB."""
+        return self.info['memory_static_max'] * 1024
 
     def getResume(self):
         return str(self._resume)
@@ -1363,9 +1377,9 @@ class XendDomainInfo:
             # Use architecture- and image-specific calculations to determine
             # the various headrooms necessary, given the raw configured
             # values. maxmem, memory, and shadow are all in KiB.
+            memory = self.image.getRequiredAvailableMemory(
+                self.info['memory_static_min'] * 1024)
             maxmem = self.image.getRequiredAvailableMemory(
-                self.info['memory_static_min'] * 1024)
-            memory = self.image.getRequiredAvailableMemory(
                 self.info['memory_static_max'] * 1024)
             shadow = self.image.getRequiredShadowMemory(
                 self.info['shadow_memory'] * 1024,
@@ -1727,7 +1741,7 @@ class XendDomainInfo:
             raise VmError("VM name '%s' already exists%s" %
                           (name,
                            dom.domid is not None and
-                           ("as domain %s" % str(dom.domid)) or ""))
+                           (" as domain %s" % str(dom.domid)) or ""))
         
 
     def update(self, info = None, refresh = True):
@@ -2031,7 +2045,7 @@ class XendDomainInfo:
         if not dev_uuid:
             raise XendError('Failed to create device')
         
-        if self.state in (DOM_STATE_HALTED,):
+        if self.state in (XEN_API_VM_POWER_STATE_RUNNING,):
             sxpr = self.info.device_sxpr(dev_uuid)
             devid = self.getDeviceController('vif').createDevice(sxpr)
             raise XendError("Device creation failed")
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/image.py    Thu Dec 14 08:57:36 2006 -0700
@@ -145,6 +145,14 @@ class ImageHandler:
         add headroom where necessary."""
         return self.getRequiredAvailableMemory(self.vm.getMemoryTarget())
 
+    def getRequiredMaximumReservation(self):
+        """@param mem_kb The maximum possible memory, in KiB.
+        @return The corresponding required amount of memory to be free, also
+        in KiB. This is normally the same as getRequiredAvailableMemory, but
+        architecture- or image-specific code may override this to
+        add headroom where necessary."""
+        return self.getRequiredAvailableMemory(self.vm.getMemoryMaximum())
+
     def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
         """@param shadow_mem_kb The configured shadow memory, in KiB.
         @param maxmem_kb The configured maxmem, in KiB.
@@ -234,6 +242,60 @@ class PPC_LinuxImageHandler(LinuxImageHa
                               ramdisk        = self.ramdisk,
                               features       = self.vm.getFeatures(),
                               arch_args      = devtree.to_bin())
+
+    def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
+        """@param shadow_mem_kb The configured shadow memory, in KiB.
+        @param maxmem_kb The configured maxmem, in KiB.
+        @return The corresponding required amount of shadow memory, also in
+        KiB.
+        PowerPC currently uses "shadow memory" to refer to the hash table."""
+        return max(maxmem_kb / 64, shadow_mem_kb)
+
+
+class PPC_ProseImageHandler(LinuxImageHandler):
+
+    ostype = "prose"
+
+    def configure(self, imageConfig, deviceConfig):
+        LinuxImageHandler.configure(self, imageConfig, deviceConfig)
+        self.imageConfig = imageConfig
+
+    def buildDomain(self):
+        store_evtchn = self.vm.getStorePort()
+        console_evtchn = self.vm.getConsolePort()
+
+        mem_mb = self.getRequiredInitialReservation() / 1024
+
+        log.debug("dom            = %d", self.vm.getDomid())
+        log.debug("memsize        = %d", mem_mb)
+        log.debug("image          = %s", self.kernel)
+        log.debug("store_evtchn   = %d", store_evtchn)
+        log.debug("console_evtchn = %d", console_evtchn)
+        log.debug("cmdline        = %s", self.cmdline)
+        log.debug("ramdisk        = %s", self.ramdisk)
+        log.debug("vcpus          = %d", self.vm.getVCpuCount())
+        log.debug("features       = %s", self.vm.getFeatures())
+
+        devtree = FlatDeviceTree.build(self)
+
+        return xc.arch_prose_build(dom            = self.vm.getDomid(),
+                                   memsize        = mem_mb,
+                                   image          = self.kernel,
+                                   store_evtchn   = store_evtchn,
+                                   console_evtchn = console_evtchn,
+                                   cmdline        = self.cmdline,
+                                   ramdisk        = self.ramdisk,
+                                   features       = self.vm.getFeatures(),
+                                   arch_args      = devtree.to_bin())
+
+    def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
+        """@param shadow_mem_kb The configured shadow memory, in KiB.
+        @param maxmem_kb The configured maxmem, in KiB.
+        @return The corresponding required amount of shadow memory, also in
+        KiB.
+        PowerPC currently uses "shadow memory" to refer to the hash table."""
+        return max(maxmem_kb / 64, shadow_mem_kb)
+
 
 class HVMImageHandler(ImageHandler):
 
@@ -539,6 +601,9 @@ class X86_HVM_ImageHandler(HVMImageHandl
     def getRequiredInitialReservation(self):
         return self.vm.getMemoryTarget()
 
+    def getRequiredMaximumReservation(self):
+        return self.vm.getMemoryMaximum()
+
     def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
         # 256 pages (1MB) per vcpu,
         # plus 1 page per MiB of RAM for the P2M map,
@@ -553,13 +618,14 @@ class X86_Linux_ImageHandler(LinuxImageH
     def buildDomain(self):
         # set physical mapping limit
         # add an 8MB slack to balance backend allocations.
-        mem_kb = self.getRequiredInitialReservation() + (8 * 1024)
+        mem_kb = self.getRequiredMaximumReservation() + (8 * 1024)
         xc.domain_set_memmap_limit(self.vm.getDomid(), mem_kb)
         return LinuxImageHandler.buildDomain(self)
 
 _handlers = {
     "powerpc": {
         "linux": PPC_LinuxImageHandler,
+        "prose": PPC_ProseImageHandler,
     },
     "ia64": {
         "linux": LinuxImageHandler,
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py     Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/server/DevController.py     Thu Dec 14 08:57:36 2006 -0700
@@ -19,12 +19,14 @@ from threading import Event
 from threading import Event
 import types
 
-from xen.xend import sxp
+from xen.xend import sxp, XendRoot
 from xen.xend.XendError import VmError
 from xen.xend.XendLogging import log
 
 from xen.xend.xenstore.xstransact import xstransact, complete
 from xen.xend.xenstore.xswatch import xswatch
+
+import os
 
 DEVICE_CREATE_TIMEOUT = 100
 HOTPLUG_STATUS_NODE = "hotplug-status"
@@ -47,6 +49,8 @@ xenbusState = {
     'Closing'      : 5,
     'Closed'       : 6,
     }
+
+xroot = XendRoot.instance()
 
 xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys())))
 
@@ -191,7 +195,7 @@ class DevController:
         raise VmError('%s devices may not be reconfigured' % self.deviceClass)
 
 
-    def destroyDevice(self, devid):
+    def destroyDevice(self, devid, force):
         """Destroy the specified device.
 
         @param devid The device ID, or something device-specific from which
@@ -211,6 +215,13 @@ class DevController:
         # drivers, so this ordering avoids a race).
         self.writeBackend(devid, 'online', "0")
         self.writeBackend(devid, 'state', str(xenbusState['Closing']))
+
+        if force:
+            frontpath = self.frontendPath(devid)
+            backpath = xstransact.Read(frontpath, "backend")
+            if backpath:
+                xstransact.Remove(backpath)
+            xstransact.Remove(frontpath)
 
 
     def configurations(self):
@@ -313,6 +324,16 @@ class DevController:
                       Make sure that the migration has finished and only
                       then return from the call.
         """
+        tool = xroot.get_external_migration_tool()
+        if tool:
+            log.info("Calling external migration tool for step %d" % step)
+            fd = os.popen("%s -type %s -step %d -host %s -domname %s" %
+                          (tool, self.deviceClass, step, dst, domName))
+            for line in fd:
+                log.info(line.rstrip())
+            rc = fd.close()
+            if rc:
+                raise VmError('Migration tool returned %d' % (rc >> 8))
         return 0
 
 
@@ -320,6 +341,16 @@ class DevController:
         """ Recover from device migration. The given step was the
             last one that was successfully executed.
         """
+        tool = xroot.get_external_migration_tool()
+        if tool:
+            log.info("Calling external migration tool")
+            fd = os.popen("%s -type %s -step %d -host %s -domname %s -recover" %
+                          (tool, self.deviceClass, step, dst, domName))
+            for line in fd:
+                log.info(line.rstrip())
+            rc = fd.close()
+            if rc:
+                raise VmError('Migration tool returned %d' % (rc >> 8))
         return 0
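
For reference, the command line handed to the external migration tool has roughly this shape; the tool path, host and domain name below are placeholders, not values from the patch:

    tool = '/etc/xen/scripts/external-device-migrate'   # assumed install path
    cmd = "%s -type %s -step %d -host %s -domname %s" % (
        tool, 'vbd', 2, 'dsthost', 'guest1')
    # -> '.../external-device-migrate -type vbd -step 2 -host dsthost -domname guest1'
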
 
 
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/server/blkif.py     Thu Dec 14 08:57:36 2006 -0700
@@ -133,7 +133,7 @@ class BlkifController(DevController):
 
         return config
 
-    def destroyDevice(self, devid):
+    def destroyDevice(self, devid, force):
         """@see DevController.destroyDevice"""
 
         # If we are given a device name, then look up the device ID from it,
@@ -142,13 +142,13 @@ class BlkifController(DevController):
         # superclass's method.
 
         try:
-            DevController.destroyDevice(self, int(devid))
+            DevController.destroyDevice(self, int(devid), force)
         except ValueError:
             devid_end = type(devid) is str and devid.split('/')[-1] or None
 
             for i in self.deviceIDs():
                 d = self.readBackend(i, 'dev')
                 if d == devid or (devid_end and d == devid_end):
-                    DevController.destroyDevice(self, i)
+                    DevController.destroyDevice(self, i, force)
                     return
             raise VmError("Device %s not connected" % devid)
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xm/main.py       Thu Dec 14 08:57:36 2006 -0700
@@ -142,14 +142,14 @@ SUBCOMMAND_HELP = {
                         'Create a new virtual block device.'),
     'block-configure': ('<Domain> <BackDev> <FrontDev> <Mode> [BackDomain]',
                         'Change block device configuration'),
-    'block-detach'  :  ('<Domain> <DevId>',
+    'block-detach'  :  ('<Domain> <DevId> [-f|--force]',
                         'Destroy a domain\'s virtual block device.'),
     'block-list'    :  ('<Domain> [--long]',
                         'List virtual block devices for a domain.'),
     'network-attach':  ('<Domain> [--script=<script>] [--ip=<ip>] '
                         '[--mac=<mac>]',
                         'Create a new virtual network device.'),
-    'network-detach':  ('<Domain> <DevId>',
+    'network-detach':  ('<Domain> <DevId> [-f|--force]',
                         'Destroy a domain\'s virtual network device.'),
     'network-list'  :  ('<Domain> [--long]',
                         'List virtual network interfaces for a domain.'),
@@ -1493,16 +1493,24 @@ def xm_network_attach(args):
 
 
 def detach(args, command, deviceClass):
-    arg_check(args, command, 2)
+    arg_check(args, command, 2, 3)
 
     dom = args[0]
     dev = args[1]
-
-    server.xend.domain.destroyDevice(dom, deviceClass, dev)
+    try:
+        force = args[2]
+        if (force != "--force") and (force != "-f"):
+            print "Ignoring option %s"%(force)
+            force = None
+    except IndexError:
+        force = None
+
+    server.xend.domain.destroyDevice(dom, deviceClass, dev, force)
 
 
 def xm_block_detach(args):
     detach(args, 'block-detach', 'vbd')
+    detach(args, 'block-detach', 'tap')
 
 
 def xm_network_detach(args):
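
To show how the new force flag is interpreted, a small standalone sketch of the argument handling above (domain and device names are made up):

    args = ['guest1', 'xvdb', '--force']   # as from 'xm block-detach guest1 xvdb --force'
    dom, dev = args[0], args[1]
    try:
        force = args[2]
        if force not in ('-f', '--force'):
            force = None
    except IndexError:
        force = None
    # force is '--force' (truthy) and is passed through to destroyDevice()
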
diff -r ed56ef3e9716 -r 4762d73ced42 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/xenstore/xenstored_domain.c Thu Dec 14 08:57:36 2006 -0700
@@ -459,6 +459,8 @@ static int dom0_init(void)
                return -1;
 
        dom0 = new_domain(NULL, 0, port); 
+       if (dom0 == NULL)
+               return -1;
 
        dom0->interface = xenbus_map();
        if (dom0->interface == NULL)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/Makefile
--- a/xen/arch/powerpc/Makefile Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/Makefile Thu Dec 14 08:57:36 2006 -0700
@@ -9,10 +9,10 @@ obj-y += backtrace.o
 obj-y += backtrace.o
 obj-y += bitops.o
 obj-y += boot_of.o
+obj-y += cmdline.o
 obj-y += dart.o
 obj-y += dart_u3.o
 obj-y += dart_u4.o
-obj-y += delay.o
 obj-y += domctl.o
 obj-y += domain_build.o
 obj-y += domain.o
@@ -22,11 +22,12 @@ obj-y += hcalls.o
 obj-y += hcalls.o
 obj-y += iommu.o
 obj-y += irq.o
-obj-y += mambo.o
+obj-y += systemsim.o
 obj-y += memory.o
 obj-y += mm.o
 obj-y += mpic.o
 obj-y += mpic_init.o
+obj-y += numa.o
 obj-y += of-devtree.o
 obj-y += of-devwalk.o
 obj-y += ofd_fixup.o
@@ -36,6 +37,7 @@ obj-y += setup.o
 obj-y += setup.o
 obj-y += shadow.o
 obj-y += smp.o
+obj-y += smpboot.o
 obj-y += smp-tbsync.o
 obj-y += sysctl.o
 obj-y += time.o
@@ -57,11 +59,6 @@ PPC_C_WARNINGS += -Wshadow
 PPC_C_WARNINGS += -Wshadow
 CFLAGS += $(PPC_C_WARNINGS)
 
-LINK=0x400000
-boot32_link_base = $(LINK)
-xen_link_offset  = 100
-xen_link_base    = $(patsubst %000,%$(xen_link_offset),$(LINK))
-
 #
 # The following flags are fed to gcc in order to link several
 # objects into a single ELF segment and to not link in any additional
@@ -72,34 +69,39 @@ firmware: of_handler/built_in.o $(TARGET
 firmware: of_handler/built_in.o $(TARGET_SUBARCH)/memcpy.o of-devtree.o
        $(CC) $(CFLAGS) $(OMAGIC) -e __ofh_start -Wl,-Ttext,0x0 $^ -o $@
 
-firmware_image: firmware
+firmware_image.bin: firmware
        $(CROSS_COMPILE)objcopy --output-target=binary $< $@
-
-firmware_image.o: firmware_image
-       $(CROSS_COMPILE)objcopy --input-target=binary \
-               --output-target=elf64-powerpc \
-               --binary-architecture=powerpc \
-               --redefine-sym _binary_$<_start=$(@:%.o=%)_start \
-               --redefine-sym _binary_$<_end=$(@:%.o=%)_end \
-               --redefine-sym _binary_$<_size=$(@:%.o=%)_size  $< $@
 
 #
 # Hacks for included C files
 #
 irq.o: ../x86/irq.c
 physdev.o: ../x86/physdev.c
+numa.o: ../x86/numa.c
 
 HDRS += $(wildcard *.h)
 
+ifneq ($(CMDLINE),)
 # The first token in the arguments will be silently dropped.
-IMAGENAME = xen
-CMDLINE = ""
-boot_of.o: CFLAGS += -DCMDLINE="\"$(IMAGENAME) $(CMDLINE)\""
+FULL_CMDLINE := xen $(CMDLINE)
+endif
 
-start.o: boot/start.S
-       $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
+ifeq ($(wildcard cmdline.dep),)
+cmdline.dep:
+       echo $(FULL_CMDLINE) > cmdline.dep
+else
+ifneq ($(FULL_CMDLINE),$(shell cat cmdline.dep))
+cmdline.dep::
+       echo $(FULL_CMDLINE) > cmdline.dep
+else
+cmdline.dep:
+endif
+endif
 
-TARGET_OPTS = $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds
+cmdline.o: cmdline.dep
+cmdline.o: CFLAGS += -DCMDLINE="\"$(FULL_CMDLINE)\""
+
+TARGET_OPTS = $(OMAGIC) -Wl,-T,xen.lds
 TARGET_OPTS += start.o $(ALL_OBJS)
 
 .xen-syms: start.o $(ALL_OBJS) xen.lds
@@ -122,22 +124,10 @@ xen-syms.o: xen-syms.S
 $(TARGET)-syms: start.o $(ALL_OBJS) xen-syms.o xen.lds
        $(CC) $(CFLAGS) $(TARGET_OPTS) xen-syms.o -o $@
 
-$(TARGET).bin: $(TARGET)-syms
-       $(CROSS_COMPILE)objcopy --output-target=binary $< $@
-
-$(TARGET).bin.o: $(TARGET).bin
-       $(CROSS_COMPILE)objcopy --input-target=binary \
-               --output-target=elf32-powerpc \
-               --binary-architecture=powerpc  $< $@
-
-boot32.o: boot/boot32.S
-       $(CC) -m32 -Wa,-a32,-mppc64bridge \
-               -D__ASSEMBLY__ -D__BRIDGE64__ $(CFLAGS) -c $< -o $@
-
-$(TARGET): boot32.o $(TARGET).bin.o
-       $(CC) -m32 -N -Wl,-melf32ppclinux -static -nostdlib \
-               -Wl,-Ttext,$(boot32_link_base)  -Wl,-Tdata,$(xen_link_base) \
-               $(CFLAGS) $^ -o $@
+# our firmware only loads 32-bit ELF files
+OCPYFLAGS := --input-target=elf64-powerpc --output-target=elf32-powerpc
+$(TARGET): $(TARGET)-syms
+       $(CROSS_COMPILE)objcopy $(OCPYFLAGS) $^ $@
 
 asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
        $(CC) $(CFLAGS) -S -o $@ $<
@@ -150,4 +140,5 @@ dom0.bin: $(DOM0_IMAGE)
 
 clean::
        $(MAKE) -f $(BASEDIR)/Rules.mk -C of_handler clean
-       rm -f firmware firmware_image dom0.bin .xen-syms
+       rm -f firmware firmware_image.bin dom0.bin .xen-syms xen-syms.S \
+               xen.lds asm-offsets.s cmdline.dep
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/backtrace.c
--- a/xen/arch/powerpc/backtrace.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/backtrace.c      Thu Dec 14 08:57:36 2006 -0700
@@ -14,6 +14,7 @@
 #include <xen/console.h>
 #include <xen/sched.h>
 #include <xen/symbols.h>
+#include <asm/debugger.h>
 
 static char namebuf[KSYM_NAME_LEN+1];
 
@@ -192,6 +193,19 @@ void show_backtrace(ulong sp, ulong lr, 
     console_end_sync();
 }
 
+void show_backtrace_regs(struct cpu_user_regs *regs)
+{
+    console_start_sync();
+    
+    show_registers(regs);
+    printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr());
+    printk("hid4 0x%016lx\n", regs->hid4);
+    printk("---[ backtrace ]---\n");
+    show_backtrace(regs->gprs[1], regs->lr, regs->pc);
+
+    console_end_sync();
+}
+
 void __warn(char *file, int line)
 {
     ulong sp;
@@ -202,9 +216,19 @@ void __warn(char *file, int line)
 
     sp = (ulong)__builtin_frame_address(0);
     lr = (ulong)__builtin_return_address(0);
-
     backtrace(sp, lr, lr);
-    console_end_sync();
-}
-
-    
+
+    console_end_sync();
+}
+
+void dump_execution_state(void)
+{
+    struct vcpu *v = current;
+    struct cpu_user_regs *regs = &v->arch.ctxt;
+
+    show_registers(regs);
+    if (regs->msr & MSR_HV) {
+        printk("In Xen:\n");
+        show_backtrace(regs->gprs[1], regs->pc, regs->lr);
+    }
+}
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/bitops.c
--- a/xen/arch/powerpc/bitops.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/bitops.c Thu Dec 14 08:57:36 2006 -0700
@@ -12,42 +12,42 @@
  * @size: The maximum size to search
  */
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-                           unsigned long offset)
+                            unsigned long offset)
 {
-       const unsigned long *p = addr + BITOP_WORD(offset);
-       unsigned long result = offset & ~(BITS_PER_LONG-1);
-       unsigned long tmp;
+    const unsigned long *p = addr + BITOP_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
 
-       if (offset >= size)
-               return size;
-       size -= result;
-       offset %= BITS_PER_LONG;
-       if (offset) {
-               tmp = *(p++);
-               tmp &= (~0UL << offset);
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
-       while (size & ~(BITS_PER_LONG-1)) {
-               if ((tmp = *(p++)))
-                       goto found_middle;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = *p;
+    if (offset >= size)
+        return size;
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp &= (~0UL << offset);
+        if (size < BITS_PER_LONG)
+            goto found_first;
+        if (tmp)
+            goto found_middle;
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size & ~(BITS_PER_LONG-1)) {
+        if ((tmp = *(p++)))
+            goto found_middle;
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size)
+        return result;
+    tmp = *p;
 
 found_first:
-       tmp &= (~0UL >> (BITS_PER_LONG - size));
-       if (tmp == 0UL)         /* Are any bits set? */
-               return result + size;   /* Nope. */
+    tmp &= (~0UL >> (BITS_PER_LONG - size));
+    if (tmp == 0UL)        /* Are any bits set? */
+        return result + size;    /* Nope. */
 found_middle:
-       return result + __ffs(tmp);
+    return result + __ffs(tmp);
 }
 
 /*
@@ -55,40 +55,40 @@ found_middle:
  * Linus' asm-alpha/bitops.h.
  */
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
-                                unsigned long offset)
+                                 unsigned long offset)
 {
-       const unsigned long *p = addr + BITOP_WORD(offset);
-       unsigned long result = offset & ~(BITS_PER_LONG-1);
-       unsigned long tmp;
+    const unsigned long *p = addr + BITOP_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
 
-       if (offset >= size)
-               return size;
-       size -= result;
-       offset %= BITS_PER_LONG;
-       if (offset) {
-               tmp = *(p++);
-               tmp |= ~0UL >> (BITS_PER_LONG - offset);
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (~tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
-       while (size & ~(BITS_PER_LONG-1)) {
-               if (~(tmp = *(p++)))
-                       goto found_middle;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = *p;
+    if (offset >= size)
+        return size;
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp |= ~0UL >> (BITS_PER_LONG - offset);
+        if (size < BITS_PER_LONG)
+            goto found_first;
+        if (~tmp)
+            goto found_middle;
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size & ~(BITS_PER_LONG-1)) {
+        if (~(tmp = *(p++)))
+            goto found_middle;
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size)
+        return result;
+    tmp = *p;
 
 found_first:
-       tmp |= ~0UL << size;
-       if (tmp == ~0UL)        /* Are any bits zero? */
-               return result + size;   /* Nope. */
+    tmp |= ~0UL << size;
+    if (tmp == ~0UL)    /* Are any bits zero? */
+        return result + size;    /* Nope. */
 found_middle:
-       return result + ffz(tmp);
+    return result + ffz(tmp);
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/boot_of.c
--- a/xen/arch/powerpc/boot_of.c        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/boot_of.c        Thu Dec 14 08:57:36 2006 -0700
@@ -16,6 +16,7 @@
  * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ *          Hollis Blanchard <hollisb@xxxxxxxxxx>
  */
 
 #include <xen/config.h>
@@ -32,6 +33,7 @@
 #include "exceptions.h"
 #include "of-devtree.h"
 #include "oftree.h"
+#include "rtas.h"
 
 /* Secondary processors use this for handshaking with main processor.  */
 volatile unsigned int __spin_ack;
@@ -39,20 +41,27 @@ static ulong of_vec;
 static ulong of_vec;
 static ulong of_msr;
 static int of_out;
-static char bootargs[256];
-
-#define COMMAND_LINE_SIZE 512
-static char builtin_cmdline[COMMAND_LINE_SIZE]
-    __attribute__((section("__builtin_cmdline"))) = CMDLINE;
-
+static ulong eomem;
+
+#define MEM_AVAILABLE_PAGES ((32 << 20) >> PAGE_SHIFT)
+static DECLARE_BITMAP(mem_available_pages, MEM_AVAILABLE_PAGES);
+
+extern char builtin_cmdline[];
 extern struct ns16550_defaults ns16550;
 
 #undef OF_DEBUG
+#undef OF_DEBUG_LOW
 
 #ifdef OF_DEBUG
 #define DBG(args...) of_printf(args)
 #else
 #define DBG(args...)
+#endif
+
+#ifdef OF_DEBUG_LOW
+#define DBG_LOW(args...) of_printf(args)
+#else
+#define DBG_LOW(args...)
 #endif
 
 #define of_panic(MSG...) \
@@ -68,7 +77,6 @@ static int bof_chosen;
 static int bof_chosen;
 
 static struct of_service s;
-extern s32 prom_call(void *arg, ulong rtas_base, ulong func, ulong msr);
 
 static int __init of_call(
     const char *service, u32 nargs, u32 nrets, s32 rets[], ...)
@@ -78,7 +86,6 @@ static int __init of_call(
     if (of_vec != 0) {
         va_list args;
         int i;
-
         memset(&s, 0, sizeof (s));
         s.ofs_service = (ulong)service;
         s.ofs_nargs = nargs;
@@ -189,7 +196,7 @@ static int __init of_finddevice(const ch
         DBG("finddevice %s -> FAILURE %d\n",devspec,rets[0]);
         return OF_FAILURE;
     }
-    DBG("finddevice %s -> %d\n",devspec, rets[0]);
+    DBG_LOW("finddevice %s -> %d\n",devspec, rets[0]);
     return rets[0];
 }
 
@@ -200,11 +207,11 @@ static int __init of_getprop(int ph, con
     of_call("getprop", 4, 1, rets, ph, name, buf, buflen);
 
     if (rets[0] == OF_FAILURE) {
-        DBG("getprop 0x%x %s -> FAILURE\n", ph, name);
+        DBG_LOW("getprop 0x%x %s -> FAILURE\n", ph, name);
         return OF_FAILURE;
     }
 
-    DBG("getprop 0x%x %s -> 0x%x (%s)\n", ph, name, rets[0], (char *)buf);
+    DBG_LOW("getprop 0x%x %s -> 0x%x (%s)\n", ph, name, rets[0], (char *)buf);
     return rets[0];
 }
 
@@ -220,7 +227,7 @@ static int __init of_setprop(
         return OF_FAILURE;
     }
 
-    DBG("setprop 0x%x %s -> %s\n", ph, name, (char *)buf);
+    DBG_LOW("setprop 0x%x %s -> %s\n", ph, name, (char *)buf);
     return rets[0];
 }
 
@@ -232,7 +239,7 @@ static int __init of_getchild(int ph)
     int rets[1] = { OF_FAILURE };
 
     of_call("child", 1, 1, rets, ph);
-    DBG("getchild 0x%x -> 0x%x\n", ph, rets[0]);
+    DBG_LOW("getchild 0x%x -> 0x%x\n", ph, rets[0]);
 
     return rets[0];
 }
@@ -245,7 +252,7 @@ static int __init of_getpeer(int ph)
     int rets[1] = { OF_FAILURE };
 
     of_call("peer", 1, 1, rets, ph);
-    DBG("getpeer 0x%x -> 0x%x\n", ph, rets[0]);
+    DBG_LOW("getpeer 0x%x -> 0x%x\n", ph, rets[0]);
 
     return rets[0];
 }
@@ -259,7 +266,7 @@ static int __init of_getproplen(int ph, 
         DBG("getproplen 0x%x %s -> FAILURE\n", ph, name);
         return OF_FAILURE;
     }
-    DBG("getproplen 0x%x %s -> 0x%x\n", ph, name, rets[0]);
+    DBG_LOW("getproplen 0x%x %s -> 0x%x\n", ph, name, rets[0]);
     return rets[0];
 }
 
@@ -272,7 +279,7 @@ static int __init of_package_to_path(int
         DBG("%s 0x%x -> FAILURE\n", __func__, ph);
         return OF_FAILURE;
     }
-    DBG("%s 0x%x %s -> 0x%x\n", __func__, ph, buffer, rets[0]);
+    DBG_LOW("%s 0x%x %s -> 0x%x\n", __func__, ph, buffer, rets[0]);
     if (rets[0] <= buflen)
         buffer[rets[0]] = '\0';
     return rets[0];
@@ -289,7 +296,7 @@ static int __init of_nextprop(int ph, co
         return OF_FAILURE;
     }
 
-    DBG("nextprop 0x%x %s -> %s\n", ph, name, (char *)buf);
+    DBG_LOW("nextprop 0x%x %s -> %s\n", ph, name, (char *)buf);
     return rets[0];
 }
 
@@ -336,7 +343,7 @@ static int __init of_claim(u32 virt, u32
         return OF_FAILURE;
     }
 
-    DBG("%s 0x%08x 0x%08x  0x%08x -> 0x%08x\n", __func__, virt, size, align,
+    DBG_LOW("%s 0x%08x 0x%08x  0x%08x -> 0x%08x\n", __func__, virt, size, align,
         rets[0]);
     return rets[0];
 }
@@ -358,29 +365,194 @@ static int __init of_getparent(int ph)
 
     of_call("parent", 1, 1, rets, ph);
 
-    DBG("getparent 0x%x -> 0x%x\n", ph, rets[0]);
-    return rets[0];
-}
-
-static void boot_of_probemem(multiboot_info_t *mbi)
+    DBG_LOW("getparent 0x%x -> 0x%x\n", ph, rets[0]);
+    return rets[0];
+}
+
+static int __init of_open(const char *devspec)
+{
+    int rets[1] = { OF_FAILURE };
+
+    of_call("open", 1, 1, rets, devspec);
+    return rets[0];
+}
+
+static void boot_of_alloc_init(int m, uint addr_cells, uint size_cells)
+{
+    int rc;
+    uint pg;
+    uint a[64];
+    int tst;
+    u64 start;
+    u64 size;
+
+    rc = of_getprop(m, "available", a, sizeof (a));
+    if (rc > 0) {
+        int l =  rc / sizeof(a[0]);
+        int r = 0;
+
+#ifdef OF_DEBUG
+        { 
+            int i;
+            of_printf("avail:\n");
+            for (i = 0; i < l; i += 4)
+                of_printf("  0x%x%x, 0x%x%x\n",
+                          a[i], a[i + 1],
+                          a[i + 2] ,a[i + 3]);
+        }
+#endif
+            
+        pg = 0;
+        while (pg < MEM_AVAILABLE_PAGES && r < l) {
+            ulong end;
+
+            start = a[r++];
+            if (addr_cells == 2 && (r < l) )
+                start = (start << 32) | a[r++];
+            
+            size = a[r++];
+            if (size_cells == 2 && (r < l) )
+                size = (size << 32) | a[r++];
+                
+            end = ALIGN_DOWN(start + size, PAGE_SIZE);
+
+            start = ALIGN_UP(start, PAGE_SIZE);
+
+            DBG("%s: marking 0x%x - 0x%lx\n", __func__,
+                pg << PAGE_SHIFT, start);
+
+            start >>= PAGE_SHIFT;
+            while (pg < MEM_AVAILABLE_PAGES && pg < start) {
+                set_bit(pg, mem_available_pages);
+                pg++;
+            }
+
+            pg = end  >> PAGE_SHIFT;
+        }
+    }
+
+    /* Now make sure we mark our own memory */
+    pg =  (ulong)_start >> PAGE_SHIFT;
+    start = (ulong)_end >> PAGE_SHIFT;
+
+    DBG("%s: marking 0x%x - 0x%lx\n", __func__,
+        pg << PAGE_SHIFT, start << PAGE_SHIFT);
+
+    /* Let's try to detect whether our image has stepped on something. It
+     * is possible that FW has already subtracted our image from
+     * available memory, so we must make sure that the previous bit values
+     * are the same for the whole image. */
+    tst = test_and_set_bit(pg, mem_available_pages);
+    ++pg;
+    while (pg <= start) {
+        if (test_and_set_bit(pg, mem_available_pages) != tst)
+            of_panic("%s: pg :0x%x of our image is different\n",
+                     __func__, pg);
+        ++pg;
+    }
+
+    DBG("%s: marking 0x%x - 0x%x\n", __func__,
+        0 << PAGE_SHIFT, 3 << PAGE_SHIFT);
+    /* The first four pages (where the vectors are) should be left alone as well */
+    set_bit(0, mem_available_pages);
+    set_bit(1, mem_available_pages);
+    set_bit(2, mem_available_pages);
+    set_bit(3, mem_available_pages);
+}
+
+#ifdef BOOT_OF_FREE
+/* this is here in case we ever need a free call at a later date */
+static void boot_of_free(ulong addr, ulong size)
+{
+    ulong bits;
+    ulong pos;
+    ulong i;
+
+    size = ALIGN_UP(size, PAGE_SIZE);
+    bits = size >> PAGE_SHIFT;
+    pos = addr >> PAGE_SHIFT;
+
+    for (i = 0; i < bits; i++) {
+        if (!test_and_clear_bit(pos + i, mem_available_pages))
+            of_panic("%s: pg :0x%lx was never allocated\n",
+                     __func__, pos + i);
+    }
+}
+#endif
+
+static ulong boot_of_alloc(ulong size)
+{
+    ulong bits;
+    ulong pos;
+
+    if (size == 0)
+        return 0;
+
+    DBG("%s(0x%lx)\n", __func__, size);
+
+    size = ALIGN_UP(size, PAGE_SIZE);
+    bits = size >> PAGE_SHIFT;
+    pos = 0;
+    for (;;) {
+        ulong i;
+
+        pos = find_next_zero_bit(mem_available_pages,
+                                 MEM_AVAILABLE_PAGES, pos);
+        DBG("%s: found start bit at: 0x%lx\n", __func__, pos);
+
+        /* found nothing */
+        if ((pos + bits) > MEM_AVAILABLE_PAGES) {
+            of_printf("%s: allocation of size: 0x%lx failed\n",
+                     __func__, size);
+            return 0;
+        }
+
+        /* find a set that fits */
+        DBG("%s: checking for 0x%lx bits: 0x%lx\n", __func__, bits, pos);
+
+        i = find_next_bit(mem_available_pages, MEM_AVAILABLE_PAGES, pos);  
+        if (i - pos >= bits) {
+            uint addr = pos << PAGE_SHIFT;
+
+            /* make sure OF is happy with our choice */
+            if (of_claim(addr, size, 0) != OF_FAILURE) {
+                for (i = 0; i < bits; i++)
+                    set_bit(pos + i, mem_available_pages);
+
+                DBG("%s: 0x%lx is good returning 0x%x\n",
+                    __func__, pos, addr);
+                return addr;
+            }
+            /* if OF did not like the address then simply start from
+             * the next bit */
+            i = 1;
+        }
+
+        pos = pos + i;
+    }
+}
+
+static ulong boot_of_mem_init(void)
 {
     int root;
     int p;
-    u32 addr_cells = 1;
-    u32 size_cells = 1;
     int rc;
-    int mcount = 0;
-    static memory_map_t mmap[16];
+    uint addr_cells;
+    uint size_cells;
 
     root = of_finddevice("/");
     p = of_getchild(root);
 
     /* code is written to assume sizes of 1 */
-    of_getprop(root, "#address-cells", &addr_cells, sizeof (addr_cells));
-    of_getprop(root, "#size-cells", &size_cells, sizeof (size_cells));
+    of_getprop(root, "#address-cells", &addr_cells,
+               sizeof (addr_cells));
+    of_getprop(root, "#size-cells", &size_cells,
+               sizeof (size_cells));
     DBG("%s: address_cells=%d  size_cells=%d\n",
                     __func__, addr_cells, size_cells);
-    
+
+    /* We do real memory discovery later; for now we only want to find
+     * the first LMB. */
     do {
         const char memory[] = "memory";
         char type[32];
@@ -389,82 +561,69 @@ static void boot_of_probemem(multiboot_i
 
         of_getprop(p, "device_type", type, sizeof (type));
         if (strncmp(type, memory, sizeof (memory)) == 0) {
-            u32 reg[48];  
-            u32 al, ah, ll, lh;
+            uint reg[48];  
+            u64 start;
+            u64 size;
             int r;
+            int l;
 
             rc = of_getprop(p, "reg", reg, sizeof (reg));
             if (rc == OF_FAILURE) {
                 of_panic("no reg property for memory node: 0x%x.\n", p);
             }
-            int l = rc/sizeof(u32); /* number reg element */
+
+            l = rc / sizeof(reg[0]); /* number reg element */
             DBG("%s: number of bytes in property 'reg' %d\n",
                             __func__, rc);
             
             r = 0;
             while (r < l) {
-                al = ah = ll = lh = 0;
-                if (addr_cells == 2) {
-                    ah = reg[r++];
-                    if (r >= l)
-                        break;  /* partial line.  Skip  */
-                    al = reg[r++];
-                    if (r >= l)
-                        break;  /* partial line.  Skip */
-                } else {
-                    al = reg[r++];
-                    if (r >= l)
-                        break;  /* partial line.  Skip */
+                start = reg[r++];
+                if (addr_cells == 2 && (r < l) )
+                    start = (start << 32) | reg[r++];
+
+                if (r >= l)
+                    break;  /* partial line.  Skip */
+
+                if (start > 0) {
+                    /* this is not the first LMB so we skip it */
+                    break;
                 }
-                if (size_cells == 2) {
-                    lh = reg[r++];
-                    if (r >= l)
-                        break;  /* partial line.  Skip */
-                    ll = reg[r++];
-                } else {
-                    ll = reg[r++];
-                }
-
-                if ((ll != 0) || (lh != 0)) {
-                    mmap[mcount].size = 20; /* - size field */
-                    mmap[mcount].type = 1; /* Regular ram */
-                    mmap[mcount].length_high = lh;
-                    mmap[mcount].length_low = ll;
-                    mmap[mcount].base_addr_high = ah;
-                    mmap[mcount].base_addr_low = al;
-                    of_printf("%s: memory 0x%016lx[0x%08lx]\n",
-                      __func__,
-                      (u64)(((u64)mmap[mcount].base_addr_high << 32)
-                            | mmap[mcount].base_addr_low),
-                      (u64)(((u64)mmap[mcount].length_high << 32)
-                            | mmap[mcount].length_low));
-                    ++mcount;
-                }
+
+                size = reg[r++];
+                if (size_cells == 2 && (r < l) )
+                    size = (size << 32) | reg[r++];
+                
+                if (r > l)
+                    break;  /* partial line.  Skip */
+
+                boot_of_alloc_init(p, addr_cells, size_cells);
+                
+                eomem = size;
+                return size;
             }
         }
         p = of_getpeer(p);
     } while (p != OF_FAILURE && p != 0);
 
-    if (mcount > 0) {
-        mbi->flags |= MBI_MEMMAP;
-        mbi->mmap_length = sizeof (mmap[0]) * mcount;
-        mbi->mmap_addr = (ulong)mmap;
-    }
+    return 0;
 }
 
 static void boot_of_bootargs(multiboot_info_t *mbi)
 {
     int rc;
 
-    rc = of_getprop(bof_chosen, "bootargs", &bootargs, sizeof (bootargs));
-    if (rc == OF_FAILURE || bootargs[0] == '\0') {
-        strlcpy(bootargs, builtin_cmdline, sizeof(bootargs));
+    if (builtin_cmdline[0] == '\0') {
+        rc = of_getprop(bof_chosen, "bootargs", builtin_cmdline,
+                CONFIG_CMDLINE_SIZE);
+        if (rc > CONFIG_CMDLINE_SIZE)
+            of_panic("bootargs[] not big enough for /chosen/bootargs\n");
     }
 
     mbi->flags |= MBI_CMDLINE;
-    mbi->cmdline = (u32)bootargs;
-
-    of_printf("bootargs = %s\n", bootargs);
+    mbi->cmdline = (ulong)builtin_cmdline;
+
+    of_printf("bootargs = %s\n", builtin_cmdline);
 }
 
 static int save_props(void *m, ofdn_t n, int pkg)
@@ -500,7 +659,8 @@ static int save_props(void *m, ofdn_t n,
                     of_panic("obj array not big enough for 0x%x\n", sz);
                 }
                 actual = of_getprop(pkg, name, obj, sz);
-                if (actual > sz) of_panic("obj too small");
+                if (actual > sz)
+                    of_panic("obj too small");
             }
 
             if (strncmp(name, name_str, sizeof(name_str)) == 0) {
@@ -512,7 +672,8 @@ static int save_props(void *m, ofdn_t n,
             }
 
             pos = ofd_prop_add(m, n, name, obj, actual);
-            if (pos == 0) of_panic("prop_create");
+            if (pos == 0)
+                of_panic("prop_create");
         }
 
         result = of_nextprop(pkg, name, name);
@@ -536,10 +697,12 @@ retry:
 
     if (pnext != 0) {
         sz = of_package_to_path(pnext, path, psz);
-        if (sz == OF_FAILURE) of_panic("bad path\n");
+        if (sz == OF_FAILURE)
+            of_panic("bad path\n");
 
         nnext = ofd_node_child_create(m, n, path, sz);
-        if (nnext == 0) of_panic("out of mem\n");
+        if (nnext == 0)
+            of_panic("out of mem\n");
 
         do_pkg(m, nnext, pnext, path, psz);
     }
@@ -551,7 +714,8 @@ retry:
         sz = of_package_to_path(pnext, path, psz);
 
         nnext = ofd_node_peer_create(m, n, path, sz);
-        if (nnext <= 0) of_panic("out of space in OFD tree.\n");
+        if (nnext <= 0)
+            of_panic("out of space in OFD tree.\n");
 
         n = nnext;
         p = pnext;
@@ -559,7 +723,7 @@ retry:
     }
 }
 
-static int pkg_save(void *mem)
+static long pkg_save(void *mem)
 {
     int root;
     char path[256];
@@ -570,11 +734,12 @@ static int pkg_save(void *mem)
 
     /* get root */
     root = of_getpeer(0);
-    if (root == OF_FAILURE) of_panic("no root package\n");
+    if (root == OF_FAILURE)
+        of_panic("no root package\n");
 
     do_pkg(mem, OFD_ROOT, root, path, sizeof(path));
 
-    r = (((ofdn_t *)mem)[1] + 1) * sizeof (u64);
+    r = ofd_size(mem);
 
     of_printf("%s: saved device tree in 0x%x bytes\n", __func__, r);
 
@@ -604,7 +769,8 @@ static int boot_of_fixup_refs(void *mem)
             char ofpath[256];
 
             path = ofd_node_path(mem, c);
-            if (path == NULL) of_panic("no path to found prop: %s\n", name);
+            if (path == NULL)
+                of_panic("no path to found prop: %s\n", name);
 
             rp = of_finddevice(path);
             if (rp == OF_FAILURE)
@@ -629,13 +795,15 @@ static int boot_of_fixup_refs(void *mem)
                          "ref 0x%x\n", name, path, rp, ref);
 
             dp = ofd_node_find(mem, ofpath);
-            if (dp <= 0) of_panic("no ofd node for OF node[0x%x]: %s\n",
-                                  ref, ofpath);
+            if (dp <= 0)
+                of_panic("no ofd node for OF node[0x%x]: %s\n",
+                         ref, ofpath);
 
             ref = dp;
 
             upd = ofd_prop_add(mem, c, name, &ref, sizeof(ref));
-            if (upd <= 0) of_panic("update failed: %s\n", name);
+            if (upd <= 0)
+                of_panic("update failed: %s\n", name);
 
 #ifdef DEBUG
             of_printf("%s: %s/%s -> %s\n", __func__,
@@ -658,7 +826,8 @@ static int boot_of_fixup_chosen(void *me
     char ofpath[256];
 
     ch = of_finddevice("/chosen");
-    if (ch == OF_FAILURE) of_panic("/chosen not found\n");
+    if (ch == OF_FAILURE)
+        of_panic("/chosen not found\n");
 
     rc = of_getprop(ch, "cpu", &val, sizeof (val));
 
@@ -667,16 +836,19 @@ static int boot_of_fixup_chosen(void *me
 
         if (rc > 0) {
             dn = ofd_node_find(mem, ofpath);
-            if (dn <= 0) of_panic("no node for: %s\n", ofpath);
+            if (dn <= 0)
+                of_panic("no node for: %s\n", ofpath);
 
             ofd_boot_cpu = dn;
             val = dn;
 
             dn = ofd_node_find(mem, "/chosen");
-            if (dn <= 0) of_panic("no /chosen node\n");
+            if (dn <= 0)
+                of_panic("no /chosen node\n");
 
             dc = ofd_prop_add(mem, dn, "cpu", &val, sizeof (val));
-            if (dc <= 0) of_panic("could not fix /chosen/cpu\n");
+            if (dc <= 0)
+                of_panic("could not fix /chosen/cpu\n");
             rc = 1;
         } else {
             of_printf("*** can't find path to booting cpu, "
@@ -685,56 +857,6 @@ static int boot_of_fixup_chosen(void *me
         }
     }
     return rc;
-}
-
-static ulong space_base;
-
-/*
- * The following function is necessary because we cannot depend on all
- * FW to actually allocate us any space, so we look for it _hoping_
- * that at least is will fail if we try to claim something that
- * belongs to FW.  This hope does not seem to be true on some version
- * of PIBS.
- */
-static ulong find_space(u32 size, u32 align, multiboot_info_t *mbi)
-{
-    memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr);
-    ulong eomem = ((u64)map->length_high << 32) | (u64)map->length_low;
-    ulong base;
-
-    if (size == 0)
-        return 0;
-
-    if (align == 0)
-        of_panic("cannot call %s() with align of 0\n", __func__);
-
-#ifdef BROKEN_CLAIM_WORKAROUND
-    {
-        static int broken_claim;
-        if (!broken_claim) {
-            /* just try and claim it to the FW chosen address */
-            base = of_claim(0, size, align);
-            if (base != OF_FAILURE)
-                return base;
-            of_printf("%s: Firmware does not allocate memory for you\n",
-                      __func__);
-            broken_claim = 1;
-        }
-    }
-#endif
-
-    of_printf("%s base=0x%016lx  eomem=0x%016lx  size=0x%08x  align=0x%x\n",
-                    __func__, space_base, eomem, size, align);
-    base = ALIGN_UP(space_base, PAGE_SIZE);
-
-    while ((base + size) < rma_size(cpu_default_rma_order_pages())) {
-        if (of_claim(base, size, 0) != OF_FAILURE) {
-            space_base = base + size;
-            return base;
-        }
-        base += (PAGE_SIZE >  align) ? PAGE_SIZE : align;
-    }
-    of_panic("Cannot find memory in the RMA\n");
 }
 
 /* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges
@@ -798,8 +920,10 @@ static int __init boot_of_serial(void *o
             of_panic("package-to-path failed\n");
 
         rc = of_getprop(p, "device_type", type, sizeof (type));
-        if (rc == OF_FAILURE)
-            of_panic("fetching device type failed\n");
+        if (rc == OF_FAILURE) {
+            of_printf("%s: fetching type of `%s' failed\n", __func__, buf);
+            continue;
+        }
 
         if (strcmp(type, "serial") != 0)
             continue;
@@ -855,17 +979,104 @@ static int __init boot_of_serial(void *o
     return 1;
 }
 
-static void boot_of_module(ulong r3, ulong r4, multiboot_info_t *mbi)
-{
-    static module_t mods[3];
+static int __init boot_of_rtas(module_t *mod, multiboot_info_t *mbi)
+{
+    int rtas_node;
+    int rtas_instance;
+    uint size = 0;
+    int res[2];
+    int mem;
+    int ret;
+
+    rtas_node = of_finddevice("/rtas");
+
+    if (rtas_node <= 0) {
+        of_printf("No RTAS, Xen has no power control\n");
+        return 0;
+    }
+    of_getprop(rtas_node, "rtas-size", &size, sizeof (size));
+    if (size == 0) {
+        of_printf("RTAS, has no size\n");
+        return 0;
+    }
+
+    rtas_instance = of_open("/rtas");
+    if (rtas_instance == OF_FAILURE) {
+        of_printf("RTAS, could not open\n");
+        return 0;
+    }
+
+    size = ALIGN_UP(size, PAGE_SIZE);
+
+    mem = boot_of_alloc(size);
+    if (mem == 0)
+        of_panic("Could not allocate RTAS tree\n");
+
+    of_printf("instantiating RTAS at: 0x%x\n", mem);
+
+    ret = of_call("call-method", 3, 2, res,
+                  "instantiate-rtas", rtas_instance, mem);
+    if (ret == OF_FAILURE) {
+        of_printf("RTAS, could not open\n");
+        return 0;
+    }
+
+    rtas_entry = res[1];
+    rtas_base = mem;
+    rtas_end = mem + size;
+    rtas_msr = of_msr;
+
+    mod->mod_start = rtas_base;
+    mod->mod_end = rtas_end;
+    return 1;
+}
+
+static void * __init boot_of_devtree(module_t *mod, multiboot_info_t *mbi)
+{
     void *oft;
     ulong oft_sz = 48 * PAGE_SIZE;
+
+    /* snapshot the tree */
+    oft = (void *)boot_of_alloc(oft_sz);
+    if (oft == NULL)
+        of_panic("Could not allocate OFD tree\n");
+
+    of_printf("creating oftree at: 0x%p\n", oft);
+    of_test("package-to-path");
+    oft = ofd_create(oft, oft_sz);
+    pkg_save(oft);
+
+    if (ofd_size(oft) > oft_sz)
+         of_panic("Could not fit all of native devtree\n");
+
+    boot_of_fixup_refs(oft);
+    boot_of_fixup_chosen(oft);
+
+    if (ofd_size(oft) > oft_sz)
+         of_panic("Could not fit all devtree fixups\n");
+
+    ofd_walk(oft, __func__, OFD_ROOT, /* add_hype_props */ NULL, 2);
+
+    mod->mod_start = (ulong)oft;
+    mod->mod_end = mod->mod_start + oft_sz;
+    of_printf("%s: devtree mod @ 0x%016x - 0x%016x\n", __func__,
+              mod->mod_start, mod->mod_end);
+
+    return oft;
+}
+
+static void * __init boot_of_module(ulong r3, ulong r4, multiboot_info_t *mbi)
+{
+    static module_t mods[4];
     ulong mod0_start;
     ulong mod0_size;
-    static const char sepr[] = " -- ";
+    static const char * sepr[] = {" -- ", " || "};
+    int sepr_index;
     extern char dom0_start[] __attribute__ ((weak));
     extern char dom0_size[] __attribute__ ((weak));
-    const char *p;
+    const char *p = NULL;
+    int mod;
+    void *oft;
 
     if ((r3 > 0) && (r4 > 0)) {
         /* was it handed to us in registers ? */
@@ -908,57 +1119,50 @@ static void boot_of_module(ulong r3, ulo
         of_printf("mod0: %o %c %c %c\n", c[0], c[1], c[2], c[3]);
     }
 
-    space_base = (ulong)_end;
-    mods[0].mod_start = mod0_start;
-    mods[0].mod_end = mod0_start + mod0_size;
-
-    of_printf("%s: mod[0] @ 0x%016x[0x%x]\n", __func__,
-              mods[0].mod_start, mods[0].mod_end);
-    p = strstr((char *)(ulong)mbi->cmdline, sepr);
+    mod = 0;
+    mods[mod].mod_start = mod0_start;
+    mods[mod].mod_end = mod0_start + mod0_size;
+
+    of_printf("%s: dom0 mod @ 0x%016x[0x%x]\n", __func__,
+              mods[mod].mod_start, mods[mod].mod_end);
+
+    /* look for delimiter: "--" or "||" */
+    for (sepr_index = 0; sepr_index < ARRAY_SIZE(sepr); sepr_index++) {
+        p = strstr((char *)(ulong)mbi->cmdline, sepr[sepr_index]);
+        if (p != NULL)
+            break;
+    }
+
     if (p != NULL) {
-        p += sizeof (sepr) - 1;
-        mods[0].string = (u32)(ulong)p;
-        of_printf("%s: mod[0].string: %s\n", __func__, p);
-    }
-
-    /* snapshot the tree */
-    oft = (void*)find_space(oft_sz, PAGE_SIZE, mbi);
-    if (oft == 0)
-        of_panic("Could not allocate OFD tree\n");
-
-    of_printf("creating oft\n");
-    of_test("package-to-path");
-    oft = ofd_create(oft, oft_sz);
-    pkg_save(oft);
-
-    if (ofd_size(oft) > oft_sz)
-         of_panic("Could not fit all of native devtree\n");
-
-    boot_of_fixup_refs(oft);
-    boot_of_fixup_chosen(oft);
-
-    if (ofd_size(oft) > oft_sz)
-         of_panic("Could not fit all devtree fixups\n");
-
-    ofd_walk(oft, OFD_ROOT, /* add_hype_props */ NULL, 2);
-
-    mods[1].mod_start = (ulong)oft;
-    mods[1].mod_end = mods[1].mod_start + oft_sz;
-    of_printf("%s: mod[1] @ 0x%016x[0x%x]\n", __func__,
-              mods[1].mod_start, mods[1].mod_end);
-
+        /* Xen proper should never know about the dom0 args.  */
+        *(char *)p = '\0';
+        p += strlen(sepr[sepr_index]);
+        mods[mod].string = (u32)(ulong)p;
+        of_printf("%s: dom0 mod string: %s\n", __func__, p);
+    }
+
+    ++mod;
+    if (boot_of_rtas(&mods[mod], mbi))
+        ++mod;
+
+    oft = boot_of_devtree(&mods[mod], mbi);
+    if (oft == NULL)
+        of_panic("%s: boot_of_devtree failed\n", __func__);
+
+    ++mod;
 
     mbi->flags |= MBI_MODULES;
-    mbi->mods_count = 2;
+    mbi->mods_count = mod;
     mbi->mods_addr = (u32)mods;
 
-    boot_of_serial(oft);
+    return oft;
 }
 
 static int __init boot_of_cpus(void)
 {
-    int cpus_node;
-    int cpu_node, bootcpu_node, logical;
+    int cpus_node, cpu_node;
+    int bootcpu_instance, bootcpu_node;
+    int logical;
     int result;
     s32 cpuid;
     u32 cpu_clock[2];
@@ -967,9 +1171,13 @@ static int __init boot_of_cpus(void)
     /* Look up which CPU we are running on right now and get all info
      * from there */
     result = of_getprop(bof_chosen, "cpu",
-                        &bootcpu_node, sizeof (bootcpu_node));
+                        &bootcpu_instance, sizeof (bootcpu_instance));
     if (result == OF_FAILURE)
-        of_panic("Failed to look up boot cpu\n");
+        of_panic("Failed to look up boot cpu instance\n");
+
+    bootcpu_node = of_instance_to_package(bootcpu_instance);
+    if (bootcpu_node == OF_FAILURE)
+        of_panic("Failed to look up boot cpu package\n");
 
     cpu_node = bootcpu_node;
 
@@ -1070,15 +1278,12 @@ static int __init boot_of_cpus(void)
     return 1;
 }
 
-static int __init boot_of_rtas(void)
-{
-    return 1;
-}
-
 multiboot_info_t __init *boot_of_init(
         ulong r3, ulong r4, ulong vec, ulong r6, ulong r7, ulong orig_msr)
 {
     static multiboot_info_t mbi;
+    void *oft;
+    int r;
 
     of_vec = vec;
     of_msr = orig_msr;
@@ -1098,18 +1303,20 @@ multiboot_info_t __init *boot_of_init(
             r3, r4, vec, r6, r7, orig_msr);
 
     if ((vec >= (ulong)_start) && (vec <= (ulong)_end)) {
-        of_printf("Hmm.. OF[0x%lx] seems to have stepped on our image "
-                "that ranges: %p .. %p.\n HANG!\n",
+        of_panic("Hmm.. OF[0x%lx] seems to have stepped on our image "
+                "that ranges: %p .. %p.\n",
                 vec, _start, _end);
     }
     of_printf("%s: _start %p _end %p 0x%lx\n", __func__, _start, _end, r6);
 
     boot_of_fix_maple();
-    boot_of_probemem(&mbi);
+    r = boot_of_mem_init();
+    if (r == 0)
+        of_panic("failure to initialize memory allocator");
     boot_of_bootargs(&mbi);
-    boot_of_module(r3, r4, &mbi);
+    oft = boot_of_module(r3, r4, &mbi);
     boot_of_cpus();
-    boot_of_rtas();
+    boot_of_serial(oft);
 
     /* end of OF */
     of_printf("Quiescing Open Firmware ...\n");
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/crash.c
--- a/xen/arch/powerpc/crash.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/crash.c  Thu Dec 14 08:57:36 2006 -0700
@@ -1,5 +1,6 @@
 #include <xen/lib.h>       /* for printk() used in stub */
 #include <xen/types.h>
+#include <xen/kexec.h>
 #include <public/kexec.h>
 
 void machine_crash_shutdown(void)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/dart.c
--- a/xen/arch/powerpc/dart.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/dart.c   Thu Dec 14 08:57:36 2006 -0700
@@ -60,8 +60,8 @@ union dart_entry {
     u32 de_word;
     struct {
         u32 de_v:1;             /* valid */
-        u32 de_rp:1;             /* read protected*/
-        u32 de_wp:1;             /* write protected*/
+        u32 de_rp:1;             /* read protected */
+        u32 de_wp:1;             /* write protected */
         u32 _de_res:5;
         u32 de_ppn:24;         /* 24 bit Physical Page Number
                                  * representing address [28:51] */
@@ -98,7 +98,6 @@ static u32 dart_encode(int perm, ulong r
     if (perm & DART_WRITE) {
         e.de_bits.de_wp = 0;
     }
-
     return e.de_word;
 }
 
@@ -190,10 +189,8 @@ static int find_dart(struct dart_info *d
     ofdn_t n;
     char compat[128];
 
-
-    if (on_mambo()) {
-        /* mambo has no dart */
-        DBG("%s: Mambo does not support a dart\n", __func__);
+    if (on_systemsim()) {
+        DBG("%s: systemsim does not support a dart\n", __func__);
         return -1;
     }
 
@@ -263,7 +260,7 @@ static int init_dart(void)
 
     /* Linux uses a dummy page, filling "empty" DART entries with a
        reference to this page to capture stray DMA's */
-    dummy_page = (ulong)alloc_xenheap_pages(1);
+    dummy_page = (ulong)alloc_xenheap_pages(0);
     clear_page((void *)dummy_page);
     dummy_page >>= PAGE_SHIFT;
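The alloc_xenheap_pages() argument change just above (1 -> 0) reads as a fix on the assumption that the parameter is an allocation order rather than a page count, so a single dummy page is order 0. A trivial standalone reminder of order arithmetic, with an assumed 4 KiB page size:

    /* Order-based sizing: an order-n allocation covers 1 << n pages. */
    #include <stdio.h>

    #define PAGE_SHIFT 12   /* assumed 4 KiB pages */

    int main(void)
    {
        int order;

        for (order = 0; order <= 2; order++)
            printf("order %d -> %d page(s), %lu bytes\n", order, 1 << order,
                   (unsigned long)(1 << order) << PAGE_SHIFT);
        return 0;
    }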
 
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/dart_u4.c
--- a/xen/arch/powerpc/dart_u4.c        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/dart_u4.c        Thu Dec 14 08:57:36 2006 -0700
@@ -19,6 +19,7 @@
  */
 
 #undef DEBUG
+#define INVALIDATE_ALL
 
 #include <xen/config.h>
 #include <xen/types.h>
@@ -123,8 +124,13 @@ static void u4_inv_all(void)
 
 static void u4_inv_entry(ulong pgn)
 {
+#ifdef INVALIDATE_ALL
+    return u4_inv_all();
+#else
     union dart_ctl dc;
     ulong retries = 0;
+
+    return u4_inv_all();
 
     dc.dc_word = in_32(&dart->d_dartcntl.dc_word);
     dc.dc_bits.dc_ilpn = pgn;
@@ -139,6 +145,7 @@ static void u4_inv_entry(ulong pgn)
         if (retries > 1000000)
             panic("WAY! too long\n");
     } while (dc.dc_bits.dc_ione != 0);
+#endif
 }
 
 static struct dart_ops u4_ops = {
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/domain.c Thu Dec 14 08:57:36 2006 -0700
@@ -33,6 +33,8 @@
 #include <asm/htab.h>
 #include <asm/current.h>
 #include <asm/hcalls.h>
+#include "rtas.h"
+#include "exceptions.h"
 
 #define next_arg(fmt, args) ({                                              \
     unsigned long __arg;                                                    \
@@ -46,7 +48,6 @@
     }                                                                       \
     __arg;                                                                  \
 })
-extern void idle_loop(void);
 
 unsigned long hypercall_create_continuation(unsigned int op,
         const char *format, ...)
@@ -87,26 +88,44 @@ int arch_domain_create(struct domain *d)
 
     INIT_LIST_HEAD(&d->arch.extent_list);
 
+    d->arch.foreign_mfn_count = 1024;
+    d->arch.foreign_mfns = xmalloc_array(uint, d->arch.foreign_mfn_count);
+    BUG_ON(d->arch.foreign_mfns == NULL);
+
+    memset(d->arch.foreign_mfns, -1, d->arch.foreign_mfn_count * sizeof(uint));
+
     return 0;
 }
 
 void arch_domain_destroy(struct domain *d)
 {
     shadow_teardown(d);
-}
-
+    /* shared_info is part of the RMA so no need to release it */
+}
+
+static void machine_fail(const char *s)
+{
+    printk("%s failed, manual powercycle required!\n", s);
+    for (;;)
+        sleep();
+}
 void machine_halt(void)
 {
     printk("machine_halt called: spinning....\n");
     console_start_sync();
-    while(1);
+    printk("%s called\n", __func__);
+    rtas_halt();
+
+    machine_fail(__func__);
 }
 
 void machine_restart(char * __unused)
 {
     printk("machine_restart called: spinning....\n");
     console_start_sync();
-    while(1);
+    printk("%s called\n", __func__);
+    rtas_reboot();
+    machine_fail(__func__);
 }
 
 struct vcpu *alloc_vcpu_struct(void)
@@ -222,6 +241,7 @@ void context_switch(struct vcpu *prev, s
 
     mtsdr1(next->domain->arch.htab.sdr1);
     local_flush_tlb(); /* XXX maybe flush_tlb_mask? */
+    cpu_flush_icache();
 
     if (is_idle_vcpu(next)) {
         reset_stack_and_jump(idle_loop);
@@ -278,8 +298,10 @@ static void relinquish_memory(struct dom
 
 void domain_relinquish_resources(struct domain *d)
 {
+    relinquish_memory(d, &d->xenpage_list);
     relinquish_memory(d, &d->page_list);
     free_extents(d);
+    xfree(d->arch.foreign_mfns);
     return;
 }
 
@@ -291,7 +313,6 @@ void arch_dump_vcpu_info(struct vcpu *v)
 {
 }
 
-extern void sleep(void);
 static void safe_halt(void)
 {
     int cpu = smp_processor_id();
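The per-domain foreign-MFN table introduced in arch_domain_create() is a flat array of 32-bit entries whose "no mapping" sentinel comes from memset()ing the array with -1 (every byte 0xff, hence every entry 0xffffffff). A freestanding sketch of that idiom, with hypothetical names standing in for the Xen allocator and struct domain:

    /* Sketch of the foreign-MFN table initialisation: memset(-1) over a
     * uint array produces an all-ones "invalid" value in every slot. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define FOREIGN_MFN_COUNT 1024
    #define INVALID_MFN       0xffffffffu

    int main(void)
    {
        unsigned int *foreign_mfns;

        foreign_mfns = malloc(FOREIGN_MFN_COUNT * sizeof(*foreign_mfns));
        if (foreign_mfns == NULL)
            return 1;

        memset(foreign_mfns, -1, FOREIGN_MFN_COUNT * sizeof(*foreign_mfns));

        printf("entry 0 = 0x%x (invalid: %s)\n", foreign_mfns[0],
               foreign_mfns[0] == INVALID_MFN ? "yes" : "no");

        free(foreign_mfns);
        return 0;
    }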
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/domain_build.c
--- a/xen/arch/powerpc/domain_build.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/domain_build.c   Thu Dec 14 08:57:36 2006 -0700
@@ -178,8 +178,7 @@ int construct_dom0(struct domain *d,
         shadow_set_allocation(d, opt_dom0_shadow, &preempt);
     } while (preempt);
     if (shadow_get_allocation(d) == 0)
-        panic("shadow allocation failed 0x%x < 0x%x\n",
-              shadow_get_allocation(d), opt_dom0_shadow);
+        panic("shadow allocation failed: %dMib\n", opt_dom0_shadow);
 
     ASSERT( image_len < rma_sz );
 
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/domctl.c
--- a/xen/arch/powerpc/domctl.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/domctl.c Thu Dec 14 08:57:36 2006 -0700
@@ -96,14 +96,14 @@ long arch_do_domctl(struct xen_domctl *d
     case XEN_DOMCTL_real_mode_area:
     {
         struct domain *d;
-        unsigned int log = domctl->u.real_mode_area.log;
+        unsigned int order = domctl->u.real_mode_area.log - PAGE_SHIFT;
 
         ret = -ESRCH;
         d = find_domain_by_id(domctl->domain);
         if (d != NULL) {
             ret = -EINVAL;
-            if (cpu_rma_valid(log))
-                ret = allocate_rma(d, log - PAGE_SHIFT);
+            if (cpu_rma_valid(order))
+                ret = allocate_rma(d, order);
             put_domain(d);
         }
     }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/exceptions.c
--- a/xen/arch/powerpc/exceptions.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/exceptions.c     Thu Dec 14 08:57:36 2006 -0700
@@ -25,8 +25,10 @@
 #include <xen/serial.h>
 #include <xen/gdbstub.h>
 #include <xen/console.h>
+#include <xen/shutdown.h>
 #include <asm/time.h>
 #include <asm/processor.h>
+#include <asm/debugger.h>
 
 #undef DEBUG
 
@@ -56,25 +58,19 @@ void do_dec(struct cpu_user_regs *regs)
 
 void program_exception(struct cpu_user_regs *regs, unsigned long cookie)
 {
+    if (cookie == 0x200) {
+        if (cpu_machinecheck(regs))
+            return;
+
+        printk("%s: machine check\n", __func__);
+    } else {
 #ifdef CRASH_DEBUG
-    __trap_to_gdb(regs, cookie);
-#else /* CRASH_DEBUG */
-    int recover = 0;
+        if (__trap_to_gdb(regs, cookie) == 0)
+            return;
+#endif /* CRASH_DEBUG */
 
-    console_start_sync();
-
-    show_registers(regs);
-    printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr());
-    printk("hid4 0x%016lx\n", regs->hid4);
-    printk("---[ backtrace ]---\n");
-    show_backtrace(regs->gprs[1], regs->lr, regs->pc);
-
-    if (cookie == 0x200)
-        recover = cpu_machinecheck(regs);
-
-    if (!recover)
-        panic("%s: 0x%lx\n", __func__, cookie);
-
-    console_end_sync();
-#endif /* CRASH_DEBUG */
+        printk("%s: type: 0x%lx\n", __func__, cookie);
+        show_backtrace_regs(regs);
+    }
+    machine_halt();
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/exceptions.h
--- a/xen/arch/powerpc/exceptions.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/exceptions.h     Thu Dec 14 08:57:36 2006 -0700
@@ -43,13 +43,14 @@ extern void program_exception(
     struct cpu_user_regs *regs, unsigned long cookie);
 
 extern long xen_hvcall_jump(struct cpu_user_regs *regs, ulong address);
-extern void *mambo_memset(void *, int, ulong);
-extern void *mambo_memcpy(void *, const void *, ulong);
+
+extern void sleep(void);
+extern void idle_loop(void);
 
 extern ulong *__hypercall_table[];
 
 extern char exception_vectors[];
 extern char exception_vectors_end[];
 extern int spin_start[];
-extern int secondary_cpu_init(int cpuid, unsigned long r4);
+extern void secondary_cpu_init(int cpuid, unsigned long r4);
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/external.c       Thu Dec 14 08:57:36 2006 -0700
@@ -82,7 +82,14 @@ void do_external(struct cpu_user_regs *r
 
     vec = xen_mpic_get_irq(regs);
 
-    if (vec != -1) {
+    if (vec != -1 && (irq_desc[vec].status & IRQ_PER_CPU)) {
+        /* x86 do_IRQ does not respect the per cpu flag.  */
+        irq_desc_t *desc = &irq_desc[vec];
+        regs->entry_vector = vec;
+        desc->handler->ack(vec);
+        desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs);
+        desc->handler->end(vec);
+    } else if (vec != -1) {
         DBG("EE:0x%lx isrc: %d\n", regs->msr, vec);
         regs->entry_vector = vec;
         do_IRQ(regs);
@@ -253,3 +260,24 @@ int ioapic_guest_write(unsigned long phy
     BUG_ON(val != val);
     return 0;
 }
+
+void send_IPI_mask(cpumask_t mask, int vector)
+{
+    unsigned int cpus;
+    int const bits = 8 * sizeof(cpus);
+
+    switch(vector) {
+    case CALL_FUNCTION_VECTOR:
+    case EVENT_CHECK_VECTOR:
+        break;
+    default:
+        BUG();
+        return;
+    }
+
+    BUG_ON(NR_CPUS > bits);
+    BUG_ON(fls(mask.bits[0]) > bits);
+
+    cpus = mask.bits[0];
+    mpic_send_ipi(vector, cpus);
+}
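send_IPI_mask() above accepts only the two IPI vectors it knows about and then folds the first word of the cpumask into a 32-bit mask for the MPIC. A detached sketch of that packing, with the MPIC call stubbed out and the vector values invented:

    /* Sketch of the cpumask -> 32-bit MPIC mask packing.  The vector
     * values and the mpic_send_ipi() stub are placeholders. */
    #include <assert.h>
    #include <stdio.h>

    #define NR_CPUS               4
    #define CALL_FUNCTION_VECTOR  0xfb   /* invented value */
    #define EVENT_CHECK_VECTOR    0xfc   /* invented value */

    static void mpic_send_ipi(int vector, unsigned int cpu_mask)
    {
        printf("IPI vector 0x%x -> cpu mask 0x%x\n", vector, cpu_mask);
    }

    static void send_ipi_mask(unsigned long mask_bits, int vector)
    {
        unsigned int cpus;
        const int bits = 8 * sizeof(cpus);

        switch (vector) {
        case CALL_FUNCTION_VECTOR:
        case EVENT_CHECK_VECTOR:
            break;                  /* only known IPIs are allowed through */
        default:
            assert(0);
            return;
        }

        assert(NR_CPUS <= bits);    /* the packing must fit in one word */
        cpus = (unsigned int)mask_bits;
        mpic_send_ipi(vector, cpus);
    }

    int main(void)
    {
        send_ipi_mask(0x5 /* CPUs 0 and 2 */, CALL_FUNCTION_VECTOR);
        return 0;
    }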
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/gdbstub.c
--- a/xen/arch/powerpc/gdbstub.c        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/gdbstub.c        Thu Dec 14 08:57:36 2006 -0700
@@ -25,6 +25,7 @@
 #include <asm/msr.h>
 #include <asm/bitops.h>
 #include <asm/cache.h>
+#include <asm/debugger.h>
 #include <asm/processor.h>
 
 asm(".globl trap_instruction\n"
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/iommu.c
--- a/xen/arch/powerpc/iommu.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/iommu.c  Thu Dec 14 08:57:36 2006 -0700
@@ -32,6 +32,12 @@
 #include "tce.h"
 #include "iommu.h"
 
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
 struct iommu_funcs {
     int (*iommu_put)(ulong, union tce);
 };
@@ -46,17 +52,31 @@ int iommu_put(u32 buid, ulong ioba, unio
     struct domain *d = v->domain;
 
     if (buid < iommu_phbs_num && iommu_phbs[buid].iommu_put != NULL) {
-        ulong pfn;
+        ulong gmfn;
         ulong mfn;
         int mtype;
 
-        pfn = tce.tce_bits.tce_rpn;
-        mfn = pfn2mfn(d, pfn, &mtype);
+        gmfn = tce.tce_bits.tce_rpn;
+
+        mfn = pfn2mfn(d, gmfn, &mtype);
         if (mfn != INVALID_MFN) {
-#ifdef DEBUG
-            printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
-                   ioba, pfn, mfn);
-#endif
+            switch (mtype) {
+            case PFN_TYPE_RMA:
+            case PFN_TYPE_LOGICAL:
+                break;
+            case PFN_TYPE_FOREIGN:
+                DBG("%s: assigning to Foriegn page: "
+                    "gmfn: 0x%lx mfn: 0x%lx\n",  __func__, gmfn, mfn);
+                break;
+            default:
+                printk("%s: unsupported type[%d]: gmfn: 0x%lx mfn: 0x%lx\n",
+                       __func__, mtype, gmfn, mfn);
+                return -1;
+            break;
+            }
+            DBG("%s: ioba=0x%lx gmfn=0x%lx mfn=0x%lx\n", __func__,
+                ioba, gmfn, mfn);
             tce.tce_bits.tce_rpn = mfn;
             return iommu_phbs[buid].iommu_put(ioba, tce);
         }
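The iommu_put() change translates the guest frame found in the TCE and only lets known page types reach the hardware. Stripped of the Xen data structures, the shape of that check is roughly as follows; every name and the 1:1 mock translation are invented:

    /* Sketch of the gmfn -> mfn check before programming a TCE. */
    #include <stdio.h>

    enum pfn_type { PFN_NONE, PFN_RMA, PFN_LOGICAL, PFN_FOREIGN, PFN_IO };

    /* Mock lookup: pretend guest frames map 1:1 into an RMA at 0x1000. */
    static unsigned long lookup(unsigned long gmfn, enum pfn_type *type)
    {
        *type = PFN_RMA;
        return 0x1000 + gmfn;
    }

    static int tce_put(unsigned long ioba, unsigned long gmfn)
    {
        enum pfn_type type;
        unsigned long mfn = lookup(gmfn, &type);

        switch (type) {
        case PFN_RMA:
        case PFN_LOGICAL:
        case PFN_FOREIGN:
            break;                              /* DMA to these is permitted */
        default:
            fprintf(stderr, "unsupported page type %d\n", type);
            return -1;
        }

        printf("ioba 0x%lx -> mfn 0x%lx\n", ioba, mfn);
        return 0;   /* the real code rewrites the TCE and programs it */
    }

    int main(void)
    {
        return tce_put(0x3000, 0x42) ? 1 : 0;
    }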
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/machine_kexec.c
--- a/xen/arch/powerpc/machine_kexec.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/machine_kexec.c  Thu Dec 14 08:57:36 2006 -0700
@@ -1,5 +1,6 @@
 #include <xen/lib.h>       /* for printk() used in stubs */
 #include <xen/types.h>
+#include <xen/kexec.h>
 #include <public/kexec.h>
 
 int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
@@ -9,11 +10,6 @@ int machine_kexec_load(int type, int slo
 }
 
 void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image)
-{
-    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
-}
-
-void machine_kexec(xen_kexec_image_t *image)
 {
     printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/memory.c
--- a/xen/arch/powerpc/memory.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/memory.c Thu Dec 14 08:57:36 2006 -0700
@@ -20,10 +20,31 @@
  */
 #include <xen/sched.h>
 #include <xen/mm.h>
+#include <xen/numa.h>
 #include "of-devtree.h"
 #include "oftree.h"
+#include "rtas.h"
+
+#undef DEBUG
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
+ * page_info table and allocation bitmap.
+ */
+static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
+integer_param("xenheap_megabytes", opt_xenheap_megabytes);
 
 unsigned long xenheap_phys_end;
+static uint nr_pages;
+static ulong xenheap_size;
+static ulong save_start;
+static ulong save_end;
+
 struct membuf {
     ulong start;
     ulong size;
@@ -36,15 +57,20 @@ static ulong free_xenheap(ulong start, u
     start = ALIGN_UP(start, PAGE_SIZE);
     end = ALIGN_DOWN(end, PAGE_SIZE);
 
-    printk("%s: 0x%lx - 0x%lx\n", __func__, start, end);
-
-    if (oftree <= end && oftree >= start) {
-        printk("%s:     Go around the devtree: 0x%lx - 0x%lx\n",
-               __func__, oftree, oftree_end);
-        init_xenheap_pages(start, ALIGN_DOWN(oftree, PAGE_SIZE));
-        init_xenheap_pages(ALIGN_UP(oftree_end, PAGE_SIZE), end);
+    DBG("%s: 0x%lx - 0x%lx\n", __func__, start, end);
+
+    /* need to do this better */
+    if (save_start <= end && save_start >= start) {
+        DBG("%s:     Go around the saved area: 0x%lx - 0x%lx\n",
+               __func__, save_start, save_end);
+        init_xenheap_pages(start, ALIGN_DOWN(save_start, PAGE_SIZE));
+        xenheap_size += ALIGN_DOWN(save_start, PAGE_SIZE) - start;
+
+        init_xenheap_pages(ALIGN_UP(save_end, PAGE_SIZE), end);
+        xenheap_size += end - ALIGN_UP(save_end, PAGE_SIZE);
     } else {
         init_xenheap_pages(start, end);
+        xenheap_size += end - start;
     }
 
     return ALIGN_UP(end, PAGE_SIZE);
@@ -57,8 +83,10 @@ static void set_max_page(struct membuf *
     for (i = 0; i < entries; i++) {
         ulong end_page;
 
+        printk("  %016lx: %016lx\n", mb[i].start, mb[i].size);
+        nr_pages += mb[i].size >> PAGE_SHIFT;
+
         end_page = (mb[i].start + mb[i].size) >> PAGE_SHIFT;
-
         if (end_page > max_page)
             max_page = end_page;
     }
@@ -71,11 +99,11 @@ static void heap_init(struct membuf *mb,
     ulong start_blk;
     ulong end_blk = 0;
 
-       for (i = 0; i < entries; i++) {
-           start_blk = mb[i].start;
-           end_blk = start_blk + mb[i].size;
-
-           if (start_blk < xenheap_phys_end) {
+    for (i = 0; i < entries; i++) {
+        start_blk = mb[i].start;
+        end_blk = start_blk + mb[i].size;
+
+        if (start_blk < xenheap_phys_end) {
             if (xenheap_phys_end > end_blk) {
                 panic("xenheap spans LMB\n");
             }
@@ -87,7 +115,7 @@ static void heap_init(struct membuf *mb,
 
         init_boot_pages(start_blk, end_blk);
         total_pages += (end_blk - start_blk) >> PAGE_SHIFT;
-       }
+    }
 }
 
 static void ofd_walk_mem(void *m, walk_mem_fn fn)
@@ -123,7 +151,7 @@ static void setup_xenheap(module_t *mod,
     for (i = 0; i < mcount; i++) {
         u32 s;
 
-        if(mod[i].mod_end == mod[i].mod_start)
+        if (mod[i].mod_end == mod[i].mod_start)
             continue;
 
         s = ALIGN_DOWN(mod[i].mod_start, PAGE_SIZE);
@@ -149,19 +177,42 @@ void memory_init(module_t *mod, int mcou
 void memory_init(module_t *mod, int mcount)
 {
     ulong eomem;
-    ulong heap_start, heap_size;
-
-    printk("Physical RAM map:\n");
+    ulong heap_start;
+    ulong xh_pages;
 
     /* lets find out how much memory there is and set max_page */
     max_page = 0;
+    printk("Physical RAM map:\n");
     ofd_walk_mem((void *)oftree, set_max_page);
     eomem = max_page << PAGE_SHIFT;
 
     if (eomem == 0){
         panic("ofd_walk_mem() failed\n");
     }
-    printk("End of RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10);
+
+    /* find the portion of memory we need to keep safe */
+    save_start = oftree;
+    save_end = oftree_end;
+    if (rtas_base) {
+        if (save_start > rtas_base)
+            save_start = rtas_base;
+        if (save_end < rtas_end)
+            save_end = rtas_end;
+    }
+
+    /* the minimum heap has to reach the end of all memory Xen requires */
+    xh_pages = ALIGN_UP(save_end, PAGE_SIZE) >> PAGE_SHIFT;
+    xh_pages += opt_xenheap_megabytes << (20 - PAGE_SHIFT);
+
+    /* While we are allocating HTABs from the Xen heap we need it to
+     * be larger */
+    xh_pages += nr_pages >> 5;
+
+    xenheap_phys_end = xh_pages << PAGE_SHIFT;
+    printk("End of Xen Area: %luMiB (%luKiB)\n",
+           xenheap_phys_end >> 20, xenheap_phys_end >> 10);
+
+    printk("End of RAM: %luMiB (%luKiB)\n", eomem >> 20, eomem >> 10);
 
     /* Architecturally the first 4 pages are exception hendlers, we
      * will also be copying down some code there */
@@ -185,22 +236,23 @@ void memory_init(module_t *mod, int mcou
         panic("total_pages > max_page: 0x%lx > 0x%lx\n",
               total_pages, max_page);
 
-    printk("total_pages: 0x%016lx\n", total_pages);
+    DBG("total_pages: 0x%016lx\n", total_pages);
 
     init_frametable();
+
+    numa_initmem_init(0, max_page);
+
     end_boot_allocator();
 
     /* Add memory between the beginning of the heap and the beginning
-     * of out text */
+     * of our text */
     free_xenheap(heap_start, (ulong)_start);
-
-    heap_size = xenheap_phys_end - heap_start;
-    printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10);
-
     setup_xenheap(mod, mcount);
+    printk("Xen Heap: %luMiB (%luKiB)\n",
+           xenheap_size >> 20, xenheap_size >> 10);
 
     eomem = avail_domheap_pages();
-    printk("Domheap pages: 0x%lx %luMB (%lukB)\n", eomem,
+    printk("Dom Heap: %luMiB (%luKiB)\n",
            (eomem << PAGE_SHIFT) >> 20,
            (eomem << PAGE_SHIFT) >> 10);
 }
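The free_xenheap() change boils down to "hand [start, end) to the allocator, but step around the saved window holding the device tree and RTAS". The carve-out can be sketched on its own like this, where init_range() stands in for init_xenheap_pages() and the addresses are arbitrary example values:

    /* Sketch of the "go around the saved area" logic. */
    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))
    #define ALIGN_UP(x, a)   ALIGN_DOWN((x) + (a) - 1, a)

    static unsigned long freed;

    static void init_range(unsigned long start, unsigned long end)
    {
        printf("  free 0x%lx - 0x%lx\n", start, end);
        freed += end - start;
    }

    static void free_heap(unsigned long start, unsigned long end,
                          unsigned long save_start, unsigned long save_end)
    {
        start = ALIGN_UP(start, PAGE_SIZE);
        end = ALIGN_DOWN(end, PAGE_SIZE);

        if (save_start <= end && save_start >= start) {
            /* Release the pieces on either side of the saved window. */
            init_range(start, ALIGN_DOWN(save_start, PAGE_SIZE));
            init_range(ALIGN_UP(save_end, PAGE_SIZE), end);
        } else {
            init_range(start, end);
        }
    }

    int main(void)
    {
        free_heap(0x100000, 0x800000, 0x300000, 0x340000);
        printf("total freed: 0x%lx bytes\n", freed);
        return 0;
    }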
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/mm.c     Thu Dec 14 08:57:36 2006 -0700
@@ -25,9 +25,9 @@
 #include <xen/kernel.h>
 #include <xen/sched.h>
 #include <xen/perfc.h>
-#include <asm/misc.h>
 #include <asm/init.h>
 #include <asm/page.h>
+#include <asm/string.h>
 
 #ifdef VERBOSE
 #define MEM_LOG(_f, _a...)                                  \
@@ -42,18 +42,129 @@ unsigned long max_page;
 unsigned long max_page;
 unsigned long total_pages;
 
+void __init init_frametable(void)
+{
+    unsigned long p;
+    unsigned long nr_pages;
+    int i;
+
+    nr_pages = PFN_UP(max_page * sizeof(struct page_info));
+
+    p = alloc_boot_pages(nr_pages, 1);
+    if (p == 0)
+        panic("Not enough memory for frame table\n");
+
+    frame_table = (struct page_info *)(p << PAGE_SHIFT);
+    for (i = 0; i < nr_pages; i += 1)
+        clear_page((void *)((p + i) << PAGE_SHIFT));
+}
+
+void share_xen_page_with_guest(
+    struct page_info *page, struct domain *d, int readonly)
+{
+    if ( page_get_owner(page) == d )
+        return;
+
+    /* this causes us to leak pages in the Domain and results in
+     * Zombie domains; I think we are missing a piece, and until we
+     * find it we disable the following code */
+    set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
+
+    spin_lock(&d->page_alloc_lock);
+
+    /* The incremented type count pins as writable or read-only. */
+    page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
+    page->u.inuse.type_info |= PGT_validated | 1;
+
+    page_set_owner(page, d);
+    wmb(); /* install valid domain ptr before updating refcnt. */
+    ASSERT(page->count_info == 0);
+    page->count_info |= PGC_allocated | 1;
+
+    if ( unlikely(d->xenheap_pages++ == 0) )
+        get_knownalive_domain(d);
+    list_add_tail(&page->list, &d->xenpage_list);
+
+    spin_unlock(&d->page_alloc_lock);
+}
+
+void share_xen_page_with_privileged_guests(
+    struct page_info *page, int readonly)
+{
+        unimplemented();
+}
+
+static ulong foreign_to_mfn(struct domain *d, ulong pfn)
+{
+
+    pfn -= 1UL << cpu_foreign_map_order();
+
+    BUG_ON(pfn >= d->arch.foreign_mfn_count);
+
+    return d->arch.foreign_mfns[pfn];
+}
+
+static int set_foreign(struct domain *d, ulong pfn, ulong mfn)
+{
+    pfn -= 1UL << cpu_foreign_map_order();
+
+    BUG_ON(pfn >= d->arch.foreign_mfn_count);
+    d->arch.foreign_mfns[pfn] = mfn;
+
+    return 0;
+}
+
+static int create_grant_va_mapping(
+    unsigned long va, unsigned long frame, struct vcpu *v)
+{
+    if (v->domain->domain_id != 0) {
+        printk("only Dom0 can map a grant entry\n");
+        BUG();
+        return GNTST_permission_denied;
+    }
+    set_foreign(v->domain, va >> PAGE_SHIFT, frame);
+    return GNTST_okay;
+}
+
+static int destroy_grant_va_mapping(
+    unsigned long addr, unsigned long frame, struct domain *d)
+{
+    if (d->domain_id != 0) {
+        printk("only Dom0 can map a grant entry\n");
+        BUG();
+        return GNTST_permission_denied;
+    }
+    set_foreign(d, addr >> PAGE_SHIFT, ~0UL);
+    return GNTST_okay;
+}
+
 int create_grant_host_mapping(
     unsigned long addr, unsigned long frame, unsigned int flags)
 {
-    panic("%s called\n", __func__);
-    return 1;
+    if (flags & GNTMAP_application_map) {
+        printk("%s: GNTMAP_application_map not supported\n", __func__);
+        BUG();
+        return GNTST_general_error;
+    }
+    if (flags & GNTMAP_contains_pte) {
+        printk("%s: GNTMAP_contains_pte not supported\n", __func__);
+        BUG();
+        return GNTST_general_error;
+    }
+    return create_grant_va_mapping(addr, frame, current);
 }
 
 int destroy_grant_host_mapping(
     unsigned long addr, unsigned long frame, unsigned int flags)
 {
-    panic("%s called\n", __func__);
-    return 1;
+    if (flags & GNTMAP_contains_pte) {
+        printk("%s: GNTMAP_contains_pte not supported\n", __func__);
+        BUG();
+        return GNTST_general_error;
+    }
+
+    /* may have to force the remove here */
+    return destroy_grant_va_mapping(addr, frame, current->domain);
 }
 
 int steal_page(struct domain *d, struct page_info *page, unsigned int memflags)
@@ -139,7 +250,7 @@ int get_page_type(struct page_info *page
         {
             return 0;
         }
-        if ( unlikely(!(x & PGT_validated)) )
+        else if ( unlikely(!(x & PGT_validated)) )
         {
             /* Someone else is updating validation of this page. Wait... */
             while ( (y = page->u.inuse.type_info) == x )
@@ -158,25 +269,6 @@ int get_page_type(struct page_info *page
     return 1;
 }
 
-void __init init_frametable(void)
-{
-    unsigned long p;
-    unsigned long nr_pages;
-    int i;
-
-    nr_pages = PFN_UP(max_page * sizeof(struct page_info));
-    nr_pages = min(nr_pages, (4UL << (20 - PAGE_SHIFT)));
-    
-
-    p = alloc_boot_pages(nr_pages, 1);
-    if (p == 0)
-        panic("Not enough memory for frame table\n");
-
-    frame_table = (struct page_info *)(p << PAGE_SHIFT);
-    for (i = 0; i < nr_pages; i += 1)
-        clear_page((void *)((p + i) << PAGE_SHIFT));
-}
-
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
 {
     printk("%s: no PPC specific memory ops\n", __func__);
@@ -185,29 +277,28 @@ long arch_memory_op(int op, XEN_GUEST_HA
 
 extern void copy_page(void *dp, void *sp)
 {
-    if (on_mambo()) {
-        extern void *mambo_memcpy(void *,const void *,__kernel_size_t);
-        mambo_memcpy(dp, sp, PAGE_SIZE);
+    if (on_systemsim()) {
+        systemsim_memcpy(dp, sp, PAGE_SIZE);
     } else {
         memcpy(dp, sp, PAGE_SIZE);
     }
 }
 
+/* XXX should probably replace with faster data structure */
 static uint add_extent(struct domain *d, struct page_info *pg, uint order)
 {
     struct page_extents *pe;
 
     pe = xmalloc(struct page_extents);
     if (pe == NULL)
-        return 0;
+        return -ENOMEM;
 
     pe->pg = pg;
     pe->order = order;
-    pe->pfn = page_to_mfn(pg);
 
     list_add_tail(&pe->pe_list, &d->arch.extent_list);
 
-    return pe->pfn;
+    return 0;
 }
 
 void free_extents(struct domain *d)
@@ -246,7 +337,7 @@ uint allocate_extents(struct domain *d, 
         if (pg == NULL)
             return total_nrpages;
 
-        if (add_extent(d, pg, ext_order) == 0) {
+        if (add_extent(d, pg, ext_order) < 0) {
             free_domheap_pages(pg, ext_order);
             return total_nrpages;
         }
@@ -299,13 +390,13 @@ int allocate_rma(struct domain *d, unsig
 
     return 0;
 }
+
 void free_rma_check(struct page_info *page)
 {
     if (test_bit(_PGC_page_RMA, &page->count_info) &&
         !test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags))
         panic("Attempt to free an RMA page: 0x%lx\n", page_to_mfn(page));
 }
-
 
 ulong pfn2mfn(struct domain *d, ulong pfn, int *type)
 {
@@ -314,9 +405,17 @@ ulong pfn2mfn(struct domain *d, ulong pf
     struct page_extents *pe;
     ulong mfn = INVALID_MFN;
     int t = PFN_TYPE_NONE;
+    ulong foreign_map_pfn = 1UL << cpu_foreign_map_order();
 
     /* quick tests first */
-    if (d->is_privileged && cpu_io_mfn(pfn)) {
+    if (pfn & foreign_map_pfn) {
+        t = PFN_TYPE_FOREIGN;
+        mfn = foreign_to_mfn(d, pfn);
+    } else if (pfn >= max_page && pfn < (max_page + NR_GRANT_FRAMES)) {
+        /* Its a grant table access */
+        t = PFN_TYPE_GNTTAB;
+        mfn = gnttab_shared_mfn(d, d->grant_table, (pfn - max_page));
+    } else if (d->is_privileged && cpu_io_mfn(pfn)) {
         t = PFN_TYPE_IO;
         mfn = pfn;
     } else {
@@ -324,17 +423,32 @@ ulong pfn2mfn(struct domain *d, ulong pf
             t = PFN_TYPE_RMA;
             mfn = pfn + rma_base_mfn;
         } else {
+            ulong cur_pfn = rma_size_mfn;
+
             list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
-                uint end_pfn = pe->pfn + (1 << pe->order);
-
-                if (pfn >= pe->pfn && pfn < end_pfn) {
+                uint pe_pages = 1UL << pe->order;
+                uint end_pfn = cur_pfn + pe_pages;
+
+                if (pfn >= cur_pfn && pfn < end_pfn) {
                     t = PFN_TYPE_LOGICAL;
-                    mfn = page_to_mfn(pe->pg) + (pfn - pe->pfn);
+                    mfn = page_to_mfn(pe->pg) + (pfn - cur_pfn);
                     break;
                 }
+                cur_pfn += pe_pages;
             }
         }
-        BUG_ON(t != PFN_TYPE_NONE && page_get_owner(mfn_to_page(mfn)) != d);
+#ifdef DEBUG
+        if (t != PFN_TYPE_NONE &&
+            (d->domain_flags & DOMF_dying) &&
+            page_get_owner(mfn_to_page(mfn)) != d) {
+            printk("%s: page type: %d owner Dom[%d]:%p expected Dom[%d]:%p\n",
+                   __func__, t,
+                   page_get_owner(mfn_to_page(mfn))->domain_id,
+                   page_get_owner(mfn_to_page(mfn)),
+                   d->domain_id, d);
+            BUG();
+        }
+#endif
     }
 
     if (t == PFN_TYPE_NONE) {
@@ -368,6 +482,42 @@ ulong pfn2mfn(struct domain *d, ulong pf
     return mfn;
 }
 
+unsigned long mfn_to_gmfn(struct domain *d, unsigned long mfn)
+{
+    struct page_extents *pe;
+    ulong cur_pfn;
+    ulong gnttab_mfn;
+    ulong rma_mfn;
+
+    /* grant? */
+    gnttab_mfn = gnttab_shared_mfn(d, d->grant_table, 0);
+    if (mfn >= gnttab_mfn && mfn < (gnttab_mfn + NR_GRANT_FRAMES))
+        return max_page + (mfn - gnttab_mfn);
+
+    /* IO? */
+    if (d->is_privileged && cpu_io_mfn(mfn))
+        return mfn;
+
+    rma_mfn = page_to_mfn(d->arch.rma_page);
+    if (mfn >= rma_mfn &&
+        mfn < (rma_mfn + (1 << d->arch.rma_order)))
+        return mfn - rma_mfn;
+
+    /* Extent? */
+    cur_pfn = 1UL << d->arch.rma_order;
+    list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
+        uint pe_pages = 1UL << pe->order;
+        uint b_mfn = page_to_mfn(pe->pg);
+        uint e_mfn = b_mfn + pe_pages;
+
+        if (mfn >= b_mfn && mfn < e_mfn) {
+            return cur_pfn + (mfn - b_mfn);
+        }
+        cur_pfn += pe_pages;
+    }
+    return INVALID_M2P_ENTRY;
+}
+
 void guest_physmap_add_page(
     struct domain *d, unsigned long gpfn, unsigned long mfn)
 {
@@ -382,3 +532,10 @@ void shadow_drop_references(
     struct domain *d, struct page_info *page)
 {
 }
+
+int arch_domain_add_extent(struct domain *d, struct page_info *page, int order)
+{
+    if (add_extent(d, page, order) < 0)
+        return -ENOMEM;
+    return 0;
+}
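The reworked pfn2mfn() no longer stores a starting pfn per extent; it accumulates extent sizes while walking the list, starting from the end of the RMA. A detached sketch of that cumulative walk, with arrays replacing the Xen list and page structures and all names invented:

    /* Sketch of the cumulative extent walk: guest pfns beyond the RMA
     * are mapped by summing extent sizes in list order. */
    #include <stdio.h>

    struct extent {
        unsigned long base_mfn;   /* first machine frame of the extent */
        unsigned int order;       /* extent holds 1 << order pages */
    };

    static unsigned long pfn_to_mfn(unsigned long pfn, unsigned long rma_pages,
                                    const struct extent *ext, int nr_ext)
    {
        unsigned long cur_pfn = rma_pages;
        int i;

        for (i = 0; i < nr_ext; i++) {
            unsigned long pages = 1UL << ext[i].order;

            if (pfn >= cur_pfn && pfn < cur_pfn + pages)
                return ext[i].base_mfn + (pfn - cur_pfn);
            cur_pfn += pages;
        }
        return ~0UL;   /* no mapping found */
    }

    int main(void)
    {
        const struct extent ext[] = {
            { 0x10000, 4 },   /* 16 pages */
            { 0x20000, 4 },   /* 16 pages */
        };
        /* pfn 0x22 lies 2 pages into the second extent (RMA is 0x10 pages). */
        printf("mfn = 0x%lx\n", pfn_to_mfn(0x22, 0x10, ext, 2));
        return 0;
    }

The same accumulation shows up in the new mfn_to_gmfn() above, walking the extent list in the opposite direction.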
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/mpic.c
--- a/xen/arch/powerpc/mpic.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/mpic.c   Thu Dec 14 08:57:36 2006 -0700
@@ -15,22 +15,18 @@
 /* XXX Xen hacks ... */
 /* make this generic */
 
-#define le32_to_cpu(x) \
-({ \
-       __u32 __x = (x); \
-       ((__u32)( \
-               (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \
-               (((__u32)(__x) & (__u32)0x0000ff00UL) <<  8) | \
-               (((__u32)(__x) & (__u32)0x00ff0000UL) >>  8) | \
-               (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \
-})
+#define le32_to_cpu(x)                                          \
+    ({                                                          \
+        __u32 __x = (x);                                        \
+        ((__u32)(                                               \
+             (((__u32)(__x) & (__u32)0x000000ffUL) << 24) |     \
+             (((__u32)(__x) & (__u32)0x0000ff00UL) <<  8) |     \
+             (((__u32)(__x) & (__u32)0x00ff0000UL) >>  8) |     \
+             (((__u32)(__x) & (__u32)0xff000000UL) >> 24) ));   \
+    })
 
 
 #define alloc_bootmem(x) xmalloc_bytes(x)
-#define request_irq(irq, handler, f, devname, dev_id) \
-    panic("IPI requested: %d: %p: %s: %p\n", irq, handler, devname, dev_id)
-
-typedef int irqreturn_t;
 
 #define IRQ_NONE       (0)
 #define IRQ_HANDLED    (1)
@@ -97,11 +93,6 @@ typedef int irqreturn_t;
 #include <asm/mpic.h>
 #include <asm/smp.h>
 
-static inline void smp_message_recv(int msg, struct pt_regs *regs)
-{
-    return;
-}
-
 #ifdef DEBUG
 #define DBG(fmt...) printk(fmt)
 #else
@@ -126,7 +117,7 @@ static DEFINE_SPINLOCK(mpic_lock);
 
 
 static inline u32 _mpic_read(unsigned int be, volatile u32 __iomem *base,
-                           unsigned int reg)
+                             unsigned int reg)
 {
        if (be)
                return in_be32(base + (reg >> 2));
@@ -135,7 +126,7 @@ static inline u32 _mpic_read(unsigned in
 }
 
 static inline void _mpic_write(unsigned int be, volatile u32 __iomem *base,
-                             unsigned int reg, u32 value)
+                               unsigned int reg, u32 value)
 {
        if (be)
                out_be32(base + (reg >> 2), value);
@@ -186,17 +177,17 @@ static inline u32 _mpic_irq_read(struct 
        unsigned int    idx = src_no & mpic->isu_mask;
 
        return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu],
-                         reg + (idx * MPIC_IRQ_STRIDE));
+                      reg + (idx * MPIC_IRQ_STRIDE));
 }
 
 static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no,
-                                  unsigned int reg, u32 value)
+                                   unsigned int reg, u32 value)
 {
        unsigned int    isu = src_no >> mpic->isu_shift;
        unsigned int    idx = src_no & mpic->isu_mask;
 
        _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu],
-                   reg + (idx * MPIC_IRQ_STRIDE), value);
+                reg + (idx * MPIC_IRQ_STRIDE), value);
 }
 
 #define mpic_read(b,r)         _mpic_read(mpic->flags & MPIC_BIG_ENDIAN,(b),(r))
@@ -261,7 +252,7 @@ static inline void mpic_ht_end_irq(struc
 }
 
 static void mpic_startup_ht_interrupt(struct mpic *mpic, unsigned int source,
-                                     unsigned int irqflags)
+                                      unsigned int irqflags)
 {
        struct mpic_irq_fixup *fixup = &mpic->fixups[source];
        unsigned long flags;
@@ -284,7 +275,7 @@ static void mpic_startup_ht_interrupt(st
 }
 
 static void mpic_shutdown_ht_interrupt(struct mpic *mpic, unsigned int source,
-                                      unsigned int irqflags)
+                                       unsigned int irqflags)
 {
        struct mpic_irq_fixup *fixup = &mpic->fixups[source];
        unsigned long flags;
@@ -305,7 +296,7 @@ static void mpic_shutdown_ht_interrupt(s
 }
 
 static void __init mpic_scan_ht_pic(struct mpic *mpic, u8 __iomem *devbase,
-                                   unsigned int devfn, u32 vdid)
+                                    unsigned int devfn, u32 vdid)
 {
        int i, irq, n;
        u8 __iomem *base;
@@ -485,8 +476,8 @@ static void mpic_enable_irq(unsigned int
        DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src);
 
        mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
-                      mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) &
-                      ~MPIC_VECPRI_MASK);
+                   mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) &
+                   ~MPIC_VECPRI_MASK);
 
        /* make sure mask gets to controller before we return to user */
        do {
@@ -532,8 +523,8 @@ static void mpic_disable_irq(unsigned in
        DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src);
 
        mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
-                      mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) |
-                      MPIC_VECPRI_MASK);
+                   mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) |
+                   MPIC_VECPRI_MASK);
 
        /* make sure mask gets to controller before we return to user */
        do {
@@ -623,7 +614,7 @@ static void mpic_set_affinity(unsigned i
        cpus_and(tmp, cpumask, cpu_online_map);
 
        mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION,
-                      mpic_physmask(cpus_addr(tmp)[0]));       
+                   mpic_physmask(cpus_addr(tmp)[0]));  
 }
 
 
@@ -633,14 +624,14 @@ static void mpic_set_affinity(unsigned i
 
 
 struct mpic * __init mpic_alloc(unsigned long phys_addr,
-                               unsigned int flags,
-                               unsigned int isu_size,
-                               unsigned int irq_offset,
-                               unsigned int irq_count,
-                               unsigned int ipi_offset,
-                               unsigned char *senses,
-                               unsigned int senses_count,
-                               const char *name)
+                                unsigned int flags,
+                                unsigned int isu_size,
+                                unsigned int irq_offset,
+                                unsigned int irq_count,
+                                unsigned int ipi_offset,
+                                unsigned char *senses,
+                                unsigned int senses_count,
+                                const char *name)
 {
        struct mpic     *mpic;
        u32             reg;
@@ -687,8 +678,8 @@ struct mpic * __init mpic_alloc(unsigned
        /* Reset */
        if (flags & MPIC_WANTS_RESET) {
                mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0,
-                          mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
-                          | MPIC_GREG_GCONF_RESET);
+                   mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
+                   | MPIC_GREG_GCONF_RESET);
                while( mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
                       & MPIC_GREG_GCONF_RESET)
                        mb();
@@ -700,15 +691,15 @@ struct mpic * __init mpic_alloc(unsigned
         */
        reg = mpic_read(mpic->gregs, MPIC_GREG_FEATURE_0);
        mpic->num_cpus = ((reg & MPIC_GREG_FEATURE_LAST_CPU_MASK)
-                         >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1;
+                      >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1;
        if (isu_size == 0)
                mpic->num_sources = ((reg & MPIC_GREG_FEATURE_LAST_SRC_MASK)
-                                    >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1;
+                             >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1;
 
        /* Map the per-CPU registers */
        for (i = 0; i < mpic->num_cpus; i++) {
                mpic->cpuregs[i] = ioremap(phys_addr + MPIC_CPU_BASE +
-                                          i * MPIC_CPU_STRIDE, 0x1000);
+                                   i * MPIC_CPU_STRIDE, 0x1000);
                BUG_ON(mpic->cpuregs[i] == NULL);
        }
 
@@ -716,7 +707,7 @@ struct mpic * __init mpic_alloc(unsigned
        if (mpic->isu_size == 0) {
                mpic->isu_size = mpic->num_sources;
                mpic->isus[0] = ioremap(phys_addr + MPIC_IRQ_BASE,
-                                       MPIC_IRQ_STRIDE * mpic->isu_size);
+                                MPIC_IRQ_STRIDE * mpic->isu_size);
                BUG_ON(mpic->isus[0] == NULL);
        }
        mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1);
@@ -752,7 +743,7 @@ struct mpic * __init mpic_alloc(unsigned
 }
 
 void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
-                           unsigned long phys_addr)
+                            unsigned long phys_addr)
 {
        unsigned int isu_first = isu_num * mpic->isu_size;
 
@@ -764,7 +755,7 @@ void __init mpic_assign_isu(struct mpic 
 }
 
 void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler,
-                              void *data)
+                               void *data)
 {
        struct mpic *mpic = mpic_find(irq, NULL);
        unsigned long flags;
@@ -799,20 +790,20 @@ void __init mpic_init(struct mpic *mpic)
        /* Initialize timers: just disable them all */
        for (i = 0; i < 4; i++) {
                mpic_write(mpic->tmregs,
-                          i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0);
+                   i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0);
                mpic_write(mpic->tmregs,
-                          i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI,
-                          MPIC_VECPRI_MASK |
-                          (MPIC_VEC_TIMER_0 + i));
+                   i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI,
+                   MPIC_VECPRI_MASK |
+                   (MPIC_VEC_TIMER_0 + i));
        }
 
        /* Initialize IPIs to our reserved vectors and mark them disabled for now */
        mpic_test_broken_ipi(mpic);
        for (i = 0; i < 4; i++) {
                mpic_ipi_write(i,
-                              MPIC_VECPRI_MASK |
-                              (10 << MPIC_VECPRI_PRIORITY_SHIFT) |
-                              (MPIC_VEC_IPI_0 + i));
+                       MPIC_VECPRI_MASK |
+                       (10 << MPIC_VECPRI_PRIORITY_SHIFT) |
+                       (MPIC_VEC_IPI_0 + i));
 #ifdef CONFIG_SMP
                if (!(mpic->flags & MPIC_PRIMARY))
                        continue;
@@ -859,7 +850,7 @@ void __init mpic_init(struct mpic *mpic)
 #ifdef CONFIG_MPIC_BROKEN_U3
                        if (mpic_is_ht_interrupt(mpic, i)) {
                                vecpri &= ~(MPIC_VECPRI_SENSE_MASK |
-                                           MPIC_VECPRI_POLARITY_MASK);
+                            MPIC_VECPRI_POLARITY_MASK);
                                vecpri |= MPIC_VECPRI_POLARITY_POSITIVE;
                        }
 #else
@@ -873,7 +864,7 @@ void __init mpic_init(struct mpic *mpic)
                /* init hw */
                mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri);
                mpic_irq_write(i, MPIC_IRQ_DESTINATION,
-                              1 << hard_smp_processor_id());
+                       1 << hard_smp_processor_id());
 
                /* init linux descriptors */
                if (i < mpic->irq_count) {
@@ -887,8 +878,8 @@ void __init mpic_init(struct mpic *mpic)
 
        /* Disable 8259 passthrough */
        mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0,
-                  mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
-                  | MPIC_GREG_GCONF_8259_PTHROU_DIS);
+               mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
+               | MPIC_GREG_GCONF_8259_PTHROU_DIS);
 
        /* Set current processor priority to 0 */
        mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0);
@@ -908,12 +899,12 @@ void mpic_irq_set_priority(unsigned int 
                reg = mpic_ipi_read(irq - mpic->ipi_offset) &
                        ~MPIC_VECPRI_PRIORITY_MASK;
                mpic_ipi_write(irq - mpic->ipi_offset,
-                              reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+                       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
        } else {
                reg = mpic_irq_read(irq - mpic->irq_offset,MPIC_IRQ_VECTOR_PRI)
                        & ~MPIC_VECPRI_PRIORITY_MASK;
                mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI,
-                              reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+                       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
        }
        spin_unlock_irqrestore(&mpic_lock, flags);
 }
@@ -956,7 +947,7 @@ void mpic_setup_this_cpu(void)
        if (distribute_irqs) {
                for (i = 0; i < mpic->num_sources ; i++)
                        mpic_irq_write(i, MPIC_IRQ_DESTINATION,
-                               mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk);
+                           mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk);
        }
 
        /* Set current processor priority to 0 */
@@ -1001,7 +992,7 @@ void mpic_teardown_this_cpu(int secondar
        /* let the mpic know we don't want intrs.  */
        for (i = 0; i < mpic->num_sources ; i++)
                mpic_irq_write(i, MPIC_IRQ_DESTINATION,
-                       mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk);
+                       mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk);
 
        /* Set current processor priority to max */
        mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf);
@@ -1021,7 +1012,7 @@ void mpic_send_ipi(unsigned int ipi_no, 
 #endif
 
        mpic_cpu_write(MPIC_CPU_IPI_DISPATCH_0 + ipi_no * 0x10,
-                      mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0]));
+                   mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0]));
 }
 
 int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs)
@@ -1049,7 +1040,7 @@ int mpic_get_one_irq(struct mpic *mpic, 
                return irq + mpic->irq_offset;
        }
 #ifdef DEBUG_IPI
-               DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0);
+    DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0);
 #endif
        return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset;
 }
@@ -1075,13 +1066,13 @@ void mpic_request_ipis(void)
 
        /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
        request_irq(mpic->ipi_offset+0, mpic_ipi_action, SA_INTERRUPT,
-                   "IPI0 (call function)", mpic);
+                "IPI0 (call function)", mpic);
        request_irq(mpic->ipi_offset+1, mpic_ipi_action, SA_INTERRUPT,
-                  "IPI1 (reschedule)", mpic);
+                "IPI1 (reschedule)", mpic);
        request_irq(mpic->ipi_offset+2, mpic_ipi_action, SA_INTERRUPT,
-                  "IPI2 (unused)", mpic);
+                "IPI2 (unused)", mpic);
        request_irq(mpic->ipi_offset+3, mpic_ipi_action, SA_INTERRUPT,
-                  "IPI3 (debugger break)", mpic);
+                "IPI3 (debugger break)", mpic);
 
        printk("IPIs requested... \n");
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/mpic_init.c
--- a/xen/arch/powerpc/mpic_init.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/mpic_init.c      Thu Dec 14 08:57:36 2006 -0700
@@ -22,6 +22,7 @@
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <asm/mpic.h>
+#include <errno.h>
 #include "mpic_init.h"
 #include "oftree.h"
 #include "of-devtree.h"
@@ -74,7 +75,7 @@ static unsigned long reg2(void *oft_p, o
     rc = ofd_getprop(oft_p, c, "reg", &isa_reg, sizeof(isa_reg));
 
     DBG("%s: reg property address=0x%08x  size=0x%08x\n", __func__,
-                    isa_reg.address, isa_reg.size);
+        isa_reg.address, isa_reg.size);
     return isa_reg.address;
 }
 
@@ -92,7 +93,7 @@ static unsigned long reg1(void *oft_p, o
     rc = ofd_getprop(oft_p, c, "reg", &reg, sizeof(reg));
 
     DBG("%s: reg property address=0x%08x  size=0x%08x\n", __func__,
-                        reg.address, reg.size);
+        reg.address, reg.size);
     return reg.address;
 }
 
@@ -173,15 +174,15 @@ static unsigned long find_ranges_addr_fr
         break;
     case 2:
         ranges_addr = (((u64)ranges[ranges_i]) << 32) |
-                      ranges[ranges_i + 1];
+            ranges[ranges_i + 1];
         break;
     case 3:  /* the G5 case, how to squeeze 96 bits into 64 */
         ranges_addr = (((u64)ranges[ranges_i+1]) << 32) |
-                      ranges[ranges_i + 2];
+            ranges[ranges_i + 2];
         break;
     case 4:
         ranges_addr = (((u64)ranges[ranges_i+2]) << 32) |
-                      ranges[ranges_i + 4];
+            ranges[ranges_i + 4];
         break;
     default:
         PANIC("#address-cells out of range\n");
@@ -266,7 +267,7 @@ static int find_mpic_canonical_probe(voi
      * We select the one without an 'interrupt' property.
      */
     c = ofd_node_find_by_prop(oft_p, OFD_ROOT, "device_type", mpic_type,
-                                        sizeof(mpic_type));
+                              sizeof(mpic_type));
     while (c > 0) {
         int int_len;
         int good_mpic;
@@ -357,6 +358,42 @@ static struct hw_interrupt_type *share_m
 #define share_mpic(M,X) (M)
 
 #endif
+
+static unsigned int mpic_startup_ipi(unsigned int irq)
+{
+    mpic->hc_ipi.enable(irq);
+    return 0;
+}
+
+int request_irq(unsigned int irq,
+                irqreturn_t (*handler)(int, void *, struct cpu_user_regs *),
+                unsigned long irqflags, const char * devname, void *dev_id)
+{
+    int retval;
+    struct irqaction *action;
+    void (*func)(int, void *, struct cpu_user_regs *);
+
+    action = xmalloc(struct irqaction);
+    if (!action) {
+        BUG();
+        return -ENOMEM;
+    }
+
+    /* Xen's handler prototype is slightly different than Linux's.  */
+    func = (void (*)(int, void *, struct cpu_user_regs *))handler;
+
+    action->handler = func;
+    action->name = devname;
+    action->dev_id = dev_id;
+
+    retval = setup_irq(irq, action);
+    if (retval) {
+        BUG();
+        xfree(action);
+    }
+
+    return retval;
+}
 
 struct hw_interrupt_type *xen_mpic_init(struct hw_interrupt_type *xen_irq)
 {
@@ -397,6 +434,11 @@ struct hw_interrupt_type *xen_mpic_init(
     hit = share_mpic(&mpic->hc_irq, xen_irq);
 
     printk("%s: success\n", __func__);
+
+    mpic->hc_ipi.ack = xen_irq->ack;
+    mpic->hc_ipi.startup = mpic_startup_ipi;
+    mpic_request_ipis();
+
     return hit;
 }
 
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/of-devtree.h
--- a/xen/arch/powerpc/of-devtree.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/of-devtree.h     Thu Dec 14 08:57:36 2006 -0700
@@ -33,15 +33,15 @@ union of_pci_hi {
 union of_pci_hi {
     u32 word;
     struct {
-        u32    opa_n: 1; /* relocatable */
-        u32    opa_p: 1; /* prefetchable */
-        u32    opa_t: 1; /* aliased */
+        u32 opa_n: 1; /* relocatable */
+        u32 opa_p: 1; /* prefetchable */
+        u32 opa_t: 1; /* aliased */
         u32 _opa_res: 3;
-        u32    opa: 2; /* space code */
+        u32 opa: 2; /* space code */
         u32  opa_b: 8; /* bus number */
-        u32    opa_d: 5; /* device number */
-        u32    opa_f: 3; /* function number */
-        u32    opa_r: 8; /* register number */
+        u32 opa_d: 5; /* device number */
+        u32 opa_f: 3; /* function number */
+        u32 opa_r: 8; /* register number */
     } bits;
 };
 
@@ -79,9 +79,9 @@ typedef s32 ofdn_t;
 typedef s32 ofdn_t;
 
 #define OFD_ROOT 1
-#define OFD_DUMP_NAMES 0x1
-#define OFD_DUMP_VALUES        0x2
-#define OFD_DUMP_ALL   (OFD_DUMP_VALUES|OFD_DUMP_NAMES)
+#define OFD_DUMP_NAMES 0x1
+#define OFD_DUMP_VALUES 0x2
+#define OFD_DUMP_ALL (OFD_DUMP_VALUES|OFD_DUMP_NAMES)
 
 extern void *ofd_create(void *mem, size_t sz);
 extern ofdn_t ofd_node_parent(void *mem, ofdn_t n);
@@ -90,9 +90,9 @@ extern const char *ofd_node_path(void *m
 extern const char *ofd_node_path(void *mem, ofdn_t p);
 extern int ofd_node_to_path(void *mem, ofdn_t p, void *buf, size_t sz);
 extern ofdn_t ofd_node_child_create(void *mem, ofdn_t parent,
-                                   const char *path, size_t pathlen);
+                                    const char *path, size_t pathlen);
 extern ofdn_t ofd_node_peer_create(void *mem, ofdn_t sibling,
-                                  const char *path, size_t pathlen);
+                                   const char *path, size_t pathlen);
 extern ofdn_t ofd_node_find(void *mem, const char *devspec);
 extern ofdn_t ofd_node_add(void *m, ofdn_t n, const char *path, size_t sz);
 extern int ofd_node_prune(void *m, ofdn_t n);
@@ -102,23 +102,23 @@ extern ofdn_t ofd_nextprop(void *mem, of
 extern ofdn_t ofd_nextprop(void *mem, ofdn_t n, const char *prev, char *name);
 extern ofdn_t ofd_prop_find(void *mem, ofdn_t n, const char *name);
 extern int ofd_getprop(void *mem, ofdn_t n, const char *name,
-                       void *buf, size_t sz);
+                       void *buf, size_t sz);
 extern int ofd_getproplen(void *mem, ofdn_t n, const char *name);
 
 extern int ofd_setprop(void *mem, ofdn_t n, const char *name,
-                       const void *buf, size_t sz);
+                       const void *buf, size_t sz);
 extern void ofd_prop_remove(void *mem, ofdn_t node, ofdn_t prop);
 extern ofdn_t ofd_prop_add(void *mem, ofdn_t n, const char *name,
-                          const void *buf, size_t sz);
+                           const void *buf, size_t sz);
 extern ofdn_t ofd_io_create(void *m, ofdn_t node, u64 open);
 extern u32 ofd_io_open(void *mem, ofdn_t n);
 extern void ofd_io_close(void *mem, ofdn_t n);
 
 
-typedef void (*walk_fn)(void *m, ofdn_t p, int arg);
-extern void ofd_dump_props(void *m, ofdn_t p, int dump);
+typedef void (*walk_fn)(void *m, const char *pre, ofdn_t p, int arg);
+extern void ofd_dump_props(void *m, const char *pre, ofdn_t p, int dump);
 
-extern void ofd_walk(void *m, ofdn_t p, walk_fn fn, int arg);
+extern void ofd_walk(void *m, const char *pre, ofdn_t p, walk_fn fn, int arg);
 
 
 /* Recursively look up #address_cells and #size_cells properties */
@@ -129,10 +129,10 @@ extern size_t ofd_space(void *mem);
 extern size_t ofd_space(void *mem);
 
 extern void ofd_prop_print(const char *head, const char *path,
-                          const char *name, const char *prop, size_t sz);
+                           const char *name, const char *prop, size_t sz);
 
 extern ofdn_t ofd_node_find_by_prop(void *mem, ofdn_t n, const char *name,
-                                   const void *val, size_t sz);
+                                    const void *val, size_t sz);
 extern ofdn_t ofd_node_find_next(void *mem, ofdn_t n);
 extern ofdn_t ofd_node_find_prev(void *mem, ofdn_t n);
 extern void ofd_init(int (*write)(const char *, size_t len));
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/of-devwalk.c
--- a/xen/arch/powerpc/of-devwalk.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/of-devwalk.c     Thu Dec 14 08:57:36 2006 -0700
@@ -80,7 +80,7 @@ void ofd_prop_print(
 #endif
 }
 
-void ofd_dump_props(void *mem, ofdn_t n, int dump)
+void ofd_dump_props(void *mem, const char *pre, ofdn_t n, int dump)
 {
     ofdn_t p;
     char name[128];
@@ -95,7 +95,7 @@ void ofd_dump_props(void *mem, ofdn_t n,
     }
 
     if (dump & OFD_DUMP_NAMES) {
-        printk("of_walk: %s: phandle 0x%x\n", path, n);
+        printk("%s: %s: phandle 0x%x\n", pre, path, n);
     }
 
     p = ofd_nextprop(mem, n, NULL, name);
@@ -106,30 +106,30 @@ void ofd_dump_props(void *mem, ofdn_t n,
         }
 
         if ( dump & OFD_DUMP_VALUES ) {
-            ofd_prop_print("of_walk", path, name, prop, sz);
+            ofd_prop_print(pre, path, name, prop, sz);
         }
 
         p = ofd_nextprop(mem, n, name, name);
     }
 }
 
-void ofd_walk(void *m, ofdn_t p, walk_fn fn, int arg)
+void ofd_walk(void *m, const char *pre, ofdn_t p, walk_fn fn, int arg)
 {
     ofdn_t n;
 
     if ( fn != NULL ) {
-        (*fn)(m, p, arg);
+        (*fn)(m, pre, p, arg);
     }
 
     /* child */
     n = ofd_node_child(m, p);
     if ( n != 0 ) {
-        ofd_walk(m, n, fn, arg);
+        ofd_walk(m, pre, n, fn, arg);
     }
 
     /* peer */
     n = ofd_node_peer(m, p);
     if ( n != 0 ) {
-        ofd_walk(m, n, fn, arg);
+        ofd_walk(m, pre, n, fn, arg);
     }
 }
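
The ofd_walk() change above threads a caller-supplied prefix string through
the child/peer recursion so the dump routines can label their output. A
minimal standalone sketch of that traversal pattern, using an invented node
layout rather than the real flat devtree encoding:

    #include <stdio.h>

    struct node {
        const char *name;
        struct node *child;   /* first child */
        struct node *peer;    /* next sibling */
    };

    typedef void (*walk_fn)(const char *pre, struct node *n);

    static void walk(const char *pre, struct node *n, walk_fn fn)
    {
        if (n == NULL)
            return;
        fn(pre, n);                  /* visit this node */
        walk(pre, n->child, fn);     /* then its children */
        walk(pre, n->peer, fn);      /* then its siblings */
    }

    static void print_node(const char *pre, struct node *n)
    {
        printf("%s: %s\n", pre, n->name);
    }

    int main(void)
    {
        struct node cpu  = { "cpus/cpu@0", NULL, NULL };
        struct node cpus = { "cpus", &cpu, NULL };
        struct node mem  = { "memory@0", NULL, NULL };
        struct node root = { "/", &cpus, NULL };

        cpus.peer = &mem;            /* "/" has two children: cpus and memory@0 */
        walk("devtree", &root, print_node);
        return 0;
    }
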
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/of_handler/console.c
--- a/xen/arch/powerpc/of_handler/console.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/of_handler/console.c     Thu Dec 14 08:57:36 2006 -0700
@@ -113,7 +113,7 @@ static s32 ofh_xen_dom0_read(s32 chan, v
             return ret;
 
         rc = xen_hvcall(XEN_MARK(__HYPERVISOR_console_io), CONSOLEIO_read,
-                count, desc);
+                        count, desc);
         if (rc <= 0) {
             return ret;
         }
@@ -139,7 +139,7 @@ static s32 ofh_xen_dom0_write(s32 chan, 
             return ret;
 
         rc = xen_hvcall(XEN_MARK(__HYPERVISOR_console_io), CONSOLEIO_write,
-                count, desc);
+                        count, desc);
         if (rc <= 0) {
             return ret;
         }
@@ -157,8 +157,8 @@ static s32 ofh_xen_domu_read(s32 chan, v
 static s32 ofh_xen_domu_read(s32 chan, void *buf, u32 count, s32 *actual,
                              ulong b)
 {
-       struct xencons_interface *intf;
-       XENCONS_RING_IDX cons, prod;
+    struct xencons_interface *intf;
+    XENCONS_RING_IDX cons, prod;
     s32 ret;
 
     intf = DRELA(ofh_ihp, b)->ofi_intf;
@@ -180,8 +180,8 @@ static s32 ofh_xen_domu_write(s32 chan, 
 static s32 ofh_xen_domu_write(s32 chan, const void *buf, u32 count,
                               s32 *actual, ulong b)
 {
-       struct xencons_interface *intf;
-       XENCONS_RING_IDX cons, prod;
+    struct xencons_interface *intf;
+    XENCONS_RING_IDX cons, prod;
     s32 ret;
 
     intf = DRELA(ofh_ihp, b)->ofi_intf;
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/ofd_fixup.c
--- a/xen/arch/powerpc/ofd_fixup.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/ofd_fixup.c      Thu Dec 14 08:57:36 2006 -0700
@@ -25,6 +25,7 @@
 #include <public/xen.h>
 #include "of-devtree.h"
 #include "oftree.h"
+#include "rtas.h"
 
 #undef RTAS
 
@@ -347,6 +348,15 @@ static ofdn_t ofd_xen_props(void *m, str
         val[0] =  rma_size(d->arch.rma_order) - val[1];
         ofd_prop_add(m, n, "reserved", val, sizeof (val));
 
+        /* tell dom0 that Xen depends on it to have power control */
+        if (!rtas_entry)
+            ofd_prop_add(m, n, "power-control", NULL, 0);
+
+        /* tell dom0 where granted pages go in the linear map */
+        val[0] = cpu_foreign_map_order();
+        val[1] = d->arch.foreign_mfn_count;
+        ofd_prop_add(m, n, "foreign-map", val, sizeof (val));
+
         n = ofd_node_add(m, n, console, sizeof (console));
         if (n > 0) {
             val[0] = 0;
@@ -417,7 +427,7 @@ int ofd_dom0_fixup(struct domain *d, ulo
 
 
 #ifdef DEBUG
-    ofd_walk(m, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL);
+    ofd_walk(m, __func__, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL);
 #endif
     return 1;
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/ofd_fixup_memory.c
--- a/xen/arch/powerpc/ofd_fixup_memory.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/ofd_fixup_memory.c       Thu Dec 14 08:57:36 2006 -0700
@@ -68,6 +68,8 @@ static ofdn_t ofd_memory_node_create(
     reg.sz = size;
     ofd_prop_add(m, n, "reg", &reg, sizeof (reg));
 
+    printk("Dom0: %s: %016lx, %016lx\n", path, start, size);
+
     return n;
 }
 
@@ -86,17 +88,19 @@ static void ofd_memory_extent_nodes(void
     ulong size;
     ofdn_t n;
     struct page_extents *pe;
+    ulong cur_pfn = 1UL << d->arch.rma_order;
 
+    start = cur_pfn << PAGE_SHIFT;
+    size = 0;
     list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
 
-        start = pe->pfn << PAGE_SHIFT;
-        size = 1UL << (pe->order + PAGE_SHIFT);
-
-        n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory,
-                                    start, size);
-
-        BUG_ON(n <= 0);
+        size += 1UL << (pe->order + PAGE_SHIFT);
+        if (pe->order != cpu_extent_order())
+            panic("we don't handle this yet\n");
     }
+    n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory,
+                               start, size);
+    BUG_ON(n <= 0);
 }
 
 void ofd_memory_props(void *m, struct domain *d)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/papr/xlate.c
--- a/xen/arch/powerpc/papr/xlate.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/papr/xlate.c     Thu Dec 14 08:57:36 2006 -0700
@@ -19,7 +19,7 @@
  */
 
 #undef DEBUG
-#undef DEBUG_FAIL
+#undef DEBUG_LOW
 
 #include <xen/config.h>
 #include <xen/types.h>
@@ -30,6 +30,17 @@
 #include <asm/papr.h>
 #include <asm/hcalls.h>
 
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) printk(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
+
 #ifdef USE_PTE_INSERT
 static inline void pte_insert(union pte volatile *pte,
         ulong vsid, ulong rpn, ulong lrpn)
@@ -106,11 +117,8 @@ static void pte_tlbie(union pte volatile
 
 }
 
-static void h_enter(struct cpu_user_regs *regs)
-{
-    ulong flags = regs->gprs[4];
-    ulong ptex = regs->gprs[5];
-
+long pte_enter(ulong flags, ulong ptex, ulong vsid, ulong rpn)
+{
     union pte pte;
     union pte volatile *ppte;
     struct domain_htab *htab;
@@ -129,14 +137,13 @@ static void h_enter(struct cpu_user_regs
 
     htab = &d->arch.htab;
     if (ptex > (1UL << htab->log_num_ptes)) {
-        regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
-        return;
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
+        return H_Parameter;
     }
 
     /* use local HPTE to avoid manual shifting & masking */
-    pte.words.vsid = regs->gprs[6];
-    pte.words.rpn = regs->gprs[7];
+    pte.words.vsid = vsid;
+    pte.words.rpn = rpn;
 
     if ( pte.bits.l ) {        /* large page? */
         /* figure out the page size for the selected large page */
@@ -150,10 +157,9 @@ static void h_enter(struct cpu_user_regs
         }
 
         if ( lp_size >= d->arch.large_page_sizes ) {
-            printk("%s: attempt to use unsupported lp_size %d\n",
-                   __func__, lp_size);
-            regs->gprs[3] = H_Parameter;
-            return;
+            DBG("%s: attempt to use unsupported lp_size %d\n",
+                __func__, lp_size);
+            return H_Parameter;
         }
 
         /* get correct pgshift value */
@@ -168,31 +174,32 @@ static void h_enter(struct cpu_user_regs
 
     mfn = pfn2mfn(d, pfn, &mtype);
     if (mfn == INVALID_MFN) {
-        regs->gprs[3] =  H_Parameter;
-        return;
-    }
-
+        DBG("%s: Bad PFN: 0x%lx\n", __func__, pfn);
+        return H_Parameter;
+    }
+
+    if (mtype == PFN_TYPE_IO && !d->is_privileged) {
+        /* only a privileged dom can access outside IO space */
+        DBG("%s: unprivileged access to physical page: 0x%lx\n",
+            __func__, pfn);
+        return H_Privilege;
+    }
     if (mtype == PFN_TYPE_IO) {
-        /* only a privilaged dom can access outside IO space */
-        if ( !d->is_privileged ) {
-            regs->gprs[3] =  H_Privilege;
-            printk("%s: unprivileged access to physical page: 0x%lx\n",
-                   __func__, pfn);
-            return;
-        }
-
         if ( !((pte.bits.w == 0)
              && (pte.bits.i == 1)
              && (pte.bits.g == 1)) ) {
-#ifdef DEBUG_FAIL
-            printk("%s: expecting an IO WIMG "
-                   "w=%x i=%d m=%d, g=%d\n word 0x%lx\n", __func__,
-                   pte.bits.w, pte.bits.i, pte.bits.m, pte.bits.g,
-                   pte.words.rpn);
-#endif
-            regs->gprs[3] =  H_Parameter;
-            return;
-        }
+            DBG("%s: expecting an IO WIMG "
+                "w=%x i=%d m=%d, g=%d\n word 0x%lx\n", __func__,
+                pte.bits.w, pte.bits.i, pte.bits.m, pte.bits.g,
+                pte.words.rpn);
+            return H_Parameter;
+        }
+    }
+    if (mtype == PFN_TYPE_GNTTAB) {
+        DBG("%s: Dom[%d] mapping grant table: 0x%lx\n",
+            __func__, d->domain_id, pfn << PAGE_SHIFT);
+        pte.bits.i = 0;
+        pte.bits.g = 0;
     }
     /* fixup the RPN field of our local PTE copy */
     pte.bits.rpn = mfn | lp_bits;
@@ -213,13 +220,13 @@ static void h_enter(struct cpu_user_regs
         BUG_ON(f == d);
 
         if (unlikely(!get_domain(f))) {
-            regs->gprs[3] = H_Rescinded;
-            return;
+            DBG("%s: Rescinded, no domain: 0x%lx\n",  __func__, pfn);
+            return H_Rescinded;
         }
         if (unlikely(!get_page(pg, f))) {
             put_domain(f);
-            regs->gprs[3] = H_Rescinded;
-            return;
+            DBG("%s: Rescinded, no page: 0x%lx\n",  __func__, pfn);
+            return H_Rescinded;
         }
     }
 
@@ -276,17 +283,12 @@ static void h_enter(struct cpu_user_regs
                 : "b" (ppte), "r" (pte.words.rpn), "r" (pte.words.vsid)
                 : "memory");
 
-            regs->gprs[3] = H_Success;
-            regs->gprs[4] = idx;
-
-            return;
-        }
-    }
-
-#ifdef DEBUG
+            return idx;
+        }
+    }
+
     /* If the PTEG is full then no additional values are returned. */
-    printk("%s: PTEG FULL\n", __func__);
-#endif
+    DBG("%s: PTEG FULL\n", __func__);
 
     if (pg != NULL)
         put_page(pg);
@@ -294,7 +296,24 @@ static void h_enter(struct cpu_user_regs
     if (f != NULL)
         put_domain(f);
 
-    regs->gprs[3] = H_PTEG_Full;
+    return H_PTEG_Full;
+}
+
+static void h_enter(struct cpu_user_regs *regs)
+{
+    ulong flags = regs->gprs[4];
+    ulong ptex = regs->gprs[5];
+    ulong vsid = regs->gprs[6];
+    ulong rpn = regs->gprs[7];
+    long ret;
+
+    ret = pte_enter(flags, ptex, vsid, rpn);
+
+    if (ret >= 0) {
+        regs->gprs[3] = H_Success;
+        regs->gprs[4] = ret;
+    } else
+        regs->gprs[3] = ret;
 }
 
 static void h_protect(struct cpu_user_regs *regs)
@@ -308,13 +327,11 @@ static void h_protect(struct cpu_user_re
     union pte volatile *ppte;
     union pte lpte;
 
-#ifdef DEBUG
-    printk("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__,
-           flags, ptex, avpn);
-#endif
+    DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__,
+            flags, ptex, avpn);
     if ( ptex > (1UL << htab->log_num_ptes) ) {
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
         regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
         return;
     }
     ppte = &htab->map[ptex];
@@ -324,10 +341,8 @@ static void h_protect(struct cpu_user_re
 
     /* the AVPN param occupies the bit-space of the word */
     if ( (flags & H_AVPN) && lpte.bits.avpn != avpn >> 7 ) {
-#ifdef DEBUG_FAIL
-        printk("%s: %p: AVPN check failed: 0x%lx, 0x%lx\n", __func__,
-                ppte, lpte.words.vsid, lpte.words.rpn);
-#endif
+        DBG_LOW("%s: %p: AVPN check failed: 0x%lx, 0x%lx\n", __func__,
+            ppte, lpte.words.vsid, lpte.words.rpn);
         regs->gprs[3] = H_Not_Found;
         return;
     }
@@ -337,9 +352,7 @@ static void h_protect(struct cpu_user_re
          * we invalidate entries where the PAPR says to 0 the whole hi
          * dword, so the AVPN should catch this first */
 
-#ifdef DEBUG_FAIL
-        printk("%s: pte invalid\n", __func__);
-#endif
+        DBG("%s: pte invalid\n", __func__);
         regs->gprs[3] =  H_Not_Found;
         return;
     }
@@ -374,7 +387,6 @@ static void h_protect(struct cpu_user_re
 
 static void h_clear_ref(struct cpu_user_regs *regs)
 {
-    ulong flags = regs->gprs[4];
     ulong ptex = regs->gprs[5];
     struct vcpu *v = get_current();
     struct domain *d = v->domain;
@@ -382,20 +394,20 @@ static void h_clear_ref(struct cpu_user_
     union pte volatile *pte;
     union pte lpte;
 
+    DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx\n", __func__,
+            regs->gprs[4], ptex);
+
 #ifdef DEBUG
-    printk("%s: flags: 0x%lx ptex: 0x%lx\n", __func__,
-           flags, ptex);
-#endif
-
-    if (flags != 0) {
-        printk("WARNING: %s: "
-                "flags are undefined and should be 0: 0x%lx\n",
-                __func__, flags);
-    }
+    if (regs->gprs[4] != 0) {
+        DBG("WARNING: %s: "
+            "flags are undefined and should be 0: 0x%lx\n",
+            __func__, regs->gprs[4]);
+    }
+#endif
 
     if (ptex > (1UL << htab->log_num_ptes)) {
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
         regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
         return;
     }
     pte = &htab->map[ptex];
@@ -417,7 +429,6 @@ static void h_clear_ref(struct cpu_user_
 
 static void h_clear_mod(struct cpu_user_regs *regs)
 {
-    ulong flags = regs->gprs[4];
     ulong ptex = regs->gprs[5];
     struct vcpu *v = get_current();
     struct domain *d = v->domain;
@@ -425,19 +436,20 @@ static void h_clear_mod(struct cpu_user_
     union pte volatile *pte;
     union pte lpte;
 
+    DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx\n", __func__,
+          regs->gprs[4], ptex);
+
 #ifdef DEBUG
-    printk("%s: flags: 0x%lx ptex: 0x%lx\n", __func__,
-           flags, ptex);
-#endif
-    if (flags != 0) {
-        printk("WARNING: %s: "
-                "flags are undefined and should be 0: 0x%lx\n",
-                __func__, flags);
-    }
-    
+    if (regs->gprs[4] != 0) {
+        DBG("WARNING: %s: "
+            "flags are undefined and should be 0: 0x%lx\n",
+            __func__, regs->gprs[4]);
+    }
+#endif
+
     if (ptex > (1UL << htab->log_num_ptes)) {
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
         regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
         return;
     }
     pte = &htab->map[ptex];
@@ -466,63 +478,53 @@ static void h_clear_mod(struct cpu_user_
     }
 }
 
-static void h_remove(struct cpu_user_regs *regs)
-{
-    ulong flags = regs->gprs[4];
-    ulong ptex = regs->gprs[5];
-    ulong avpn = regs->gprs[6];
+long pte_remove(ulong flags, ulong ptex, ulong avpn, ulong *hi, ulong *lo)
+{
     struct vcpu *v = get_current();
     struct domain *d = v->domain;
     struct domain_htab *htab = &d->arch.htab;
     union pte volatile *pte;
     union pte lpte;
 
-#ifdef DEBUG
-    printk("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__,
-           flags, ptex, avpn);
-#endif
+    DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__,
+            flags, ptex, avpn);
+
     if ( ptex > (1UL << htab->log_num_ptes) ) {
-        regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
-        return;
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
+        return H_Parameter;
     }
     pte = &htab->map[ptex];
     lpte.words.vsid = pte->words.vsid;
     lpte.words.rpn = pte->words.rpn;
 
     if ((flags & H_AVPN) && lpte.bits.avpn != (avpn >> 7)) {
-#ifdef DEBUG_FAIL
-        printk("%s: avpn doesn not match\n", __func__);
-#endif
-        regs->gprs[3] = H_Not_Found;
-        return;
+        DBG_LOW("%s: AVPN does not match\n", __func__);
+        return H_Not_Found;
     }
 
     if ((flags & H_ANDCOND) && ((avpn & pte->words.vsid) != 0)) {
-#ifdef DEBUG_FAIL
-        printk("%s: andcond does not match\n", __func__);
-#endif
-        regs->gprs[3] = H_Not_Found;
-        return;
-    }
-
-    regs->gprs[3] = H_Success;
+        DBG("%s: andcond does not match\n", __func__);
+        return H_Not_Found;
+    }
+
     /* return old PTE in regs 4 and 5 */
-    regs->gprs[4] = lpte.words.vsid;
-    regs->gprs[5] = lpte.words.rpn;
-
+    *hi = lpte.words.vsid;
+    *lo = lpte.words.rpn;
+
+#ifdef DEBUG_LOW
     /* XXX - I'm very skeptical of doing ANYTHING if not bits.v */
     /* XXX - I think the spec should be questioned in this case (MFM) */
     if (lpte.bits.v == 0) {
-        printk("%s: removing invalid entry\n", __func__);
-    }
+        DBG_LOW("%s: removing invalid entry\n", __func__);
+    }
+#endif
 
     if (lpte.bits.v) {
         ulong mfn = lpte.bits.rpn;
         if (!cpu_io_mfn(mfn)) {
             struct page_info *pg = mfn_to_page(mfn);
             struct domain *f = page_get_owner(pg);
-
+            
             if (f != d) {
                 put_domain(f);
                 put_page(pg);
@@ -536,6 +538,27 @@ static void h_remove(struct cpu_user_reg
             : "memory");
 
     pte_tlbie(&lpte, ptex);
+
+    return H_Success;
+}
+
+static void h_remove(struct cpu_user_regs *regs)
+{
+    ulong flags = regs->gprs[4];
+    ulong ptex = regs->gprs[5];
+    ulong avpn = regs->gprs[6];
+    ulong hi, lo;
+    long ret;
+
+    ret = pte_remove(flags, ptex, avpn, &hi, &lo);
+
+    regs->gprs[3] = ret;
+
+    if (ret == H_Success) {
+        regs->gprs[4] = hi;
+        regs->gprs[5] = lo;
+    }
+    return;
 }
 
 static void h_read(struct cpu_user_regs *regs)
@@ -547,12 +570,12 @@ static void h_read(struct cpu_user_regs 
     struct domain_htab *htab = &d->arch.htab;
     union pte volatile *pte;
 
-       if (flags & H_READ_4)
+    if (flags & H_READ_4)
         ptex &= ~0x3UL;
 
     if (ptex > (1UL << htab->log_num_ptes)) {
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
         regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
         return;
     }
     pte = &htab->map[ptex];
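
The h_enter() rework above splits the hcall into pte_enter(), which returns
either a PTEG slot index (>= 0) or a negative H_* status, and a thin wrapper
that maps that return value onto the guest's GPRs. A minimal standalone
sketch of that calling-convention split; the register slots and status values
below are illustrative, not the real PAPR numbers:

    #include <stdio.h>

    #define H_Success    0
    #define H_Parameter  (-4)   /* illustrative value */

    struct fake_regs { unsigned long gprs[8]; };

    /* Stand-in for pte_enter(): returns a slot index on success. */
    static long fake_pte_enter(unsigned long ptex)
    {
        if (ptex > 0xff)
            return H_Parameter;
        return (long)(ptex & 0x7);   /* pretend this is the chosen PTEG slot */
    }

    /* Stand-in for the hcall wrapper: results go back through the GPRs. */
    static void fake_h_enter(struct fake_regs *regs)
    {
        long ret = fake_pte_enter(regs->gprs[5]);

        if (ret >= 0) {
            regs->gprs[3] = H_Success;
            regs->gprs[4] = (unsigned long)ret;   /* index returned to the guest */
        } else {
            regs->gprs[3] = (unsigned long)ret;   /* error status only */
        }
    }

    int main(void)
    {
        struct fake_regs r = { .gprs = { [5] = 0x1c } };
        fake_h_enter(&r);
        printf("status=%ld index=%lu\n", (long)r.gprs[3], r.gprs[4]);
        return 0;
    }
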
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/exceptions.S
--- a/xen/arch/powerpc/powerpc64/exceptions.S   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/exceptions.S   Thu Dec 14 08:57:36 2006 -0700
@@ -564,6 +564,22 @@ _GLOBAL(sleep)
  */    
     .globl spin_start
 spin_start:
+    /* We discovered by experiment that the ERAT must be flushed early.  */
+    isync
+    slbia
+    isync
+       
+    /* Do a cache flush for our text, in case the loader didn't */
+    LOADADDR(r9, _start)
+    LOADADDR(r8, _etext)
+4:  dcbf r0,r9
+    icbi r0,r9
+    addi r9,r9,0x20            /* up to a 4 way set per line */
+    cmpld cr0,r9,r8
+    blt        4b
+    sync
+    isync
+
     /* Write our processor number as an acknowledgment that we're alive.  */
     LOADADDR(r14, __spin_ack)
     stw r3, 0(r14)
@@ -575,7 +591,7 @@ spin_start:
     b .
     /* Find our index in the array of processor_area struct pointers.  */
 2:  LOADADDR(r14, global_cpu_table)
-    muli r15, r3, 8
+    mulli r15, r3, 8
     add r14, r14, r15
     /* Spin until the pointer for our processor goes valid.  */
 1:  ld r15, 0(r14)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/io.S
--- a/xen/arch/powerpc/powerpc64/io.S   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/io.S   Thu Dec 14 08:57:36 2006 -0700
@@ -23,6 +23,11 @@
 #include <asm/processor.h>
 #include <asm/percpu.h>
 
+/* There is no reason why I can't use a tlbie, which should be less
+ * "destructive", but using SLBIE proves to give more stable results.
+ */
+#define INVALIDATE_ERAT_WITH_SLBIE
+
 /* Xen runs in real mode (i.e. untranslated, MMU disabled). This avoids TLB
  * flushes and also makes it easy to access all domains' memory. However, on
  * PowerPC real mode accesses are cacheable, which is good for general
@@ -34,12 +39,14 @@
  * make the access, then re-enable it...
  */
 
+#ifdef INVALIDATE_ERAT_WITH_SLBIE
 /* Not all useful assemblers understand 'tlbiel'.
  * 'addr' is a GPR containing the address being accessed.
  */
 .macro tlbiel addr
        .long 0x7c000224 | (\addr << 11)
 .endm
+#endif
 
 .macro DISABLE_DCACHE addr
        mfmsr r8
@@ -48,29 +55,53 @@
        ori r6, r6, MSR_EE
        andc r5, r8, r6
        mtmsr r5
+       sync
 
-       /* set HID4.RM_CI */
+#ifdef INVALIDATE_ERAT_WITH_SLBIE 
+       /* create an slbie entry for the io setting a high order bit
+        * to avoid any important SLBs */
+       extldi r0, \addr, 36, 0 
+#endif
+       /* setup HID4.RM_CI */
        mfspr r9, SPRN_HID4
        li r6, 0x100
        sldi r6, r6, 32
-       or r5, r9, r6
-       tlbiel \addr /* invalidate the ERAT entry */
-       sync
-       mtspr SPRN_HID4, r5
+       or r10, r9, r6
+
+       /* Mark the processor as "in CI mode" */
+       li r7,0
+       mfspr r5, SPRN_PIR
+       li r6, MCK_CPU_STAT_CI
+       /* store that we are in a CI routine */
+       stb r6, MCK_CPU_STAT_BASE(r5)
+       /* r7 = MCK_CPU_STAT_CI IO in progress */
+       mr r7, r5
+       lwsync
+
+       /* switch modes */
+       mtspr SPRN_HID4, r10
+       /* invalidate the ERAT entry */
+#ifdef INVALIDATE_ERAT_WITH_SLBIE
+       slbie r0
+#else
+       tlbiel \addr
+#endif
        isync
 
-       /* Mark the processor as "in CI mode" */
-       mfspr r5, SPRN_PIR
-       li r6, MCK_CPU_STAT_CI
-       stb r6, MCK_CPU_STAT_BASE(r5)
-       sync
 .endm
 
 .macro ENABLE_DCACHE addr
-       /* re-zero HID4.RM_CI */
+       /* r7 = 0, IO is complete */
+       li r7, 0
+       lwsync
+       /* restore HID4.RM_CI */
+       mtspr SPRN_HID4, r9
+       /* invalidate the ERAT entry */
+#ifdef INVALIDATE_ERAT_WITH_SLBIE
+       slbie r0
+#else
        tlbiel \addr /* invalidate the ERAT entry */
-       sync
-       mtspr SPRN_HID4, r9
+#endif
        isync
 
        /* Mark the processor as "out of CI mode" */
@@ -83,9 +114,13 @@
        mtmsr r8
 .endm
 
-/* The following assembly cannot use r8 or r9 since they hold original
- * values of msr and hid4 repectively
+/* The following assembly cannot use some registers since they hold original
+ * values we need to keep
  */
+#undef r0
+#define r0 do_not_use_r0
+#undef r7
+#define r7 do_not_use_r7
 #undef r8
 #define r8 do_not_use_r8
 #undef r9
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/ppc970.c
--- a/xen/arch/powerpc/powerpc64/ppc970.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/ppc970.c       Thu Dec 14 08:57:36 2006 -0700
@@ -30,6 +30,7 @@
 #include <asm/powerpc64/procarea.h>
 #include <asm/powerpc64/processor.h>
 #include <asm/powerpc64/ppc970-hid.h>
+#include "scom.h"
 
 #undef DEBUG
 #undef SERIALIZE
@@ -38,48 +39,77 @@ struct cpu_caches cpu_caches = {
     .dline_size = 0x80,
     .log_dline_size = 7,
     .dlines_per_page = PAGE_SIZE >> 7,
+    .isize = (64 << 10),        /* 64 KiB */
     .iline_size = 0x80,
     .log_iline_size = 7,
     .ilines_per_page = PAGE_SIZE >> 7,
 };
 
+
+void cpu_flush_icache(void)
+{
+    union hid1 hid1;
+    ulong flags;
+    ulong ea;
+
+    local_irq_save(flags);
+
+    /* uses special processor mode that forces a real address match on
+     * the whole line */
+    hid1.word = mfhid1();
+    hid1.bits.en_icbi = 1;
+    mthid1(hid1.word);
+
+    for (ea = 0; ea < cpu_caches.isize; ea += cpu_caches.iline_size)
+        icbi(ea);
+
+    sync();
+
+    hid1.bits.en_icbi = 0;
+    mthid1(hid1.word);
+
+    local_irq_restore(flags);
+}
+
+
 struct rma_settings {
-    int order;
+    int log;
     int rmlr_0;
     int rmlr_1_2;
 };
 
-static struct rma_settings rma_orders[] = {
-    { .order = 26, .rmlr_0 = 0, .rmlr_1_2 = 3, }, /*  64 MB */
-    { .order = 27, .rmlr_0 = 1, .rmlr_1_2 = 3, }, /* 128 MB */
-    { .order = 28, .rmlr_0 = 1, .rmlr_1_2 = 0, }, /* 256 MB */
-    { .order = 30, .rmlr_0 = 0, .rmlr_1_2 = 2, }, /*   1 GB */
-    { .order = 34, .rmlr_0 = 0, .rmlr_1_2 = 1, }, /*  16 GB */
-    { .order = 38, .rmlr_0 = 0, .rmlr_1_2 = 0, }, /* 256 GB */
+static struct rma_settings rma_logs[] = {
+    { .log = 26, .rmlr_0 = 0, .rmlr_1_2 = 3, }, /*  64 MB */
+    { .log = 27, .rmlr_0 = 1, .rmlr_1_2 = 3, }, /* 128 MB */
+    { .log = 28, .rmlr_0 = 1, .rmlr_1_2 = 0, }, /* 256 MB */
+    { .log = 30, .rmlr_0 = 0, .rmlr_1_2 = 2, }, /*   1 GB */
+    { .log = 34, .rmlr_0 = 0, .rmlr_1_2 = 1, }, /*  16 GB */
+    { .log = 38, .rmlr_0 = 0, .rmlr_1_2 = 0, }, /* 256 GB */
 };
 
 static uint log_large_page_sizes[] = {
     4 + 20, /* (1 << 4) == 16M */
 };
 
-static struct rma_settings *cpu_find_rma(unsigned int order)
+static struct rma_settings *cpu_find_rma(unsigned int log)
 {
     int i;
-    for (i = 0; i < ARRAY_SIZE(rma_orders); i++) {
-        if (rma_orders[i].order == order)
-            return &rma_orders[i];
+
+    for (i = 0; i < ARRAY_SIZE(rma_logs); i++) {
+        if (rma_logs[i].log == log)
+            return &rma_logs[i];
     }
     return NULL;
 }
 
 unsigned int cpu_default_rma_order_pages(void)
 {
-    return rma_orders[0].order - PAGE_SHIFT;
-}
-
-int cpu_rma_valid(unsigned int log)
-{
-    return cpu_find_rma(log) != NULL;
+    return rma_logs[0].log - PAGE_SHIFT;
+}
+
+int cpu_rma_valid(unsigned int order)
+{
+    return cpu_find_rma(order + PAGE_SHIFT) != NULL;
 }
 
 unsigned int cpu_large_page_orders(uint *sizes, uint max)
@@ -163,8 +193,11 @@ void cpu_initialize(int cpuid)
     mtdec(timebase_freq);
     mthdec(timebase_freq);
 
-    hid0.bits.nap = 1;      /* NAP */
+    /* FIXME Do not set the NAP bit in HID0 until we have had a chance
+     * to audit the safe halt and idle loop code. */
+    hid0.bits.nap = 0;      /* NAP */
     hid0.bits.dpm = 1;      /* Dynamic Power Management */
+
     hid0.bits.nhr = 1;      /* Not Hard Reset */
     hid0.bits.hdice_en = 1; /* enable HDEC */
     hid0.bits.en_therm = 0; /* ! Enable ext thermal ints */
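
cpu_rma_valid() now takes a page order and converts it to a size log before
looking it up in rma_logs[]. A small standalone sketch of that order-to-log
arithmetic, assuming 4 KiB pages (PAGE_SHIFT == 12) purely for illustration:

    #include <stdio.h>

    #define PAGE_SHIFT 12

    static const int rma_logs[] = { 26, 27, 28, 30, 34, 38 };   /* 64MB .. 256GB */

    static int rma_valid(unsigned int order_in_pages)
    {
        unsigned int log = order_in_pages + PAGE_SHIFT;   /* bytes = pages << PAGE_SHIFT */
        for (unsigned int i = 0; i < sizeof(rma_logs) / sizeof(rma_logs[0]); i++)
            if ((unsigned int)rma_logs[i] == log)
                return 1;
        return 0;
    }

    int main(void)
    {
        /* 64 MB RMA: log2(64 MB) = 26, so the page order is 26 - 12 = 14. */
        printf("order 14 valid: %d\n", rma_valid(14));
        printf("order 15 valid: %d\n", rma_valid(15));
        return 0;
    }
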
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/ppc970_machinecheck.c
--- a/xen/arch/powerpc/powerpc64/ppc970_machinecheck.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/ppc970_machinecheck.c  Thu Dec 14 08:57:36 2006 -0700
@@ -24,6 +24,8 @@
 #include <public/xen.h>
 #include <asm/processor.h>
 #include <asm/percpu.h>
+#include <asm/debugger.h>
+#include "scom.h"
 
 #define MCK_SRR1_INSN_FETCH_UNIT    0x0000000000200000 /* 42 */
 #define MCK_SRR1_LOAD_STORE         0x0000000000100000 /* 43 */
@@ -54,6 +56,8 @@ int cpu_machinecheck(struct cpu_user_reg
     if (mck_cpu_stats[mfpir()] != 0)
         printk("While in CI IO\n");
 
+    show_backtrace_regs(regs);
+
     printk("SRR1: 0x%016lx\n", regs->msr);
     if (regs->msr & MCK_SRR1_INSN_FETCH_UNIT)
         printk("42: Exception caused by Instruction Fetch Unit (IFU)\n"
@@ -67,6 +71,7 @@ int cpu_machinecheck(struct cpu_user_reg
     case 0:
         printk("0b00: Likely caused by an asynchronous machine check,\n"
                "      see SCOM Asynchronous Machine Check Register\n");
+        cpu_scom_AMCR();
         break;
     case MCK_SRR1_CAUSE_SLB_PAR:
         printk("0b01: Exception caused by an SLB parity error detected\n"
@@ -116,5 +121,5 @@ int cpu_machinecheck(struct cpu_user_reg
         dump_segments(0);
     }
 
-    return 0; /* for now lets not recover; */
+    return 0; /* for now let's not recover */
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/ppc970_scom.c
--- a/xen/arch/powerpc/powerpc64/ppc970_scom.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/ppc970_scom.c  Thu Dec 14 08:57:36 2006 -0700
@@ -22,33 +22,17 @@
 #include <xen/types.h>
 #include <xen/lib.h>
 #include <xen/console.h>
+#include <xen/errno.h>
+#include <asm/delay.h>
+#include <asm/processor.h>
+#include "scom.h"
+
+#undef CONFIG_SCOM
 
 #define SPRN_SCOMC 276
 #define SPRN_SCOMD 277
-
-static inline void mtscomc(ulong scomc)
-{
-    __asm__ __volatile__ ("mtspr %1, %0" : : "r" (scomc), "i"(SPRN_SCOMC));
-}
-
-static inline ulong mfscomc(void)
-{
-    ulong scomc;
-    __asm__ __volatile__ ("mfspr %0, %1" : "=r" (scomc): "i"(SPRN_SCOMC));
-    return scomc;
-}
-
-static inline void mtscomd(ulong scomd)
-{
-    __asm__ __volatile__ ("mtspr %1, %0" : : "r" (scomd), "i"(SPRN_SCOMD));
-}
-
-static inline ulong mfscomd(void)
-{
-    ulong scomd;
-    __asm__ __volatile__ ("mfspr %0, %1" : "=r" (scomd): "i"(SPRN_SCOMD));
-    return scomd;
-}
+#define SCOMC_READ 1
+#define SCOMC_WRITE (!(SCOMC_READ))
 
 union scomc {
     struct scomc_bits {
@@ -68,50 +52,133 @@ union scomc {
 };
 
 
-static inline ulong read_scom(ulong addr)
+int cpu_scom_read(uint addr, ulong *d)
 {
     union scomc c;
-    ulong d;
+    ulong flags;
 
-    c.word = 0;
-    c.bits.addr = addr;
-    c.bits.RW = 0;
+    /* drop the low 8bits (including parity) */
+    addr >>= 8;
 
-    mtscomc(c.word);
-    d = mfscomd();
-    c.word = mfscomc();
-    if (c.bits.failure)
-        panic("scom status: 0x%016lx\n", c.word);
+    /* these give iface errors because the addresses are not software
+     * accessible */
+    BUG_ON(addr & 0x8000);
 
-    return d;
+    for (;;) {
+        c.word = 0;
+        c.bits.addr = addr;
+        c.bits.RW = SCOMC_READ;
+
+        local_irq_save(flags);
+        asm volatile (
+            "sync         \n\t"
+            "mtspr %2, %0 \n\t"
+            "isync        \n\t"
+            "mfspr %1, %3 \n\t"
+            "isync        \n\t"
+            "mfspr %0, %2 \n\t"
+            "isync        \n\t"
+            : "+r" (c.word), "=r" (*d)
+            : "i"(SPRN_SCOMC), "i"(SPRN_SCOMD));
+
+        local_irq_restore(flags);
+        /* WARNING! older 970s (pre FX) shift the bits right 1 position */
+
+        if (!c.bits.failure)
+            return 0;
+
+        /* deal with errors */
+        /* has SCOM been disabled? */
+        if (c.bits.disabled)
+            return -ENOSYS;
+
+        /* we were passed a bad addr: return -EINVAL */
+        if (c.bits.addr_error)
+            return -EINVAL;
+
+        /* this is way bad and we will checkstop soon */
+        BUG_ON(c.bits.proto_error);
+
+        if (c.bits.iface_error)
+            udelay(10);
+    }
 }
 
-static inline void write_scom(ulong addr, ulong val)
+int cpu_scom_write(uint addr, ulong d)
 {
     union scomc c;
+    ulong flags;
 
-    c.word = 0;
-    c.bits.addr = addr;
-    c.bits.RW = 1;
+    /* drop the low 8bits (including parity) */
+    addr >>= 8;
 
-    mtscomd(val);
-    mtscomc(c.word);
-    c.word = mfscomc();
-    if (c.bits.failure)
-        panic("scom status: 0x%016lx\n", c.word);
+    /* these give iface errors because the addresses are not software
+     * accessible */
+    BUG_ON(addr & 0x8000);
+
+    for (;;) {
+        c.word = 0;
+        c.bits.addr = addr;
+        c.bits.RW = SCOMC_WRITE;
+
+        local_irq_save(flags);
+        asm volatile(
+            "sync         \n\t"
+            "mtspr %3, %1 \n\t"
+            "isync        \n\t"
+            "mtspr %2, %0 \n\t"
+            "isync        \n\t"
+            "mfspr %0, %2 \n\t"
+            "isync        \n\t"
+            : "+r" (c.word)
+            : "r" (d), "i"(SPRN_SCOMC), "i"(SPRN_SCOMD));
+        local_irq_restore(flags);
+
+        if (!c.bits.failure)
+            return 0;
+
+        /* has SCOM been disabled? */
+        if (c.bits.disabled)
+            return -ENOSYS;
+
+        /* we were passed a bad addr: return -EINVAL */
+        if (c.bits.addr_error)
+            return -EINVAL;
+
+        /* this is way bad and we will checkstop soon */
+        BUG_ON(c.bits.proto_error);
+
+        /* check for iface and retry */
+        if (c.bits.iface_error)
+            udelay(10);
+    }
 }
-
-#define SCOM_AMCS_REG      0x022601
-#define SCOM_AMCS_AND_MASK 0x022700
-#define SCOM_AMCS_OR_MASK  0x022800
-#define SCOM_CMCE          0x030901
-#define SCOM_PMCR          0x400801
 
 void cpu_scom_init(void)
 {
-#ifdef not_yet
-    console_start_sync();
-    printk("scom PMCR: 0x%016lx\n", read_scom(SCOM_PMCR));
-    console_end_sync();
+#ifdef CONFIG_SCOM
+    ulong val;
+    if (PVR_REV(mfpvr()) == 0x0300) {
+        /* these addresses are only good for the 970FX */
+        console_start_sync();
+        if (!cpu_scom_read(SCOM_PTSR, &val))
+            printk("SCOM PTSR: 0x%016lx\n", val);
+
+        console_end_sync();
+    }
 #endif
 }
+
+void cpu_scom_AMCR(void)
+{
+#ifdef CONFIG_SCOM
+    ulong val;
+
+    if (PVR_REV(mfpvr()) == 0x0300) {
+        /* these addresses are only good for the 970FX */
+        cpu_scom_read(SCOM_AMC_REG, &val);
+        printk("SCOM AMCR: 0x%016lx\n", val);
+    }
+#endif
+}
+
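
The new cpu_scom_read()/cpu_scom_write() loops retry only on interface errors
and fail fast on everything else. A standalone sketch of that retry
discipline, with stand-in status bits and a stubbed delay rather than the
real SCOMC register layout:

    #include <errno.h>
    #include <stdio.h>

    struct fake_status {
        int disabled;
        int addr_error;
        int iface_error;
    };

    static void fake_udelay(unsigned int usecs) { (void)usecs; /* busy-wait elided */ }

    /* Pretend hardware: fail with an interface error twice, then succeed. */
    static int fake_access(struct fake_status *s)
    {
        static int transient_failures = 2;
        s->disabled = 0;
        s->addr_error = 0;
        s->iface_error = (transient_failures-- > 0);
        return s->iface_error ? -1 : 0;   /* -1 == "failure bit set" */
    }

    static int scom_style_read(unsigned long *val)
    {
        struct fake_status s;

        for (;;) {
            if (fake_access(&s) == 0) {
                *val = 0xdeadbeef;        /* stand-in for the SCOMD contents */
                return 0;
            }
            if (s.disabled)
                return -ENOSYS;           /* SCOM not available: give up */
            if (s.addr_error)
                return -EINVAL;           /* caller bug: give up */
            if (s.iface_error)
                fake_udelay(10);          /* transient: back off and retry */
        }
    }

    int main(void)
    {
        unsigned long v;
        int rc = scom_style_read(&v);
        printf("rc=%d val=0x%lx\n", rc, v);
        return 0;
    }
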
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/traps.c
--- a/xen/arch/powerpc/powerpc64/traps.c        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/traps.c        Thu Dec 14 08:57:36 2006 -0700
@@ -48,7 +48,3 @@ void show_registers(struct cpu_user_regs
     console_end_sync();
 }
 
-void show_execution_state(struct cpu_user_regs *regs)
-{
-    show_registers(regs);
-}
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/rtas.c
--- a/xen/arch/powerpc/rtas.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/rtas.c   Thu Dec 14 08:57:36 2006 -0700
@@ -13,12 +13,90 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2006
  *
  * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include "of-devtree.h"
+#include "rtas.h"
 
-int rtas_halt = -1;
-int rtas_reboot = -1;
+static int rtas_halt_token = -1;
+static int rtas_reboot_token = -1;
+int rtas_entry;
+unsigned long rtas_msr;
+unsigned long rtas_base;
+unsigned long rtas_end;
+
+struct rtas_args {
+    int ra_token;
+    int ra_nargs;
+    int ra_nrets;
+    int ra_args[10];
+} __attribute__ ((aligned(8)));
+
+static int rtas_call(struct rtas_args *r)
+{
+    if (rtas_entry == 0)
+        return -ENOSYS;
+
+    return prom_call(r, rtas_base, rtas_entry, rtas_msr);
+}
+
+int __init rtas_init(void *m)
+{
+    static const char halt[] = "power-off";
+    static const char reboot[] = "system-reboot";
+    ofdn_t n;
+
+    if (rtas_entry == 0)
+        return -ENOSYS;
+
+    n = ofd_node_find(m, "/rtas");
+    if (n <= 0)
+        return -ENOSYS;
+
+    ofd_getprop(m, n, halt,
+                &rtas_halt_token, sizeof (rtas_halt_token));
+    ofd_getprop(m, n, reboot,
+                &rtas_reboot_token, sizeof (rtas_reboot_token));
+    return 1;
+}
+
+int
+rtas_halt(void)
+{
+    struct rtas_args r;
+
+    if (rtas_halt_token == -1)
+        return -1;
+
+    r.ra_token = rtas_halt_token;
+    r.ra_nargs = 2;
+    r.ra_nrets = 1;
+    r.ra_args[0] = 0;
+    r.ra_args[1] = 0;
+
+    return rtas_call(&r);
+}
+
+int
+rtas_reboot(void)
+{
+    struct rtas_args r;
+
+    if (rtas_reboot_token == -1)
+        return -ENOSYS;
+
+    r.ra_token = rtas_reboot_token;
+    r.ra_nargs = 2;
+    r.ra_nrets = 1;
+    r.ra_args[0] = 0;
+    r.ra_args[1] = 0;
+
+    return rtas_call(&r);
+}
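
rtas_init() caches the power-off and system-reboot tokens from the /rtas node
and rtas_call() hands a packed argument block to the firmware entry point. A
standalone sketch of how such a block is filled in for a no-argument call;
the token value and the printing stand in for the real firmware entry:

    #include <stdio.h>

    struct rtas_args {
        int token;
        int nargs;
        int nrets;
        int args[10];
    } __attribute__ ((aligned(8)));

    /* Stand-in for handing the buffer to the firmware entry point. */
    static int fake_rtas_call(const struct rtas_args *r)
    {
        printf("rtas call: token=%d nargs=%d nrets=%d\n",
               r->token, r->nargs, r->nrets);
        return 0;
    }

    int main(void)
    {
        struct rtas_args r = { 0 };

        r.token = 42;      /* would come from the /rtas "system-reboot" property */
        r.nargs = 2;
        r.nrets = 1;
        r.args[0] = 0;
        r.args[1] = 0;

        return fake_rtas_call(&r);
    }
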
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/setup.c
--- a/xen/arch/powerpc/setup.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/setup.c  Thu Dec 14 08:57:36 2006 -0700
@@ -1,8 +1,8 @@
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -35,8 +35,10 @@
 #include <xen/gdbstub.h>
 #include <xen/symbols.h>
 #include <xen/keyhandler.h>
+#include <xen/numa.h>
 #include <acm/acm_hooks.h>
 #include <public/version.h>
+#include <asm/mpic.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
 #include <asm/cache.h>
@@ -47,6 +49,7 @@
 #include "exceptions.h"
 #include "of-devtree.h"
 #include "oftree.h"
+#include "rtas.h"
 
 #define DEBUG
 
@@ -75,10 +78,7 @@ ulong oftree_end;
 ulong oftree_end;
 
 uint cpu_hard_id[NR_CPUS] __initdata;
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
-cpumask_t cpu_online_map; /* missing ifdef in schedule.c */
 cpumask_t cpu_present_map;
-cpumask_t cpu_possible_map;
 
 /* XXX get this from ISA node in device tree */
 char *vgabase;
@@ -87,6 +87,8 @@ struct ns16550_defaults ns16550;
 
 extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
 
+static struct domain *idle_domain;
+
 volatile struct processor_area * volatile global_cpu_table[NR_CPUS];
 
 int is_kernel_text(unsigned long addr)
@@ -110,12 +112,28 @@ static void __init do_initcalls(void)
     }
 }
 
-static void hw_probe_attn(unsigned char key, struct cpu_user_regs *regs)
+
+void noinline __attn(void)
 {
     /* To continue, the probe will step over the ATTN instruction.  The
      * NOP is there to make sure there is something sane to "step
      * over" to. */
-    asm volatile(".long 0x00000200; nop");
+    console_start_sync();
+    asm volatile(".long 0x200;nop");
+    console_end_sync();
+}
+
+static void key_hw_probe_attn(unsigned char key)
+{
+    __attn();
+}
+
+static void key_ofdump(unsigned char key)
+{
+    printk("ofdump:\n");
+    /* make sure the OF devtree is good */
+    ofd_walk((void *)oftree, "devtree", OFD_ROOT,
+             ofd_dump_props, OFD_DUMP_ALL);
 }
 
 static void percpu_init_areas(void)
@@ -150,8 +168,6 @@ static void percpu_free_unused_areas(voi
 
 static void __init start_of_day(void)
 {
-    struct domain *idle_domain;
-
     init_IRQ();
 
     scheduler_init();
@@ -166,36 +182,19 @@ static void __init start_of_day(void)
     /* for some reason we need to set our own bit in the thread map */
     cpu_set(0, cpu_sibling_map[0]);
 
-    percpu_free_unused_areas();
-
-    {
-        /* FIXME: Xen assumes that an online CPU is a schedualable
-         * CPU, but we just are not there yet. Remove this fragment when
-         * scheduling processors actually works. */
-        int cpuid;
-
-        printk("WARNING!: Taking all secondary CPUs offline\n");
-
-        for_each_online_cpu(cpuid) {
-            if (cpuid == 0)
-                continue;
-            cpu_clear(cpuid, cpu_online_map);
-        }
-    }
-
     initialize_keytable();
     /* Register another key that will allow for the Hardware Probe
      * to be contacted; this works with RiscWatch probes and should
      * work with Chronos and FSPs */
-    register_irq_keyhandler('^', hw_probe_attn,   "Trap to Hardware Probe");
+    register_keyhandler('^', key_hw_probe_attn, "Trap to Hardware Probe");
+
+    /* allow the dumping of the devtree */
+    register_keyhandler('D', key_ofdump , "Dump OF Devtree");
 
     timer_init();
     serial_init_postirq();
     do_initcalls();
-    schedulers_start();
-}
-
-extern void idle_loop(void);
+}
 
 void startup_cpu_idle_loop(void)
 {
@@ -208,6 +207,15 @@ void startup_cpu_idle_loop(void)
     /* Finally get off the boot stack. */
     reset_stack_and_jump(idle_loop);
 }
+
+/* The boot_pa is enough "parea" for the boot CPU to get through
+ * initialization; it will ultimately be replaced later */
+static __init void init_boot_cpu(void)
+{
+    static struct processor_area boot_pa;
+    boot_pa.whoami = 0;
+    parea = &boot_pa;
+}    
 
 static void init_parea(int cpuid)
 {
@@ -227,6 +235,7 @@ static void init_parea(int cpuid)
     pa->whoami = cpuid;
     pa->hard_id = cpu_hard_id[cpuid];
     pa->hyp_stack_base = (void *)((ulong)stack + STACK_SIZE);
+    mb();
 
     /* This store has the effect of invoking secondary_cpu_init.  */
     global_cpu_table[cpuid] = pa;
@@ -248,18 +257,34 @@ static int kick_secondary_cpus(int maxcp
         /* wait for it */
         while (!cpu_online(cpuid))
             cpu_relax();
+
+        numa_set_node(cpuid, 0);
+        numa_add_cpu(cpuid);
     }
 
     return 0;
 }
 
 /* This is the first C code that secondary processors invoke.  */
-int secondary_cpu_init(int cpuid, unsigned long r4)
-{
+void secondary_cpu_init(int cpuid, unsigned long r4)
+{
+    struct vcpu *vcpu;
+
     cpu_initialize(cpuid);
     smp_generic_take_timebase();
+
+    /* If we are online, we must be able to ACK IPIs.  */
+    mpic_setup_this_cpu();
     cpu_set(cpuid, cpu_online_map);
-    while(1);
+
+    vcpu = alloc_vcpu(idle_domain, cpuid, cpuid);
+    BUG_ON(vcpu == NULL);
+
+    set_current(idle_domain->vcpu[cpuid]);
+    idle_vcpu[cpuid] = current;
+    startup_cpu_idle_loop();
+
+    panic("should never get here\n");
 }
 
 static void __init __start_xen(multiboot_info_t *mbi)
@@ -277,6 +302,9 @@ static void __init __start_xen(multiboot
     /* Parse the command-line options. */
     if ((mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0))
         cmdline_parse(__va((ulong)mbi->cmdline));
+
+    /* we need to be able to identify this CPU early on */
+    init_boot_cpu();
 
     /* We initialise the serial devices very early so we can get debugging. */
     ns16550.io_base = 0x3f8;
@@ -286,20 +314,12 @@ static void __init __start_xen(multiboot
     serial_init_preirq();
 
     init_console();
-#ifdef CONSOLE_SYNC
+    /* let's synchronize until we really get going */
     console_start_sync();
-#endif
-
-    /* we give the first RMA to the hypervisor */
-    xenheap_phys_end = rma_size(cpu_default_rma_order_pages());
 
     /* Check that we have at least one Multiboot module. */
     if (!(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0)) {
         panic("FATAL ERROR: Require at least one Multiboot module.\n");
-    }
-
-    if (!(mbi->flags & MBI_MEMMAP)) {
-        panic("FATAL ERROR: Bootloader provided no memory information.\n");
     }
 
     /* OF dev tree is the last module */
@@ -312,14 +332,18 @@ static void __init __start_xen(multiboot
     mod[mbi->mods_count-1].mod_end = 0;
     --mbi->mods_count;
 
+    if (rtas_entry) {
+        rtas_init((void *)oftree);
+        /* remove rtas module from consideration */
+        mod[mbi->mods_count-1].mod_start = 0;
+        mod[mbi->mods_count-1].mod_end = 0;
+        --mbi->mods_count;
+    }
     memory_init(mod, mbi->mods_count);
 
 #ifdef OF_DEBUG
-    printk("ofdump:\n");
-    /* make sure the OF devtree is good */
-    ofd_walk((void *)oftree, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL);
+    key_ofdump(0);
 #endif
-
     percpu_init_areas();
 
     init_parea(0);
@@ -330,6 +354,10 @@ static void __init __start_xen(multiboot
     if (opt_earlygdb)
         debugger_trap_immediate();
 #endif
+
+    start_of_day();
+
+    mpic_setup_this_cpu();
 
     /* Deal with secondary processors.  */
     if (opt_nosmp || ofd_boot_cpu == -1) {
@@ -339,7 +367,11 @@ static void __init __start_xen(multiboot
         kick_secondary_cpus(max_cpus);
     }
 
-    start_of_day();
+    /* Secondary processors must be online before we call this.  */
+    schedulers_start();
+
+    /* This cannot be called before secondary cpus are marked online.  */
+    percpu_free_unused_areas();
 
     /* Create initial domain 0. */
     dom0 = domain_create(0, 0);
@@ -383,10 +415,10 @@ static void __init __start_xen(multiboot
     }
 
     init_xenheap_pages(ALIGN_UP(dom0_start, PAGE_SIZE),
-                 ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE));
+                       ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE));
     if (initrd_start)
         init_xenheap_pages(ALIGN_UP(initrd_start, PAGE_SIZE),
-                     ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE));
+                           ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE));
 
     init_trace_bufs();
 
@@ -395,8 +427,12 @@ static void __init __start_xen(multiboot
     /* Hide UART from DOM0 if we're using it */
     serial_endboot();
 
+    console_end_sync();
+
     domain_unpause_by_systemcontroller(dom0);
-
+#ifdef DEBUG_IPI
+    ipi_torture_test();
+#endif
     startup_cpu_idle_loop();
 }
 
@@ -414,7 +450,7 @@ void __init __start_xen_ppc(
 
     } else {
         /* booted by someone else that hopefully has a trap handler */
-        trap();
+        __builtin_trap();
     }
 
     __start_xen(mbi);
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/shadow.c
--- a/xen/arch/powerpc/shadow.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/shadow.c Thu Dec 14 08:57:36 2006 -0700
@@ -101,9 +101,6 @@ unsigned int shadow_set_allocation(struc
 
     addr = htab_alloc(d, order);
 
-    printk("%s: ibm,fpt-size should be: 0x%x\n", __func__,
-           d->arch.htab.log_num_ptes + LOG_PTE_SIZE);
-
     if (addr == 0)
         return -ENOMEM;
 
@@ -115,8 +112,8 @@ unsigned int shadow_set_allocation(struc
 }
 
 int shadow_domctl(struct domain *d, 
-                                 xen_domctl_shadow_op_t *sc,
-                                 XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+                  xen_domctl_shadow_op_t *sc,
+                  XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
 {
     if ( unlikely(d == current->domain) )
     {
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.c    Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/smp.c    Thu Dec 14 08:57:36 2006 -0700
@@ -13,15 +13,18 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005,2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ * Authors: Amos Waterland <apw@xxxxxxxxxx>
  */
 
-#include <asm/misc.h>
 #include <xen/cpumask.h>
 #include <xen/smp.h>
 #include <asm/flushtlb.h>
+#include <asm/debugger.h>
+#include <asm/mpic.h>
+#include <asm/mach-default/irq_vectors.h>
 
 int smp_num_siblings = 1;
 int smp_num_cpus = 1;
@@ -29,25 +32,56 @@ int ht_per_core = 1;
 
 void __flush_tlb_mask(cpumask_t mask, unsigned long addr)
 {
-    unimplemented();
-}
-
-void smp_send_event_check_mask(cpumask_t cpu_mask)
-{
-    unimplemented();
-}
-
-int smp_call_function(void (*func) (void *info), void *info, int unused,
-        int wait)
-{
-    unimplemented();
-    return 0;
+    if (cpu_isset(smp_processor_id(), mask)) {
+        cpu_clear(smp_processor_id(), mask);
+        if (cpus_empty(mask)) {
+            /* only local */
+            if (addr == FLUSH_ALL_ADDRS)
+                local_flush_tlb();
+            else
+                local_flush_tlb_one(addr);
+            return;
+        }
+    }
+    /* if we are still here and the mask is non-empty, then we need to
+     * flush other TLBs, so we flush them all */
+    if (!cpus_empty(mask))
+        unimplemented();
+}
+
+void smp_send_event_check_mask(cpumask_t mask)
+{
+    cpu_clear(smp_processor_id(), mask);
+    if (!cpus_empty(mask))
+        send_IPI_mask(mask, EVENT_CHECK_VECTOR);
+}
+
+
+int smp_call_function(void (*func) (void *info), void *info, int retry,
+                      int wait)
+{
+    cpumask_t allbutself = cpu_online_map;
+    cpu_clear(smp_processor_id(), allbutself);
+
+    return on_selected_cpus(allbutself, func, info, retry, wait);
 }
 
 void smp_send_stop(void)
 {
-    unimplemented();
-}
+    BUG();
+}
+
+struct call_data_struct {
+    void (*func) (void *info);
+    void *info;
+    int wait;
+    atomic_t started;
+    atomic_t finished;
+    cpumask_t selected;
+};
+
+static DEFINE_SPINLOCK(call_lock);
+static struct call_data_struct call_data;
 
 int on_selected_cpus(
     cpumask_t selected,
@@ -56,5 +90,125 @@ int on_selected_cpus(
     int retry,
     int wait)
 {
-    return 0;
-}
+    int retval = 0, nr_cpus = cpus_weight(selected);
+    unsigned long start, stall = SECONDS(1);
+
+    spin_lock(&call_lock);
+
+    call_data.func = func;
+    call_data.info = info;
+    call_data.wait = wait;
+    atomic_set(&call_data.started, 0);
+    atomic_set(&call_data.finished, 0);
+    mb();
+
+    send_IPI_mask(selected, CALL_FUNCTION_VECTOR);
+
+    /* We always wait for an initiation ACK from remote CPU.  */
+    for (start = NOW(); atomic_read(&call_data.started) != nr_cpus; ) {
+        if (NOW() > start + stall) {
+            printk("IPI start stall: %d ACKS to %d SYNS\n", 
+                   atomic_read(&call_data.started), nr_cpus);
+            start = NOW();
+        }
+    }
+
+    /* If told to, we wait for a completion ACK from remote CPU.  */
+    if (wait) {
+        for (start = NOW(); atomic_read(&call_data.finished) != nr_cpus; ) {
+            if (NOW() > start + stall) {
+                printk("IPI finish stall: %d ACKS to %d SYNS\n", 
+                       atomic_read(&call_data.finished), nr_cpus);
+                start = NOW();
+            }
+        }
+    }
+
+    spin_unlock(&call_lock);
+
+    return retval;
+}
+
+void smp_call_function_interrupt(struct cpu_user_regs *regs)
+{
+
+    void (*func)(void *info) = call_data.func;
+    void *info = call_data.info;
+    int wait = call_data.wait;
+
+    atomic_inc(&call_data.started);
+    mb();
+    (*func)(info);
+    mb();
+
+    if (wait)
+        atomic_inc(&call_data.finished);
+
+    return;
+}
+
+void smp_event_check_interrupt(void)
+{
+    /* We are knocked out of NAP state at least.  */
+    return;
+}
+
+void smp_message_recv(int msg, struct cpu_user_regs *regs)
+{
+    switch(msg) {
+    case CALL_FUNCTION_VECTOR:
+        smp_call_function_interrupt(regs);
+        break;
+    case EVENT_CHECK_VECTOR:
+        smp_event_check_interrupt();
+        break;
+    default:
+        BUG();
+        break;
+    }
+}
+
+#ifdef DEBUG_IPI
+static void debug_ipi_ack(void *info)
+{
+    if (info) {
+        unsigned long start, stall = SECONDS(5);
+        for (start = NOW(); NOW() < start + stall; );
+        printk("IPI recv on cpu #%d: %s\n", smp_processor_id(), (char *)info);
+    }
+    return;
+}
+
+void ipi_torture_test(void)
+{
+    int cpu;
+    unsigned long before, after, delta;
+    unsigned long min = ~0, max = 0, mean = 0, sum = 0, trials = 0;
+    cpumask_t mask;
+
+    cpus_clear(mask);
+
+    while (trials < 1000000) {
+        for_each_online_cpu(cpu) {
+            cpu_set(cpu, mask);
+            before = mftb();
+            on_selected_cpus(mask, debug_ipi_ack, NULL, 1, 1);
+            after = mftb();
+            cpus_clear(mask);
+
+            delta = after - before;
+            if (delta > max) max = delta;
+            if (delta < min) min = delta;
+            sum += delta;
+            trials++;
+        }
+    }
+
+    mean = tb_to_ns(sum / trials);
+
+    printk("IPI latency: min = %ld ticks, max = %ld ticks, mean = %ldns\n",
+           min, max, mean);
+
+    smp_call_function(debug_ipi_ack, "Hi", 0, 1);
+}
+#endif
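
on_selected_cpus() above synchronizes with the targets through the
started/finished counters in call_data. A standalone sketch of that two-phase
rendezvous, using threads in place of CPUs and C11 atomics in place of Xen's
atomic_t (build with -pthread):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define NR_TARGETS 3

    static atomic_int started;
    static atomic_int finished;

    static void do_work(void *info)
    {
        (void)info;   /* the cross-call payload would run here */
    }

    /* What each "CPU" does when the call-function IPI arrives. */
    static void *ipi_handler(void *info)
    {
        atomic_fetch_add(&started, 1);    /* ACK that we picked up the call */
        do_work(info);
        atomic_fetch_add(&finished, 1);   /* ACK that the payload completed */
        return NULL;
    }

    int main(void)
    {
        pthread_t targets[NR_TARGETS];

        for (int i = 0; i < NR_TARGETS; i++)
            pthread_create(&targets[i], NULL, ipi_handler, NULL);

        /* Initiator side: spin until all targets have ACKed both phases. */
        while (atomic_load(&started) != NR_TARGETS)
            ;
        while (atomic_load(&finished) != NR_TARGETS)
            ;

        printf("all %d targets started and finished\n", NR_TARGETS);

        for (int i = 0; i < NR_TARGETS; i++)
            pthread_join(targets[i], NULL);
        return 0;
    }
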
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/time.c
--- a/xen/arch/powerpc/time.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/time.c   Thu Dec 14 08:57:36 2006 -0700
@@ -25,7 +25,7 @@
 #include <xen/sched.h>
 #include <asm/processor.h>
 #include <asm/current.h>
-#include <asm/misc.h>
+#include <asm/debugger.h>
 
 #define Dprintk(x...) printk(x)
 
@@ -93,5 +93,4 @@ void do_settime(unsigned long secs, unsi
 
 void update_vcpu_system_time(struct vcpu *v)
 {
-    unimplemented();
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/usercopy.c
--- a/xen/arch/powerpc/usercopy.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/usercopy.c       Thu Dec 14 08:57:36 2006 -0700
@@ -18,267 +18,33 @@
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
  */
 
-#include <xen/config.h>
-#include <xen/mm.h>
 #include <xen/sched.h>
+#include <xen/lib.h>
 #include <asm/current.h>
-#include <asm/uaccess.h>
-#include <public/xen.h>
-#include <public/xencomm.h>
-
-#undef DEBUG
-#ifdef DEBUG
-static int xencomm_debug = 1; /* extremely verbose */
-#else
-#define xencomm_debug 0
-#endif
+#include <asm/page.h>
+#include <asm/debugger.h>
 
 /* XXX need to return error, not panic, if domain passed a bad pointer */
-static unsigned long paddr_to_maddr(unsigned long paddr)
+unsigned long paddr_to_maddr(unsigned long paddr)
 {
     struct vcpu *v = get_current();
     struct domain *d = v->domain;
-    int mtype;
-    ulong pfn;
+    ulong gpfn;
     ulong offset;
     ulong pa = paddr;
 
     offset = pa & ~PAGE_MASK;
-    pfn = pa >> PAGE_SHIFT;
+    gpfn = pa >> PAGE_SHIFT;
 
-    pa = pfn2mfn(d, pfn, &mtype);
+    pa = gmfn_to_mfn(d, gpfn);
     if (pa == INVALID_MFN) {
         printk("%s: Dom:%d bad paddr: 0x%lx\n",
                __func__, d->domain_id, paddr);
         return 0;
     }
-    switch (mtype) {
-    case PFN_TYPE_RMA:
-    case PFN_TYPE_LOGICAL:
-        break;
 
-    case PFN_TYPE_FOREIGN:
-        /* I don't think this should ever happen, but I suppose it
-         * could be possible */
-        printk("%s: Dom:%d paddr: 0x%lx type: FOREIGN\n",
-               __func__, d->domain_id, paddr);
-        WARN();
-        break;
-
-    case PFN_TYPE_IO:
-    default:
-        printk("%s: Dom:%d paddr: 0x%lx bad type: 0x%x\n",
-               __func__, d->domain_id, paddr, mtype);
-        WARN();
-        return 0;
-    }
     pa <<= PAGE_SHIFT;
     pa |= offset;
 
     return pa;
 }
-
-/**
- * xencomm_copy_from_guest: Copy a block of data from domain space.
- * @to:   Machine address.
- * @from: Physical address to a xencomm buffer descriptor.
- * @n:    Number of bytes to copy.
- * @skip: Number of bytes from the start to skip.
- *
- * Copy data from domain to hypervisor.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long
-xencomm_copy_from_guest(void *to, const void *from, unsigned int n,
-        unsigned int skip)
-{
-    struct xencomm_desc *desc;
-    unsigned int from_pos = 0;
-    unsigned int to_pos = 0;
-    unsigned int i = 0;
-
-    /* first we need to access the descriptor */
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)from);
-    if (desc == NULL)
-        return n;
-
-    if (desc->magic != XENCOMM_MAGIC) {
-        printk("%s: error: %p magic was 0x%x\n",
-               __func__, desc, desc->magic);
-        return n;
-    }
-
-    /* iterate through the descriptor, copying up to a page at a time */
-    while ((to_pos < n) && (i < desc->nr_addrs)) {
-        unsigned long src_paddr = desc->address[i];
-        unsigned int pgoffset;
-        unsigned int chunksz;
-        unsigned int chunk_skip;
-
-        if (src_paddr == XENCOMM_INVALID) {
-            i++;
-            continue;
-        }
-
-        pgoffset = src_paddr % PAGE_SIZE;
-        chunksz = PAGE_SIZE - pgoffset;
-
-        chunk_skip = min(chunksz, skip);
-        from_pos += chunk_skip;
-        chunksz -= chunk_skip;
-        skip -= chunk_skip;
-
-        if (skip == 0) {
-            unsigned long src_maddr;
-            unsigned long dest = (unsigned long)to + to_pos;
-            unsigned int bytes = min(chunksz, n - to_pos);
-
-            src_maddr = paddr_to_maddr(src_paddr + chunk_skip);
-            if (src_maddr == 0)
-                return n - to_pos;
-
-            if (xencomm_debug)
-                printk("%lx[%d] -> %lx\n", src_maddr, bytes, dest);
-            memcpy((void *)dest, (void *)src_maddr, bytes);
-            from_pos += bytes;
-            to_pos += bytes;
-        }
-
-        i++;
-    }
-
-    return n - to_pos;
-}
-
-/**
- * xencomm_copy_to_guest: Copy a block of data to domain space.
- * @to:     Physical address to xencomm buffer descriptor.
- * @from:   Machine address.
- * @n:      Number of bytes to copy.
- * @skip: Number of bytes from the start to skip.
- *
- * Copy data from hypervisor to domain.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long
-xencomm_copy_to_guest(void *to, const void *from, unsigned int n,
-        unsigned int skip)
-{
-    struct xencomm_desc *desc;
-    unsigned int from_pos = 0;
-    unsigned int to_pos = 0;
-    unsigned int i = 0;
-
-    /* first we need to access the descriptor */
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)to);
-    if (desc == NULL)
-        return n;
-
-    if (desc->magic != XENCOMM_MAGIC) {
-        printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
-        return n;
-    }
-
-    /* iterate through the descriptor, copying up to a page at a time */
-    while ((from_pos < n) && (i < desc->nr_addrs)) {
-        unsigned long dest_paddr = desc->address[i];
-        unsigned int pgoffset;
-        unsigned int chunksz;
-        unsigned int chunk_skip;
-
-        if (dest_paddr == XENCOMM_INVALID) {
-            i++;
-            continue;
-        }
-
-        pgoffset = dest_paddr % PAGE_SIZE;
-        chunksz = PAGE_SIZE - pgoffset;
-
-        chunk_skip = min(chunksz, skip);
-        to_pos += chunk_skip;
-        chunksz -= chunk_skip;
-        skip -= chunk_skip;
-
-        if (skip == 0) {
-            unsigned long dest_maddr;
-            unsigned long source = (unsigned long)from + from_pos;
-            unsigned int bytes = min(chunksz, n - from_pos);
-
-            dest_maddr = paddr_to_maddr(dest_paddr + chunk_skip);
-            if (dest_maddr == 0)
-                return -1;
-
-            if (xencomm_debug)
-                printk("%lx[%d] -> %lx\n", source, bytes, dest_maddr);
-            memcpy((void *)dest_maddr, (void *)source, bytes);
-            from_pos += bytes;
-            to_pos += bytes;
-        }
-
-        i++;
-    }
-
-    return n - from_pos;
-}
-
-/* Offset page addresses in 'handle' to skip 'bytes' bytes. Set completely
- * exhausted pages to XENCOMM_INVALID. */
-int xencomm_add_offset(void *handle, unsigned int bytes)
-{
-    struct xencomm_desc *desc;
-    int i = 0;
-
-    /* first we need to access the descriptor */
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)handle);
-    if (desc == NULL)
-        return -1;
-
-    if (desc->magic != XENCOMM_MAGIC) {
-        printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
-        return -1;
-    }
-
-    /* iterate through the descriptor incrementing addresses */
-    while ((bytes > 0) && (i < desc->nr_addrs)) {
-        unsigned long dest_paddr = desc->address[i];
-        unsigned int pgoffset;
-        unsigned int chunksz;
-        unsigned int chunk_skip;
-
-        if (dest_paddr == XENCOMM_INVALID) {
-            i++;
-            continue;
-        }
-
-        pgoffset = dest_paddr % PAGE_SIZE;
-        chunksz = PAGE_SIZE - pgoffset;
-
-        chunk_skip = min(chunksz, bytes);
-        if (chunk_skip == chunksz) {
-            /* exhausted this page */
-            desc->address[i] = XENCOMM_INVALID;
-        } else {
-            desc->address[i] += chunk_skip;
-        }
-        bytes -= chunk_skip;
-
-       i++;
-    }
-    return 0;
-}
-
-int xencomm_handle_is_null(void *ptr)
-{
-    struct xencomm_desc *desc;
-
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)ptr);
-    if (desc == NULL)
-        return 1;
-
-    return (desc->nr_addrs == 0);
-}
-
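
With the xencomm copy routines removed here (they move to common code; note the obj-$(CONFIG_XENCOMM) addition in xen/common/Makefile below), usercopy.c is reduced to the now-exported paddr_to_maddr(). A minimal sketch of how a caller might use it to copy one page-bounded chunk out of a guest buffer (illustrative only; copy_page_chunk_from_guest is not part of the patch):

    static int copy_page_chunk_from_guest(void *dst, unsigned long gpaddr,
                                          unsigned int len)
    {
        unsigned long maddr = paddr_to_maddr(gpaddr);  /* 0 on a bad pointer */
        unsigned int pgoff = gpaddr & (PAGE_SIZE - 1);

        if ( maddr == 0 )
            return -1;
        if ( len > PAGE_SIZE - pgoff )
            len = PAGE_SIZE - pgoff;         /* never cross a page boundary */
        memcpy(dst, (void *)maddr, len);     /* PowerPC Xen runs in real mode */
        return len;
    }
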
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/xen.lds.S
--- a/xen/arch/powerpc/xen.lds.S        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/xen.lds.S        Thu Dec 14 08:57:36 2006 -0700
@@ -12,12 +12,12 @@ SEARCH_DIR("=/usr/local/lib64"); SEARCH_
    __DYNAMIC = 0;    */
 PHDRS
 {
-  text PT_LOAD FILEHDR PHDRS;
+  text PT_LOAD;
 }   
 SECTIONS
 {
+  . = 0x00400000;
   /* Read-only sections, merged into text segment: */
-  PROVIDE (__executable_start = 0x10000000); . = 0x10000000 + SIZEOF_HEADERS;
   .interp         : { *(.interp) } :text
   .hash           : { *(.hash) }
   .dynsym         : { *(.dynsym) }
@@ -111,8 +111,6 @@ SECTIONS
     SORT(CONSTRUCTORS)
   }
 
-  /* Xen addition */
-
   . = ALIGN(32);
   __setup_start = .;
   .setup.init : { *(.setup.init) }
@@ -130,8 +128,6 @@ SECTIONS
   . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT);
   . = ALIGN(STACK_SIZE);
   __per_cpu_end = .;
-
-  /* end Xen addition */
 
   .data1          : { *(.data1) }
   .tdata         : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/crash.c
--- a/xen/arch/x86/crash.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/crash.c      Thu Dec 14 08:57:36 2006 -0700
@@ -58,9 +58,9 @@ static void smp_send_nmi_allbutself(void
 static void smp_send_nmi_allbutself(void)
 {
     cpumask_t allbutself = cpu_online_map;
-
     cpu_clear(smp_processor_id(), allbutself);
-    send_IPI_mask(allbutself, APIC_DM_NMI);
+    if ( !cpus_empty(allbutself) )
+        send_IPI_mask(allbutself, APIC_DM_NMI);
 }
 
 static void nmi_shootdown_cpus(void)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/domain_build.c       Thu Dec 14 08:57:36 2006 -0700
@@ -321,8 +321,11 @@ int construct_dom0(struct domain *d,
     if ( (rc = parseelfimage(&dsi)) != 0 )
         return rc;
 
-    dom0_pae = (dsi.pae_kernel != PAEKERN_no);
     xen_pae  = (CONFIG_PAGING_LEVELS == 3);
+    if (dsi.pae_kernel == PAEKERN_bimodal)
+        dom0_pae = xen_pae; 
+    else
+        dom0_pae = (dsi.pae_kernel != PAEKERN_no);
     if ( dom0_pae != xen_pae )
     {
         printk("PAE mode mismatch between Xen and DOM0 (xen=%s, dom0=%s)\n",
@@ -330,7 +333,8 @@ int construct_dom0(struct domain *d,
         return -EINVAL;
     }
 
-    if ( xen_pae && dsi.pae_kernel == PAEKERN_extended_cr3 )
+    if ( xen_pae && (dsi.pae_kernel == PAEKERN_extended_cr3 ||
+            dsi.pae_kernel == PAEKERN_bimodal) )
             set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);
 
     if ( (p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES)) != NULL )
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/mm.c Thu Dec 14 08:57:36 2006 -0700
@@ -2951,7 +2951,17 @@ long arch_memory_op(int op, XEN_GUEST_HA
         guest_physmap_add_page(d, xatp.gpfn, mfn);
 
         UNLOCK_BIGLOCK(d);
-        
+
+        /* If we're doing FAST_FAULT_PATH, then shadow mode may have
+           cached the fact that this is an mmio region in the shadow
+           page tables.  Blow the tables away to remove the cache.
+           This is heavy-handed, but it is a rare operation
+           (it might happen a dozen times during boot and then never
+           again), so it doesn't matter too much. */
+        shadow_lock(d);
+        shadow_blow_tables(d);
+        shadow_unlock(d);
+
         put_domain(d);
 
         break;
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/mm/shadow/common.c   Thu Dec 14 08:57:36 2006 -0700
@@ -791,7 +791,7 @@ void shadow_prealloc(struct domain *d, u
 
 /* Deliberately free all the memory we can: this will tear down all of
  * this domain's shadows */
-static void shadow_blow_tables(struct domain *d) 
+void shadow_blow_tables(struct domain *d) 
 {
     struct list_head *l, *t;
     struct shadow_page_info *sp;
@@ -3123,7 +3123,7 @@ static int shadow_log_dirty_op(
  out:
     shadow_unlock(d);
     domain_unpause(d);
-    return 0;
+    return rv;
 }
 
 
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/mm/shadow/multi.c    Thu Dec 14 08:57:36 2006 -0700
@@ -3488,6 +3488,9 @@ sh_update_cr3(struct vcpu *v)
                                        ? SH_type_l2h_shadow 
                                        : SH_type_l2_shadow);
             }
+            else
+                /* The guest is not present: clear out the shadow. */
+                sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
         }
     }
 #elif GUEST_PAGING_LEVELS == 4
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/numa.c
--- a/xen/arch/x86/numa.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/numa.c       Thu Dec 14 08:57:36 2006 -0700
@@ -214,7 +214,7 @@ void __init numa_initmem_init(unsigned l
 
 __cpuinit void numa_add_cpu(int cpu)
 {
-       set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+       cpu_set(cpu, node_to_cpumask[cpu_to_node(cpu)]);
 } 
 
 void __cpuinit numa_set_node(int cpu, int node)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/Makefile
--- a/xen/common/Makefile       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/Makefile       Thu Dec 14 08:57:36 2006 -0700
@@ -32,5 +32,7 @@ obj-$(crash_debug) += gdbstub.o
 obj-$(crash_debug) += gdbstub.o
 obj-$(xenoprof)    += xenoprof.o
 
+obj-$(CONFIG_XENCOMM) += xencomm.o
+
 # Object file contains changeset and compiler information.
 version.o: $(BASEDIR)/include/xen/compile.h
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/domain.c
--- a/xen/common/domain.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/domain.c       Thu Dec 14 08:57:36 2006 -0700
@@ -238,7 +238,11 @@ void domain_kill(struct domain *d)
 
 void __domain_crash(struct domain *d)
 {
-    if ( d == current->domain )
+    if ( test_bit(_DOMF_shutdown, &d->domain_flags) )
+    {
+        /* Print nothing: the domain is already shutting down. */
+    }
+    else if ( d == current->domain )
     {
         printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
                d->domain_id, current->vcpu_id, smp_processor_id());
@@ -346,16 +350,25 @@ void domain_destroy(struct domain *d)
     send_guest_global_virq(dom0, VIRQ_DOM_EXC);
 }
 
-void vcpu_pause(struct vcpu *v)
-{
-    ASSERT(v != current);
-
+static void vcpu_pause_setup(struct vcpu *v)
+{
     spin_lock(&v->pause_lock);
     if ( v->pause_count++ == 0 )
         set_bit(_VCPUF_paused, &v->vcpu_flags);
     spin_unlock(&v->pause_lock);
-
+}
+
+void vcpu_pause(struct vcpu *v)
+{
+    ASSERT(v != current);
+    vcpu_pause_setup(v);
     vcpu_sleep_sync(v);
+}
+
+void vcpu_pause_nosync(struct vcpu *v)
+{
+    vcpu_pause_setup(v);
+    vcpu_sleep_nosync(v);
 }
 
 void vcpu_unpause(struct vcpu *v)
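
The split above gives callers a vcpu_pause_nosync() that requests the pause without waiting for the target VCPU to be descheduled, which is what the credit-scheduler capping code later in this changeset relies on. A minimal sketch of that pattern (illustrative only; park_vcpu/unpark_vcpu are made-up wrappers around the fields and flag the sched_credit.c hunks define):

    static void park_vcpu(struct csched_vcpu *svc)
    {
        vcpu_pause_nosync(svc->vcpu);            /* request pause, don't block */
        svc->flags |= CSCHED_FLAG_VCPU_PARKED;
    }

    static void unpark_vcpu(struct csched_vcpu *svc)
    {
        /* Unpause before clearing the flag so the wake-up isn't boosted. */
        vcpu_unpause(svc->vcpu);
        svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
    }
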
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/elf.c
--- a/xen/common/elf.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/elf.c  Thu Dec 14 08:57:36 2006 -0700
@@ -216,16 +216,6 @@ int parseelfimage(struct domain_setup_in
         return -EINVAL;
     }
 
-    /* Find the section-header strings table. */
-    if ( ehdr->e_shstrndx == SHN_UNDEF )
-    {
-        printk("ELF image has no section-header strings table (shstrtab).\n");
-        return -EINVAL;
-    }
-    shdr = (Elf_Shdr *)(image + ehdr->e_shoff +
-                        (ehdr->e_shstrndx*ehdr->e_shentsize));
-    shstrtab = image + shdr->sh_offset;
-
     dsi->__elfnote_section = NULL;
     dsi->__xen_guest_string = NULL;
 
@@ -244,6 +234,16 @@ int parseelfimage(struct domain_setup_in
     /* Fall back to looking for the special '__xen_guest' section. */
     if ( dsi->__elfnote_section == NULL )
     {
+        /* Find the section-header strings table. */
+        if ( ehdr->e_shstrndx == SHN_UNDEF )
+        {
+            printk("ELF image has no section-header strings table.\n");
+            return -EINVAL;
+        }
+        shdr = (Elf_Shdr *)(image + ehdr->e_shoff +
+                            (ehdr->e_shstrndx*ehdr->e_shentsize));
+        shstrtab = image + shdr->sh_offset;
+
         for ( h = 0; h < ehdr->e_shnum; h++ )
         {
             shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize));
@@ -286,6 +286,8 @@ int parseelfimage(struct domain_setup_in
     }
 
     /*
+     * A "bimodal" ELF note indicates the kernel will adjust to the
+     * current paging mode, including handling extended cr3 syntax.
      * If we have ELF notes then PAE=yes implies that we must support
      * the extended cr3 syntax. Otherwise we need to find the
      * [extended-cr3] syntax in the __xen_guest string.
@@ -294,9 +296,10 @@ int parseelfimage(struct domain_setup_in
     if ( dsi->__elfnote_section )
     {
         p = xen_elfnote_string(dsi, XEN_ELFNOTE_PAE_MODE);
-        if ( p != NULL && strncmp(p, "yes", 3) == 0 )
+        if ( p != NULL && strncmp(p, "bimodal", 7) == 0 )
+            dsi->pae_kernel = PAEKERN_bimodal;
+        else if ( p != NULL && strncmp(p, "yes", 3) == 0 )
             dsi->pae_kernel = PAEKERN_extended_cr3;
-
     }
     else
     {
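
Together with the domain_build.c hunk earlier in this patch, a "bimodal" PAE_MODE note means the kernel adapts to whatever paging mode Xen itself runs. A minimal sketch of the resulting decision (illustrative only; dom0_wants_pae is a made-up helper, while the PAEKERN_* values are the ones used above):

    static int dom0_wants_pae(int pae_kernel, int xen_pae)
    {
        if ( pae_kernel == PAEKERN_bimodal )
            return xen_pae;                   /* follow the hypervisor */
        return pae_kernel != PAEKERN_no;      /* otherwise the note decides */
    }
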
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/gdbstub.c
--- a/xen/common/gdbstub.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/gdbstub.c      Thu Dec 14 08:57:36 2006 -0700
@@ -42,6 +42,7 @@
 #include <xen/init.h>
 #include <xen/smp.h>
 #include <xen/console.h>
+#include <xen/errno.h>
 
 /* Printk isn't particularly safe just after we've trapped to the
    debugger. so avoid it. */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/kexec.c
--- a/xen/common/kexec.c        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/kexec.c        Thu Dec 14 08:57:36 2006 -0700
@@ -140,13 +140,21 @@ void machine_crash_kexec(void)
 
 static void do_crashdump_trigger(unsigned char key)
 {
-       printk("triggering crashdump\n");
-       machine_crash_kexec();
+    int pos = (test_bit(KEXEC_FLAG_CRASH_POS, &kexec_flags) != 0);
+    if ( test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
+    {
+        printk("'%c' pressed -> triggering crashdump\n", key);
+        machine_crash_kexec();
+    }
+    else
+    {
+        printk("'%c' pressed -> no crash kernel loaded -- not triggering 
crashdump\n", key);
+    }
 }
 
 static __init int register_crashdump_trigger(void)
 {
-       register_keyhandler('c', do_crashdump_trigger, "trigger a crashdump");
+       register_keyhandler('C', do_crashdump_trigger, "trigger a crashdump");
        return 0;
 }
 __initcall(register_crashdump_trigger);
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/sched_credit.c Thu Dec 14 08:57:36 2006 -0700
@@ -56,7 +56,12 @@
 #define CSCHED_PRI_TS_UNDER     -1      /* time-share w/ credits */
 #define CSCHED_PRI_TS_OVER      -2      /* time-share w/o credits */
 #define CSCHED_PRI_IDLE         -64     /* idle */
-#define CSCHED_PRI_TS_PARKED    -65     /* time-share w/ capped credits */
+
+
+/*
+ * Flags
+ */
+#define CSCHED_FLAG_VCPU_PARKED 0x0001  /* VCPU over capped credits */
 
 
 /*
@@ -100,26 +105,21 @@
     _MACRO(vcpu_wake_onrunq)                \
     _MACRO(vcpu_wake_runnable)              \
     _MACRO(vcpu_wake_not_runnable)          \
+    _MACRO(vcpu_park)                       \
+    _MACRO(vcpu_unpark)                     \
     _MACRO(tickle_local_idler)              \
     _MACRO(tickle_local_over)               \
     _MACRO(tickle_local_under)              \
     _MACRO(tickle_local_other)              \
     _MACRO(tickle_idlers_none)              \
     _MACRO(tickle_idlers_some)              \
-    _MACRO(vcpu_migrate)                    \
     _MACRO(load_balance_idle)               \
     _MACRO(load_balance_over)               \
     _MACRO(load_balance_other)              \
     _MACRO(steal_trylock_failed)            \
-    _MACRO(steal_peer_down)                 \
     _MACRO(steal_peer_idle)                 \
-    _MACRO(steal_peer_running)              \
-    _MACRO(steal_peer_pinned)               \
-    _MACRO(steal_peer_migrating)            \
-    _MACRO(steal_peer_best_idler)           \
-    _MACRO(steal_loner_candidate)           \
-    _MACRO(steal_loner_signal)              \
-    _MACRO(cpu_pick)                        \
+    _MACRO(migrate_queued)                  \
+    _MACRO(migrate_running)                 \
     _MACRO(dom_init)                        \
     _MACRO(dom_destroy)                     \
     _MACRO(vcpu_init)                       \
@@ -146,7 +146,7 @@
     struct                                      \
     {                                           \
         CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
-    } stats
+    } stats;
 
 #define CSCHED_STATS_PRINTK()                   \
     do                                          \
@@ -155,14 +155,27 @@
         CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
     } while ( 0 )
 
-#define CSCHED_STAT_CRANK(_X)   (CSCHED_STAT(_X)++)
+#define CSCHED_STAT_CRANK(_X)               (CSCHED_STAT(_X)++)
+
+#define CSCHED_VCPU_STATS_RESET(_V)                     \
+    do                                                  \
+    {                                                   \
+        memset(&(_V)->stats, 0, sizeof((_V)->stats));   \
+    } while ( 0 )
+
+#define CSCHED_VCPU_STAT_CRANK(_V, _X)      (((_V)->stats._X)++)
+
+#define CSCHED_VCPU_STAT_SET(_V, _X, _Y)    (((_V)->stats._X) = (_Y))
 
 #else /* CSCHED_STATS */
 
-#define CSCHED_STATS_RESET()    do {} while ( 0 )
-#define CSCHED_STATS_DEFINE()   do {} while ( 0 )
-#define CSCHED_STATS_PRINTK()   do {} while ( 0 )
-#define CSCHED_STAT_CRANK(_X)   do {} while ( 0 )
+#define CSCHED_STATS_RESET()                do {} while ( 0 )
+#define CSCHED_STATS_DEFINE()
+#define CSCHED_STATS_PRINTK()               do {} while ( 0 )
+#define CSCHED_STAT_CRANK(_X)               do {} while ( 0 )
+#define CSCHED_VCPU_STATS_RESET(_V)         do {} while ( 0 )
+#define CSCHED_VCPU_STAT_CRANK(_V, _X)      do {} while ( 0 )
+#define CSCHED_VCPU_STAT_SET(_V, _X, _Y)    do {} while ( 0 )
 
 #endif /* CSCHED_STATS */
 
@@ -184,14 +197,18 @@ struct csched_vcpu {
     struct csched_dom *sdom;
     struct vcpu *vcpu;
     atomic_t credit;
+    uint16_t flags;
     int16_t pri;
+#ifdef CSCHED_STATS
     struct {
         int credit_last;
         uint32_t credit_incr;
         uint32_t state_active;
         uint32_t state_idle;
-        uint32_t migrate;
+        uint32_t migrate_q;
+        uint32_t migrate_r;
     } stats;
+#endif
 };
 
 /*
@@ -219,7 +236,7 @@ struct csched_private {
     uint32_t credit;
     int credit_balance;
     uint32_t runq_sort;
-    CSCHED_STATS_DEFINE();
+    CSCHED_STATS_DEFINE()
 };
 
 
@@ -229,6 +246,15 @@ static struct csched_private csched_priv
 static struct csched_private csched_priv;
 
 
+
+static inline int
+__cycle_cpu(int cpu, const cpumask_t *mask)
+{
+    int nxt = next_cpu(cpu, *mask);
+    if (nxt == NR_CPUS)
+        nxt = first_cpu(*mask);
+    return nxt;
+}
 
 static inline int
 __vcpu_on_runq(struct csched_vcpu *svc)
@@ -375,118 +401,138 @@ __csched_vcpu_check(struct vcpu *vc)
 #define CSCHED_VCPU_CHECK(_vc)
 #endif
 
-/*
- * Indicates which of two given idlers is most efficient to run
- * an additional VCPU.
- *
- * Returns:
- *  0:           They are the same.
- *  negative:    One is less efficient than Two.
- *  positive:    One is more efficient than Two.
- */
-static int
-csched_idler_compare(int one, int two)
-{
-    cpumask_t idlers;
-    cpumask_t one_idlers;
-    cpumask_t two_idlers;
-
-    idlers = csched_priv.idlers;
-    cpu_clear(one, idlers);
-    cpu_clear(two, idlers);
-
-    if ( cpu_isset(one, cpu_core_map[two]) )
-    {
-        cpus_and(one_idlers, idlers, cpu_sibling_map[one]);
-        cpus_and(two_idlers, idlers, cpu_sibling_map[two]);
-    }
-    else
-    {
-        cpus_and(one_idlers, idlers, cpu_core_map[one]);
-        cpus_and(two_idlers, idlers, cpu_core_map[two]);
-    }
-
-    return cpus_weight(one_idlers) - cpus_weight(two_idlers);
-}
-
 static inline int
-__csched_queued_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+__csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
 {
     /*
      * Don't pick up work that's in the peer's scheduling tail. Also only pick
      * up work that's allowed to run on our CPU.
      */
-    if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
-    {
-        CSCHED_STAT_CRANK(steal_peer_running);
-        return 0;
-    }
-
-    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
-    {
-        CSCHED_STAT_CRANK(steal_peer_pinned);
-        return 0;
-    }
-
-    return 1;
-}
-
-static inline int
-__csched_running_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
-{
-    BUG_ON( is_idle_vcpu(vc) );
-
-    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
-    {
-        CSCHED_STAT_CRANK(steal_peer_pinned);
-        return 0;
-    }
-
-    if ( test_bit(_VCPUF_migrating, &vc->vcpu_flags) )
-    {
-        CSCHED_STAT_CRANK(steal_peer_migrating);
-        return 0;
-    }
-
-    if ( csched_idler_compare(local_cpu, vc->processor) <= 0 )
-    {
-        CSCHED_STAT_CRANK(steal_peer_best_idler);
-        return 0;
-    }
-
-    return 1;
-}
-
-static void
-csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
+    return !test_bit(_VCPUF_running, &vc->vcpu_flags) &&
+           cpu_isset(dest_cpu, vc->cpu_affinity);
+}
+
+static int
+csched_cpu_pick(struct vcpu *vc)
+{
+    cpumask_t cpus;
+    cpumask_t idlers;
+    int cpu;
+
+    /*
+     * Pick from online CPUs in VCPU's affinity mask, giving a
+     * preference to its current processor if it's in there.
+     */
+    cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
+    cpu = cpu_isset(vc->processor, cpus)
+            ? vc->processor
+            : __cycle_cpu(vc->processor, &cpus);
+    ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) );
+
+    /*
+     * Try to find an idle processor within the above constraints.
+     *
+     * In multi-core and multi-threaded CPUs, not all idle execution
+     * vehicles are equal!
+     *
+     * We give preference to the idle execution vehicle with the most
+     * idling neighbours in its grouping. This distributes work across
+     * distinct cores first and guarantees we don't do something stupid
+     * like run two VCPUs on co-hyperthreads while there are idle cores
+     * or sockets.
+     */
+    idlers = csched_priv.idlers;
+    cpu_set(cpu, idlers);
+    cpus_and(cpus, cpus, idlers);
+    cpu_clear(cpu, cpus);
+
+    while ( !cpus_empty(cpus) )
+    {
+        cpumask_t cpu_idlers;
+        cpumask_t nxt_idlers;
+        int nxt;
+
+        nxt = __cycle_cpu(cpu, &cpus);
+
+        if ( cpu_isset(cpu, cpu_core_map[nxt]) )
+        {
+            ASSERT( cpu_isset(nxt, cpu_core_map[cpu]) );
+            cpus_and(cpu_idlers, idlers, cpu_sibling_map[cpu]);
+            cpus_and(nxt_idlers, idlers, cpu_sibling_map[nxt]);
+        }
+        else
+        {
+            ASSERT( !cpu_isset(nxt, cpu_core_map[cpu]) );
+            cpus_and(cpu_idlers, idlers, cpu_core_map[cpu]);
+            cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]);
+        }
+
+        if ( cpus_weight(cpu_idlers) < cpus_weight(nxt_idlers) )
+        {
+            cpu = nxt;
+            cpu_clear(cpu, cpus);
+        }
+        else
+        {
+            cpus_andnot(cpus, cpus, nxt_idlers);
+        }
+    }
+
+    return cpu;
+}
+
+static inline void
+__csched_vcpu_acct_start(struct csched_vcpu *svc)
 {
     struct csched_dom * const sdom = svc->sdom;
     unsigned long flags;
 
-    /* Update credits */
-    atomic_sub(credit_dec, &svc->credit);
-
-    /* Put this VCPU and domain back on the active list if it was idling */
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
     if ( list_empty(&svc->active_vcpu_elem) )
     {
-        spin_lock_irqsave(&csched_priv.lock, flags);
-
-        if ( list_empty(&svc->active_vcpu_elem) )
-        {
-            CSCHED_STAT_CRANK(acct_vcpu_active);
-            svc->stats.state_active++;
-
-            sdom->active_vcpu_count++;
-            list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
-            if ( list_empty(&sdom->active_sdom_elem) )
-            {
-                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
-                csched_priv.weight += sdom->weight;
-            }
-        }
-
-        spin_unlock_irqrestore(&csched_priv.lock, flags);
-    }
+        CSCHED_VCPU_STAT_CRANK(svc, state_active);
+        CSCHED_STAT_CRANK(acct_vcpu_active);
+
+        sdom->active_vcpu_count++;
+        list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
+        if ( list_empty(&sdom->active_sdom_elem) )
+        {
+            list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+            csched_priv.weight += sdom->weight;
+        }
+    }
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+}
+
+static inline void
+__csched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    BUG_ON( list_empty(&svc->active_vcpu_elem) );
+
+    CSCHED_VCPU_STAT_CRANK(svc, state_idle);
+    CSCHED_STAT_CRANK(acct_vcpu_idle);
+
+    sdom->active_vcpu_count--;
+    list_del_init(&svc->active_vcpu_elem);
+    if ( list_empty(&sdom->active_vcpu) )
+    {
+        BUG_ON( csched_priv.weight < sdom->weight );
+        list_del_init(&sdom->active_sdom_elem);
+        csched_priv.weight -= sdom->weight;
+    }
+}
+
+static void
+csched_vcpu_acct(unsigned int cpu)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(current);
+
+    ASSERT( current->processor == cpu );
+    ASSERT( svc->sdom != NULL );
 
     /*
      * If this VCPU's priority was boosted when it last awoke, reset it.
@@ -495,25 +541,30 @@ csched_vcpu_acct(struct csched_vcpu *svc
      */
     if ( svc->pri == CSCHED_PRI_TS_BOOST )
         svc->pri = CSCHED_PRI_TS_UNDER;
-}
-
-static inline void
-__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
-{
-    struct csched_dom * const sdom = svc->sdom;
-
-    BUG_ON( list_empty(&svc->active_vcpu_elem) );
-
-    CSCHED_STAT_CRANK(acct_vcpu_idle);
-    svc->stats.state_idle++;
-
-    sdom->active_vcpu_count--;
-    list_del_init(&svc->active_vcpu_elem);
-    if ( list_empty(&sdom->active_vcpu) )
-    {
-        BUG_ON( csched_priv.weight < sdom->weight );
-        list_del_init(&sdom->active_sdom_elem);
-        csched_priv.weight -= sdom->weight;
+
+    /*
+     * Update credits
+     */
+    atomic_sub(CSCHED_CREDITS_PER_TICK, &svc->credit);
+
+    /*
+     * Put this VCPU and domain back on the active list if it was
+     * idling.
+     *
+     * If it's been active a while, check if we'd be better off
+     * migrating it to run elsewhere (see multi-core and multi-thread
+     * support in csched_cpu_pick()).
+     */
+    if ( list_empty(&svc->active_vcpu_elem) )
+    {
+        __csched_vcpu_acct_start(svc);
+    }
+    else if ( csched_cpu_pick(current) != cpu )
+    {
+        CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
+        CSCHED_STAT_CRANK(migrate_running);
+        set_bit(_VCPUF_migrating, &current->vcpu_flags);
+        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
     }
 }
 
@@ -536,15 +587,10 @@ csched_vcpu_init(struct vcpu *vc)
     svc->sdom = sdom;
     svc->vcpu = vc;
     atomic_set(&svc->credit, 0);
+    svc->flags = 0U;
     svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
-    memset(&svc->stats, 0, sizeof(svc->stats));
+    CSCHED_VCPU_STATS_RESET(svc);
     vc->sched_priv = svc;
-
-    CSCHED_VCPU_CHECK(vc);
-
-    /* Attach fair-share VCPUs to the accounting list */
-    if ( likely(sdom != NULL) )
-        csched_vcpu_acct(svc, 0);
 
     /* Allocate per-PCPU info */
     if ( unlikely(!CSCHED_PCPU(vc->processor)) )
@@ -554,7 +600,6 @@ csched_vcpu_init(struct vcpu *vc)
     }
 
     CSCHED_VCPU_CHECK(vc);
-
     return 0;
 }
 
@@ -573,7 +618,7 @@ csched_vcpu_destroy(struct vcpu *vc)
     spin_lock_irqsave(&csched_priv.lock, flags);
 
     if ( !list_empty(&svc->active_vcpu_elem) )
-        __csched_vcpu_acct_idle_locked(svc);
+        __csched_vcpu_acct_stop_locked(svc);
 
     spin_unlock_irqrestore(&csched_priv.lock, flags);
 
@@ -634,9 +679,16 @@ csched_vcpu_wake(struct vcpu *vc)
      * This allows wake-to-run latency sensitive VCPUs to preempt
      * more CPU resource intensive VCPUs without impacting overall 
      * system fairness.
-     */
-    if ( svc->pri == CSCHED_PRI_TS_UNDER )
+     *
+     * The one exception is for VCPUs of capped domains unpausing
+     * after earning credits they had overspent. We don't boost
+     * those.
+     */
+    if ( svc->pri == CSCHED_PRI_TS_UNDER &&
+         !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
+    {
         svc->pri = CSCHED_PRI_TS_BOOST;
+    }
 
     /* Put the VCPU on the runq and tickle CPUs */
     __runq_insert(cpu, svc);
@@ -710,71 +762,8 @@ static void
 static void
 csched_dom_destroy(struct domain *dom)
 {
-    struct csched_dom * const sdom = CSCHED_DOM(dom);
-
     CSCHED_STAT_CRANK(dom_destroy);
-
-    xfree(sdom);
-}
-
-static int
-csched_cpu_pick(struct vcpu *vc)
-{
-    cpumask_t cpus;
-    int cpu, nxt;
-
-    CSCHED_STAT_CRANK(cpu_pick);
-
-    /*
-     * Pick from online CPUs in VCPU's affinity mask, giving a
-     * preference to its current processor if it's in there.
-     */
-    cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
-    ASSERT( !cpus_empty(cpus) );
-    cpu = cpu_isset(vc->processor, cpus) ? vc->processor : first_cpu(cpus);
-
-    /*
-     * Try to find an idle processor within the above constraints.
-     */
-    cpus_and(cpus, cpus, csched_priv.idlers);
-    if ( !cpus_empty(cpus) )
-    {
-        cpu = cpu_isset(cpu, cpus) ? cpu : first_cpu(cpus);
-        cpu_clear(cpu, cpus);
-
-        /*
-         * In multi-core and multi-threaded CPUs, not all idle execution
-         * vehicles are equal!
-         *
-         * We give preference to the idle execution vehicle with the most
-         * idling neighbours in its grouping. This distributes work across
-         * distinct cores first and guarantees we don't do something stupid
-         * like run two VCPUs on co-hyperthreads while there are idle cores
-         * or sockets.
-         */
-        while ( !cpus_empty(cpus) )
-        {
-            nxt = first_cpu(cpus);
-
-            if ( csched_idler_compare(cpu, nxt) < 0 )
-            {
-                cpu = nxt;
-                cpu_clear(nxt, cpus);
-            }
-            else if ( cpu_isset(cpu, cpu_core_map[nxt]) )
-            {
-                cpus_andnot(cpus, cpus, cpu_sibling_map[nxt]);
-            }
-            else
-            {
-                cpus_andnot(cpus, cpus, cpu_core_map[nxt]);
-            }
-
-            ASSERT( !cpu_isset(nxt, cpus) );
-        }
-    }
-
-    return cpu;
+    xfree(CSCHED_DOM(dom));
 }
 
 /*
@@ -963,11 +952,19 @@ csched_acct(void)
              */
             if ( credit < 0 )
             {
-                if ( sdom->cap != 0U && credit < -credit_cap )
-                    svc->pri = CSCHED_PRI_TS_PARKED;
-                else
-                    svc->pri = CSCHED_PRI_TS_OVER;
-
+                svc->pri = CSCHED_PRI_TS_OVER;
+
+                /* Park running VCPUs of capped-out domains */
+                if ( sdom->cap != 0U &&
+                     credit < -credit_cap &&
+                     !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
+                {
+                    CSCHED_STAT_CRANK(vcpu_park);
+                    vcpu_pause_nosync(svc->vcpu);
+                    svc->flags |= CSCHED_FLAG_VCPU_PARKED;
+                }
+
+                /* Lower bound on credits */
                 if ( credit < -CSCHED_CREDITS_PER_TSLICE )
                 {
                     CSCHED_STAT_CRANK(acct_min_credit);
@@ -979,16 +976,30 @@ csched_acct(void)
             {
                 svc->pri = CSCHED_PRI_TS_UNDER;
 
+                /* Unpark any capped domains whose credits go positive */
+                if ( svc->flags & CSCHED_FLAG_VCPU_PARKED )
+                {
+                    /*
+                     * It's important to unset the flag AFTER the unpause()
+                     * call to make sure the VCPU's priority is not boosted
+                     * if it is woken up here.
+                     */
+                    CSCHED_STAT_CRANK(vcpu_unpark);
+                    vcpu_unpause(svc->vcpu);
+                    svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
+                }
+
+                /* Upper bound on credits means VCPU stops earning */
                 if ( credit > CSCHED_CREDITS_PER_TSLICE )
                 {
-                    __csched_vcpu_acct_idle_locked(svc);
+                    __csched_vcpu_acct_stop_locked(svc);
                     credit = 0;
                     atomic_set(&svc->credit, credit);
                 }
             }
 
-            svc->stats.credit_last = credit;
-            svc->stats.credit_incr = credit_fair;
+            CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
+            CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
             credit_balance += credit;
         }
     }
@@ -1004,21 +1015,14 @@ static void
 static void
 csched_tick(unsigned int cpu)
 {
-    struct csched_vcpu * const svc = CSCHED_VCPU(current);
-    struct csched_dom * const sdom = svc->sdom;
-
     /*
      * Accounting for running VCPU
-     *
-     * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
-     */
-    if ( likely(sdom != NULL) )
-    {
-        csched_vcpu_acct(svc, CSCHED_CREDITS_PER_TICK);
-    }
-
-    /*
-     * Accounting duty
+     */
+    if ( !is_idle_vcpu(current) )
+        csched_vcpu_acct(cpu);
+
+    /*
+     * Host-wide accounting duty
      *
      * Note: Currently, this is always done by the master boot CPU. Eventually,
      * we could distribute or at the very least cycle the duty.
@@ -1040,40 +1044,48 @@ csched_tick(unsigned int cpu)
 }
 
 static struct csched_vcpu *
-csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
-{
+csched_runq_steal(int peer_cpu, int cpu, int pri)
+{
+    const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu);
+    const struct vcpu * const peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
+    struct csched_vcpu *speer;
     struct list_head *iter;
-    struct csched_vcpu *speer;
     struct vcpu *vc;
 
-    list_for_each( iter, &spc->runq )
-    {
-        speer = __runq_elem(iter);
-
-        /*
-         * If next available VCPU here is not of higher priority than ours,
-         * this PCPU is useless to us.
-         */
-        if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
-        {
-            CSCHED_STAT_CRANK(steal_peer_idle);
-            break;
-        }
-
-        /* Is this VCPU is runnable on our PCPU? */
-        vc = speer->vcpu;
-        BUG_ON( is_idle_vcpu(vc) );
-
-        if ( __csched_queued_vcpu_is_stealable(cpu, vc) )
-        {
-            /* We got a candidate. Grab it! */
-            __runq_remove(speer);
-            vc->processor = cpu;
-
-            return speer;
-        }
-    }
-
+    /*
+     * Don't steal from an idle CPU's runq, because that CPU is about
+     * to pick up the queued work itself.
+     */
+    if ( peer_pcpu != NULL && !is_idle_vcpu(peer_vcpu) )
+    {
+        list_for_each( iter, &peer_pcpu->runq )
+        {
+            speer = __runq_elem(iter);
+
+            /*
+             * If the next available VCPU here is not of strictly higher
+             * priority than ours, this PCPU is useless to us.
+             */
+            if ( speer->pri <= pri )
+                break;
+
+            /* Is this VCPU runnable on our PCPU? */
+            vc = speer->vcpu;
+            BUG_ON( is_idle_vcpu(vc) );
+
+            if (__csched_vcpu_is_migrateable(vc, cpu))
+            {
+                /* We got a candidate. Grab it! */
+                CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
+                CSCHED_STAT_CRANK(migrate_queued);
+                __runq_remove(speer);
+                vc->processor = cpu;
+                return speer;
+            }
+        }
+    }
+
+    CSCHED_STAT_CRANK(steal_peer_idle);
     return NULL;
 }
 
@@ -1081,11 +1093,10 @@ csched_load_balance(int cpu, struct csch
 csched_load_balance(int cpu, struct csched_vcpu *snext)
 {
     struct csched_vcpu *speer;
-    struct csched_pcpu *spc;
-    struct vcpu *peer_vcpu;
     cpumask_t workers;
-    cpumask_t loners;
     int peer_cpu;
+
+    BUG_ON( cpu != snext->vcpu->processor );
 
     if ( snext->pri == CSCHED_PRI_IDLE )
         CSCHED_STAT_CRANK(load_balance_idle);
@@ -1095,22 +1106,16 @@ csched_load_balance(int cpu, struct csch
         CSCHED_STAT_CRANK(load_balance_other);
 
     /*
-     * Peek at non-idling CPUs in the system
-     */
-    cpus_clear(loners);
+     * Peek at non-idling CPUs in the system, starting with our
+     * immediate neighbour.
+     */
     cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
     cpu_clear(cpu, workers);
-
     peer_cpu = cpu;
-    BUG_ON( peer_cpu != snext->vcpu->processor );
 
     while ( !cpus_empty(workers) )
     {
-        /* For each CPU of interest, starting with our neighbour... */
-        peer_cpu = next_cpu(peer_cpu, workers);
-        if ( peer_cpu == NR_CPUS )
-            peer_cpu = first_cpu(workers);
-
+        peer_cpu = __cycle_cpu(peer_cpu, &workers);
         cpu_clear(peer_cpu, workers);
 
         /*
@@ -1126,83 +1131,13 @@ csched_load_balance(int cpu, struct csch
             continue;
         }
 
-        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
-        spc = CSCHED_PCPU(peer_cpu);
-
-        if ( unlikely(spc == NULL) )
-        {
-            CSCHED_STAT_CRANK(steal_peer_down);
-        }
-        else if ( unlikely(is_idle_vcpu(peer_vcpu)) )
-        {
-            /*
-             * Don't steal from an idle CPU's runq because it's about to
-             * pick up work from it itself.
-             */
-            CSCHED_STAT_CRANK(steal_peer_idle);
-        }
-        else if ( is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) )
-        {
-            if ( snext->pri == CSCHED_PRI_IDLE &&
-                 __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
-            {
-                CSCHED_STAT_CRANK(steal_loner_candidate);
-                cpu_set(peer_cpu, loners);
-            }
-        }
-        else
-        {
-            /* Try to steal work from a remote CPU's runq. */
-            speer = csched_runq_steal(spc, cpu, snext->pri);
-            if ( speer != NULL )
-            {
-                spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
-                CSCHED_STAT_CRANK(vcpu_migrate);
-                speer->stats.migrate++;
-                return speer;
-            }
-        }
-
+        /*
+         * Any work over there to steal?
+         */
+        speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
         spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
-    }
-
-    /*
-     * If we failed to find any remotely queued VCPUs to move here,
-     * see if it would be more efficient to move any of the running
-     * remote VCPUs over here.
-     */
-    while ( !cpus_empty(loners) )
-    {
-        /* For each CPU of interest, starting with our neighbour... */
-        peer_cpu = next_cpu(peer_cpu, loners);
-        if ( peer_cpu == NR_CPUS )
-            peer_cpu = first_cpu(loners);
-
-        cpu_clear(peer_cpu, loners);
-
-        if ( !spin_trylock(&per_cpu(schedule_data, peer_cpu).schedule_lock) )
-        {
-            CSCHED_STAT_CRANK(steal_trylock_failed);
-            continue;
-        }
-
-        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
-        spc = CSCHED_PCPU(peer_cpu);
-
-        /* Signal the first candidate only. */
-        if ( !is_idle_vcpu(peer_vcpu) &&
-             is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) &&
-             __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
-        {
-            set_bit(_VCPUF_migrating, &peer_vcpu->vcpu_flags);
-            spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
-
-            CSCHED_STAT_CRANK(steal_loner_signal);
-            cpu_raise_softirq(peer_cpu, SCHEDULE_SOFTIRQ);
-            break;
-        }
-
-        spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
+        if ( speer != NULL )
+            return speer;
     }
 
     /* Failed to find more important work elsewhere... */
@@ -1270,7 +1205,6 @@ csched_schedule(s_time_t now)
     ret.task = snext->vcpu;
 
     CSCHED_VCPU_CHECK(ret.task);
-
     return ret;
 }
 
@@ -1279,22 +1213,25 @@ csched_dump_vcpu(struct csched_vcpu *svc
 {
     struct csched_dom * const sdom = svc->sdom;
 
-    printk("[%i.%i] pri=%i cpu=%i",
+    printk("[%i.%i] pri=%i flags=%x cpu=%i",
             svc->vcpu->domain->domain_id,
             svc->vcpu->vcpu_id,
             svc->pri,
+            svc->flags,
             svc->vcpu->processor);
 
     if ( sdom )
     {
-        printk(" credit=%i (%d+%u) {a/i=%u/%u m=%u w=%u}",
-            atomic_read(&svc->credit),
-            svc->stats.credit_last,
-            svc->stats.credit_incr,
-            svc->stats.state_active,
-            svc->stats.state_idle,
-            svc->stats.migrate,
-            sdom->weight);
+        printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
+#ifdef CSCHED_STATS
+        printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
+                svc->stats.credit_last,
+                svc->stats.credit_incr,
+                svc->stats.state_active,
+                svc->stats.state_idle,
+                svc->stats.migrate_q,
+                svc->stats.migrate_r);
+#endif
     }
 
     printk("\n");
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/cache.h
--- a/xen/include/asm-powerpc/cache.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/cache.h   Thu Dec 14 08:57:36 2006 -0700
@@ -70,4 +70,5 @@ struct cpu_caches {
     u32 ilines_per_page;
 };
 extern struct cpu_caches cpu_caches;
+extern void cpu_flush_icache(void);
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/config.h
--- a/xen/include/asm-powerpc/config.h  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/config.h  Thu Dec 14 08:57:36 2006 -0700
@@ -21,7 +21,7 @@
 #ifndef __PPC_CONFIG_H__
 #define __PPC_CONFIG_H__
 
-#define CONFIG_MAMBO 1
+#define CONFIG_SYSTEMSIM 1
 #define HYPERVISOR_VIRT_START 0x0 /* XXX temp hack for common/kernel.c */
 
 
@@ -50,6 +50,8 @@ extern char __bss_start[];
 #define CONFIG_GDB 1
 #define CONFIG_SMP 1
 #define CONFIG_PCI 1
+#define CONFIG_NUMA 1
+#define CONFIG_CMDLINE_SIZE 512
 #define NR_CPUS 16
 
 #ifndef ELFSIZE
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/debugger.h
--- a/xen/include/asm-powerpc/debugger.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/debugger.h        Thu Dec 14 08:57:36 2006 -0700
@@ -13,13 +13,68 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #ifndef _ASM_DEBUGGER_H_
 #define _ASM_DEBUGGER_H_
+
+#include <public/xen.h>
+
+extern void show_backtrace_regs(struct cpu_user_regs *);
+extern void show_backtrace(ulong sp, ulong lr, ulong pc);
+
+static inline void show_execution_state(struct cpu_user_regs *regs)
+{
+    show_registers(regs);
+}
+
+extern void dump_execution_state(void);
+
+static inline void dump_all_execution_state(void)
+{
+    ulong sp;
+    ulong lr;
+
+    dump_execution_state();
+    sp = (ulong)__builtin_frame_address(0);
+    lr = (ulong)__builtin_return_address(0);
+
+    show_backtrace(sp, lr, lr);
+}
+
+static inline void __force_crash(void)
+{
+    dump_all_execution_state();
+    __builtin_trap();
+}
+
+static inline void debugger_trap_immediate(void)
+{
+    dump_all_execution_state();
+#ifdef CRASH_DEBUG
+    __builtin_trap();
+#endif
+}
+
+static inline void unimplemented(void)
+{
+#ifdef VERBOSE
+    dump_all_execution_state();
+#endif
+}
+
+extern void __warn(char *file, int line);
+#define WARN() __warn(__FILE__, __LINE__)
+#define WARN_ON(_p) do { if (_p) WARN(); } while ( 0 )
+
+extern void __attn(void);
+#define ATTN() __attn();
+
+#define FORCE_CRASH() __force_crash()
 
 #ifdef CRASH_DEBUG
 
@@ -32,8 +87,6 @@ static inline int debugger_trap_fatal(
     return vector;
 }
 
-#define debugger_trap_immediate() __asm__ __volatile__ ("trap");
-
 #else /* CRASH_DEBUG */
 
 static inline int debugger_trap_fatal(
@@ -43,17 +96,6 @@ static inline int debugger_trap_fatal(
     return vector;
 }
 
-static inline void debugger_trap_immediate(void)
-{
-    ulong sp;
-    ulong lr;
-
-    sp = (ulong)__builtin_frame_address(0);
-    lr = (ulong)__builtin_return_address(0);
-
-    show_backtrace(sp, lr, lr);
-}
-
 #endif /* CRASH_DEBUG */
 
 #endif
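
debugger.h now carries the WARN()/WARN_ON(), unimplemented(), and crash/backtrace helpers that other PowerPC files in this patch switch to via #include <asm/debugger.h> (time.c, mm.h, flushtlb.h drop asm/misc.h above). A minimal usage sketch (illustrative only; check_decrementer is a made-up function):

    static void check_decrementer(ulong val)
    {
        WARN_ON(val == 0);     /* prints file/line via __warn(), then continues */
        if ( val == 0 )
            unimplemented();   /* dumps execution state only in VERBOSE builds */
    }
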
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/delay.h
--- a/xen/include/asm-powerpc/delay.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/delay.h   Thu Dec 14 08:57:36 2006 -0700
@@ -13,16 +13,28 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #ifndef _ASM_DELAY_H_
 #define _ASM_DELAY_H_
 
+#include <asm/time.h>
+
 extern unsigned long ticks_per_usec; 
 #define __udelay udelay
-extern void udelay(unsigned long usecs);
+static inline void udelay(unsigned long usecs)
+{
+    unsigned long ticks = usecs * ticks_per_usec;
+    unsigned long s;
+    unsigned long e;
 
+    s = get_timebase();
+    do {
+        e = get_timebase();
+    } while ((e-s) < ticks);
+}
 #endif
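
udelay() is now an inline busy-wait on the timebase: it converts microseconds to ticks via ticks_per_usec and spins until get_timebase() has advanced that far. A minimal sketch of a bounded poll built on it (illustrative only; wait_ready and reg_ready are made-up names):

    static int wait_ready(int (*reg_ready)(void), unsigned int timeout_usecs)
    {
        while ( !reg_ready() )
        {
            if ( timeout_usecs-- == 0 )
                return 0;      /* timed out */
            udelay(1);         /* spin ~ticks_per_usec timebase ticks */
        }
        return 1;
    }
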
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/domain.h
--- a/xen/include/asm-powerpc/domain.h  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/domain.h  Thu Dec 14 08:57:36 2006 -0700
@@ -40,6 +40,9 @@ struct arch_domain {
 
     /* list of extents beyond RMA */
     struct list_head extent_list;
+
+    uint foreign_mfn_count;
+    uint *foreign_mfns;
 
     /* I/O-port access bitmap mask. */
     u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.      */
@@ -86,7 +89,7 @@ struct arch_vcpu {
     struct slb_entry slb_entries[NUM_SLB_ENTRIES];
 
     /* I/O-port access bitmap. */
-    u8 *iobmp;        /* Guest kernel virtual address of the bitmap. */
+    XEN_GUEST_HANDLE(uint8_t) iobmp; /* Guest kernel virtual address of the bitmap. */
     int iobmp_limit;  /* Number of ports represented in the bitmap.  */
     int iopl;         /* Current IOPL for this VCPU. */
 
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/flushtlb.h
--- a/xen/include/asm-powerpc/flushtlb.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/flushtlb.h        Thu Dec 14 08:57:36 2006 -0700
@@ -24,7 +24,6 @@
 #include <xen/config.h>
 #include <xen/percpu.h>
 #include <xen/types.h>
-#include <asm/misc.h>
 
 /* The current time as shown by the virtual TLB clock. */
 extern u32 tlbflush_clock;
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/grant_table.h
--- a/xen/include/asm-powerpc/grant_table.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/grant_table.h     Thu Dec 14 08:57:36 2006 -0700
@@ -29,6 +29,10 @@
  * Caller must own caller's BIGLOCK, is responsible for flushing the TLB, and
  * must hold a reference to the page.
  */
+extern long pte_enter(ulong flags, ulong ptex, ulong vsid, ulong rpn);
+extern long pte_remove(ulong flags, ulong ptex, ulong avpn,
+                       ulong *hi, ulong *lo);
+
 int create_grant_host_mapping(
     unsigned long addr, unsigned long frame, unsigned int flags);
 int destroy_grant_host_mapping(
@@ -41,8 +45,7 @@ int destroy_grant_host_mapping(
             (d), XENSHARE_writable);                                     \
     } while ( 0 )
 
-#define gnttab_shared_mfn(d, t, i)                      \
-    ((virt_to_maddr((t)->shared) >> PAGE_SHIFT) + (i))
+#define gnttab_shared_mfn(d, t, i) (((ulong)((t)->shared) >> PAGE_SHIFT) + (i))
 
 #define gnttab_shared_gmfn(d, t, i)                     \
     (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i)))
@@ -61,4 +64,9 @@ static inline void gnttab_clear_flag(uns
     clear_bit(lnr, laddr);
 }
 
+static inline uint cpu_foreign_map_order(void)
+{
+    /* 16 GiB */
+    return 34 - PAGE_SHIFT;
+}
 #endif  /* __ASM_PPC_GRANT_TABLE_H__ */
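
cpu_foreign_map_order() sizes the foreign mapping area in pages. A worked example of the arithmetic (assuming the usual 4 KiB pages, i.e. PAGE_SHIFT == 12, which is an assumption rather than something stated in the hunk):

    /* order  = 34 - PAGE_SHIFT = 22
     * window = (1UL << 22) pages * 4 KiB/page = 2^34 bytes = 16 GiB,
     * matching the "16 GiB" comment above. */
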
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/guest_access.h
--- a/xen/include/asm-powerpc/guest_access.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/guest_access.h    Thu Dec 14 08:57:36 2006 -0700
@@ -21,82 +21,6 @@
 #ifndef __PPC_GUEST_ACCESS_H__
 #define __PPC_GUEST_ACCESS_H__
 
-extern unsigned long xencomm_copy_to_guest(void *to, const void *from,
-        unsigned int len, unsigned int skip); 
-extern unsigned long xencomm_copy_from_guest(void *to, const void *from,
-        unsigned int len, unsigned int skip); 
-extern int xencomm_add_offset(void *handle, unsigned int bytes);
-extern int xencomm_handle_is_null(void *ptr);
-
-
-/* Is the guest handle a NULL reference? */
-#define guest_handle_is_null(hnd) \
-    ((hnd).p == NULL || xencomm_handle_is_null((hnd).p))
-
-/* Offset the given guest handle into the array it refers to. */
-#define guest_handle_add_offset(hnd, nr) ({         \
-    const typeof((hnd).p) _ptr = (hnd).p;           \
-    xencomm_add_offset(_ptr, nr * sizeof(*_ptr));   \
-})
-
-/* Cast a guest handle to the specified type of handle. */
-#define guest_handle_cast(hnd, type) ({         \
-    type *_x = (hnd).p;                         \
-    XEN_GUEST_HANDLE(type) _y; \
-    set_xen_guest_handle(_y, _x); \
-    _y; \
-})
-
-/* Since we run in real mode, we can safely access all addresses. That also
- * means our __routines are identical to our "normal" routines. */
-#define guest_handle_okay(hnd, nr) 1
-
-/*
- * Copy an array of objects to guest context via a guest handle.
- * Optionally specify an offset into the guest array.
- */
-#define copy_to_guest_offset(hnd, idx, ptr, nr) \
-    __copy_to_guest_offset(hnd, idx, ptr, nr)
-
-/* Copy sub-field of a structure to guest context via a guest handle. */
-#define copy_field_to_guest(hnd, ptr, field) \
-    __copy_field_to_guest(hnd, ptr, field)
-
-/*
- * Copy an array of objects from guest context via a guest handle.
- * Optionally specify an offset into the guest array.
- */
-#define copy_from_guest_offset(ptr, hnd, idx, nr) \
-    __copy_from_guest_offset(ptr, hnd, idx, nr)
-
-/* Copy sub-field of a structure from guest context via a guest handle. */
-#define copy_field_from_guest(ptr, hnd, field) \
-    __copy_field_from_guest(ptr, hnd, field)
-
-#define __copy_to_guest_offset(hnd, idx, ptr, nr) ({                \
-    const typeof(ptr) _x = (hnd).p;                                 \
-    const typeof(ptr) _y = (ptr);                                   \
-    xencomm_copy_to_guest(_x, _y, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \
-})
-
-#define __copy_field_to_guest(hnd, ptr, field) ({                   \
-    const int _off = offsetof(typeof(*ptr), field);                  \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;               \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;                 \
-    xencomm_copy_to_guest(_x, _y, sizeof(*_x), sizeof(*_x)*(_off)); \
-})
-
-#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({              \
-    const typeof(ptr) _x = (hnd).p;                                 \
-    const typeof(ptr) _y = (ptr);                                   \
-    xencomm_copy_from_guest(_y, _x, sizeof(*_x)*(nr), sizeof(*_x)*(idx));  \
-})
-
-#define __copy_field_from_guest(ptr, hnd, field) ({                 \
-    const int _off = offsetof(typeof(*ptr), field);                 \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;               \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;                 \
-    xencomm_copy_to_guest(_y, _x, sizeof(*_x), sizeof(*_x)*(_off)); \
-})
+#include <xen/xencomm.h>
 
 #endif /* __PPC_GUEST_ACCESS_H__ */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/mach-default/irq_vectors.h
--- a/xen/include/asm-powerpc/mach-default/irq_vectors.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/mach-default/irq_vectors.h        Thu Dec 14 08:57:36 2006 -0700
@@ -37,26 +37,10 @@
 #define FAST_TRAP -1 /* 0x80 */
 #define FIRST_SYSTEM_VECTOR    -1
 
+#define CALL_FUNCTION_VECTOR   0x0
+#define EVENT_CHECK_VECTOR     0x1
+
 #if 0
-
-/*
- * Vectors 0-16 in some cases are used for ISA interrupts.
- */
-
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- *  some of the following vectors are 'rare', they are merged
- *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- *  TLB, reschedule and local APIC vectors are performance-critical.
- *
- *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
- */
-#define SPURIOUS_APIC_VECTOR   0xff
-#define ERROR_APIC_VECTOR      0xfe
-#define INVALIDATE_TLB_VECTOR  0xfd
-#define EVENT_CHECK_VECTOR     0xfc
-#define CALL_FUNCTION_VECTOR   0xfb
 
 #define THERMAL_APIC_VECTOR    0xf0
 /*
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/mm.h      Thu Dec 14 08:57:36 2006 -0700
@@ -13,9 +13,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #ifndef _ASM_MM_H_
@@ -25,10 +26,10 @@
 #include <xen/list.h>
 #include <xen/types.h>
 #include <xen/mm.h>
-#include <asm/misc.h>
 #include <asm/system.h>
 #include <asm/flushtlb.h>
-#include <asm/uaccess.h>
+#include <asm/page.h>
+#include <asm/debugger.h>
 
 #define memguard_guard_range(_p,_l)    ((void)0)
 #define memguard_unguard_range(_p,_l)    ((void)0)
@@ -86,39 +87,38 @@ struct page_extents {
     /* page extent */
     struct page_info *pg;
     uint order;
-    ulong pfn;
 };
 
  /* The following page types are MUTUALLY EXCLUSIVE. */
-#define PGT_none            (0<<29) /* no special uses of this page */
-#define PGT_RMA             (1<<29) /* This page is an RMA page? */
-#define PGT_writable_page   (7<<29) /* has writable mappings of this page? */
-#define PGT_type_mask       (7<<29) /* Bits 29-31. */
+#define PGT_none            (0UL<<29) /* no special uses of this page */
+#define PGT_RMA             (1UL<<29) /* This page is an RMA page? */
+#define PGT_writable_page   (7UL<<29) /* has writable mappings of this page? */
+#define PGT_type_mask       (7UL<<29) /* Bits 29-31. */
 
  /* Owning guest has pinned this page to its current type? */
 #define _PGT_pinned         28
-#define PGT_pinned          (1U<<_PGT_pinned)
+#define PGT_pinned          (1UL<<_PGT_pinned)
  /* Has this page been validated for use as its current type? */
 #define _PGT_validated      27
-#define PGT_validated       (1U<<_PGT_validated)
+#define PGT_validated       (1UL<<_PGT_validated)
 
  /* 16-bit count of uses of this frame as its current type. */
-#define PGT_count_mask      ((1U<<16)-1)
+#define PGT_count_mask      ((1UL<<16)-1)
 
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
-#define PGC_allocated       (1U<<_PGC_allocated)
+#define PGC_allocated       (1UL<<_PGC_allocated)
  /* Set on a *guest* page to mark it out-of-sync with its shadow */
 #define _PGC_out_of_sync     30
-#define PGC_out_of_sync     (1U<<_PGC_out_of_sync)
+#define PGC_out_of_sync     (1UL<<_PGC_out_of_sync)
 /* Set when a page is in use as a page table */
 #define _PGC_page_table      29
-#define PGC_page_table      (1U<<_PGC_page_table)
+#define PGC_page_table      (1UL<<_PGC_page_table)
 /* Set when using page for RMA */
 #define _PGC_page_RMA      28
-#define PGC_page_RMA      (1U<<_PGC_page_RMA)
+#define PGC_page_RMA      (1UL<<_PGC_page_RMA)
  /* 29-bit count of references to this frame. */
-#define PGC_count_mask      ((1U<<28)-1)
+#define PGC_count_mask      ((1UL<<28)-1)
 
 #define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
 
@@ -132,6 +132,13 @@ static inline u32 pickle_domptr(struct d
 
 #define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
 #define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+
+#define XENSHARE_writable 0
+#define XENSHARE_readonly 1
+extern void share_xen_page_with_guest(
+    struct page_info *page, struct domain *d, int readonly);
+extern void share_xen_page_with_privileged_guests(
+    struct page_info *page, int readonly);
 
 extern struct page_info *frame_table;
 extern unsigned long max_page;
@@ -218,16 +225,18 @@ typedef struct {
 } vm_assist_info_t;
 extern vm_assist_info_t vm_assist_info[];
 
-#define share_xen_page_with_guest(p, d, r) do { } while (0)
-#define share_xen_page_with_privileged_guests(p, r) do { } while (0)
 
 /* hope that accesses to this will fail spectacularly */
-#define machine_to_phys_mapping ((u32 *)-1UL)
-
-extern int update_grant_va_mapping(unsigned long va,
-                                   unsigned long val,
-                                   struct domain *,
-                                   struct vcpu *);
+#undef machine_to_phys_mapping
+#define INVALID_M2P_ENTRY        (~0UL)
+
+/* do nothing, it's all calculated */
+#define set_gpfn_from_mfn(mfn, pfn) do { } while (0)
+#define get_gpfn_from_mfn(mfn) (mfn)
+
+extern unsigned long mfn_to_gmfn(struct domain *d, unsigned long mfn);
+
+extern unsigned long paddr_to_maddr(unsigned long paddr);
 
 #define INVALID_MFN (~0UL)
 #define PFN_TYPE_NONE 0
@@ -235,29 +244,48 @@ extern int update_grant_va_mapping(unsig
 #define PFN_TYPE_LOGICAL 2
 #define PFN_TYPE_IO 3
 #define PFN_TYPE_FOREIGN 4
+#define PFN_TYPE_GNTTAB 5
 
 extern ulong pfn2mfn(struct domain *d, ulong pfn, int *type);
+static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gmfn)
+{
+    int mtype;
+    ulong mfn;
+    
+    mfn = pfn2mfn(d, gmfn, &mtype);
+    if (mfn != INVALID_MFN) {
+        switch (mtype) {
+        case PFN_TYPE_RMA:
+        case PFN_TYPE_LOGICAL:
+            break;
+        default:
+            WARN();
+            mfn = INVALID_MFN;
+            break;
+        }
+    }
+    return mfn;
+}
+
+extern int update_grant_va_mapping(unsigned long va,
+                                   unsigned long val,
+                                   struct domain *,
+                                   struct vcpu *);
 
 /* Arch-specific portion of memory_op hypercall. */
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
-
-/* XXX implement me? */
-#define set_gpfn_from_mfn(mfn, pfn) do { } while (0)
-/* XXX only used for debug print right now... */
-#define get_gpfn_from_mfn(mfn) (mfn)
-
-static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gmfn)
-{
-       return pfn2mfn(d, gmfn, NULL);
-}
-
-#define mfn_to_gmfn(_d, mfn) (mfn)
 
 extern int allocate_rma(struct domain *d, unsigned int order_pages);
 extern uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages);
 extern void free_extents(struct domain *d);
 
+extern int arch_domain_add_extent(struct domain *d, struct page_info *page,
+        int order);
+
 extern int steal_page(struct domain *d, struct page_info *page,
                         unsigned int memflags);
 
+/* XXX these just exist until we can stop #including x86 code */
+#define access_ok(addr,size) 1
+#define array_access_ok(addr,count,size) 1
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/msr.h
--- a/xen/include/asm-powerpc/msr.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/msr.h     Thu Dec 14 08:57:36 2006 -0700
@@ -51,9 +51,9 @@
 #define MSR_RI      ULL(0x0000000000000002)
 #define MSR_LE      ULL(0x0000000000000001)
 
-/* MSR bits set on the Mambo simulator */
+/* MSR bits set on the systemsim simulator */
 #define MSR_SIM     ULL(0x0000000020000000)
-#define MSR_MAMBO   ULL(0x0000000010000000)
+#define MSR_SYSTEMSIM ULL(0x0000000010000000)
 
 /* On a trap, srr1's copy of msr defines some bits as follows: */
 #define MSR_TRAP_FE     ULL(0x0000000000100000) /* Floating Point Exception */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/page.h
--- a/xen/include/asm-powerpc/page.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/page.h    Thu Dec 14 08:57:36 2006 -0700
@@ -13,9 +13,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #ifndef _ASM_PAGE_H
@@ -28,7 +29,6 @@
 #ifndef __ASSEMBLY__
 
 #include <xen/config.h>
-#include <asm/misc.h>
 #include <asm/cache.h>
 
 #define PFN_DOWN(x)   ((x) >> PAGE_SHIFT)
@@ -129,5 +129,6 @@ static inline int get_order_from_pages(u
 #define _PAGE_PAT      0x080UL
 #define _PAGE_PSE      0x080UL
 #define _PAGE_GLOBAL   0x100UL
+
 #endif  /* ! __ASSEMBLY__ */
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/powerpc64/string.h
--- a/xen/include/asm-powerpc/powerpc64/string.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/powerpc64/string.h        Thu Dec 14 08:57:36 2006 -0700
@@ -37,4 +37,7 @@ extern int memcmp(const void *,const voi
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
 
+extern void *systemsim_memset(void *, int, ulong);
+extern void *systemsim_memcpy(void *, const void *, ulong);
+
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/processor.h
--- a/xen/include/asm-powerpc/processor.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/processor.h       Thu Dec 14 08:57:36 2006 -0700
@@ -31,6 +31,85 @@
 /* most assemblers do not know this instruction */
 #define HRFID .long 0x4c000224
 
+/* Processor Version Register (PVR) field extraction */
+
+#define PVR_VER(pvr)   (((pvr) >>  16) & 0xFFFF)       /* Version field */
+#define PVR_REV(pvr)   (((pvr) >>   0) & 0xFFFF)       /* Revision field */
+
+#define __is_processor(pv)     (PVR_VER(mfspr(SPRN_PVR)) == (pv))
+
+/*
+ * IBM has further subdivided the standard PowerPC 16-bit version and
+ * revision subfields of the PVR for the PowerPC 403s into the following:
+ */
+
+#define PVR_FAM(pvr)   (((pvr) >> 20) & 0xFFF) /* Family field */
+#define PVR_MEM(pvr)   (((pvr) >> 16) & 0xF)   /* Member field */
+#define PVR_CORE(pvr)  (((pvr) >> 12) & 0xF)   /* Core field */
+#define PVR_CFG(pvr)   (((pvr) >>  8) & 0xF)   /* Configuration field */
+#define PVR_MAJ(pvr)   (((pvr) >>  4) & 0xF)   /* Major revision field */
+#define PVR_MIN(pvr)   (((pvr) >>  0) & 0xF)   /* Minor revision field */
+
+/* Processor Version Numbers */
+
+#define PVR_403GA      0x00200000
+#define PVR_403GB      0x00200100
+#define PVR_403GC      0x00200200
+#define PVR_403GCX     0x00201400
+#define PVR_405GP      0x40110000
+#define PVR_STB03XXX   0x40310000
+#define PVR_NP405H     0x41410000
+#define PVR_NP405L     0x41610000
+#define PVR_601                0x00010000
+#define PVR_602                0x00050000
+#define PVR_603                0x00030000
+#define PVR_603e       0x00060000
+#define PVR_603ev      0x00070000
+#define PVR_603r       0x00071000
+#define PVR_604                0x00040000
+#define PVR_604e       0x00090000
+#define PVR_604r       0x000A0000
+#define PVR_620                0x00140000
+#define PVR_740                0x00080000
+#define PVR_750                PVR_740
+#define PVR_740P       0x10080000
+#define PVR_750P       PVR_740P
+#define PVR_7400       0x000C0000
+#define PVR_7410       0x800C0000
+#define PVR_7450       0x80000000
+#define PVR_8540       0x80200000
+#define PVR_8560       0x80200000
+/*
+ * For the 8xx processors, all of them report the same PVR family for
+ * the PowerPC core. The various versions of these processors must be
+ * differentiated by the version number in the Communication Processor
+ * Module (CPM).
+ */
+#define PVR_821                0x00500000
+#define PVR_823                PVR_821
+#define PVR_850                PVR_821
+#define PVR_860                PVR_821
+#define PVR_8240       0x00810100
+#define PVR_8245       0x80811014
+#define PVR_8260       PVR_8240
+
+/* 64-bit processors */
+/* XXX the prefix should be PVR_, we'll do a global sweep to fix it one day */
+#define PV_NORTHSTAR   0x0033
+#define PV_PULSAR      0x0034
+#define PV_POWER4      0x0035
+#define PV_ICESTAR     0x0036
+#define PV_SSTAR       0x0037
+#define PV_POWER4p     0x0038
+#define PV_970         0x0039
+#define PV_POWER5      0x003A
+#define PV_POWER5p     0x003B
+#define PV_970FX       0x003C
+#define PV_630         0x0040
+#define PV_630p        0x0041
+#define PV_970MP       0x0044
+#define PV_BE          0x0070
+
 #ifndef __ASSEMBLY__ 
 #include <xen/types.h>
 
@@ -38,13 +117,10 @@ struct vcpu;
 struct vcpu;
 struct cpu_user_regs;
 extern int cpu_machinecheck(struct cpu_user_regs *);
-extern void cpu_scom_init(void);
 extern void show_registers(struct cpu_user_regs *);
-extern void show_execution_state(struct cpu_user_regs *);
-extern void show_backtrace(ulong sp, ulong lr, ulong pc);
 extern unsigned int cpu_extent_order(void);
 extern unsigned int cpu_default_rma_order_pages(void);
-extern int cpu_rma_valid(unsigned int log);
+extern int cpu_rma_valid(unsigned int order);
 extern uint cpu_large_page_orders(uint *sizes, uint max);
 extern void cpu_initialize(int cpuid);
 extern void cpu_init_vcpu(struct vcpu *);
@@ -54,13 +130,6 @@ extern void flush_segments(void);
 extern void flush_segments(void);
 extern void dump_segments(int valid);
 
-/* XXX this could also land us in GDB */
-#define dump_execution_state() BUG()
-
-extern void __warn(char *file, int line);
-#define WARN() __warn(__FILE__, __LINE__)
-#define WARN_ON(_p) do { if (_p) WARN(); } while ( 0 )
-
 #define ARCH_HAS_PREFETCH
 static inline void prefetch(const void *x) {;}
 
@@ -83,7 +152,8 @@ static inline void nop(void) {
 static inline void nop(void) {
     __asm__ __volatile__ ("nop");
 }
-#define cpu_relax() nop()
+/* will need to address thread priorities when we go SMT */
+#define cpu_relax() barrier()
 
 static inline unsigned int mfpir(void)
 {
@@ -207,13 +277,13 @@ static inline unsigned mfdsisr(void)
     return val;
 }
 
-#ifdef CONFIG_MAMBO
-static inline int on_mambo(void)
-{
-    return !!(mfmsr() & MSR_MAMBO);
-}
-#else /* CONFIG_MAMBO */
-static inline int on_mambo(void) { return 0; }
+#ifdef CONFIG_SYSTEMSIM
+static inline int on_systemsim(void)
+{
+    return !!(mfmsr() & MSR_SYSTEMSIM);
+}
+#else /* CONFIG_SYSTEMSIM */
+static inline int on_systemsim(void) { return 0; }
 #endif
 
 #endif /* __ASSEMBLY__ */
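
The PVR accessors added above are plain shift-and-mask helpers. As a self-contained illustration (ordinary user-space C, not hypervisor code, with an arbitrary sample PVR value), the same two macros split a 32-bit PVR into its version and revision halves:

    /* Stand-alone demonstration of the PVR field-extraction macros above.
     * The sample value is arbitrary, chosen only to make the split visible. */
    #include <stdio.h>
    #include <stdint.h>

    #define PVR_VER(pvr)   (((pvr) >> 16) & 0xFFFF)   /* Version field  */
    #define PVR_REV(pvr)   (((pvr) >>  0) & 0xFFFF)   /* Revision field */

    int main(void)
    {
        uint32_t pvr = 0x003C0300;   /* 970FX-style version, made-up revision */

        printf("version 0x%04X, revision 0x%04X\n",
               (unsigned)PVR_VER(pvr), (unsigned)PVR_REV(pvr));
        /* prints: version 0x003C, revision 0x0300 */
        return 0;
    }
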
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/smp.h     Thu Dec 14 08:57:36 2006 -0700
@@ -25,6 +25,12 @@
 #include <xen/cpumask.h>
 #include <xen/init.h>
 #include <asm/current.h>
+
+/* crap to make x86 "common code" happy */
+#define BAD_APICID 0xFFu
+extern u8 x86_cpu_to_apicid[];
+
+
 extern int smp_num_siblings;
 
 /* revisit when we support SMP */
@@ -35,4 +41,20 @@ extern cpumask_t cpu_core_map[];
 extern cpumask_t cpu_core_map[];
 extern void __devinit smp_generic_take_timebase(void);
 extern void __devinit smp_generic_give_timebase(void);
+
+#define SA_INTERRUPT   0x20000000u
+typedef int irqreturn_t;
+extern int request_irq(unsigned int irq,
+    irqreturn_t (*handler)(int, void *, struct cpu_user_regs *),
+    unsigned long irqflags, const char * devname, void *dev_id);
+void smp_message_recv(int msg, struct cpu_user_regs *regs);
+void smp_call_function_interrupt(struct cpu_user_regs *regs);
+void smp_event_check_interrupt(void);
+void send_IPI_mask(cpumask_t mask, int vector);
+
+#undef DEBUG_IPI
+#ifdef DEBUG_IPI
+void ipi_torture_test(void);
 #endif
+
+#endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/spinlock.h
--- a/xen/include/asm-powerpc/spinlock.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/spinlock.h        Thu Dec 14 08:57:36 2006 -0700
@@ -70,18 +70,15 @@ cas_u32(volatile u32 *ptr, u32 oval, u32
     return tmp;
 }
 
-typedef union {
+typedef struct {
     volatile u32 lock;
-    struct {
-        s8 recurse_cpu;
-        u8 recurse_cnt;
-        s16 lock;
-    } fields;
+    s16 recurse_cpu;
+    u16 recurse_cnt;
 } spinlock_t;
 
 #define __UNLOCKED (0U)
 #define __LOCKED (~__UNLOCKED)
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { __UNLOCKED }
+#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { __UNLOCKED, -1, 0 }
 static inline void spin_lock_init(spinlock_t *lock)
 {
     *lock = (spinlock_t) SPIN_LOCK_UNLOCKED;
@@ -181,17 +178,17 @@ static inline void _raw_spin_unlock_recu
 static inline void _raw_spin_unlock_recursive(spinlock_t *lock)
 {
     int cpu = smp_processor_id();
-    if (likely(lock->fields.recurse_cpu != cpu)) {
+    if (likely(lock->recurse_cpu != cpu)) {
         spin_lock(lock);
-        lock->fields.recurse_cpu = cpu;
-    }
-    lock->fields.recurse_cnt++;
+        lock->recurse_cpu = cpu;
+    }
+    lock->recurse_cnt++;
 }
 
 static inline void _raw_spin_unlock_recursive(spinlock_t *lock)
 {
-    if (likely(--lock->fields.recurse_cnt == 0)) {
-        lock->fields.recurse_cpu = -1;
+    if (likely(--lock->recurse_cnt == 0)) {
+        lock->recurse_cpu = -1;
         spin_unlock(lock);
     }
 }
@@ -200,19 +197,19 @@ static inline void _raw_spin_unlock_recu
 #define _raw_spin_lock_recursive(_lock)            \
     do {                                           \
         int cpu = smp_processor_id();              \
-        if ( likely((_lock)->fields.recurse_cpu != cpu) ) \
+        if ( likely((_lock)->recurse_cpu != cpu) ) \
         {                                          \
             spin_lock(_lock);                      \
-            (_lock)->fields.recurse_cpu = cpu;            \
+            (_lock)->recurse_cpu = cpu;            \
         }                                          \
-        (_lock)->fields.recurse_cnt++;                    \
+        (_lock)->recurse_cnt++;                    \
     } while ( 0 )
 
 #define _raw_spin_unlock_recursive(_lock)          \
     do {                                           \
-        if ( likely(--(_lock)->fields.recurse_cnt == 0) ) \
+        if ( likely(--(_lock)->recurse_cnt == 0) ) \
         {                                          \
-            (_lock)->fields.recurse_cpu = -1;             \
+            (_lock)->recurse_cpu = -1;             \
             spin_unlock(_lock);                    \
         }                                          \
     } while ( 0 )
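
The recursive-lock helpers above follow the classic owner-plus-count pattern: the underlying lock is taken only on a CPU's first acquisition, recurse_cpu records the owner, and the lock is released only once the matching number of unlocks has been seen. A rough user-space analogue of that pattern (pthreads plus a caller-supplied id standing in for smp_processor_id(); not the Xen code itself) looks like this:

    /* User-space analogue of the owner/count recursive-lock pattern above.
     * Illustration only -- not the Xen spinlock implementation.  Each thread
     * passes a small unique id, standing in for smp_processor_id(). */
    #include <pthread.h>
    #include <stdatomic.h>

    typedef struct {
        pthread_mutex_t lock;
        atomic_int recurse_owner;   /* id of current holder, -1 if none */
        unsigned int recurse_cnt;   /* recursion depth, guarded by lock */
    } rec_lock_t;

    static void rec_lock_init(rec_lock_t *l)
    {
        pthread_mutex_init(&l->lock, NULL);
        atomic_init(&l->recurse_owner, -1);
        l->recurse_cnt = 0;
    }

    static void rec_lock(rec_lock_t *l, int my_id)
    {
        /* Only my_id ever stores my_id here, and it clears it before
         * unlocking, so seeing my_id means this thread already holds it. */
        if (atomic_load(&l->recurse_owner) != my_id) {
            pthread_mutex_lock(&l->lock);
            atomic_store(&l->recurse_owner, my_id);
        }
        l->recurse_cnt++;
    }

    static void rec_unlock(rec_lock_t *l)
    {
        if (--l->recurse_cnt == 0) {
            atomic_store(&l->recurse_owner, -1);
            pthread_mutex_unlock(&l->lock);
        }
    }
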
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/numa.h
--- a/xen/include/asm-x86/numa.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/numa.h        Thu Dec 14 08:57:36 2006 -0700
@@ -37,7 +37,7 @@ extern void __init init_cpu_to_node(void
 
 static inline void clear_node_cpumask(int cpu)
 {
-       clear_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+       cpu_clear(cpu, node_to_cpumask[cpu_to_node(cpu)]);
 }
 
 /* Simple perfect hash to map physical addresses to node numbers */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/page.h        Thu Dec 14 08:57:36 2006 -0700
@@ -26,25 +26,37 @@
 #endif
 
 /* Read a pte atomically from memory. */
-#define l1e_read_atomic(l1ep) l1e_from_intpte(pte_read_atomic(l1ep))
-#define l2e_read_atomic(l2ep) l2e_from_intpte(pte_read_atomic(l2ep))
-#define l3e_read_atomic(l3ep) l3e_from_intpte(pte_read_atomic(l3ep))
-#define l4e_read_atomic(l4ep) l4e_from_intpte(pte_read_atomic(l4ep))
+#define l1e_read_atomic(l1ep) \
+    l1e_from_intpte(pte_read_atomic(&l1e_get_intpte(*(l1ep))))
+#define l2e_read_atomic(l2ep) \
+    l2e_from_intpte(pte_read_atomic(&l2e_get_intpte(*(l2ep))))
+#define l3e_read_atomic(l3ep) \
+    l3e_from_intpte(pte_read_atomic(&l3e_get_intpte(*(l3ep))))
+#define l4e_read_atomic(l4ep) \
+    l4e_from_intpte(pte_read_atomic(&l4e_get_intpte(*(l4ep))))
 
 /* Write a pte atomically to memory. */
-#define l1e_write_atomic(l1ep, l1e) pte_write_atomic(l1ep, l1e_get_intpte(l1e))
-#define l2e_write_atomic(l2ep, l2e) pte_write_atomic(l2ep, l2e_get_intpte(l2e))
-#define l3e_write_atomic(l3ep, l3e) pte_write_atomic(l3ep, l3e_get_intpte(l3e))
-#define l4e_write_atomic(l4ep, l4e) pte_write_atomic(l4ep, l4e_get_intpte(l4e))
+#define l1e_write_atomic(l1ep, l1e) \
+    pte_write_atomic(&l1e_get_intpte(*(l1ep)), l1e_get_intpte(l1e))
+#define l2e_write_atomic(l2ep, l2e) \
+    pte_write_atomic(&l2e_get_intpte(*(l2ep)), l2e_get_intpte(l2e))
+#define l3e_write_atomic(l3ep, l3e) \
+    pte_write_atomic(&l3e_get_intpte(*(l3ep)), l3e_get_intpte(l3e))
+#define l4e_write_atomic(l4ep, l4e) \
+    pte_write_atomic(&l4e_get_intpte(*(l4ep)), l4e_get_intpte(l4e))
 
 /*
  * Write a pte safely but non-atomically to memory.
  * The PTE may become temporarily not-present during the update.
  */
-#define l1e_write(l1ep, l1e) pte_write(l1ep, l1e_get_intpte(l1e))
-#define l2e_write(l2ep, l2e) pte_write(l2ep, l2e_get_intpte(l2e))
-#define l3e_write(l3ep, l3e) pte_write(l3ep, l3e_get_intpte(l3e))
-#define l4e_write(l4ep, l4e) pte_write(l4ep, l4e_get_intpte(l4e))
+#define l1e_write(l1ep, l1e) \
+    pte_write(&l1e_get_intpte(*(l1ep)), l1e_get_intpte(l1e))
+#define l2e_write(l2ep, l2e) \
+    pte_write(&l2e_get_intpte(*(l2ep)), l2e_get_intpte(l2e))
+#define l3e_write(l3ep, l3e) \
+    pte_write(&l3e_get_intpte(*(l3ep)), l3e_get_intpte(l3e))
+#define l4e_write(l4ep, l4e) \
+    pte_write(&l4e_get_intpte(*(l4ep)), l4e_get_intpte(l4e))
 
 /* Get direct integer representation of a pte's contents (intpte_t). */
 #define l1e_get_intpte(x)          ((x).l1)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/shadow.h      Thu Dec 14 08:57:36 2006 -0700
@@ -540,6 +540,9 @@ extern int shadow_remove_write_access(st
  * Returns non-zero if we need to flush TLBs. */
 extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
 
+/* Remove all mappings from the shadows. */
+extern void shadow_blow_tables(struct domain *d);
+
 void
 shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
 /* This is an HVM page that we think is no longer a pagetable.
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/x86_32/page-2level.h
--- a/xen/include/asm-x86/x86_32/page-2level.h  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/x86_32/page-2level.h  Thu Dec 14 08:57:36 2006 -0700
@@ -28,9 +28,9 @@ typedef l2_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
 
-#define pte_read_atomic(ptep)       (*(intpte_t *)(ptep))
-#define pte_write_atomic(ptep, pte) ((*(intpte_t *)(ptep)) = (pte))
-#define pte_write(ptep, pte)        ((*(intpte_t *)(ptep)) = (pte))
+#define pte_read_atomic(ptep)       (*(ptep))
+#define pte_write_atomic(ptep, pte) (*(ptep) = (pte))
+#define pte_write(ptep, pte)        (*(ptep) = (pte))
 
 /* root table */
 #define root_get_pfn              l2e_get_pfn
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/x86_32/page-3level.h  Thu Dec 14 08:57:36 2006 -0700
@@ -38,22 +38,23 @@ typedef l3_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
 
-#define pte_read_atomic(ptep) ({                                            \
-    intpte_t __pte = *(intpte_t *)(ptep), __npte;                           \
-    while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, __pte)) != __pte ) \
-        __pte = __npte;                                                     \
+#define pte_read_atomic(ptep) ({                              \
+    intpte_t __pte = *(ptep), __npte;                         \
+    while ( (__npte = cmpxchg(ptep, __pte, __pte)) != __pte ) \
+        __pte = __npte;                                       \
     __pte; })
-#define pte_write_atomic(ptep, pte) do {                                    \
-    intpte_t __pte = *(intpte_t *)(ptep), __npte;                           \
-    while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, (pte))) != __pte ) \
-        __pte = __npte;                                                     \
+#define pte_write_atomic(ptep, pte) do {                      \
+    intpte_t __pte = *(ptep), __npte;                         \
+    while ( (__npte = cmpxchg(ptep, __pte, (pte))) != __pte ) \
+        __pte = __npte;                                       \
 } while ( 0 )
-#define pte_write(ptep, pte) do {               \
-    *((u32 *)(ptep)+0) = 0;                     \
-    wmb();                                      \
-    *((u32 *)(ptep)+1) = (pte) >> 32;           \
-    wmb();                                      \
-    *((u32 *)(ptep)+0) = (pte) >>  0;           \
+#define pte_write(ptep, pte) do {                             \
+    u32 *__ptep_words = (u32 *)(ptep);                        \
+    __ptep_words[0] = 0;                                      \
+    wmb();                                                    \
+    __ptep_words[1] = (pte) >> 32;                            \
+    wmb();                                                    \
+    __ptep_words[0] = (pte) >>  0;                            \
 } while ( 0 )
 
 /* root table */
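
On PAE a 64-bit PTE cannot be read in a single 32-bit access, so pte_read_atomic above spins on cmpxchg with an unchanged value: once the compare succeeds, the value it returned is known to have been read atomically. A stand-alone user-space analogue of the same trick, using the GCC __sync builtin in place of Xen's cmpxchg, is sketched below.

    /* User-space analogue of the cmpxchg-based atomic 64-bit read used by
     * pte_read_atomic above.  __sync_val_compare_and_swap stands in for
     * Xen's cmpxchg; this is illustration only, not the hypervisor code. */
    #include <stdint.h>

    static uint64_t read64_atomic(volatile uint64_t *p)
    {
        uint64_t old = *p, cur;

        /* If the compare fails, *p changed (or the plain read tore);
         * retry with the value the CAS observed until swapping x for x
         * succeeds, at which point 'old' was read atomically. */
        while ((cur = __sync_val_compare_and_swap(p, old, old)) != old)
            old = cur;
        return old;
    }
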
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/x86_64/page.h Thu Dec 14 08:57:36 2006 -0700
@@ -43,9 +43,9 @@ typedef l4_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
 
-#define pte_read_atomic(ptep)       (*(intpte_t *)(ptep))
-#define pte_write_atomic(ptep, pte) ((*(intpte_t *)(ptep)) = (pte))
-#define pte_write(ptep, pte)        ((*(intpte_t *)(ptep)) = (pte))
+#define pte_read_atomic(ptep)       (*(ptep))
+#define pte_write_atomic(ptep, pte) (*(ptep) = (pte))
+#define pte_write(ptep, pte)        (*(ptep) = (pte))
 
 /* Given a virtual address, get an entry offset into a linear page table. */
 #define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/arch-powerpc.h
--- a/xen/include/public/arch-powerpc.h Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/arch-powerpc.h Thu Dec 14 08:57:36 2006 -0700
@@ -73,6 +73,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 
 #ifndef __ASSEMBLY__
 
+#define XENCOMM_INLINE_FLAG (1UL << 63)
+
 typedef uint64_t xen_ulong_t;
 
 /* User-accessible registers: need to be saved/restored for every nested Xen
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/domctl.h       Thu Dec 14 08:57:36 2006 -0700
@@ -385,6 +385,13 @@ typedef struct xen_domctl_settimeoffset 
 typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
 
+#define XEN_DOMCTL_real_mode_area     26
+struct xen_domctl_real_mode_area {
+    uint32_t log; /* log2 of Real Mode Area size */
+};
+typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+
 struct xen_domctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
@@ -410,6 +417,7 @@ struct xen_domctl {
         struct xen_domctl_hypercall_init    hypercall_init;
         struct xen_domctl_arch_setup        arch_setup;
         struct xen_domctl_settimeoffset     settimeoffset;
+        struct xen_domctl_real_mode_area    real_mode_area;
         uint8_t                             pad[128];
     } u;
 };
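
A hedged sketch of how a caller might fill in the new domctl follows; only fields visible in this hunk are touched, the remaining xen_domctl members (such as the target domain) are elided, and since the patch does not say whether 'log' counts bytes or pages, the example value is an assumption.

    /* Sketch only: build a XEN_DOMCTL_real_mode_area request.  The value 26
     * (a 1UL << 26 RMA, i.e. 64 MiB if 'log' is in bytes) is an assumed
     * example, not a documented default. */
    #include <string.h>
    #include <xen/domctl.h>   /* struct xen_domctl, XEN_DOMCTL_real_mode_area */

    static void prepare_real_mode_area_request(struct xen_domctl *domctl)
    {
        memset(domctl, 0, sizeof(*domctl));
        domctl->cmd = XEN_DOMCTL_real_mode_area;
        domctl->interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        domctl->u.real_mode_area.log = 26;
        /* ...the caller would then issue this through the usual domctl path. */
    }
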
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/fbif.h
--- a/xen/include/public/io/fbif.h      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/io/fbif.h      Thu Dec 14 08:57:36 2006 -0700
@@ -1,18 +1,30 @@
 /*
  * fbif.h -- Xen virtual frame buffer device
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (C) 2005 Anthony Liguori <aliguori@xxxxxxxxxx>
  * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@xxxxxxxxxx>
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License. See the file COPYING in the main directory of this archive for
- *  more details.
  */
 
 #ifndef __XEN_PUBLIC_IO_FBIF_H__
 #define __XEN_PUBLIC_IO_FBIF_H__
-
-#include <asm/types.h>
 
 /* Out events (frontend -> backend) */
 
@@ -31,20 +43,20 @@
 
 struct xenfb_update
 {
-       __u8 type;              /* XENFB_TYPE_UPDATE */
-       __s32 x;                /* source x */
-       __s32 y;                /* source y */
-       __s32 width;            /* rect width */
-       __s32 height;           /* rect height */
+    uint8_t type;    /* XENFB_TYPE_UPDATE */
+    int32_t x;      /* source x */
+    int32_t y;      /* source y */
+    int32_t width;  /* rect width */
+    int32_t height; /* rect height */
 };
 
 #define XENFB_OUT_EVENT_SIZE 40
 
 union xenfb_out_event
 {
-       __u8 type;
-       struct xenfb_update update;
-       char pad[XENFB_OUT_EVENT_SIZE];
+    uint8_t type;
+    struct xenfb_update update;
+    char pad[XENFB_OUT_EVENT_SIZE];
 };
 
 /* In events (backend -> frontend) */
@@ -58,8 +70,8 @@ union xenfb_out_event
 
 union xenfb_in_event
 {
-       __u8 type;
-       char pad[XENFB_IN_EVENT_SIZE];
+    uint8_t type;
+    char pad[XENFB_IN_EVENT_SIZE];
 };
 
 /* shared page */
@@ -82,25 +94,25 @@ union xenfb_in_event
 
 struct xenfb_page
 {
-       __u32 in_cons, in_prod;
-       __u32 out_cons, out_prod;
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
 
-       __s32 width;         /* the width of the framebuffer (in pixels) */
-       __s32 height;        /* the height of the framebuffer (in pixels) */
-       __u32 line_length;   /* the length of a row of pixels (in bytes) */
-       __u32 mem_length;    /* the length of the framebuffer (in bytes) */
-       __u8 depth;          /* the depth of a pixel (in bits) */
+    int32_t width;          /* the width of the framebuffer (in pixels) */
+    int32_t height;         /* the height of the framebuffer (in pixels) */
+    uint32_t line_length;   /* the length of a row of pixels (in bytes) */
+    uint32_t mem_length;    /* the length of the framebuffer (in bytes) */
+    uint8_t depth;          /* the depth of a pixel (in bits) */
 
-       /*
-        * Framebuffer page directory
-        *
-        * Each directory page holds PAGE_SIZE / sizeof(*pd)
-        * framebuffer pages, and can thus map up to PAGE_SIZE *
-        * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
-        * sizeof(unsigned long) == 4, that's 4 Megs.  Two directory
-        * pages should be enough for a while.
-        */
-       unsigned long pd[2];
+    /*
+     * Framebuffer page directory
+     *
+     * Each directory page holds PAGE_SIZE / sizeof(*pd)
+     * framebuffer pages, and can thus map up to PAGE_SIZE *
+     * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
+     * sizeof(unsigned long) == 4, that's 4 Megs.  Two directory
+     * pages should be enough for a while.
+     */
+    unsigned long pd[2];
 };
 
 /*
@@ -114,3 +126,13 @@ struct xenfb_page
 #endif
 
 #endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
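
The page-directory comment in xenfb_page above is worth spelling out: each directory page holds PAGE_SIZE / sizeof(unsigned long) entries and each entry maps one framebuffer page, which is where the quoted 4 MiB figure comes from. A tiny stand-alone check of that arithmetic (plain C, not part of the interface):

    /* Stand-alone check of the capacity arithmetic in the xenfb_page comment
     * above, for PAGE_SIZE == 4096 and a 4-byte unsigned long (32-bit guest). */
    #include <stdio.h>

    int main(void)
    {
        unsigned long page_size = 4096;
        unsigned long pd_entry  = 4;                     /* sizeof(unsigned long) */
        unsigned long entries   = page_size / pd_entry;  /* 1024 per directory page */
        unsigned long bytes     = entries * page_size;   /* 4194304 = 4 MiB */

        printf("%lu entries per directory page, %lu bytes (%lu MiB) mapped\n",
               entries, bytes, bytes >> 20);
        /* With pd[2] in xenfb_page, two directory pages map 8 MiB in total. */
        return 0;
    }
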
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/kbdif.h
--- a/xen/include/public/io/kbdif.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/io/kbdif.h     Thu Dec 14 08:57:36 2006 -0700
@@ -1,18 +1,30 @@
 /*
  * kbdif.h -- Xen virtual keyboard/mouse
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (C) 2005 Anthony Liguori <aliguori@xxxxxxxxxx>
  * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@xxxxxxxxxx>
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License. See the file COPYING in the main directory of this archive for
- *  more details.
  */
 
 #ifndef __XEN_PUBLIC_IO_KBDIF_H__
 #define __XEN_PUBLIC_IO_KBDIF_H__
-
-#include <asm/types.h>
 
 /* In events (backend -> frontend) */
 
@@ -35,34 +47,34 @@
 
 struct xenkbd_motion
 {
-       __u8 type;         /* XENKBD_TYPE_MOTION */
-       __s32 rel_x;       /* relative X motion */
-       __s32 rel_y;       /* relative Y motion */
+    uint8_t type;        /* XENKBD_TYPE_MOTION */
+    int32_t rel_x;       /* relative X motion */
+    int32_t rel_y;       /* relative Y motion */
 };
 
 struct xenkbd_key
 {
-       __u8 type;         /* XENKBD_TYPE_KEY */
-       __u8 pressed;      /* 1 if pressed; 0 otherwise */
-       __u32 keycode;     /* KEY_* from linux/input.h */
+    uint8_t type;         /* XENKBD_TYPE_KEY */
+    uint8_t pressed;      /* 1 if pressed; 0 otherwise */
+    uint32_t keycode;     /* KEY_* from linux/input.h */
 };
 
 struct xenkbd_position
 {
-       __u8 type;         /* XENKBD_TYPE_POS */
-       __s32 abs_x;       /* absolute X position (in FB pixels) */
-       __s32 abs_y;       /* absolute Y position (in FB pixels) */
+    uint8_t type;        /* XENKBD_TYPE_POS */
+    int32_t abs_x;       /* absolute X position (in FB pixels) */
+    int32_t abs_y;       /* absolute Y position (in FB pixels) */
 };
 
 #define XENKBD_IN_EVENT_SIZE 40
 
 union xenkbd_in_event
 {
-       __u8 type;
-       struct xenkbd_motion motion;
-       struct xenkbd_key key;
-       struct xenkbd_position pos;
-       char pad[XENKBD_IN_EVENT_SIZE];
+    uint8_t type;
+    struct xenkbd_motion motion;
+    struct xenkbd_key key;
+    struct xenkbd_position pos;
+    char pad[XENKBD_IN_EVENT_SIZE];
 };
 
 /* Out events (frontend -> backend) */
@@ -77,8 +89,8 @@ union xenkbd_in_event
 
 union xenkbd_out_event
 {
-       __u8 type;
-       char pad[XENKBD_OUT_EVENT_SIZE];
+    uint8_t type;
+    char pad[XENKBD_OUT_EVENT_SIZE];
 };
 
 /* shared page */
@@ -101,8 +113,18 @@ union xenkbd_out_event
 
 struct xenkbd_page
 {
-       __u32 in_cons, in_prod;
-       __u32 out_cons, out_prod;
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
 };
 
 #endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/pciif.h
--- a/xen/include/public/io/pciif.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/io/pciif.h     Thu Dec 14 08:57:36 2006 -0700
@@ -25,7 +25,7 @@
 #define __XEN_PCI_COMMON_H__
 
 /* Be sure to bump this number if you change this file */
-#define XEN_PCI_MAGIC          "7"
+#define XEN_PCI_MAGIC "7"
 
 /* xen_pci_sharedinfo flags */
 #define _XEN_PCIF_active     (0)
@@ -45,29 +45,39 @@
 #define XEN_PCI_ERR_op_failed       (-5)
 
 struct xen_pci_op {
-       /* IN: what action to perform: XEN_PCI_OP_* */
-       uint32_t cmd;
+    /* IN: what action to perform: XEN_PCI_OP_* */
+    uint32_t cmd;
 
-       /* OUT: will contain an error number (if any) from errno.h */
-       int32_t err;
+    /* OUT: will contain an error number (if any) from errno.h */
+    int32_t err;
 
-       /* IN: which device to touch */
-       uint32_t domain; /* PCI Domain/Segment */
-       uint32_t bus;
-       uint32_t devfn;
+    /* IN: which device to touch */
+    uint32_t domain; /* PCI Domain/Segment */
+    uint32_t bus;
+    uint32_t devfn;
 
-       /* IN: which configuration registers to touch */
-       int32_t offset;
-       int32_t size;
+    /* IN: which configuration registers to touch */
+    int32_t offset;
+    int32_t size;
 
-       /* IN/OUT: Contains the result after a READ or the value to WRITE */
-       uint32_t value;
+    /* IN/OUT: Contains the result after a READ or the value to WRITE */
+    uint32_t value;
 };
 
 struct xen_pci_sharedinfo {
-       /* flags - XEN_PCIF_* */
-       uint32_t flags;
-       struct xen_pci_op op;
+    /* flags - XEN_PCIF_* */
+    uint32_t flags;
+    struct xen_pci_op op;
 };
 
 #endif /* __XEN_PCI_COMMON_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/xenbus.h
--- a/xen/include/public/io/xenbus.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/io/xenbus.h    Thu Dec 14 08:57:36 2006 -0700
@@ -56,8 +56,18 @@ enum xenbus_state {
      */
     XenbusStateClosing       = 5,
 
-    XenbusStateClosed       = 6
+    XenbusStateClosed        = 6
 };
 typedef enum xenbus_state XenbusState;
 
 #endif /* _XEN_PUBLIC_IO_XENBUS_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/memory.h
--- a/xen/include/public/memory.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/memory.h       Thu Dec 14 08:57:36 2006 -0700
@@ -248,7 +248,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t
  * XENMEM_memory_map.
  * arg == addr of xen_memory_map_t.
  */
-#define XENMEM_machine_memory_map      10
+#define XENMEM_machine_memory_map   10
 
 /*
  * Set the pseudo-physical memory map of a domain, as returned by
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/sysctl.h       Thu Dec 14 08:57:36 2006 -0700
@@ -119,7 +119,7 @@ struct xen_sysctl_perfc_op {
     uint32_t       cmd;                /*  XEN_SYSCTL_PERFCOP_??? */
     /* OUT variables. */
     uint32_t       nr_counters;       /*  number of counters description  */
-    uint32_t       nr_vals;                      /*  number of values  */
+    uint32_t       nr_vals;           /*  number of values  */
     /* counter information (or NULL) */
     XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc;
     /* counter values (or NULL) */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/trace.h
--- a/xen/include/public/trace.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/trace.h        Thu Dec 14 08:57:36 2006 -0700
@@ -32,7 +32,7 @@
 #define TRC_SCHED   0x0002f000    /* Xen Scheduler trace      */
 #define TRC_DOM0OP  0x0004f000    /* Xen DOM0 operation trace */
 #define TRC_VMX     0x0008f000    /* Xen VMX trace            */
-#define TRC_MEM     0x000af000    /* Xen memory trace         */
+#define TRC_MEM     0x0010f000    /* Xen memory trace         */
 #define TRC_ALL     0xfffff000
 
 /* Trace subclasses */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/xenoprof.h
--- a/xen/include/public/xenoprof.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/xenoprof.h     Thu Dec 14 08:57:36 2006 -0700
@@ -52,7 +52,7 @@
 #define XENOPROF_last_op            14
 
 #define MAX_OPROF_EVENTS    32
-#define MAX_OPROF_DOMAINS   25 
+#define MAX_OPROF_DOMAINS   25
 #define XENOPROF_CPU_TYPE_SIZE 64
 
 /* Xenoprof performance events (not Xen events) */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/xen/elfcore.h
--- a/xen/include/xen/elfcore.h Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/xen/elfcore.h Thu Dec 14 08:57:36 2006 -0700
@@ -87,7 +87,7 @@ typedef struct
             desctype desc;                      \
             PAD32(sizeof(desctype));            \
         } desc;                                 \
-    } __attribute__ ((packed)) type
+    } type
 
 #define CORE_STR                "CORE"
 #define CORE_STR_LEN            5 /* including terminating zero */
@@ -119,7 +119,7 @@ typedef struct {
     crash_note_core_t core;
     crash_note_xen_core_t xen_regs;
     crash_note_xen_info_t xen_info;
-} __attribute__ ((packed)) crash_note_t;
+} crash_note_t;
 
 #define setup_crash_note(np, member, str, str_len, id) \
   np->member.note.note.note.namesz = str_len; \
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/xen/sched.h   Thu Dec 14 08:57:36 2006 -0700
@@ -188,6 +188,7 @@ struct domain_setup_info
 #define PAEKERN_no           0
 #define PAEKERN_yes          1
 #define PAEKERN_extended_cr3 2
+#define PAEKERN_bimodal      3
     unsigned int  pae_kernel;
     /* Initialised by loader: Private. */
     unsigned long elf_paddr_offset;
@@ -437,6 +438,7 @@ static inline int vcpu_runnable(struct v
 }
 
 void vcpu_pause(struct vcpu *v);
+void vcpu_pause_nosync(struct vcpu *v);
 void domain_pause(struct domain *d);
 void vcpu_unpause(struct vcpu *v);
 void domain_unpause(struct domain *d);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/utils.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/powerpc64/utils.c     Thu Dec 14 08:57:36 2006 -0700
@@ -0,0 +1,211 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation 2006
+ *
+ * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+#include <xen/memory.h>
+#include <xc_private.h>
+#include <xg_private.h>
+#include <xenctrl.h>
+
+#include "flatdevtree_env.h"
+#include "flatdevtree.h"
+#include "utils.h"
+
+unsigned long get_rma_pages(void *devtree)
+{
+    void *rma;
+    uint64_t rma_reg[2];
+    int rc;
+
+    rma = ft_find_node(devtree, "/memory@0");
+    if (rma == NULL) {
+        DPRINTF("couldn't find /memory@0\n");
+        return 0;
+    }
+    rc = ft_get_prop(devtree, rma, "reg", rma_reg, sizeof(rma_reg));
+    if (rc < 0) {
+        DPRINTF("couldn't get /memory@0/reg\n");
+        return 0;
+    }
+    if (rma_reg[0] != 0) {
+        DPRINTF("RMA did not start at 0\n");
+        return 0;
+    }
+    return rma_reg[1] >> PAGE_SHIFT;
+}
+
+int get_rma_page_array(int xc_handle, int domid, xen_pfn_t **page_array,
+                      unsigned long nr_pages)
+{
+    int rc;
+    int i;
+    xen_pfn_t *p;
+
+    *page_array = malloc(nr_pages * sizeof(xen_pfn_t));
+    if (*page_array == NULL) {
+        perror("malloc");
+        return -1;
+    }
+
+    DPRINTF("xc_get_pfn_list\n");
+    /* We know that the RMA is machine contiguous so let's just get the
+     * first MFN and fill the rest in ourselves */
+    rc = xc_get_pfn_list(xc_handle, domid, *page_array, 1);
+    if (rc == -1) {
+        perror("Could not get the page frame list");
+        return -1;
+    }
+    p = *page_array;
+    for (i = 1; i < nr_pages; i++)
+        p[i] = p[i - 1] + 1;
+    return 0;
+}
+
+int install_image(
+        int xc_handle,
+        int domid,
+        xen_pfn_t *page_array,
+        void *image,
+        unsigned long paddr,
+        unsigned long size)
+{
+    uint8_t *img = image;
+    int i;
+    int rc = 0;
+
+    if (paddr & ~PAGE_MASK) {
+        printf("*** unaligned address\n");
+        return -1;
+    }
+
+    for (i = 0; i < size; i += PAGE_SIZE) {
+        void *page = img + i;
+        xen_pfn_t pfn = (paddr + i) >> PAGE_SHIFT;
+        xen_pfn_t mfn = page_array[pfn];
+
+        rc = xc_copy_to_domain_page(xc_handle, domid, mfn, page);
+        if (rc < 0) {
+            perror("xc_copy_to_domain_page");
+            break;
+        }
+    }
+    return rc;
+}
+
+void *load_file(const char *path, unsigned long *filesize)
+{
+    void *img;
+    ssize_t size;
+    int fd;
+
+    DPRINTF("load_file(%s)\n", path);
+
+    fd = open(path, O_RDONLY);
+    if (fd < 0) {
+        perror(path);
+        return NULL;
+    }
+
+    size = lseek(fd, 0, SEEK_END);
+    if (size < 0) {
+        perror(path);
+        close(fd);
+        return NULL;
+    }
+    lseek(fd, 0, SEEK_SET);
+
+    img = malloc(size);
+    if (img == NULL) {
+        perror(path);
+        close(fd);
+        return NULL;
+    }
+
+    size = read(fd, img, size);
+    if (size <= 0) {
+        perror(path);
+        close(fd);
+        free(img);
+        return NULL;
+    }
+
+    if (filesize)
+        *filesize = size;
+    close(fd);
+    return img;
+}
+
+int load_elf_kernel(
+    int xc_handle,
+    int domid,
+    const char *kernel_path,
+    struct domain_setup_info *dsi,
+    xen_pfn_t *page_array)
+{
+    struct load_funcs load_funcs;
+    char *kernel_img;
+    unsigned long kernel_size;
+    int rc;
+
+    /* load the kernel ELF file */
+    kernel_img = load_file(kernel_path, &kernel_size);
+    if (kernel_img == NULL) {
+        rc = -1;
+        goto out;
+    }
+
+    DPRINTF("probe_elf\n");
+    rc = probe_elf(kernel_img, kernel_size, &load_funcs);
+    if (rc < 0) {
+        rc = -1;
+        printf("%s is not an ELF file\n", kernel_path);
+        goto out;
+    }
+
+    DPRINTF("parseimage\n");
+    rc = (load_funcs.parseimage)(kernel_img, kernel_size, dsi);
+    if (rc < 0) {
+        rc = -1;
+        goto out;
+    }
+
+    DPRINTF("loadimage\n");
+    (load_funcs.loadimage)(kernel_img, kernel_size, xc_handle, domid,
+            page_array, dsi);
+

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

