
[Xen-changelog] [xen-unstable] Merged.



# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID 9d86c1a70f347b49393fa26796df4512bb114ebb
# Parent  b09dbe439169a2348c59b30fbdefe3f19e30c766
# Parent  e5c17d2d85a4dc189b98a0ed5a5921d2cda309c3
Merged.
---
 linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile              |   22 
 linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile          |   20 
 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S                 |    2 
 linux-2.6-xen-sparse/arch/i386/kernel/vm86.c                     |    4 
 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c                     |   14 
 linux-2.6-xen-sparse/arch/ia64/Kconfig                           |   36 +
 linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre               |    6 
 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c              |    7 
 linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c                |    2 
 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c                   |   27 
 linux-2.6-xen-sparse/drivers/xen/Kconfig                         |   26 
 linux-2.6-xen-sparse/drivers/xen/Makefile                        |    9 
 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c             |   21 
 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h                |    1 
 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c                  |    1 
 linux-2.6-xen-sparse/drivers/xen/core/Makefile                   |   16 
 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c                  |    9 
 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c              |    2 
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c               |  288 ++++++++--
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c                |   26 
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c             |  173 ++++--
 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c               |   14 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h      |    6 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h |    7 
 linux-2.6-xen-sparse/include/xen/public/privcmd.h                |   16 
 tools/debugger/libxendebug/xendebug.c                            |    7 
 tools/firmware/hvmloader/Makefile                                |    4 
 tools/firmware/rombios/Makefile                                  |    4 
 tools/firmware/vmxassist/vm86.c                                  |   65 ++
 tools/ioemu/hw/cirrus_vga.c                                      |   17 
 tools/ioemu/hw/pc.c                                              |    5 
 tools/ioemu/hw/vga.c                                             |    2 
 tools/ioemu/vl.c                                                 |   36 -
 tools/ioemu/vl.h                                                 |    2 
 tools/libxc/xc_core.c                                            |    8 
 tools/libxc/xc_domain.c                                          |   10 
 tools/libxc/xc_hvm_build.c                                       |    6 
 tools/libxc/xc_ia64_stubs.c                                      |   12 
 tools/libxc/xc_linux.c                                           |    2 
 tools/libxc/xc_linux_build.c                                     |   76 +-
 tools/libxc/xc_linux_restore.c                                   |  214 ++++++-
 tools/libxc/xc_linux_save.c                                      |   57 +
 tools/libxc/xc_load_aout9.c                                      |    4 
 tools/libxc/xc_load_bin.c                                        |    4 
 tools/libxc/xc_load_elf.c                                        |   19 
 tools/libxc/xc_pagetab.c                                         |    2 
 tools/libxc/xc_private.c                                         |   62 +-
 tools/libxc/xc_ptrace.c                                          |   11 
 tools/libxc/xc_ptrace_core.c                                     |   15 
 tools/libxc/xenctrl.h                                            |   19 
 tools/libxc/xg_private.h                                         |    7 
 tools/libxc/xg_save_restore.h                                    |   12 
 tools/python/xen/util/security.py                                |    9 
 tools/tests/test_x86_emulator.c                                  |  131 ++--
 xen/arch/ia64/linux-xen/smpboot.c                                |    3 
 xen/arch/ia64/xen/domain.c                                       |   15 
 xen/arch/ia64/xen/xensetup.c                                     |    2 
 xen/arch/x86/audit.c                                             |    4 
 xen/arch/x86/cpu/mtrr/main.c                                     |    2 
 xen/arch/x86/dom0_ops.c                                          |    2 
 xen/arch/x86/domain.c                                            |   39 -
 xen/arch/x86/domain_build.c                                      |    9 
 xen/arch/x86/hvm/svm/svm.c                                       |   80 +-
 xen/arch/x86/hvm/vmx/vmx.c                                       |   34 -
 xen/arch/x86/hvm/vmx/x86_32/exits.S                              |   35 -
 xen/arch/x86/hvm/vmx/x86_64/exits.S                              |   71 +-
 xen/arch/x86/i8259.c                                             |    2 
 xen/arch/x86/microcode.c                                         |    2 
 xen/arch/x86/mm.c                                                |   34 -
 xen/arch/x86/setup.c                                             |    2 
 xen/arch/x86/shadow.c                                            |    9 
 xen/arch/x86/shadow32.c                                          |   14 
 xen/arch/x86/shadow_public.c                                     |   14 
 xen/arch/x86/smp.c                                               |    2 
 xen/arch/x86/smpboot.c                                           |   17 
 xen/arch/x86/time.c                                              |    6 
 xen/arch/x86/traps.c                                             |   10 
 xen/arch/x86/x86_32/asm-offsets.c                                |    2 
 xen/arch/x86/x86_32/domain_page.c                                |    2 
 xen/arch/x86/x86_32/entry.S                                      |    5 
 xen/arch/x86/x86_32/mm.c                                         |    3 
 xen/arch/x86/x86_32/traps.c                                      |    6 
 xen/arch/x86/x86_64/asm-offsets.c                                |    3 
 xen/arch/x86/x86_64/entry.S                                      |   10 
 xen/arch/x86/x86_64/mm.c                                         |    3 
 xen/arch/x86/x86_64/traps.c                                      |   14 
 xen/arch/x86/x86_emulate.c                                       |   19 
 xen/common/dom0_ops.c                                            |    2 
 xen/common/domain.c                                              |  134 +++-
 xen/common/kernel.c                                              |    5 
 xen/common/keyhandler.c                                          |    5 
 xen/common/memory.c                                              |   20 
 xen/common/page_alloc.c                                          |    4 
 xen/common/perfc.c                                               |    2 
 xen/common/sched_bvt.c                                           |   36 -
 xen/common/sched_credit.c                                        |   30 -
 xen/common/sched_sedf.c                                          |   39 -
 xen/common/schedule.c                                            |  108 ---
 xen/common/trace.c                                               |   12 
 xen/common/xmalloc.c                                             |    2 
 xen/drivers/char/console.c                                       |    6 
 xen/include/asm-x86/page.h                                       |   11 
 xen/include/public/arch-ia64.h                                   |    3 
 xen/include/public/arch-x86_32.h                                 |   27 
 xen/include/public/arch-x86_64.h                                 |   24 
 xen/include/public/callback.h                                    |   15 
 xen/include/public/dom0_ops.h                                    |   56 -
 xen/include/public/grant_table.h                                 |    2 
 xen/include/public/io/netif.h                                    |    4 
 xen/include/public/io/ring.h                                     |   16 
 xen/include/public/memory.h                                      |   10 
 xen/include/public/xen.h                                         |   22 
 xen/include/xen/console.h                                        |    2 
 xen/include/xen/domain.h                                         |   23 
 xen/include/xen/sched-if.h                                       |   11 
 xen/include/xen/sched.h                                          |   12 
 116 files changed, 1737 insertions(+), 958 deletions(-)

diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S  Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S  Wed Jun 07 11:03:51 2006 +0100
@@ -173,7 +173,7 @@ ENTRY(cpu_gdt_table)
        .ascii           "|pae_pgdir_above_4gb"
        .ascii           "|supervisor_mode_kernel"
 #ifdef CONFIG_X86_PAE
-       .ascii  ",PAE=yes"
+       .ascii  ",PAE=yes[extended-cr3]"
 #else
        .ascii  ",PAE=no"
 #endif
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c      Wed Jun 07 11:03:51 2006 +0100
@@ -132,7 +132,9 @@ struct pt_regs * fastcall save_v86_state
        current->thread.sysenter_cs = __KERNEL_CS;
        load_esp0(tss, &current->thread);
        current->thread.saved_esp0 = 0;
+#ifndef CONFIG_X86_NO_TSS
        put_cpu();
+#endif
 
        loadsegment(fs, current->thread.saved_fs);
        loadsegment(gs, current->thread.saved_gs);
@@ -310,7 +312,9 @@ static void do_sys_vm86(struct kernel_vm
        if (cpu_has_sep)
                tsk->thread.sysenter_cs = 0;
        load_esp0(tss, &tsk->thread);
+#ifndef CONFIG_X86_NO_TSS
        put_cpu();
+#endif
 
        tsk->thread.screen_bitmap = info->screen_bitmap;
        if (info->flags & VM86_SCREEN_BITMAP)
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Wed Jun 07 11:03:51 2006 +0100
@@ -558,15 +558,11 @@ void __init paging_init(void)
 
        kmap_init();
 
-       if (!xen_feature(XENFEAT_auto_translated_physmap) ||
-           xen_start_info->shared_info >= xen_start_info->nr_pages) {
-               /* Switch to the real shared_info page, and clear the
-                * dummy page. */
-               set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
-               HYPERVISOR_shared_info =
-                       (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
-               memset(empty_zero_page, 0, sizeof(empty_zero_page));
-       }
+       /* Switch to the real shared_info page, and clear the
+        * dummy page. */
+       set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+       HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+       memset(empty_zero_page, 0, sizeof(empty_zero_page));
 
        /* Setup mapping of lower 1st MB */
        for (i = 0; i < NR_FIX_ISAMAPS; i++)
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/ia64/Kconfig
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig    Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig    Wed Jun 07 11:03:51 2006 +0100
@@ -73,7 +73,7 @@ config XEN_IA64_DOM0_VP
 
 config XEN_IA64_DOM0_NON_VP
        bool
-       depends on !(XEN && XEN_IA64_DOM0_VP)
+       depends on XEN && !XEN_IA64_DOM0_VP
        default y
        help
          dom0 P=M model
@@ -496,15 +496,39 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
+#
 # override default values of drivers/xen/Kconfig
-if !XEN_IA64_DOM0_VP
+#
+if XEN
+config XEN_UTIL
+       default n if XEN_IA64_DOM0_VP
+
 config HAVE_ARCH_ALLOC_SKB
-        bool
-        default n
+       default n if !XEN_IA64_DOM0_VP
 
 config HAVE_ARCH_DEV_ALLOC_SKB
-        bool
-        default n
+       default n if !XEN_IA64_DOM0_VP
+
+config XEN_BALLOON
+       default n if !XEN_IA64_DOM0_VP
+
+config XEN_SKBUFF
+       default n if !XEN_IA64_DOM0_VP
+
+config XEN_NETDEV_BACKEND
+       default n if !XEN_IA64_DOM0_VP
+
+config XEN_NETDEV_FRONTEND
+       default n if !XEN_IA64_DOM0_VP
+
+config XEN_DEVMEM
+       default n
+
+config XEN_REBOOT
+       default n
+
+config XEN_SMPBOOT
+       default n
 endif
 
 source "drivers/xen/Kconfig"
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre
--- a/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre        Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre        Wed Jun 07 11:03:51 2006 +0100
@@ -10,12 +10,6 @@
 #eventually asm-xsi-offsets needs to be part of hypervisor.h/hypercall.h
 ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/asm-ia64/xen/
 
-#ia64 drivers/xen isn't fully functional yet, workaround...
-#also ignore core/evtchn.c which uses a different irq mechanism than ia64
-#(warning: there be dragons here if these files diverge)
-ln -sf ../../arch/ia64/xen/drivers/Makefile drivers/xen/Makefile
-ln -sf ../../../arch/ia64/xen/drivers/coreMakefile drivers/xen/core/Makefile
-
 #not sure where these ia64-specific files will end up in the future
 ln -sf ../../../arch/ia64/xen/drivers/xenia64_init.c drivers/xen/core
 
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Wed Jun 07 11:03:51 2006 +0100
@@ -665,13 +665,6 @@ void __init setup_arch(char **cmdline_p)
 
        setup_xen_features();
 
-       if (xen_feature(XENFEAT_auto_translated_physmap) &&
-           xen_start_info->shared_info < xen_start_info->nr_pages) {
-               HYPERVISOR_shared_info =
-                       (shared_info_t *)__va(xen_start_info->shared_info);
-               memset(empty_zero_page, 0, sizeof(empty_zero_page));
-       }
-
        HYPERVISOR_vm_assist(VMASST_CMD_enable,
                             VMASST_TYPE_writable_pagetables);
 
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c Wed Jun 07 11:03:51 2006 +0100
@@ -488,7 +488,7 @@ static void smp_really_stop_cpu(void *du
 {
        smp_stop_cpu(); 
        for (;;) 
-               asm("hlt"); 
+               halt();
 } 
 
 void smp_send_stop(void)
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Wed Jun 07 11:03:51 2006 +0100
@@ -666,7 +666,18 @@ void __meminit init_memory_mapping(unsig
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
        }
 
-       BUG_ON(!after_bootmem && start_pfn != table_end);
+       if (!after_bootmem) {
+               BUG_ON(start_pfn != table_end);
+               /*
+                * Destroy the temporary mappings created above. Prevents
+                * overlap with modules area (if init mapping is very big).
+                */
+               start = __START_KERNEL_map + (table_start << PAGE_SHIFT);
+               end   = __START_KERNEL_map + (table_end   << PAGE_SHIFT);
+               for (; start < end; start += PAGE_SIZE)
+                       WARN_ON(HYPERVISOR_update_va_mapping(
+                               start, __pte_ma(0), 0));
+       }
 
        __flush_tlb_all();
 }
@@ -752,15 +763,11 @@ void __init paging_init(void)
        free_area_init_node(0, NODE_DATA(0), zones,
                            __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
 
-       if (!xen_feature(XENFEAT_auto_translated_physmap) ||
-           xen_start_info->shared_info >= xen_start_info->nr_pages) {
-               /* Switch to the real shared_info page, and clear the
-                * dummy page. */
-               set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
-               HYPERVISOR_shared_info =
-                       (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
-               memset(empty_zero_page, 0, sizeof(empty_zero_page));
-       }
+       /* Switch to the real shared_info page, and clear the
+        * dummy page. */
+       set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+       HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+       memset(empty_zero_page, 0, sizeof(empty_zero_page));
 
        init_mm.context.pinned = 1;
 
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/Kconfig
--- a/linux-2.6-xen-sparse/drivers/xen/Kconfig  Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig  Wed Jun 07 11:03:51 2006 +0100
@@ -228,4 +228,30 @@ config NO_IDLE_HZ
        bool
        default y
 
+config XEN_UTIL
+       bool
+       default y
+
+config XEN_BALLOON
+       bool
+       default y
+
+config XEN_DEVMEM
+       bool
+       default y
+
+config XEN_SKBUFF
+       bool
+       default y
+       depends on NET
+
+config XEN_REBOOT
+       bool
+       default y
+
+config XEN_SMPBOOT
+       bool
+       default y
+       depends on SMP
+
 endif
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Wed Jun 07 11:03:51 2006 +0100
@@ -1,14 +1,12 @@
-
-obj-y  += util.o
-
 obj-y  += core/
-obj-y  += char/
 obj-y  += console/
 obj-y  += evtchn/
-obj-y  += balloon/
 obj-y  += privcmd/
 obj-y  += xenbus/
 
+obj-$(CONFIG_XEN_UTIL)                 += util.o
+obj-$(CONFIG_XEN_BALLOON)              += balloon/
+obj-$(CONFIG_XEN_DEVMEM)               += char/
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
 obj-$(CONFIG_XEN_TPMDEV_BACKEND)       += tpmback/
@@ -17,4 +15,3 @@ obj-$(CONFIG_XEN_BLKDEV_TAP)          += blkt
 obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)       += pciback/
 obj-$(CONFIG_XEN_PCIDEV_FRONTEND)      += pcifront/
-
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Wed Jun 07 11:03:51 2006 +0100
@@ -452,10 +452,6 @@ int blkif_ioctl(struct inode *inode, str
                      command, (long)argument, inode->i_rdev);
 
        switch (command) {
-       case HDIO_GETGEO:
-               /* return ENOSYS to use defaults */
-               return -ENOSYS;
-
        case CDROMMULTISESSION:
                DPRINTK("FIXME: support multisession CDs later\n");
                for (i = 0; i < sizeof(struct cdrom_multisession); i++)
@@ -469,6 +465,23 @@ int blkif_ioctl(struct inode *inode, str
                return -EINVAL; /* same return as native Linux */
        }
 
+       return 0;
+}
+
+
+int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
+{
+       /* We don't have real geometry info, but let's at least return
+          values consistent with the size of the device */
+       sector_t nsect = get_capacity(bd->bd_disk);
+       sector_t cylinders = nsect;
+
+       hg->heads = 0xff;
+       hg->sectors = 0x3f;
+       sector_div(cylinders, hg->heads * hg->sectors);
+       hg->cylinders = cylinders;
+       if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
+               hg->cylinders = 0xffff;
        return 0;
 }
 
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Jun 07 11:03:51 2006 +0100
@@ -140,6 +140,7 @@ extern int blkif_release(struct inode *i
 extern int blkif_release(struct inode *inode, struct file *filep);
 extern int blkif_ioctl(struct inode *inode, struct file *filep,
                        unsigned command, unsigned long argument);
+extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
 extern int blkif_check(dev_t dev);
 extern int blkif_revalidate(dev_t dev);
 extern void do_blkif_request (request_queue_t *rq);
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Wed Jun 07 11:03:51 2006 +0100
@@ -91,6 +91,7 @@ static struct block_device_operations xl
        .open = blkif_open,
        .release = blkif_release,
        .ioctl  = blkif_ioctl,
+       .getgeo = blkif_getgeo
 };
 
 DEFINE_SPINLOCK(blkif_io_lock);
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Wed Jun 07 11:03:51 2006 +0100
@@ -2,11 +2,13 @@
 # Makefile for the linux kernel.
 #
 
-obj-y   := evtchn.o reboot.o gnttab.o features.o
+obj-y := evtchn.o gnttab.o features.o
 
-obj-$(CONFIG_PROC_FS)     += xen_proc.o
-obj-$(CONFIG_NET)         += skbuff.o
-obj-$(CONFIG_SMP)         += smpboot.o
-obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
-obj-$(CONFIG_SYSFS)       += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS)   += xen_sysfs.o
+obj-$(CONFIG_PROC_FS)          += xen_proc.o
+obj-$(CONFIG_SYSFS)            += hypervisor_sysfs.o
+obj-$(CONFIG_HOTPLUG_CPU)      += cpu_hotplug.o
+obj-$(CONFIG_XEN_SYSFS)                += xen_sysfs.o
+obj-$(CONFIG_IA64)             += xenia64_init.o
+obj-$(CONFIG_XEN_SKBUFF)       += skbuff.o
+obj-$(CONFIG_XEN_REBOOT)       += reboot.o
+obj-$(CONFIG_XEN_SMPBOOT)      += smpboot.o
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Wed Jun 07 11:03:51 2006 +0100
@@ -89,9 +89,8 @@ void __init prefill_possible_map(void)
 
        for (i = 0; i < NR_CPUS; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
-               if (rc == -ENOENT)
-                       break;
-               cpu_set(i, cpu_possible_map);
+               if (rc >= 0)
+                       cpu_set(i, cpu_possible_map);
        }
 }
 
@@ -209,7 +208,7 @@ void cpu_initialize_context(unsigned int
        ctxt.failsafe_callback_cs  = __KERNEL_CS;
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
 
-       ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
+       ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
 #else /* __x86_64__ */
        ctxt.user_regs.cs = __KERNEL_CS;
        ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
@@ -221,7 +220,7 @@ void cpu_initialize_context(unsigned int
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
        ctxt.syscall_callback_eip  = (unsigned long)system_call;
 
-       ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
+       ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
 
        ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
 #endif
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c       Wed Jun 07 11:03:51 2006 +0100
@@ -146,11 +146,13 @@ static void loopback_construct(struct ne
        dev->hard_start_xmit = loopback_start_xmit;
        dev->get_stats       = loopback_get_stats;
        dev->set_multicast_list = loopback_set_multicast_list;
+       dev->change_mtu      = NULL; /* allow arbitrary mtu */
 
        dev->tx_queue_len    = 0;
 
        dev->features        = (NETIF_F_HIGHDMA |
                                NETIF_F_LLTX |
+                               NETIF_F_SG |
                                NETIF_F_IP_CSUM);
 
        SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Wed Jun 07 11:03:51 2006 +0100
@@ -458,6 +458,9 @@ inline static void net_tx_action_dealloc
        dc = dealloc_cons;
        dp = dealloc_prod;
 
+       /* Ensure we see all indexes enqueued by netif_idx_release(). */
+       smp_rmb();
+
        /*
         * Free up any grants we have finished using
         */
@@ -487,6 +490,177 @@ inline static void net_tx_action_dealloc
        }
 }
 
+static void netbk_tx_err(netif_t *netif, RING_IDX end)
+{
+       RING_IDX cons = netif->tx.req_cons;
+
+       do {
+               netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons);
+               make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+       } while (++cons < end);
+       netif->tx.req_cons = cons;
+       netif_schedule_work(netif);
+       netif_put(netif);
+}
+
+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
+                               int work_to_do)
+{
+       netif_tx_request_t *first = txp;
+       RING_IDX cons = netif->tx.req_cons;
+       int frags = 1;
+
+       while (txp->flags & NETTXF_more_data) {
+               if (frags >= work_to_do) {
+                       DPRINTK("Need more frags\n");
+                       return -frags;
+               }
+
+               txp = RING_GET_REQUEST(&netif->tx, cons + frags);
+               if (txp->size > first->size) {
+                       DPRINTK("Frags galore\n");
+                       return -frags;
+               }
+
+               first->size -= txp->size;
+               frags++;
+
+               if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+                       DPRINTK("txp->offset: %x, size: %u\n",
+                               txp->offset, txp->size);
+                       return -frags;
+               }
+       }
+
+       return frags;
+}
+
+static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+                                                 struct sk_buff *skb,
+                                                 gnttab_map_grant_ref_t *mop)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       skb_frag_t *frags = shinfo->frags;
+       netif_tx_request_t *txp;
+       unsigned long pending_idx = *((u16 *)skb->data);
+       RING_IDX cons = netif->tx.req_cons + 1;
+       int i, start;
+
+       /* Skip first skb fragment if it is on same page as header fragment. */
+       start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+       for (i = start; i < shinfo->nr_frags; i++) {
+               txp = RING_GET_REQUEST(&netif->tx, cons++);
+               pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
+
+               gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+                                 GNTMAP_host_map | GNTMAP_readonly,
+                                 txp->gref, netif->domid);
+
+               memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+               netif_get(netif);
+               pending_tx_info[pending_idx].netif = netif;
+               frags[i].page = (void *)pending_idx;
+       }
+
+       return mop;
+}
+
+static int netbk_tx_check_mop(struct sk_buff *skb,
+                              gnttab_map_grant_ref_t **mopp)
+{
+       gnttab_map_grant_ref_t *mop = *mopp;
+       int pending_idx = *((u16 *)skb->data);
+       netif_t *netif = pending_tx_info[pending_idx].netif;
+       netif_tx_request_t *txp;
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int nr_frags = shinfo->nr_frags;
+       int i, err, start;
+
+       /* Check status of header. */
+       err = mop->status;
+       if (unlikely(err)) {
+               txp = &pending_tx_info[pending_idx].req;
+               make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+               netif_put(netif);
+       } else {
+               set_phys_to_machine(
+                       __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+                       FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+               grant_tx_handle[pending_idx] = mop->handle;
+       }
+
+       /* Skip first skb fragment if it is on same page as header fragment. */
+       start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+       for (i = start; i < nr_frags; i++) {
+               int j, newerr;
+
+               pending_idx = (unsigned long)shinfo->frags[i].page;
+
+               /* Check error status: if okay then remember grant handle. */
+               newerr = (++mop)->status;
+               if (likely(!newerr)) {
+                       set_phys_to_machine(
+                               __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
+                               FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+                       grant_tx_handle[pending_idx] = mop->handle;
+                       /* Had a previous error? Invalidate this fragment. */
+                       if (unlikely(err))
+                               netif_idx_release(pending_idx);
+                       continue;
+               }
+
+               /* Error on this fragment: respond to client with an error. */
+               txp = &pending_tx_info[pending_idx].req;
+               make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+               netif_put(netif);
+
+               /* Not the first error? Preceding frags already invalidated. */
+               if (err)
+                       continue;
+
+               /* First error: invalidate header and preceding fragments. */
+               pending_idx = *((u16 *)skb->data);
+               netif_idx_release(pending_idx);
+               for (j = start; j < i; j++) {
+                       pending_idx = (unsigned long)shinfo->frags[i].page;
+                       netif_idx_release(pending_idx);
+               }
+
+               /* Remember the error: invalidate all subsequent fragments. */
+               err = newerr;
+       }
+
+       *mopp = mop + 1;
+       return err;
+}
+
+static void netbk_fill_frags(struct sk_buff *skb)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int nr_frags = shinfo->nr_frags;
+       int i;
+
+       for (i = 0; i < nr_frags; i++) {
+               skb_frag_t *frag = shinfo->frags + i;
+               netif_tx_request_t *txp;
+               unsigned long pending_idx;
+
+               pending_idx = (unsigned long)frag->page;
+               txp = &pending_tx_info[pending_idx].req;
+               frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+               frag->size = txp->size;
+               frag->page_offset = txp->offset;
+
+               skb->len += txp->size;
+               skb->data_len += txp->size;
+               skb->truesize += txp->size;
+       }
+}
+
 /* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
@@ -504,7 +678,7 @@ static void net_tx_action(unsigned long 
                net_tx_action_dealloc();
 
        mop = tx_map_ops;
-       while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+       while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
                !list_empty(&net_schedule_list)) {
                /* Get a netif from the list with work to do. */
                ent = net_schedule_list.next;
@@ -552,38 +726,44 @@ static void net_tx_action(unsigned long 
                }
                netif->remaining_credit -= txreq.size;
 
-               netif->tx.req_cons++;
-
-               netif_schedule_work(netif);
-
-               if (unlikely(txreq.size < ETH_HLEN) || 
-                   unlikely(txreq.size > ETH_FRAME_LEN)) {
+               ret = netbk_count_requests(netif, &txreq, work_to_do);
+               if (unlikely(ret < 0)) {
+                       netbk_tx_err(netif, i - ret);
+                       continue;
+               }
+               i += ret;
+
+               if (unlikely(ret > MAX_SKB_FRAGS + 1)) {
+                       DPRINTK("Too many frags\n");
+                       netbk_tx_err(netif, i);
+                       continue;
+               }
+
+               if (unlikely(txreq.size < ETH_HLEN)) {
                        DPRINTK("Bad packet size: %d\n", txreq.size);
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       netbk_tx_err(netif, i);
                        continue; 
                }
 
                /* No crossing a page as the payload mustn't fragment. */
-               if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
+               if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
                        DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", 
                                txreq.offset, txreq.size, 
                                (txreq.offset &~PAGE_MASK) + txreq.size);
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       netbk_tx_err(netif, i);
                        continue;
                }
 
                pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
 
-               data_len = (txreq.size > PKT_PROT_LEN) ?
+               data_len = (txreq.size > PKT_PROT_LEN &&
+                           ret < MAX_SKB_FRAGS + 1) ?
                        PKT_PROT_LEN : txreq.size;
 
                skb = alloc_skb(data_len+16, GFP_ATOMIC);
                if (unlikely(skb == NULL)) {
                        DPRINTK("Can't allocate a skb in start_xmit.\n");
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       netbk_tx_err(netif, i);
                        break;
                }
 
@@ -600,9 +780,23 @@ static void net_tx_action(unsigned long 
                pending_tx_info[pending_idx].netif = netif;
                *((u16 *)skb->data) = pending_idx;
 
+               __skb_put(skb, data_len);
+
+               skb_shinfo(skb)->nr_frags = ret - 1;
+               if (data_len < txreq.size) {
+                       skb_shinfo(skb)->nr_frags++;
+                       skb_shinfo(skb)->frags[0].page =
+                               (void *)(unsigned long)pending_idx;
+               }
+
                __skb_queue_tail(&tx_queue, skb);
 
                pending_cons++;
+
+               mop = netbk_get_requests(netif, skb, mop);
+
+               netif->tx.req_cons = i;
+               netif_schedule_work(netif);
 
                if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
                        break;
@@ -617,75 +811,56 @@ static void net_tx_action(unsigned long 
 
        mop = tx_map_ops;
        while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+               netif_tx_request_t *txp;
+
                pending_idx = *((u16 *)skb->data);
                netif       = pending_tx_info[pending_idx].netif;
-               memcpy(&txreq, &pending_tx_info[pending_idx].req,
-                      sizeof(txreq));
+               txp         = &pending_tx_info[pending_idx].req;
 
                /* Check the remap error code. */
-               if (unlikely(mop->status)) {
+               if (unlikely(netbk_tx_check_mop(skb, &mop))) {
                        printk(KERN_ALERT "#### netback grant fails\n");
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       skb_shinfo(skb)->nr_frags = 0;
                        kfree_skb(skb);
-                       mop++;
-                       pending_ring[MASK_PEND_IDX(pending_prod++)] =
-                               pending_idx;
                        continue;
                }
-               set_phys_to_machine(
-                       __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
-                       FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
-               grant_tx_handle[pending_idx] = mop->handle;
-
-               data_len = (txreq.size > PKT_PROT_LEN) ?
-                       PKT_PROT_LEN : txreq.size;
-
-               __skb_put(skb, data_len);
+
+               data_len = skb->len;
                memcpy(skb->data, 
-                      (void *)(MMAP_VADDR(pending_idx)|txreq.offset),
+                      (void *)(MMAP_VADDR(pending_idx)|txp->offset),
                       data_len);
-               if (data_len < txreq.size) {
+               if (data_len < txp->size) {
                        /* Append the packet payload as a fragment. */
-                       skb_shinfo(skb)->frags[0].page        = 
-                               virt_to_page(MMAP_VADDR(pending_idx));
-                       skb_shinfo(skb)->frags[0].size        =
-                               txreq.size - data_len;
-                       skb_shinfo(skb)->frags[0].page_offset = 
-                               txreq.offset + data_len;
-                       skb_shinfo(skb)->nr_frags = 1;
+                       txp->offset += data_len;
+                       txp->size -= data_len;
                } else {
                        /* Schedule a response immediately. */
                        netif_idx_release(pending_idx);
                }
-
-               skb->data_len  = txreq.size - data_len;
-               skb->len      += skb->data_len;
-               skb->truesize += skb->data_len;
-
-               skb->dev      = netif->dev;
-               skb->protocol = eth_type_trans(skb, skb->dev);
 
                /*
                 * Old frontends do not assert data_validated but we
                 * can infer it from csum_blank so test both flags.
                 */
-               if (txreq.flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
+               if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                        skb->proto_data_valid = 1;
                } else {
                        skb->ip_summed = CHECKSUM_NONE;
                        skb->proto_data_valid = 0;
                }
-               skb->proto_csum_blank = !!(txreq.flags & NETTXF_csum_blank);
-
-               netif->stats.rx_bytes += txreq.size;
+               skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
+
+               netbk_fill_frags(skb);
+
+               skb->dev      = netif->dev;
+               skb->protocol = eth_type_trans(skb, skb->dev);
+
+               netif->stats.rx_bytes += skb->len;
                netif->stats.rx_packets++;
 
                netif_rx(skb);
                netif->dev->last_rx = jiffies;
-
-               mop++;
        }
 }
 
@@ -695,7 +870,10 @@ static void netif_idx_release(u16 pendin
        unsigned long flags;
 
        spin_lock_irqsave(&_lock, flags);
-       dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
+       dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
+       /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+       smp_wmb();
+       dealloc_prod++;
        spin_unlock_irqrestore(&_lock, flags);
 
        tasklet_schedule(&net_tx_tasklet);
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Wed Jun 07 11:03:51 2006 +0100
@@ -69,6 +69,8 @@ static int netback_probe(struct xenbus_d
 static int netback_probe(struct xenbus_device *dev,
                         const struct xenbus_device_id *id)
 {
+       const char *message;
+       xenbus_transaction_t xbt;
        int err;
        struct backend_info *be = kzalloc(sizeof(struct backend_info),
                                          GFP_KERNEL);
@@ -86,6 +88,27 @@ static int netback_probe(struct xenbus_d
        if (err)
                goto fail;
 
+       do {
+               err = xenbus_transaction_start(&xbt);
+               if (err) {
+                       xenbus_dev_fatal(dev, err, "starting transaction");
+                       goto fail;
+               }
+
+               err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+               if (err) {
+                       message = "writing feature-sg";
+                       goto abort_transaction;
+               }
+
+               err = xenbus_transaction_end(xbt, 0);
+       } while (err == -EAGAIN);
+
+       if (err) {
+               xenbus_dev_fatal(dev, err, "completing transaction");
+               goto fail;
+       }
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err) {
                goto fail;
@@ -93,6 +116,9 @@ static int netback_probe(struct xenbus_d
 
        return 0;
 
+abort_transaction:
+       xenbus_transaction_end(xbt, 1);
+       xenbus_dev_fatal(dev, err, "%s", message);
 fail:
        DPRINTK("failed");
        netback_remove(dev);
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Wed Jun 07 11:03:51 2006 +0100
@@ -45,6 +45,7 @@
 #include <linux/bitops.h>
 #include <linux/ethtool.h>
 #include <linux/in.h>
+#include <linux/if_ether.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/arp.h>
@@ -173,6 +174,11 @@ static void xennet_sysfs_delif(struct ne
 #define xennet_sysfs_delif(dev) do { } while(0)
 #endif
 
+static inline int xennet_can_sg(struct net_device *dev)
+{
+       return dev->features & NETIF_F_SG;
+}
+
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures and the ring buffers for communication with the backend, and
@@ -307,8 +313,6 @@ again:
                goto destroy_ring;
        }
 
-       xenbus_switch_state(dev, XenbusStateConnected);
-
        return 0;
 
  abort_transaction:
@@ -370,12 +374,9 @@ static int setup_device(struct xenbus_de
                goto fail;
 
        memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
-       network_connect(netdev);
        info->irq = bind_evtchn_to_irqhandler(
                info->evtchn, netif_int, SA_SAMPLE_RANDOM, netdev->name,
                netdev);
-       (void)send_fake_arp(netdev);
-       show_device(info);
 
        return 0;
 
@@ -391,15 +392,24 @@ static void backend_changed(struct xenbu
 static void backend_changed(struct xenbus_device *dev,
                            enum xenbus_state backend_state)
 {
+       struct netfront_info *np = dev->data;
+       struct net_device *netdev = np->netdev;
+
        DPRINTK("\n");
 
        switch (backend_state) {
        case XenbusStateInitialising:
-       case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateConnected:
        case XenbusStateUnknown:
        case XenbusStateClosed:
+               break;
+
+       case XenbusStateInitWait:
+               network_connect(netdev);
+               xenbus_switch_state(dev, XenbusStateConnected);
+               (void)send_fake_arp(netdev);
+               show_device(np);
                break;
 
        case XenbusStateClosing:
@@ -452,13 +462,17 @@ static int network_open(struct net_devic
        return 0;
 }
 
+static inline int netfront_tx_slot_available(struct netfront_info *np)
+{
+       return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 1;
+}
+
 static inline void network_maybe_wake_tx(struct net_device *dev)
 {
        struct netfront_info *np = netdev_priv(dev);
 
        if (unlikely(netif_queue_stopped(dev)) &&
-           !RING_FULL(&np->tx) &&
-           !gnttab_empty_grant_references(&np->gref_tx_head) &&
+           netfront_tx_slot_available(np) &&
            likely(netif_running(dev)))
                netif_wake_queue(dev);
 }
@@ -485,7 +499,7 @@ static void network_tx_buf_gc(struct net
                                printk(KERN_ALERT "network_tx_buf_gc: warning "
                                       "-- grant still in use by backend "
                                       "domain.\n");
-                               break; /* bail immediately */
+                               BUG();
                        }
                        gnttab_end_foreign_access_ref(
                                np->grant_tx_ref[id], GNTMAP_readonly);
@@ -638,36 +652,95 @@ static void network_alloc_rx_buffers(str
        RING_PUSH_REQUESTS(&np->rx);
 }
 
+static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
+                             struct netif_tx_request *tx)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       char *data = skb->data;
+       unsigned long mfn;
+       RING_IDX prod = np->tx.req_prod_pvt;
+       int frags = skb_shinfo(skb)->nr_frags;
+       unsigned int offset = offset_in_page(data);
+       unsigned int len = skb_headlen(skb);
+       unsigned int id;
+       grant_ref_t ref;
+       int i;
+
+       while (len > PAGE_SIZE - offset) {
+               tx->size = PAGE_SIZE - offset;
+               tx->flags |= NETTXF_more_data;
+               len -= tx->size;
+               data += tx->size;
+               offset = 0;
+
+               id = get_id_from_freelist(np->tx_skbs);
+               np->tx_skbs[id] = skb_get(skb);
+               tx = RING_GET_REQUEST(&np->tx, prod++);
+               tx->id = id;
+               ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+               BUG_ON((signed short)ref < 0);
+
+               mfn = virt_to_mfn(data);
+               gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+                                               mfn, GNTMAP_readonly);
+
+               tx->gref = np->grant_tx_ref[id] = ref;
+               tx->offset = offset;
+               tx->size = len;
+               tx->flags = 0;
+       }
+
+       for (i = 0; i < frags; i++) {
+               skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+
+               tx->flags |= NETTXF_more_data;
+
+               id = get_id_from_freelist(np->tx_skbs);
+               np->tx_skbs[id] = skb_get(skb);
+               tx = RING_GET_REQUEST(&np->tx, prod++);
+               tx->id = id;
+               ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+               BUG_ON((signed short)ref < 0);
+
+               mfn = pfn_to_mfn(page_to_pfn(frag->page));
+               gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+                                               mfn, GNTMAP_readonly);
+
+               tx->gref = np->grant_tx_ref[id] = ref;
+               tx->offset = frag->page_offset;
+               tx->size = frag->size;
+               tx->flags = 0;
+       }
+
+       np->tx.req_prod_pvt = prod;
+}
 
 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        unsigned short id;
        struct netfront_info *np = netdev_priv(dev);
        struct netif_tx_request *tx;
+       char *data = skb->data;
        RING_IDX i;
        grant_ref_t ref;
        unsigned long mfn;
        int notify;
-
-       if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
-                    PAGE_SIZE)) {
-               struct sk_buff *nskb;
-               nskb = __dev_alloc_skb(skb->len, GFP_ATOMIC|__GFP_NOWARN);
-               if (unlikely(nskb == NULL))
-                       goto drop;
-               skb_put(nskb, skb->len);
-               memcpy(nskb->data, skb->data, skb->len);
-               /* Copy only the header fields we use in this driver. */
-               nskb->dev = skb->dev;
-               nskb->ip_summed = skb->ip_summed;
-               nskb->proto_data_valid = skb->proto_data_valid;
-               dev_kfree_skb(skb);
-               skb = nskb;
+       int frags = skb_shinfo(skb)->nr_frags;
+       unsigned int offset = offset_in_page(data);
+       unsigned int len = skb_headlen(skb);
+
+       frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
+       if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
+               printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
+                      frags);
+               dump_stack();
+               goto drop;
        }
 
        spin_lock_irq(&np->tx_lock);
 
-       if (unlikely(!netif_carrier_ok(dev))) {
+       if (unlikely(!netif_carrier_ok(dev) ||
+                    (frags > 1 && !xennet_can_sg(dev)))) {
                spin_unlock_irq(&np->tx_lock);
                goto drop;
        }
@@ -682,12 +755,12 @@ static int network_start_xmit(struct sk_
        tx->id   = id;
        ref = gnttab_claim_grant_reference(&np->gref_tx_head);
        BUG_ON((signed short)ref < 0);
-       mfn = virt_to_mfn(skb->data);
+       mfn = virt_to_mfn(data);
        gnttab_grant_foreign_access_ref(
                ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
        tx->gref = np->grant_tx_ref[id] = ref;
-       tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
-       tx->size = skb->len;
+       tx->offset = offset;
+       tx->size = len;
 
        tx->flags = 0;
        if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
@@ -696,14 +769,17 @@ static int network_start_xmit(struct sk_
                tx->flags |= NETTXF_data_validated;
 
        np->tx.req_prod_pvt = i + 1;
+
+       xennet_make_frags(skb, dev, tx);
+       tx->size = skb->len;
+
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
        if (notify)
                notify_remote_via_irq(np->irq);
 
        network_tx_buf_gc(dev);
 
-       if (RING_FULL(&np->tx) ||
-           gnttab_empty_grant_references(&np->gref_tx_head))
+       if (!netfront_tx_slot_available(np))
                netif_stop_queue(dev);
 
        spin_unlock_irq(&np->tx_lock);
@@ -963,12 +1039,46 @@ static struct net_device_stats *network_
        return &np->stats;
 }
 
+static int xennet_change_mtu(struct net_device *dev, int mtu)
+{
+       int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+       if (mtu > max)
+               return -EINVAL;
+       dev->mtu = mtu;
+       return 0;
+}
+
+static int xennet_set_sg(struct net_device *dev, u32 data)
+{
+       if (data) {
+               struct netfront_info *np = netdev_priv(dev);
+               int val;
+
+               if (xenbus_scanf(XBT_NULL, np->xbdev->otherend, "feature-sg",
+                                "%d", &val) < 0)
+                       val = 0;
+               if (!val)
+                       return -ENOSYS;
+       } else if (dev->mtu > ETH_DATA_LEN)
+               dev->mtu = ETH_DATA_LEN;
+
+       return ethtool_op_set_sg(dev, data);
+}
+
+static void xennet_set_features(struct net_device *dev)
+{
+       xennet_set_sg(dev, 1);
+}
+
 static void network_connect(struct net_device *dev)
 {
        struct netfront_info *np;
        int i, requeue_idx;
        struct netif_tx_request *tx;
        struct sk_buff *skb;
+
+       xennet_set_features(dev);
 
        np = netdev_priv(dev);
        spin_lock_irq(&np->tx_lock);
@@ -1081,6 +1191,8 @@ static struct ethtool_ops network_ethtoo
 {
        .get_tx_csum = ethtool_op_get_tx_csum,
        .set_tx_csum = ethtool_op_set_tx_csum,
+       .get_sg = ethtool_op_get_sg,
+       .set_sg = xennet_set_sg,
 };
 
 #ifdef CONFIG_SYSFS
@@ -1297,6 +1409,7 @@ static struct net_device * __devinit cre
        netdev->poll            = netif_poll;
        netdev->set_multicast_list = network_set_multicast_list;
        netdev->uninit          = netif_uninit;
+       netdev->change_mtu      = xennet_change_mtu;
        netdev->weight          = 64;
        netdev->features        = NETIF_F_IP_CSUM;
 
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Wed Jun 07 11:03:51 2006 +0100
@@ -61,11 +61,11 @@ static int privcmd_ioctl(struct inode *i
                __asm__ __volatile__ (
                        "pushl %%ebx; pushl %%ecx; pushl %%edx; "
                        "pushl %%esi; pushl %%edi; "
-                       "movl  4(%%eax),%%ebx ;"
-                       "movl  8(%%eax),%%ecx ;"
-                       "movl 12(%%eax),%%edx ;"
-                       "movl 16(%%eax),%%esi ;"
-                       "movl 20(%%eax),%%edi ;"
+                       "movl  8(%%eax),%%ebx ;"
+                       "movl 16(%%eax),%%ecx ;"
+                       "movl 24(%%eax),%%edx ;"
+                       "movl 32(%%eax),%%esi ;"
+                       "movl 40(%%eax),%%edi ;"
                        "movl   (%%eax),%%eax ;"
                        "shll $5,%%eax ;"
                        "addl $hypercall_page,%%eax ;"
@@ -161,7 +161,7 @@ static int privcmd_ioctl(struct inode *i
        case IOCTL_PRIVCMD_MMAPBATCH: {
                privcmd_mmapbatch_t m;
                struct vm_area_struct *vma = NULL;
-               unsigned long __user *p;
+               xen_pfn_t __user *p;
                unsigned long addr, mfn; 
                int i;
 
@@ -210,7 +210,7 @@ static int privcmd_ioctl(struct inode *i
        batch_err:
                printk("batch_err ret=%d vma=%p addr=%lx "
                       "num=%d arr=%p %lx-%lx\n", 
-                      ret, vma, m.addr, m.num, m.arr,
+                      ret, vma, (unsigned long)m.addr, m.num, m.arr,
                       vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
                break;
        }
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h       Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h       Wed Jun 07 11:03:51 2006 +0100
@@ -116,10 +116,12 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
        __asm__ ( \
                "movl %%cr3,%0\n\t" \
                :"=r" (__dummy)); \
-       machine_to_phys(__dummy); \
+       __dummy = xen_cr3_to_pfn(__dummy); \
+       mfn_to_pfn(__dummy) << PAGE_SHIFT; \
 })
 #define write_cr3(x) ({                                                \
-       maddr_t __dummy = phys_to_machine(x);                   \
+       unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT);   \
+       __dummy = xen_pfn_to_cr3(__dummy);                      \
        __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy));  \
 })
 
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Wed Jun 07 11:03:51 2006 +0100
@@ -61,13 +61,6 @@ static void __init machine_specific_arch
                .address = { __KERNEL_CS, (unsigned long)nmi },
        };
 
-       if (xen_feature(XENFEAT_auto_translated_physmap) &&
-           xen_start_info->shared_info < xen_start_info->nr_pages) {
-               HYPERVISOR_shared_info =
-                       (shared_info_t *)__va(xen_start_info->shared_info);
-               memset(empty_zero_page, 0, sizeof(empty_zero_page));
-       }
-
        ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
        if (ret == 0)
                ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/include/xen/public/privcmd.h
--- a/linux-2.6-xen-sparse/include/xen/public/privcmd.h Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/include/xen/public/privcmd.h Wed Jun 07 11:03:51 2006 +0100
@@ -33,20 +33,22 @@
 #ifndef __LINUX_PUBLIC_PRIVCMD_H__
 #define __LINUX_PUBLIC_PRIVCMD_H__
 
+#include <linux/types.h>
+
 #ifndef __user
 #define __user
 #endif
 
 typedef struct privcmd_hypercall
 {
-       unsigned long op;
-       unsigned long arg[5];
+       __u64 op;
+       __u64 arg[5];
 } privcmd_hypercall_t;
 
 typedef struct privcmd_mmap_entry {
-       unsigned long va;
-       unsigned long mfn;
-       unsigned long npages;
+       __u64 va;
+       __u64 mfn;
+       __u64 npages;
 } privcmd_mmap_entry_t; 
 
 typedef struct privcmd_mmap {
@@ -58,8 +60,8 @@ typedef struct privcmd_mmapbatch {
 typedef struct privcmd_mmapbatch {
        int num;     /* number of pages to populate */
        domid_t dom; /* target domain */
-       unsigned long addr;  /* virtual address */
-       unsigned long __user *arr; /* array of mfns - top nibble set on err */
+       __u64 addr;  /* virtual address */
+       xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
 } privcmd_mmapbatch_t; 
 
 /*
diff -r b09dbe439169 -r 9d86c1a70f34 tools/debugger/libxendebug/xendebug.c
--- a/tools/debugger/libxendebug/xendebug.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/debugger/libxendebug/xendebug.c     Wed Jun 07 11:03:51 2006 +0100
@@ -57,7 +57,7 @@ typedef struct domain_context           
     vcpu_guest_context_t context[MAX_VIRT_CPUS];
 
     long            total_pages;
-    unsigned long  *page_array;
+    xen_pfn_t      *page_array;
 
     unsigned long   cr3_phys[MAX_VIRT_CPUS];
     unsigned long  *cr3_virt[MAX_VIRT_CPUS];
@@ -346,8 +346,9 @@ xendebug_memory_page (domain_context_p c
         ctxt->cr3_phys[vcpu] = vcpu_ctxt->ctrlreg[3];
         if ( ctxt->cr3_virt[vcpu] )
             munmap(ctxt->cr3_virt[vcpu], PAGE_SIZE);
-        ctxt->cr3_virt[vcpu] = xc_map_foreign_range(xc_handle, ctxt->domid,
-                    PAGE_SIZE, PROT_READ, ctxt->cr3_phys[vcpu] >> PAGE_SHIFT);
+        ctxt->cr3_virt[vcpu] = xc_map_foreign_range(
+            xc_handle, ctxt->domid, PAGE_SIZE, PROT_READ,
+            xen_cr3_to_pfn(ctxt->cr3_phys[vcpu]));
         if ( ctxt->cr3_virt[vcpu] == NULL )
             return 0;
     } 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/firmware/hvmloader/Makefile
--- a/tools/firmware/hvmloader/Makefile Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/firmware/hvmloader/Makefile Wed Jun 07 11:03:51 2006 +0100
@@ -51,8 +51,8 @@ hvmloader: roms.h hvmloader.c acpi_madt.
        $(OBJCOPY) hvmloader.tmp hvmloader
        rm -f hvmloader.tmp
 
-roms.h:        ../rombios/BIOS-bochs-8-processors ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin
-       sh ./mkhex rombios ../rombios/BIOS-bochs-8-processors > roms.h
+roms.h:        ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin
+       sh ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h
        sh ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h
        sh ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h
        sh ./mkhex vmxassist ../vmxassist/vmxassist.bin >> roms.h
diff -r b09dbe439169 -r 9d86c1a70f34 tools/firmware/rombios/Makefile
--- a/tools/firmware/rombios/Makefile   Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/firmware/rombios/Makefile   Wed Jun 07 11:03:51 2006 +0100
@@ -1,7 +1,7 @@
-#BIOS_BUILDS = BIOS-bochs-latest
+BIOS_BUILDS = BIOS-bochs-latest
 #BIOS_BUILDS += BIOS-bochs-2-processors
 #BIOS_BUILDS += BIOS-bochs-4-processors
-BIOS_BUILDS += BIOS-bochs-8-processors
+#BIOS_BUILDS += BIOS-bochs-8-processors
 
 .PHONY: all
 all: bios
diff -r b09dbe439169 -r 9d86c1a70f34 tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/firmware/vmxassist/vm86.c   Wed Jun 07 11:03:51 2006 +0100
@@ -36,6 +36,8 @@
 
 static unsigned prev_eip = 0;
 enum vm86_mode mode = 0;
+
+static struct regs saved_rm_regs;
 
 #ifdef DEBUG
 int traceset = 0;
@@ -795,6 +797,8 @@ protected_mode(struct regs *regs)
        oldctx.esp = regs->uesp;
        oldctx.eflags = regs->eflags;
 
+       memset(&saved_rm_regs, 0, sizeof(struct regs));
+
        /* reload all segment registers */
        if (!load_seg(regs->cs, &oldctx.cs_base,
                                &oldctx.cs_limit, &oldctx.cs_arbytes))
@@ -808,6 +812,7 @@ protected_mode(struct regs *regs)
                load_seg(0, &oldctx.es_base,
                            &oldctx.es_limit, &oldctx.es_arbytes);
                oldctx.es_sel = 0;
+               saved_rm_regs.ves = regs->ves;
        }
 
        if (load_seg(regs->uss, &oldctx.ss_base,
@@ -817,6 +822,7 @@ protected_mode(struct regs *regs)
                load_seg(0, &oldctx.ss_base,
                            &oldctx.ss_limit, &oldctx.ss_arbytes);
                oldctx.ss_sel = 0;
+               saved_rm_regs.uss = regs->uss;
        }
 
        if (load_seg(regs->vds, &oldctx.ds_base,
@@ -826,6 +832,7 @@ protected_mode(struct regs *regs)
                load_seg(0, &oldctx.ds_base,
                            &oldctx.ds_limit, &oldctx.ds_arbytes);
                oldctx.ds_sel = 0;
+               saved_rm_regs.vds = regs->vds;
        }
 
        if (load_seg(regs->vfs, &oldctx.fs_base,
@@ -835,6 +842,7 @@ protected_mode(struct regs *regs)
                load_seg(0, &oldctx.fs_base,
                            &oldctx.fs_limit, &oldctx.fs_arbytes);
                oldctx.fs_sel = 0;
+               saved_rm_regs.vfs = regs->vfs;
        }
 
        if (load_seg(regs->vgs, &oldctx.gs_base,
@@ -844,6 +852,7 @@ protected_mode(struct regs *regs)
                load_seg(0, &oldctx.gs_base,
                            &oldctx.gs_limit, &oldctx.gs_arbytes);
                oldctx.gs_sel = 0;
+               saved_rm_regs.vgs = regs->vgs;
        }
 
        /* initialize jump environment to warp back to protected mode */
@@ -880,16 +889,22 @@ real_mode(struct regs *regs)
                if (regs->uss >= HIGHMEM)
                        panic("%%ss 0x%lx higher than 1MB", regs->uss);
                regs->uss = address(regs, regs->uss, 0) >> 4;
+       } else {
+         regs->uss = saved_rm_regs.uss;
        }
        if (regs->vds != 0) {
                if (regs->vds >= HIGHMEM)
                        panic("%%ds 0x%lx higher than 1MB", regs->vds);
                regs->vds = address(regs, regs->vds, 0) >> 4;
+       } else {
+         regs->vds = saved_rm_regs.vds;
        }
        if (regs->ves != 0) {
                if (regs->ves >= HIGHMEM)
                        panic("%%es 0x%lx higher than 1MB", regs->ves);
                regs->ves = address(regs, regs->ves, 0) >> 4;
+       } else {
+         regs->ves = saved_rm_regs.ves;
        }
 
        /* this should get us into 16-bit mode */
@@ -971,6 +986,39 @@ jmpl(struct regs *regs, int prefix)
        } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
                eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
                cs = fetch16(regs);
+
+               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+
+                regs->cs = cs;
+                regs->eip = eip;
+               set_mode(regs, VM86_REAL);
+       } else
+               panic("jmpl");
+}
+
+static void
+jmpl_indirect(struct regs *regs, int prefix, unsigned modrm)
+{
+       unsigned n = regs->eip;
+       unsigned cs, eip;
+       unsigned addr;
+
+       addr  = operand(prefix, regs, modrm);
+
+       if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
+               eip = (prefix & DATA32) ? read32(addr) : read16(addr);
+               addr += (prefix & DATA32) ? 4 : 2;
+               cs = read16(addr);
+
+               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+
+                regs->cs = cs;
+                regs->eip = eip;
+               set_mode(regs, VM86_PROTECTED);
+       } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
+               eip = (prefix & DATA32) ? read32(addr) : read16(addr);
+               addr += (prefix & DATA32) ? 4 : 2;
+               cs = read16(addr);
 
                TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
 
@@ -1306,6 +1354,23 @@ opcode(struct regs *regs)
                        }
                        goto invalid;
 
+               case 0xFF: /* jmpl (indirect) */
+                       if ((mode == VM86_REAL_TO_PROTECTED) ||
+                           (mode == VM86_PROTECTED_TO_REAL)) {
+                               unsigned modrm = fetch8(regs);
+                               
+                               switch((modrm >> 3) & 7) {
+                               case 5:
+                                 jmpl_indirect(regs, prefix, modrm);
+                                 return OPC_INVALID;
+
+                               default:
+                                 break;
+                               }
+
+                       }
+                       goto invalid;
+
                case 0xEB: /* short jump */
                        if ((mode == VM86_REAL_TO_PROTECTED) ||
                            (mode == VM86_PROTECTED_TO_REAL)) {
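
The new 0xFF case above emulates only the far indirect jump form: for opcode 0xFF the reg field of the ModRM byte, (modrm >> 3) & 7, selects the operation, and value 5 denotes JMP FAR m16:16/m16:32, which jmpl_indirect() handles during the real/protected mode transitions. A small illustrative decoder (not vmxassist code):

/* Sketch only: how the reg field of a ModRM byte selects the far
 * indirect jump (0xFF /5) that jmpl_indirect() emulates. */
#include <stdio.h>

int main(void)
{
    unsigned char modrm = 0x2e;     /* example byte: mod=00, reg=101 (5), rm=110 */
    unsigned reg = (modrm >> 3) & 7;

    if (reg == 5)
        printf("0xFF /5: far indirect jmp -> jmpl_indirect()\n");
    else
        printf("0xFF /%u: not emulated here\n", reg);
    return 0;
}
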
diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/ioemu/hw/cirrus_vga.c       Wed Jun 07 11:03:51 2006 +0100
@@ -2460,10 +2460,9 @@ static CPUWriteMemoryFunc *cirrus_linear
 };
 
 extern FILE *logfile;
-#if defined(__i386__) || defined (__x86_64__)
 static void * set_vram_mapping(unsigned long begin, unsigned long end)
 {
-    unsigned long * extent_start = NULL;
+    xen_pfn_t *extent_start = NULL;
     unsigned long nr_extents;
     void *vram_pointer = NULL;
     int i;
@@ -2474,14 +2473,14 @@ static void * set_vram_mapping(unsigned 
     end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
     nr_extents = (end - begin) >> TARGET_PAGE_BITS;
 
-    extent_start = malloc(sizeof(unsigned long) * nr_extents );
+    extent_start = malloc(sizeof(xen_pfn_t) * nr_extents );
     if (extent_start == NULL)
     {
         fprintf(stderr, "Failed malloc on set_vram_mapping\n");
         return NULL;
     }
 
-    memset(extent_start, 0, sizeof(unsigned long) * nr_extents);
+    memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents);
 
     for (i = 0; i < nr_extents; i++)
     {
@@ -2509,7 +2508,7 @@ static void * set_vram_mapping(unsigned 
 
 static int unset_vram_mapping(unsigned long begin, unsigned long end)
 {
-    unsigned long * extent_start = NULL;
+    xen_pfn_t *extent_start = NULL;
     unsigned long nr_extents;
     int i;
 
@@ -2520,7 +2519,7 @@ static int unset_vram_mapping(unsigned l
     end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
     nr_extents = (end - begin) >> TARGET_PAGE_BITS;
 
-    extent_start = malloc(sizeof(unsigned long) * nr_extents );
+    extent_start = malloc(sizeof(xen_pfn_t) * nr_extents );
 
     if (extent_start == NULL)
     {
@@ -2528,7 +2527,7 @@ static int unset_vram_mapping(unsigned l
         return -1;
     }
 
-    memset(extent_start, 0, sizeof(unsigned long) * nr_extents);
+    memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents);
 
     for (i = 0; i < nr_extents; i++)
         extent_start[i] = (begin + (i * TARGET_PAGE_SIZE)) >> TARGET_PAGE_BITS;
@@ -2540,10 +2539,6 @@ static int unset_vram_mapping(unsigned l
     return 0;
 }
 
-#elif defined(__ia64__)
-static void * set_vram_mapping(unsigned long addr, unsigned long end) {}
-static int unset_vram_mapping(unsigned long addr, unsigned long end) {}
-#endif
 extern int vga_accelerate;
 
 /* Compute the memory access functions */
diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/ioemu/hw/pc.c       Wed Jun 07 11:03:51 2006 +0100
@@ -537,8 +537,11 @@ void pc_init(uint64_t ram_size, int vga_
     for(i = 0; i < MAX_SERIAL_PORTS; i++) {
         if (serial_hds[i]) {
             sp = serial_init(serial_io[i], serial_irq[i], serial_hds[i]);
-            if (i == SUMMA_PORT)
+            if (i == serial_summa_port) {
                summa_init(sp, serial_hds[i]);
+               fprintf(stderr, "Serial port %d (COM%d) initialized for Summagraphics\n",
+                       i, i+1);
+           }
         }
     }
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/ioemu/hw/vga.c      Wed Jun 07 11:03:51 2006 +0100
@@ -1995,6 +1995,7 @@ void vga_common_init(VGAState *s, Displa
     s->get_resolution = vga_get_resolution;
     /* XXX: currently needed for display */
     vga_state = s;
+    vga_bios_init(s);
 }
 
 
@@ -2082,7 +2083,6 @@ int vga_initialize(PCIBus *bus, DisplayS
 #endif
     }
 
-    vga_bios_init(s);
     return 0;
 }
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/ioemu/vl.c  Wed Jun 07 11:03:51 2006 +0100
@@ -146,6 +146,7 @@ int repeat_key = 1;
 int repeat_key = 1;
 TextConsole *vga_console;
 CharDriverState *serial_hds[MAX_SERIAL_PORTS];
+int serial_summa_port = -1;
 int xc_handle;
 time_t timeoffset = 0;
 
@@ -2457,7 +2458,7 @@ int unset_mm_mapping(int xc_handle,
                      uint32_t domid,
                      unsigned long nr_pages,
                      unsigned int address_bits,
-                     unsigned long *extent_start)
+                     xen_pfn_t *extent_start)
 {
     int err = 0;
     xc_dominfo_t info;
@@ -2490,7 +2491,7 @@ int set_mm_mapping(int xc_handle,
                     uint32_t domid,
                     unsigned long nr_pages,
                     unsigned int address_bits,
-                    unsigned long *extent_start)
+                    xen_pfn_t *extent_start)
 {
     xc_dominfo_t info;
     int err = 0;
@@ -2498,7 +2499,7 @@ int set_mm_mapping(int xc_handle,
     xc_domain_getinfo(xc_handle, domid, 1, &info);
 
     if ( xc_domain_setmaxmem(xc_handle, domid,
-                             (info.nr_pages + nr_pages) * PAGE_SIZE/1024) != 0)
+                             info.max_memkb + nr_pages * PAGE_SIZE/1024) !=0)
     {
         fprintf(logfile, "set maxmem returned error %d\n", errno);
         return -1;
@@ -2556,7 +2557,8 @@ int main(int argc, char **argv)
     int serial_device_index;
     char qemu_dm_logfilename[64];
     const char *loadvm = NULL;
-    unsigned long nr_pages, *page_array;
+    unsigned long nr_pages;
+    xen_pfn_t *page_array;
     extern void *shared_page;
 
 #if !defined(CONFIG_SOFTMMU)
@@ -2588,8 +2590,8 @@ int main(int argc, char **argv)
     pstrcpy(monitor_device, sizeof(monitor_device), "vc");
 
     pstrcpy(serial_devices[0], sizeof(serial_devices[0]), "vc");
-    pstrcpy(serial_devices[1], sizeof(serial_devices[1]), "null");
-    for(i = 2; i < MAX_SERIAL_PORTS; i++)
+    serial_summa_port = -1;
+    for(i = 1; i < MAX_SERIAL_PORTS; i++)
         serial_devices[i][0] = '\0';
     serial_device_index = 0;
 
@@ -3022,8 +3024,8 @@ int main(int argc, char **argv)
 
     xc_handle = xc_interface_open();
 
-    if ( (page_array = (unsigned long *)
-                        malloc(nr_pages * sizeof(unsigned long))) == NULL)
+    if ( (page_array = (xen_pfn_t *)
+                        malloc(nr_pages * sizeof(xen_pfn_t))) == NULL)
     {
         fprintf(logfile, "malloc returned error %d\n", errno);
         exit(-1);
@@ -3078,8 +3080,8 @@ int main(int argc, char **argv)
                                        page_array[0]);
 #endif
 
-    fprintf(logfile, "shared page at pfn:%lx, mfn: %lx\n", (nr_pages-1),
-           (page_array[nr_pages - 1]));
+    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", (nr_pages-1),
+           (uint64_t)(page_array[nr_pages - 1]));
 
     /* we always create the cdrom drive, even if no disk is there */
     bdrv_init();
@@ -3173,6 +3175,20 @@ int main(int argc, char **argv)
     }
     monitor_init(monitor_hd, !nographic);
 
+    /* Find which port should be the Summagraphics port */
+    /* It's the first unspecified serial line. Note that COM1 is set */
+    /* by default, so the Summagraphics port would be COM2 or higher */
+
+    for(i = 0; i < MAX_SERIAL_PORTS; i++) {
+      if (serial_devices[i][0] != '\0')
+       continue;
+      serial_summa_port = i;
+      pstrcpy(serial_devices[serial_summa_port], sizeof(serial_devices[0]), "null");
+      break;
+    }
+
+    /* Now, open the ports */
+
     for(i = 0; i < MAX_SERIAL_PORTS; i++) {
         if (serial_devices[i][0] != '\0') {
             serial_hds[i] = qemu_chr_open(serial_devices[i]);
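
With the fixed SUMMA_PORT define removed, the Summagraphics tablet now attaches to the first serial slot the user left unspecified (COM1 defaults to "vc", so in practice COM2 or later), and that slot is opened as "null". A condensed stand-alone sketch of the selection loop above (hypothetical, simplified types; real qemu-dm opens character devices instead of printing):

/* Sketch only: condensed form of the Summagraphics port selection added to vl.c. */
#include <stdio.h>
#include <string.h>

#define MAX_SERIAL_PORTS 4

int main(void)
{
    char serial_devices[MAX_SERIAL_PORTS][16] = { "vc", "", "", "" };
    int serial_summa_port = -1;
    int i;

    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
        if (serial_devices[i][0] != '\0')
            continue;                        /* slot already configured by the user */
        serial_summa_port = i;               /* first free slot gets the tablet */
        strcpy(serial_devices[i], "null");
        break;
    }

    if (serial_summa_port >= 0)
        printf("Summagraphics on COM%d\n", serial_summa_port + 1);
    else
        printf("no free serial slot for Summagraphics\n");
    return 0;
}
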
diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/ioemu/vl.h  Wed Jun 07 11:03:51 2006 +0100
@@ -238,9 +238,9 @@ void console_select(unsigned int index);
 /* serial ports */
 
 #define MAX_SERIAL_PORTS 4
-#define SUMMA_PORT     1
 
 extern CharDriverState *serial_hds[MAX_SERIAL_PORTS];
+extern int serial_summa_port;
 
 /* network redirectors support */
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_core.c     Wed Jun 07 11:03:51 2006 +0100
@@ -28,7 +28,7 @@ xc_domain_dumpcore_via_callback(int xc_h
                                 dumpcore_rtn_t dump_rtn)
 {
     unsigned long nr_pages;
-    unsigned long *page_array = NULL;
+    xen_pfn_t *page_array = NULL;
     xc_dominfo_t info;
     int i, nr_vcpus = 0;
     char *dump_mem, *dump_mem_start = NULL;
@@ -70,7 +70,7 @@ xc_domain_dumpcore_via_callback(int xc_h
         sizeof(vcpu_guest_context_t)*nr_vcpus;
     dummy_len = (sizeof(struct xc_core_header) +
                  (sizeof(vcpu_guest_context_t) * nr_vcpus) +
-                 (nr_pages * sizeof(unsigned long)));
+                 (nr_pages * sizeof(xen_pfn_t)));
     header.xch_pages_offset = round_pgup(dummy_len);
 
     sts = dump_rtn(args, (char *)&header, sizeof(struct xc_core_header));
@@ -81,7 +81,7 @@ xc_domain_dumpcore_via_callback(int xc_h
     if ( sts != 0 )
         goto error_out;
 
-    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
     {
         printf("Could not allocate memory\n");
         goto error_out;
@@ -91,7 +91,7 @@ xc_domain_dumpcore_via_callback(int xc_h
         printf("Could not get the page frame list\n");
         goto error_out;
     }
-    sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(unsigned long));
+    sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(xen_pfn_t));
     if ( sts != 0 )
         goto error_out;
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_domain.c   Wed Jun 07 11:03:51 2006 +0100
@@ -291,7 +291,7 @@ int xc_domain_memory_increase_reservatio
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
                                           unsigned int address_bits,
-                                          unsigned long *extent_start)
+                                          xen_pfn_t *extent_start)
 {
     int err;
     struct xen_memory_reservation reservation = {
@@ -324,7 +324,7 @@ int xc_domain_memory_decrease_reservatio
                                           uint32_t domid,
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
-                                          unsigned long *extent_start)
+                                          xen_pfn_t *extent_start)
 {
     int err;
     struct xen_memory_reservation reservation = {
@@ -363,7 +363,7 @@ int xc_domain_memory_populate_physmap(in
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
                                           unsigned int address_bits,
-                                          unsigned long *extent_start)
+                                          xen_pfn_t *extent_start)
 {
     int err;
     struct xen_memory_reservation reservation = {
@@ -392,8 +392,8 @@ int xc_domain_translate_gpfn_list(int xc
 int xc_domain_translate_gpfn_list(int xc_handle,
                                   uint32_t domid,
                                   unsigned long nr_gpfns,
-                                  unsigned long *gpfn_list,
-                                  unsigned long *mfn_list)
+                                  xen_pfn_t *gpfn_list,
+                                  xen_pfn_t *mfn_list)
 {
     struct xen_translate_gpfn_list op = {
         .domid        = domid,
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_hvm_build.c        Wed Jun 07 11:03:51 2006 +0100
@@ -135,7 +135,7 @@ static void set_hvm_info_checksum(struct
  * hvmloader will use this info to set BIOS accordingly
  */
 static int set_hvm_info(int xc_handle, uint32_t dom,
-                        unsigned long *pfn_list, unsigned int vcpus,
+                        xen_pfn_t *pfn_list, unsigned int vcpus,
                         unsigned int pae, unsigned int acpi, unsigned int apic)
 {
     char *va_map;
@@ -178,7 +178,7 @@ static int setup_guest(int xc_handle,
                        unsigned int store_evtchn,
                        unsigned long *store_mfn)
 {
-    unsigned long *page_array = NULL;
+    xen_pfn_t *page_array = NULL;
     unsigned long count, i;
     unsigned long long ptr;
     xc_mmu_t *mmu = NULL;
@@ -223,7 +223,7 @@ static int setup_guest(int xc_handle,
         goto error_out;
     }
 
-    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
     {
         PERROR("Could not allocate memory.\n");
         goto error_out;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_ia64_stubs.c       Wed Jun 07 11:03:51 2006 +0100
@@ -57,7 +57,7 @@ xc_plan9_build(int xc_handle,
 
 int xc_ia64_get_pfn_list(int xc_handle,
                          uint32_t domid,
-                         unsigned long *pfn_buf,
+                         xen_pfn_t *pfn_buf,
                          unsigned int start_page,
                          unsigned int nr_pages)
 {
@@ -65,7 +65,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
     int num_pfns,ret;
     unsigned int __start_page, __nr_pages;
     unsigned long max_pfns;
-    unsigned long *__pfn_buf;
+    xen_pfn_t *__pfn_buf;
 
     __start_page = start_page;
     __nr_pages = nr_pages;
@@ -80,7 +80,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
         set_xen_guest_handle(op.u.getmemlist.buffer, __pfn_buf);
 
         if ( (max_pfns != -1UL)
-            && mlock(__pfn_buf, __nr_pages * sizeof(unsigned long)) != 0 )
+            && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0 )
         {
             PERROR("Could not lock pfn list buffer");
             return -1;
@@ -89,7 +89,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
         ret = do_dom0_op(xc_handle, &op);
 
         if (max_pfns != -1UL)
-            (void)munlock(__pfn_buf, __nr_pages * sizeof(unsigned long));
+            (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
 
         if (max_pfns == -1UL)
             return 0;
@@ -122,10 +122,10 @@ int xc_ia64_copy_to_domain_pages(int xc_
 {
     // N.B. gva should be page aligned
 
-    unsigned long *page_array = NULL;
+    xen_pfn_t *page_array = NULL;
     int i;
 
-    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ){
+    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ){
         PERROR("Could not allocate memory");
         goto error_out;
     }
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c    Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_linux.c    Wed Jun 07 11:03:51 2006 +0100
@@ -28,7 +28,7 @@ int xc_interface_close(int xc_handle)
 }
 
 void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot,
-                           unsigned long *arr, int num)
+                           xen_pfn_t *arr, int num)
 {
     privcmd_mmapbatch_t ioctlx;
     void *addr;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_linux_build.c      Wed Jun 07 11:03:51 2006 +0100
@@ -10,6 +10,7 @@
 #include "xc_aout9.h"
 #include <stdlib.h>
 #include <unistd.h>
+#include <inttypes.h>
 #include <zlib.h>
 
 #if defined(__i386__)
@@ -136,7 +137,7 @@ int load_initrd(int xc_handle, domid_t d
 int load_initrd(int xc_handle, domid_t dom,
                 struct initrd_info *initrd,
                 unsigned long physbase,
-                unsigned long *phys_to_mach)
+                xen_pfn_t *phys_to_mach)
 {
     char page[PAGE_SIZE];
     unsigned long pfn_start, pfn, nr_pages;
@@ -189,7 +190,7 @@ static int setup_pg_tables(int xc_handle
                            vcpu_guest_context_t *ctxt,
                            unsigned long dsi_v_start,
                            unsigned long v_end,
-                           unsigned long *page_array,
+                           xen_pfn_t *page_array,
                            unsigned long vpt_start,
                            unsigned long vpt_end,
                            unsigned shadow_mode_enabled)
@@ -205,9 +206,9 @@ static int setup_pg_tables(int xc_handle
     alloc_pt(l2tab, vl2tab, pl2tab);
     vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
     if (shadow_mode_enabled)
-        ctxt->ctrlreg[3] = pl2tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl2tab >> PAGE_SHIFT);
     else
-        ctxt->ctrlreg[3] = l2tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(l2tab >> PAGE_SHIFT);
 
     for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
     {
@@ -251,26 +252,42 @@ static int setup_pg_tables_pae(int xc_ha
                                vcpu_guest_context_t *ctxt,
                                unsigned long dsi_v_start,
                                unsigned long v_end,
-                               unsigned long *page_array,
+                               xen_pfn_t *page_array,
                                unsigned long vpt_start,
                                unsigned long vpt_end,
-                               unsigned shadow_mode_enabled)
+                               unsigned shadow_mode_enabled,
+                               unsigned pae_mode)
 {
     l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL;
     l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
     l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
     uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
-    unsigned long ppt_alloc, count;
+    unsigned long ppt_alloc, count, nmfn;
 
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
+
+    if ( pae_mode == PAEKERN_extended_cr3 )
+    {
+        ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3);
+    }
+    else if ( page_array[ppt_alloc] > 0xfffff )
+    {
+        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
+        if ( nmfn == 0 )
+        {
+            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
+            goto error_out;
+        }
+        page_array[ppt_alloc] = nmfn;
+    }
 
     alloc_pt(l3tab, vl3tab, pl3tab);
     vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
     if (shadow_mode_enabled)
-        ctxt->ctrlreg[3] = pl3tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl3tab >> PAGE_SHIFT);
     else
-        ctxt->ctrlreg[3] = l3tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(l3tab >> PAGE_SHIFT);
 
     for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++)
     {
@@ -340,7 +357,7 @@ static int setup_pg_tables_64(int xc_han
                               vcpu_guest_context_t *ctxt,
                               unsigned long dsi_v_start,
                               unsigned long v_end,
-                              unsigned long *page_array,
+                              xen_pfn_t *page_array,
                               unsigned long vpt_start,
                               unsigned long vpt_end,
                               int shadow_mode_enabled)
@@ -361,9 +378,9 @@ static int setup_pg_tables_64(int xc_han
     alloc_pt(l4tab, vl4tab, pl4tab);
     vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
     if (shadow_mode_enabled)
-        ctxt->ctrlreg[3] = pl4tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl4tab >> PAGE_SHIFT);
     else
-        ctxt->ctrlreg[3] = l4tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(l4tab >> PAGE_SHIFT);
 
     for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
     {
@@ -451,7 +468,7 @@ static int setup_guest(int xc_handle,
                        unsigned int console_evtchn, unsigned long *console_mfn,
                        uint32_t required_features[XENFEAT_NR_SUBMAPS])
 {
-    unsigned long *page_array = NULL;
+    xen_pfn_t *page_array = NULL;
     struct load_funcs load_funcs;
     struct domain_setup_info dsi;
     unsigned long vinitrd_start;
@@ -478,7 +495,7 @@ static int setup_guest(int xc_handle,
 
     start_page = dsi.v_start >> PAGE_SHIFT;
     pgnr = (v_end - dsi.v_start) >> PAGE_SHIFT;
-    if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL )
+    if ( (page_array = malloc(pgnr * sizeof(xen_pfn_t))) == NULL )
     {
         PERROR("Could not allocate memory");
         goto error_out;
@@ -579,11 +596,11 @@ static int compat_check(int xc_handle, s
     }
 
     if (strstr(xen_caps, "xen-3.0-x86_32p")) {
-        if (!dsi->pae_kernel) {
+        if (dsi->pae_kernel == PAEKERN_no) {
             ERROR("Non PAE-kernel on PAE host.");
             return 0;
         }
-    } else if (dsi->pae_kernel) {
+    } else if (dsi->pae_kernel != PAEKERN_no) {
         ERROR("PAE-kernel on non-PAE host.");
         return 0;
     }
@@ -606,7 +623,7 @@ static int setup_guest(int xc_handle,
                        unsigned int console_evtchn, unsigned long *console_mfn,
                        uint32_t required_features[XENFEAT_NR_SUBMAPS])
 {
-    unsigned long *page_array = NULL;
+    xen_pfn_t *page_array = NULL;
     unsigned long count, i, hypercall_pfn;
     start_info_t *start_info;
     shared_info_t *shared_info;
@@ -617,7 +634,7 @@ static int setup_guest(int xc_handle,
 
     unsigned long nr_pt_pages;
     unsigned long physmap_pfn;
-    unsigned long *physmap, *physmap_e;
+    xen_pfn_t *physmap, *physmap_e;
 
     struct load_funcs load_funcs;
     struct domain_setup_info dsi;
@@ -673,7 +690,8 @@ static int setup_guest(int xc_handle,
 
     for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ )
     {
-        if ( (supported_features[i]&required_features[i]) != required_features[i] )
+        if ( (supported_features[i] & required_features[i]) !=
+             required_features[i] )
         {
             ERROR("Guest kernel does not support a required feature.");
             goto error_out;
@@ -719,7 +737,7 @@ static int setup_guest(int xc_handle,
     (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
     ((_l) & ~((1UL<<(_s))-1))) >> (_s))
 #if defined(__i386__)
-        if ( dsi.pae_kernel )
+        if ( dsi.pae_kernel != PAEKERN_no )
         {
             if ( (1 + /* # L3 */
                   NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT_PAE) + /* # L2 */
@@ -797,11 +815,11 @@ static int setup_guest(int xc_handle,
 
     /* setup page tables */
 #if defined(__i386__)
-    if (dsi.pae_kernel)
+    if (dsi.pae_kernel != PAEKERN_no)
         rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
                                  dsi.v_start, v_end,
                                  page_array, vpt_start, vpt_end,
-                                 shadow_mode_enabled);
+                                 shadow_mode_enabled, dsi.pae_kernel);
     else
         rc = setup_pg_tables(xc_handle, dom, ctxt,
                              dsi.v_start, v_end,
@@ -824,16 +842,16 @@ static int setup_guest(int xc_handle,
      */
     if ( !shadow_mode_enabled )
     {
-        if ( dsi.pae_kernel )
+        if ( dsi.pae_kernel != PAEKERN_no )
         {
             if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
-                           ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+                           xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) )
                 goto error_out;
         }
         else
         {
             if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
-                           ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+                           xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) )
                 goto error_out;
         }
     }
@@ -845,7 +863,7 @@ static int setup_guest(int xc_handle,
      * correct protection for the page
      */
     if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
-                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+                   xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) )
         goto error_out;
 #endif
 
@@ -865,8 +883,8 @@ static int setup_guest(int xc_handle,
             ((uint64_t)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
             count) )
         {
-            fprintf(stderr,"m2p update failure p=%lx m=%lx\n",
-                    count, page_array[count]);
+            fprintf(stderr,"m2p update failure p=%lx m=%"PRIx64"\n",
+                    count, (uint64_t)page_array[count]);
             munmap(physmap, PAGE_SIZE);
             goto error_out;
         }
@@ -958,7 +976,7 @@ static int setup_guest(int xc_handle,
     rc = xc_version(xc_handle, XENVER_version, NULL);
     sprintf(start_info->magic, "xen-%i.%i-x86_%d%s",
             rc >> 16, rc & (0xFFFF), (unsigned int)sizeof(long)*8,
-            dsi.pae_kernel ? "p" : "");
+            (dsi.pae_kernel != PAEKERN_no) ? "p" : "");
     start_info->nr_pages     = nr_pages;
     start_info->shared_info  = guest_shared_info_mfn << PAGE_SHIFT;
     start_info->flags        = flags;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_linux_restore.c    Wed Jun 07 11:03:51 2006 +0100
@@ -25,10 +25,10 @@ static unsigned long max_pfn;
 static unsigned long max_pfn;
 
 /* Live mapping of the table mapping each PFN to its current MFN. */
-static unsigned long *live_p2m = NULL;
+static xen_pfn_t *live_p2m = NULL;
 
 /* A table mapping each PFN to its new MFN. */
-static unsigned long *p2m = NULL;
+static xen_pfn_t *p2m = NULL;
 
 
 static ssize_t
@@ -108,7 +108,7 @@ int xc_linux_restore(int xc_handle, int 
                      unsigned int console_evtchn, unsigned long *console_mfn)
 {
     DECLARE_DOM0_OP;
-    int rc = 1, i, n;
+    int rc = 1, i, n, pae_extended_cr3 = 0;
     unsigned long mfn, pfn;
     unsigned int prev_pc, this_pc;
     int verify = 0;
@@ -126,7 +126,7 @@ int xc_linux_restore(int xc_handle, int 
     unsigned long *pfn_type = NULL;
 
     /* A table of MFNs to map in the current region */
-    unsigned long *region_mfn = NULL;
+    xen_pfn_t *region_mfn = NULL;
 
     /* Types of the pfns in the current region */
     unsigned long region_pfn_type[MAX_BATCH_SIZE];
@@ -135,7 +135,7 @@ int xc_linux_restore(int xc_handle, int 
     unsigned long *page = NULL;
 
     /* A copy of the pfn-to-mfn table frame list. */
-    unsigned long *p2m_frame_list = NULL;
+    xen_pfn_t *p2m_frame_list = NULL;
 
     /* A temporary mapping of the guest's start_info page. */
     start_info_t *start_info;
@@ -162,30 +162,88 @@ int xc_linux_restore(int xc_handle, int 
         return 1;
     }
 
-
     if (mlock(&ctxt, sizeof(ctxt))) {
         /* needed for build dom0 op, but might as well do early */
         ERR("Unable to mlock ctxt");
         return 1;
     }
 
-
-    /* Read the saved P2M frame list */
-    if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
+    if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
         ERR("Couldn't allocate p2m_frame_list array");
         goto out;
     }
 
-    if (!read_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
+    /* Read first entry of P2M list, or extended-info signature (~0UL). */
+    if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
+        ERR("read extended-info signature failed");
+        goto out;
+    }
+
+    if (p2m_frame_list[0] == ~0UL) {
+        uint32_t tot_bytes;
+
+        /* Next 4 bytes: total size of following extended info. */
+        if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
+            ERR("read extended-info size failed");
+            goto out;
+        }
+
+        while (tot_bytes) {
+            uint32_t chunk_bytes;
+            char     chunk_sig[4];
+
+            /* 4-character chunk signature + 4-byte remaining chunk size. */
+            if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
+                !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
+                ERR("read extended-info chunk signature failed");
+                goto out;
+            }
+            tot_bytes -= 8;
+
+            /* VCPU context structure? */
+            if (!strncmp(chunk_sig, "vcpu", 4)) {
+                if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
+                    ERR("read extended-info vcpu context failed");
+                    goto out;
+                }
+                tot_bytes   -= sizeof(struct vcpu_guest_context);
+                chunk_bytes -= sizeof(struct vcpu_guest_context);
+
+                if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
+                    pae_extended_cr3 = 1;
+            }
+
+            /* Any remaining bytes of this chunk: read and discard. */
+            while (chunk_bytes) {
+                unsigned long sz = chunk_bytes;
+                if ( sz > P2M_FL_SIZE )
+                    sz = P2M_FL_SIZE;
+                if (!read_exact(io_fd, p2m_frame_list, sz)) {
+                    ERR("read-and-discard extended-info chunk bytes failed");
+                    goto out;
+                }
+                chunk_bytes -= sz;
+                tot_bytes   -= sz;
+            }
+        }
+
+        /* Now read the real first entry of P2M list. */
+        if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
+            ERR("read first entry of p2m_frame_list failed");
+            goto out;
+        }
+    }
+
+    /* First entry is already read into the p2m array. */
+    if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
         ERR("read p2m_frame_list failed");
         goto out;
     }
 
-
     /* We want zeroed memory so use calloc rather than malloc. */
-    p2m        = calloc(max_pfn, sizeof(unsigned long));
+    p2m        = calloc(max_pfn, sizeof(xen_pfn_t));
     pfn_type   = calloc(max_pfn, sizeof(unsigned long));
-    region_mfn = calloc(MAX_BATCH_SIZE, sizeof(unsigned long));
+    region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
 
     if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) {
         ERR("memory alloc failed");
@@ -193,7 +251,7 @@ int xc_linux_restore(int xc_handle, int 
         goto out;
     }
 
-    if (mlock(region_mfn, sizeof(unsigned long) * MAX_BATCH_SIZE)) {
+    if (mlock(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
         ERR("Could not mlock region_mfn");
         goto out;
     }
@@ -331,17 +389,27 @@ int xc_linux_restore(int xc_handle, int 
                 ** A page table page - need to 'uncanonicalize' it, i.e.
                 ** replace all the references to pfns with the corresponding
                 ** mfns for the new domain.
+                **
+                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
+                ** so we may need to update the p2m after the main loop.
+                ** Hence we defer canonicalization of L1s until then.
                 */
-                if(!uncanonicalize_pagetable(pagetype, page)) {
-                    /*
-                    ** Failing to uncanonicalize a page table can be ok
-                    ** under live migration since the pages type may have
-                    ** changed by now (and we'll get an update later).
-                    */
-                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
-                            pagetype >> 28, pfn, mfn);
-                    nraces++;
-                    continue;
+                if ((pt_levels != 3) ||
+                    pae_extended_cr3 ||
+                    (pagetype != L1TAB)) {
+
+                    if (!uncanonicalize_pagetable(pagetype, page)) {
+                        /*
+                        ** Failing to uncanonicalize a page table can be ok
+                        ** under live migration since the pages type may have
+                        ** changed by now (and we'll get an update later).
+                        */
+                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+                                pagetype >> 28, pfn, mfn);
+                        nraces++;
+                        continue;
+                    }
+
                 }
 
             } else if(pagetype != NOTAB) {
@@ -389,6 +457,100 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     DPRINTF("Received all pages (%d races)\n", nraces);
+
+    if ((pt_levels == 3) && !pae_extended_cr3) {
+
+        /*
+        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
+        ** is a little awkward and involves (a) finding all such PGDs and
+        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
+        ** with the new info; and (c) canonicalizing all the L1s using the
+        ** (potentially updated) p2m[].
+        **
+        ** This is relatively slow (and currently involves two passes through
+        ** the pfn_type[] array), but at least seems to be correct. May wish
+        ** to consider more complex approaches to optimize this later.
+        */
+
+        int j, k;
+
+        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
+        for (i = 0; i < max_pfn; i++) {
+
+            if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
+
+                unsigned long new_mfn;
+                uint64_t l3ptes[4];
+                uint64_t *l3tab;
+
+                l3tab = (uint64_t *)
+                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                         PROT_READ, p2m[i]);
+
+                for(j = 0; j < 4; j++)
+                    l3ptes[j] = l3tab[j];
+
+                munmap(l3tab, PAGE_SIZE);
+
+                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
+                    ERR("Couldn't get a page below 4GB :-(");
+                    goto out;
+                }
+
+                p2m[i] = new_mfn;
+                if (xc_add_mmu_update(xc_handle, mmu,
+                                      (((unsigned long long)new_mfn)
+                                       << PAGE_SHIFT) |
+                                      MMU_MACHPHYS_UPDATE, i)) {
+                    ERR("Couldn't m2p on PAE root pgdir");
+                    goto out;
+                }
+
+                l3tab = (uint64_t *)
+                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                         PROT_READ | PROT_WRITE, p2m[i]);
+
+                for(j = 0; j < 4; j++)
+                    l3tab[j] = l3ptes[j];
+
+                munmap(l3tab, PAGE_SIZE);
+
+            }
+        }
+
+        /* Second pass: find all L1TABs and uncanonicalize them */
+        j = 0;
+
+        for(i = 0; i < max_pfn; i++) {
+
+            if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
+                region_mfn[j] = p2m[i];
+                j++;
+            }
+
+            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
+
+                if (!(region_base = xc_map_foreign_batch(
+                          xc_handle, dom, PROT_READ | PROT_WRITE,
+                          region_mfn, j))) {
+                    ERR("map batch failed");
+                    goto out;
+                }
+
+                for(k = 0; k < j; k++) {
+                    if(!uncanonicalize_pagetable(L1TAB,
+                                                 region_base + k*PAGE_SIZE)) {
+                        ERR("failed uncanonicalize pt!");
+                        goto out;
+                    }
+                }
+
+                munmap(region_base, j*PAGE_SIZE);
+                j = 0;
+            }
+        }
+
+    }
 
 
     if (xc_finish_mmu_updates(xc_handle, mmu)) {
@@ -536,7 +698,7 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     /* Uncanonicalise the page table base pointer. */
-    pfn = ctxt.ctrlreg[3] >> PAGE_SHIFT;
+    pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
 
     if (pfn >= max_pfn) {
         ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
@@ -552,7 +714,7 @@ int xc_linux_restore(int xc_handle, int 
         goto out;
     }
 
-    ctxt.ctrlreg[3] = p2m[pfn] << PAGE_SHIFT;
+    ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
 
     /* clear any pending events and the selector */
     memset(&(shared_info->evtchn_pending[0]), 0,
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_linux_save.c       Wed Jun 07 11:03:51 2006 +0100
@@ -40,10 +40,10 @@ static unsigned long max_pfn;
 static unsigned long max_pfn;
 
 /* Live mapping of the table mapping each PFN to its current MFN. */
-static unsigned long *live_p2m = NULL;
+static xen_pfn_t *live_p2m = NULL;
 
 /* Live mapping of system MFN to PFN table. */
-static unsigned long *live_m2p = NULL;
+static xen_pfn_t *live_m2p = NULL;
 
 /* grep fodder: machine_to_phys */
 
@@ -501,22 +501,22 @@ void canonicalize_pagetable(unsigned lon
 
 
 
-static unsigned long *xc_map_m2p(int xc_handle,
+static xen_pfn_t *xc_map_m2p(int xc_handle,
                                  unsigned long max_mfn,
                                  int prot)
 {
     struct xen_machphys_mfn_list xmml;
     privcmd_mmap_entry_t *entries;
     unsigned long m2p_chunks, m2p_size;
-    unsigned long *m2p;
-    unsigned long *extent_start;
+    xen_pfn_t *m2p;
+    xen_pfn_t *extent_start;
     int i, rc;
 
     m2p_size   = M2P_SIZE(max_mfn);
     m2p_chunks = M2P_CHUNKS(max_mfn);
 
     xmml.max_extents = m2p_chunks;
-    if (!(extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) {
+    if (!(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t)))) {
         ERR("failed to allocate space for m2p mfns");
         return NULL;
     }
@@ -583,11 +583,11 @@ int xc_linux_save(int xc_handle, int io_
     char page[PAGE_SIZE];
 
     /* Double and single indirect references to the live P2M table */
-    unsigned long *live_p2m_frame_list_list = NULL;
-    unsigned long *live_p2m_frame_list = NULL;
+    xen_pfn_t *live_p2m_frame_list_list = NULL;
+    xen_pfn_t *live_p2m_frame_list = NULL;
 
     /* A copy of the pfn-to-mfn table frame list. */
-    unsigned long *p2m_frame_list = NULL;
+    xen_pfn_t *p2m_frame_list = NULL;
 
     /* Live mapping of shared info structure */
     shared_info_t *live_shinfo = NULL;
@@ -712,11 +712,11 @@ int xc_linux_save(int xc_handle, int io_
     memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
 
     /* Canonicalise the pfn-to-mfn table frame-number list. */
-    for (i = 0; i < max_pfn; i += ulpp) {
-        if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) {
+    for (i = 0; i < max_pfn; i += fpp) {
+        if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) {
             ERR("Frame# in pfn-to-mfn frame list is not in pseudophys");
-            ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp,
-                p2m_frame_list[i/ulpp]);
+            ERR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
+                (uint64_t)p2m_frame_list[i/fpp]);
             goto out;
         }
     }
@@ -818,12 +818,33 @@ int xc_linux_save(int xc_handle, int io_
 
     /* Start writing out the saved-domain record. */
 
-    if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
+    if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
         ERR("write: max_pfn");
         goto out;
     }
 
-    if(!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
+    /*
+     * Write an extended-info structure to inform the restore code that
+     * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
+     * slow paths in the restore code.
+     */
+    if ((pt_levels == 3) &&
+        (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))) {
+        unsigned long signature = ~0UL;
+        uint32_t tot_sz   = sizeof(struct vcpu_guest_context) + 8;
+        uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
+        char chunk_sig[]  = "vcpu";
+        if (!write_exact(io_fd, &signature, sizeof(signature)) ||
+            !write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) ||
+            !write_exact(io_fd, &chunk_sig, 4) ||
+            !write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) ||
+            !write_exact(io_fd, &ctxt,      sizeof(ctxt))) {
+            ERR("write: extended info");
+            goto out;
+        }
+    }
+
+    if (!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
         ERR("write: p2m_frame_list");
         goto out;
     }
@@ -1129,12 +1150,12 @@ int xc_linux_save(int xc_handle, int io_
     }
 
     /* Canonicalise the page table base pointer. */
-    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.ctrlreg[3] >> PAGE_SHIFT) ) {
+    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) ) {
         ERR("PT base is not in range of pseudophys map");
         goto out;
     }
-    ctxt.ctrlreg[3] = mfn_to_pfn(ctxt.ctrlreg[3] >> PAGE_SHIFT) <<
-        PAGE_SHIFT;
+    ctxt.ctrlreg[3] = 
+        xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
 
     if (!write_exact(io_fd, &ctxt, sizeof(ctxt)) ||
         !write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
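
The save side above now emits an optional extended-info record between max_pfn and the P2M frame list: an unsigned long ~0UL signature, a 32-bit total size, then one or more chunks each consisting of a 4-character signature, a 32-bit chunk size and the payload; currently a single "vcpu" chunk carries the full vcpu_guest_context so the restorer can detect VMASST_TYPE_pae_extended_cr3. A hedged sketch of emitting a record in that layout with a dummy payload (types simplified; not the libxc code):

/* Sketch only: writes a toy extended-info record in the layout the patch
 * uses (~0UL signature, total size, then "vcpu" chunk header + payload).
 * The payload here is a dummy struct, not the real vcpu_guest_context. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct dummy_ctxt { uint32_t vm_assist; char pad[60]; };   /* stand-in payload */

static int write_extinfo(FILE *f, const struct dummy_ctxt *ctxt)
{
    unsigned long signature = ~0UL;
    uint32_t chunk_sz = sizeof(*ctxt);
    uint32_t tot_sz   = chunk_sz + 8;       /* chunk header (4+4) + payload */

    if (fwrite(&signature, sizeof(signature), 1, f) != 1) return -1;
    if (fwrite(&tot_sz, sizeof(tot_sz), 1, f) != 1)       return -1;
    if (fwrite("vcpu", 4, 1, f) != 1)                     return -1;
    if (fwrite(&chunk_sz, sizeof(chunk_sz), 1, f) != 1)   return -1;
    if (fwrite(ctxt, sizeof(*ctxt), 1, f) != 1)           return -1;
    return 0;
}

int main(void)
{
    struct dummy_ctxt c;
    FILE *f = fopen("extinfo.bin", "wb");

    memset(&c, 0, sizeof(c));
    if (!f || write_extinfo(f, &c)) { perror("write"); return 1; }
    fclose(f);
    return 0;
}

A restorer that does not know a chunk's signature can skip it using the chunk size, which is what the read-and-discard loop in xc_linux_restore.c above does.
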
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_load_aout9.c
--- a/tools/libxc/xc_load_aout9.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_load_aout9.c       Wed Jun 07 11:03:51 2006 +0100
@@ -17,7 +17,7 @@
 #define KOFFSET(_p)       ((_p)&~KZERO)
 
 static int parseaout9image(const char *, unsigned long, struct domain_setup_info *);
-static int loadaout9image(const char *, unsigned long, int, uint32_t, unsigned long *, struct domain_setup_info *);
+static int loadaout9image(const char *, unsigned long, int, uint32_t, xen_pfn_t *, struct domain_setup_info *);
 static void copyout(int, uint32_t, unsigned long *, unsigned long, const char *, int);
 struct Exec *get_header(const char *, unsigned long, struct Exec *);
 
@@ -79,7 +79,7 @@ loadaout9image(
     const char *image,
     unsigned long image_size,
     int xch, uint32_t dom,
-    unsigned long *parray,
+    xen_pfn_t *parray,
     struct domain_setup_info *dsi)
 {
     struct Exec ehdr;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_load_bin.c
--- a/tools/libxc/xc_load_bin.c Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_load_bin.c Wed Jun 07 11:03:51 2006 +0100
@@ -107,7 +107,7 @@ static int
 static int
 loadbinimage(
     const char *image, unsigned long image_size, int xch, uint32_t dom,
-    unsigned long *parray, struct domain_setup_info *dsi);
+    xen_pfn_t *parray, struct domain_setup_info *dsi);
 
 int probe_bin(const char *image,
               unsigned long image_size,
@@ -235,7 +235,7 @@ static int
 static int
 loadbinimage(
     const char *image, unsigned long image_size, int xch, uint32_t dom,
-    unsigned long *parray, struct domain_setup_info *dsi)
+    xen_pfn_t *parray, struct domain_setup_info *dsi)
 {
     unsigned long size;
     char         *va;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_load_elf.c Wed Jun 07 11:03:51 2006 +0100
@@ -16,10 +16,10 @@ static int
 static int
 loadelfimage(
     const char *image, unsigned long image_size, int xch, uint32_t dom,
-    unsigned long *parray, struct domain_setup_info *dsi);
+    xen_pfn_t *parray, struct domain_setup_info *dsi);
 static int
 loadelfsymtab(
-    const char *image, int xch, uint32_t dom, unsigned long *parray,
+    const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
     struct domain_setup_info *dsi);
 
 int probe_elf(const char *image,
@@ -122,8 +122,15 @@ static int parseelfimage(const char *ima
             ERROR("Actually saw: '%s'", guestinfo);
             return -EINVAL;
         }
-        if ( (strstr(guestinfo, "PAE=yes") != NULL) )
-            dsi->pae_kernel = 1;
+
+        dsi->pae_kernel = PAEKERN_no;
+        p = strstr(guestinfo, "PAE=yes");
+        if ( p != NULL )
+        {
+            dsi->pae_kernel = PAEKERN_yes;
+            if ( !strncmp(p+7, "[extended-cr3]", 14) )
+                dsi->pae_kernel = PAEKERN_extended_cr3;
+        }
 
         break;
     }
@@ -204,7 +211,7 @@ static int
 static int
 loadelfimage(
     const char *image, unsigned long elfsize, int xch, uint32_t dom,
-    unsigned long *parray, struct domain_setup_info *dsi)
+    xen_pfn_t *parray, struct domain_setup_info *dsi)
 {
     Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
     Elf_Phdr *phdr;
@@ -258,7 +265,7 @@ loadelfimage(
 
 static int
 loadelfsymtab(
-    const char *image, int xch, uint32_t dom, unsigned long *parray,
+    const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
     struct domain_setup_info *dsi)
 {
     Elf_Ehdr *ehdr = (Elf_Ehdr *)image, *sym_ehdr;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_pagetab.c
--- a/tools/libxc/xc_pagetab.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_pagetab.c  Wed Jun 07 11:03:51 2006 +0100
@@ -78,7 +78,7 @@ unsigned long xc_translate_foreign_addre
         fprintf(stderr, "failed to retreive vcpu context\n");
         goto out;
     }
-    cr3 = ctx.ctrlreg[3];
+    cr3 = ((unsigned long long)xen_cr3_to_pfn(ctx.ctrlreg[3])) << PAGE_SHIFT;
 
     /* Page Map Level 4 */
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_private.c  Wed Jun 07 11:03:51 2006 +0100
@@ -4,6 +4,7 @@
  * Helper functions for the rest of the library.
  */
 
+#include <inttypes.h>
 #include "xc_private.h"
 
 /* NB: arr must be mlock'ed */
@@ -134,9 +135,9 @@ int xc_memory_op(int xc_handle,
     struct xen_memory_reservation *reservation = arg;
     struct xen_machphys_mfn_list *xmml = arg;
     struct xen_translate_gpfn_list *trans = arg;
-    unsigned long *extent_start;
-    unsigned long *gpfn_list;
-    unsigned long *mfn_list;
+    xen_pfn_t *extent_start;
+    xen_pfn_t *gpfn_list;
+    xen_pfn_t *mfn_list;
     long ret = -EINVAL;
 
     hypercall.op     = __HYPERVISOR_memory_op;
@@ -156,7 +157,7 @@ int xc_memory_op(int xc_handle,
         get_xen_guest_handle(extent_start, reservation->extent_start);
         if ( (extent_start != NULL) &&
              (mlock(extent_start,
-                    reservation->nr_extents * sizeof(unsigned long)) != 0) )
+                    reservation->nr_extents * sizeof(xen_pfn_t)) != 0) )
         {
             PERROR("Could not mlock");
             safe_munlock(reservation, sizeof(*reservation));
@@ -171,7 +172,7 @@ int xc_memory_op(int xc_handle,
         }
         get_xen_guest_handle(extent_start, xmml->extent_start);
         if ( mlock(extent_start,
-                   xmml->max_extents * sizeof(unsigned long)) != 0 )
+                   xmml->max_extents * sizeof(xen_pfn_t)) != 0 )
         {
             PERROR("Could not mlock");
             safe_munlock(xmml, sizeof(*xmml));
@@ -192,17 +193,17 @@ int xc_memory_op(int xc_handle,
             goto out1;
         }
         get_xen_guest_handle(gpfn_list, trans->gpfn_list);
-        if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(long)) != 0 )
+        if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 )
         {
             PERROR("Could not mlock");
             safe_munlock(trans, sizeof(*trans));
             goto out1;
         }
         get_xen_guest_handle(mfn_list, trans->mfn_list);
-        if ( mlock(mfn_list, trans->nr_gpfns * sizeof(long)) != 0 )
-        {
-            PERROR("Could not mlock");
-            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long));
+        if ( mlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 )
+        {
+            PERROR("Could not mlock");
+            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
             safe_munlock(trans, sizeof(*trans));
             goto out1;
         }
@@ -220,22 +221,22 @@ int xc_memory_op(int xc_handle,
         get_xen_guest_handle(extent_start, reservation->extent_start);
         if ( extent_start != NULL )
             safe_munlock(extent_start,
-                         reservation->nr_extents * sizeof(unsigned long));
+                         reservation->nr_extents * sizeof(xen_pfn_t));
         break;
     case XENMEM_machphys_mfn_list:
         safe_munlock(xmml, sizeof(*xmml));
         get_xen_guest_handle(extent_start, xmml->extent_start);
         safe_munlock(extent_start,
-                     xmml->max_extents * sizeof(unsigned long));
+                     xmml->max_extents * sizeof(xen_pfn_t));
         break;
     case XENMEM_add_to_physmap:
         safe_munlock(arg, sizeof(struct xen_add_to_physmap));
         break;
     case XENMEM_translate_gpfn_list:
             get_xen_guest_handle(mfn_list, trans->mfn_list);
-            safe_munlock(mfn_list, trans->nr_gpfns * sizeof(long));
+            safe_munlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
             get_xen_guest_handle(gpfn_list, trans->gpfn_list);
-            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long));
+            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
             safe_munlock(trans, sizeof(*trans));
         break;
     }
@@ -263,7 +264,7 @@ long long xc_domain_get_cpu_usage( int x
 
 int xc_get_pfn_list(int xc_handle,
                     uint32_t domid,
-                    unsigned long *pfn_buf,
+                    xen_pfn_t *pfn_buf,
                     unsigned long max_pfns)
 {
     DECLARE_DOM0_OP;
@@ -274,10 +275,10 @@ int xc_get_pfn_list(int xc_handle,
     set_xen_guest_handle(op.u.getmemlist.buffer, pfn_buf);
 
 #ifdef VALGRIND
-    memset(pfn_buf, 0, max_pfns * sizeof(unsigned long));
+    memset(pfn_buf, 0, max_pfns * sizeof(xen_pfn_t));
 #endif
 
-    if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
+    if ( mlock(pfn_buf, max_pfns * sizeof(xen_pfn_t)) != 0 )
     {
         PERROR("xc_get_pfn_list: pfn_buf mlock failed");
         return -1;
@@ -285,7 +286,7 @@ int xc_get_pfn_list(int xc_handle,
 
     ret = do_dom0_op(xc_handle, &op);
 
-    safe_munlock(pfn_buf, max_pfns * sizeof(unsigned long));
+    safe_munlock(pfn_buf, max_pfns * sizeof(xen_pfn_t));
 
 #if 0
 #ifdef DEBUG
@@ -364,7 +365,7 @@ unsigned long xc_get_filesz(int fd)
 }
 
 void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size,
-                   int xch, uint32_t dom, unsigned long *parray,
+                   int xch, uint32_t dom, xen_pfn_t *parray,
                    unsigned long vstart)
 {
     char *va;
@@ -428,6 +429,29 @@ int xc_version(int xc_handle, int cmd, v
         safe_munlock(arg, argsize);
 
     return rc;
+}
+
+unsigned long xc_make_page_below_4G(
+    int xc_handle, uint32_t domid, unsigned long mfn)
+{
+    xen_pfn_t old_mfn = mfn;
+    xen_pfn_t new_mfn;
+
+    if ( xc_domain_memory_decrease_reservation(
+        xc_handle, domid, 1, 0, &old_mfn) != 0 )
+    {
+        fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
+        return 0;
+    }
+
+    if ( xc_domain_memory_increase_reservation(
+        xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
+    {
+        fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
+        return 0;
+    }
+
+    return new_mfn;
 }
 
 /*
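
The new xc_make_page_below_4G() helper exchanges one machine page for a page guaranteed to lie below 4GB: it removes the caller's MFN from the domain's reservation and immediately repopulates a single extent with address_bits set to 32. A hedged usage sketch (the surrounding xc_handle, domid and mfn variables are assumed to exist in the caller):

    /* Replace 'mfn' with a machine page below 4GB, e.g. for a structure
     * that a 32-bit-limited consumer must be able to address. */
    unsigned long low_mfn = xc_make_page_below_4G(xc_handle, domid, mfn);
    if ( low_mfn == 0 )
        fprintf(stderr, "failed to relocate mfn %lx below 4GB\n", mfn);
    else
        mfn = low_mfn;

Note that the helper does not copy the old page's contents, so it is only suitable before the page holds live data.
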
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_ptrace.c   Wed Jun 07 11:03:51 2006 +0100
@@ -190,7 +190,8 @@ map_domain_va_32(
     static void *v[MAX_VIRT_CPUS];
 
     l2 = xc_map_foreign_range(
-         xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT);
+         xc_handle, current_domid, PAGE_SIZE, PROT_READ,
+         xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3]));
     if ( l2 == NULL )
         return NULL;
 
@@ -230,7 +231,8 @@ map_domain_va_pae(
     static void *v[MAX_VIRT_CPUS];
 
     l3 = xc_map_foreign_range(
-        xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT);
+        xc_handle, current_domid, PAGE_SIZE, PROT_READ,
+        xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3]));
     if ( l3 == NULL )
         return NULL;
 
@@ -282,8 +284,9 @@ map_domain_va_64(
     if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
         return map_domain_va_32(xc_handle, cpu, guest_va, perm);
 
-    l4 = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE,
-            PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT);
+    l4 = xc_map_foreign_range(
+        xc_handle, current_domid, PAGE_SIZE, PROT_READ,
+        xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3]));
     if ( l4 == NULL )
         return NULL;
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_ptrace_core.c      Wed Jun 07 11:03:51 2006 +0100
@@ -12,8 +12,8 @@ static long   nr_pages = 0;
 static long   nr_pages = 0;
 static unsigned long  *p2m_array = NULL;
 static unsigned long  *m2p_array = NULL;
-static unsigned long            pages_offset;
-static unsigned long            cr3[MAX_VIRT_CPUS];
+static unsigned long   pages_offset;
+static unsigned long   cr3[MAX_VIRT_CPUS];
 
 /* --------------------- */
 
@@ -47,7 +47,7 @@ map_domain_va_core(unsigned long domfd, 
             munmap(cr3_virt[cpu], PAGE_SIZE);
         v = mmap(
             NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
-            map_mtop_offset(cr3_phys[cpu]));
+            map_mtop_offset(xen_cr3_to_pfn(cr3_phys[cpu])));
         if (v == MAP_FAILED)
         {
             perror("mmap failed");
@@ -127,14 +127,15 @@ xc_waitdomain_core(
             sizeof(vcpu_guest_context_t)*nr_vcpus)
             return -1;
 
-        for (i = 0; i < nr_vcpus; i++) {
+        for (i = 0; i < nr_vcpus; i++)
             cr3[i] = ctxt[i].ctrlreg[3];
-        }
+
         if ((p2m_array = malloc(nr_pages * sizeof(unsigned long))) == NULL)
         {
             printf("Could not allocate p2m_array\n");
             return -1;
         }
+
         if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) !=
             sizeof(unsigned long)*nr_pages)
             return -1;
@@ -146,10 +147,8 @@ xc_waitdomain_core(
         }
         bzero(m2p_array, sizeof(unsigned long)* 1 << 20);
 
-        for (i = 0; i < nr_pages; i++) {
+        for (i = 0; i < nr_pages; i++)
             m2p_array[p2m_array[i]] = i;
-        }
-
     }
     return 0;
 }
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xenctrl.h     Wed Jun 07 11:03:51 2006 +0100
@@ -415,26 +415,26 @@ int xc_domain_memory_increase_reservatio
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
                                           unsigned int address_bits,
-                                          unsigned long *extent_start);
+                                          xen_pfn_t *extent_start);
 
 int xc_domain_memory_decrease_reservation(int xc_handle,
                                           uint32_t domid,
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
-                                          unsigned long *extent_start);
+                                          xen_pfn_t *extent_start);
 
 int xc_domain_memory_populate_physmap(int xc_handle,
                                       uint32_t domid,
                                       unsigned long nr_extents,
                                       unsigned int extent_order,
                                       unsigned int address_bits,
-                                      unsigned long *extent_start);
+                                      xen_pfn_t *extent_start);
 
 int xc_domain_translate_gpfn_list(int xc_handle,
                                   uint32_t domid,
                                   unsigned long nr_gpfns,
-                                  unsigned long *gpfn_list,
-                                  unsigned long *mfn_list);
+                                  xen_pfn_t *gpfn_list,
+                                  xen_pfn_t *mfn_list);
 
 int xc_domain_ioport_permission(int xc_handle,
                                 uint32_t domid,
@@ -453,6 +453,9 @@ int xc_domain_iomem_permission(int xc_ha
                                unsigned long nr_mfns,
                                uint8_t allow_access);
 
+unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
+                                    unsigned long mfn);
+
 typedef dom0_perfc_desc_t xc_perfc_desc_t;
 /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
 int xc_perfc_control(int xc_handle,
@@ -484,7 +487,7 @@ void *xc_map_foreign_range(int xc_handle
                             unsigned long mfn );
 
 void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot,
-                           unsigned long *arr, int num );
+                           xen_pfn_t *arr, int num );
 
 /**
  * Translates a virtual address in the context of a given domain and
@@ -499,11 +502,11 @@ unsigned long xc_translate_foreign_addre
 unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
                                            int vcpu, unsigned long long virt);
 
-int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf,
+int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf,
                     unsigned long max_pfns);
 
 int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid,
-                         unsigned long *pfn_buf,
+                         xen_pfn_t *pfn_buf,
                          unsigned int start_page, unsigned int nr_pages);
 
 int xc_copy_to_domain_page(int xc_handle, uint32_t domid,
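
These prototype changes are the externally visible part of the switch from unsigned long to xen_pfn_t for all frame-number arrays, which lets a tool stack whose unsigned long differs in width from the hypervisor's frame numbers still exchange PFN lists correctly. A hedged caller-side sketch (the negative-on-failure return convention is assumed):

    xen_pfn_t *pfn_buf = malloc(max_pfns * sizeof(xen_pfn_t));
    if ( pfn_buf == NULL )
        return -1;
    /* Buffers are now sized by sizeof(xen_pfn_t), not sizeof(unsigned long). */
    if ( xc_get_pfn_list(xc_handle, domid, pfn_buf, max_pfns) < 0 )
    {
        free(pfn_buf);
        return -1;
    }
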
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xg_private.h  Wed Jun 07 11:03:51 2006 +0100
@@ -156,6 +156,9 @@ struct domain_setup_info
 
     unsigned long elf_paddr_offset;
 
+#define PAEKERN_no           0
+#define PAEKERN_yes          1
+#define PAEKERN_extended_cr3 2
     unsigned int  pae_kernel;
 
     unsigned int  load_symtab;
@@ -170,7 +173,7 @@ typedef int (*parseimagefunc)(const char
                               struct domain_setup_info *dsi);
 typedef int (*loadimagefunc)(const char *image, unsigned long image_size,
                              int xch,
-                             uint32_t dom, unsigned long *parray,
+                             uint32_t dom, xen_pfn_t *parray,
                              struct domain_setup_info *dsi);
 
 struct load_funcs
@@ -198,7 +201,7 @@ unsigned long xc_get_filesz(int fd);
 unsigned long xc_get_filesz(int fd);
 
 void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size,
-                   int xch, uint32_t dom, unsigned long *parray,
+                   int xch, uint32_t dom, xen_pfn_t *parray,
                    unsigned long vstart);
 
 int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h     Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xg_save_restore.h     Wed Jun 07 11:03:51 2006 +0100
@@ -105,23 +105,23 @@ static int get_platform_info(int xc_hand
 */
 #define M2P_SHIFT       L2_PAGETABLE_SHIFT_PAE
 #define M2P_CHUNK_SIZE  (1 << M2P_SHIFT)
-#define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT)
+#define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(xen_pfn_t)), M2P_SHIFT)
 #define M2P_CHUNKS(_m)  (M2P_SIZE((_m)) >> M2P_SHIFT)
 
 /* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
-#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT)
+#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(xen_pfn_t)), PAGE_SHIFT)
 
-/* Number of unsigned longs in a page */
-#define ulpp            (PAGE_SIZE/sizeof(unsigned long))
+/* Number of xen_pfn_t in a page */
+#define fpp             (PAGE_SIZE/sizeof(xen_pfn_t))
 
 /* Number of entries in the pfn_to_mfn_frame_list */
-#define P2M_FL_ENTRIES  (((max_pfn)+ulpp-1)/ulpp)
+#define P2M_FL_ENTRIES  (((max_pfn)+fpp-1)/fpp)
 
 /* Size in bytes of the pfn_to_mfn_frame_list     */
 #define P2M_FL_SIZE     ((P2M_FL_ENTRIES)*sizeof(unsigned long))
 
 /* Number of entries in the pfn_to_mfn_frame_list_list */
-#define P2M_FLL_ENTRIES (((max_pfn)+(ulpp*ulpp)-1)/(ulpp*ulpp))
+#define P2M_FLL_ENTRIES (((max_pfn)+(fpp*fpp)-1)/(fpp*fpp))
 
 /* Current guests allow 8MB 'slack' in their P2M */
 #define NR_SLACK_ENTRIES   ((8 * 1024 * 1024) / PAGE_SIZE)
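
Renaming ulpp to fpp (presumably "frames per page") keeps the save/restore sizing arithmetic correct when xen_pfn_t and unsigned long differ in size. As a worked example with 4KB pages and max_pfn = 262144 (a 1GB guest): a 4-byte xen_pfn_t gives fpp = 1024, so P2M_FL_ENTRIES = 256 and P2M_FLL_ENTRIES = 1; an 8-byte xen_pfn_t gives fpp = 512, P2M_FL_ENTRIES = 512, and still P2M_FLL_ENTRIES = 1.
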
diff -r b09dbe439169 -r 9d86c1a70f34 tools/python/xen/util/security.py
--- a/tools/python/xen/util/security.py Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/python/xen/util/security.py Wed Jun 07 11:03:51 2006 +0100
@@ -426,6 +426,15 @@ def get_decision(arg1, arg2):
             err("Argument type not supported.")
         ssidref = label2ssidref(arg2[2][1], arg2[1][1])
         arg2 = ['ssidref', str(ssidref)]
+
+    # accept only int or string types for domid and ssidref
+    if isinstance(arg1[1], int):
+        arg1[1] = str(arg1[1])
+    if isinstance(arg2[1], int):
+        arg2[1] = str(arg2[1])
+    if not isinstance(arg1[1], str) or not isinstance(arg2[1], str):
+        err("Invalid id or ssidref type, string or int required")
+
     try:
         decision = acm.getdecision(arg1[0], arg1[1], arg2[0], arg2[1])
     except:
diff -r b09dbe439169 -r 9d86c1a70f34 tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/tests/test_x86_emulator.c   Wed Jun 07 11:03:51 2006 +0100
@@ -13,6 +13,7 @@ typedef int64_t            s64;
 typedef int64_t            s64;
 #include <public/xen.h>
 #include <asm-x86/x86_emulate.h>
+#include <sys/mman.h>
 
 static int read_any(
     unsigned long addr,
@@ -85,23 +86,30 @@ int main(int argc, char **argv)
     struct x86_emulate_ctxt ctxt;
     struct cpu_user_regs regs;
     char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
-    unsigned int res = 0x7FFFFFFF;
-    u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
+    unsigned int *res;
     int rc;
 
     ctxt.regs = &regs;
     ctxt.mode = X86EMUL_MODE_PROT32;
 
+    res = mmap((void *)0x100000, 0x1000, PROT_READ|PROT_WRITE,
+               MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+    if ( res == MAP_FAILED )
+    {
+        fprintf(stderr, "mmap to low address failed\n");
+        exit(1);
+    }
+
     printf("%-40s", "Testing addl %%ecx,(%%eax)...");
     instr[0] = 0x01; instr[1] = 0x08;
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)&res;
-    res         = 0x7FFFFFFF;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0x92345677) || 
+    ctxt.cr2    = (unsigned long)res;
+    *res        = 0x7FFFFFFF;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x92345677) || 
          (regs.eflags != 0xa94) ||
          (regs.eip != (unsigned long)&instr[2]) )
         goto fail;
@@ -116,11 +124,25 @@ int main(int argc, char **argv)
 #else
     regs.ecx    = 0x12345678UL;
 #endif
-    ctxt.cr2    = (unsigned long)&res;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0x92345677) || 
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x92345677) || 
          (regs.ecx != 0x8000000FUL) ||
+         (regs.eip != (unsigned long)&instr[2]) )
+        goto fail;
+    printf("okay\n");
+
+    printf("%-40s", "Testing movl (%%eax),%%ecx...");
+    instr[0] = 0x8b; instr[1] = 0x08;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.ecx    = ~0UL;
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x92345677) || 
+         (regs.ecx != 0x92345677UL) ||
          (regs.eip != (unsigned long)&instr[2]) )
         goto fail;
     printf("okay\n");
@@ -131,10 +153,10 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x92345677UL;
     regs.ecx    = 0xAA;
-    ctxt.cr2    = (unsigned long)&res;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0x923456AA) || 
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x923456AA) || 
          (regs.eflags != 0x244) ||
          (regs.eax != 0x92345677UL) ||
          (regs.eip != (unsigned long)&instr[4]) )
@@ -147,10 +169,10 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0xAABBCC77UL;
     regs.ecx    = 0xFF;
-    ctxt.cr2    = (unsigned long)&res;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0x923456AA) || 
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x923456AA) || 
          ((regs.eflags&0x240) != 0x200) ||
          (regs.eax != 0xAABBCCAA) ||
          (regs.ecx != 0xFF) ||
@@ -163,10 +185,10 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)&res;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0x12345678) || 
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x12345678) || 
          (regs.eflags != 0x200) ||
          (regs.ecx != 0x923456AA) ||
          (regs.eip != (unsigned long)&instr[2]) )
@@ -176,14 +198,14 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing lock cmpxchgl %%ecx,(%%eax)...");
     instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb1; instr[3] = 0x08;
     regs.eflags = 0x200;
-    res         = 0x923456AA;
+    *res        = 0x923456AA;
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x923456AAUL;
     regs.ecx    = 0xDDEEFF00L;
-    ctxt.cr2    = (unsigned long)&res;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0xDDEEFF00) || 
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0xDDEEFF00) || 
          (regs.eflags != 0x244) ||
          (regs.eax != 0x923456AAUL) ||
          (regs.eip != (unsigned long)&instr[4]) )
@@ -192,54 +214,57 @@ int main(int argc, char **argv)
 
     printf("%-40s", "Testing rep movsw...");
     instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5;
-    res         = 0x22334455;
+    *res        = 0x22334455;
     regs.eflags = 0x200;
     regs.ecx    = 23;
     regs.eip    = (unsigned long)&instr[0];
-    regs.esi    = (unsigned long)&res + 0;
-    regs.edi    = (unsigned long)&res + 2;
+    regs.esi    = (unsigned long)res + 0;
+    regs.edi    = (unsigned long)res + 2;
     regs.error_code = 0; /* read fault */
     ctxt.cr2    = regs.esi;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
-         (res != 0x44554455) ||
+         (*res != 0x44554455) ||
          (regs.eflags != 0x200) ||
          (regs.ecx != 22) || 
-         (regs.esi != ((unsigned long)&res + 2)) ||
-         (regs.edi != ((unsigned long)&res + 4)) ||
+         (regs.esi != ((unsigned long)res + 2)) ||
+         (regs.edi != ((unsigned long)res + 4)) ||
          (regs.eip != (unsigned long)&instr[0]) )
         goto fail;
     printf("okay\n");
 
     printf("%-40s", "Testing btrl $0x1,(%edi)...");
     instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01;
-    res         = 0x2233445F;
-    regs.eflags = 0x200;
-    regs.eip    = (unsigned long)&instr[0];
-    regs.edi    = (unsigned long)&res;
+    *res        = 0x2233445F;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.edi    = (unsigned long)res;
     ctxt.cr2    = regs.edi;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
-         (res != 0x2233445D) ||
+         (*res != 0x2233445D) ||
          ((regs.eflags&0x201) != 0x201) ||
          (regs.eip != (unsigned long)&instr[4]) )
         goto fail;
     printf("okay\n");
+
+    res[0] = 0x12345678;
+    res[1] = 0x87654321;
 
     printf("%-40s", "Testing cmpxchg8b (%edi) [succeeding]...");
     instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
     regs.eflags = 0x200;
-    regs.eax    = cmpxchg8b_res[0];
-    regs.edx    = cmpxchg8b_res[1];
+    regs.eax    = res[0];
+    regs.edx    = res[1];
     regs.ebx    = 0x9999AAAA;
     regs.ecx    = 0xCCCCFFFF;
     regs.eip    = (unsigned long)&instr[0];
-    regs.edi    = (unsigned long)cmpxchg8b_res;
+    regs.edi    = (unsigned long)res;
     ctxt.cr2    = regs.edi;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
-         (cmpxchg8b_res[0] != 0x9999AAAA) ||
-         (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
+         (res[0] != 0x9999AAAA) ||
+         (res[1] != 0xCCCCFFFF) ||
          ((regs.eflags&0x240) != 0x240) ||
          (regs.eip != (unsigned long)&instr[3]) )
         goto fail;
@@ -248,12 +273,12 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing cmpxchg8b (%edi) [failing]...");
     instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
     regs.eip    = (unsigned long)&instr[0];
-    regs.edi    = (unsigned long)cmpxchg8b_res;
+    regs.edi    = (unsigned long)res;
     ctxt.cr2    = regs.edi;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
-         (cmpxchg8b_res[0] != 0x9999AAAA) ||
-         (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
+         (res[0] != 0x9999AAAA) ||
+         (res[1] != 0xCCCCFFFF) ||
          (regs.eax != 0x9999AAAA) ||
          (regs.edx != 0xCCCCFFFF) ||
          ((regs.eflags&0x240) != 0x200) ||
@@ -265,11 +290,11 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)&res;
-    res         = 0x82;
+    ctxt.cr2    = (unsigned long)res;
+    *res        = 0x82;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
-         (res != 0x82) ||
+         (*res != 0x82) ||
          (regs.ecx != 0xFFFFFF82) ||
          ((regs.eflags&0x240) != 0x200) ||
          (regs.eip != (unsigned long)&instr[3]) )
@@ -280,11 +305,11 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)&res;
-    res         = 0x1234aa82;
+    ctxt.cr2    = (unsigned long)res;
+    *res        = 0x1234aa82;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
-         (res != 0x1234aa82) ||
+         (*res != 0x1234aa82) ||
          (regs.ecx != 0xaa82) ||
          ((regs.eflags&0x240) != 0x200) ||
          (regs.eip != (unsigned long)&instr[3]) )
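
The emulator self-test now keeps its scratch word in a page mmap()ed at the fixed low address 0x100000 instead of in stack variables, presumably so that every address handed to the emulator (ctxt.cr2, esi/edi for the string op) stays representable in the 32-bit address space being emulated even when the host process's stack lives above 4GB. An illustrative guard one could place next to the mmap (assert.h assumed; not part of the patch):

    /* The emulated mode is 32-bit, so the scratch page must have a
     * 32-bit-representable address; MAP_FIXED at 0x100000 guarantees it. */
    assert((unsigned long)res <= 0xffffffffUL);
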
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/ia64/linux-xen/smpboot.c
--- a/xen/arch/ia64/linux-xen/smpboot.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/ia64/linux-xen/smpboot.c Wed Jun 07 11:03:51 2006 +0100
@@ -62,6 +62,7 @@
 #include <asm/unistd.h>
 
 #ifdef XEN
+#include <xen/domain.h>
 #include <asm/hw_irq.h>
 int ht_per_core = 1;
 #ifndef CONFIG_SMP
@@ -487,7 +488,7 @@ do_rest:
 #else
        struct vcpu *v;
 
-       v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu);
+       v = alloc_idle_vcpu(cpu);
        BUG_ON(v == NULL);
 
        //printf ("do_boot_cpu: cpu=%d, domain=%p, vcpu=%p\n", cpu, idle, v);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/ia64/xen/domain.c        Wed Jun 07 11:03:51 2006 +0100
@@ -42,6 +42,7 @@
 
 #include <asm/vcpu.h>   /* for function declarations */
 #include <public/arch-ia64.h>
+#include <xen/domain.h>
 #include <asm/vmx.h>
 #include <asm/vmx_vcpu.h>
 #include <asm/vmx_vpd.h>
@@ -92,26 +93,16 @@ alloc_dom_xen_and_dom_io(void)
      * Any Xen-heap pages that we will allow to be mapped will have
      * their domain field set to dom_xen.
      */
-    dom_xen = alloc_domain();
+    dom_xen = alloc_domain(DOMID_XEN);
     BUG_ON(dom_xen == NULL);
-    spin_lock_init(&dom_xen->page_alloc_lock);
-    INIT_LIST_HEAD(&dom_xen->page_list);
-    INIT_LIST_HEAD(&dom_xen->xenpage_list);
-    atomic_set(&dom_xen->refcnt, 1);
-    dom_xen->domain_id = DOMID_XEN;
 
     /*
      * Initialise our DOMID_IO domain.
      * This domain owns I/O pages that are within the range of the page_info
      * array. Mappings occur at the priv of the caller.
      */
-    dom_io = alloc_domain();
+    dom_io = alloc_domain(DOMID_IO);
     BUG_ON(dom_io == NULL);
-    spin_lock_init(&dom_io->page_alloc_lock);
-    INIT_LIST_HEAD(&dom_io->page_list);
-    INIT_LIST_HEAD(&dom_io->xenpage_list);
-    atomic_set(&dom_io->refcnt, 1);
-    dom_io->domain_id = DOMID_IO;
 }
 #endif
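
alloc_domain() now takes the domain ID and performs the boilerplate that each caller used to open-code; the deleted lines above show exactly what moved inside it. A sketch reconstructed from those removed lines (the real consolidated helper lives elsewhere in the tree and may differ in detail):

    struct domain *alloc_domain(domid_t domid)
    {
        struct domain *d;

        if ( (d = xmalloc(struct domain)) == NULL )
            return NULL;

        memset(d, 0, sizeof(*d));
        d->domain_id = domid;
        atomic_set(&d->refcnt, 1);
        spin_lock_init(&d->page_alloc_lock);
        INIT_LIST_HEAD(&d->page_list);
        INIT_LIST_HEAD(&d->xenpage_list);
        return d;
    }
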
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/ia64/xen/xensetup.c      Wed Jun 07 11:03:51 2006 +0100
@@ -35,8 +35,6 @@ char saved_command_line[COMMAND_LINE_SIZ
 char saved_command_line[COMMAND_LINE_SIZE];
 char dom0_command_line[COMMAND_LINE_SIZE];
 
-struct vcpu *idle_vcpu[NR_CPUS];
-
 cpumask_t cpu_present_map;
 
 extern unsigned long domain0_ready;
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/audit.c      Wed Jun 07 11:03:51 2006 +0100
@@ -432,10 +432,10 @@ int audit_adjust_pgtables(struct domain 
 
         for_each_vcpu(d, v)
         {
-            if ( pagetable_get_paddr(v->arch.guest_table) )
+            if ( !pagetable_is_null(v->arch.guest_table) )
                 adjust(mfn_to_page(pagetable_get_pfn(v->arch.guest_table)),
                        !shadow_mode_refcounts(d));
-            if ( pagetable_get_paddr(v->arch.shadow_table) )
+            if ( !pagetable_is_null(v->arch.shadow_table) )
                 adjust(mfn_to_page(pagetable_get_pfn(v->arch.shadow_table)),
                        0);
             if ( v->arch.monitor_shadow_ref )
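
Starting with this file and continuing throughout the patch, open-coded mk_pagetable(<paddr>) constructions and pagetable_get_paddr() null tests give way to typed helpers: pagetable_from_pfn(), pagetable_from_paddr(), pagetable_from_page(), pagetable_null() and pagetable_is_null(). Their definitions are not part of this diff; one plausible shape, shown purely for orientation (the real ones live in the asm-x86 page headers and may differ):

    /* Illustrative only -- not copied from the tree. */
    #define pagetable_from_paddr(pa)   mk_pagetable(pa)
    #define pagetable_from_pfn(pfn)    mk_pagetable((paddr_t)(pfn) << PAGE_SHIFT)
    #define pagetable_from_page(pg)    pagetable_from_pfn(page_to_mfn(pg))
    #define pagetable_null()           mk_pagetable(0)
    #define pagetable_is_null(pt)      (pagetable_get_paddr(pt) == 0)

The gain is mainly readability and a single place to change the representation; for example, the repeated "(u64)mfn << PAGE_SHIFT" casts in the HVM code below disappear into pagetable_from_pfn().
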
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/cpu/mtrr/main.c
--- a/xen/arch/x86/cpu/mtrr/main.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/cpu/mtrr/main.c      Wed Jun 07 11:03:51 2006 +0100
@@ -43,7 +43,7 @@
 #include "mtrr.h"
 
 /* No blocking mutexes in Xen. Spin instead. */
-#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED
+#define DECLARE_MUTEX(_m) DEFINE_SPINLOCK(_m)
 #define down(_m) spin_lock(_m)
 #define up(_m) spin_unlock(_m)
 #define lock_cpu_hotplug() ((void)0)
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/dom0_ops.c   Wed Jun 07 11:03:51 2006 +0100
@@ -467,7 +467,7 @@ void arch_getdomaininfo_ctxt(
     if ( hvm_guest(v) )
         c->flags |= VGCF_HVM_GUEST;
 
-    c->ctrlreg[3] = pagetable_get_paddr(v->arch.guest_table);
+    c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
 
     c->vm_assist = v->domain->vm_assist;
 }
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/domain.c     Wed Jun 07 11:03:51 2006 +0100
@@ -259,7 +259,7 @@ int arch_set_info_guest(
     struct vcpu *v, struct vcpu_guest_context *c)
 {
     struct domain *d = v->domain;
-    unsigned long phys_basetab = INVALID_MFN;
+    unsigned long cr3_pfn = INVALID_MFN;
     int i, rc;
 
     if ( !(c->flags & VGCF_HVM_GUEST) )
@@ -322,12 +322,8 @@ int arch_set_info_guest(
 
     if ( !(c->flags & VGCF_HVM_GUEST) )
     {
-        phys_basetab = c->ctrlreg[3];
-        phys_basetab =
-            (gmfn_to_mfn(d, phys_basetab >> PAGE_SHIFT) << PAGE_SHIFT) |
-            (phys_basetab & ~PAGE_MASK);
-
-        v->arch.guest_table = mk_pagetable(phys_basetab);
+        cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3]));
+        v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
     }
 
     if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
@@ -335,14 +331,14 @@ int arch_set_info_guest(
 
     if ( c->flags & VGCF_HVM_GUEST )
     {
-        v->arch.guest_table = mk_pagetable(0);
+        v->arch.guest_table = pagetable_null();
 
         if ( !hvm_initialize_guest_resources(v) )
             return -EINVAL;
     }
     else if ( shadow_mode_refcounts(d) )
     {
-        if ( !get_page(mfn_to_page(phys_basetab>>PAGE_SHIFT), d) )
+        if ( !get_page(mfn_to_page(cr3_pfn), d) )
         {
             destroy_gdt(v);
             return -EINVAL;
@@ -350,7 +346,7 @@ int arch_set_info_guest(
     }
     else
     {
-        if ( !get_page_and_type(mfn_to_page(phys_basetab>>PAGE_SHIFT), d,
+        if ( !get_page_and_type(mfn_to_page(cr3_pfn), d,
                                 PGT_base_page_table) )
         {
             destroy_gdt(v);
@@ -528,20 +524,29 @@ static void load_segments(struct vcpu *n
     if ( unlikely(!all_segs_okay) )
     {
         struct cpu_user_regs *regs = guest_cpu_user_regs();
-        unsigned long   *rsp =
+        unsigned long *rsp =
             (n->arch.flags & TF_kernel_mode) ?
             (unsigned long *)regs->rsp :
             (unsigned long *)nctxt->kernel_sp;
+        unsigned long cs_and_mask, rflags;
 
         if ( !(n->arch.flags & TF_kernel_mode) )
             toggle_guest_mode(n);
         else
             regs->cs &= ~3;
 
+        /* CS longword also contains full evtchn_upcall_mask. */
+        cs_and_mask = (unsigned long)regs->cs |
+            ((unsigned long)n->vcpu_info->evtchn_upcall_mask << 32);
+
+        /* Fold upcall mask into RFLAGS.IF. */
+        rflags  = regs->rflags & ~X86_EFLAGS_IF;
+        rflags |= !n->vcpu_info->evtchn_upcall_mask << 9;
+
         if ( put_user(regs->ss,            rsp- 1) |
              put_user(regs->rsp,           rsp- 2) |
-             put_user(regs->rflags,        rsp- 3) |
-             put_user(regs->cs,            rsp- 4) |
+             put_user(rflags,              rsp- 3) |
+             put_user(cs_and_mask,         rsp- 4) |
              put_user(regs->rip,           rsp- 5) |
              put_user(nctxt->user_regs.gs, rsp- 6) |
              put_user(nctxt->user_regs.fs, rsp- 7) |
@@ -553,6 +558,10 @@ static void load_segments(struct vcpu *n
             DPRINTK("Error while creating failsafe callback frame.\n");
             domain_crash(n->domain);
         }
+
+        if ( test_bit(_VGCF_failsafe_disables_events,
+                      &n->arch.guest_context.flags) )
+            n->vcpu_info->evtchn_upcall_mask = 1;
 
         regs->entry_vector  = TRAP_syscall;
         regs->rflags       &= 0xFFFCBEFFUL;
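
The reworked failsafe-callback frame for 64-bit guests packs two extra pieces of state into slots that previously carried raw register values: the CS slot now holds the selector in its low word and the vcpu's evtchn_upcall_mask in bits 32 and up, and the saved RFLAGS has its IF bit recomputed from that mask (IF set exactly when events are not masked). If the guest has set _VGCF_failsafe_disables_events, events are additionally masked before the callback runs. A hypothetical guest-side decode of the combined CS slot (names invented for illustration):

    #include <stdint.h>

    static void decode_failsafe_cs_slot(uint64_t cs_and_mask,
                                        uint16_t *cs, uint8_t *upcall_mask)
    {
        *cs          = (uint16_t)cs_and_mask;        /* low word: selector */
        *upcall_mask = (uint8_t)(cs_and_mask >> 32); /* upper half: mask   */
    }
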
@@ -935,7 +944,7 @@ void domain_relinquish_resources(struct 
                 put_page_type(mfn_to_page(pfn));
             put_page(mfn_to_page(pfn));
 
-            v->arch.guest_table = mk_pagetable(0);
+            v->arch.guest_table = pagetable_null();
         }
 
         if ( (pfn = pagetable_get_pfn(v->arch.guest_table_user)) != 0 )
@@ -944,7 +953,7 @@ void domain_relinquish_resources(struct 
                 put_page_type(mfn_to_page(pfn));
             put_page(mfn_to_page(pfn));
 
-            v->arch.guest_table_user = mk_pagetable(0);
+            v->arch.guest_table_user = pagetable_null();
         }
     }
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/domain_build.c       Wed Jun 07 11:03:51 2006 +0100
@@ -301,6 +301,9 @@ int construct_dom0(struct domain *d,
                xen_pae ? "yes" : "no", dom0_pae ? "yes" : "no");
         return -EINVAL;
     }
+
+    if ( xen_pae && !!strstr(dsi.xen_section_string, "PAE=yes[extended-cr3]") )
+        set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);
 
     if ( (p = strstr(dsi.xen_section_string, "FEATURES=")) != NULL )
     {
@@ -443,13 +446,13 @@ int construct_dom0(struct domain *d,
         l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
             l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
     }
-    v->arch.guest_table = mk_pagetable((unsigned long)l3start);
+    v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
 #else
     l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
     memcpy(l2tab, idle_pg_table, PAGE_SIZE);
     l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
         l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
-    v->arch.guest_table = mk_pagetable((unsigned long)l2start);
+    v->arch.guest_table = pagetable_from_paddr((unsigned long)l2start);
 #endif
 
     for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
@@ -577,7 +580,7 @@ int construct_dom0(struct domain *d,
         l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
         l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
-    v->arch.guest_table = mk_pagetable(__pa(l4start));
+    v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
 
     l4tab += l4_table_offset(dsi.v_start);
     mfn = alloc_spfn;
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Wed Jun 07 11:03:51 2006 +0100
@@ -84,28 +84,26 @@ struct svm_percore_globals svm_globals[N
 /*
  * Initializes the POOL of ASID used by the guests per core.
  */
-void asidpool_init( int core )
+void asidpool_init(int core)
 {
     int i;
-    svm_globals[core].ASIDpool.asid_lock = SPIN_LOCK_UNLOCKED;
-    spin_lock(&svm_globals[core].ASIDpool.asid_lock);
+
+    spin_lock_init(&svm_globals[core].ASIDpool.asid_lock);
+
     /* Host ASID is always in use */
     svm_globals[core].ASIDpool.asid[INITIAL_ASID] = ASID_INUSE;
-    for( i=1; i<ASID_MAX; i++ )
-    {
+    for ( i = 1; i < ASID_MAX; i++ )
        svm_globals[core].ASIDpool.asid[i] = ASID_AVAILABLE;
-    }
-    spin_unlock(&svm_globals[core].ASIDpool.asid_lock);
 }
 
 
 /* internal function to get the next available ASID */
-static int asidpool_fetch_next( struct vmcb_struct *vmcb, int core )
+static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
 {
     int i;   
-    for( i = 1; i < ASID_MAX; i++ )
-    {
-        if( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE )
+    for ( i = 1; i < ASID_MAX; i++ )
+    {
+        if ( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE )
         {
             vmcb->guest_asid = i;
             svm_globals[core].ASIDpool.asid[i] = ASID_INUSE;
@@ -746,34 +744,34 @@ static void svm_ctxt_switch_to(struct vc
 
 void svm_final_setup_guest(struct vcpu *v)
 {
+    struct domain *d = v->domain;
+    struct vcpu *vc;
+
     v->arch.schedule_tail    = arch_svm_do_launch;
     v->arch.ctxt_switch_from = svm_ctxt_switch_from;
     v->arch.ctxt_switch_to   = svm_ctxt_switch_to;
 
-    if (v == v->domain->vcpu[0]) 
-    {
-       struct domain *d = v->domain;
-       struct vcpu *vc;
-
-       /* Initialize monitor page table */
-       for_each_vcpu(d, vc)
-           vc->arch.monitor_table = mk_pagetable(0);
-
-        /* 
-         * Required to do this once per domain
-         * TODO: add a seperate function to do these.
-         */
-        memset(&d->shared_info->evtchn_mask[0], 0xff, 
-               sizeof(d->shared_info->evtchn_mask));       
-
-        /* 
-         * Put the domain in shadow mode even though we're going to be using
-         * the shared 1:1 page table initially. It shouldn't hurt 
-         */
-        shadow_mode_enable(d, 
-                SHM_enable|SHM_refcounts|
-               SHM_translate|SHM_external|SHM_wr_pt_pte);
-    }
+    if ( v != d->vcpu[0] )
+        return;
+
+    /* Initialize monitor page table */
+    for_each_vcpu( d, vc )
+        vc->arch.monitor_table = pagetable_null();
+
+    /* 
+     * Required to do this once per domain
+     * TODO: add a seperate function to do these.
+     */
+    memset(&d->shared_info->evtchn_mask[0], 0xff, 
+           sizeof(d->shared_info->evtchn_mask));       
+
+    /* 
+     * Put the domain in shadow mode even though we're going to be using
+     * the shared 1:1 page table initially. It shouldn't hurt 
+     */
+    shadow_mode_enable(d,
+                       SHM_enable|SHM_refcounts|
+                       SHM_translate|SHM_external|SHM_wr_pt_pte);
 }
 
 
@@ -870,7 +868,7 @@ static int svm_do_page_fault(unsigned lo
     /* Use 1:1 page table to identify MMIO address space */
     if (mmio_space(gpa))
     {
-       /* No support for APIC */
+        /* No support for APIC */
         if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
         { 
             int inst_len;
@@ -1570,7 +1568,7 @@ static int svm_set_cr0(unsigned long val
         }
 
         /* Now arch.guest_table points to machine physical. */
-        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+        v->arch.guest_table = pagetable_from_pfn(mfn);
         update_pagetables(v);
 
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
@@ -1590,7 +1588,7 @@ static int svm_set_cr0(unsigned long val
         if ( v->arch.hvm_svm.cpu_cr3 ) {
             put_page(mfn_to_page(get_mfn_from_gpfn(
                       v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
-            v->arch.guest_table = mk_pagetable(0);
+            v->arch.guest_table = pagetable_null();
         }
 
     /*
@@ -1599,7 +1597,7 @@ static int svm_set_cr0(unsigned long val
      * created.
      */
     if ((value & X86_CR0_PE) == 0) {
-       if (value & X86_CR0_PG) {
+        if (value & X86_CR0_PG) {
             svm_inject_exception(v, TRAP_gp_fault, 1, 0);
             return 0;
         }
@@ -1740,7 +1738,7 @@ static int mov_to_cr(int gpreg, int cr, 
             }
 
             old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-            v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+            v->arch.guest_table = pagetable_from_pfn(mfn);
 
             if (old_base_mfn)
                 put_page(mfn_to_page(old_base_mfn));
@@ -1797,7 +1795,7 @@ static int mov_to_cr(int gpreg, int cr, 
                  * Now arch.guest_table points to machine physical.
                  */
 
-                v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+                v->arch.guest_table = pagetable_from_pfn(mfn);
                 update_pagetables(v);
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Jun 07 11:03:51 2006 +0100
@@ -66,7 +66,7 @@ void vmx_final_setup_guest(struct vcpu *
 
         /* Initialize monitor page table */
         for_each_vcpu(d, vc)
-            vc->arch.monitor_table = mk_pagetable(0);
+            vc->arch.monitor_table = pagetable_null();
 
         /*
          * Required to do this once per domain
@@ -1223,7 +1223,7 @@ vmx_world_restore(struct vcpu *v, struct
         if(!get_page(mfn_to_page(mfn), v->domain))
                 return 0;
         old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+        v->arch.guest_table = pagetable_from_pfn(mfn);
         if (old_base_mfn)
              put_page(mfn_to_page(old_base_mfn));
         /*
@@ -1459,7 +1459,7 @@ static int vmx_set_cr0(unsigned long val
         /*
          * Now arch.guest_table points to machine physical.
          */
-        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+        v->arch.guest_table = pagetable_from_pfn(mfn);
         update_pagetables(v);
 
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
@@ -1477,7 +1477,7 @@ static int vmx_set_cr0(unsigned long val
         if ( v->arch.hvm_vmx.cpu_cr3 ) {
             put_page(mfn_to_page(get_mfn_from_gpfn(
                       v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
-            v->arch.guest_table = mk_pagetable(0);
+            v->arch.guest_table = pagetable_null();
         }
 
     /*
@@ -1635,7 +1635,7 @@ static int mov_to_cr(int gp, int cr, str
                 domain_crash_synchronous(); /* need to take a clean path */
             }
             old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-            v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+            v->arch.guest_table = pagetable_from_pfn(mfn);
             if (old_base_mfn)
                 put_page(mfn_to_page(old_base_mfn));
             /*
@@ -1690,7 +1690,7 @@ static int mov_to_cr(int gp, int cr, str
                  * Now arch.guest_table points to machine physical.
                  */
 
-                v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+                v->arch.guest_table = pagetable_from_pfn(mfn);
                 update_pagetables(v);
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
@@ -1970,7 +1970,6 @@ static inline void vmx_vmexit_do_extint(
         __hvm_bug(regs);
 
     vector &= INTR_INFO_VECTOR_MASK;
-    local_irq_disable();
     TRACE_VMEXIT(1,vector);
 
     switch(vector) {
@@ -2065,30 +2064,33 @@ asmlinkage void vmx_vmexit_handler(struc
     struct vcpu *v = current;
     int error;
 
-    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
-        __hvm_bug(&regs);
+    error = __vmread(VM_EXIT_REASON, &exit_reason);
+    BUG_ON(error);
 
     perfc_incra(vmexits, exit_reason);
 
-    /* don't bother H/W interrutps */
-    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
-        exit_reason != EXIT_REASON_VMCALL &&
-        exit_reason != EXIT_REASON_IO_INSTRUCTION) 
+    if ( (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT) &&
+         (exit_reason != EXIT_REASON_VMCALL) &&
+         (exit_reason != EXIT_REASON_IO_INSTRUCTION) )
         HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
 
-    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
+    if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
+        local_irq_enable();
+
+    if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
+    {
         printk("Failed vm entry (reason 0x%x)\n", exit_reason);
         printk("*********** VMCS Area **************\n");
         vmcs_dump_vcpu();
         printk("**************************************\n");
         domain_crash_synchronous();
-        return;
     }
 
     __vmread(GUEST_RIP, &eip);
     TRACE_VMEXIT(0,exit_reason);
 
-    switch (exit_reason) {
+    switch ( exit_reason )
+    {
     case EXIT_REASON_EXCEPTION_NMI:
     {
         /*
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/vmx/x86_32/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S       Wed Jun 07 11:03:51 2006 +0100
@@ -55,29 +55,26 @@
  * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
  */
 
-#define HVM_MONITOR_EFLAGS     0x202 /* IF on */
 #define NR_SKIPPED_REGS        6       /* See the above explanation */
-#define HVM_SAVE_ALL_NOSEGREGS \
-        pushl $HVM_MONITOR_EFLAGS; \
-        popf; \
-        subl $(NR_SKIPPED_REGS*4), %esp; \
+#define HVM_SAVE_ALL_NOSEGREGS                                              \
+        subl $(NR_SKIPPED_REGS*4), %esp;                                    \
         movl $0, 0xc(%esp);  /* XXX why do we need to force eflags==0 ?? */ \
-        pushl %eax; \
-        pushl %ebp; \
-        pushl %edi; \
-        pushl %esi; \
-        pushl %edx; \
-        pushl %ecx; \
+        pushl %eax;                                                         \
+        pushl %ebp;                                                         \
+        pushl %edi;                                                         \
+        pushl %esi;                                                         \
+        pushl %edx;                                                         \
+        pushl %ecx;                                                         \
         pushl %ebx;
 
-#define HVM_RESTORE_ALL_NOSEGREGS   \
-        popl %ebx;  \
-        popl %ecx;  \
-        popl %edx;  \
-        popl %esi;  \
-        popl %edi;  \
-        popl %ebp;  \
-        popl %eax;  \
+#define HVM_RESTORE_ALL_NOSEGREGS               \
+        popl %ebx;                              \
+        popl %ecx;                              \
+        popl %edx;                              \
+        popl %esi;                              \
+        popl %edi;                              \
+        popl %ebp;                              \
+        popl %eax;                              \
         addl $(NR_SKIPPED_REGS*4), %esp
 
         ALIGN
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/vmx/x86_64/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S       Wed Jun 07 11:03:51 2006 +0100
@@ -51,45 +51,42 @@
  * (2/1)  u32 entry_vector;
  * (1/1)  u32 error_code;
  */
-#define HVM_MONITOR_RFLAGS     0x202 /* IF on */
 #define NR_SKIPPED_REGS        6       /* See the above explanation */
-#define HVM_SAVE_ALL_NOSEGREGS \
-        pushq $HVM_MONITOR_RFLAGS; \
-        popfq; \
-        subq $(NR_SKIPPED_REGS*8), %rsp; \
-        pushq %rdi; \
-        pushq %rsi; \
-        pushq %rdx; \
-        pushq %rcx; \
-        pushq %rax; \
-        pushq %r8;  \
-        pushq %r9;  \
-        pushq %r10; \
-        pushq %r11; \
-        pushq %rbx; \
-        pushq %rbp; \
-        pushq %r12; \
-        pushq %r13; \
-        pushq %r14; \
-        pushq %r15; \
+#define HVM_SAVE_ALL_NOSEGREGS                  \
+        subq $(NR_SKIPPED_REGS*8), %rsp;        \
+        pushq %rdi;                             \
+        pushq %rsi;                             \
+        pushq %rdx;                             \
+        pushq %rcx;                             \
+        pushq %rax;                             \
+        pushq %r8;                              \
+        pushq %r9;                              \
+        pushq %r10;                             \
+        pushq %r11;                             \
+        pushq %rbx;                             \
+        pushq %rbp;                             \
+        pushq %r12;                             \
+        pushq %r13;                             \
+        pushq %r14;                             \
+        pushq %r15;
 
-#define HVM_RESTORE_ALL_NOSEGREGS \
-        popq %r15; \
-        popq %r14; \
-        popq %r13; \
-        popq %r12; \
-        popq %rbp; \
-        popq %rbx; \
-        popq %r11; \
-        popq %r10; \
-        popq %r9;  \
-        popq %r8;  \
-        popq %rax; \
-        popq %rcx; \
-        popq %rdx; \
-        popq %rsi; \
-        popq %rdi; \
-        addq $(NR_SKIPPED_REGS*8), %rsp; \
+#define HVM_RESTORE_ALL_NOSEGREGS               \
+        popq %r15;                              \
+        popq %r14;                              \
+        popq %r13;                              \
+        popq %r12;                              \
+        popq %rbp;                              \
+        popq %rbx;                              \
+        popq %r11;                              \
+        popq %r10;                              \
+        popq %r9;                               \
+        popq %r8;                               \
+        popq %rax;                              \
+        popq %rcx;                              \
+        popq %rdx;                              \
+        popq %rsi;                              \
+        popq %rdi;                              \
+        addq $(NR_SKIPPED_REGS*8), %rsp;
 
 ENTRY(vmx_asm_vmexit_handler)
         /* selectors are restored/saved by VMX */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/i8259.c      Wed Jun 07 11:03:51 2006 +0100
@@ -102,7 +102,7 @@ BUILD_SMP_INTERRUPT(thermal_interrupt,TH
  * moves to arch independent land
  */
 
-spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(i8259A_lock);
 
 static void disable_8259A_vector(unsigned int vector)
 {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/microcode.c  Wed Jun 07 11:03:51 2006 +0100
@@ -83,7 +83,7 @@
 #include <asm/processor.h>
 
 #define pr_debug(x...) ((void)0)
-#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED
+#define DECLARE_MUTEX(_m) DEFINE_SPINLOCK(_m)
 #define down(_m) spin_lock(_m)
 #define up(_m) spin_unlock(_m)
 #define vmalloc(_s) xmalloc_bytes(_s)
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/mm.c Wed Jun 07 11:03:51 2006 +0100
@@ -89,6 +89,7 @@
 #include <xen/kernel.h>
 #include <xen/lib.h>
 #include <xen/mm.h>
+#include <xen/domain.h>
 #include <xen/sched.h>
 #include <xen/errno.h>
 #include <xen/perfc.h>
@@ -187,20 +188,16 @@ void arch_init_memory(void)
      * Any Xen-heap pages that we will allow to be mapped will have
      * their domain field set to dom_xen.
      */
-    dom_xen = alloc_domain();
-    spin_lock_init(&dom_xen->page_alloc_lock);
-    atomic_set(&dom_xen->refcnt, 1);
-    dom_xen->domain_id = DOMID_XEN;
+    dom_xen = alloc_domain(DOMID_XEN);
+    BUG_ON(dom_xen == NULL);
 
     /*
      * Initialise our DOMID_IO domain.
      * This domain owns I/O pages that are within the range of the page_info
      * array. Mappings occur at the priv of the caller.
      */
-    dom_io = alloc_domain();
-    spin_lock_init(&dom_io->page_alloc_lock);
-    atomic_set(&dom_io->refcnt, 1);
-    dom_io->domain_id = DOMID_IO;
+    dom_io = alloc_domain(DOMID_IO);
+    BUG_ON(dom_io == NULL);
 
     /* First 1MB of RAM is historically marked as I/O. */
     for ( i = 0; i < 0x100; i++ )
@@ -999,6 +996,21 @@ static int alloc_l3_table(struct page_in
     int            i;
 
     ASSERT(!shadow_mode_refcounts(d));
+
+#ifdef CONFIG_X86_PAE
+    /*
+     * PAE pgdirs above 4GB are unacceptable if the guest does not understand
+     * the weird 'extended cr3' format for dealing with high-order address
+     * bits. We cut some slack for control tools (before vcpu0 is initialised).
+     */
+    if ( (pfn >= 0x100000) &&
+         unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) &&
+         d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
+    {
+        MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
+        return 0;
+    }
+#endif
 
     pl3e = map_domain_page(pfn);
     for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
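
Taken together with the construct_dom0() hunk earlier (which sets VMASST_TYPE_pae_extended_cr3 when the kernel advertises PAE=yes[extended-cr3]), this check means a PAE guest that has not enabled the extended-cr3 assist may not install a page directory at or above PFN 0x100000: with 4KB pages that is 0x100000 << 12 = 4GB, the boundary beyond which a frame number no longer fits the plain 32-bit cr3 format. The exemption while vcpu0 is still uninitialised leaves room for the control tools to build the initial page tables.
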
@@ -1717,7 +1729,7 @@ int new_guest_cr3(unsigned long mfn)
         {
             /* Switch to idle pagetable: this VCPU has no active p.t. now. */
             old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-            v->arch.guest_table = mk_pagetable(0);
+            v->arch.guest_table = pagetable_null();
             update_pagetables(v);
             write_cr3(__pa(idle_pg_table));
             if ( old_base_mfn != 0 )
@@ -1739,7 +1751,7 @@ int new_guest_cr3(unsigned long mfn)
     invalidate_shadow_ldt(v);
 
     old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-    v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+    v->arch.guest_table = pagetable_from_pfn(mfn);
     update_pagetables(v); /* update shadow_table and monitor_table */
 
     write_ptbase(v);
@@ -2006,7 +2018,7 @@ int do_mmuext_op(
             {
                 unsigned long old_mfn =
                     pagetable_get_pfn(v->arch.guest_table_user);
-                v->arch.guest_table_user = mk_pagetable(mfn << PAGE_SHIFT);
+                v->arch.guest_table_user = pagetable_from_pfn(mfn);
                 if ( old_mfn != 0 )
                     put_page_and_type(mfn_to_page(old_mfn));
             }
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/setup.c      Wed Jun 07 11:03:51 2006 +0100
@@ -85,8 +85,6 @@ extern void early_cpu_init(void);
 
 struct tss_struct init_tss[NR_CPUS];
 
-struct vcpu *idle_vcpu[NR_CPUS];
-
 extern unsigned long cpu0_stack[];
 
 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/shadow.c     Wed Jun 07 11:03:51 2006 +0100
@@ -2472,7 +2472,7 @@ static void shadow_update_pagetables(str
     if ( !get_shadow_ref(smfn) )
         BUG();
     old_smfn = pagetable_get_pfn(v->arch.shadow_table);
-    v->arch.shadow_table = mk_pagetable((u64)smfn << PAGE_SHIFT);
+    v->arch.shadow_table = pagetable_from_pfn(smfn);
     if ( old_smfn )
         put_shadow_ref(old_smfn);
 
@@ -3481,15 +3481,16 @@ static void shadow_set_l2e_64(unsigned l
 
     __shadow_get_l3e(v, va, &sl3e);
     if (!(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
-         if (create_l2_shadow) {
+        if (create_l2_shadow) {
             perfc_incrc(shadow_set_l2e_force_map);
             shadow_map_into_current(v, va, PAGING_L2, PAGING_L3);
             __shadow_get_l3e(v, va, &sl3e);
         } else {
             printk("For non HVM shadow, create_l1_shadow:%d\n", 
create_l2_shadow);
         }
-         shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
-
+
+        if ( v->domain->arch.ops->guest_paging_levels == PAGING_L4 )
+            shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
     }
 
     if ( put_ref_check ) {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/shadow32.c   Wed Jun 07 11:03:51 2006 +0100
@@ -583,7 +583,7 @@ static void free_shadow_pages(struct dom
         if ( pagetable_get_paddr(v->arch.shadow_table) )
         {
             put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
-            v->arch.shadow_table = mk_pagetable(0);
+            v->arch.shadow_table = pagetable_null();
 
             if ( shadow_mode_external(d) )
             {
@@ -765,7 +765,7 @@ static void alloc_monitor_pagetable(stru
     mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
     mpl2e[l2_table_offset(RO_MPT_VIRT_START)] = l2e_empty();
 
-    v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
+    v->arch.monitor_table = pagetable_from_pfn(mmfn);
     v->arch.monitor_vtable = mpl2e;
 
     if ( v->vcpu_id == 0 )
@@ -830,7 +830,7 @@ void free_monitor_pagetable(struct vcpu 
     unmap_domain_page_global(v->arch.monitor_vtable);
     free_domheap_page(mfn_to_page(mfn));
 
-    v->arch.monitor_table = mk_pagetable(0);
+    v->arch.monitor_table = pagetable_null();
     v->arch.monitor_vtable = 0;
 }
 
@@ -992,7 +992,7 @@ alloc_p2m_table(struct domain *d)
 
         l1tab = map_domain_page(page_to_mfn(page));
         memset(l1tab, 0, PAGE_SIZE);
-        d->arch.phys_table = mk_pagetable(page_to_maddr(page));
+        d->arch.phys_table = pagetable_from_page(page);
     }
 
     list_ent = d->page_list.next;
@@ -1126,7 +1126,7 @@ int shadow_direct_map_init(struct domain
     memset(root, 0, PAGE_SIZE);
     unmap_domain_page(root);
 
-    d->arch.phys_table = mk_pagetable(page_to_maddr(page));
+    d->arch.phys_table = pagetable_from_page(page);
 
     return 1;
 }
@@ -1156,7 +1156,7 @@ void shadow_direct_map_clean(struct doma
 
     unmap_domain_page(l2e);
 
-    d->arch.phys_table = mk_pagetable(0);
+    d->arch.phys_table = pagetable_null();
 }
 
 int __shadow_mode_enable(struct domain *d, unsigned int mode)
@@ -3231,7 +3231,7 @@ void __update_pagetables(struct vcpu *v)
     if ( !get_shadow_ref(smfn) )
         BUG();
     old_smfn = pagetable_get_pfn(v->arch.shadow_table);
-    v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
+    v->arch.shadow_table = pagetable_from_pfn(smfn);
     if ( old_smfn )
         put_shadow_ref(old_smfn);
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/shadow_public.c      Wed Jun 07 11:03:51 2006 +0100
@@ -50,7 +50,7 @@ int shadow_direct_map_init(struct domain
     memset(root, 0, PAGE_SIZE);
     root[PAE_SHADOW_SELF_ENTRY] = l3e_from_page(page, __PAGE_HYPERVISOR);
 
-    d->arch.phys_table = mk_pagetable(page_to_maddr(page));
+    d->arch.phys_table = pagetable_from_page(page);
 
     unmap_domain_page(root);
     return 1;
@@ -92,7 +92,7 @@ void shadow_direct_map_clean(struct doma
 
     unmap_domain_page(l3e);
 
-    d->arch.phys_table = mk_pagetable(0);
+    d->arch.phys_table = pagetable_null();
 }
 
 /****************************************************************************/
@@ -338,7 +338,7 @@ static void alloc_monitor_pagetable(stru
 
     /* map the phys_to_machine map into the per domain Read-Only MPT space */
 
-    v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
+    v->arch.monitor_table = pagetable_from_pfn(mmfn);
     v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
     mpl4e[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
 
@@ -380,7 +380,7 @@ void free_monitor_pagetable(struct vcpu 
     unmap_domain_page_global(v->arch.monitor_vtable);
     free_domheap_page(mfn_to_page(mfn));
 
-    v->arch.monitor_table = mk_pagetable(0);
+    v->arch.monitor_table = pagetable_null();
     v->arch.monitor_vtable = 0;
 }
 #elif CONFIG_PAGING_LEVELS == 3
@@ -431,7 +431,7 @@ static void alloc_monitor_pagetable(stru
     for ( i = 0; i < (MACHPHYS_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
         mpl2e[l2_table_offset(RO_MPT_VIRT_START) + i] = l2e_empty();
 
-    v->arch.monitor_table = mk_pagetable(m3mfn << PAGE_SHIFT); /* < 4GB */
+    v->arch.monitor_table = pagetable_from_pfn(m3mfn);
     v->arch.monitor_vtable = (l2_pgentry_t *) mpl3e;
 
     if ( v->vcpu_id == 0 )
@@ -492,7 +492,7 @@ void free_monitor_pagetable(struct vcpu 
     unmap_domain_page_global(v->arch.monitor_vtable);
     free_domheap_page(mfn_to_page(m3mfn));
 
-    v->arch.monitor_table = mk_pagetable(0);
+    v->arch.monitor_table = pagetable_null();
     v->arch.monitor_vtable = 0;
 }
 #endif
@@ -924,7 +924,7 @@ void free_shadow_pages(struct domain *d)
         if ( pagetable_get_paddr(v->arch.shadow_table) )
         {
             put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
-            v->arch.shadow_table = mk_pagetable(0);
+            v->arch.shadow_table = pagetable_null();
 
             if ( shadow_mode_external(d) )
             {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/smp.c        Wed Jun 07 11:03:51 2006 +0100
@@ -161,7 +161,7 @@ void send_IPI_mask_phys(cpumask_t mask, 
     local_irq_restore(flags);
 }
 
-static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(flush_lock);
 static cpumask_t flush_cpumask;
 static unsigned long flush_va;
 
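The SPIN_LOCK_UNLOCKED -> DEFINE_SPINLOCK() conversions that recur through this
changeset swap a bare struct initializer for a macro that emits the declaration
and the initializer together. A minimal standalone sketch of the pattern (the
spinlock_t layout below is a stand-in, not Xen's real definition):

    #include <stdio.h>

    /* Toy lock type standing in for Xen's spinlock_t. */
    typedef struct { volatile int lock; } spinlock_t;

    /* Old idiom: a bare initializer that callers pair with a declaration. */
    #define SPIN_LOCK_UNLOCKED   { 0 }
    /* New idiom: one macro provides both declaration and initializer. */
    #define DEFINE_SPINLOCK(x)   spinlock_t x = { 0 }

    static spinlock_t old_style = SPIN_LOCK_UNLOCKED;
    static DEFINE_SPINLOCK(new_style);

    int main(void)
    {
        printf("old=%d new=%d\n", old_style.lock, new_style.lock);
        return 0;
    }
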
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/smpboot.c    Wed Jun 07 11:03:51 2006 +0100
@@ -37,6 +37,7 @@
 #include <xen/init.h>
 #include <xen/kernel.h>
 #include <xen/mm.h>
+#include <xen/domain.h>
 #include <xen/sched.h>
 #include <xen/irq.h>
 #include <xen/delay.h>
@@ -886,28 +887,16 @@ static int __devinit do_boot_cpu(int api
        int timeout;
        unsigned long start_eip;
        unsigned short nmi_high = 0, nmi_low = 0;
-       struct domain *d;
        struct vcpu *v;
-       int vcpu_id;
 
        ++cpucount;
 
        booting_cpu = cpu;
 
-       if ((vcpu_id = cpu % MAX_VIRT_CPUS) == 0) {
-               d = domain_create(IDLE_DOMAIN_ID, cpu);
-               BUG_ON(d == NULL);
-               v = d->vcpu[0];
-       } else {
-               d = idle_vcpu[cpu - vcpu_id]->domain;
-               BUG_ON(d == NULL);
-               v = alloc_vcpu(d, vcpu_id, cpu);
-       }
-
-       idle_vcpu[cpu] = v;
+       v = alloc_idle_vcpu(cpu);
        BUG_ON(v == NULL);
 
-       v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+       v->arch.monitor_table = pagetable_from_paddr(__pa(idle_pg_table));
 
        /* start_eip had better be page-aligned! */
        start_eip = setup_trampoline();
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/time.c       Wed Jun 07 11:03:51 2006 +0100
@@ -40,10 +40,10 @@ boolean_param("hpet_force", opt_hpet_for
 
 unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
 unsigned long hpet_address;
-spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(rtc_lock);
 unsigned long volatile jiffies;
 static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
-static spinlock_t wc_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(wc_lock);
 
 struct time_scale {
     int shift;
@@ -67,7 +67,7 @@ static s_time_t stime_platform_stamp;
 static s_time_t stime_platform_stamp;
 static u64 platform_timer_stamp;
 static struct time_scale platform_timer_scale;
-static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(platform_timer_lock);
 static u64 (*read_platform_count)(void);
 
 /*
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/traps.c      Wed Jun 07 11:03:51 2006 +0100
@@ -876,7 +876,7 @@ static int emulate_privileged_op(struct 
                     PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                 break;
             }
-            regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
+            regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
             break;
 
         case 0x6e: /* OUTSB */
@@ -902,7 +902,7 @@ static int emulate_privileged_op(struct 
                 outl_user((u32)data, (u16)regs->edx, v, regs);
                 break;
             }
-            regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
+            regs->esi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
             break;
         }
 
@@ -1034,8 +1034,8 @@ static int emulate_privileged_op(struct 
             break;
             
         case 3: /* Read CR3 */
-            *reg = pfn_to_paddr(mfn_to_gmfn(v->domain,
-                                    pagetable_get_pfn(v->arch.guest_table)));
+            *reg = xen_pfn_to_cr3(mfn_to_gmfn(
+                v->domain, pagetable_get_pfn(v->arch.guest_table)));
             break;
 
         case 4: /* Read CR4 */
@@ -1085,7 +1085,7 @@ static int emulate_privileged_op(struct 
         case 3: /* Write CR3 */
             LOCK_BIGLOCK(v->domain);
             cleanup_writable_pagetable(v->domain);
-            (void)new_guest_cr3(gmfn_to_mfn(v->domain, paddr_to_pfn(*reg)));
+            (void)new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
             UNLOCK_BIGLOCK(v->domain);
             break;
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_32/asm-offsets.c Wed Jun 07 11:03:51 2006 +0100
@@ -64,11 +64,13 @@ void __dummy__(void)
            arch.guest_context.kernel_ss);
     OFFSET(VCPU_kernel_sp, struct vcpu,
            arch.guest_context.kernel_sp);
+    OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
     OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
     OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
     OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
     DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
     DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
+    DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
     BLANK();
 
     OFFSET(TSS_ss0, struct tss_struct, ss0);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/domain_page.c
--- a/xen/arch/x86/x86_32/domain_page.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_32/domain_page.c Wed Jun 07 11:03:51 2006 +0100
@@ -183,7 +183,7 @@ static unsigned long inuse[BITS_TO_LONGS
 static unsigned long inuse[BITS_TO_LONGS(GLOBALMAP_BITS)];
 static unsigned long garbage[BITS_TO_LONGS(GLOBALMAP_BITS)];
 static unsigned int inuse_cursor;
-static spinlock_t globalmap_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(globalmap_lock);
 
 void *map_domain_page_global(unsigned long pfn)
 {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_32/entry.S       Wed Jun 07 11:03:51 2006 +0100
@@ -130,7 +130,10 @@ failsafe_callback:
         movl  VCPU_failsafe_sel(%ebx),%eax
         movw  %ax,TRAPBOUNCE_cs(%edx)
         movw  $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx)
-        call  create_bounce_frame
+        bt    $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%ebx)
+        jnc   1f
+        orw   $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
+1:      call  create_bounce_frame
         xorl  %eax,%eax
         movl  %eax,UREGS_ds(%esp)
         movl  %eax,UREGS_es(%esp)
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_32/mm.c  Wed Jun 07 11:03:51 2006 +0100
@@ -75,7 +75,8 @@ void __init paging_init(void)
     printk("PAE disabled.\n");
 #endif
 
-    idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+    idle_vcpu[0]->arch.monitor_table =
+        pagetable_from_paddr(__pa(idle_pg_table));
 
     if ( cpu_has_pge )
     {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_32/traps.c       Wed Jun 07 11:03:51 2006 +0100
@@ -346,6 +346,12 @@ static long register_guest_callback(stru
     case CALLBACKTYPE_failsafe:
         v->arch.guest_context.failsafe_callback_cs  = reg->address.cs;
         v->arch.guest_context.failsafe_callback_eip = reg->address.eip;
+        if ( reg->flags & CALLBACKF_mask_events )
+            set_bit(_VGCF_failsafe_disables_events,
+                    &v->arch.guest_context.flags);
+        else
+            clear_bit(_VGCF_failsafe_disables_events,
+                      &v->arch.guest_context.flags);
         break;
 
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_64/asm-offsets.c Wed Jun 07 11:03:51 2006 +0100
@@ -64,11 +64,14 @@ void __dummy__(void)
            arch.guest_context.syscall_callback_eip);
     OFFSET(VCPU_kernel_sp, struct vcpu,
            arch.guest_context.kernel_sp);
+    OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
     OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
     OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
     OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
     DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
     DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
+    DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
+    DEFINE(_VGCF_syscall_disables_events,  _VGCF_syscall_disables_events);
     BLANK();
 
     OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_64/entry.S       Wed Jun 07 11:03:51 2006 +0100
@@ -30,7 +30,10 @@ switch_to_kernel:
         movq  VCPU_syscall_addr(%rbx),%rax
         movq  %rax,TRAPBOUNCE_eip(%rdx)
         movw  $0,TRAPBOUNCE_flags(%rdx)
-        call  create_bounce_frame
+        bt    $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
+        jnc   1f
+        orw   $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
+1:      call  create_bounce_frame
         jmp   test_all_events
 
 /* %rbx: struct vcpu, interrupts disabled */
@@ -77,7 +80,10 @@ failsafe_callback:
         movq  VCPU_failsafe_addr(%rbx),%rax
         movq  %rax,TRAPBOUNCE_eip(%rdx)
         movw  $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx)
-        call  create_bounce_frame
+        bt    $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%rbx)
+        jnc   1f
+        orw   $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
+1:      call  create_bounce_frame
         jmp   test_all_events
 .previous
 .section __pre_ex_table,"a"
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_64/mm.c  Wed Jun 07 11:03:51 2006 +0100
@@ -81,7 +81,8 @@ void __init paging_init(void)
     l2_pgentry_t *l2_ro_mpt;
     struct page_info *pg;
 
-    idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+    idle_vcpu[0]->arch.monitor_table =
+        pagetable_from_paddr(__pa(idle_pg_table));
 
     /* Create user-accessible L2 directory to map the MPT for guests. */
     l3_ro_mpt = alloc_xenheap_page();
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_64/traps.c       Wed Jun 07 11:03:51 2006 +0100
@@ -195,7 +195,7 @@ unsigned long do_iret(void)
     /* Returning to user mode? */
     if ( (iret_saved.cs & 3) == 3 )
     {
-        if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
+        if ( unlikely(pagetable_is_null(v->arch.guest_table_user)) )
         {
             DPRINTK("Guest switching to user mode with no user page tables\n");
             domain_crash_synchronous();
@@ -334,10 +334,22 @@ static long register_guest_callback(stru
 
     case CALLBACKTYPE_failsafe:
         v->arch.guest_context.failsafe_callback_eip = reg->address;
+        if ( reg->flags & CALLBACKF_mask_events )
+            set_bit(_VGCF_failsafe_disables_events,
+                    &v->arch.guest_context.flags);
+        else
+            clear_bit(_VGCF_failsafe_disables_events,
+                      &v->arch.guest_context.flags);
         break;
 
     case CALLBACKTYPE_syscall:
         v->arch.guest_context.syscall_callback_eip  = reg->address;
+        if ( reg->flags & CALLBACKF_mask_events )
+            set_bit(_VGCF_syscall_disables_events,
+                    &v->arch.guest_context.flags);
+        else
+            clear_bit(_VGCF_syscall_disables_events,
+                      &v->arch.guest_context.flags);
         break;
 
     case CALLBACKTYPE_nmi:
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_emulate.c        Wed Jun 07 11:03:51 2006 +0100
@@ -100,8 +100,8 @@ static uint8_t opcode_table[256] = {
     ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
     ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
     /* 0x88 - 0x8F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov,
+    ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
     0, 0, 0, DstMem|SrcNone|ModRM|Mov,
     /* 0x90 - 0x9F */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -380,11 +380,12 @@ do{ __asm__ __volatile__ (              
       ((reg) & ((1UL << (ad_bytes << 3)) - 1))))
 #define register_address_increment(reg, inc)                            \
 do {                                                                    \
+    int _inc = (inc); /* signed type ensures sign extension to long */  \
     if ( ad_bytes == sizeof(unsigned long) )                            \
-        (reg) += (inc);                                                 \
+        (reg) += _inc;                                                  \
     else                                                                \
         (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) |             \
-                (((reg) + (inc)) & ((1UL << (ad_bytes << 3)) - 1));     \
+                (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1));      \
 } while (0)
 
 void *
@@ -858,7 +859,7 @@ x86_emulate_memop(
                                          &dst.val, 8, ctxt)) != 0 )
                     goto done;
             }
-            register_address_increment(_regs.esp, -(int)dst.bytes);
+            register_address_increment(_regs.esp, -dst.bytes);
             if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
                                       dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
@@ -942,9 +943,9 @@ x86_emulate_memop(
                 goto done;
         }
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xa6 ... 0xa7: /* cmps */
         DPRINTF("Urk! I don't handle CMPS.\n");
@@ -955,7 +956,7 @@ x86_emulate_memop(
         dst.ptr   = (unsigned long *)cr2;
         dst.val   = _regs.eax;
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xac ... 0xad: /* lods */
         dst.type  = OP_REG;
@@ -964,7 +965,7 @@ x86_emulate_memop(
         if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xae ... 0xaf: /* scas */
         DPRINTF("Urk! I don't handle SCAS.\n");
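The register_address_increment() rework earlier in this file lets call sites
pass plain expressions such as -dst.bytes (an unsigned negation) because the
macro now copies its argument into a signed int, which sign-extends correctly
when added to a 64-bit register image. A standalone sketch of the difference
(values are illustrative only):

    #include <stdio.h>

    int main(void)
    {
        unsigned long rsp   = 0x1000;   /* stand-in for a 64-bit register image */
        unsigned int  bytes = 8;        /* stand-in for dst.bytes */

        /* -bytes is the unsigned value 0xfffffff8, which zero-extends to long. */
        unsigned long wrong = rsp + -bytes;

        /* Copying through a signed int first (what the macro now does) makes
         * the addend -8, which sign-extends to 64 bits. */
        int inc = -bytes;
        unsigned long right = rsp + inc;

        printf("wrong=0x%lx right=0x%lx\n", wrong, right);  /* differ on LP64 */
        return 0;
    }
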
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/dom0_ops.c     Wed Jun 07 11:03:51 2006 +0100
@@ -95,7 +95,7 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
     long ret = 0;
     struct dom0_op curop, *op = &curop;
     void *ssid = NULL; /* save security ptr between pre and post/fail hooks */
-    static spinlock_t dom0_lock = SPIN_LOCK_UNLOCKED;
+    static DEFINE_SPINLOCK(dom0_lock);
 
     if ( !IS_PRIV(current->domain) )
         return -EPERM;
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/domain.c
--- a/xen/common/domain.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/domain.c       Wed Jun 07 11:03:51 2006 +0100
@@ -32,22 +32,111 @@ struct domain *domain_list;
 
 struct domain *dom0;
 
-struct domain *domain_create(domid_t dom_id, unsigned int cpu)
-{
-    struct domain *d, **pd;
-    struct vcpu *v;
-
-    if ( (d = alloc_domain()) == NULL )
+struct vcpu *idle_vcpu[NR_CPUS];
+
+struct domain *alloc_domain(domid_t domid)
+{
+    struct domain *d;
+
+    if ( (d = xmalloc(struct domain)) == NULL )
         return NULL;
 
-    d->domain_id = dom_id;
-
+    memset(d, 0, sizeof(*d));
+    d->domain_id = domid;
     atomic_set(&d->refcnt, 1);
-
     spin_lock_init(&d->big_lock);
     spin_lock_init(&d->page_alloc_lock);
     INIT_LIST_HEAD(&d->page_list);
     INIT_LIST_HEAD(&d->xenpage_list);
+
+    return d;
+}
+
+
+void free_domain(struct domain *d)
+{
+    struct vcpu *v;
+    int i;
+
+    sched_destroy_domain(d);
+
+    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
+        if ( (v = d->vcpu[i]) != NULL )
+            free_vcpu_struct(v);
+
+    xfree(d);
+}
+
+
+struct vcpu *alloc_vcpu(
+    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
+{
+    struct vcpu *v;
+
+    BUG_ON(d->vcpu[vcpu_id] != NULL);
+
+    if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL )
+        return NULL;
+
+    v->domain = d;
+    v->vcpu_id = vcpu_id;
+    v->processor = cpu_id;
+    atomic_set(&v->pausecnt, 0);
+    v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id];
+
+    v->cpu_affinity = is_idle_domain(d) ?
+        cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
+
+    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
+    v->runstate.state_entry_time = NOW();
+
+    if ( (vcpu_id != 0) && !is_idle_domain(d) )
+        set_bit(_VCPUF_down, &v->vcpu_flags);
+
+    if ( sched_init_vcpu(v) < 0 )
+    {
+        free_vcpu_struct(v);
+        return NULL;
+    }
+
+    d->vcpu[vcpu_id] = v;
+    if ( vcpu_id != 0 )
+        d->vcpu[v->vcpu_id-1]->next_in_list = v;
+
+    return v;
+}
+
+struct vcpu *alloc_idle_vcpu(unsigned int cpu_id)
+{
+    struct domain *d;
+    struct vcpu *v;
+    unsigned int vcpu_id;
+
+    if ((vcpu_id = cpu_id % MAX_VIRT_CPUS) == 0)
+    {
+        d = domain_create(IDLE_DOMAIN_ID, cpu_id);
+        BUG_ON(d == NULL);
+        v = d->vcpu[0];
+    }
+    else
+    {
+        d = idle_vcpu[cpu_id - vcpu_id]->domain;
+        BUG_ON(d == NULL);
+        v = alloc_vcpu(d, vcpu_id, cpu_id);
+    }
+
+    idle_vcpu[cpu_id] = v;
+
+    return v;
+}
+
+struct domain *domain_create(domid_t domid, unsigned int cpu)
+{
+    struct domain *d, **pd;
+    struct vcpu *v;
+
+    if ( (d = alloc_domain(domid)) == NULL )
+        return NULL;
 
     rangeset_domain_initialise(d);
 
@@ -74,14 +163,14 @@ struct domain *domain_create(domid_t dom
     if ( !is_idle_domain(d) )
     {
         write_lock(&domlist_lock);
-        pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
+        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
         for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
             if ( (*pd)->domain_id > d->domain_id )
                 break;
         d->next_in_list = *pd;
         *pd = d;
-        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(dom_id)];
-        domain_hash[DOMAIN_HASH(dom_id)] = d;
+        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
+        domain_hash[DOMAIN_HASH(domid)] = d;
         write_unlock(&domlist_lock);
     }
 
@@ -126,19 +215,16 @@ struct domain *find_domain_by_id(domid_t
 
 void domain_kill(struct domain *d)
 {
-    struct vcpu *v;
-
     domain_pause(d);
-    if ( !test_and_set_bit(_DOMF_dying, &d->domain_flags) )
-    {
-        for_each_vcpu(d, v)
-            sched_rem_domain(v);
-        gnttab_release_mappings(d);
-        domain_relinquish_resources(d);
-        put_domain(d);
-
-        send_guest_global_virq(dom0, VIRQ_DOM_EXC);
-    }
+
+    if ( test_and_set_bit(_DOMF_dying, &d->domain_flags) )
+        return;
+
+    gnttab_release_mappings(d);
+    domain_relinquish_resources(d);
+    put_domain(d);
+
+    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
 }
 
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/kernel.c
--- a/xen/common/kernel.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/kernel.c       Wed Jun 07 11:03:51 2006 +0100
@@ -184,6 +184,7 @@ long do_xen_version(int cmd, XEN_GUEST_H
     case XENVER_get_features:
     {
         xen_feature_info_t fi;
+        struct domain *d = current->domain;
 
         if ( copy_from_guest(&fi, arg, 1) )
             return -EFAULT;
@@ -191,7 +192,9 @@ long do_xen_version(int cmd, XEN_GUEST_H
         switch ( fi.submap_idx )
         {
         case 0:
-            fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
+            fi.submap = 0;
+            if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) )
+                fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb);
             if ( shadow_mode_translate(current->domain) )
                 fi.submap |= 
                     (1U << XENFEAT_writable_page_tables) |
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/keyhandler.c   Wed Jun 07 11:03:51 2006 +0100
@@ -128,11 +128,12 @@ static void dump_domains(unsigned char k
                d->domain_flags, atomic_read(&d->refcnt),
                d->tot_pages, d->xenheap_pages, cpuset);
         printk("    handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
-               "%02x%02x-%02x%02x%02x%02x%02x%02x\n",
+               "%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n",
                d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
                d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7],
                d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
-               d->handle[12], d->handle[13], d->handle[14], d->handle[15]);
+               d->handle[12], d->handle[13], d->handle[14], d->handle[15],
+               d->vm_assist);
 
         arch_dump_domain_info(d);
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/memory.c
--- a/xen/common/memory.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/memory.c       Wed Jun 07 11:03:51 2006 +0100
@@ -31,14 +31,15 @@ static long
 static long
 increase_reservation(
     struct domain *d, 
-    XEN_GUEST_HANDLE(ulong) extent_list,
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
     unsigned int   nr_extents,
     unsigned int   extent_order,
     unsigned int   flags,
     int           *preempted)
 {
     struct page_info *page;
-    unsigned long     i, mfn;
+    unsigned long i;
+    xen_pfn_t mfn;
 
     if ( !guest_handle_is_null(extent_list) &&
          !guest_handle_okay(extent_list, nr_extents) )
@@ -80,14 +81,16 @@ static long
 static long
 populate_physmap(
     struct domain *d, 
-    XEN_GUEST_HANDLE(ulong) extent_list,
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
     unsigned int  nr_extents,
     unsigned int  extent_order,
     unsigned int  flags,
     int          *preempted)
 {
     struct page_info *page;
-    unsigned long    i, j, gpfn, mfn;
+    unsigned long i, j;
+    xen_pfn_t gpfn;
+    xen_pfn_t mfn;
 
     if ( !guest_handle_okay(extent_list, nr_extents) )
         return 0;
@@ -177,13 +180,14 @@ static long
 static long
 decrease_reservation(
     struct domain *d,
-    XEN_GUEST_HANDLE(ulong) extent_list,
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
     unsigned int   nr_extents,
     unsigned int   extent_order,
     unsigned int   flags,
     int           *preempted)
 {
-    unsigned long    i, j, gmfn;
+    unsigned long i, j;
+    xen_pfn_t gmfn;
 
     if ( !guest_handle_okay(extent_list, nr_extents) )
         return 0;
@@ -214,7 +218,9 @@ translate_gpfn_list(
     XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
 {
     struct xen_translate_gpfn_list op;
-    unsigned long i, gpfn, mfn;
+    unsigned long i;
+    xen_pfn_t gpfn;
+    xen_pfn_t mfn;
     struct domain *d;
 
     if ( copy_from_guest(&op, uop, 1) )
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/page_alloc.c   Wed Jun 07 11:03:51 2006 +0100
@@ -59,7 +59,7 @@ custom_param("lowmem_emergency_pool", pa
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 
-static spinlock_t page_scrub_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(page_scrub_lock);
 LIST_HEAD(page_scrub_list);
 
 /*********************
@@ -250,7 +250,7 @@ static struct list_head heap[NR_ZONES][M
 
 static unsigned long avail[NR_ZONES];
 
-static spinlock_t heap_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(heap_lock);
 
 void end_boot_allocator(void)
 {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/perfc.c
--- a/xen/common/perfc.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/perfc.c        Wed Jun 07 11:03:51 2006 +0100
@@ -209,7 +209,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
 /* Dom0 control of perf counters */
 int perfc_control(dom0_perfccontrol_t *pc)
 {
-    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+    static DEFINE_SPINLOCK(lock);
     u32 op = pc->op;
     int rc;
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/sched_bvt.c
--- a/xen/common/sched_bvt.c    Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/sched_bvt.c    Wed Jun 07 11:03:51 2006 +0100
@@ -160,15 +160,14 @@ static inline u32 calc_evt(struct vcpu *
 }
 
 /**
- * bvt_alloc_task - allocate BVT private structures for a task
- * @p:              task to allocate private structures for
- *
+ * bvt_init_vcpu - allocate BVT private structures for a VCPU.
  * Returns non-zero on failure.
  */
-static int bvt_alloc_task(struct vcpu *v)
+static int bvt_init_vcpu(struct vcpu *v)
 {
     struct domain *d = v->domain;
     struct bvt_dom_info *inf;
+    struct bvt_vcpu_info *einf;
 
     if ( (d->sched_priv == NULL) )
     {
@@ -199,15 +198,7 @@ static int bvt_alloc_task(struct vcpu *v
         init_timer(&inf->unwarp_timer, unwarp_timer_fn, inf, v->processor);
     }
 
-    return 0;
-}
-
-/*
- * Add and remove a domain
- */
-static void bvt_add_task(struct vcpu *v) 
-{
-    struct bvt_vcpu_info *einf = EBVT_INFO(v);
+    einf = EBVT_INFO(v);
 
     /* Allocate per-CPU context if this is the first domain to be added. */
     if ( CPU_INFO(v->processor) == NULL )
@@ -223,13 +214,15 @@ static void bvt_add_task(struct vcpu *v)
         einf->avt = einf->evt = ~0U;
         BUG_ON(__task_on_runqueue(v));
         __add_to_runqueue_head(v);
-    } 
+    }
     else 
     {
         /* Set avt and evt to system virtual time. */
         einf->avt = CPU_SVT(v->processor);
         einf->evt = CPU_SVT(v->processor);
     }
+
+    return 0;
 }
 
 static void bvt_wake(struct vcpu *v)
@@ -298,10 +291,9 @@ static int bvt_set_affinity(struct vcpu 
 
 
 /**
- * bvt_free_task - free BVT private structures for a task
- * @d:             task
- */
-static void bvt_free_task(struct domain *d)
+ * bvt_destroy_domain - free BVT private structures for a domain.
+ */
+static void bvt_destroy_domain(struct domain *d)
 {
     struct bvt_dom_info *inf = BVT_INFO(d);
 
@@ -568,10 +560,10 @@ struct scheduler sched_bvt_def = {
     .name     = "Borrowed Virtual Time",
     .opt_name = "bvt",
     .sched_id = SCHED_BVT,
-    
-    .alloc_task     = bvt_alloc_task,
-    .add_task       = bvt_add_task,
-    .free_task      = bvt_free_task,
+
+    .init_vcpu      = bvt_init_vcpu,
+    .destroy_domain = bvt_destroy_domain,
+
     .do_schedule    = bvt_do_schedule,
     .control        = bvt_ctl,
     .adjdom         = bvt_adjdom,
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/sched_credit.c Wed Jun 07 11:03:51 2006 +0100
@@ -75,14 +75,13 @@
     } while ( 0 );
 
 #define CSCHED_STATS_EXPAND_SCHED(_MACRO)   \
-    _MACRO(vcpu_alloc)                      \
-    _MACRO(vcpu_add)                        \
+    _MACRO(vcpu_init)                       \
     _MACRO(vcpu_sleep)                      \
     _MACRO(vcpu_wake_running)               \
     _MACRO(vcpu_wake_onrunq)                \
     _MACRO(vcpu_wake_runnable)              \
     _MACRO(vcpu_wake_not_runnable)          \
-    _MACRO(dom_free)                        \
+    _MACRO(dom_destroy)                     \
     _MACRO(schedule)                        \
     _MACRO(tickle_local_idler)              \
     _MACRO(tickle_local_over)               \
@@ -429,14 +428,14 @@ __csched_vcpu_acct_idle_locked(struct cs
 }
 
 static int
-csched_vcpu_alloc(struct vcpu *vc)
+csched_vcpu_init(struct vcpu *vc)
 {
     struct domain * const dom = vc->domain;
     struct csched_dom *sdom;
     struct csched_vcpu *svc;
     int16_t pri;
 
-    CSCHED_STAT_CRANK(vcpu_alloc);
+    CSCHED_STAT_CRANK(vcpu_init);
 
     /* Allocate, if appropriate, per-domain info */
     if ( is_idle_vcpu(vc) )
@@ -489,19 +488,13 @@ csched_vcpu_alloc(struct vcpu *vc)
     if ( likely(sdom != NULL) )
         csched_vcpu_acct(svc, 0);
 
-    return 0;
-}
-
-static void
-csched_vcpu_add(struct vcpu *vc) 
-{
-    CSCHED_STAT_CRANK(vcpu_add);
-
     /* Allocate per-PCPU info */
     if ( unlikely(!CSCHED_PCPU(vc->processor)) )
         csched_pcpu_init(vc->processor);
 
     CSCHED_VCPU_CHECK(vc);
+
+    return 0;
 }
 
 static void
@@ -644,12 +637,12 @@ csched_dom_cntl(
 }
 
 static void
-csched_dom_free(struct domain *dom)
+csched_dom_destroy(struct domain *dom)
 {
     struct csched_dom * const sdom = CSCHED_DOM(dom);
     int i;
 
-    CSCHED_STAT_CRANK(dom_free);
+    CSCHED_STAT_CRANK(dom_destroy);
 
     for ( i = 0; i < MAX_VIRT_CPUS; i++ )
     {
@@ -1215,14 +1208,15 @@ struct scheduler sched_credit_def = {
     .opt_name       = "credit",
     .sched_id       = SCHED_CREDIT,
 
-    .alloc_task     = csched_vcpu_alloc,
-    .add_task       = csched_vcpu_add,
+    .init_vcpu      = csched_vcpu_init,
+    .destroy_domain = csched_dom_destroy,
+
     .sleep          = csched_vcpu_sleep,
     .wake           = csched_vcpu_wake,
+
     .set_affinity   = csched_vcpu_set_affinity,
 
     .adjdom         = csched_dom_cntl,
-    .free_task      = csched_dom_free,
 
     .tick           = csched_tick,
     .do_schedule    = csched_schedule,
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/sched_sedf.c   Wed Jun 07 11:03:51 2006 +0100
@@ -328,11 +328,9 @@ static inline void __add_to_runqueue_sor
 }
 
 
-/* Allocates memory for per domain private scheduling data*/
-static int sedf_alloc_task(struct vcpu *v)
-{
-    PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n",
-          v->domain->domain_id, v->vcpu_id);
+static int sedf_init_vcpu(struct vcpu *v)
+{
+    struct sedf_vcpu_info *inf;
 
     if ( v->domain->sched_priv == NULL )
     {
@@ -344,23 +342,11 @@ static int sedf_alloc_task(struct vcpu *
 
     if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
         return -1;
-
     memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));
 
-    return 0;
-}
-
-
-/* Setup the sedf_dom_info */
-static void sedf_add_task(struct vcpu *v)
-{
-    struct sedf_vcpu_info *inf = EDOM_INFO(v);
-
+    inf = EDOM_INFO(v);
     inf->vcpu = v;
  
-    PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",
-          v->domain->domain_id, v->vcpu_id);
-
     /* Allocate per-CPU context if this is the first domain to be added. */
     if ( unlikely(schedule_data[v->processor].sched_priv == NULL) )
     {
@@ -408,14 +394,13 @@ static void sedf_add_task(struct vcpu *v
         EDOM_INFO(v)->deadl_abs = 0;
         EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
     }
-}
-
-/* Frees memory used by domain info */
-static void sedf_free_task(struct domain *d)
+
+    return 0;
+}
+
+static void sedf_destroy_domain(struct domain *d)
 {
     int i;
-
-    PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
 
     xfree(d->sched_priv);
  
@@ -1452,9 +1437,9 @@ struct scheduler sched_sedf_def = {
     .opt_name = "sedf",
     .sched_id = SCHED_SEDF,
     
-    .alloc_task     = sedf_alloc_task,
-    .add_task       = sedf_add_task,
-    .free_task      = sedf_free_task,
+    .init_vcpu      = sedf_init_vcpu,
+    .destroy_domain = sedf_destroy_domain,
+
     .do_schedule    = sedf_do_schedule,
     .dump_cpu_state = sedf_dump_cpu_state,
     .sleep          = sedf_sleep,
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/schedule.c
--- a/xen/common/schedule.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/schedule.c     Wed Jun 07 11:03:51 2006 +0100
@@ -99,74 +99,7 @@ void vcpu_runstate_get(struct vcpu *v, s
     }
 }
 
-struct domain *alloc_domain(void)
-{
-    struct domain *d;
-
-    if ( (d = xmalloc(struct domain)) != NULL )
-        memset(d, 0, sizeof(*d));
-
-    return d;
-}
-
-void free_domain(struct domain *d)
-{
-    struct vcpu *v;
-    int i;
-
-    for_each_vcpu ( d, v )
-        sched_rem_domain(v);
-
-    SCHED_OP(free_task, d);
-
-    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
-        if ( (v = d->vcpu[i]) != NULL )
-            free_vcpu_struct(v);
-
-    xfree(d);
-}
-
-struct vcpu *alloc_vcpu(
-    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
-{
-    struct vcpu *v;
-
-    BUG_ON(d->vcpu[vcpu_id] != NULL);
-
-    if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL )
-        return NULL;
-
-    v->domain = d;
-    v->vcpu_id = vcpu_id;
-    v->processor = cpu_id;
-    atomic_set(&v->pausecnt, 0);
-    v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id];
-
-    v->cpu_affinity = is_idle_domain(d) ?
-        cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
-
-    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
-    v->runstate.state_entry_time = NOW();
-
-    if ( (vcpu_id != 0) && !is_idle_domain(d) )
-        set_bit(_VCPUF_down, &v->vcpu_flags);
-
-    if ( SCHED_OP(alloc_task, v) < 0 )
-    {
-        free_vcpu_struct(v);
-        return NULL;
-    }
-
-    d->vcpu[vcpu_id] = v;
-    if ( vcpu_id != 0 )
-        d->vcpu[v->vcpu_id-1]->next_in_list = v;
-
-    sched_add_domain(v);
-
-    return v;
-}
-
-void sched_add_domain(struct vcpu *v) 
+int sched_init_vcpu(struct vcpu *v) 
 {
     /* Initialise the per-domain timers. */
     init_timer(&v->timer, vcpu_timer_fn, v, v->processor);
@@ -179,17 +112,23 @@ void sched_add_domain(struct vcpu *v)
         set_bit(_VCPUF_running, &v->vcpu_flags);
     }
 
-    SCHED_OP(add_task, v);
     TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
-}
-
-void sched_rem_domain(struct vcpu *v) 
-{
-    kill_timer(&v->timer);
-    kill_timer(&v->poll_timer);
-
-    SCHED_OP(rem_task, v);
-    TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
+
+    return SCHED_OP(init_vcpu, v);
+}
+
+void sched_destroy_domain(struct domain *d)
+{
+    struct vcpu *v;
+
+    for_each_vcpu ( d, v )
+    {
+        kill_timer(&v->timer);
+        kill_timer(&v->poll_timer);
+        TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
+    }
+
+    SCHED_OP(destroy_domain, d);
 }
 
 void vcpu_sleep_nosync(struct vcpu *v)
@@ -663,7 +602,7 @@ static void poll_timer_fn(void *data)
 /* Initialise the data structures. */
 void __init scheduler_init(void)
 {
-    int i, rc;
+    int i;
 
     open_softirq(SCHEDULE_SOFTIRQ, __enter_scheduler);
 
@@ -686,17 +625,6 @@ void __init scheduler_init(void)
 
     printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
     SCHED_OP(init);
-
-    if ( idle_vcpu[0] != NULL )
-    {
-        schedule_data[0].curr = idle_vcpu[0];
-        schedule_data[0].idle = idle_vcpu[0];
-
-        rc = SCHED_OP(alloc_task, idle_vcpu[0]);
-        BUG_ON(rc < 0);
-
-        sched_add_domain(idle_vcpu[0]);
-    }
 }
 
 /*
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/trace.c
--- a/xen/common/trace.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/trace.c        Wed Jun 07 11:03:51 2006 +0100
@@ -173,25 +173,17 @@ void init_trace_bufs(void)
  */
 int tb_control(dom0_tbufcontrol_t *tbc)
 {
-    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+    static DEFINE_SPINLOCK(lock);
     int rc = 0;
 
     spin_lock(&lock);
-
-    if ( !tb_init_done &&
-         (tbc->op != DOM0_TBUF_SET_SIZE) &&
-         (tbc->op != DOM0_TBUF_ENABLE) )
-    {
-        spin_unlock(&lock);
-        return -EINVAL;
-    }
 
     switch ( tbc->op )
     {
     case DOM0_TBUF_GET_INFO:
         tbc->cpu_mask   = tb_cpu_mask;
         tbc->evt_mask   = tb_event_mask;
-        tbc->buffer_mfn = __pa(t_bufs[0]) >> PAGE_SHIFT;
+        tbc->buffer_mfn = opt_tbuf_size ? virt_to_mfn(t_bufs[0]) : 0UL;
         tbc->size       = opt_tbuf_size * PAGE_SIZE;
         break;
     case DOM0_TBUF_SET_CPU_MASK:
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/xmalloc.c
--- a/xen/common/xmalloc.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/xmalloc.c      Wed Jun 07 11:03:51 2006 +0100
@@ -35,7 +35,7 @@
 #include <xen/prefetch.h>
 
 static LIST_HEAD(freelist);
-static spinlock_t freelist_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(freelist_lock);
 
 struct xmalloc_hdr
 {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/drivers/char/console.c        Wed Jun 07 11:03:51 2006 +0100
@@ -53,7 +53,7 @@ static int sercon_handle = -1;
 static int sercon_handle = -1;
 static int vgacon_enabled = 0;
 
-spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(console_lock);
 
 /*
  * *******************************************************
@@ -563,7 +563,7 @@ static unsigned int debugtrace_prd; /* P
 static unsigned int debugtrace_prd; /* Producer index     */
 static unsigned int debugtrace_kilobytes = 128, debugtrace_bytes;
 static unsigned int debugtrace_used;
-static spinlock_t   debugtrace_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(debugtrace_lock);
 integer_param("debugtrace", debugtrace_kilobytes);
 
 void debugtrace_dump(void)
@@ -675,7 +675,7 @@ void panic(const char *fmt, ...)
     va_list args;
     char buf[128];
     unsigned long flags;
-    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+    static DEFINE_SPINLOCK(lock);
     extern void machine_restart(char *);
     
     debugtrace_dump();
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/asm-x86/page.h        Wed Jun 07 11:03:51 2006 +0100
@@ -172,10 +172,13 @@ typedef struct { u32 pfn; } pagetable_t;
 /* x86_64 */
 typedef struct { u64 pfn; } pagetable_t;
 #endif
-#define pagetable_get_paddr(x) ((paddr_t)(x).pfn << PAGE_SHIFT)
-#define pagetable_get_pfn(x)   ((x).pfn)
-#define mk_pagetable(pa)       \
-    ({ pagetable_t __p; __p.pfn = (pa) >> PAGE_SHIFT; __p; })
+#define pagetable_get_paddr(x)  ((paddr_t)(x).pfn << PAGE_SHIFT)
+#define pagetable_get_pfn(x)    ((x).pfn)
+#define pagetable_is_null(x)    ((x).pfn == 0)
+#define pagetable_from_pfn(pfn) ((pagetable_t) { (pfn) })
+#define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
+#define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
+#define pagetable_null()        pagetable_from_pfn(0)
 #endif
 
 #define clear_page(_p)      memset((void *)(_p), 0, PAGE_SIZE)
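The accessor macros above replace the old mk_pagetable(pa) construction, so
callers no longer shift a pfn up into a physical address just to have the macro
shift it back down. A standalone sketch of how the new helpers relate
(pagetable_from_page() is omitted because it depends on page_to_mfn()):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12
    typedef uint64_t paddr_t;
    typedef struct { uint64_t pfn; } pagetable_t;

    /* Mirrors the new macros in xen/include/asm-x86/page.h. */
    #define pagetable_get_paddr(x)  ((paddr_t)(x).pfn << PAGE_SHIFT)
    #define pagetable_get_pfn(x)    ((x).pfn)
    #define pagetable_is_null(x)    ((x).pfn == 0)
    #define pagetable_from_pfn(pfn) ((pagetable_t) { (pfn) })
    #define pagetable_from_paddr(p) pagetable_from_pfn((p) >> PAGE_SHIFT)
    #define pagetable_null()        pagetable_from_pfn(0)

    int main(void)
    {
        pagetable_t pt = pagetable_from_paddr(0x12345000ULL);

        printf("pfn=0x%llx paddr=0x%llx null=%d\n",
               (unsigned long long)pagetable_get_pfn(pt),
               (unsigned long long)pagetable_get_paddr(pt),
               (int)pagetable_is_null(pagetable_null()));
        return 0;
    }
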
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h    Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/arch-ia64.h    Wed Jun 07 11:03:51 2006 +0100
@@ -26,6 +26,9 @@ DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
 DEFINE_XEN_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 #endif
 
 /* Arch specific VIRQs definition */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/arch-x86_32.h  Wed Jun 07 11:03:51 2006 +0100
@@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
 DEFINE_XEN_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 #endif
 
 /*
@@ -138,9 +141,17 @@ struct vcpu_guest_context {
 struct vcpu_guest_context {
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
-#define VGCF_I387_VALID (1<<0)
-#define VGCF_HVM_GUEST  (1<<1)
-#define VGCF_IN_KERNEL  (1<<2)
+#define VGCF_I387_VALID                (1<<0)
+#define VGCF_HVM_GUEST                 (1<<1)
+#define VGCF_IN_KERNEL                 (1<<2)
+#define _VGCF_i387_valid               0
+#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
+#define _VGCF_hvm_guest                1
+#define VGCF_hvm_guest                 (1<<_VGCF_hvm_guest)
+#define _VGCF_in_kernel                2
+#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
     unsigned long flags;                    /* VGCF_* flags                 */
     struct cpu_user_regs user_regs;         /* User-level CPU registers     */
     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
@@ -158,10 +169,18 @@ typedef struct vcpu_guest_context vcpu_g
 typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 
+/*
+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
+ * must use the following accessor macros to pack/unpack valid MFNs.
+ */
+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
+
 struct arch_shared_info {
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
-    unsigned long pfn_to_mfn_frame_list_list;
+    xen_pfn_t     pfn_to_mfn_frame_list_list;
     unsigned long nmi_reason;
 };
 typedef struct arch_shared_info arch_shared_info_t;
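The xen_pfn_to_cr3()/xen_cr3_to_pfn() pair added above lets a PAE guest
exchange a page-directory frame number that may lie above 4GB through the
32-bit cr3 field: the MFN is rotated left by 12, so its high bits land in the
low 12 bits of the cr3 image, which carry no address information of their own.
A standalone round-trip check (the macros are written here with uint32_t where
the header uses unsigned, and the example MFN is arbitrary):

    #include <stdio.h>
    #include <stdint.h>

    /* 32-bit (PAE) variants from xen/include/public/arch-x86_32.h. */
    #define xen_pfn_to_cr3(pfn) (((uint32_t)(pfn) << 12) | ((uint32_t)(pfn) >> 20))
    #define xen_cr3_to_pfn(cr3) (((uint32_t)(cr3) >> 12) | ((uint32_t)(cr3) << 20))

    int main(void)
    {
        uint32_t pfn = 0x123456;             /* frame just above the 4GB boundary */
        uint32_t cr3 = xen_pfn_to_cr3(pfn);  /* == 0x23456001: bits 20+ rotated low */

        printf("pfn=0x%x cr3=0x%x back=0x%x\n", pfn, cr3, xen_cr3_to_pfn(cr3));
        return 0;
    }
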
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/arch-x86_64.h  Wed Jun 07 11:03:51 2006 +0100
@@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
 DEFINE_XEN_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 #endif
 
 /*
@@ -211,9 +214,19 @@ struct vcpu_guest_context {
 struct vcpu_guest_context {
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
-#define VGCF_I387_VALID (1<<0)
-#define VGCF_HVM_GUEST  (1<<1)
-#define VGCF_IN_KERNEL  (1<<2)
+#define VGCF_I387_VALID                (1<<0)
+#define VGCF_HVM_GUEST                 (1<<1)
+#define VGCF_IN_KERNEL                 (1<<2)
+#define _VGCF_i387_valid               0
+#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
+#define _VGCF_hvm_guest                1
+#define VGCF_hvm_guest                 (1<<_VGCF_hvm_guest)
+#define _VGCF_in_kernel                2
+#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events  4
+#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
     unsigned long flags;                    /* VGCF_* flags                 */
     struct cpu_user_regs user_regs;         /* User-level CPU registers     */
     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
@@ -234,10 +247,13 @@ typedef struct vcpu_guest_context vcpu_g
 typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 
+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
+
 struct arch_shared_info {
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
-    unsigned long pfn_to_mfn_frame_list_list;
+    xen_pfn_t     pfn_to_mfn_frame_list_list;
     unsigned long nmi_reason;
 };
 typedef struct arch_shared_info arch_shared_info_t;
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/callback.h
--- a/xen/include/public/callback.h     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/callback.h     Wed Jun 07 11:03:51 2006 +0100
@@ -29,12 +29,20 @@
 #define CALLBACKTYPE_nmi                   4
 
 /*
+ * Disable event delivery during callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events             0
+#define CALLBACKF_mask_events              (1U << _CALLBACKF_mask_events)
+
+/*
  * Register a callback.
  */
 #define CALLBACKOP_register                0
 struct callback_register {
-     int type;
-     xen_callback_t address;
+    uint16_t type;
+    uint16_t flags;
+    xen_callback_t address;
 };
 typedef struct callback_register callback_register_t;
 DEFINE_XEN_GUEST_HANDLE(callback_register_t);
@@ -47,7 +55,8 @@ DEFINE_XEN_GUEST_HANDLE(callback_registe
  */
 #define CALLBACKOP_unregister              1
 struct callback_unregister {
-     int type;
+    uint16_t type;
+    uint16_t _unused;
 };
 typedef struct callback_unregister callback_unregister_t;
 DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
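With the widened callback_register layout and the new CALLBACKF_mask_events
flag, a guest can ask that event delivery be masked on entry to its failsafe
(or, on x86_64, syscall) callback, which is what the TBF_INTERRUPT logic added
to entry.S above acts on. A guest-side fragment, assuming the usual
HYPERVISOR_callback_op() hypercall wrapper; failsafe_callback and the fallback
helper are hypothetical names for the guest's own code:

    /* x86_64 form: xen_callback_t is a plain address there. */
    struct callback_register cb = {
        .type    = CALLBACKTYPE_failsafe,
        .flags   = CALLBACKF_mask_events,            /* mask events on entry */
        .address = (unsigned long)failsafe_callback, /* guest entry point */
    };

    if ( HYPERVISOR_callback_op(CALLBACKOP_register, &cb) != 0 )
        register_failsafe_the_old_way();             /* hypervisor predates the flag */
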
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/dom0_ops.h     Wed Jun 07 11:03:51 2006 +0100
@@ -19,7 +19,7 @@
  * This makes sure that old versions of dom0 tools will stop working in a
  * well-defined way (rather than crashing the machine, for instance).
  */
-#define DOM0_INTERFACE_VERSION   0x03000000
+#define DOM0_INTERFACE_VERSION   0x03000001
 
 /************************************************************************/
 
@@ -27,10 +27,10 @@ struct dom0_getmemlist {
 struct dom0_getmemlist {
     /* IN variables. */
     domid_t       domain;
-    unsigned long max_pfns;
-    XEN_GUEST_HANDLE(ulong) buffer;
-    /* OUT variables. */
-    unsigned long num_pfns;
+    uint64_t max_pfns;
+    XEN_GUEST_HANDLE(xen_pfn_t) buffer;
+    /* OUT variables. */
+    uint64_t num_pfns;
 };
 typedef struct dom0_getmemlist dom0_getmemlist_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getmemlist_t);
@@ -96,9 +96,9 @@ struct dom0_getdomaininfo {
 #define DOMFLAGS_SHUTDOWNMASK 255 /* DOMFLAGS_SHUTDOWN guest-supplied code.  */
 #define DOMFLAGS_SHUTDOWNSHIFT 16
     uint32_t flags;
-    unsigned long tot_pages;
-    unsigned long max_pages;
-    unsigned long shared_info_frame;       /* MFN of shared_info struct */
+    uint64_t tot_pages;
+    uint64_t max_pages;
+    xen_pfn_t shared_info_frame;  /* MFN of shared_info struct */
     uint64_t cpu_time;
     uint32_t nr_online_vcpus;     /* Number of VCPUs currently online. */
     uint32_t max_vcpu_id;         /* Maximum VCPUID in use by this domain. */
@@ -162,7 +162,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_settime_t);
 
 struct dom0_getpageframeinfo {
     /* IN variables. */
-    unsigned long mfn;     /* Machine page frame number to query.       */
+    xen_pfn_t mfn;         /* Machine page frame number to query.       */
     domid_t domain;        /* To which domain does the frame belong?    */
     /* OUT variables. */
     /* Is the page PINNED to a type? */
@@ -213,7 +213,7 @@ struct dom0_tbufcontrol {
     cpumap_t      cpu_mask;
     uint32_t      evt_mask;
     /* OUT variables */
-    unsigned long buffer_mfn;
+    xen_pfn_t buffer_mfn;
     uint32_t size;
 };
 typedef struct dom0_tbufcontrol dom0_tbufcontrol_t;
@@ -229,8 +229,8 @@ struct dom0_physinfo {
     uint32_t sockets_per_node;
     uint32_t nr_nodes;
     uint32_t cpu_khz;
-    unsigned long total_pages;
-    unsigned long free_pages;
+    uint64_t total_pages;
+    uint64_t free_pages;
     uint32_t hw_cap[8];
 };
 typedef struct dom0_physinfo dom0_physinfo_t;
@@ -276,7 +276,7 @@ struct dom0_shadow_control {
     uint32_t       op;
     XEN_GUEST_HANDLE(ulong) dirty_bitmap;
     /* IN/OUT variables. */
-    unsigned long  pages;        /* size of buffer, updated with actual size */
+    uint64_t       pages;        /* size of buffer, updated with actual size */
     /* OUT variables. */
     struct dom0_shadow_control_stats stats;
 };
@@ -286,8 +286,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_shadow_cont
 #define DOM0_SETDOMAINMAXMEM   28
 struct dom0_setdomainmaxmem {
     /* IN variables. */
-    domid_t       domain;
-    unsigned long max_memkb;
+    domid_t  domain;
+    uint64_t max_memkb;
 };
 typedef struct dom0_setdomainmaxmem dom0_setdomainmaxmem_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setdomainmaxmem_t);
@@ -295,8 +295,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_setdomainma
 #define DOM0_GETPAGEFRAMEINFO2 29   /* batched interface */
 struct dom0_getpageframeinfo2 {
     /* IN variables. */
-    domid_t        domain;
-    unsigned long  num;
+    domid_t  domain;
+    uint64_t num;
     /* IN/OUT variables. */
     XEN_GUEST_HANDLE(ulong) array;
 };
@@ -313,12 +313,12 @@ DEFINE_XEN_GUEST_HANDLE(dom0_getpagefram
 #define DOM0_ADD_MEMTYPE         31
 struct dom0_add_memtype {
     /* IN variables. */
-    unsigned long mfn;
-    unsigned long nr_mfns;
-    uint32_t      type;
-    /* OUT variables. */
-    uint32_t      handle;
-    uint32_t      reg;
+    xen_pfn_t mfn;
+    uint64_t nr_mfns;
+    uint32_t type;
+    /* OUT variables. */
+    uint32_t handle;
+    uint32_t reg;
 };
 typedef struct dom0_add_memtype dom0_add_memtype_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype_t);
@@ -345,8 +345,8 @@ struct dom0_read_memtype {
     /* IN variables. */
     uint32_t reg;
     /* OUT variables. */
-    unsigned long mfn;
-    unsigned long nr_mfns;
+    xen_pfn_t mfn;
+    uint64_t nr_mfns;
     uint32_t type;
 };
 typedef struct dom0_read_memtype dom0_read_memtype_t;
@@ -499,8 +499,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_irq_permiss
 #define DOM0_IOMEM_PERMISSION 47
 struct dom0_iomem_permission {
     domid_t  domain;          /* domain to be affected */
-    unsigned long first_mfn;  /* first page (physical page number) in range */
-    unsigned long nr_mfns;    /* number of pages in range (>0) */
+    xen_pfn_t first_mfn;      /* first page (physical page number) in range */
+    uint64_t nr_mfns;         /* number of pages in range (>0) */
     uint8_t allow_access;     /* allow (!0) or deny (0) access to range? */
 };
 typedef struct dom0_iomem_permission dom0_iomem_permission_t;
@@ -509,7 +509,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_iomem_permi
 #define DOM0_HYPERCALL_INIT   48
 struct dom0_hypercall_init {
     domid_t  domain;          /* domain to be affected */
-    unsigned long mfn;        /* machine frame to be initialised */
+    xen_pfn_t mfn;            /* machine frame to be initialised */
 };
 typedef struct dom0_hypercall_init dom0_hypercall_init_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/grant_table.h  Wed Jun 07 11:03:51 2006 +0100
@@ -240,7 +240,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_tabl
 #define GNTTABOP_transfer                4
 struct gnttab_transfer {
     /* IN parameters. */
-    unsigned long mfn;
+    xen_pfn_t     mfn;
     domid_t       domid;
     grant_ref_t   ref;
     /* OUT parameters. */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/io/netif.h     Wed Jun 07 11:03:51 2006 +0100
@@ -26,6 +26,10 @@
 /* Packet data has been validated against protocol checksum. */
 #define _NETTXF_data_validated (1)
 #define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the next request. */
+#define _NETTXF_more_data      (2)
+#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
 
 struct netif_tx_request {
     grant_ref_t gref;      /* Reference to buffer page */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/io/ring.h
--- a/xen/include/public/io/ring.h      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/io/ring.h      Wed Jun 07 11:03:51 2006 +0100
@@ -151,19 +151,27 @@ typedef struct __name##_back_ring __name
 #define RING_SIZE(_r)                                                   \
     ((_r)->nr_ents)
 
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r)                                         \
+    (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
 /* Test if there is an empty slot available on the front ring.
  * (This is only meaningful from the front. )
  */
 #define RING_FULL(_r)                                                   \
-    (((_r)->req_prod_pvt - (_r)->rsp_cons) == RING_SIZE(_r))
+    (RING_FREE_REQUESTS(_r) == 0)
 
 /* Test if there are outstanding messages to be processed on a ring. */
 #define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
-    ((_r)->rsp_cons != (_r)->sring->rsp_prod)
+    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
 
 #define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
-    (((_r)->req_cons != (_r)->sring->req_prod) &&                       \
-     (((_r)->req_cons - (_r)->rsp_prod_pvt) != RING_SIZE(_r)))
+    ({                                                                 \
+       unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;      \
+       unsigned int rsp = RING_SIZE(_r) -                              \
+                          ((_r)->req_cons - (_r)->rsp_prod_pvt);       \
+       req < rsp ? req : rsp;                                          \
+    })
 
 /* Direct access to individual ring elements, by index. */
 #define RING_GET_REQUEST(_r, _idx)                                      \
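
The rewritten ring macros return counts rather than booleans: RING_FREE_REQUESTS() gives the number of slots the frontend may still fill, and RING_HAS_UNCONSUMED_REQUESTS() clamps the backend's view so it never reports more requests than the ring has room to answer. A small front-side usage sketch, assuming a ring created by a hypothetical DEFINE_RING_TYPES(my, ...) expansion and initialised with FRONT_RING_INIT():

    /* Sketch: batch up to 'n' requests on a front ring, never overrunning it.
     * Request contents and event-channel details are elided. */
    static void submit_requests(my_front_ring_t *ring, unsigned int n)
    {
        int notify;
        unsigned int free = RING_FREE_REQUESTS(ring);

        if (n > free)
            n = free;                          /* respect the free-slot count */

        while (n--) {
            my_request_t *req = RING_GET_REQUEST(ring, ring->req_prod_pvt);
            /* ... fill in *req ... */
            ring->req_prod_pvt++;
        }

        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
        if (notify) {
            /* kick the backend's event channel here (elided) */
        }
    }
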
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/memory.h
--- a/xen/include/public/memory.h       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/memory.h       Wed Jun 07 11:03:51 2006 +0100
@@ -29,7 +29,7 @@ struct xen_memory_reservation {
      *   OUT: GMFN bases of extents that were allocated
      *   (NB. This command also updates the mach_to_phys translation table)
      */
-    XEN_GUEST_HANDLE(ulong) extent_start;
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
 
     /* Number of extents, and size/alignment of each (2^extent_order pages). */
     unsigned long  nr_extents;
@@ -87,7 +87,7 @@ struct xen_machphys_mfn_list {
      * any large discontiguities in the machine address space, 2MB gaps in
      * the machphys table will be represented by an MFN base of zero.
      */
-    XEN_GUEST_HANDLE(ulong) extent_start;
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
 
     /*
      * Number of extents written to the above array. This will be smaller
@@ -117,7 +117,7 @@ struct xen_add_to_physmap {
     unsigned long idx;
 
     /* GPFN where the source mapping page should appear. */
-    unsigned long gpfn;
+    xen_pfn_t     gpfn;
 };
 typedef struct xen_add_to_physmap xen_add_to_physmap_t;
 DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
@@ -135,13 +135,13 @@ struct xen_translate_gpfn_list {
     unsigned long nr_gpfns;
 
     /* List of GPFNs to translate. */
-    XEN_GUEST_HANDLE(ulong) gpfn_list;
+    XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
 
     /*
      * Output list to contain MFN translations. May be the same as the input
      * list (in which case each input GPFN is overwritten with the output MFN).
      */
-    XEN_GUEST_HANDLE(ulong) mfn_list;
+    XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
 };
 typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
 DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
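
The memory-op guest handles switch from ulong to xen_pfn_t so the frame lists have a single well-defined width. A hedged guest-side sketch of filling a reservation with the new handle type, using the standard set_xen_guest_handle() helper and assuming the public memory.h declarations are in scope:

    /* Sketch: ask the hypervisor for 'count' extra frames and record their
     * GMFNs in 'frames'.  Error handling is elided. */
    static long grow_reservation(xen_pfn_t *frames, unsigned long count)
    {
        struct xen_memory_reservation res = {
            .nr_extents   = count,
            .extent_order = 0,            /* order-0 (single-page) extents */
            .domid        = DOMID_SELF,
        };

        set_xen_guest_handle(res.extent_start, frames);   /* xen_pfn_t handle */

        /* Returns the number of extents actually allocated. */
        return HYPERVISOR_memory_op(XENMEM_increase_reservation, &res);
    }
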
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/xen.h
--- a/xen/include/public/xen.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/xen.h  Wed Jun 07 11:03:51 2006 +0100
@@ -197,7 +197,7 @@ struct mmuext_op {
     unsigned int cmd;
     union {
         /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
-        unsigned long mfn;
+        xen_pfn_t     mfn;
         /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
         unsigned long linear_addr;
     } arg1;
@@ -234,10 +234,24 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
  */
 #define VMASST_CMD_enable                0
 #define VMASST_CMD_disable               1
+
+/* x86/32 guests: simulate full 4GB segment limits. */
 #define VMASST_TYPE_4gb_segments         0
+
+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
 #define VMASST_TYPE_4gb_segments_notify  1
+
+/*
+ * x86 guests: support writes to bottom-level PTEs.
+ * NB1. Page-directory entries cannot be written.
+ * NB2. Guest must continue to remove all writable mappings of PTEs.
+ */
 #define VMASST_TYPE_writable_pagetables  2
-#define MAX_VMASST_TYPE 2
+
+/* x86/PAE guests: support PDPTs above 4GB. */
+#define VMASST_TYPE_pae_extended_cr3     3
+
+#define MAX_VMASST_TYPE                  3
 
 #ifndef __ASSEMBLY__
 
@@ -443,9 +457,9 @@ struct start_info {
     unsigned long nr_pages;     /* Total pages allocated to this domain.  */
     unsigned long shared_info;  /* MACHINE address of shared info struct. */
     uint32_t flags;             /* SIF_xxx flags.                         */
-    unsigned long store_mfn;    /* MACHINE page number of shared page.    */
+    xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
     uint32_t store_evtchn;      /* Event channel for store communication. */
-    unsigned long console_mfn;  /* MACHINE address of console page.       */
+    xen_pfn_t console_mfn;      /* MACHINE page number of console page.   */
     uint32_t console_evtchn;    /* Event channel for console messages.    */
     /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
     unsigned long pt_base;      /* VIRTUAL address of page directory.     */
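
Alongside the mfn-width changes, the VM-assist list gains VMASST_TYPE_pae_extended_cr3 (plus comments for the existing types), raising MAX_VMASST_TYPE to 3. A guest opts in with the vm_assist hypercall; a minimal sketch, assuming the usual HYPERVISOR_vm_assist() wrapper:

    /* Sketch: a PAE guest requesting page-directory pointers above 4GB.
     * Whether the hypervisor honours this depends on it advertising the
     * feature; that negotiation is elided here. */
    static void enable_extended_cr3(void)
    {
        if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
                                 VMASST_TYPE_pae_extended_cr3) != 0) {
            /* fall back to keeping the PGD below 4GB */
        }
    }
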
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/console.h
--- a/xen/include/xen/console.h Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/xen/console.h Wed Jun 07 11:03:51 2006 +0100
@@ -9,8 +9,6 @@
 
 #include <xen/spinlock.h>
 #include <xen/guest_access.h>
-
-extern spinlock_t console_lock;
 
 void set_printk_prefix(const char *prefix);
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/domain.h
--- a/xen/include/xen/domain.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/xen/domain.h  Wed Jun 07 11:03:51 2006 +0100
@@ -2,8 +2,14 @@
 #ifndef __XEN_DOMAIN_H__
 #define __XEN_DOMAIN_H__
 
-extern int boot_vcpu(
+struct vcpu *alloc_vcpu(
+    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
+int boot_vcpu(
     struct domain *d, int vcpuid, struct vcpu_guest_context *ctxt);
+struct vcpu *alloc_idle_vcpu(unsigned int cpu_id);
+
+struct domain *alloc_domain(domid_t domid);
+void free_domain(struct domain *d);
 
 /*
  * Arch-specifics.
@@ -11,19 +17,18 @@ extern int boot_vcpu(
 
 struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id);
 
-extern void free_vcpu_struct(struct vcpu *v);
+void free_vcpu_struct(struct vcpu *v);
 
-extern int arch_domain_create(struct domain *d);
+int arch_domain_create(struct domain *d);
 
-extern void arch_domain_destroy(struct domain *d);
+void arch_domain_destroy(struct domain *d);
 
-extern int arch_set_info_guest(
-    struct vcpu *v, struct vcpu_guest_context *c);
+int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c);
 
-extern void domain_relinquish_resources(struct domain *d);
+void domain_relinquish_resources(struct domain *d);
 
-extern void dump_pageframe_info(struct domain *d);
+void dump_pageframe_info(struct domain *d);
 
-extern void arch_dump_domain_info(struct domain *d);
+void arch_dump_domain_info(struct domain *d);
 
 #endif /* __XEN_DOMAIN_H__ */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/xen/sched-if.h        Wed Jun 07 11:03:51 2006 +0100
@@ -60,14 +60,17 @@ struct scheduler {
 
     void         (*init)           (void);
     void         (*tick)           (unsigned int cpu);
-    int          (*alloc_task)     (struct vcpu *);
-    void         (*add_task)       (struct vcpu *);
-    void         (*free_task)      (struct domain *);
-    void         (*rem_task)       (struct vcpu *);
+
+    int          (*init_vcpu)      (struct vcpu *);
+    void         (*destroy_domain) (struct domain *);
+
     void         (*sleep)          (struct vcpu *);
     void         (*wake)           (struct vcpu *);
+
     int          (*set_affinity)   (struct vcpu *, cpumask_t *);
+
     struct task_slice (*do_schedule) (s_time_t);
+
     int          (*control)        (struct sched_ctl_cmd *);
     int          (*adjdom)         (struct domain *,
                                     struct sched_adjdom_cmd *);
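
The scheduler hook table is reworked around the new lifecycle: per-VCPU setup collapses into a single init_vcpu hook and teardown becomes per-domain via destroy_domain, replacing the old alloc_task/add_task/free_task/rem_task quartet. A hedged sketch of how a scheduler might populate the revised struct; only the hooks visible in this hunk are shown, and the scheduler name and remaining members are assumed unchanged:

    /* Sketch: wiring a hypothetical scheduler into the reworked hook table. */
    static int myched_init_vcpu(struct vcpu *v)
    {
        /* allocate and attach per-VCPU scheduler state here */
        return 0;
    }

    static void myched_destroy_domain(struct domain *d)
    {
        /* free any per-domain scheduler state here */
    }

    static void myched_sleep(struct vcpu *v) { }
    static void myched_wake(struct vcpu *v)  { }

    struct scheduler sched_myched_def = {
        .init_vcpu      = myched_init_vcpu,
        .destroy_domain = myched_destroy_domain,
        .sleep          = myched_sleep,
        .wake           = myched_wake,
        /* .name, .do_schedule, etc. as in the existing schedulers */
    };
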
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/xen/sched.h   Wed Jun 07 11:03:51 2006 +0100
@@ -186,12 +186,6 @@ extern struct vcpu *idle_vcpu[NR_CPUS];
 #define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
 #define is_idle_vcpu(v)   (is_idle_domain((v)->domain))
 
-struct vcpu *alloc_vcpu(
-    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
-
-struct domain *alloc_domain(void);
-void free_domain(struct domain *d);
-
 #define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */
 #define put_domain(_d) \
   if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)
@@ -226,7 +220,7 @@ static inline void get_knownalive_domain
 }
 
 extern struct domain *domain_create(
-    domid_t dom_id, unsigned int cpu);
+    domid_t domid, unsigned int cpu);
 extern int construct_dom0(
     struct domain *d,
     unsigned long image_start, unsigned long image_len, 
@@ -269,8 +263,8 @@ void new_thread(struct vcpu *d,
 #define set_current_state(_s) do { current->state = (_s); } while (0)
 void scheduler_init(void);
 void schedulers_start(void);
-void sched_add_domain(struct vcpu *);
-void sched_rem_domain(struct vcpu *);
+int  sched_init_vcpu(struct vcpu *);
+void sched_destroy_domain(struct domain *);
 long sched_ctl(struct sched_ctl_cmd *);
 long sched_adjdom(struct sched_adjdom_cmd *);
 int  sched_id(void);
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile       Wed Jun 07 11:03:15 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-
-ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
-obj-y   += util.o
-endif
-
-obj-y  += core/
-#obj-y += char/
-obj-y  += console/
-obj-y  += evtchn/
-obj-$(CONFIG_XEN_IA64_DOM0_VP) += balloon/
-obj-y  += privcmd/
-obj-y  += xenbus/
-
-obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
-obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
-obj-$(CONFIG_XEN_TPMDEV_BACKEND)       += tpmback/
-obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += blkfront/
-obj-$(CONFIG_XEN_NETDEV_FRONTEND)      += netfront/
-obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
-obj-$(CONFIG_XEN_TPMDEV_FRONTEND)      += tpmfront/
-obj-$(CONFIG_XEN_PCIDEV_BACKEND)       += pciback/
-obj-$(CONFIG_XEN_PCIDEV_FRONTEND)      += pcifront/
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile   Wed Jun 07 11:03:15 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-obj-y   := gnttab.o features.o
-obj-$(CONFIG_PROC_FS) += xen_proc.o
-
-ifeq ($(ARCH),ia64)
-obj-y   += evtchn.o
-obj-y   += xenia64_init.o
-ifeq ($(CONFIG_XEN_IA64_DOM0_VP),y)
-obj-$(CONFIG_NET)     += skbuff.o
-endif
-else
-obj-y   += reboot.o evtchn.o fixup.o 
-obj-$(CONFIG_SMP)     += smp.o         # setup_profiling_timer def'd in ia64
-obj-$(CONFIG_NET)     += skbuff.o      # until networking is up on ia64
-endif
-obj-$(CONFIG_SYSFS)   += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
