[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID fdf25330e4a699c4b90aa28cc30843447cf9da61
# Parent  59d4c1863330e2023b05043d450da39cda47bd5a
# Parent  f91cc71173c55f18280b12e6732d9e7509d208be
merge with xen-unstable.hg
---
 tools/blktap/Makefile                                            |   93 
 tools/blktap/README                                              |  137 -
 tools/blktap/README.sept05                                       |   33 
 tools/blktap/blkdump.c                                           |   62 
 tools/blktap/blkif.c                                             |  212 -
 tools/blktap/blktaplib.c                                         |  453 ---
 tools/blktap/blktaplib.h                                         |  171 -
 tools/blktap/list.h                                              |   55 
 tools/blktap/parallax/Makefile                                   |   62 
 tools/blktap/parallax/README                                     |  171 -
 tools/blktap/parallax/block-async.c                              |  393 --
 tools/blktap/parallax/block-async.h                              |   69 
 tools/blktap/parallax/blockstore.c                               | 1348 
----------
 tools/blktap/parallax/blockstore.h                               |  134 
 tools/blktap/parallax/blockstored.c                              |  275 --
 tools/blktap/parallax/bstest.c                                   |  191 -
 tools/blktap/parallax/parallax.c                                 |  608 ----
 tools/blktap/parallax/radix.c                                    |  631 ----
 tools/blktap/parallax/radix.h                                    |   45 
 tools/blktap/parallax/requests-async.c                           |  762 -----
 tools/blktap/parallax/requests-async.h                           |   29 
 tools/blktap/parallax/snaplog.c                                  |  238 -
 tools/blktap/parallax/snaplog.h                                  |   61 
 tools/blktap/parallax/vdi.c                                      |  367 --
 tools/blktap/parallax/vdi.h                                      |   55 
 tools/blktap/parallax/vdi_create.c                               |   52 
 tools/blktap/parallax/vdi_fill.c                                 |   81 
 tools/blktap/parallax/vdi_list.c                                 |   47 
 tools/blktap/parallax/vdi_snap.c                                 |   43 
 tools/blktap/parallax/vdi_snap_delete.c                          |   48 
 tools/blktap/parallax/vdi_snap_list.c                            |   82 
 tools/blktap/parallax/vdi_tree.c                                 |  132 
 tools/blktap/parallax/vdi_unittest.c                             |  184 -
 tools/blktap/parallax/vdi_validate.c                             |   97 
 tools/blktap/ublkback/Makefile                                   |   40 
 tools/blktap/ublkback/ublkback.c                                 |   18 
 tools/blktap/ublkback/ublkbacklib.c                              |  473 ---
 tools/blktap/ublkback/ublkbacklib.h                              |   16 
 tools/blktap/xenbus.c                                            |  568 ----
 docs/src/user.tex                                                |    3 
 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c                    |   51 
 linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c                   |  193 -
 linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c             |   19 
 linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c                  |   54 
 linux-2.6-xen-sparse/drivers/xen/Kconfig                         |   10 
 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c               |   10 
 linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c              |    3 
 linux-2.6-xen-sparse/drivers/xen/core/evtchn.c                   |    4 
 linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile                |    2 
 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c               |   16 
 linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile                 |    2 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c           |    4 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h        |   23 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h |   17 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h      |   22 
 patches/linux-2.6.16.13/ipv6-no-autoconf.patch                   |   20 
 tools/examples/network-bridge                                    |   36 
 tools/examples/vif-bridge                                        |   12 
 tools/examples/xen-network-common.sh                             |   45 
 tools/examples/xmexample.hvm                                     |   12 
 tools/firmware/acpi/Makefile                                     |    7 
 tools/firmware/acpi/acpi2_0.h                                    |    2 
 tools/firmware/acpi/acpi_dsdt.asl                                |  345 ++
 tools/firmware/acpi/acpi_dsdt.c                                  |  399 +-
 tools/firmware/acpi/acpi_fadt.h                                  |   21 
 tools/ioemu/hw/pc.c                                              |    8 
 tools/ioemu/hw/pci.c                                             |   19 
 tools/ioemu/hw/piix4acpi.c                                       |  481 +++
 tools/ioemu/target-i386-dm/Makefile                              |    2 
 tools/libxc/xc_domain.c                                          |   11 
 tools/libxc/xc_linux_restore.c                                   |   66 
 tools/libxc/xc_linux_save.c                                      |    4 
 tools/libxc/xenctrl.h                                            |    4 
 tools/python/xen/lowlevel/xc/xc.c                                |   31 
 tools/python/xen/util/SSHTransport.py                            |  102 
 tools/python/xen/util/xmlrpclib2.py                              |   55 
 tools/python/xen/xend/XendClient.py                              |   13 
 tools/python/xen/xend/XendDomainInfo.py                          |    5 
 tools/python/xen/xm/create.py                                    |    2 
 tools/python/xen/xm/main.py                                      |   34 
 tools/security/secpol_tool.c                                     |    7 
 tools/xm-test/grouptest/default                                  |    2 
 tools/xm-test/grouptest/medium                                   |    2 
 tools/xm-test/lib/XmTestLib/Console.py                           |   70 
 tools/xm-test/tests/memset/03_memset_random_pos.py               |    6 
 xen/acm/acm_core.c                                               |    5 
 xen/acm/acm_policy.c                                             |   45 
 xen/arch/ia64/linux-xen/smp.c                                    |   36 
 xen/arch/ia64/xen/domain.c                                       |    4 
 xen/arch/ia64/xen/xensetup.c                                     |    3 
 xen/arch/x86/Makefile                                            |    2 
 xen/arch/x86/audit.c                                             |    4 
 xen/arch/x86/hvm/vmx/vmcs.c                                      |   17 
 xen/arch/x86/hvm/vmx/vmx.c                                       |   13 
 xen/arch/x86/mm.c                                                |  205 +
 xen/arch/x86/setup.c                                             |    7 
 xen/arch/x86/shadow.c                                            |  125 
 xen/arch/x86/shadow32.c                                          |   12 
 xen/arch/x86/shadow_guest32pae.c                                 |    2 
 xen/arch/x86/shadow_public.c                                     |   40 
 xen/arch/x86/time.c                                              |    2 
 xen/arch/x86/traps.c                                             |  242 +
 xen/arch/x86/x86_32/seg_fixup.c                                  |    2 
 xen/arch/x86/x86_32/traps.c                                      |   44 
 xen/arch/x86/x86_64/traps.c                                      |   37 
 xen/arch/x86/x86_emulate.c                                       |    4 
 xen/common/acm_ops.c                                             |    2 
 xen/common/dom0_ops.c                                            |   19 
 xen/common/domain.c                                              |    2 
 xen/common/kernel.c                                              |    5 
 xen/common/keyhandler.c                                          |   20 
 xen/common/memory.c                                              |    2 
 xen/common/sched_credit.c                                        |    4 
 xen/common/sched_sedf.c                                          |   26 
 xen/common/schedule.c                                            |    4 
 xen/drivers/char/console.c                                       |   45 
 xen/include/acm/acm_core.h                                       |    9 
 xen/include/acm/acm_hooks.h                                      |   18 
 xen/include/asm-ia64/debugger.h                                  |    8 
 xen/include/asm-ia64/vmx.h                                       |    1 
 xen/include/asm-ia64/xenprocessor.h                              |    2 
 xen/include/asm-x86/hvm/support.h                                |    2 
 xen/include/asm-x86/mm.h                                         |    2 
 xen/include/asm-x86/processor.h                                  |   15 
 xen/include/asm-x86/shadow.h                                     |   44 
 xen/include/asm-x86/shadow_64.h                                  |   36 
 xen/include/asm-x86/shadow_ops.h                                 |    8 
 xen/include/public/arch-x86_32.h                                 |    9 
 xen/include/public/arch-x86_64.h                                 |   14 
 xen/include/public/dom0_ops.h                                    |   23 
 xen/include/public/memory.h                                      |   14 
 xen/include/xen/console.h                                        |    2 
 xen/include/xen/lib.h                                            |    1 
 xen/include/xen/sched.h                                          |    1 
 134 files changed, 2608 insertions(+), 9373 deletions(-)

diff -r 59d4c1863330 -r fdf25330e4a6 docs/src/user.tex
--- a/docs/src/user.tex Fri Jun 23 15:26:01 2006 -0600
+++ b/docs/src/user.tex Fri Jun 23 15:33:25 2006 -0600
@@ -1972,7 +1972,8 @@ editing \path{grub.conf}.
 \item [ console=$<$specifier list$>$ ] Specify the destination for Xen
   console I/O.  This is a comma-separated list of, for example:
   \begin{description}
-  \item[ vga ] Use VGA console and allow keyboard input.
+  \item[ vga ] Use VGA console (only until domain 0 boots, unless {\bf
+  vga[keep] } is specified).
   \item[ com1 ] Use serial port com1.
   \item[ com2H ] Use serial port com2. Transmitted chars will have the
     MSB set. Received chars must have MSB set.
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c     Fri Jun 23 15:26:01 
2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c     Fri Jun 23 15:33:25 
2006 -0600
@@ -273,6 +273,49 @@ static void dump_fault_path(unsigned lon
 }
 #endif
 
+static int spurious_fault(struct pt_regs *regs,
+                         unsigned long address,
+                         unsigned long error_code)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+#ifdef CONFIG_XEN
+       /* Faults in hypervisor area are never spurious. */
+       if (address >= HYPERVISOR_VIRT_START)
+               return 0;
+#endif
+
+       /* Reserved-bit violation or user access to kernel space? */
+       if (error_code & 0x0c)
+               return 0;
+
+       pgd = init_mm.pgd + pgd_index(address);
+       if (!pgd_present(*pgd))
+               return 0;
+
+       pud = pud_offset(pgd, address);
+       if (!pud_present(*pud))
+               return 0;
+
+       pmd = pmd_offset(pud, address);
+       if (!pmd_present(*pmd))
+               return 0;
+
+       pte = pte_offset_kernel(pmd, address);
+       if (!pte_present(*pte))
+               return 0;
+       if ((error_code & 0x02) && !pte_write(*pte))
+               return 0;
+#ifdef CONFIG_X86_PAE
+       if ((error_code & 0x10) && (pte_val(*pte) & _PAGE_NX))
+               return 0;
+#endif
+
+       return 1;
+}
 
 /*
  * This routine handles page faults.  It determines the address,
@@ -327,8 +370,16 @@ fastcall void __kprobes do_page_fault(st
         * protection error (error_code & 1) == 0.
         */
        if (unlikely(address >= TASK_SIZE)) { 
+#ifdef CONFIG_XEN
+               /* Faults in hypervisor area can never be patched up. */
+               if (address >= HYPERVISOR_VIRT_START)
+                       goto bad_area_nosemaphore;
+#endif
                if (!(error_code & 5))
                        goto vmalloc_fault;
+               /* Can take a spurious fault if mapping changes R/O -> R/W. */
+               if (spurious_fault(regs, address, error_code))
+                       return;
                /* 
                 * Don't take the mm semaphore here. If we fixup a prefetch
                 * fault we could otherwise deadlock.
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c    Fri Jun 23 15:26:01 
2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c    Fri Jun 23 15:33:25 
2006 -0600
@@ -263,6 +263,10 @@ static void contiguous_bitmap_clear(
        }
 }
 
+/* Protected by balloon_lock. */
+#define MAX_CONTIG_ORDER 7
+static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+
 /* Ensure multi-page extents are contiguous in machine memory. */
 int xen_create_contiguous_region(
        unsigned long vstart, unsigned int order, unsigned int address_bits)
@@ -271,13 +275,23 @@ int xen_create_contiguous_region(
        pud_t         *pud; 
        pmd_t         *pmd;
        pte_t         *pte;
+       unsigned long *in_frames = discontig_frames, out_frame;
        unsigned long  frame, i, flags;
-       struct xen_memory_reservation reservation = {
-               .nr_extents   = 1,
-               .extent_order = 0,
-               .domid        = DOMID_SELF
+       long           rc;
+       int            success;
+       struct xen_memory_exchange exchange = {
+               .in = {
+                       .nr_extents   = 1UL << order,
+                       .extent_order = 0,
+                       .domid        = DOMID_SELF
+               },
+               .out = {
+                       .nr_extents   = 1,
+                       .extent_order = order,
+                       .address_bits = address_bits,
+                       .domid        = DOMID_SELF
+               }
        };
-       set_xen_guest_handle(reservation.extent_start, &frame);
 
        /*
         * Currently an auto-translated guest will not perform I/O, nor will
@@ -287,68 +301,73 @@ int xen_create_contiguous_region(
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;
 
+       if (order > MAX_CONTIG_ORDER)
+               return -ENOMEM;
+
+       set_xen_guest_handle(exchange.in.extent_start, in_frames);
+       set_xen_guest_handle(exchange.out.extent_start, &out_frame);
+
        scrub_pages(vstart, 1 << order);
 
        balloon_lock(flags);
 
-       /* 1. Zap current PTEs, giving away the underlying pages. */
-       for (i = 0; i < (1<<order); i++) {
+       /* 1. Zap current PTEs, remembering MFNs. */
+       for (i = 0; i < (1UL<<order); i++) {
                pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
                pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
                pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
                pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
-               frame = pte_mfn(*pte);
-               BUG_ON(HYPERVISOR_update_va_mapping(
-                       vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
+               in_frames[i] = pte_mfn(*pte);
+               if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
+                                                __pte_ma(0), 0))
+                       BUG();
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
                        INVALID_P2M_ENTRY);
-               BUG_ON(HYPERVISOR_memory_op(
-                       XENMEM_decrease_reservation, &reservation) != 1);
        }
 
        /* 2. Get a new contiguous memory extent. */
-       reservation.extent_order = order;
-       reservation.address_bits = address_bits;
-       frame = __pa(vstart) >> PAGE_SHIFT;
-       if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
-                                &reservation) != 1)
-               goto fail;
+       out_frame = __pa(vstart) >> PAGE_SHIFT;
+       rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
+       success = (exchange.nr_exchanged == (1UL << order));
+       BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
+       BUG_ON(success && (rc != 0));
+       if (unlikely(rc == -ENOSYS)) {
+               /* Compatibility when XENMEM_exchange is unsupported. */
+               if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                        &exchange.in) != (1UL << order))
+                       BUG();
+               success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
+                                               &exchange.out) == 1);
+               if (!success) {
+                       /* Couldn't get special memory: fall back to normal. */
+                       for (i = 0; i < (1UL<<order); i++)
+                               in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
+                       if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
+                                                &exchange.in) != (1UL<<order))
+                               BUG();
+               }
+       }
 
        /* 3. Map the new extent in place of old pages. */
-       for (i = 0; i < (1<<order); i++) {
-               BUG_ON(HYPERVISOR_update_va_mapping(
-                       vstart + (i*PAGE_SIZE),
-                       pfn_pte_ma(frame+i, PAGE_KERNEL), 0));
-               set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame+i);
+       for (i = 0; i < (1UL<<order); i++) {
+               frame = success ? (out_frame + i) : in_frames[i];
+               if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
+                                                pfn_pte_ma(frame,
+                                                           PAGE_KERNEL),
+                                                0))
+                       BUG();
+               set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
        }
 
        flush_tlb_all();
 
-       contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
+       if (success)
+               contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT,
+                                     1UL << order);
 
        balloon_unlock(flags);
 
-       return 0;
-
- fail:
-       reservation.extent_order = 0;
-       reservation.address_bits = 0;
-
-       for (i = 0; i < (1<<order); i++) {
-               frame = (__pa(vstart) >> PAGE_SHIFT) + i;
-               BUG_ON(HYPERVISOR_memory_op(
-                       XENMEM_populate_physmap, &reservation) != 1);
-               BUG_ON(HYPERVISOR_update_va_mapping(
-                       vstart + (i*PAGE_SIZE),
-                       pfn_pte_ma(frame, PAGE_KERNEL), 0));
-               set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
-       }
-
-       flush_tlb_all();
-
-       balloon_unlock(flags);
-
-       return -ENOMEM;
+       return success ? 0 : -ENOMEM;
 }
 
 void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
@@ -357,47 +376,79 @@ void xen_destroy_contiguous_region(unsig
        pud_t         *pud; 
        pmd_t         *pmd;
        pte_t         *pte;
+       unsigned long *out_frames = discontig_frames, in_frame;
        unsigned long  frame, i, flags;
-       struct xen_memory_reservation reservation = {
-               .nr_extents   = 1,
-               .extent_order = 0,
-               .domid        = DOMID_SELF
+       long           rc;
+       int            success;
+       struct xen_memory_exchange exchange = {
+               .in = {
+                       .nr_extents   = 1,
+                       .extent_order = order,
+                       .domid        = DOMID_SELF
+               },
+               .out = {
+                       .nr_extents   = 1UL << order,
+                       .extent_order = 0,
+                       .domid        = DOMID_SELF
+               }
        };
-       set_xen_guest_handle(reservation.extent_start, &frame);
 
        if (xen_feature(XENFEAT_auto_translated_physmap) ||
            !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
                return;
 
+       if (order > MAX_CONTIG_ORDER)
+               return;
+
+       set_xen_guest_handle(exchange.in.extent_start, &in_frame);
+       set_xen_guest_handle(exchange.out.extent_start, out_frames);
+
        scrub_pages(vstart, 1 << order);
 
        balloon_lock(flags);
 
        contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
 
-       /* 1. Zap current PTEs, giving away the underlying pages. */
-       for (i = 0; i < (1<<order); i++) {
-               pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
-               pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
-               pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
-               pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
-               frame = pte_mfn(*pte);
-               BUG_ON(HYPERVISOR_update_va_mapping(
-                       vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
+       /* 1. Find start MFN of contiguous extent. */
+       pgd = pgd_offset_k(vstart);
+       pud = pud_offset(pgd, vstart);
+       pmd = pmd_offset(pud, vstart);
+       pte = pte_offset_kernel(pmd, vstart);
+       in_frame = pte_mfn(*pte);
+
+       /* 2. Zap current PTEs. */
+       for (i = 0; i < (1UL<<order); i++) {
+               if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
+                                                __pte_ma(0), 0));
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
                        INVALID_P2M_ENTRY);
-               BUG_ON(HYPERVISOR_memory_op(
-                       XENMEM_decrease_reservation, &reservation) != 1);
-       }
-
-       /* 2. Map new pages in place of old pages. */
-       for (i = 0; i < (1<<order); i++) {
-               frame = (__pa(vstart) >> PAGE_SHIFT) + i;
-               BUG_ON(HYPERVISOR_memory_op(
-                       XENMEM_populate_physmap, &reservation) != 1);
-               BUG_ON(HYPERVISOR_update_va_mapping(
-                       vstart + (i*PAGE_SIZE),
-                       pfn_pte_ma(frame, PAGE_KERNEL), 0));
+               out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
+       }
+
+       /* 3. Do the exchange for non-contiguous MFNs. */
+       rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
+       success = (exchange.nr_exchanged == 1);
+       BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
+       BUG_ON(success && (rc != 0));
+       if (rc == -ENOSYS) {
+               /* Compatibility when XENMEM_exchange is unsupported. */
+               if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                        &exchange.in) != 1)
+                       BUG();
+               if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
+                                        &exchange.out) != (1UL << order))
+                       BUG();
+               success = 1;
+       }
+
+       /* 4. Map new pages in place of old pages. */
+       for (i = 0; i < (1UL<<order); i++) {
+               frame = success ? out_frames[i] : (in_frame + i);
+               if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
+                                                pfn_pte_ma(frame,
+                                                           PAGE_KERNEL),
+                                                0))
+                       BUG();
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
        }
 
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c      Fri Jun 23 
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c      Fri Jun 23 
15:33:25 2006 -0600
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/percpu.h>
+#include <linux/module.h>
 
 #include <asm/processor.h>
 #include <asm/proto.h>
@@ -92,8 +93,16 @@ static void __init setup_boot_cpu_data(v
        boot_cpu_data.x86_mask = eax & 0xf;
 }
 
+#include <xen/interface/memory.h>
+unsigned long *machine_to_phys_mapping;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
 void __init x86_64_start_kernel(char * real_mode_data)
 {
+       struct xen_machphys_mapping mapping;
+       unsigned long machine_to_phys_nr_ents;
        char *s;
        int i;
 
@@ -104,6 +113,16 @@ void __init x86_64_start_kernel(char * r
                start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
                        xen_start_info->nr_pt_frames;
        }
+
+
+       machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
+       machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+       if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+               machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+               machine_to_phys_nr_ents = mapping.max_mfn + 1;
+       }
+       while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
+               machine_to_phys_order++;
 
 #if 0
        for (i = 0; i < 256; i++)
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c   Fri Jun 23 15:26:01 
2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c   Fri Jun 23 15:33:25 
2006 -0600
@@ -307,6 +307,49 @@ int exception_trace = 1;
 #define MEM_LOG(_f, _a...) ((void)0)
 #endif
 
+static int spurious_fault(struct pt_regs *regs,
+                         unsigned long address,
+                         unsigned long error_code)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+#ifdef CONFIG_XEN
+       /* Faults in hypervisor area are never spurious. */
+       if ((address >= HYPERVISOR_VIRT_START) &&
+           (address < HYPERVISOR_VIRT_END))
+               return 0;
+#endif
+
+       /* Reserved-bit violation or user access to kernel space? */
+       if (error_code & (PF_RSVD|PF_USER))
+               return 0;
+
+       pgd = init_mm.pgd + pgd_index(address);
+       if (!pgd_present(*pgd))
+               return 0;
+
+       pud = pud_offset(pgd, address);
+       if (!pud_present(*pud))
+               return 0;
+
+       pmd = pmd_offset(pud, address);
+       if (!pmd_present(*pmd))
+               return 0;
+
+       pte = pte_offset_kernel(pmd, address);
+       if (!pte_present(*pte))
+               return 0;
+       if ((error_code & PF_WRITE) && !pte_write(*pte))
+               return 0;
+       if ((error_code & PF_INSTR) && (pte_val(*pte) & _PAGE_NX))
+               return 0;
+
+       return 1;
+}
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -361,16 +404,19 @@ asmlinkage void __kprobes do_page_fault(
         */
        if (unlikely(address >= TASK_SIZE64)) {
                /*
-                * Must check for the entire kernel range here: with writable
-                * page tables the hypervisor may temporarily clear PMD
-                * entries.
+                * Don't check for the module range here: its PML4
+                * is always initialized because it's shared with the main
+                * kernel text. Only vmalloc may need PML4 syncups.
                 */
                if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-                   address >= PAGE_OFFSET) {
+                     ((address >= VMALLOC_START && address < VMALLOC_END))) {
                        if (vmalloc_fault(address) < 0)
                                goto bad_area_nosemaphore;
                        return;
                }
+               /* Can take a spurious fault if mapping changes R/O -> R/W. */
+               if (spurious_fault(regs, address, error_code))
+                       return;
                /*
                 * Don't take the mm semaphore here. If we fixup a prefetch
                 * fault we could otherwise deadlock.
diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/drivers/xen/Kconfig
--- a/linux-2.6-xen-sparse/drivers/xen/Kconfig  Fri Jun 23 15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig  Fri Jun 23 15:33:25 2006 -0600
@@ -27,6 +27,11 @@ config XEN_UNPRIVILEGED_GUEST
 config XEN_UNPRIVILEGED_GUEST
        bool
        default !XEN_PRIVILEGED_GUEST
+
+config XEN_PRIVCMD
+       bool
+       depends on PROC_FS
+       default y
 
 config XEN_BACKEND
         tristate "Backend driver support"
@@ -84,6 +89,11 @@ config XEN_BLKDEV_BACKEND
          block devices to other guests via a high-performance shared-memory
          interface.
 
+config XEN_XENBUS_DEV
+       bool
+       depends on PROC_FS
+       default y
+
 config XEN_NETDEV_BACKEND
        tristate "Network-device backend driver"
         depends on XEN_BACKEND && NET
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Fri Jun 23 
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Fri Jun 23 
15:33:25 2006 -0600
@@ -58,7 +58,9 @@
 
 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
 
+#ifdef CONFIG_PROC_FS
 static struct proc_dir_entry *balloon_pde;
+#endif
 
 static DECLARE_MUTEX(balloon_mutex);
 
@@ -403,6 +405,7 @@ static int balloon_init_watcher(struct n
        return NOTIFY_DONE;
 }
 
+#ifdef CONFIG_PROC_FS
 static int balloon_write(struct file *file, const char __user *buffer,
                         unsigned long count, void *data)
 {
@@ -456,6 +459,7 @@ static int balloon_read(char *page, char
        *eof = 1;
        return len;
 }
+#endif
 
 static struct notifier_block xenstore_notifier;
 
@@ -464,10 +468,10 @@ static int __init balloon_init(void)
        unsigned long pfn;
        struct page *page;
 
-       IPRINTK("Initialising balloon driver.\n");
-
        if (!is_running_on_xen())
                return -ENODEV;
+
+       IPRINTK("Initialising balloon driver.\n");
 
        current_pages = min(xen_start_info->nr_pages, max_pfn);
        totalram_pages = current_pages;
@@ -481,6 +485,7 @@ static int __init balloon_init(void)
        balloon_timer.data = 0;
        balloon_timer.function = balloon_alarm;
     
+#ifdef CONFIG_PROC_FS
        if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
                WPRINTK("Unable to create /proc/xen/balloon.\n");
                return -1;
@@ -488,6 +493,7 @@ static int __init balloon_init(void)
 
        balloon_pde->read_proc  = balloon_read;
        balloon_pde->write_proc = balloon_write;
+#endif
     
        /* Initialise the balloon with excess memory space. */
        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c       Fri Jun 23 
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c       Fri Jun 23 
15:33:25 2006 -0600
@@ -109,6 +109,9 @@ static int __init setup_vcpu_hotplug_eve
        static struct notifier_block xsn_cpu = {
                .notifier_call = setup_cpu_watcher };
 
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        register_cpu_notifier(&hotplug_cpu);
        register_xenstore_notifier(&xsn_cpu);
 
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Fri Jun 23 15:26:01 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Fri Jun 23 15:33:25 
2006 -0600
@@ -666,6 +666,10 @@ int irq_ignore_unhandled(unsigned int ir
 int irq_ignore_unhandled(unsigned int irq)
 {
        struct physdev_irq_status_query irq_status = { .irq = irq };
+
+       if (!is_running_on_xen())
+               return 0;
+
        (void)HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
        return !!(irq_status.flags & XENIRQSTAT_shared);
 }
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile Fri Jun 23 15:26:01 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile Fri Jun 23 15:33:25 
2006 -0600
@@ -1,2 +1,2 @@
 
-obj-y  := privcmd.o
+obj-$(CONFIG_XEN_PRIVCMD)      := privcmd.o
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c        Fri Jun 23 
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c        Fri Jun 23 
15:33:25 2006 -0600
@@ -71,8 +71,6 @@ static int packet_read_shmem(struct pack
                             char *buffer, int isuserbuffer, u32 left);
 static int vtpm_queue_packet(struct packet *pak);
 
-#define MIN(x,y)  (x) < (y) ? (x) : (y)
-
 /***************************************************************
  Buffer copying fo user and kernel space buffes.
 ***************************************************************/
@@ -309,7 +307,7 @@ int _packet_write(struct packet *pak,
                        return 0;
                }
 
-               tocopy = MIN(size - offset, PAGE_SIZE);
+               tocopy = min_t(size_t, size - offset, PAGE_SIZE);
 
                if (copy_from_buffer((void *)(MMAP_VADDR(tpmif, i) |
                                              (tx->addr & ~PAGE_MASK)),
@@ -365,7 +363,7 @@ static int packet_read(struct packet *pa
                u32 instance_no = htonl(pak->tpm_instance);
                u32 last_read = pak->last_read;
 
-               to_copy = MIN(4 - last_read, numbytes);
+               to_copy = min_t(size_t, 4 - last_read, numbytes);
 
                if (copy_to_buffer(&buffer[0],
                                   &(((u8 *) & instance_no)[last_read]),
@@ -384,7 +382,7 @@ static int packet_read(struct packet *pa
 
        if (room_left > 0) {
                if (pak->data_buffer) {
-                       u32 to_copy = MIN(pak->data_len - offset, room_left);
+                       u32 to_copy = min_t(u32, pak->data_len - offset, 
room_left);
                        u32 last_read = pak->last_read - 4;
 
                        if (copy_to_buffer(&buffer[offset],
@@ -424,7 +422,7 @@ static int packet_read_shmem(struct pack
         * and within that page at offset 'offset'.
         * Copy a maximum of 'room_left' bytes.
         */
-       to_copy = MIN(PAGE_SIZE - pg_offset, room_left);
+       to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left);
        while (to_copy > 0) {
                void *src;
                struct gnttab_map_grant_ref map_op;
@@ -451,7 +449,7 @@ static int packet_read_shmem(struct pack
                        /*
                         * User requests more than what's available
                         */
-                       to_copy = MIN(tx->size, to_copy);
+                       to_copy = min_t(u32, tx->size, to_copy);
                }
 
                DPRINTK("Copying from mapped memory at %08lx\n",
@@ -483,7 +481,7 @@ static int packet_read_shmem(struct pack
                last_read += to_copy;
                room_left -= to_copy;
 
-               to_copy = MIN(PAGE_SIZE, room_left);
+               to_copy = min_t(u32, PAGE_SIZE, room_left);
                i++;
        }                       /* while (to_copy > 0) */
        /*
@@ -545,7 +543,7 @@ static ssize_t vtpm_op_read(struct file 
 
                DPRINTK("size given by app: %d, available: %d\n", size, left);
 
-               ret_size = MIN(size, left);
+               ret_size = min_t(size_t, size, left);
 
                ret_size = packet_read(pak, ret_size, data, size, 1);
 
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile  Fri Jun 23 15:26:01 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile  Fri Jun 23 15:33:25 
2006 -0600
@@ -9,4 +9,4 @@ xenbus-objs += xenbus_comms.o
 xenbus-objs += xenbus_comms.o
 xenbus-objs += xenbus_xs.o
 xenbus-objs += xenbus_probe.o
-xenbus-objs += xenbus_dev.o
+obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri Jun 23 
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri Jun 23 
15:33:25 2006 -0600
@@ -926,6 +926,7 @@ void xenbus_probe(void *unused)
 }
 
 
+#ifdef CONFIG_PROC_FS
 static struct file_operations xsd_kva_fops;
 static struct proc_dir_entry *xsd_kva_intf;
 static struct proc_dir_entry *xsd_port_intf;
@@ -964,6 +965,7 @@ static int xsd_port_read(char *page, cha
        *eof = 1;
        return len;
 }
+#endif
 
 
 static int __init xenbus_probe_init(void)
@@ -1008,6 +1010,7 @@ static int __init xenbus_probe_init(void
                BUG_ON(err);
                xen_start_info->store_evtchn = alloc_unbound.port;
 
+#ifdef CONFIG_PROC_FS
                /* And finally publish the above info in /proc/xen */
                xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
                if (xsd_kva_intf) {
@@ -1020,6 +1023,7 @@ static int __init xenbus_probe_init(void
                xsd_port_intf = create_xen_proc_entry("xsd_port", 0400);
                if (xsd_port_intf)
                        xsd_port_intf->read_proc = xsd_port_read;
+#endif
        } else
                xenstored_ready = 1;
 
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Jun 23 
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Jun 23 
15:33:25 2006 -0600
@@ -67,6 +67,10 @@
 
 extern unsigned long *phys_to_machine_mapping;
 
+#undef machine_to_phys_mapping
+extern unsigned long *machine_to_phys_mapping;
+extern unsigned int   machine_to_phys_order;
+
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
        if (xen_feature(XENFEAT_auto_translated_physmap))
@@ -84,24 +88,29 @@ static inline int phys_to_machine_mappin
 
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
 {
+       extern unsigned long max_mapnr;
        unsigned long pfn;
 
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return mfn;
 
-       /*
-        * The array access can fail (e.g., device space beyond end of RAM).
-        * In such cases it doesn't matter what we return (we return garbage),
-        * but we must handle the fault without crashing!
-        */
+       if (unlikely((mfn >> machine_to_phys_order) != 0))
+               return max_mapnr;
+
+       /* The array access can fail (e.g., device space beyond end of RAM). */
        asm (
                "1:     movl %1,%0\n"
                "2:\n"
+               ".section .fixup,\"ax\"\n"
+               "3:     movl %2,%0\n"
+               "       jmp  2b\n"
+               ".previous\n"
                ".section __ex_table,\"a\"\n"
                "       .align 4\n"
-               "       .long 1b,2b\n"
+               "       .long 1b,3b\n"
                ".previous"
-               : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+               : "=r" (pfn)
+               : "m" (machine_to_phys_mapping[mfn]), "m" (max_mapnr) );
 
        return pfn;
 }
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Fri Jun 
23 15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Fri Jun 
23 15:33:25 2006 -0600
@@ -7,6 +7,7 @@
  **/
 
 #include <xen/interface/callback.h>
+#include <xen/interface/memory.h>
 
 static char * __init machine_specific_memory_setup(void)
 {
@@ -44,9 +45,16 @@ extern void failsafe_callback(void);
 extern void failsafe_callback(void);
 extern void nmi(void);
 
+unsigned long *machine_to_phys_mapping;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
 static void __init machine_specific_arch_setup(void)
 {
        int ret;
+       struct xen_machphys_mapping mapping;
+       unsigned long machine_to_phys_nr_ents;
        struct xen_platform_parameters pp;
        struct callback_register event = {
                .type = CALLBACKTYPE_event,
@@ -81,4 +89,13 @@ static void __init machine_specific_arch
        if (HYPERVISOR_xen_version(XENVER_platform_parameters,
                                   &pp) == 0)
                set_fixaddr_top(pp.virt_start - PAGE_SIZE);
+
+       machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
+       machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+       if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+               machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+               machine_to_phys_nr_ents = mapping.max_mfn + 1;
+       }
+       while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
+               machine_to_phys_order++;
 }
diff -r 59d4c1863330 -r fdf25330e4a6 
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h       Fri Jun 
23 15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h       Fri Jun 
23 15:33:25 2006 -0600
@@ -85,6 +85,10 @@ void copy_page(void *, void *);
 
 extern unsigned long *phys_to_machine_mapping;
 
+#undef machine_to_phys_mapping
+extern unsigned long *machine_to_phys_mapping;
+extern unsigned int   machine_to_phys_order;
+
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
        if (xen_feature(XENFEAT_auto_translated_physmap))
@@ -107,19 +111,23 @@ static inline unsigned long mfn_to_pfn(u
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return mfn;
 
-       /*
-        * The array access can fail (e.g., device space beyond end of RAM).
-        * In such cases it doesn't matter what we return (we return garbage),
-        * but we must handle the fault without crashing!
-        */
+       if (unlikely((mfn >> machine_to_phys_order) != 0))
+               return end_pfn;
+
+       /* The array access can fail (e.g., device space beyond end of RAM). */
        asm (
                "1:     movq %1,%0\n"
                "2:\n"
+               ".section .fixup,\"ax\"\n"
+               "3:     movq %2,%0\n"
+               "       jmp  2b\n"
+               ".previous\n"
                ".section __ex_table,\"a\"\n"
                "       .align 8\n"
-               "       .quad 1b,2b\n"
+               "       .quad 1b,3b\n"
                ".previous"
-               : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+               : "=r" (pfn)
+               : "m" (machine_to_phys_mapping[mfn]), "m" (end_pfn) );
 
        return pfn;
 }
diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/network-bridge
--- a/tools/examples/network-bridge     Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/examples/network-bridge     Fri Jun 23 15:33:25 2006 -0600
@@ -151,30 +151,6 @@ link_exists()
     fi
 }
 
-
-# Usage: create_bridge bridge
-create_bridge () {
-    local bridge=$1
-
-    # Don't create the bridge if it already exists.
-    if ! brctl show | grep -q ${bridge} ; then
-       brctl addbr ${bridge}
-       brctl stp ${bridge} off
-       brctl setfd ${bridge} 0
-    fi
-    ip link set ${bridge} up
-}
-
-# Usage: add_to_bridge bridge dev
-add_to_bridge () {
-    local bridge=$1
-    local dev=$2
-    # Don't add $dev to $bridge if it's already on a bridge.
-    if ! brctl show | grep -q ${dev} ; then
-       brctl addif ${bridge} ${dev}
-    fi
-}
-
 # Set the default forwarding policy for $dev to drop.
 # Allow forwarding to the bridge.
 antispoofing () {
@@ -238,14 +214,13 @@ using loopback.nloopbacks=<N> on the dom
        fi
        ip link set ${netdev} name ${pdev}
        ip link set ${vdev} name ${netdev}
-       ip link set ${pdev} down arp off
-       ip link set ${pdev} addr fe:ff:ff:ff:ff:ff
-       ip addr flush ${pdev}
+
+       setup_bridge_port ${pdev}
+       setup_bridge_port ${vif0}
        ip link set ${netdev} addr ${mac} arp on
-       add_to_bridge ${bridge} ${vif0}
+
        ip link set ${bridge} up
-       ip link set ${vif0} up
-       ip link set ${pdev} up
+       add_to_bridge  ${bridge} ${vif0}
        add_to_bridge2 ${bridge} ${pdev}
        do_ifup ${netdev}
     else
@@ -301,6 +276,7 @@ add_to_bridge2() {
     local maxtries=10
 
     echo -n "Waiting for ${dev} to negotiate link."
+    ip link set ${dev} up
     for i in `seq ${maxtries}` ; do
        if ifconfig ${dev} | grep -q RUNNING ; then
            break
diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/vif-bridge
--- a/tools/examples/vif-bridge Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/examples/vif-bridge Fri Jun 23 15:33:25 2006 -0600
@@ -48,16 +48,8 @@ fi
 
 case "$command" in
     online)
-        if brctl show | grep -q "$vif"
-        then
-          log debug "$vif already attached to a bridge"
-          exit 0
-        fi
-
-        brctl addif "$bridge" "$vif" ||
-          fatal "brctl addif $bridge $vif failed"
-
-        ifconfig "$vif" up || fatal "ifconfig $vif up failed"
+       setup_bridge_port "$vif"
+       add_to_bridge "$bridge" "$vif"
         ;;
 
     offline)
diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/xen-network-common.sh
--- a/tools/examples/xen-network-common.sh      Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/examples/xen-network-common.sh      Fri Jun 23 15:33:25 2006 -0600
@@ -104,3 +104,48 @@ find_dhcpd_init_file()
 {
   first_file -x /etc/init.d/{dhcp3-server,dhcp,dhcpd}
 }
+
+# configure interfaces which act as pure bridge ports:
+#  - make quiet: no arp, no multicast (ipv6 autoconf)
+#  - set mac address to fe:ff:ff:ff:ff:ff
+setup_bridge_port() {
+    local dev="$1"
+
+    # take interface down ...
+    ip link set ${dev} down
+
+    # ... and configure it
+    ip link set ${dev} arp off
+    ip link set ${dev} multicast off
+    ip link set ${dev} addr fe:ff:ff:ff:ff:ff
+    ip addr flush ${dev}
+}
+
+# Usage: create_bridge bridge
+create_bridge () {
+    local bridge=$1
+
+    # Don't create the bridge if it already exists.
+    if [ ! -e "/sys/class/net/${bridge}/bridge" ]; then
+       brctl addbr ${bridge}
+       brctl stp ${bridge} off
+       brctl setfd ${bridge} 0
+        ip link set ${bridge} arp off
+        ip link set ${bridge} multicast off
+    fi
+    ip link set ${bridge} up
+}
+
+# Usage: add_to_bridge bridge dev
+add_to_bridge () {
+    local bridge=$1
+    local dev=$2
+
+    # Don't add $dev to $bridge if it's already on a bridge.
+    if [ -e "/sys/class/net/${bridge}/brif/${dev}" ]; then
+       return
+    fi
+    brctl addif ${bridge} ${dev}
+    ip link set ${dev} up
+}
+
diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/examples/xmexample.hvm      Fri Jun 23 15:33:25 2006 -0600
@@ -164,3 +164,15 @@ ne2000=0
 #-----------------------------------------------------------------------------
 #    start in full screen
 #full-screen=1   
+
+
+#-----------------------------------------------------------------------------
+#   Enable USB support (specific devices specified at runtime through the
+#                      monitor window)
+#usb=1
+
+#   Enable USB mouse support (only enable one of the following, `mouse' for
+#                            PS/2 protocol relative mouse, `tablet' for
+#                            absolute mouse)
+#usbdevice='mouse'
+#usbdevice='tablet'
diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/Makefile
--- a/tools/firmware/acpi/Makefile      Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/firmware/acpi/Makefile      Fri Jun 23 15:33:25 2006 -0600
@@ -33,17 +33,16 @@ IASL_URL=http://developer.intel.com/tech
 
IASL_URL=http://developer.intel.com/technology/iapc/acpi/downloads/$(IASL_VER).tar.gz
 
 vpath iasl $(PATH)
-.PHONY: all
 all:$(ACPI_BIN)
 
 acpi_dsdt.c:acpi_dsdt.asl
        $(MAKE) iasl
-       iasl -oa -tc acpi_dsdt.asl
+       iasl  -tc acpi_dsdt.asl
        mv acpi_dsdt.hex acpi_dsdt.c
        echo "int DsdtLen=sizeof(AmlCode);" >> acpi_dsdt.c
        rm *.aml
+#        iasl -oa -tc acpi_dsdt.asl
 
-.PHONY: iasl
 iasl:
        @echo
        @echo "ACPI ASL compiler(iasl) is needed"
@@ -62,10 +61,8 @@ iasl:
 $(ACPI_BIN):$(ACPI_GEN)
        ./$(ACPI_GEN) $(ACPI_BIN)
 
-.PHONY: clean
 clean:
        rm -rf *.o $(ACPI_GEN) $(ACPI_BIN) $(IASL_VER) 
        rm -rf  $(IASL_VER).tar.gz
 
-.PHONY: install
 install: all
diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi2_0.h
--- a/tools/firmware/acpi/acpi2_0.h     Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/firmware/acpi/acpi2_0.h     Fri Jun 23 15:33:25 2006 -0600
@@ -323,7 +323,7 @@ typedef struct {
 // The physical that acpi table reside in the guest BIOS
 //#define ACPI_PHYSICAL_ADDRESS 0xE2000
 #define ACPI_PHYSICAL_ADDRESS 0xEA000
-#define ACPI_TABLE_SIZE (2*1024)  //Currently 2K is enough
+#define ACPI_TABLE_SIZE (4*1024)  //Currently 4K is enough
 
 void
 AcpiBuildTable(uint8_t* buf);
diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi_dsdt.asl
--- a/tools/firmware/acpi/acpi_dsdt.asl Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/firmware/acpi/acpi_dsdt.asl Fri Jun 23 15:33:25 2006 -0600
@@ -20,7 +20,7 @@
 //**
 //**
 
-DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL ", "XEN     ", 2)
+DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL","int-xen", 2006)
 {
     Name (\PMBS, 0x0C00)
     Name (\PMLN, 0x08)
@@ -29,24 +29,33 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, 
     Name (\APCB, 0xFEC00000)
     Name (\APCL, 0x00010000)
     Name (\PUID, 0x00)
+
     Scope (\_PR)
     {
         Processor (CPU0, 0x00, 0x00000000, 0x00) {}
         Processor (CPU1, 0x01, 0x00000000, 0x00) {}
         Processor (CPU2, 0x02, 0x00000000, 0x00) {}
         Processor (CPU3, 0x03, 0x00000000, 0x00) {}
+
     }
 
 /* Poweroff support - ties in with qemu emulation */
 
     Name (\_S5, Package (0x04)
     {
-        0x07, 
-        0x07, 
-        0x00, 
+        0x07,
+        0x07,
+        0x00,
         0x00
     })
 
+
+       Name(PICD, 0)   
+
+       Method(_PIC, 1) { 
+ 
+               Store(Arg0, PICD) 
+       }
     Scope (\_SB)
     {
         Device (PCI0)
@@ -55,9 +64,20 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, 
             Name (_UID, 0x00)
             Name (_ADR, 0x00)
             Name (_BBN, 0x00)
+            OperationRegion (PIRP, PCI_Config, 0x3c, 0x10)
+           Field(PIRP, ByteAcc, NoLock, Preserve){        
+          IRQ3,3,
+          IRQ5,5,
+          IRQ7,7,
+          IRQ9,9,
+          IRQA,10,
+          IRQB,11
+         }
+ 
             Method (_CRS, 0, NotSerialized)
             {
-                Name (PRT0, ResourceTemplate ()
+          
+               Name (PRT0, ResourceTemplate ()
                 {
                                        /* bus number is from 0 - 255*/
                     WordBusNumber (ResourceConsumer, MinFixed, MaxFixed, 
SubDecode,
@@ -79,75 +99,270 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, 
                         0x0FFF,
                         0x0000,
                         0x0300)
+
+                 /* reserve what device model consumed for IDE and acpi pci 
device            */
+                     WordIO (ResourceConsumer, MinFixed, MaxFixed, PosDecode, 
EntireRange,
+                        0x0000,
+                        0xc000,
+                        0xc01f,
+                        0x0000,
+                        0x0020)
+                 /* reserve what device model consumed for Ethernet controller 
pci device        */
+                     WordIO (ResourceConsumer, MinFixed, MaxFixed, PosDecode, 
EntireRange,
+                        0x0000,
+                        0xc020,
+                        0xc03f,
+                        0x0000,
+                        0x0010)
+
                     DWordMemory (ResourceProducer, PosDecode, MinFixed, 
MaxFixed, Cacheable, ReadOnly,
                         0x00000000,
-                        0x000A0000,
+                        0x000c0000,
                         0x000FFFFF,
                         0x00000000,
-                        0x00060000)
+                        0x00030000)
+
+                 /* reserve what device model consumed for PCI VGA device      
  */
+
+                    DWordMemory (ResourceConsumer, PosDecode, MinFixed, 
MaxFixed, Cacheable, ReadWrite,
+                        0x00000000,
+                        0xF0000000,
+                        0xF1FFFFFF,
+                        0x00000000,
+                        0x02000000)
+                    DWordMemory (ResourceConsumer, PosDecode, MinFixed, 
MaxFixed, Cacheable, ReadWrite,
+                        0x00000000,
+                        0xF2000000,
+                        0xF2000FFF,
+                        0x00000000,
+                        0x00001000)
+                 /* reserve what device model consumed for Ethernet controller 
pci device        */
+                      DWordMemory (ResourceConsumer, PosDecode, MinFixed, 
MaxFixed, Cacheable, ReadWrite,
+                        0x00000000,
+                        0xF2001000,
+                        0xF200101F,
+                        0x00000000,
+                        0x00000020) 
                 })
                 Return (PRT0)
             }
-
-            Name (AIR0, Package (0x06)
-            {
-               Package (0x04)
-                {
-                    0x001FFFFF, 
-                    0x02, 
-                    0x00, 
-                    0x17
-                }, 
-
-                Package (0x04)
-                {
-                    0x001FFFFF, 
-                    0x03, 
-                    0x00, 
-                    0x13
-                }, 
-
-                Package (0x04)
-                {
-                    0x001DFFFF, 
-                    0x01, 
-                    0x00, 
-                    0x13
-                }, 
-
-                Package (0x04)
-                {
-                    0x001DFFFF, 
-                    0x00, 
-                    0x00, 
-                    0x10
-                }, 
-
-                Package (0x04)
-                {
-                    0x001DFFFF, 
-                    0x02, 
-                    0x00, 
-                    0x12
-                }, 
-
-                Package (0x04)
-                {
-                    0x001DFFFF, 
-                    0x03, 
-                    0x00, 
-                    0x17
-                }
-            })
-            Method (_PRT, 0, NotSerialized)
-            {
-                Return (AIR0)
-            }
-
+       Name(BUFA, ResourceTemplate() {
+                IRQ(Level, ActiveLow, Shared) {
+                        3,4,5,6,7,10,11,12,14,15}               
+                }) 
+
+                Name(BUFB, Buffer(){
+                0x23, 0x00, 0x00, 0x18,
+                0x79, 0})
+
+                CreateWordField(BUFB, 0x01, IRQV)
+               
+                Name(BUFC, Buffer(){
+                5, 7, 10, 11
+                 })
+                
+                CreateByteField(BUFC, 0x01, PIQA)
+                CreateByteField(BUFC, 0x01, PIQB)
+                CreateByteField(BUFC, 0x01, PIQC)
+                CreateByteField(BUFC, 0x01, PIQD)
+                               
+               Device(LNKA)    {
+                Name(_HID, EISAID("PNP0C0F"))  // PCI interrupt link
+                Name(_UID, 1)
+                Method(_STA, 0) {
+                               And(PIRA, 0x80, Local0)
+                        If(LEqual(Local0, 0x80)) {
+                                Return(0x09)   
+                                }
+                        Else {
+                                Return(0x0B)   
+                                }
+                        }
+
+                Method(_PRS) {
+
+                        Return(BUFA)
+                } // Method(_PRS)
+
+                Method(_DIS) {
+                               Or(PIRA, 0x80, PIRA)
+                }
+
+                Method(_CRS) {
+                        And(PIRB, 0x0f, Local0)                 
+                        ShiftLeft(0x1, Local0, IRQV)    
+                        Return(BUFB)                    
+                } 
+
+                Method(_SRS, 1) {
+                                CreateWordField(ARG0, 0x01, IRQ1)       
+                        FindSetRightBit(IRQ1, Local0)           
+                        Decrement(Local0)                       
+                        Store(Local0, PIRA)                     
+                 } // Method(_SRS)
+        }
+
+        Device(LNKB)   {
+                Name(_HID, EISAID("PNP0C0F"))   
+                Name(_UID, 2)
+                Method(_STA, 0) {
+                               And(PIRB, 0x80, Local0)
+                        If(LEqual(Local0, 0x80)) {
+                                Return(0x09)    
+                                }
+                        Else {
+                                Return(0x0B)    
+                                }
+                        }
+
+                Method(_PRS) {
+                                Return(BUFA)                    
+                } // Method(_PRS)
+
+                Method(_DIS) {
+
+                               Or(PIRB, 0x80, PIRB)
+                }
+
+                Method(_CRS) {
+                        And(PIRB, 0x0f, Local0)                 
+                        ShiftLeft(0x1, Local0, IRQV)    
+                        Return(BUFB)                    
+                } // Method(_CRS)
+
+                Method(_SRS, 1) {
+                                CreateWordField(ARG0, 0x01, IRQ1)       
+                        FindSetRightBit(IRQ1, Local0)           
+                        Decrement(Local0)                       
+                        Store(Local0, PIRB)                     
+                 } // Method(_SRS)
+        }
+
+        Device(LNKC)   {
+                Name(_HID, EISAID("PNP0C0F"))  // PCI interrupt link
+                Name(_UID, 3)
+                Method(_STA, 0) {
+                               And(PIRC, 0x80, Local0)
+                        If(LEqual(Local0, 0x80)) {
+                                Return(0x09)    
+                        }
+                        Else {
+                                Return(0x0B)    
+                        }
+                }
+
+                Method(_PRS) {                          
+                        Return(BUFA)                    
+                } // Method(_PRS)                       
+
+                Method(_DIS) {
+
+                               Or(PIRC, 0x80, PIRC)
+                }
+
+                Method(_CRS) {
+                        And(PIRC, 0x0f, Local0)                 
+                        ShiftLeft(0x1, Local0, IRQV)    
+                        Return(BUFB)                    
+                } // Method(_CRS)
+
+                Method(_SRS, 1) {
+                                CreateWordField(ARG0, 0x01, IRQ1)       
+                        FindSetRightBit(IRQ1, Local0)           
+                        Decrement(Local0)                       
+                        Store(Local0, PIRC)                     
+                 } // Method(_SRS)
+        }
+
+        Device(LNKD)   {
+                Name(_HID, EISAID("PNP0C0F"))   
+                Name(_UID, 4)
+                Method(_STA, 0) {
+                               And(PIRD, 0x80, Local0)
+                        If(LEqual(Local0, 0x80)) {
+                                Return(0x09)    
+                        }
+                        Else {
+                                Return(0x0B)    
+                        }
+                }
+
+                Method(_PRS) {                          
+                        Return(BUFA)                    
+                } // Method(_PRS)                       
+
+                Method(_DIS) {
+                               Or(PIRD, 0x80, PIRD)
+                }
+
+                Method(_CRS) {
+                        And(PIRD, 0x0f, Local0)                 
+                        ShiftLeft(0x1, Local0, IRQV)    
+                        Return(BUFB)                    
+                } // Method(_CRS)
+
+                Method(_SRS, 1) {
+                                CreateWordField(ARG0, 0x01, IRQ1)       
+                        FindSetRightBit(IRQ1, Local0)           
+                        Decrement(Local0)                       
+                        Store(Local0, PIRD)                     
+                 } // Method(_SRS)
+        }
+        Method(_PRT,0) {
+                       If(PICD) {Return(PRTA)}  
+                       Return (PRTP)  
+               } // end _PRT
+               
+               
+        Name(PRTP, Package(){
+                        Package(){0x0000ffff, 0, \_SB.PCI0.LNKA, 0},   // Slot 
1, INTA
+                        Package(){0x0000ffff, 1, \_SB.PCI0.LNKB, 0},   // Slot 
1, INTB
+                        Package(){0x0000ffff, 2, \_SB.PCI0.LNKC, 0},   // Slot 
1, INTC
+                        Package(){0x0000ffff, 3, \_SB.PCI0.LNKD, 0},   // Slot 
1, INTD
+
+                        Package(){0x0001ffff, 0, \_SB.PCI0.LNKB, 0},   // Slot 
2, INTB
+                        Package(){0x0001ffff, 1, \_SB.PCI0.LNKC, 0},   // Slot 
2, INTC
+                        Package(){0x0001ffff, 2, \_SB.PCI0.LNKD, 0},   // Slot 
2, INTD
+                        Package(){0x0001ffff, 3, \_SB.PCI0.LNKA, 0},   // Slot 
2, INTA
+                        
+                        Package(){0x0002ffff, 0, \_SB.PCI0.LNKC, 0},   // Slot 
3, INTC
+                        Package(){0x0002ffff, 1, \_SB.PCI0.LNKD, 0},   // Slot 
3, INTD
+                        Package(){0x0002ffff, 2, \_SB.PCI0.LNKA, 0},   // Slot 
3, INTA
+                        Package(){0x0002ffff, 3, \_SB.PCI0.LNKB, 0},   // Slot 
3, INTB
+                        
+                        Package(){0x0003ffff, 0, \_SB.PCI0.LNKD, 0},   // Slot 
2, INTD
+                        Package(){0x0003ffff, 1, \_SB.PCI0.LNKA, 0},   // Slot 
2, INTA
+                        Package(){0x0003ffff, 2, \_SB.PCI0.LNKB, 0},   // Slot 
2, INTB
+                        Package(){0x0003ffff, 3, \_SB.PCI0.LNKC, 0},   // Slot 
2, INTC
+                        
+                        }
+            )
+       Name(PRTA, Package(){
+                        Package(){0x0001ffff, 0, 0, 5},        // Device 1, 
INTA
+
+                        Package(){0x0002ffff, 0, 0, 7},        // Device 2, 
INTA
+                       
+                        Package(){0x0003ffff, 0, 0, 10},       // Device 3, 
INTA
+
+                        Package(){0x0003ffff, 0, 0, 11},       // Device 4, 
INTA
+                                   
+                        
+                        }
+            )
+            
             Device (ISA)
             {
-                Name (_ADR, 0x00010000) /*TODO, device id, PCI bus num, ...*/
-
+                Name (_ADR, 0x00000000) /* device id, PCI bus num, ... */
+ 
+               OperationRegion(PIRQ, PCI_Config, 0x60, 0x4)
+                        Scope(\) {
+                                Field (\_SB.PCI0.ISA.PIRQ, ByteAcc, NoLock, 
Preserve) {
+                                        PIRA, 8,
+                                        PIRB, 8,
+                                        PIRC, 8,
+                                        PIRD, 8
+                                        }
+                                }
                 Device (SYSR)
                 {
                     Name (_HID, EisaId ("PNP0C02"))
diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi_dsdt.c
--- a/tools/firmware/acpi/acpi_dsdt.c   Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/firmware/acpi/acpi_dsdt.c   Fri Jun 23 15:33:25 2006 -0600
@@ -1,22 +1,22 @@
 /*
  * 
  * Intel ACPI Component Architecture
- * ASL Optimizing Compiler / AML Disassembler version 20050624 [Aug 24 2005]
+ * ASL Optimizing Compiler / AML Disassembler version 20050513 [Jun  8 2005]
  * Copyright (C) 2000 - 2005 Intel Corporation
  * Supports ACPI Specification Revision 3.0
  * 
- * Compilation of "acpi_dsdt.asl" - Thu May  4 17:42:00 2006
+ * Compilation of "acpi_dsdt.asl" - Mon Jun 12 22:33:41 2006
  * 
  * C source code output
  *
  */
 unsigned char AmlCode[] = 
 {
-    0x44,0x53,0x44,0x54,0x7C,0x04,0x00,0x00,  /* 00000000    "DSDT|..." */
-    0x01,0x72,0x49,0x4E,0x54,0x45,0x4C,0x20,  /* 00000008    ".rINTEL " */
-    0x58,0x45,0x4E,0x20,0x20,0x20,0x20,0x20,  /* 00000010    "XEN     " */
-    0x02,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C,  /* 00000018    "....INTL" */
-    0x24,0x06,0x05,0x20,0x08,0x50,0x4D,0x42,  /* 00000020    "$.. .PMB" */
+    0x44,0x53,0x44,0x54,0xC3,0x08,0x00,0x00,  /* 00000000    "DSDT...." */
+    0x01,0x0C,0x49,0x4E,0x54,0x45,0x4C,0x00,  /* 00000008    "..INTEL." */
+    0x69,0x6E,0x74,0x2D,0x78,0x65,0x6E,0x00,  /* 00000010    "int-xen." */
+    0xD6,0x07,0x00,0x00,0x49,0x4E,0x54,0x4C,  /* 00000018    "....INTL" */
+    0x13,0x05,0x05,0x20,0x08,0x50,0x4D,0x42,  /* 00000020    "... .PMB" */
     0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C,  /* 00000028    "S....PML" */
     0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31,  /* 00000030    "N...IOB1" */
     0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08,  /* 00000038    "..IOL1.." */
@@ -32,129 +32,266 @@ unsigned char AmlCode[] =
     0x0B,0x43,0x50,0x55,0x33,0x03,0x00,0x00,  /* 00000088    ".CPU3..." */
     0x00,0x00,0x00,0x08,0x5F,0x53,0x35,0x5F,  /* 00000090    "...._S5_" */
     0x12,0x08,0x04,0x0A,0x07,0x0A,0x07,0x00,  /* 00000098    "........" */
-    0x00,0x10,0x4A,0x3D,0x5F,0x53,0x42,0x5F,  /* 000000A0    "..J=_SB_" */
-    0x5B,0x82,0x42,0x3D,0x50,0x43,0x49,0x30,  /* 000000A8    "[.B=PCI0" */
-    0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,  /* 000000B0    "._HID.A." */
-    0x0A,0x03,0x08,0x5F,0x55,0x49,0x44,0x00,  /* 000000B8    "..._UID." */
-    0x08,0x5F,0x41,0x44,0x52,0x00,0x08,0x5F,  /* 000000C0    "._ADR.._" */
-    0x42,0x42,0x4E,0x00,0x14,0x4A,0x06,0x5F,  /* 000000C8    "BBN..J._" */
-    0x43,0x52,0x53,0x00,0x08,0x50,0x52,0x54,  /* 000000D0    "CRS..PRT" */
-    0x30,0x11,0x48,0x05,0x0A,0x54,0x88,0x0D,  /* 000000D8    "0.H..T.." */
-    0x00,0x02,0x0F,0x00,0x00,0x00,0x00,0x00,  /* 000000E0    "........" */
-    0xFF,0x00,0x00,0x00,0x00,0x01,0x47,0x01,  /* 000000E8    "......G." */
-    0xF8,0x0C,0xF8,0x0C,0x01,0x08,0x88,0x0D,  /* 000000F0    "........" */
-    0x00,0x01,0x0C,0x03,0x00,0x00,0x00,0x00,  /* 000000F8    "........" */
-    0xF7,0x0C,0x00,0x00,0xF8,0x0C,0x88,0x0D,  /* 00000100    "........" */
-    0x00,0x01,0x0C,0x03,0x00,0x00,0x00,0x0D,  /* 00000108    "........" */
-    0xFF,0x0F,0x00,0x00,0x00,0x03,0x87,0x17,  /* 00000110    "........" */
-    0x00,0x00,0x0C,0x02,0x00,0x00,0x00,0x00,  /* 00000118    "........" */
-    0x00,0x00,0x0A,0x00,0xFF,0xFF,0x0F,0x00,  /* 00000120    "........" */
-    0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,  /* 00000128    "........" */
-    0x79,0x00,0xA4,0x50,0x52,0x54,0x30,0x08,  /* 00000130    "y..PRT0." */
-    0x41,0x49,0x52,0x30,0x12,0x4F,0x04,0x06,  /* 00000138    "AIR0.O.." */
-    0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x1F,0x00,  /* 00000140    "........" */
-    0x0A,0x02,0x00,0x0A,0x17,0x12,0x0C,0x04,  /* 00000148    "........" */
-    0x0C,0xFF,0xFF,0x1F,0x00,0x0A,0x03,0x00,  /* 00000150    "........" */
-    0x0A,0x13,0x12,0x0B,0x04,0x0C,0xFF,0xFF,  /* 00000158    "........" */
-    0x1D,0x00,0x01,0x00,0x0A,0x13,0x12,0x0B,  /* 00000160    "........" */
-    0x04,0x0C,0xFF,0xFF,0x1D,0x00,0x00,0x00,  /* 00000168    "........" */
-    0x0A,0x10,0x12,0x0C,0x04,0x0C,0xFF,0xFF,  /* 00000170    "........" */
-    0x1D,0x00,0x0A,0x02,0x00,0x0A,0x12,0x12,  /* 00000178    "........" */
-    0x0C,0x04,0x0C,0xFF,0xFF,0x1D,0x00,0x0A,  /* 00000180    "........" */
-    0x03,0x00,0x0A,0x17,0x14,0x0B,0x5F,0x50,  /* 00000188    "......_P" */
-    0x52,0x54,0x00,0xA4,0x41,0x49,0x52,0x30,  /* 00000190    "RT..AIR0" */
-    0x5B,0x82,0x42,0x2E,0x49,0x53,0x41,0x5F,  /* 00000198    "[.B.ISA_" */
-    0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00,  /* 000001A0    "._ADR..." */
-    0x01,0x00,0x5B,0x82,0x46,0x0B,0x53,0x59,  /* 000001A8    "..[.F.SY" */
-    0x53,0x52,0x08,0x5F,0x48,0x49,0x44,0x0C,  /* 000001B0    "SR._HID." */
-    0x41,0xD0,0x0C,0x02,0x08,0x5F,0x55,0x49,  /* 000001B8    "A...._UI" */
-    0x44,0x01,0x08,0x43,0x52,0x53,0x5F,0x11,  /* 000001C0    "D..CRS_." */
-    0x4E,0x08,0x0A,0x8A,0x47,0x01,0x10,0x00,  /* 000001C8    "N...G..." */
-    0x10,0x00,0x00,0x10,0x47,0x01,0x22,0x00,  /* 000001D0    "....G."." */
-    0x22,0x00,0x00,0x0C,0x47,0x01,0x30,0x00,  /* 000001D8    ""...G.0." */
-    0x30,0x00,0x00,0x10,0x47,0x01,0x44,0x00,  /* 000001E0    "0...G.D." */
-    0x44,0x00,0x00,0x1C,0x47,0x01,0x62,0x00,  /* 000001E8    "D...G.b." */
-    0x62,0x00,0x00,0x02,0x47,0x01,0x65,0x00,  /* 000001F0    "b...G.e." */
-    0x65,0x00,0x00,0x0B,0x47,0x01,0x72,0x00,  /* 000001F8    "e...G.r." */
-    0x72,0x00,0x00,0x0E,0x47,0x01,0x80,0x00,  /* 00000200    "r...G..." */
-    0x80,0x00,0x00,0x01,0x47,0x01,0x84,0x00,  /* 00000208    "....G..." */
-    0x84,0x00,0x00,0x03,0x47,0x01,0x88,0x00,  /* 00000210    "....G..." */
-    0x88,0x00,0x00,0x01,0x47,0x01,0x8C,0x00,  /* 00000218    "....G..." */
-    0x8C,0x00,0x00,0x03,0x47,0x01,0x90,0x00,  /* 00000220    "....G..." */
-    0x90,0x00,0x00,0x10,0x47,0x01,0xA2,0x00,  /* 00000228    "....G..." */
-    0xA2,0x00,0x00,0x1C,0x47,0x01,0xE0,0x00,  /* 00000230    "....G..." */
-    0xE0,0x00,0x00,0x10,0x47,0x01,0xA0,0x08,  /* 00000238    "....G..." */
-    0xA0,0x08,0x00,0x04,0x47,0x01,0xC0,0x0C,  /* 00000240    "....G..." */
-    0xC0,0x0C,0x00,0x10,0x47,0x01,0xD0,0x04,  /* 00000248    "....G..." */
-    0xD0,0x04,0x00,0x02,0x79,0x00,0x14,0x0B,  /* 00000250    "....y..." */
-    0x5F,0x43,0x52,0x53,0x00,0xA4,0x43,0x52,  /* 00000258    "_CRS..CR" */
-    0x53,0x5F,0x5B,0x82,0x2B,0x50,0x49,0x43,  /* 00000260    "S_[.+PIC" */
-    0x5F,0x08,0x5F,0x48,0x49,0x44,0x0B,0x41,  /* 00000268    "_._HID.A" */
-    0xD0,0x08,0x5F,0x43,0x52,0x53,0x11,0x18,  /* 00000270    ".._CRS.." */
-    0x0A,0x15,0x47,0x01,0x20,0x00,0x20,0x00,  /* 00000278    "..G. . ." */
-    0x01,0x02,0x47,0x01,0xA0,0x00,0xA0,0x00,  /* 00000280    "..G....." */
-    0x01,0x02,0x22,0x04,0x00,0x79,0x00,0x5B,  /* 00000288    ".."..y.[" */
-    0x82,0x47,0x05,0x44,0x4D,0x41,0x30,0x08,  /* 00000290    ".G.DMA0." */
-    0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x02,  /* 00000298    "_HID.A.." */
-    0x00,0x08,0x5F,0x43,0x52,0x53,0x11,0x41,  /* 000002A0    ".._CRS.A" */
-    0x04,0x0A,0x3D,0x2A,0x10,0x04,0x47,0x01,  /* 000002A8    "..=*..G." */
-    0x00,0x00,0x00,0x00,0x00,0x10,0x47,0x01,  /* 000002B0    "......G." */
-    0x81,0x00,0x81,0x00,0x00,0x03,0x47,0x01,  /* 000002B8    "......G." */
-    0x87,0x00,0x87,0x00,0x00,0x01,0x47,0x01,  /* 000002C0    "......G." */
-    0x89,0x00,0x89,0x00,0x00,0x03,0x47,0x01,  /* 000002C8    "......G." */
-    0x8F,0x00,0x8F,0x00,0x00,0x01,0x47,0x01,  /* 000002D0    "......G." */
-    0xC0,0x00,0xC0,0x00,0x00,0x20,0x47,0x01,  /* 000002D8    "..... G." */
-    0x80,0x04,0x80,0x04,0x00,0x10,0x79,0x00,  /* 000002E0    "......y." */
-    0x5B,0x82,0x25,0x54,0x4D,0x52,0x5F,0x08,  /* 000002E8    "[.%TMR_." */
-    0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x01,  /* 000002F0    "_HID.A.." */
-    0x00,0x08,0x5F,0x43,0x52,0x53,0x11,0x10,  /* 000002F8    ".._CRS.." */
-    0x0A,0x0D,0x47,0x01,0x40,0x00,0x40,0x00,  /* 00000300    "..G.@.@." */
-    0x00,0x04,0x22,0x01,0x00,0x79,0x00,0x5B,  /* 00000308    ".."..y.[" */
-    0x82,0x25,0x52,0x54,0x43,0x5F,0x08,0x5F,  /* 00000310    ".%RTC_._" */
-    0x48,0x49,0x44,0x0C,0x41,0xD0,0x0B,0x00,  /* 00000318    "HID.A..." */
-    0x08,0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,  /* 00000320    "._CRS..." */
-    0x0D,0x47,0x01,0x70,0x00,0x70,0x00,0x00,  /* 00000328    ".G.p.p.." */
-    0x02,0x22,0x00,0x01,0x79,0x00,0x5B,0x82,  /* 00000330    "."..y.[." */
-    0x22,0x53,0x50,0x4B,0x52,0x08,0x5F,0x48,  /* 00000338    ""SPKR._H" */
-    0x49,0x44,0x0C,0x41,0xD0,0x08,0x00,0x08,  /* 00000340    "ID.A...." */
-    0x5F,0x43,0x52,0x53,0x11,0x0D,0x0A,0x0A,  /* 00000348    "_CRS...." */
-    0x47,0x01,0x61,0x00,0x61,0x00,0x00,0x01,  /* 00000350    "G.a.a..." */
-    0x79,0x00,0x5B,0x82,0x31,0x50,0x53,0x32,  /* 00000358    "y.[.1PS2" */
-    0x4D,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,  /* 00000360    "M._HID.A" */
-    0xD0,0x0F,0x13,0x08,0x5F,0x43,0x49,0x44,  /* 00000368    "...._CID" */
-    0x0C,0x41,0xD0,0x0F,0x13,0x14,0x09,0x5F,  /* 00000370    ".A....._" */
-    0x53,0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,  /* 00000378    "STA....." */
-    0x5F,0x43,0x52,0x53,0x11,0x08,0x0A,0x05,  /* 00000380    "_CRS...." */
-    0x22,0x00,0x10,0x79,0x00,0x5B,0x82,0x42,  /* 00000388    ""..y.[.B" */
-    0x04,0x50,0x53,0x32,0x4B,0x08,0x5F,0x48,  /* 00000390    ".PS2K._H" */
-    0x49,0x44,0x0C,0x41,0xD0,0x03,0x03,0x08,  /* 00000398    "ID.A...." */
-    0x5F,0x43,0x49,0x44,0x0C,0x41,0xD0,0x03,  /* 000003A0    "_CID.A.." */
-    0x0B,0x14,0x09,0x5F,0x53,0x54,0x41,0x00,  /* 000003A8    "..._STA." */
-    0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53,  /* 000003B0    "...._CRS" */
-    0x11,0x18,0x0A,0x15,0x47,0x01,0x60,0x00,  /* 000003B8    "....G.`." */
-    0x60,0x00,0x00,0x01,0x47,0x01,0x64,0x00,  /* 000003C0    "`...G.d." */
-    0x64,0x00,0x00,0x01,0x22,0x02,0x00,0x79,  /* 000003C8    "d..."..y" */
-    0x00,0x5B,0x82,0x3A,0x46,0x44,0x43,0x30,  /* 000003D0    ".[.:FDC0" */
-    0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,  /* 000003D8    "._HID.A." */
-    0x07,0x00,0x14,0x09,0x5F,0x53,0x54,0x41,  /* 000003E0    "...._STA" */
-    0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,  /* 000003E8    "....._CR" */
-    0x53,0x11,0x1B,0x0A,0x18,0x47,0x01,0xF0,  /* 000003F0    "S....G.." */
-    0x03,0xF0,0x03,0x01,0x06,0x47,0x01,0xF7,  /* 000003F8    ".....G.." */
-    0x03,0xF7,0x03,0x01,0x01,0x22,0x40,0x00,  /* 00000400    "....."@." */
-    0x2A,0x04,0x00,0x79,0x00,0x5B,0x82,0x35,  /* 00000408    "*..y.[.5" */
-    0x55,0x41,0x52,0x31,0x08,0x5F,0x48,0x49,  /* 00000410    "UAR1._HI" */
-    0x44,0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,  /* 00000418    "D.A...._" */
-    0x55,0x49,0x44,0x01,0x14,0x09,0x5F,0x53,  /* 00000420    "UID..._S" */
-    0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,  /* 00000428    "TA....._" */
-    0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,  /* 00000430    "CRS....G" */
-    0x01,0xF8,0x03,0xF8,0x03,0x01,0x08,0x22,  /* 00000438    "......."" */
-    0x10,0x00,0x79,0x00,0x5B,0x82,0x36,0x55,  /* 00000440    "..y.[.6U" */
-    0x41,0x52,0x32,0x08,0x5F,0x48,0x49,0x44,  /* 00000448    "AR2._HID" */
-    0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,0x55,  /* 00000450    ".A...._U" */
-    0x49,0x44,0x0A,0x02,0x14,0x09,0x5F,0x53,  /* 00000458    "ID...._S" */
-    0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,  /* 00000460    "TA....._" */
-    0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,  /* 00000468    "CRS....G" */
-    0x01,0xF8,0x02,0xF8,0x02,0x01,0x08,0x22,  /* 00000470    "......."" */
-    0x08,0x00,0x79,0x00,
+    0x00,0x08,0x50,0x49,0x43,0x44,0x00,0x14,  /* 000000A0    "..PICD.." */
+    0x0C,0x5F,0x50,0x49,0x43,0x01,0x70,0x68,  /* 000000A8    "._PIC.ph" */
+    0x50,0x49,0x43,0x44,0x10,0x4E,0x80,0x5F,  /* 000000B0    "PICD.N._" */
+    0x53,0x42,0x5F,0x5B,0x82,0x46,0x80,0x50,  /* 000000B8    "SB_[.F.P" */
+    0x43,0x49,0x30,0x08,0x5F,0x48,0x49,0x44,  /* 000000C0    "CI0._HID" */
+    0x0C,0x41,0xD0,0x0A,0x03,0x08,0x5F,0x55,  /* 000000C8    ".A...._U" */
+    0x49,0x44,0x00,0x08,0x5F,0x41,0x44,0x52,  /* 000000D0    "ID.._ADR" */
+    0x00,0x08,0x5F,0x42,0x42,0x4E,0x00,0x5B,  /* 000000D8    ".._BBN.[" */
+    0x80,0x50,0x49,0x52,0x50,0x02,0x0A,0x3C,  /* 000000E0    ".PIRP..<" */
+    0x0A,0x10,0x5B,0x81,0x24,0x50,0x49,0x52,  /* 000000E8    "..[.$PIR" */
+    0x50,0x01,0x49,0x52,0x51,0x33,0x03,0x49,  /* 000000F0    "P.IRQ3.I" */
+    0x52,0x51,0x35,0x05,0x49,0x52,0x51,0x37,  /* 000000F8    "RQ5.IRQ7" */
+    0x07,0x49,0x52,0x51,0x39,0x09,0x49,0x52,  /* 00000100    ".IRQ9.IR" */
+    0x51,0x41,0x0A,0x49,0x52,0x51,0x42,0x0B,  /* 00000108    "QA.IRQB." */
+    0x14,0x48,0x0D,0x5F,0x43,0x52,0x53,0x00,  /* 00000110    ".H._CRS." */
+    0x08,0x50,0x52,0x54,0x30,0x11,0x46,0x0C,  /* 00000118    ".PRT0.F." */
+    0x0A,0xC2,0x88,0x0D,0x00,0x02,0x0F,0x00,  /* 00000120    "........" */
+    0x00,0x00,0x00,0x00,0xFF,0x00,0x00,0x00,  /* 00000128    "........" */
+    0x00,0x01,0x47,0x01,0xF8,0x0C,0xF8,0x0C,  /* 00000130    "..G....." */
+    0x01,0x08,0x88,0x0D,0x00,0x01,0x0C,0x03,  /* 00000138    "........" */
+    0x00,0x00,0x00,0x00,0xF7,0x0C,0x00,0x00,  /* 00000140    "........" */
+    0xF8,0x0C,0x88,0x0D,0x00,0x01,0x0C,0x03,  /* 00000148    "........" */
+    0x00,0x00,0x00,0x0D,0xFF,0x0F,0x00,0x00,  /* 00000150    "........" */
+    0x00,0x03,0x88,0x0D,0x00,0x01,0x0D,0x03,  /* 00000158    "........" */
+    0x00,0x00,0x00,0xC0,0x1F,0xC0,0x00,0x00,  /* 00000160    "........" */
+    0x20,0x00,0x88,0x0D,0x00,0x01,0x0D,0x03,  /* 00000168    " ......." */
+    0x00,0x00,0x20,0xC0,0x3F,0xC0,0x00,0x00,  /* 00000170    ".. .?..." */
+    0x10,0x00,0x87,0x17,0x00,0x00,0x0C,0x02,  /* 00000178    "........" */
+    0x00,0x00,0x00,0x00,0x00,0x00,0x0C,0x00,  /* 00000180    "........" */
+    0xFF,0xFF,0x0F,0x00,0x00,0x00,0x00,0x00,  /* 00000188    "........" */
+    0x00,0x00,0x03,0x00,0x87,0x17,0x00,0x00,  /* 00000190    "........" */
+    0x0D,0x03,0x00,0x00,0x00,0x00,0x00,0x00,  /* 00000198    "........" */
+    0x00,0xF0,0xFF,0xFF,0xFF,0xF1,0x00,0x00,  /* 000001A0    "........" */
+    0x00,0x00,0x00,0x00,0x00,0x02,0x87,0x17,  /* 000001A8    "........" */
+    0x00,0x00,0x0D,0x03,0x00,0x00,0x00,0x00,  /* 000001B0    "........" */
+    0x00,0x00,0x00,0xF2,0xFF,0x0F,0x00,0xF2,  /* 000001B8    "........" */
+    0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,  /* 000001C0    "........" */
+    0x87,0x17,0x00,0x00,0x0D,0x03,0x00,0x00,  /* 000001C8    "........" */
+    0x00,0x00,0x00,0x10,0x00,0xF2,0x1F,0x10,  /* 000001D0    "........" */
+    0x00,0xF2,0x00,0x00,0x00,0x00,0x20,0x00,  /* 000001D8    "...... ." */
+    0x00,0x00,0x79,0x00,0xA4,0x50,0x52,0x54,  /* 000001E0    "..y..PRT" */
+    0x30,0x08,0x42,0x55,0x46,0x41,0x11,0x09,  /* 000001E8    "0.BUFA.." */
+    0x0A,0x06,0x23,0xF8,0xDC,0x18,0x79,0x00,  /* 000001F0    "..#...y." */
+    0x08,0x42,0x55,0x46,0x42,0x11,0x09,0x0A,  /* 000001F8    ".BUFB..." */
+    0x06,0x23,0x00,0x00,0x18,0x79,0x00,0x8B,  /* 00000200    ".#...y.." */
+    0x42,0x55,0x46,0x42,0x01,0x49,0x52,0x51,  /* 00000208    "BUFB.IRQ" */
+    0x56,0x08,0x42,0x55,0x46,0x43,0x11,0x07,  /* 00000210    "V.BUFC.." */
+    0x0A,0x04,0x05,0x07,0x0A,0x0B,0x8C,0x42,  /* 00000218    ".......B" */
+    0x55,0x46,0x43,0x01,0x50,0x49,0x51,0x41,  /* 00000220    "UFC.PIQA" */
+    0x8C,0x42,0x55,0x46,0x43,0x01,0x50,0x49,  /* 00000228    ".BUFC.PI" */
+    0x51,0x42,0x8C,0x42,0x55,0x46,0x43,0x01,  /* 00000230    "QB.BUFC." */
+    0x50,0x49,0x51,0x43,0x8C,0x42,0x55,0x46,  /* 00000238    "PIQC.BUF" */
+    0x43,0x01,0x50,0x49,0x51,0x44,0x5B,0x82,  /* 00000240    "C.PIQD[." */
+    0x48,0x08,0x4C,0x4E,0x4B,0x41,0x08,0x5F,  /* 00000248    "H.LNKA._" */
+    0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F,  /* 00000250    "HID.A..." */
+    0x08,0x5F,0x55,0x49,0x44,0x01,0x14,0x1C,  /* 00000258    "._UID..." */
+    0x5F,0x53,0x54,0x41,0x00,0x7B,0x50,0x49,  /* 00000260    "_STA.{PI" */
+    0x52,0x41,0x0A,0x80,0x60,0xA0,0x08,0x93,  /* 00000268    "RA..`..." */
+    0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1,0x04,  /* 00000270    "`......." */
+    0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50,0x52,  /* 00000278    "....._PR" */
+    0x53,0x00,0xA4,0x42,0x55,0x46,0x41,0x14,  /* 00000280    "S..BUFA." */
+    0x11,0x5F,0x44,0x49,0x53,0x00,0x7D,0x50,  /* 00000288    "._DIS.}P" */
+    0x49,0x52,0x41,0x0A,0x80,0x50,0x49,0x52,  /* 00000290    "IRA..PIR" */
+    0x41,0x14,0x1A,0x5F,0x43,0x52,0x53,0x00,  /* 00000298    "A.._CRS." */
+    0x7B,0x50,0x49,0x52,0x42,0x0A,0x0F,0x60,  /* 000002A0    "{PIRB..`" */
+    0x79,0x01,0x60,0x49,0x52,0x51,0x56,0xA4,  /* 000002A8    "y.`IRQV." */
+    0x42,0x55,0x46,0x42,0x14,0x1B,0x5F,0x53,  /* 000002B0    "BUFB.._S" */
+    0x52,0x53,0x01,0x8B,0x68,0x01,0x49,0x52,  /* 000002B8    "RS..h.IR" */
+    0x51,0x31,0x82,0x49,0x52,0x51,0x31,0x60,  /* 000002C0    "Q1.IRQ1`" */
+    0x76,0x60,0x70,0x60,0x50,0x49,0x52,0x41,  /* 000002C8    "v`p`PIRA" */
+    0x5B,0x82,0x49,0x08,0x4C,0x4E,0x4B,0x42,  /* 000002D0    "[.I.LNKB" */
+    0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,  /* 000002D8    "._HID.A." */
+    0x0C,0x0F,0x08,0x5F,0x55,0x49,0x44,0x0A,  /* 000002E0    "..._UID." */
+    0x02,0x14,0x1C,0x5F,0x53,0x54,0x41,0x00,  /* 000002E8    "..._STA." */
+    0x7B,0x50,0x49,0x52,0x42,0x0A,0x80,0x60,  /* 000002F0    "{PIRB..`" */
+    0xA0,0x08,0x93,0x60,0x0A,0x80,0xA4,0x0A,  /* 000002F8    "...`...." */
+    0x09,0xA1,0x04,0xA4,0x0A,0x0B,0x14,0x0B,  /* 00000300    "........" */
+    0x5F,0x50,0x52,0x53,0x00,0xA4,0x42,0x55,  /* 00000308    "_PRS..BU" */
+    0x46,0x41,0x14,0x11,0x5F,0x44,0x49,0x53,  /* 00000310    "FA.._DIS" */
+    0x00,0x7D,0x50,0x49,0x52,0x42,0x0A,0x80,  /* 00000318    ".}PIRB.." */
+    0x50,0x49,0x52,0x42,0x14,0x1A,0x5F,0x43,  /* 00000320    "PIRB.._C" */
+    0x52,0x53,0x00,0x7B,0x50,0x49,0x52,0x42,  /* 00000328    "RS.{PIRB" */
+    0x0A,0x0F,0x60,0x79,0x01,0x60,0x49,0x52,  /* 00000330    "..`y.`IR" */
+    0x51,0x56,0xA4,0x42,0x55,0x46,0x42,0x14,  /* 00000338    "QV.BUFB." */
+    0x1B,0x5F,0x53,0x52,0x53,0x01,0x8B,0x68,  /* 00000340    "._SRS..h" */
+    0x01,0x49,0x52,0x51,0x31,0x82,0x49,0x52,  /* 00000348    ".IRQ1.IR" */
+    0x51,0x31,0x60,0x76,0x60,0x70,0x60,0x50,  /* 00000350    "Q1`v`p`P" */
+    0x49,0x52,0x42,0x5B,0x82,0x49,0x08,0x4C,  /* 00000358    "IRB[.I.L" */
+    0x4E,0x4B,0x43,0x08,0x5F,0x48,0x49,0x44,  /* 00000360    "NKC._HID" */
+    0x0C,0x41,0xD0,0x0C,0x0F,0x08,0x5F,0x55,  /* 00000368    ".A...._U" */
+    0x49,0x44,0x0A,0x03,0x14,0x1C,0x5F,0x53,  /* 00000370    "ID...._S" */
+    0x54,0x41,0x00,0x7B,0x50,0x49,0x52,0x43,  /* 00000378    "TA.{PIRC" */
+    0x0A,0x80,0x60,0xA0,0x08,0x93,0x60,0x0A,  /* 00000380    "..`...`." */
+    0x80,0xA4,0x0A,0x09,0xA1,0x04,0xA4,0x0A,  /* 00000388    "........" */
+    0x0B,0x14,0x0B,0x5F,0x50,0x52,0x53,0x00,  /* 00000390    "..._PRS." */
+    0xA4,0x42,0x55,0x46,0x41,0x14,0x11,0x5F,  /* 00000398    ".BUFA.._" */
+    0x44,0x49,0x53,0x00,0x7D,0x50,0x49,0x52,  /* 000003A0    "DIS.}PIR" */
+    0x43,0x0A,0x80,0x50,0x49,0x52,0x43,0x14,  /* 000003A8    "C..PIRC." */
+    0x1A,0x5F,0x43,0x52,0x53,0x00,0x7B,0x50,  /* 000003B0    "._CRS.{P" */
+    0x49,0x52,0x43,0x0A,0x0F,0x60,0x79,0x01,  /* 000003B8    "IRC..`y." */
+    0x60,0x49,0x52,0x51,0x56,0xA4,0x42,0x55,  /* 000003C0    "`IRQV.BU" */
+    0x46,0x42,0x14,0x1B,0x5F,0x53,0x52,0x53,  /* 000003C8    "FB.._SRS" */
+    0x01,0x8B,0x68,0x01,0x49,0x52,0x51,0x31,  /* 000003D0    "..h.IRQ1" */
+    0x82,0x49,0x52,0x51,0x31,0x60,0x76,0x60,  /* 000003D8    ".IRQ1`v`" */
+    0x70,0x60,0x50,0x49,0x52,0x43,0x5B,0x82,  /* 000003E0    "p`PIRC[." */
+    0x49,0x08,0x4C,0x4E,0x4B,0x44,0x08,0x5F,  /* 000003E8    "I.LNKD._" */
+    0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F,  /* 000003F0    "HID.A..." */
+    0x08,0x5F,0x55,0x49,0x44,0x0A,0x04,0x14,  /* 000003F8    "._UID..." */
+    0x1C,0x5F,0x53,0x54,0x41,0x00,0x7B,0x50,  /* 00000400    "._STA.{P" */
+    0x49,0x52,0x44,0x0A,0x80,0x60,0xA0,0x08,  /* 00000408    "IRD..`.." */
+    0x93,0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1,  /* 00000410    ".`......" */
+    0x04,0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50,  /* 00000418    "......_P" */
+    0x52,0x53,0x00,0xA4,0x42,0x55,0x46,0x41,  /* 00000420    "RS..BUFA" */
+    0x14,0x11,0x5F,0x44,0x49,0x53,0x00,0x7D,  /* 00000428    ".._DIS.}" */
+    0x50,0x49,0x52,0x44,0x0A,0x80,0x50,0x49,  /* 00000430    "PIRD..PI" */
+    0x52,0x44,0x14,0x1A,0x5F,0x43,0x52,0x53,  /* 00000438    "RD.._CRS" */
+    0x00,0x7B,0x50,0x49,0x52,0x44,0x0A,0x0F,  /* 00000440    ".{PIRD.." */
+    0x60,0x79,0x01,0x60,0x49,0x52,0x51,0x56,  /* 00000448    "`y.`IRQV" */
+    0xA4,0x42,0x55,0x46,0x42,0x14,0x1B,0x5F,  /* 00000450    ".BUFB.._" */
+    0x53,0x52,0x53,0x01,0x8B,0x68,0x01,0x49,  /* 00000458    "SRS..h.I" */
+    0x52,0x51,0x31,0x82,0x49,0x52,0x51,0x31,  /* 00000460    "RQ1.IRQ1" */
+    0x60,0x76,0x60,0x70,0x60,0x50,0x49,0x52,  /* 00000468    "`v`p`PIR" */
+    0x44,0x14,0x16,0x5F,0x50,0x52,0x54,0x00,  /* 00000470    "D.._PRT." */
+    0xA0,0x0A,0x50,0x49,0x43,0x44,0xA4,0x50,  /* 00000478    "..PICD.P" */
+    0x52,0x54,0x41,0xA4,0x50,0x52,0x54,0x50,  /* 00000480    "RTA.PRTP" */
+    0x08,0x50,0x52,0x54,0x50,0x12,0x43,0x0E,  /* 00000488    ".PRTP.C." */
+    0x10,0x12,0x0B,0x04,0x0B,0xFF,0xFF,0x00,  /* 00000490    "........" */
+    0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0B,0x04,  /* 00000498    "LNKA...." */
+    0x0B,0xFF,0xFF,0x01,0x4C,0x4E,0x4B,0x42,  /* 000004A0    "....LNKB" */
+    0x00,0x12,0x0C,0x04,0x0B,0xFF,0xFF,0x0A,  /* 000004A8    "........" */
+    0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0C,  /* 000004B0    ".LNKC..." */
+    0x04,0x0B,0xFF,0xFF,0x0A,0x03,0x4C,0x4E,  /* 000004B8    "......LN" */
+    0x4B,0x44,0x00,0x12,0x0D,0x04,0x0C,0xFF,  /* 000004C0    "KD......" */
+    0xFF,0x01,0x00,0x00,0x4C,0x4E,0x4B,0x42,  /* 000004C8    "....LNKB" */
+    0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x01,  /* 000004D0    "........" */
+    0x00,0x01,0x4C,0x4E,0x4B,0x43,0x00,0x12,  /* 000004D8    "..LNKC.." */
+    0x0E,0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A,  /* 000004E0    "........" */
+    0x02,0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E,  /* 000004E8    ".LNKD..." */
+    0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A,0x03,  /* 000004F0    "........" */
+    0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0D,0x04,  /* 000004F8    "LNKA...." */
+    0x0C,0xFF,0xFF,0x02,0x00,0x00,0x4C,0x4E,  /* 00000500    "......LN" */
+    0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF,  /* 00000508    "KC......" */
+    0xFF,0x02,0x00,0x01,0x4C,0x4E,0x4B,0x44,  /* 00000510    "....LNKD" */
+    0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x02,  /* 00000518    "........" */
+    0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x41,0x00,  /* 00000520    "...LNKA." */
+    0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x02,0x00,  /* 00000528    "........" */
+    0x0A,0x03,0x4C,0x4E,0x4B,0x42,0x00,0x12,  /* 00000530    "..LNKB.." */
+    0x0D,0x04,0x0C,0xFF,0xFF,0x03,0x00,0x00,  /* 00000538    "........" */
+    0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04,  /* 00000540    "LNKD...." */
+    0x0C,0xFF,0xFF,0x03,0x00,0x01,0x4C,0x4E,  /* 00000548    "......LN" */
+    0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,  /* 00000550    "KA......" */
+    0xFF,0x03,0x00,0x0A,0x02,0x4C,0x4E,0x4B,  /* 00000558    ".....LNK" */
+    0x42,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,  /* 00000560    "B......." */
+    0x03,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x43,  /* 00000568    "....LNKC" */
+    0x00,0x08,0x50,0x52,0x54,0x41,0x12,0x32,  /* 00000570    "..PRTA.2" */
+    0x04,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x01,  /* 00000578    "........" */
+    0x00,0x00,0x00,0x0A,0x05,0x12,0x0B,0x04,  /* 00000580    "........" */
+    0x0C,0xFF,0xFF,0x02,0x00,0x00,0x00,0x0A,  /* 00000588    "........" */
+    0x07,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x03,  /* 00000590    "........" */
+    0x00,0x00,0x00,0x0A,0x0A,0x12,0x0B,0x04,  /* 00000598    "........" */
+    0x0C,0xFF,0xFF,0x03,0x00,0x00,0x00,0x0A,  /* 000005A0    "........" */
+    0x0B,0x5B,0x82,0x48,0x31,0x49,0x53,0x41,  /* 000005A8    ".[.H1ISA" */
+    0x5F,0x08,0x5F,0x41,0x44,0x52,0x00,0x5B,  /* 000005B0    "_._ADR.[" */
+    0x80,0x50,0x49,0x52,0x51,0x02,0x0A,0x60,  /* 000005B8    ".PIRQ..`" */
+    0x0A,0x04,0x10,0x2E,0x5C,0x00,0x5B,0x81,  /* 000005C0    "....\.[." */
+    0x29,0x5C,0x2F,0x04,0x5F,0x53,0x42,0x5F,  /* 000005C8    ")\/._SB_" */
+    0x50,0x43,0x49,0x30,0x49,0x53,0x41,0x5F,  /* 000005D0    "PCI0ISA_" */
+    0x50,0x49,0x52,0x51,0x01,0x50,0x49,0x52,  /* 000005D8    "PIRQ.PIR" */
+    0x41,0x08,0x50,0x49,0x52,0x42,0x08,0x50,  /* 000005E0    "A.PIRB.P" */
+    0x49,0x52,0x43,0x08,0x50,0x49,0x52,0x44,  /* 000005E8    "IRC.PIRD" */
+    0x08,0x5B,0x82,0x46,0x0B,0x53,0x59,0x53,  /* 000005F0    ".[.F.SYS" */
+    0x52,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,  /* 000005F8    "R._HID.A" */
+    0xD0,0x0C,0x02,0x08,0x5F,0x55,0x49,0x44,  /* 00000600    "...._UID" */
+    0x01,0x08,0x43,0x52,0x53,0x5F,0x11,0x4E,  /* 00000608    "..CRS_.N" */
+    0x08,0x0A,0x8A,0x47,0x01,0x10,0x00,0x10,  /* 00000610    "...G...." */
+    0x00,0x00,0x10,0x47,0x01,0x22,0x00,0x22,  /* 00000618    "...G."."" */
+    0x00,0x00,0x0C,0x47,0x01,0x30,0x00,0x30,  /* 00000620    "...G.0.0" */
+    0x00,0x00,0x10,0x47,0x01,0x44,0x00,0x44,  /* 00000628    "...G.D.D" */
+    0x00,0x00,0x1C,0x47,0x01,0x62,0x00,0x62,  /* 00000630    "...G.b.b" */
+    0x00,0x00,0x02,0x47,0x01,0x65,0x00,0x65,  /* 00000638    "...G.e.e" */
+    0x00,0x00,0x0B,0x47,0x01,0x72,0x00,0x72,  /* 00000640    "...G.r.r" */
+    0x00,0x00,0x0E,0x47,0x01,0x80,0x00,0x80,  /* 00000648    "...G...." */
+    0x00,0x00,0x01,0x47,0x01,0x84,0x00,0x84,  /* 00000650    "...G...." */
+    0x00,0x00,0x03,0x47,0x01,0x88,0x00,0x88,  /* 00000658    "...G...." */
+    0x00,0x00,0x01,0x47,0x01,0x8C,0x00,0x8C,  /* 00000660    "...G...." */
+    0x00,0x00,0x03,0x47,0x01,0x90,0x00,0x90,  /* 00000668    "...G...." */
+    0x00,0x00,0x10,0x47,0x01,0xA2,0x00,0xA2,  /* 00000670    "...G...." */
+    0x00,0x00,0x1C,0x47,0x01,0xE0,0x00,0xE0,  /* 00000678    "...G...." */
+    0x00,0x00,0x10,0x47,0x01,0xA0,0x08,0xA0,  /* 00000680    "...G...." */
+    0x08,0x00,0x04,0x47,0x01,0xC0,0x0C,0xC0,  /* 00000688    "...G...." */
+    0x0C,0x00,0x10,0x47,0x01,0xD0,0x04,0xD0,  /* 00000690    "...G...." */
+    0x04,0x00,0x02,0x79,0x00,0x14,0x0B,0x5F,  /* 00000698    "...y..._" */
+    0x43,0x52,0x53,0x00,0xA4,0x43,0x52,0x53,  /* 000006A0    "CRS..CRS" */
+    0x5F,0x5B,0x82,0x2B,0x50,0x49,0x43,0x5F,  /* 000006A8    "_[.+PIC_" */
+    0x08,0x5F,0x48,0x49,0x44,0x0B,0x41,0xD0,  /* 000006B0    "._HID.A." */
+    0x08,0x5F,0x43,0x52,0x53,0x11,0x18,0x0A,  /* 000006B8    "._CRS..." */
+    0x15,0x47,0x01,0x20,0x00,0x20,0x00,0x01,  /* 000006C0    ".G. . .." */
+    0x02,0x47,0x01,0xA0,0x00,0xA0,0x00,0x01,  /* 000006C8    ".G......" */
+    0x02,0x22,0x04,0x00,0x79,0x00,0x5B,0x82,  /* 000006D0    "."..y.[." */
+    0x47,0x05,0x44,0x4D,0x41,0x30,0x08,0x5F,  /* 000006D8    "G.DMA0._" */
+    0x48,0x49,0x44,0x0C,0x41,0xD0,0x02,0x00,  /* 000006E0    "HID.A..." */
+    0x08,0x5F,0x43,0x52,0x53,0x11,0x41,0x04,  /* 000006E8    "._CRS.A." */
+    0x0A,0x3D,0x2A,0x10,0x04,0x47,0x01,0x00,  /* 000006F0    ".=*..G.." */
+    0x00,0x00,0x00,0x00,0x10,0x47,0x01,0x81,  /* 000006F8    ".....G.." */
+    0x00,0x81,0x00,0x00,0x03,0x47,0x01,0x87,  /* 00000700    ".....G.." */
+    0x00,0x87,0x00,0x00,0x01,0x47,0x01,0x89,  /* 00000708    ".....G.." */
+    0x00,0x89,0x00,0x00,0x03,0x47,0x01,0x8F,  /* 00000710    ".....G.." */
+    0x00,0x8F,0x00,0x00,0x01,0x47,0x01,0xC0,  /* 00000718    ".....G.." */
+    0x00,0xC0,0x00,0x00,0x20,0x47,0x01,0x80,  /* 00000720    ".... G.." */
+    0x04,0x80,0x04,0x00,0x10,0x79,0x00,0x5B,  /* 00000728    ".....y.[" */
+    0x82,0x25,0x54,0x4D,0x52,0x5F,0x08,0x5F,  /* 00000730    ".%TMR_._" */
+    0x48,0x49,0x44,0x0C,0x41,0xD0,0x01,0x00,  /* 00000738    "HID.A..." */
+    0x08,0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,  /* 00000740    "._CRS..." */
+    0x0D,0x47,0x01,0x40,0x00,0x40,0x00,0x00,  /* 00000748    ".G.@.@.." */
+    0x04,0x22,0x01,0x00,0x79,0x00,0x5B,0x82,  /* 00000750    "."..y.[." */
+    0x25,0x52,0x54,0x43,0x5F,0x08,0x5F,0x48,  /* 00000758    "%RTC_._H" */
+    0x49,0x44,0x0C,0x41,0xD0,0x0B,0x00,0x08,  /* 00000760    "ID.A...." */
+    0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,  /* 00000768    "_CRS...." */
+    0x47,0x01,0x70,0x00,0x70,0x00,0x00,0x02,  /* 00000770    "G.p.p..." */
+    0x22,0x00,0x01,0x79,0x00,0x5B,0x82,0x22,  /* 00000778    ""..y.[."" */
+    0x53,0x50,0x4B,0x52,0x08,0x5F,0x48,0x49,  /* 00000780    "SPKR._HI" */
+    0x44,0x0C,0x41,0xD0,0x08,0x00,0x08,0x5F,  /* 00000788    "D.A...._" */
+    0x43,0x52,0x53,0x11,0x0D,0x0A,0x0A,0x47,  /* 00000790    "CRS....G" */
+    0x01,0x61,0x00,0x61,0x00,0x00,0x01,0x79,  /* 00000798    ".a.a...y" */
+    0x00,0x5B,0x82,0x31,0x50,0x53,0x32,0x4D,  /* 000007A0    ".[.1PS2M" */
+    0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,  /* 000007A8    "._HID.A." */
+    0x0F,0x13,0x08,0x5F,0x43,0x49,0x44,0x0C,  /* 000007B0    "..._CID." */
+    0x41,0xD0,0x0F,0x13,0x14,0x09,0x5F,0x53,  /* 000007B8    "A....._S" */
+    0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,  /* 000007C0    "TA....._" */
+    0x43,0x52,0x53,0x11,0x08,0x0A,0x05,0x22,  /* 000007C8    "CRS...."" */
+    0x00,0x10,0x79,0x00,0x5B,0x82,0x42,0x04,  /* 000007D0    "..y.[.B." */
+    0x50,0x53,0x32,0x4B,0x08,0x5F,0x48,0x49,  /* 000007D8    "PS2K._HI" */
+    0x44,0x0C,0x41,0xD0,0x03,0x03,0x08,0x5F,  /* 000007E0    "D.A...._" */
+    0x43,0x49,0x44,0x0C,0x41,0xD0,0x03,0x0B,  /* 000007E8    "CID.A..." */
+    0x14,0x09,0x5F,0x53,0x54,0x41,0x00,0xA4,  /* 000007F0    ".._STA.." */
+    0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53,0x11,  /* 000007F8    "..._CRS." */
+    0x18,0x0A,0x15,0x47,0x01,0x60,0x00,0x60,  /* 00000800    "...G.`.`" */
+    0x00,0x00,0x01,0x47,0x01,0x64,0x00,0x64,  /* 00000808    "...G.d.d" */
+    0x00,0x00,0x01,0x22,0x02,0x00,0x79,0x00,  /* 00000810    "..."..y." */
+    0x5B,0x82,0x3A,0x46,0x44,0x43,0x30,0x08,  /* 00000818    "[.:FDC0." */
+    0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x07,  /* 00000820    "_HID.A.." */
+    0x00,0x14,0x09,0x5F,0x53,0x54,0x41,0x00,  /* 00000828    "..._STA." */
+    0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53,  /* 00000830    "...._CRS" */
+    0x11,0x1B,0x0A,0x18,0x47,0x01,0xF0,0x03,  /* 00000838    "....G..." */
+    0xF0,0x03,0x01,0x06,0x47,0x01,0xF7,0x03,  /* 00000840    "....G..." */
+    0xF7,0x03,0x01,0x01,0x22,0x40,0x00,0x2A,  /* 00000848    "...."@.*" */
+    0x04,0x00,0x79,0x00,0x5B,0x82,0x35,0x55,  /* 00000850    "..y.[.5U" */
+    0x41,0x52,0x31,0x08,0x5F,0x48,0x49,0x44,  /* 00000858    "AR1._HID" */
+    0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,0x55,  /* 00000860    ".A...._U" */
+    0x49,0x44,0x01,0x14,0x09,0x5F,0x53,0x54,  /* 00000868    "ID..._ST" */
+    0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,  /* 00000870    "A....._C" */
+    0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01,  /* 00000878    "RS....G." */
+    0xF8,0x03,0xF8,0x03,0x01,0x08,0x22,0x10,  /* 00000880    "......"." */
+    0x00,0x79,0x00,0x5B,0x82,0x36,0x55,0x41,  /* 00000888    ".y.[.6UA" */
+    0x52,0x32,0x08,0x5F,0x48,0x49,0x44,0x0C,  /* 00000890    "R2._HID." */
+    0x41,0xD0,0x05,0x01,0x08,0x5F,0x55,0x49,  /* 00000898    "A...._UI" */
+    0x44,0x0A,0x02,0x14,0x09,0x5F,0x53,0x54,  /* 000008A0    "D...._ST" */
+    0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,  /* 000008A8    "A....._C" */
+    0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01,  /* 000008B0    "RS....G." */
+    0xF8,0x02,0xF8,0x02,0x01,0x08,0x22,0x08,  /* 000008B8    "......"." */
+    0x00,0x79,0x00,
 };
 int DsdtLen=sizeof(AmlCode);
diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi_fadt.h
--- a/tools/firmware/acpi/acpi_fadt.h   Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/firmware/acpi/acpi_fadt.h   Fri Jun 23 15:33:25 2006 -0600
@@ -22,19 +22,19 @@
 // FADT Definitions, see ACPI 2.0 specification for details.
 //
 
-#define ACPI_OEM_FADT_REVISION  0x00000000 // TBD
+#define ACPI_OEM_FADT_REVISION  0x00000001 // TBD
 
-#define ACPI_PREFERRED_PM_PROFILE 0x04
+#define ACPI_PREFERRED_PM_PROFILE 0x00
 #define ACPI_SCI_INT              0x0009
-#define ACPI_SMI_CMD              0x000000B2
+#define ACPI_SMI_CMD              0x00000000
 #define ACPI_ACPI_ENABLE    0x00
 #define ACPI_ACPI_DISABLE   0x00
 #define ACPI_S4_BIOS_REQ    0x00
 #define ACPI_PSTATE_CNT     0x00
-#define ACPI_GPE1_BASE      0x20
+#define ACPI_GPE1_BASE      0x00
 #define ACPI_CST_CNT        0x00
-#define ACPI_P_LVL2_LAT     0x0065
-#define ACPI_P_LVL3_LAT     0X03E9
+#define ACPI_P_LVL2_LAT     0x0064
+#define ACPI_P_LVL3_LAT     0X03E8
 #define ACPI_FLUSH_SIZE     0x00
 #define ACPI_FLUSH_STRIDE   0x00
 #define ACPI_DUTY_OFFSET    0x01
@@ -51,15 +51,16 @@
 //
 // Fixed Feature Flags
 // 
-#define ACPI_FIXED_FEATURE_FLAGS (ACPI_SLP_BUTTON| ACPI_WBINVD  )
+#define ACPI_FIXED_FEATURE_FLAGS 
(ACPI_PROC_C1|ACPI_SLP_BUTTON|ACPI_WBINVD|ACPI_PWR_BUTTON|ACPI_FIX_RTC)
 
 //
 // PM1A Event Register Block Generic Address Information
 //
 #define ACPI_PM1A_EVT_BLK_ADDRESS_SPACE_ID  ACPI_SYSTEM_IO
-#define ACPI_PM1A_EVT_BLK_BIT_WIDTH         0x00
+#define ACPI_PM1A_EVT_BLK_BIT_WIDTH         0x20
 #define ACPI_PM1A_EVT_BLK_BIT_OFFSET        0x00
-#define ACPI_PM1A_EVT_BLK_ADDRESS           0x0000000000008000
+//#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c010
+#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c040
 
 //
 // PM1B Event Register Block Generic Address Information
@@ -73,7 +74,7 @@
 // PM1A Control Register Block Generic Address Information
 //
 #define ACPI_PM1A_CNT_BLK_ADDRESS_SPACE_ID  ACPI_SYSTEM_IO
-#define ACPI_PM1A_CNT_BLK_BIT_WIDTH         0x08
+#define ACPI_PM1A_CNT_BLK_BIT_WIDTH         0x10
 #define ACPI_PM1A_CNT_BLK_BIT_OFFSET        0x00
 #define ACPI_PM1A_CNT_BLK_ADDRESS           (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
 
diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c       Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/ioemu/hw/pc.c       Fri Jun 23 15:33:25 2006 -0600
@@ -375,7 +375,9 @@ static int serial_io[MAX_SERIAL_PORTS] =
 static int serial_io[MAX_SERIAL_PORTS] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
 static int serial_irq[MAX_SERIAL_PORTS] = { 4, 3, 4, 3 };
 
-extern int acpi_init(unsigned int base);
+//extern int acpi_init(unsigned int base);
+/*  PIIX4 acpi pci configuration space, func 3 */
+extern void pci_piix4_acpi_init(PCIBus *bus);
 
 #define NOBIOS 1
 
@@ -583,7 +585,9 @@ void pc_init(uint64_t ram_size, int vga_
     floppy_controller = fdctrl_init(6, 2, 0, 0x3f0, fd_table);
 
     cmos_init(ram_size, boot_device, bs_table, timeoffset);
-    acpi_init(0x8000);
+// using PIIX4 acpi model
+//    acpi_init(0x8000);
+    pci_piix4_acpi_init(pci_bus);
 
     if (pci_enabled && usb_enabled) {
        usb_uhci_init(pci_bus, usb_root_ports);
diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c      Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/ioemu/hw/pci.c      Fri Jun 23 15:33:25 2006 -0600
@@ -1394,7 +1394,7 @@ static uint32_t pci_bios_io_addr;
 static uint32_t pci_bios_io_addr;
 static uint32_t pci_bios_mem_addr;
 /* host irqs corresponding to PCI irqs A-D */
-static uint8_t pci_irqs[4] = { 11, 9, 11, 9 };
+static uint8_t pci_irqs[4] = { 10, 11, 10, 11 };
 
 static void pci_set_io_region_addr(PCIDevice *d, int region_num, uint32_t addr)
 {
@@ -1447,12 +1447,22 @@ static void pci_bios_init_device(PCIDevi
             pci_set_io_region_addr(d, 3, 0x374);
         }
         break;
+       case 0x0680:
+       if (vendor_id == 0x8086 && device_id == 0x7113) {
+          // PIIX4 ACPI PM 
+        pci_config_writew(d, 0x20, 0x0000); // NO smb bus IO enable in PIIX4
+        pci_config_writew(d, 0x22, 0x0000); 
+        goto default_map;
+       }
+         break;
+
     case 0x0300:
         if (vendor_id != 0x1234)
             goto default_map;
         /* VGA: map frame buffer to default Bochs VBE address */
         pci_set_io_region_addr(d, 0, 0xE0000000);
         break;
+
     case 0x0800:
         /* PIC */
         vendor_id = pci_config_readw(d, PCI_VENDOR_ID);
@@ -1497,6 +1507,13 @@ static void pci_bios_init_device(PCIDevi
         pic_irq = pci_irqs[pin];
         pci_config_writeb(d, PCI_INTERRUPT_LINE, pic_irq);
     }
+    if (class== 0x0680&& vendor_id == 0x8086 && device_id == 0x7113) {
+         // PIIX4 ACPI PM
+       pci_config_writew(d, 0x20, 0x0000); // NO smb bus IO enable in PIIX4
+       pci_config_writew(d, 0x22, 0x0000);
+       pci_config_writew(d, 0x3c, 0x0009); // Hardcodeed IRQ9
+       pci_config_writew(d, 0x3d, 0x0001);
+    }
 }
 
 /*
diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/target-i386-dm/Makefile
--- a/tools/ioemu/target-i386-dm/Makefile       Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/ioemu/target-i386-dm/Makefile       Fri Jun 23 15:33:25 2006 -0600
@@ -281,7 +281,7 @@ VL_OBJS+= usb.o usb-hub.o usb-uhci.o usb
 # Hardware support
 VL_OBJS+= ide.o ne2000.o pckbd.o vga.o dma.o
 VL_OBJS+= fdc.o mc146818rtc.o serial.o i8259_stub.o pc.o port-e9.o
-VL_OBJS+= cirrus_vga.o pcnet.o acpi.o
+VL_OBJS+= cirrus_vga.o pcnet.o piix4acpi.o
 VL_OBJS+= $(SOUND_HW) $(AUDIODRV) mixeng.o
 
 ifeq ($(TARGET_ARCH), ppc)
diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/libxc/xc_domain.c   Fri Jun 23 15:33:25 2006 -0600
@@ -283,6 +283,17 @@ int xc_domain_setmaxmem(int xc_handle,
     op.cmd = DOM0_SETDOMAINMAXMEM;
     op.u.setdomainmaxmem.domain = (domid_t)domid;
     op.u.setdomainmaxmem.max_memkb = max_memkb;
+    return do_dom0_op(xc_handle, &op);
+}
+
+int xc_domain_set_time_offset(int xc_handle,
+                              uint32_t domid,
+                              int32_t time_offset_seconds)
+{
+    DECLARE_DOM0_OP;
+    op.cmd = DOM0_SETTIMEOFFSET;
+    op.u.settimeoffset.domain = (domid_t)domid;
+    op.u.settimeoffset.time_offset_seconds = time_offset_seconds;
     return do_dom0_op(xc_handle, &op);
 }
 
diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/libxc/xc_linux_restore.c    Fri Jun 23 15:33:25 2006 -0600
@@ -572,42 +572,48 @@ int xc_linux_restore(int xc_handle, int 
     nr_pins = 0;
     for (i = 0; i < max_pfn; i++) {
 
-        if (i == (max_pfn-1) || nr_pins == MAX_PIN_BATCH) {
+        if ( (pfn_type[i] & LPINTAB) == 0 )
+            continue;
+
+        switch (pfn_type[i]) {
+
+        case (L1TAB|LPINTAB):
+            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+            break;
+
+        case (L2TAB|LPINTAB):
+            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+            break;
+
+        case (L3TAB|LPINTAB):
+            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+            break;
+
+        case (L4TAB|LPINTAB):
+            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+            break;
+
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH) {
             if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) {
                 ERR("Failed to pin batch of %d page tables", nr_pins);
                 goto out;
             }
             nr_pins = 0;
         }
-
-        if ( (pfn_type[i] & LPINTAB) == 0 )
-            continue;
-
-        switch(pfn_type[i]) {
-
-        case (L1TAB|LPINTAB):
-            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
-            break;
-
-        case (L2TAB|LPINTAB):
-            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
-            break;
-
-        case (L3TAB|LPINTAB):
-            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
-            break;
-
-        case (L4TAB|LPINTAB):
-            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
-            break;
-
-        default:
-            continue;
-        }
-
-        pin[nr_pins].arg1.mfn = p2m[i];
-        nr_pins++;
-
+    }
+
+    /* Flush final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) {
+        ERR("Failed to pin batch of %d page tables", nr_pins);
+        goto out;
     }
 
     DPRINTF("\b\b\b\b100%%\n");
diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/libxc/xc_linux_save.c       Fri Jun 23 15:33:25 2006 -0600
@@ -91,12 +91,12 @@ static inline int test_bit (int nr, vola
 
 static inline void clear_bit (int nr, volatile void * addr)
 {
-    BITMAP_ENTRY(nr, addr) &= ~(1 << BITMAP_SHIFT(nr));
+    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
 }
 
 static inline void set_bit ( int nr, volatile void * addr)
 {
-    BITMAP_ENTRY(nr, addr) |= (1 << BITMAP_SHIFT(nr));
+    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
 }
 
 /* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */
diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/libxc/xenctrl.h     Fri Jun 23 15:33:25 2006 -0600
@@ -410,6 +410,10 @@ int xc_domain_setmaxmem(int xc_handle,
                         uint32_t domid,
                         unsigned int max_memkb);
 
+int xc_domain_set_time_offset(int xc_handle,
+                              uint32_t domid,
+                              int32_t time_offset_seconds);
+
 int xc_domain_memory_increase_reservation(int xc_handle,
                                           uint32_t domid,
                                           unsigned long nr_extents,
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri Jun 23 15:33:25 2006 -0600
@@ -869,6 +869,30 @@ static PyObject *pyxc_domain_iomem_permi
     return zero;
 }
 
+static PyObject *pyxc_domain_set_time_offset(XcObject *self, PyObject *args)
+{
+    uint32_t dom;
+    int32_t time_offset_seconds;
+    time_t calendar_time;
+    struct tm local_time;
+    struct tm utc_time;
+
+    if (!PyArg_ParseTuple(args, "i", &dom))
+        return NULL;
+
+    calendar_time = time(NULL);
+    localtime_r(&calendar_time, &local_time);
+    gmtime_r(&calendar_time, &utc_time);
+    /* set up to get calendar time based on utc_time, with local dst setting */
+    utc_time.tm_isdst = local_time.tm_isdst;
+    time_offset_seconds = (int32_t)difftime(calendar_time, mktime(&utc_time));
+
+    if (xc_domain_set_time_offset(self->xc_handle, dom, time_offset_seconds) 
!= 0)
+        return NULL;
+
+    Py_INCREF(zero);
+    return zero;
+}
 
 static PyObject *dom_op(XcObject *self, PyObject *args,
                         int (*fn)(int, uint32_t))
@@ -1207,6 +1231,13 @@ static PyMethodDef pyxc_methods[] = {
       METH_VARARGS, "\n"
       "Returns: [int]: The size in KiB of memory spanning the given number "
       "of pages.\n" },
+
+    { "domain_set_time_offset",
+      (PyCFunction)pyxc_domain_set_time_offset,
+      METH_VARARGS, "\n"
+      "Set a domain's time offset to Dom0's localtime\n"
+      " dom        [int]: Domain whose time offset is being set.\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
 
     { NULL, NULL, 0, NULL }
 };
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/util/xmlrpclib2.py
--- a/tools/python/xen/util/xmlrpclib2.py       Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/util/xmlrpclib2.py       Fri Jun 23 15:33:25 2006 -0600
@@ -13,7 +13,7 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2006 Anthony Liguori <aliguori@xxxxxxxxxx>
-# Copyright (C) 2006 XenSource Ltd.
+# Copyright (C) 2006 XenSource Inc.
 #============================================================================
 
 """
@@ -26,11 +26,18 @@ from httplib import HTTPConnection, HTTP
 from httplib import HTTPConnection, HTTP
 from xmlrpclib import Transport
 from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
+import SocketServer
 import xmlrpclib, socket, os, stat
-import SocketServer
 
-import xen.xend.XendClient
 from xen.xend.XendLogging import log
+
+try:
+    import SSHTransport
+    ssh_enabled = True
+except ImportError:
+    # SSHTransport is disabled on Python <2.4, because it uses the subprocess
+    # package.
+    ssh_enabled = False
 
 
 # A new ServerProxy that also supports httpu urls.  An http URL comes in the
@@ -39,6 +46,31 @@ from xen.xend.XendLogging import log
 # httpu:///absolute/path/to/socket.sock
 #
 # It assumes that the RPC handler is /RPC2.  This probably needs to be improved
+
+# We're forced to subclass the RequestHandler class so that we can work around
+# some bugs in Keep-Alive handling and also enabled it by default
+class XMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
+    protocol_version = "HTTP/1.1"
+
+    # this is inspired by SimpleXMLRPCRequestHandler's do_POST but differs
+    # in a few non-trivial ways
+    # 1) we never generate internal server errors.  We let the exception
+    #    propagate so that it shows up in the Xend debug logs
+    # 2) we don't bother checking for a _dispatch function since we don't
+    #    use one
+    def do_POST(self):
+        data = self.rfile.read(int(self.headers["content-length"]))
+        rsp = self.server._marshaled_dispatch(data)
+
+        self.send_response(200)
+        self.send_header("Content-Type", "text/xml")
+        self.send_header("Content-Length", str(len(rsp)))
+        self.end_headers()
+
+        self.wfile.write(rsp)
+        self.wfile.flush()
+        if self.close_connection == 1:
+            self.connection.shutdown(1)
 
 class HTTPUnixConnection(HTTPConnection):
     def connect(self):
@@ -75,9 +107,15 @@ class ServerProxy(xmlrpclib.ServerProxy)
             if protocol == 'httpu':
                 uri = 'http:' + rest
                 transport = UnixTransport()
+            elif protocol == 'ssh':
+                global ssh_enabled
+                if ssh_enabled:
+                    (transport, uri) = SSHTransport.getHTTPURI(uri)
+                else:
+                    raise ValueError(
+                        "SSH transport not supported on Python <2.4.")
         xmlrpclib.ServerProxy.__init__(self, uri, transport, encoding,
                                        verbose, allow_none)
-
 
     def __request(self, methodname, params):
         response = xmlrpclib.ServerProxy.__request(self, methodname, params)
@@ -93,6 +131,10 @@ class ServerProxy(xmlrpclib.ServerProxy)
 
 class TCPXMLRPCServer(SocketServer.ThreadingMixIn, SimpleXMLRPCServer):
     allow_reuse_address = True
+
+    def __init__(self, addr, requestHandler=XMLRPCRequestHandler,
+                 logRequests=1):
+        SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests)
 
     def _marshaled_dispatch(self, data, dispatch_method = None):
         params, method = xmlrpclib.loads(data)
@@ -121,6 +163,7 @@ class TCPXMLRPCServer(SocketServer.Threa
         except xmlrpclib.Fault, fault:
             response = xmlrpclib.dumps(fault)
         except Exception, exn:
+            import xen.xend.XendClient
             log.exception(exn)
             response = xmlrpclib.dumps(
                 xmlrpclib.Fault(xen.xend.XendClient.ERROR_INTERNAL, str(exn)))
@@ -131,10 +174,10 @@ class TCPXMLRPCServer(SocketServer.Threa
 # It implements proper support for allow_reuse_address by
 # unlink()'ing an existing socket.
 
-class UnixXMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
+class UnixXMLRPCRequestHandler(XMLRPCRequestHandler):
     def address_string(self):
         try:
-            return SimpleXMLRPCRequestHandler.address_string(self)
+            return XMLRPCRequestHandler.address_string(self)
         except ValueError, e:
             return self.client_address[:2]
 
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xend/XendClient.py
--- a/tools/python/xen/xend/XendClient.py       Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/xend/XendClient.py       Fri Jun 23 15:33:25 2006 -0600
@@ -18,6 +18,8 @@
 #============================================================================
 
 from xen.util.xmlrpclib2 import ServerProxy
+import os
+import sys
 
 XML_RPC_SOCKET = "/var/run/xend/xmlrpc.sock"
 
@@ -25,4 +27,13 @@ ERROR_GENERIC = 2
 ERROR_GENERIC = 2
 ERROR_INVALID_DOMAIN = 3
 
-server = ServerProxy('httpu:///var/run/xend/xmlrpc.sock')
+uri = 'httpu:///var/run/xend/xmlrpc.sock'
+if os.environ.has_key('XM_SERVER'):
+    uri = os.environ['XM_SERVER']
+
+try:
+    server = ServerProxy(uri)
+except ValueError, exn:
+    print >>sys.stderr, exn
+    sys.exit(1)
+
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri Jun 23 15:33:25 2006 -0600
@@ -135,6 +135,7 @@ ROUNDTRIPPING_CONFIG_ENTRIES = [
     ('bootloader', str),
     ('bootloader_args', str),
     ('features', str),
+    ('localtime', int),
     ]
 
 ROUNDTRIPPING_CONFIG_ENTRIES += VM_CONFIG_PARAMS
@@ -1259,6 +1260,10 @@ class XendDomainInfo:
             self.image = image.create(self,
                                       self.info['image'],
                                       self.info['device'])
+
+            localtime = self.info['localtime']
+            if localtime is not None and localtime == 1:
+                xc.domain_set_time_offset(self.domid)
 
             xc.domain_setcpuweight(self.domid, self.info['cpu_weight'])
 
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/xm/create.py     Fri Jun 23 15:33:25 2006 -0600
@@ -672,6 +672,8 @@ def make_config(vals):
         config.append(['backend', ['netif']])
     if vals.tpmif:
         config.append(['backend', ['tpmif']])
+    if vals.localtime:
+        config.append(['localtime', vals.localtime])
 
     config_image = configure_image(vals)
     if vals.bootloader:
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/xm/main.py       Fri Jun 23 15:33:25 2006 -0600
@@ -41,6 +41,7 @@ import xen.xend.XendClient
 import xen.xend.XendClient
 from xen.xend.XendClient import server
 from xen.util import security
+from select import select
 
 # getopt.gnu_getopt is better, but only exists in Python 2.3+.  Use
 # getopt.getopt if gnu_getopt is not available.  This will mean that options
@@ -124,6 +125,7 @@ loadpolicy_help = "loadpolicy <policy>  
 loadpolicy_help = "loadpolicy <policy>              Load binary policy into 
hypervisor"
 makepolicy_help = "makepolicy <policy>              Build policy and create 
.bin/.map files"
 labels_help     = "labels [policy] [type=DOM|..]    List <type> labels for 
(active) policy."
+serve_help      = "serve                            Proxy Xend XML-RPC over 
stdio"
 
 short_command_list = [
     "console",
@@ -171,7 +173,8 @@ host_commands = [
 host_commands = [
     "dmesg",
     "info",
-    "log"
+    "log",
+    "serve",
     ]
 
 scheduler_commands = [
@@ -273,7 +276,7 @@ for command in all_commands:
 ####################################################################
 
 def arg_check(args, name, lo, hi = -1):
-    n = len(args)
+    n = len([i for i in args if i != '--'])
     
     if hi == -1:
         if n != lo:
@@ -833,6 +836,32 @@ def xm_log(args):
     arg_check(args, "log", 0)
     
     print server.xend.node.log()
+
+def xm_serve(args):
+    arg_check(args, "serve", 0)
+
+    from fcntl import fcntl, F_SETFL
+    
+    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+    s.connect(xen.xend.XendClient.XML_RPC_SOCKET)
+    fcntl(sys.stdin, F_SETFL, os.O_NONBLOCK)
+
+    while True:
+        iwtd, owtd, ewtd = select([sys.stdin, s], [], [])
+        if s in iwtd:
+            data = s.recv(4096)
+            if len(data) > 0:
+                sys.stdout.write(data)
+                sys.stdout.flush()
+            else:
+                break
+        if sys.stdin in iwtd:
+            data = sys.stdin.read(4096)
+            if len(data) > 0:
+                s.sendall(data)
+            else:
+                break
+    s.close()
 
 def parse_dev_info(info):
     def get_info(n, t, d):
@@ -1072,6 +1101,7 @@ commands = {
     "dmesg": xm_dmesg,
     "info": xm_info,
     "log": xm_log,
+    "serve": xm_serve,
     # scheduler
     "sched-bvt": xm_sched_bvt,
     "sched-bvt-ctxallow": xm_sched_bvt_ctxallow,
diff -r 59d4c1863330 -r fdf25330e4a6 tools/security/secpol_tool.c
--- a/tools/security/secpol_tool.c      Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/security/secpol_tool.c      Fri Jun 23 15:33:25 2006 -0600
@@ -229,6 +229,7 @@ void acm_dump_policy_buffer(void *buf, i
 
 #define PULL_CACHE_SIZE                8192
 uint8_t pull_buffer[PULL_CACHE_SIZE];
+
 int acm_domain_getpolicy(int xc_handle)
 {
     struct acm_getpolicy getpolicy;
@@ -236,7 +237,7 @@ int acm_domain_getpolicy(int xc_handle)
 
     memset(pull_buffer, 0x00, sizeof(pull_buffer));
     getpolicy.interface_version = ACM_INTERFACE_VERSION;
-    getpolicy.pullcache = (void *) pull_buffer;
+    set_xen_guest_handle(getpolicy.pullcache, pull_buffer);
     getpolicy.pullcache_size = sizeof(pull_buffer);
     ret = xc_acm_op(xc_handle, ACMOP_getpolicy, &getpolicy, sizeof(getpolicy));
 
@@ -281,7 +282,7 @@ int acm_domain_loadpolicy(int xc_handle,
         /* dump it and then push it down into xen/acm */
         acm_dump_policy_buffer(buffer, len);
         setpolicy.interface_version = ACM_INTERFACE_VERSION;
-        setpolicy.pushcache = (void *) buffer;
+        set_xen_guest_handle(setpolicy.pushcache, buffer);
         setpolicy.pushcache_size = len;
         ret = xc_acm_op(xc_handle, ACMOP_setpolicy, &setpolicy, 
sizeof(setpolicy));
 
@@ -330,7 +331,7 @@ int acm_domain_dumpstats(int xc_handle)
 
     memset(stats_buffer, 0x00, sizeof(stats_buffer));
     dumpstats.interface_version = ACM_INTERFACE_VERSION;
-    dumpstats.pullcache = (void *) stats_buffer;
+    set_xen_guest_handle(dumpstats.pullcache, stats_buffer);
     dumpstats.pullcache_size = sizeof(stats_buffer);
     ret = xc_acm_op(xc_handle, ACMOP_dumpstats, &dumpstats, sizeof(dumpstats));
 
diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/grouptest/default
--- a/tools/xm-test/grouptest/default   Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/xm-test/grouptest/default   Fri Jun 23 15:33:25 2006 -0600
@@ -21,7 +21,7 @@ reboot
 reboot
 restore
 save
-sedf
+sched-credit
 shutdown
 sysrq
 unpause
diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/grouptest/medium
--- a/tools/xm-test/grouptest/medium    Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/xm-test/grouptest/medium    Fri Jun 23 15:33:25 2006 -0600
@@ -16,7 +16,7 @@ reboot
 reboot
 restore 02_restore_badparm_neg.test 03_restore_badfilename_neg.test 
04_restore_withdevices_pos.test
 save
-sedf
+sched-credit
 shutdown
 sysrq 01_sysrq_basic_neg.test 02_sysrq_sync_pos.test
 unpause
diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/lib/XmTestLib/Console.py
--- a/tools/xm-test/lib/XmTestLib/Console.py    Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/xm-test/lib/XmTestLib/Console.py    Fri Jun 23 15:33:25 2006 -0600
@@ -82,9 +82,6 @@ class XmConsole:
 
         tty.setraw(self.consoleFd, termios.TCSANOW)
 
-        self.__chewall(self.consoleFd)
-
-
     def __addToHistory(self, line):
         self.historyBuffer.append(line)
         self.historyLines += 1
@@ -120,34 +117,47 @@ class XmConsole:
         output"""
         self.PROMPT = prompt
 
-
-    def __chewall(self, fd):
+    def __getprompt(self, fd):
         timeout = 0
-        bytes   = 0
-        
-        while timeout < 3:
-            i, o, e = select.select([fd], [], [], 1)
-            if fd in i:
-                try:
-                    foo = os.read(fd, 1)
-                    if self.debugMe:
-                        sys.stdout.write(foo)
-                    bytes += 1
-                except Exception, exn:
-                    raise ConsoleError(str(exn))
-
-            else:
-                timeout += 1
-
-            if self.limit and bytes >= self.limit:
+        bytes = 0
+        while timeout < 180:
+            # eat anything while total bytes less than limit else raise RUNAWAY
+            while (not self.limit) or (bytes < self.limit):
+                i, o, e = select.select([fd], [], [], 1)
+                if fd in i:
+                    try:
+                        foo = os.read(fd, 1)
+                        if self.debugMe:
+                            sys.stdout.write(foo)
+                        bytes += 1
+                    except Exception, exn:
+                        raise ConsoleError(str(exn))
+                else:
+                    break
+            else:
                 raise ConsoleError("Console run-away (exceeded %i bytes)"
                                    % self.limit, RUNAWAY)
-
-        if self.debugMe:
-            print "Ignored %i bytes of miscellaneous console output" % bytes
-        
-        return bytes
-
+            # press enter
+            os.write(self.consoleFd, "\n")
+            # look for prompt
+            for prompt_char in "\r\n" + self.PROMPT:
+                i, o, e = select.select([fd], [], [], 1)
+                if fd in i:
+                    try:
+                        foo = os.read(fd, 1)
+                        if self.debugMe:
+                            sys.stdout.write(foo)
+                        if foo != prompt_char:
+                            break
+                    except Exception, exn:
+                        raise ConsoleError(str(exn))
+                else:
+                    timeout += 1
+                    break
+            else:
+                break
+        else:
+            raise ConsoleError("Timed out waiting for console prompt")
 
     def __runCmd(self, command, saveHistory=True):
         output = ""
@@ -155,7 +165,7 @@ class XmConsole:
         lines  = 0
         bytes  = 0
 
-        self.__chewall(self.consoleFd)
+        self.__getprompt(self.consoleFd)
 
         if verbose:
             print "[%s] Sending `%s'" % (self.domain, command)
@@ -176,7 +186,7 @@ class XmConsole:
                         "Failed to read from console (fd=%i): %s" %
                         (self.consoleFd, exn))
             else:
-                raise ConsoleError("Timed out waiting for console")
+                raise ConsoleError("Timed out waiting for console command")
 
             if self.limit and bytes >= self.limit:
                 raise ConsoleError("Console run-away (exceeded %i bytes)"
diff -r 59d4c1863330 -r fdf25330e4a6 
tools/xm-test/tests/memset/03_memset_random_pos.py
--- a/tools/xm-test/tests/memset/03_memset_random_pos.py        Fri Jun 23 
15:26:01 2006 -0600
+++ b/tools/xm-test/tests/memset/03_memset_random_pos.py        Fri Jun 23 
15:33:25 2006 -0600
@@ -22,12 +22,6 @@ except DomainError, e:
     FAIL(str(e))
 
 times = random.randint(10,50)
-
-try:
-    console = XmConsole(domain.getName())
-    console.sendInput("input")
-except ConsoleError, e:
-    FAIL(str(e))
 
 try:
     run = console.runCmd("cat /proc/xen/balloon | grep Current");
diff -r 59d4c1863330 -r fdf25330e4a6 xen/acm/acm_core.c
--- a/xen/acm/acm_core.c        Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/acm/acm_core.c        Fri Jun 23 15:33:25 2006 -0600
@@ -222,9 +222,8 @@ acm_setup(unsigned int *initrdidx,
         pol = (struct acm_policy_buffer *)_policy_start;
         if (ntohl(pol->magic) == ACM_MAGIC)
         {
-            rc = acm_set_policy((void *)_policy_start,
-                                (u32)_policy_len,
-                                0);
+            rc = do_acm_set_policy((void *)_policy_start,
+                                   (u32)_policy_len);
             if (rc == ACM_OK)
             {
                 printkd("Policy len  0x%lx, start at 
%p.\n",_policy_len,_policy_start);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/acm/acm_policy.c
--- a/xen/acm/acm_policy.c      Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/acm/acm_policy.c      Fri Jun 23 15:33:25 2006 -0600
@@ -26,36 +26,43 @@
 #include <xen/lib.h>
 #include <xen/delay.h>
 #include <xen/sched.h>
+#include <xen/guest_access.h>
 #include <acm/acm_core.h>
 #include <public/acm_ops.h>
 #include <acm/acm_hooks.h>
 #include <acm/acm_endian.h>
 
 int
-acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size, int isuserbuffer)
+acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size)
 {
     u8 *policy_buffer = NULL;
-    struct acm_policy_buffer *pol;
+    int ret = -EFAULT;
  
     if (buf_size < sizeof(struct acm_policy_buffer))
         return -EFAULT;
 
-    /* 1. copy buffer from domain */
+    /* copy buffer from guest domain */
     if ((policy_buffer = xmalloc_array(u8, buf_size)) == NULL)
         return -ENOMEM;
 
-    if (isuserbuffer) {
-        if (copy_from_guest(policy_buffer, buf, buf_size))
-        {
-            printk("%s: Error copying!\n",__func__);
-            goto error_free;
-        }
-    } else
-        memcpy(policy_buffer, buf, buf_size);
-
-    /* 2. some sanity checking */
-    pol = (struct acm_policy_buffer *)policy_buffer;
-
+    if (copy_from_guest(policy_buffer, buf, buf_size))
+    {
+        printk("%s: Error copying!\n",__func__);
+        goto error_free;
+    }
+    ret = do_acm_set_policy(policy_buffer, buf_size);
+
+ error_free:
+    xfree(policy_buffer);
+    return ret;
+}
+
+
+int
+do_acm_set_policy(void *buf, u32 buf_size)
+{
+    struct acm_policy_buffer *pol = (struct acm_policy_buffer *)buf;
+    /* some sanity checking */
     if ((ntohl(pol->magic) != ACM_MAGIC) ||
         (buf_size != ntohl(pol->len)) ||
         (ntohl(pol->policy_version) != ACM_POLICY_VERSION))
@@ -85,33 +92,31 @@ acm_set_policy(XEN_GUEST_HANDLE(void) bu
     /* get bin_policy lock and rewrite policy (release old one) */
     write_lock(&acm_bin_pol_rwlock);
 
-    /* 3. set label reference name */
+    /* set label reference name */
     if (acm_set_policy_reference(buf + ntohl(pol->policy_reference_offset),
                                  ntohl(pol->primary_buffer_offset) -
                                  ntohl(pol->policy_reference_offset)))
         goto error_lock_free;
 
-    /* 4. set primary policy data */
+    /* set primary policy data */
     if (acm_primary_ops->set_binary_policy(buf + 
ntohl(pol->primary_buffer_offset),
                                            ntohl(pol->secondary_buffer_offset) 
-
                                            ntohl(pol->primary_buffer_offset)))
         goto error_lock_free;
 
-    /* 5. set secondary policy data */
+    /* set secondary policy data */
     if (acm_secondary_ops->set_binary_policy(buf + 
ntohl(pol->secondary_buffer_offset),
                                              ntohl(pol->len) - 
                                              
ntohl(pol->secondary_buffer_offset)))
         goto error_lock_free;
 
     write_unlock(&acm_bin_pol_rwlock);
-    xfree(policy_buffer);
     return ACM_OK;
 
  error_lock_free:
     write_unlock(&acm_bin_pol_rwlock);
  error_free:
     printk("%s: Error setting policy.\n", __func__);
-    xfree(policy_buffer);
     return -EFAULT;
 }
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/ia64/linux-xen/smp.c
--- a/xen/arch/ia64/linux-xen/smp.c     Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/ia64/linux-xen/smp.c     Fri Jun 23 15:33:25 2006 -0600
@@ -421,6 +421,42 @@ smp_call_function (void (*func) (void *i
 }
 EXPORT_SYMBOL(smp_call_function);
 
+#ifdef XEN
+int
+on_selected_cpus(cpumask_t selected, void (*func) (void *info), void *info,
+                 int retry, int wait)
+{
+       struct call_data_struct data;
+       unsigned int cpu, nr_cpus = cpus_weight(selected);
+
+       ASSERT(local_irq_is_enabled());
+
+       if (!nr_cpus)
+               return 0;
+
+       data.func = func;
+       data.info = info;
+       data.wait = wait;
+       atomic_set(&data.started, 0);
+       atomic_set(&data.finished, 0);
+
+       spin_lock(&call_lock);
+
+       call_data = &data;
+       wmb();
+
+       for_each_cpu_mask(cpu, selected)
+               send_IPI_single(cpu, IPI_CALL_FUNC);
+
+       while (atomic_read(wait ? &data.finished : &data.started) != nr_cpus)
+               cpu_relax();
+
+       spin_unlock(&call_lock);
+
+       return 0;
+}
+#endif
+
 /*
  * this function calls the 'stop' function on all other CPUs in the system.
  */
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/ia64/xen/domain.c        Fri Jun 23 15:33:25 2006 -0600
@@ -895,9 +895,7 @@ int construct_dom0(struct domain *d,
        sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
        si->nr_pages     = max_pages;
 
-       /* Give up the VGA console if DOM0 is configured to grab it. */
-       if (cmdline != NULL)
-           console_endboot(strstr(cmdline, "tty0") != NULL);
+       console_endboot();
 
        printk("Dom0: 0x%lx\n", (u64)dom0);
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/ia64/xen/xensetup.c      Fri Jun 23 15:33:25 2006 -0600
@@ -532,9 +532,8 @@ printk("num_online_cpus=%d, max_cpus=%d\
 
     init_trace_bufs();
 
-    /* Give up the VGA console if DOM0 is configured to grab it. */
     if (opt_xencons)
-       console_endboot(cmdline && strstr(cmdline, "tty0"));
+        console_endboot();
 
     domain0_ready = 1;
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/Makefile     Fri Jun 23 15:33:25 2006 -0600
@@ -41,7 +41,7 @@ obj-y += x86_emulate.o
 obj-y += x86_emulate.o
 
 ifneq ($(pae),n)
-obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o
+obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o
 else
 obj-$(x86_32) += shadow32.o
 endif
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c      Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/audit.c      Fri Jun 23 15:33:25 2006 -0600
@@ -923,8 +923,8 @@ void _audit_domain(struct domain *d, int
                                d->domain_id, page_to_mfn(page),
                                page->u.inuse.type_info,
                                page->count_info);
-                        printk("a->gpfn_and_flags=%p\n",
-                               (void *)a->gpfn_and_flags);
+                        printk("a->gpfn_and_flags=%"PRIx64"\n",
+                               (u64)a->gpfn_and_flags);
                         errors++;
                     }
                     break;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Fri Jun 23 15:33:25 2006 -0600
@@ -74,12 +74,15 @@ static void __vmx_clear_vmcs(void *info)
 
 static void vmx_clear_vmcs(struct vcpu *v)
 {
-    unsigned int cpu = v->arch.hvm_vmx.active_cpu;
-
-    if ( (cpu == -1) || (cpu == smp_processor_id()) )
-        __vmx_clear_vmcs(v);
-    else
-        on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1);
+    int cpu = v->arch.hvm_vmx.active_cpu;
+
+    if ( cpu == -1 )
+        return;
+
+    if ( cpu == smp_processor_id() )
+        return __vmx_clear_vmcs(v);
+
+    on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1);
 }
 
 static void vmx_load_vmcs(struct vcpu *v)
@@ -97,6 +100,8 @@ void vmx_vmcs_enter(struct vcpu *v)
      *     context initialisation.
      *  2. VMPTRLD as soon as we context-switch to a HVM VCPU.
      *  3. VMCS destruction needs to happen later (from domain_destroy()).
+     * We can relax this a bit if a paused VCPU always commits its
+     * architectural state to a software structure.
      */
     if ( v == current )
         return;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri Jun 23 15:33:25 2006 -0600
@@ -1623,7 +1623,7 @@ static int mov_to_cr(int gp, int cr, str
             if ( vmx_pgbit_test(v) )
             {
                 /* The guest is a 32-bit PAE guest. */
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
                 unsigned long mfn, old_base_mfn;
 
                 if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
@@ -1667,7 +1667,7 @@ static int mov_to_cr(int gp, int cr, str
             else
             {
                 /*  The guest is a 64 bit or 32-bit PAE guest. */
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
                 if ( (v->domain->arch.ops != NULL) &&
                         v->domain->arch.ops->guest_paging_levels == PAGING_L2)
                 {
@@ -1680,15 +1680,6 @@ static int mov_to_cr(int gp, int cr, str
                     {
                         printk("Unsupported guest paging levels\n");
                         /* need to take a clean path */
-                        domain_crash_synchronous();
-                    }
-                }
-                else
-                {
-                    if ( !shadow_set_guest_paging_levels(v->domain,
-                                                            PAGING_L4) )
-                    {
-                        printk("Unsupported guest paging levels\n");
                         domain_crash_synchronous();
                     }
                 }
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/mm.c Fri Jun 23 15:33:25 2006 -0600
@@ -108,11 +108,20 @@
 #include <public/memory.h>
 
 #ifdef VERBOSE
-#define MEM_LOG(_f, _a...)                           \
-  printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
+#define MEM_LOG(_f, _a...)                                  \
+  printk("DOM%u: (file=mm.c, line=%d) " _f "\n",            \
          current->domain->domain_id , __LINE__ , ## _a )
 #else
 #define MEM_LOG(_f, _a...) ((void)0)
+#endif
+
+/*
+ * PTE updates can be done with ordinary writes except:
+ *  1. Debug builds get extra checking by using CMPXCHG[8B].
+ *  2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
+ */
+#if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
+#define PTE_UPDATE_WITH_CMPXCHG
 #endif
 
 /*
@@ -261,17 +270,19 @@ void share_xen_page_with_privileged_gues
 
 #ifdef NDEBUG
 /* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
-#define l3tab_needs_shadow(mfn) (mfn >= 0x100000)
+#define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
 #else
 /*
- * In debug builds we aggressively shadow PDPTs to exercise code paths.
+ * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
  * We cannot safely shadow the idle page table, nor shadow-mode page tables
- * (detected by lack of an owning domain). Always shadow PDPTs above 4GB.
+ * (detected by lack of an owning domain). As required for correctness, we
+ * always shadow PDPTs aboive 4GB.
  */
 #define l3tab_needs_shadow(mfn)                         \
-    ((((mfn << PAGE_SHIFT) != __pa(idle_pg_table)) &&   \
-      (page_get_owner(mfn_to_page(mfn)) != NULL)) ||    \
-     (mfn >= 0x100000))
+    (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
+      (page_get_owner(mfn_to_page(mfn)) != NULL) &&     \
+      ((mfn) & 1)) || /* odd MFNs are shadowed */       \
+     ((mfn) >= 0x100000))
 #endif
 
 static l1_pgentry_t *fix_pae_highmem_pl1e;
@@ -296,6 +307,8 @@ static void __write_ptbase(unsigned long
     if ( !l3tab_needs_shadow(mfn) )
     {
         write_cr3(mfn << PAGE_SHIFT);
+        /* Cache is no longer in use or valid (/after/ write to %cr3). */
+        cache->high_mfn = 0;
         return;
     }
 
@@ -1167,20 +1180,35 @@ static inline int update_l1e(l1_pgentry_
                              l1_pgentry_t  ol1e, 
                              l1_pgentry_t  nl1e)
 {
+#ifndef PTE_UPDATE_WITH_CMPXCHG
+    return !__copy_to_user(pl1e, &nl1e, sizeof(nl1e));
+#else
     intpte_t o = l1e_get_intpte(ol1e);
     intpte_t n = l1e_get_intpte(nl1e);
 
-    if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
-         unlikely(o != l1e_get_intpte(ol1e)) )
-    {
-        MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
-                ": saw %" PRIpte,
-                l1e_get_intpte(ol1e),
-                l1e_get_intpte(nl1e),
-                o);
-        return 0;
-    }
+    for ( ; ; )
+    {
+        if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
+        {
+            MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
+                    ": saw %" PRIpte,
+                    l1e_get_intpte(ol1e),
+                    l1e_get_intpte(nl1e),
+                    o);
+            return 0;
+        }
+
+        if ( o == l1e_get_intpte(ol1e) )
+            break;
+
+        /* Allowed to change in Accessed/Dirty flags only. */
+        BUG_ON((o ^ l1e_get_intpte(ol1e)) &
+               ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));
+        ol1e = l1e_from_intpte(o);
+    }
+
     return 1;
+#endif
 }
 
 
@@ -1228,17 +1256,24 @@ static int mod_l1_entry(l1_pgentry_t *pl
     return 1;
 }
 
-#define UPDATE_ENTRY(_t,_p,_o,_n) ({                                    \
-    intpte_t __o = cmpxchg((intpte_t *)(_p),                            \
-                           _t ## e_get_intpte(_o),                      \
-                           _t ## e_get_intpte(_n));                     \
-    if ( __o != _t ## e_get_intpte(_o) )                                \
-        MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte              \
-                ": saw %" PRIpte "",                                    \
-                (_t ## e_get_intpte(_o)),                               \
-                (_t ## e_get_intpte(_n)),                               \
-                (__o));                                                 \
-    (__o == _t ## e_get_intpte(_o)); })
+#ifndef PTE_UPDATE_WITH_CMPXCHG
+#define UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; })
+#else
+#define UPDATE_ENTRY(_t,_p,_o,_n) ({                            \
+    for ( ; ; )                                                 \
+    {                                                           \
+        intpte_t __o = cmpxchg((intpte_t *)(_p),                \
+                               _t ## e_get_intpte(_o),          \
+                               _t ## e_get_intpte(_n));         \
+        if ( __o == _t ## e_get_intpte(_o) )                    \
+            break;                                              \
+        /* Allowed to change in Accessed/Dirty flags only. */   \
+        BUG_ON((__o ^ _t ## e_get_intpte(_o)) &                 \
+               ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));             \
+        _o = _t ## e_from_intpte(__o);                          \
+    }                                                           \
+    1; })
+#endif
 
 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
 static int mod_l2_entry(l2_pgentry_t *pl2e, 
@@ -2408,8 +2443,8 @@ static int create_grant_pte_mapping(
         goto failed;
     }
 
-    if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) ||
-         !update_l1e(va, ol1e, _nl1e) )
+    ol1e = *(l1_pgentry_t *)va;
+    if ( !update_l1e(va, ol1e, _nl1e) )
     {
         put_page_type(page);
         rc = GNTST_general_error;
@@ -2486,7 +2521,7 @@ static int destroy_grant_pte_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(__put_user(0, (intpte_t *)va)))
+    if ( unlikely(!update_l1e((l1_pgentry_t *)va, ol1e, l1e_empty())) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", va);
         put_page_type(page);
@@ -2566,7 +2601,7 @@ static int destroy_grant_va_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(__put_user(0, &pl1e->l1)) )
+    if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty())) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         return GNTST_general_error;
@@ -3020,6 +3055,20 @@ long arch_memory_op(int op, XEN_GUEST_HA
         return 0;
     }
 
+    case XENMEM_machphys_mapping:
+    {
+        struct xen_machphys_mapping mapping = {
+            .v_start = MACH2PHYS_VIRT_START,
+            .v_end   = MACH2PHYS_VIRT_END,
+            .max_mfn = MACH2PHYS_NR_ENTRIES - 1
+        };
+
+        if ( copy_to_guest(arg, &mapping, 1) )
+            return -EFAULT;
+
+        return 0;
+    }
+
     default:
         return subarch_memory_op(op, arg);
     }
@@ -3343,7 +3392,7 @@ static int ptwr_emulated_update(
         addr &= ~(sizeof(paddr_t)-1);
         if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
         {
-            propagate_page_fault(addr, 4); /* user mode, read fault */
+            propagate_page_fault(addr, 0); /* read fault */
             return X86EMUL_PROPAGATE_FAULT;
         }
         /* Mask out bits provided by caller. */
@@ -3358,6 +3407,7 @@ static int ptwr_emulated_update(
         old  |= full;
     }
 
+#if 0 /* XXX KAF: I don't think this can happen. */
     /*
      * We must not emulate an update to a PTE that is temporarily marked
      * writable by the batched ptwr logic, else we can corrupt page refcnts! 
@@ -3368,6 +3418,12 @@ static int ptwr_emulated_update(
     if ( ((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
          (l1_linear_offset(l1va) == l1_linear_offset(addr)) )
         ptwr_flush(d, PTWR_PT_INACTIVE);
+#else
+    BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) &&
+           (l1_linear_offset(l1va) == l1_linear_offset(addr)));
+    BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
+           (l1_linear_offset(l1va) == l1_linear_offset(addr)));
+#endif
 
     /* Read the PTE that maps the page being updated. */
     if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
@@ -3409,8 +3465,9 @@ static int ptwr_emulated_update(
     }
     else
     {
-        ol1e  = *pl1e;
-        *pl1e = nl1e;
+        ol1e = *pl1e;
+        if ( !update_l1e(pl1e, ol1e, nl1e) )
+            BUG();
     }
     unmap_domain_page(pl1e);
 
@@ -3475,16 +3532,18 @@ int ptwr_do_page_fault(struct domain *d,
     unsigned long    l2_idx;
     struct x86_emulate_ctxt emul_ctxt;
 
-    if ( unlikely(shadow_mode_enabled(d)) )
-        return 0;
+    ASSERT(!shadow_mode_enabled(d));
 
     /*
      * Attempt to read the PTE that maps the VA being accessed. By checking for
      * PDE validity in the L2 we avoid many expensive fixups in __get_user().
+     * NB. The L2 entry cannot be detached due to existing ptwr work: the
+     * caller already checked that.
      */
-    if ( !(l2e_get_flags(__linear_l2_table[l2_linear_offset(addr)]) &
-           _PAGE_PRESENT) ||
-         __copy_from_user(&pte,&linear_pg_table[l1_linear_offset(addr)],
+    pl2e = &__linear_l2_table[l2_linear_offset(addr)];
+    if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) ||
+        !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
+         __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
                           sizeof(pte)) )
     {
         return 0;
@@ -3557,21 +3616,31 @@ int ptwr_do_page_fault(struct domain *d,
     }
 
     /*
-     * If this is a multi-processor guest then ensure that the page is hooked
-     * into at most one L2 table, which must be the one running on this VCPU.
+     * Multi-processor guest? Then ensure that the page table is hooked into
+     * at most one L2, and also ensure that there is only one mapping of the
+     * page table itself (or there can be conflicting writable mappings from
+     * other VCPUs).
      */
-    if ( (d->vcpu[0]->next_in_list != NULL) &&
-         ((page->u.inuse.type_info & PGT_count_mask) != 
-          (!!(page->u.inuse.type_info & PGT_pinned) +
-           (which == PTWR_PT_ACTIVE))) )
-    {
-        /* Could be conflicting writable mappings from other VCPUs. */
-        cleanup_writable_pagetable(d);
-        goto emulate;
+    if ( d->vcpu[0]->next_in_list != NULL )
+    {
+        if ( /* Hooked into at most one L2 table (which this VCPU maps)? */
+             ((page->u.inuse.type_info & PGT_count_mask) != 
+              (!!(page->u.inuse.type_info & PGT_pinned) +
+               (which == PTWR_PT_ACTIVE))) ||
+             /* PTEs are mapped read-only in only one place? */
+             ((page->count_info & PGC_count_mask) !=
+              (!!(page->count_info & PGC_allocated) +       /* alloc count */
+               (page->u.inuse.type_info & PGT_count_mask) + /* type count  */
+               1)) )                                        /* map count   */
+        {
+            /* Could be conflicting writable mappings from other VCPUs. */
+            cleanup_writable_pagetable(d);
+            goto emulate;
+        }
     }
 
     /*
-     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at at 
+     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
      * time. If there is already one, we must flush it out.
      */
     if ( d->arch.ptwr[which].l1va )
@@ -3592,18 +3661,16 @@ int ptwr_do_page_fault(struct domain *d,
                 "pfn %lx\n", PTWR_PRINT_WHICH, addr,
                 l2_idx << L2_PAGETABLE_SHIFT, pfn);
 
-    d->arch.ptwr[which].l1va   = addr | 1;
-    d->arch.ptwr[which].l2_idx = l2_idx;
-    d->arch.ptwr[which].vcpu   = current;
-
-#ifdef PERF_ARRAYS
-    d->arch.ptwr[which].eip    = regs->eip;
-#endif
-
     /* For safety, disconnect the L1 p.t. page from current space. */
     if ( which == PTWR_PT_ACTIVE )
     {
-        l2e_remove_flags(*pl2e, _PAGE_PRESENT);
+        l2e_remove_flags(l2e, _PAGE_PRESENT);
+        if ( unlikely(__copy_to_user(pl2e, &l2e, sizeof(l2e))) )
+        {
+            MEM_LOG("ptwr: Could not unhook l2e at %p", pl2e);
+            domain_crash(d);
+            return 0;
+        }
         flush_tlb_mask(d->domain_dirty_cpumask);
     }
     
@@ -3617,14 +3684,24 @@ int ptwr_do_page_fault(struct domain *d,
     if ( unlikely(__put_user(pte.l1,
                              &linear_pg_table[l1_linear_offset(addr)].l1)) )
     {
-        MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *)
+        MEM_LOG("ptwr: Could not update pte at %p",
                 &linear_pg_table[l1_linear_offset(addr)]);
-        /* Toss the writable pagetable state and crash. */
-        d->arch.ptwr[which].l1va = 0;
         domain_crash(d);
         return 0;
     }
     
+    /*
+     * Now record the writable pagetable state *after* any accesses that can
+     * cause a recursive page fault (i.e., those via the *_user() accessors).
+     * Otherwise we can enter ptwr_flush() with half-done ptwr state.
+     */
+    d->arch.ptwr[which].l1va   = addr | 1;
+    d->arch.ptwr[which].l2_idx = l2_idx;
+    d->arch.ptwr[which].vcpu   = current;
+#ifdef PERF_ARRAYS
+    d->arch.ptwr[which].eip    = regs->eip;
+#endif
+
     return EXCRET_fault_fixed;
 
  emulate:
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/setup.c      Fri Jun 23 15:33:25 2006 -0600
@@ -396,11 +396,13 @@ void __init __start_xen(multiboot_info_t
     BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
     BUILD_BUG_ON(sizeof(vcpu_info_t) != 64);
 
-    /* __foo are defined in public headers. Check they match internal defs. */
+    /* Check definitions in public headers match internal defs. */
     BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
 #ifdef HYPERVISOR_VIRT_END
     BUILD_BUG_ON(__HYPERVISOR_VIRT_END   != HYPERVISOR_VIRT_END);
 #endif
+    BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
+    BUILD_BUG_ON(MACH2PHYS_VIRT_END   != RO_MPT_VIRT_END);
 
     init_frametable();
 
@@ -596,8 +598,7 @@ void __init __start_xen(multiboot_info_t
 
     init_trace_bufs();
 
-    /* Give up the VGA console if DOM0 is configured to grab it. */
-    console_endboot(cmdline && strstr(cmdline, "tty0"));
+    console_endboot();
 
     /* Hide UART from DOM0 if we're using it */
     serial_endboot();
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/shadow.c     Fri Jun 23 15:33:25 2006 -0600
@@ -222,6 +222,7 @@ alloc_shadow_page(struct domain *d,
     unsigned long smfn, real_gpfn;
     int pin = 0;
     void *l1, *lp;
+    u64 index = 0;
 
     // Currently, we only keep pre-zero'ed pages around for use as L1's...
     // This will change.  Soon.
@@ -354,9 +355,19 @@ alloc_shadow_page(struct domain *d,
         if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
             pin = 1;
 #endif
+
+#if CONFIG_PAGING_LEVELS == 3 & defined ( GUEST_32PAE )
+        /*
+         * We use PGT_l4_shadow for 2-level paging guests on PAE
+         */
+        if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+            pin = 1;
+#endif
+        if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+            index = get_cr3_idxval(current);
         break;
 
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
     case PGT_fl1_shadow:
         perfc_incr(shadow_l1_pages);
         d->arch.shadow_page_count++;
@@ -393,7 +404,7 @@ alloc_shadow_page(struct domain *d,
     //
     ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
 
-    set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
+    set_shadow_status(d, gpfn, gmfn, smfn, psh_type, index);
 
     if ( pin )
         shadow_pin(smfn);
@@ -1324,7 +1335,7 @@ increase_writable_pte_prediction(struct 
     prediction = (prediction & PGT_mfn_mask) | score;
 
     //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, 
create);
-    set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, 
PGT_writable_pred);
+    set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, 
PGT_writable_pred, 0);
 
     if ( create )
         perfc_incr(writable_pte_predictions);
@@ -1345,10 +1356,10 @@ decrease_writable_pte_prediction(struct 
     //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, 
score);
 
     if ( score )
-        set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, 
PGT_writable_pred);
+        set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, 
PGT_writable_pred, 0);
     else
     {
-        delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
+        delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred, 
0);
         perfc_decr(writable_pte_predictions);
     }
 }
@@ -1385,7 +1396,7 @@ static u32 remove_all_write_access_in_pt
     int is_l1_shadow =
         ((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) ==
          PGT_l1_shadow);
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
     is_l1_shadow |=
       ((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) ==
                 PGT_fl1_shadow);
@@ -1494,7 +1505,7 @@ static int remove_all_write_access(
         while ( a && a->gpfn_and_flags )
         {
             if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
               || (a->gpfn_and_flags & PGT_type_mask) == PGT_fl1_shadow
 #endif
               )
@@ -1538,8 +1549,8 @@ static void resync_pae_guest_l3(struct d
             continue;
 
         idx = get_cr3_idxval(v);
-        smfn = __shadow_status(
-            d, ((unsigned long)(idx << PGT_pae_idx_shift) | entry->gpfn), 
PGT_l4_shadow);
+
+        smfn = __shadow_status(d, entry->gpfn, PGT_l4_shadow);
 
         if ( !smfn ) 
             continue;
@@ -1706,7 +1717,7 @@ static int resync_all(struct domain *d, 
                 {
                     int error;
 
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
                     unsigned long gpfn;
 
                     gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT;
@@ -2420,17 +2431,6 @@ static void shadow_update_pagetables(str
         v->arch.guest_vtable = map_domain_page_global(gmfn);
     }
 
-#if CONFIG_PAGING_LEVELS >= 3
-    /*
-     * Handle 32-bit PAE enabled guest
-     */
-    if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) 
-    {
-        u32 index = get_cr3_idxval(v);
-        gpfn = ((unsigned long)index << PGT_pae_idx_shift) | gpfn;
-    }
-#endif
-
     /*
      *  arch.shadow_table
      */
@@ -2443,6 +2443,23 @@ static void shadow_update_pagetables(str
         if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) )
             smfn = shadow_l3_table(v, gpfn, gmfn);
     } 
+    else
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3 & defined ( GUEST_32PAE )
+    /*
+     * We use PGT_l4_shadow for 2-level paging guests on PAE
+     */
+    if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+    {
+        if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) )
+            smfn = shadow_l3_table(v, gpfn, gmfn);
+        else
+        {
+            update_top_level_shadow(v, smfn);
+            need_sync = 1;
+        }
+    }
     else
 #endif
     if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) 
@@ -3093,6 +3110,36 @@ static inline unsigned long init_bl2(
 
     return smfn;
 }
+
+static inline unsigned long init_l3(
+    struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
+{
+    unsigned long smfn;
+    l4_pgentry_t *spl4e;
+    unsigned long index;
+
+    if ( unlikely(!(smfn = alloc_shadow_page(v->domain, gpfn, gmfn, 
PGT_l4_shadow))) )
+    {
+        printk("Couldn't alloc an L4 shadow for pfn= %lx mfn= %lx\n", gpfn, 
gmfn);
+        BUG(); /* XXX Deal gracefully wiht failure. */
+    }
+
+    /* Map the self entry, L4&L3 share the same page */
+    spl4e = (l4_pgentry_t *)map_domain_page(smfn);
+
+    /*
+     * Shadow L4's pfn_info->tlbflush_timestamp
+     * should also save it's own index.
+     */
+
+    index = get_cr3_idxval(v);
+    frame_table[smfn].tlbflush_timestamp = index;
+
+    memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
+    spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
+    unmap_domain_page(spl4e);
+    return smfn;
+}
 #endif
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -3111,6 +3158,12 @@ static unsigned long shadow_l3_table(
          d->arch.ops->guest_paging_levels == PAGING_L2 )
     {
         return init_bl2(d, gpfn, gmfn);
+    }
+
+    if ( SH_GUEST_32PAE &&
+         d->arch.ops->guest_paging_levels == PAGING_L3 )
+    {
+        return init_l3(v, gpfn, gmfn);
     }
 
     if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) )
@@ -3223,6 +3276,11 @@ static unsigned long shadow_l4_table(
         return init_bl2(d, gpfn, gmfn);
     }
 
+    if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
+    {
+        return init_l3(v, gpfn, gmfn);
+    }
+
     if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
     {
         printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, 
gmfn);
@@ -3230,24 +3288,6 @@ static unsigned long shadow_l4_table(
     }
 
     spl4e = (l4_pgentry_t *)map_domain_page(smfn);
-
-    /* For 32-bit PAE guest on 64-bit host */
-    if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) 
-    {
-        unsigned long index;
-        /*
-         * Shadow L4's pfn_info->tlbflush_timestamp
-         * should also save it's own index.
-         */
-        index = get_cr3_idxval(v);
-        frame_table[smfn].tlbflush_timestamp = index;
-
-        memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
-        /* Map the self entry */
-        spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
-        unmap_domain_page(spl4e);
-        return smfn;
-    }
 
     /* Install hypervisor and 4x linear p.t. mapings. */
     if ( (PGT_base_page_table == PGT_l4_page_table) &&
@@ -3378,7 +3418,7 @@ validate_bl2e_change(
  * This shadow_mark_va_out_of_sync() is for 2M page shadow
  */
 static void shadow_mark_va_out_of_sync_2mp(
-  struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long 
writable_pl1e)
+  struct vcpu *v, unsigned long gpfn, unsigned long mfn, paddr_t writable_pl1e)
 {
     struct out_of_sync_entry *entry =
       shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
@@ -3647,6 +3687,7 @@ static inline int l2e_rw_fault(
     }
 
     unmap_domain_page(l1_p);
+    *gl2e_p = gl2e;
     return 1;
 
 }
@@ -3720,7 +3761,7 @@ static inline int guest_page_fault(
 
     ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 );
 
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
     if ( (error_code & (ERROR_I | ERROR_P)) == (ERROR_I | ERROR_P) )
         return 1;
 #endif
@@ -4056,7 +4097,7 @@ struct shadow_ops MODE_32_2_HANDLER = {
 };
 #endif
 
-#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) ) ||  \
+#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) && !defined 
(GUEST_32PAE) ) ||  \
     ( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) ) 
 
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/shadow32.c   Fri Jun 23 15:33:25 2006 -0600
@@ -306,7 +306,7 @@ alloc_shadow_page(struct domain *d,
     //
     ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
 
-    set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
+    set_shadow_status(d, gpfn, gmfn, smfn, psh_type, 0);
 
     if ( pin )
         shadow_pin(smfn);
@@ -395,7 +395,7 @@ void free_shadow_page(unsigned long smfn
 
     ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
 
-    delete_shadow_status(d, gpfn, gmfn, type);
+    delete_shadow_status(d, gpfn, gmfn, type, 0);
 
     switch ( type )
     {
@@ -2319,7 +2319,7 @@ increase_writable_pte_prediction(struct 
     prediction = (prediction & PGT_mfn_mask) | score;
 
     //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, 
create);
-    set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, 
PGT_writable_pred);
+    set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, 
PGT_writable_pred, 0);
 
     if ( create )
         perfc_incr(writable_pte_predictions);
@@ -2340,10 +2340,10 @@ decrease_writable_pte_prediction(struct 
     //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, 
score);
 
     if ( score )
-        set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, 
PGT_writable_pred);
+        set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, 
PGT_writable_pred, 0);
     else
     {
-        delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
+        delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred, 
0);
         perfc_decr(writable_pte_predictions);
     }
 }
@@ -2381,7 +2381,7 @@ free_writable_pte_predictions(struct dom
              * keep an accurate count of writable_pte_predictions to keep it
              * happy.
              */
-            delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+            delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0);
             perfc_decr(writable_pte_predictions);
         }
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow_guest32pae.c
--- a/xen/arch/x86/shadow_guest32pae.c  Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/shadow_guest32pae.c  Fri Jun 23 15:33:25 2006 -0600
@@ -1,5 +1,4 @@
 #define GUEST_32PAE
-#if defined (__x86_64__)
 
 #include "shadow.c"
 struct shadow_ops MODE_64_PAE_HANDLER = {
@@ -15,4 +14,3 @@ struct shadow_ops MODE_64_PAE_HANDLER = 
     .gva_to_gpa                 = gva_to_gpa_64,
 };
 
-#endif
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/shadow_public.c      Fri Jun 23 15:33:25 2006 -0600
@@ -123,8 +123,19 @@ int shadow_set_guest_paging_levels(struc
 #endif
 #if CONFIG_PAGING_LEVELS == 3
     case 3:
-        if ( d->arch.ops != &MODE_64_3_HANDLER )
-            d->arch.ops = &MODE_64_3_HANDLER;
+        if ( d->arch.ops == NULL ||
+                    shadow_mode_log_dirty(d) )
+        {
+            if ( d->arch.ops != &MODE_64_3_HANDLER )
+                d->arch.ops = &MODE_64_3_HANDLER;
+        }
+        else
+        {
+            if ( d->arch.ops == &MODE_64_2_HANDLER )
+                free_shadow_pages(d);
+            if ( d->arch.ops != &MODE_64_PAE_HANDLER )
+                d->arch.ops = &MODE_64_PAE_HANDLER;
+        }
         shadow_unlock(d);
         return 1;
 #endif
@@ -268,10 +279,8 @@ free_shadow_tables(struct domain *d, uns
                     put_shadow_ref(entry_get_pfn(ple[i]));
                 if (d->arch.ops->guest_paging_levels == PAGING_L3)
                 {
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
                     if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L4 )
-#elif CONFIG_PAGING_LEVELS == 3
-                    if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L3 )
 #endif
                         break;
                 }
@@ -710,6 +719,7 @@ void free_shadow_page(unsigned long smfn
     struct domain *d = page_get_owner(mfn_to_page(gmfn));
     unsigned long gpfn = mfn_to_gmfn(d, gmfn);
     unsigned long type = page->u.inuse.type_info & PGT_type_mask;
+    u64 index = 0;
 
     SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
 
@@ -722,12 +732,16 @@ void free_shadow_page(unsigned long smfn
         if ( !mfn )
             gpfn |= (1UL << 63);
     }
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
     if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
-        if ( type == PGT_l4_shadow ) 
-            gpfn = ((unsigned long)page->tlbflush_timestamp << 
PGT_pae_idx_shift) | gpfn;
-#endif
-
-    delete_shadow_status(d, gpfn, gmfn, type);
+    {
+        if ( type == PGT_l4_shadow )
+            index = page->tlbflush_timestamp;
+    }
+#endif
+
+    delete_shadow_status(d, gpfn, gmfn, type, index);
 
     switch ( type )
     {
@@ -835,7 +849,7 @@ free_writable_pte_predictions(struct dom
         while ( count )
         {
             count--;
-            delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+            delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0);
         }
 
         xfree(gpfn_list);
@@ -1050,8 +1064,8 @@ void __shadow_mode_disable(struct domain
     {
         if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
         {
-            printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
-                   __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
+            printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%"PRIx64"\n",
+                   __FILE__, i, (u64)d->arch.shadow_ht[i].gpfn_and_flags);
             BUG();
         }
     }
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/time.c       Fri Jun 23 15:33:25 2006 -0600
@@ -699,7 +699,7 @@ void update_domain_wallclock_time(struct
 {
     spin_lock(&wc_lock);
     version_update_begin(&d->shared_info->wc_version);
-    d->shared_info->wc_sec  = wc_sec;
+    d->shared_info->wc_sec  = wc_sec + d->time_offset_seconds;
     d->shared_info->wc_nsec = wc_nsec;
     version_update_end(&d->shared_info->wc_version);
     spin_unlock(&wc_lock);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/traps.c      Fri Jun 23 15:33:25 2006 -0600
@@ -276,6 +276,42 @@ void show_stack(struct cpu_user_regs *re
     show_trace(regs);
 }
 
+void show_stack_overflow(unsigned long esp)
+{
+#ifdef MEMORY_GUARD
+    unsigned long esp_top = get_stack_bottom() & PAGE_MASK;
+    unsigned long *stack, addr;
+
+    /* Trigger overflow trace if %esp is within 100 bytes of the guard page. */
+    if ( ((esp - esp_top) > 100) && ((esp_top - esp) > 100) )
+        return;
+
+    if ( esp < esp_top )
+        esp = esp_top;
+
+    printk("Xen stack overflow:\n   ");
+
+    stack = (unsigned long *)esp;
+    while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
+    {
+        addr = *stack++;
+        if ( is_kernel_text(addr) )
+        {
+            printk("%p: [<%p>]", stack, _p(addr));
+            print_symbol(" %s\n   ", addr);
+        }
+    }
+
+    printk("\n");
+#endif
+}
+
+void show_execution_state(struct cpu_user_regs *regs)
+{
+    show_registers(regs);
+    show_stack(regs);
+}
+
 /*
  * This is called for faults at very unexpected times (e.g., when interrupts
  * are disabled). In such situations we can't do much that is safe. We try to
@@ -297,7 +333,7 @@ asmlinkage void fatal_trap(int trapnr, s
     watchdog_disable();
     console_start_sync();
 
-    show_registers(regs);
+    show_execution_state(regs);
 
     if ( trapnr == TRAP_page_fault )
     {
@@ -360,7 +396,7 @@ static inline int do_trap(int trapnr, ch
 
     DEBUGGER_trap_fatal(trapnr, regs);
 
-    show_registers(regs);
+    show_execution_state(regs);
     panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
           "[error_code=%04x]\n",
           smp_processor_id(), trapnr, str, regs->error_code);
@@ -451,8 +487,23 @@ asmlinkage int do_invalid_op(struct cpu_
 
     if ( unlikely(!guest_mode(regs)) )
     {
+        char sig[5];
+        /* Signature (ud2; .ascii "dbg") indicates dump state and continue. */
+        if ( (__copy_from_user(sig, (char *)regs->eip, sizeof(sig)) == 0) &&
+             (memcmp(sig, "\xf\xb""dbg", sizeof(sig)) == 0) )
+        {
+            show_execution_state(regs);
+            regs->eip += sizeof(sig);
+            return EXCRET_fault_fixed;
+        }
+        printk("%02x %02x %02x %02x %02x\n",
+               (unsigned char)sig[0],
+               (unsigned char)sig[1],
+               (unsigned char)sig[2],
+               (unsigned char)sig[3],
+               (unsigned char)sig[4]);
         DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
-        show_registers(regs);
+        show_execution_state(regs);
         panic("CPU%d FATAL TRAP: vector = %d (invalid opcode)\n",
               smp_processor_id(), TRAP_invalid_op);
     }
@@ -481,7 +532,7 @@ asmlinkage int do_int3(struct cpu_user_r
     if ( !guest_mode(regs) )
     {
         DEBUGGER_trap_fatal(TRAP_int3, regs);
-        show_registers(regs);
+        show_execution_state(regs);
         panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
     } 
 
@@ -511,9 +562,9 @@ void propagate_page_fault(unsigned long 
     v->vcpu_info->arch.cr2           = addr;
 
     /* Re-set error_code.user flag appropriately for the guest. */
-    error_code &= ~4;
+    error_code &= ~PGERR_user_mode;
     if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
-        error_code |= 4;
+        error_code |= PGERR_user_mode;
 
     ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
     tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
@@ -547,6 +598,7 @@ static int handle_gdt_ldt_mapping_fault(
     {
         /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
         LOCK_BIGLOCK(d);
+        cleanup_writable_pagetable(d);
         ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
         UNLOCK_BIGLOCK(d);
 
@@ -578,6 +630,98 @@ static int handle_gdt_ldt_mapping_fault(
     (((va) >= HYPERVISOR_VIRT_START))
 #endif
 
+static int __spurious_page_fault(
+    unsigned long addr, struct cpu_user_regs *regs)
+{
+    unsigned long mfn, cr3 = read_cr3();
+#if CONFIG_PAGING_LEVELS >= 4
+    l4_pgentry_t l4e, *l4t;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    l3_pgentry_t l3e, *l3t;
+#endif
+    l2_pgentry_t l2e, *l2t;
+    l1_pgentry_t l1e, *l1t;
+    unsigned int required_flags, disallowed_flags;
+
+    /* Reserved bit violations are never spurious faults. */
+    if ( regs->error_code & PGERR_reserved_bit )
+        return 0;
+
+    required_flags  = _PAGE_PRESENT;
+    if ( regs->error_code & PGERR_write_access )
+        required_flags |= _PAGE_RW;
+    if ( regs->error_code & PGERR_user_mode )
+        required_flags |= _PAGE_USER;
+
+    disallowed_flags = 0;
+    if ( regs->error_code & PGERR_instr_fetch )
+        disallowed_flags |= _PAGE_NX;
+
+    mfn = cr3 >> PAGE_SHIFT;
+
+#if CONFIG_PAGING_LEVELS >= 4
+    l4t = map_domain_page(mfn);
+    l4e = l4t[l4_table_offset(addr)];
+    mfn = l4e_get_pfn(l4e);
+    unmap_domain_page(l4t);
+    if ( !(l4e_get_flags(l4e) & required_flags) ||
+         (l4e_get_flags(l4e) & disallowed_flags) )
+        return 0;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3
+    l3t  = map_domain_page(mfn); 
+#ifdef CONFIG_X86_PAE
+    l3t += (cr3 & 0xFE0UL) >> 3;
+#endif
+    l3e = l3t[l3_table_offset(addr)];
+    mfn = l3e_get_pfn(l3e);
+    unmap_domain_page(l3t);
+#ifdef CONFIG_X86_PAE
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+        return 0;
+#else
+    if ( !(l3e_get_flags(l3e) & required_flags) ||
+         (l3e_get_flags(l3e) & disallowed_flags) )
+        return 0;
+#endif
+#endif
+
+    l2t = map_domain_page(mfn);
+    l2e = l2t[l2_table_offset(addr)];
+    mfn = l2e_get_pfn(l2e);
+    unmap_domain_page(l2t);
+    if ( !(l2e_get_flags(l2e) & required_flags) ||
+         (l2e_get_flags(l2e) & disallowed_flags) )
+        return 0;
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+        return 1;
+
+    l1t = map_domain_page(mfn);
+    l1e = l1t[l1_table_offset(addr)];
+    mfn = l1e_get_pfn(l1e);
+    unmap_domain_page(l1t);
+    if ( !(l1e_get_flags(l1e) & required_flags) ||
+         (l1e_get_flags(l1e) & disallowed_flags) )
+        return 0;
+    return 1;
+}
+
+static int spurious_page_fault(
+    unsigned long addr, struct cpu_user_regs *regs)
+{
+    struct domain *d = current->domain;
+    int            is_spurious;
+
+    LOCK_BIGLOCK(d);
+    cleanup_writable_pagetable(d);
+    is_spurious = __spurious_page_fault(addr, regs);
+    UNLOCK_BIGLOCK(d);
+
+    return is_spurious;
+}
+
 static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
 {
     struct vcpu   *v = current;
@@ -590,12 +734,17 @@ static int fixup_page_fault(unsigned lon
         if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
             return handle_gdt_ldt_mapping_fault(
                 addr - GDT_LDT_VIRT_START, regs);
-    }
-    else if ( unlikely(shadow_mode_enabled(d)) )
-    {
+        /*
+         * Do not propagate spurious faults in the hypervisor area to the
+         * guest. It cannot fix them up.
+         */
+        return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
+    }
+
+    if ( unlikely(shadow_mode_enabled(d)) )
         return shadow_fault(addr, regs);
-    }
-    else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
+
+    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
     {
         LOCK_BIGLOCK(d);
         if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
@@ -607,9 +756,14 @@ static int fixup_page_fault(unsigned lon
             return EXCRET_fault_fixed;
         }
 
+        /*
+         * Note it is *not* safe to check PGERR_page_present here. It can be
+         * clear, due to unhooked page table, when we would otherwise expect
+         * it to be set. We have an aversion to trusting that flag in Xen, and
+         * guests ought to be leery too.
+         */
         if ( guest_kernel_mode(v, regs) &&
-             /* Protection violation on write? No reserved-bit violation? */
-             ((regs->error_code & 0xb) == 0x3) &&
+             (regs->error_code & PGERR_write_access) &&
              ptwr_do_page_fault(d, addr, regs) )
         {
             UNLOCK_BIGLOCK(d);
@@ -619,46 +773,6 @@ static int fixup_page_fault(unsigned lon
     }
 
     return 0;
-}
-
-static int spurious_page_fault(unsigned long addr, struct cpu_user_regs *regs)
-{
-    struct vcpu   *v = current;
-    struct domain *d = v->domain;
-    int            rc;
-
-    /*
-     * The only possible reason for a spurious page fault not to be picked
-     * up already is that a page directory was unhooked by writable page table
-     * logic and then reattached before the faulting VCPU could detect it.
-     */
-    if ( is_idle_domain(d) ||               /* no ptwr in idle domain       */
-         IN_HYPERVISOR_RANGE(addr) ||       /* no ptwr on hypervisor addrs  */
-         shadow_mode_enabled(d) ||          /* no ptwr logic in shadow mode */
-         ((regs->error_code & 0x1d) != 0) ) /* simple not-present fault?    */
-        return 0;
-
-    LOCK_BIGLOCK(d);
-
-    /*
-     * The page directory could have been detached again while we weren't
-     * holding the per-domain lock. Detect that and fix up if it's the case.
-     */
-    if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
-         unlikely(l2_linear_offset(addr) ==
-                  d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
-    {
-        ptwr_flush(d, PTWR_PT_ACTIVE);
-        rc = 1;
-    }
-    else
-    {
-        /* Okay, walk the page tables. Only check for not-present faults.*/
-        rc = __spurious_page_fault(addr);
-    }
-
-    UNLOCK_BIGLOCK(d);
-    return rc;
 }
 
 /*
@@ -703,7 +817,7 @@ asmlinkage int do_page_fault(struct cpu_
 
         DEBUGGER_trap_fatal(TRAP_page_fault, regs);
 
-        show_registers(regs);
+        show_execution_state(regs);
         show_page_walk(addr);
         panic("CPU%d FATAL PAGE FAULT\n"
               "[error_code=%04x]\n"
@@ -784,8 +898,6 @@ static inline int admin_io_okay(
     (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
 
 /* Propagate a fault back to the guest kernel. */
-#define USER_READ_FAULT  4 /* user mode, read fault */
-#define USER_WRITE_FAULT 6 /* user mode, write fault */
 #define PAGE_FAULT(_faultaddr, _errcode)        \
 ({  propagate_page_fault(_faultaddr, _errcode); \
     return EXCRET_fault_fixed;                  \
@@ -795,7 +907,7 @@ static inline int admin_io_okay(
 #define insn_fetch(_type, _size, _ptr)          \
 ({  unsigned long _x;                           \
     if ( get_user(_x, (_type *)eip) )           \
-        PAGE_FAULT(eip, USER_READ_FAULT);       \
+        PAGE_FAULT(eip, 0); /* read fault */    \
     eip += _size; (_type)_x; })
 
 static int emulate_privileged_op(struct cpu_user_regs *regs)
@@ -864,17 +976,17 @@ static int emulate_privileged_op(struct 
             case 1:
                 data = (u8)inb_user((u16)regs->edx, v, regs);
                 if ( put_user((u8)data, (u8 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             case 2:
                 data = (u16)inw_user((u16)regs->edx, v, regs);
                 if ( put_user((u16)data, (u16 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             case 4:
                 data = (u32)inl_user((u16)regs->edx, v, regs);
                 if ( put_user((u32)data, (u32 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             }
             regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
@@ -889,17 +1001,17 @@ static int emulate_privileged_op(struct 
             {
             case 1:
                 if ( get_user(data, (u8 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outb_user((u8)data, (u16)regs->edx, v, regs);
                 break;
             case 2:
                 if ( get_user(data, (u16 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outw_user((u16)data, (u16)regs->edx, v, regs);
                 break;
             case 4:
                 if ( get_user(data, (u32 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outl_user((u32)data, (u16)regs->edx, v, regs);
                 break;
             }
@@ -1082,7 +1194,7 @@ static int emulate_privileged_op(struct 
             v->arch.guest_context.ctrlreg[2] = *reg;
             v->vcpu_info->arch.cr2           = *reg;
             break;
-            
+
         case 3: /* Write CR3 */
             LOCK_BIGLOCK(v->domain);
             cleanup_writable_pagetable(v->domain);
@@ -1270,7 +1382,7 @@ asmlinkage int do_general_protection(str
     DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
 
  hardware_gp:
-    show_registers(regs);
+    show_execution_state(regs);
     panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
           smp_processor_id(), regs->error_code);
     return 0;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_32/seg_fixup.c
--- a/xen/arch/x86/x86_32/seg_fixup.c   Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/x86_32/seg_fixup.c   Fri Jun 23 15:33:25 2006 -0600
@@ -464,7 +464,7 @@ int gpf_emulate_4gb(struct cpu_user_regs
     return 0;
 
  page_fault:
-    propagate_page_fault((unsigned long)pb, 4);
+    propagate_page_fault((unsigned long)pb, 0); /* read fault */
     return EXCRET_fault_fixed;
 }
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/x86_32/traps.c       Fri Jun 23 15:33:25 2006 -0600
@@ -68,13 +68,11 @@ void show_registers(struct cpu_user_regs
            "ss: %04x   cs: %04x\n",
            fault_regs.ds, fault_regs.es, fault_regs.fs,
            fault_regs.gs, fault_regs.ss, fault_regs.cs);
-
-    show_stack(regs);
 }
 
 void show_page_walk(unsigned long addr)
 {
-    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
+    unsigned long pfn, mfn, cr3 = read_cr3();
 #ifdef CONFIG_X86_PAE
     l3_pgentry_t l3e, *l3t;
 #endif
@@ -83,8 +81,11 @@ void show_page_walk(unsigned long addr)
 
     printk("Pagetable walk from %08lx:\n", addr);
 
+    mfn = cr3 >> PAGE_SHIFT;
+
 #ifdef CONFIG_X86_PAE
-    l3t = map_domain_page(mfn);
+    l3t  = map_domain_page(mfn);
+    l3t += (cr3 & 0xFE0UL) >> 3;
     l3e = l3t[l3_table_offset(addr)];
     mfn = l3e_get_pfn(l3e);
     pfn = get_gpfn_from_mfn(mfn);
@@ -111,40 +112,6 @@ void show_page_walk(unsigned long addr)
     pfn = get_gpfn_from_mfn(mfn);
     printk("   L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn);
     unmap_domain_page(l1t);
-}
-
-int __spurious_page_fault(unsigned long addr)
-{
-    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
-#ifdef CONFIG_X86_PAE
-    l3_pgentry_t l3e, *l3t;
-#endif
-    l2_pgentry_t l2e, *l2t;
-    l1_pgentry_t l1e, *l1t;
-
-#ifdef CONFIG_X86_PAE
-    l3t = map_domain_page(mfn);
-    l3e = l3t[l3_table_offset(addr)];
-    mfn = l3e_get_pfn(l3e);
-    unmap_domain_page(l3t);
-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-        return 0;
-#endif
-
-    l2t = map_domain_page(mfn);
-    l2e = l2t[l2_table_offset(addr)];
-    mfn = l2e_get_pfn(l2e);
-    unmap_domain_page(l2t);
-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-        return 0;
-    if ( l2e_get_flags(l2e) & _PAGE_PSE )
-        return 1;
-
-    l1t = map_domain_page(mfn);
-    l1e = l1t[l1_table_offset(addr)];
-    mfn = l1e_get_pfn(l1e);
-    unmap_domain_page(l1t);
-    return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
 }
 
 #define DOUBLEFAULT_STACK_SIZE 1024
@@ -173,6 +140,7 @@ asmlinkage void do_double_fault(void)
            tss->esi, tss->edi, tss->ebp, tss->esp);
     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
            tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
+    show_stack_overflow(tss->esp);
     printk("************************************\n");
     printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
     printk("System needs manual reset.\n");
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/x86_64/traps.c       Fri Jun 23 15:33:25 2006 -0600
@@ -68,8 +68,6 @@ void show_registers(struct cpu_user_regs
            "ss: %04x   cs: %04x\n",
            fault_regs.ds, fault_regs.es, fault_regs.fs,
            fault_regs.gs, fault_regs.ss, fault_regs.cs);
-
-    show_stack(regs);
 }
 
 void show_page_walk(unsigned long addr)
@@ -115,40 +113,6 @@ void show_page_walk(unsigned long addr)
     printk("    L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn);
 }
 
-int __spurious_page_fault(unsigned long addr)
-{
-    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
-    l4_pgentry_t l4e, *l4t;
-    l3_pgentry_t l3e, *l3t;
-    l2_pgentry_t l2e, *l2t;
-    l1_pgentry_t l1e, *l1t;
-
-    l4t = mfn_to_virt(mfn);
-    l4e = l4t[l4_table_offset(addr)];
-    mfn = l4e_get_pfn(l4e);
-    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
-        return 0;
-
-    l3t = mfn_to_virt(mfn);
-    l3e = l3t[l3_table_offset(addr)];
-    mfn = l3e_get_pfn(l3e);
-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-        return 0;
-
-    l2t = mfn_to_virt(mfn);
-    l2e = l2t[l2_table_offset(addr)];
-    mfn = l2e_get_pfn(l2e);
-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-        return 0;
-    if ( l2e_get_flags(l2e) & _PAGE_PSE )
-        return 1;
-
-    l1t = mfn_to_virt(mfn);
-    l1e = l1t[l1_table_offset(addr)];
-    mfn = l1e_get_pfn(l1e);
-    return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
-}
-
 asmlinkage void double_fault(void);
 asmlinkage void do_double_fault(struct cpu_user_regs *regs)
 {
@@ -159,6 +123,7 @@ asmlinkage void do_double_fault(struct c
     /* Find information saved during fault and dump it to the console. */
     printk("************************************\n");
     show_registers(regs);
+    show_stack_overflow(regs->rsp);
     printk("************************************\n");
     printk("CPU%d DOUBLE FAULT -- system shutdown\n", smp_processor_id());
     printk("System needs manual reset.\n");
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/x86_emulate.c        Fri Jun 23 15:33:25 2006 -0600
@@ -1146,7 +1146,7 @@ x86_emulate_read_std(
     *val = 0;
     if ( copy_from_user((void *)val, (void *)addr, bytes) )
     {
-        propagate_page_fault(addr, 4); /* user mode, read fault */
+        propagate_page_fault(addr, 0); /* read fault */
         return X86EMUL_PROPAGATE_FAULT;
     }
     return X86EMUL_CONTINUE;
@@ -1161,7 +1161,7 @@ x86_emulate_write_std(
 {
     if ( copy_to_user((void *)addr, (void *)&val, bytes) )
     {
-        propagate_page_fault(addr, 6); /* user mode, write fault */
+        propagate_page_fault(addr, PGERR_write_access); /* write fault */
         return X86EMUL_PROPAGATE_FAULT;
     }
     return X86EMUL_CONTINUE;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/acm_ops.c
--- a/xen/common/acm_ops.c      Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/acm_ops.c      Fri Jun 23 15:33:25 2006 -0600
@@ -69,7 +69,7 @@ long do_acm_op(int cmd, XEN_GUEST_HANDLE
             return -EACCES;
 
         rc = acm_set_policy(setpolicy.pushcache,
-                            setpolicy.pushcache_size, 1);
+                            setpolicy.pushcache_size);
         break;
     }
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/dom0_ops.c     Fri Jun 23 15:33:25 2006 -0600
@@ -693,6 +693,21 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
     break;
 #endif
 
+    case DOM0_SETTIMEOFFSET:
+    {
+        struct domain *d;
+
+        ret = -ESRCH;
+        d = find_domain_by_id(op->u.settimeoffset.domain);
+        if ( d != NULL )
+        {
+            d->time_offset_seconds = op->u.settimeoffset.time_offset_seconds;
+            put_domain(d);
+            ret = 0;
+        }
+    }
+    break;
+
     default:
         ret = arch_do_dom0_op(op, u_dom0_op);
         break;
@@ -701,9 +716,9 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
     spin_unlock(&dom0_lock);
 
     if (!ret)
-        acm_post_dom0_op(op, ssid);
+        acm_post_dom0_op(op, &ssid);
     else
-        acm_fail_dom0_op(op, ssid);
+        acm_fail_dom0_op(op, &ssid);
 
     return ret;
 }
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/domain.c
--- a/xen/common/domain.c       Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/domain.c       Fri Jun 23 15:33:25 2006 -0600
@@ -234,7 +234,7 @@ void __domain_crash(struct domain *d)
     {
         printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
                d->domain_id, current->vcpu_id, smp_processor_id());
-        show_registers(guest_cpu_user_regs());
+        show_execution_state(guest_cpu_user_regs());
     }
     else
     {
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/kernel.c
--- a/xen/common/kernel.c       Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/kernel.c       Fri Jun 23 15:33:25 2006 -0600
@@ -96,10 +96,11 @@ char *print_tainted(char *str)
 {
     if ( tainted )
     {
-        snprintf(str, TAINT_STRING_MAX_LEN, "Tainted: %c%c%c",
+        snprintf(str, TAINT_STRING_MAX_LEN, "Tainted: %c%c%c%c",
                  tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
                  tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
-                 tainted & TAINT_BAD_PAGE ? 'B' : ' ');
+                 tainted & TAINT_BAD_PAGE ? 'B' : ' ',
+                 tainted & TAINT_SYNC_CONSOLE ? 'C' : ' ');
     }
     else
     {
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/keyhandler.c   Fri Jun 23 15:33:25 2006 -0600
@@ -87,10 +87,28 @@ static void show_handlers(unsigned char 
                    key_table[i].desc);
 }
 
+static void __dump_execstate(void *unused)
+{
+    dump_execution_state();
+}
+
 static void dump_registers(unsigned char key, struct cpu_user_regs *regs)
 {
+    unsigned int cpu;
+
     printk("'%c' pressed -> dumping registers\n", key); 
-    show_registers(regs); 
+
+    /* Get local execution state out immediately, in case we get stuck. */
+    printk("\n*** Dumping CPU%d state: ***\n", smp_processor_id());
+    show_execution_state(regs);
+
+    for_each_online_cpu ( cpu )
+    {
+        if ( cpu == smp_processor_id() )
+            continue;
+        printk("\n*** Dumping CPU%d state: ***\n", cpu);
+        on_selected_cpus(cpumask_of_cpu(cpu), __dump_execstate, NULL, 1, 1);
+    }
 }
 
 static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/memory.c
--- a/xen/common/memory.c       Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/memory.c       Fri Jun 23 15:33:25 2006 -0600
@@ -282,7 +282,7 @@ memory_exchange(XEN_GUEST_HANDLE(xen_mem
     LIST_HEAD(in_chunk_list);
     LIST_HEAD(out_chunk_list);
     unsigned long in_chunk_order, out_chunk_order;
-    unsigned long gpfn, gmfn, mfn;
+    xen_pfn_t     gpfn, gmfn, mfn;
     unsigned long i, j, k;
     unsigned int  memflags = 0;
     long          rc = 0;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/sched_credit.c Fri Jun 23 15:33:25 2006 -0600
@@ -967,9 +967,6 @@ csched_load_balance(int cpu, struct csch
         if ( peer_cpu == cpu )
             break;
 
-        BUG_ON( peer_cpu >= csched_priv.ncpus );
-        BUG_ON( peer_cpu == cpu );
-
         /*
          * Get ahold of the scheduler lock for this peer CPU.
          *
@@ -1072,7 +1069,6 @@ csched_schedule(s_time_t now)
     ret.task = snext->vcpu;
 
     CSCHED_VCPU_CHECK(ret.task);
-    BUG_ON( !vcpu_runnable(ret.task) );
 
     return ret;
 }
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/sched_sedf.c   Fri Jun 23 15:33:25 2006 -0600
@@ -360,24 +360,23 @@ static int sedf_init_vcpu(struct vcpu *v
         INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q));
     }
        
+    /* Every VCPU gets an equal share of extratime by default. */
+    inf->deadl_abs   = 0;
+    inf->latency     = 0;
+    inf->status      = EXTRA_AWARE | SEDF_ASLEEP;
+    inf->extraweight = 1;
+
     if ( v->domain->domain_id == 0 )
     {
-        /*set dom0 to something useful to boot the machine*/
+        /* Domain0 gets 75% guaranteed (15ms every 20ms). */
         inf->period    = MILLISECS(20);
         inf->slice     = MILLISECS(15);
-        inf->latency   = 0;
-        inf->deadl_abs = 0;
-        inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
     }
     else
     {
-        /*other domains run in best effort mode*/
+        /* Best-effort extratime only. */
         inf->period    = WEIGHT_PERIOD;
         inf->slice     = 0;
-        inf->deadl_abs = 0;
-        inf->latency   = 0;
-        inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
-        inf->extraweight = 1;
     }
 
     inf->period_orig = inf->period; inf->slice_orig = inf->slice;
@@ -609,7 +608,16 @@ static void desched_extra_dom(s_time_t n
         PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n", 
               inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id,
               inf->short_block_lost_tot);
+#if 0
+        /*
+         * KAF: If we don't exit short-blocking state at this point
+         * domain0 can steal all CPU for up to 10 seconds before
+         * scheduling settles down (when competing against another
+         * CPU-bound domain). Doing this seems to make things behave
+         * nicely. Noone gets starved by default.
+         */
         if ( inf->short_block_lost_tot <= 0 )
+#endif
         {
             PRINT(4,"Domain %i.%i compensated short block loss!\n",
                   inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/schedule.c
--- a/xen/common/schedule.c     Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/schedule.c     Fri Jun 23 15:33:25 2006 -0600
@@ -33,8 +33,8 @@
 
 extern void arch_getdomaininfo_ctxt(struct vcpu *,
                                     struct vcpu_guest_context *);
-/* opt_sched: scheduler - default to SEDF */
-static char opt_sched[10] = "sedf";
+/* opt_sched: scheduler - default to credit */
+static char opt_sched[10] = "credit";
 string_param("sched", opt_sched);
 
 #define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */
diff -r 59d4c1863330 -r fdf25330e4a6 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/drivers/char/console.c        Fri Jun 23 15:33:25 2006 -0600
@@ -476,7 +476,11 @@ void init_console(void)
         if ( strncmp(p, "com", 3) == 0 )
             sercon_handle = serial_parse_handle(p);
         else if ( strncmp(p, "vga", 3) == 0 )
+        {
             vgacon_enabled = 1;
+            if ( strncmp(p+3, "[keep]", 6) == 0 )
+                vgacon_enabled++;
+        }
     }
 
     init_vga();
@@ -497,14 +501,47 @@ void init_console(void)
     if ( opt_sync_console )
     {
         serial_start_sync(sercon_handle);
+        add_taint(TAINT_SYNC_CONSOLE);
         printk("Console output is synchronous.\n");
     }
 }
 
-void console_endboot(int disable_vga)
-{
-    if ( disable_vga )
-        vgacon_enabled = 0;
+void console_endboot(void)
+{
+    int i, j;
+
+    if ( opt_sync_console )
+    {
+        printk("**********************************************\n");
+        printk("******* WARNING: CONSOLE OUTPUT IS SYCHRONOUS\n");
+        printk("******* This option is intended to aid debugging "
+               "of Xen by ensuring\n");
+        printk("******* that all output is synchronously delivered "
+               "on the serial line.\n");
+        printk("******* However it can introduce SIGNIFICANT latencies "
+               "and affect\n");
+        printk("******* timekeeping. It is NOT recommended for "
+               "production use!\n");
+        printk("**********************************************\n");
+        for ( i = 0; i < 3; i++ )
+        {
+            printk("%d... ", 3-i);
+            for ( j = 0; j < 100; j++ )
+            {
+                if ( softirq_pending(smp_processor_id()) )
+                    do_softirq();
+                mdelay(10);
+            }
+        }
+        printk("\n");
+    }
+
+    if ( vgacon_enabled )
+    {
+        vgacon_enabled--;
+        printk("Xen is %s VGA console.\n",
+               vgacon_enabled ? "keeping" : "relinquishing");
+    }
 
     /*
      * If user specifies so, we fool the switch routine to redirect input
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/acm/acm_core.h
--- a/xen/include/acm/acm_core.h        Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/acm/acm_core.h        Fri Jun 23 15:33:25 2006 -0600
@@ -121,10 +121,11 @@ int acm_init_domain_ssid(domid_t id, ssi
 int acm_init_domain_ssid(domid_t id, ssidref_t ssidref);
 void acm_free_domain_ssid(struct acm_ssid_domain *ssid);
 int acm_init_binary_policy(u32 policy_code);
-int acm_set_policy(void *buf, u32 buf_size, int isuserbuffer);
-int acm_get_policy(void *buf, u32 buf_size);
-int acm_dump_statistics(void *buf, u16 buf_size);
-int acm_get_ssid(ssidref_t ssidref, u8 *buf, u16 buf_size);
+int acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size);
+int do_acm_set_policy(void *buf, u32 buf_size);
+int acm_get_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size);
+int acm_dump_statistics(XEN_GUEST_HANDLE(void) buf, u16 buf_size);
+int acm_get_ssid(ssidref_t ssidref, XEN_GUEST_HANDLE(void) buf, u16 buf_size);
 int acm_get_decision(ssidref_t ssidref1, ssidref_t ssidref2, u32 hook);
 int acm_set_policy_reference(u8 * buf, u32 buf_size);
 int acm_dump_policy_reference(u8 *buf, u32 buf_size);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/acm/acm_hooks.h
--- a/xen/include/acm/acm_hooks.h       Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/acm/acm_hooks.h       Fri Jun 23 15:33:25 2006 -0600
@@ -273,7 +273,12 @@ static inline void acm_post_dom0_op(stru
             op->u.createdomain.domain, op->u.createdomain.ssidref);
         break;
     case DOM0_DESTROYDOMAIN:
-        acm_post_domain_destroy(ssid, op->u.destroydomain.domain);
+        if (*ssid == NULL) {
+            printkd("%s: ERROR. SSID unset.\n",
+                    __func__);
+            break;
+        }
+        acm_post_domain_destroy(*ssid, op->u.destroydomain.domain);
         /* free security ssid for the destroyed domain (also if null policy */
         acm_free_domain_ssid((struct acm_ssid_domain *)(*ssid));
         *ssid = NULL;
@@ -281,13 +286,22 @@ static inline void acm_post_dom0_op(stru
     }
 }
 
-static inline void acm_fail_dom0_op(struct dom0_op *op, void *ssid) 
+static inline void acm_fail_dom0_op(struct dom0_op *op, void **ssid)
 {
     switch(op->cmd) {
     case DOM0_CREATEDOMAIN:
         acm_fail_domain_create(
             current->domain->ssid, op->u.createdomain.ssidref);
         break;
+    case DOM0_DESTROYDOMAIN:
+        /*  we don't handle domain destroy failure but at least free the ssid 
*/
+        if (*ssid == NULL) {
+            printkd("%s: ERROR. SSID unset.\n",
+                    __func__);
+            break;
+        }
+        acm_free_domain_ssid((struct acm_ssid_domain *)(*ssid));
+        *ssid = NULL;
     }
 }
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-ia64/debugger.h
--- a/xen/include/asm-ia64/debugger.h   Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-ia64/debugger.h   Fri Jun 23 15:33:25 2006 -0600
@@ -41,6 +41,14 @@
 #include <xen/gdbstub.h>
 
 void show_registers(struct cpu_user_regs *regs);
+void dump_stack(void);
+
+static inline void
+show_execution_state(struct cpu_user_regs *regs)
+{
+    show_registers(regs);
+    dump_stack();
+}
 
 // NOTE: on xen struct pt_regs = struct cpu_user_regs
 //       see include/asm-ia64/linux-xen/asm/ptrace.h
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-ia64/vmx.h
--- a/xen/include/asm-ia64/vmx.h        Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-ia64/vmx.h        Fri Jun 23 15:33:25 2006 -0600
@@ -42,6 +42,7 @@ extern void vmx_save_state(struct vcpu *
 extern void vmx_save_state(struct vcpu *v);
 extern void vmx_load_state(struct vcpu *v);
 extern void show_registers(struct pt_regs *regs);
+#define show_execution_state show_registers
 extern int vmx_build_physmap_table(struct domain *d);
 extern unsigned long __gpfn_to_mfn_foreign(struct domain *d, unsigned long 
gpfn);
 extern void sync_split_caches(void);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-ia64/xenprocessor.h
--- a/xen/include/asm-ia64/xenprocessor.h       Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-ia64/xenprocessor.h       Fri Jun 23 15:33:25 2006 -0600
@@ -237,4 +237,6 @@ typedef union {
     u64 itir;
 } ia64_itir_t;
 
+#define dump_execution_state() printk("FIXME: implement ia64 
dump_execution_state()\n");
+
 #endif // _ASM_IA64_XENPROCESSOR_H
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/hvm/support.h Fri Jun 23 15:33:25 2006 -0600
@@ -132,7 +132,7 @@ extern unsigned int opt_hvm_debug_level;
 #define  __hvm_bug(regs)                                        \
     do {                                                        \
         printk("__hvm_bug at %s:%d\n", __FILE__, __LINE__);     \
-        show_registers(regs);                                   \
+        show_execution_state(regs);                             \
         domain_crash_synchronous();                             \
     } while (0)
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/mm.h  Fri Jun 23 15:33:25 2006 -0600
@@ -103,13 +103,11 @@ struct page_info
 #define PGT_high_mfn_mask   (0xfffUL << PGT_high_mfn_shift)
 #define PGT_mfn_mask        (((1U<<23)-1) | PGT_high_mfn_mask)
 #define PGT_high_mfn_nx     (0x800UL << PGT_high_mfn_shift)
-#define PGT_pae_idx_shift   PGT_high_mfn_shift
 #else
  /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */
 #define PGT_mfn_mask        ((1U<<23)-1)
  /* NX for PAE xen is not supported yet */
 #define PGT_high_mfn_nx     (1ULL << 63)
-#define PGT_pae_idx_shift   23
 #endif
 
 #define PGT_score_shift     23
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/processor.h   Fri Jun 23 15:33:25 2006 -0600
@@ -128,6 +128,13 @@
 /* 'arch_vcpu' flags values */
 #define _TF_kernel_mode        0
 #define TF_kernel_mode         (1<<_TF_kernel_mode)
+
+/* #PF error code values. */
+#define PGERR_page_present   (1U<<0)
+#define PGERR_write_access   (1U<<1)
+#define PGERR_user_mode      (1U<<2)
+#define PGERR_reserved_bit   (1U<<3)
+#define PGERR_instr_fetch    (1U<<4)
 
 #ifndef __ASSEMBLY__
 
@@ -522,10 +529,16 @@ extern always_inline void prefetchw(cons
 #endif
 
 void show_stack(struct cpu_user_regs *regs);
+void show_stack_overflow(unsigned long esp);
 void show_registers(struct cpu_user_regs *regs);
+void show_execution_state(struct cpu_user_regs *regs);
 void show_page_walk(unsigned long addr);
-int __spurious_page_fault(unsigned long addr);
 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
+
+/* Dumps current register and stack state. */
+#define dump_execution_state()                                              \
+    /* NB. Needs interrupts enabled else we end up in fatal_trap(). */      \
+    __asm__ __volatile__ ( "pushf ; sti ; ud2 ; .ascii \"dbg\" ; popf" )
 
 extern void mtrr_ap_init(void);
 extern void mtrr_bp_init(void);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/shadow.h      Fri Jun 23 15:33:25 2006 -0600
@@ -112,6 +112,30 @@ do {                                    
 } while (0)
 #endif
 
+#if CONFIG_PAGING_LEVELS >= 3
+static inline u64 get_cr3_idxval(struct vcpu *v)
+{
+    u64 pae_cr3;
+
+    if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 &&
+            !shadow_mode_log_dirty(v->domain) )
+    {
+        pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
+        return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
+    }
+    else
+        return 0;
+}
+
+#define shadow_key_t u64
+#define index_to_key(x) ((x) << 32)
+#else
+#define get_cr3_idxval(v) (0)
+#define shadow_key_t unsigned long
+#define index_to_key(x)  (0)
+#endif
+
+
 #define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) 
- (_max)) << 16) | (_min))
 #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
 #define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) 
>> 16))
@@ -309,7 +333,7 @@ extern unsigned long get_mfn_from_gpfn_f
 
 struct shadow_status {
     struct shadow_status *next;   /* Pull-to-front list per hash bucket. */
-    unsigned long gpfn_and_flags; /* Guest pfn plus flags. */
+    shadow_key_t  gpfn_and_flags; /* Guest pfn plus flags. */
     unsigned long smfn;           /* Shadow mfn.           */
 };
 
@@ -1180,7 +1204,13 @@ static inline unsigned long __shadow_sta
     struct domain *d, unsigned long gpfn, unsigned long stype)
 {
     struct shadow_status *p, *x, *head;
-    unsigned long key = gpfn | stype;
+    shadow_key_t key;
+#if CONFIG_PAGING_LEVELS >= 3
+    if ( d->arch.ops->guest_paging_levels == PAGING_L3 && stype == 
PGT_l4_shadow )
+        key = gpfn | stype | index_to_key(get_cr3_idxval(current));
+    else
+#endif
+        key = gpfn | stype;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(gpfn == (gpfn & PGT_mfn_mask));
@@ -1295,10 +1325,11 @@ shadow_max_pgtable_type(struct domain *d
 }
 
 static inline void delete_shadow_status(
-    struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int 
stype)
+    struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int 
stype, u64 index)
 {
     struct shadow_status *p, *x, *n, *head;
-    unsigned long key = gpfn | stype;
+
+    shadow_key_t key = gpfn | stype | index_to_key(index);
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(!(gpfn & ~PGT_mfn_mask));
@@ -1374,11 +1405,12 @@ static inline void delete_shadow_status(
 
 static inline void set_shadow_status(
     struct domain *d, unsigned long gpfn, unsigned long gmfn,
-    unsigned long smfn, unsigned long stype)
+    unsigned long smfn, unsigned long stype, u64 index)
 {
     struct shadow_status *x, *head, *extra;
     int i;
-    unsigned long key = gpfn | stype;
+
+    shadow_key_t key = gpfn | stype | index_to_key(index);
 
     SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype);
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h   Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/shadow_64.h   Fri Jun 23 15:33:25 2006 -0600
@@ -36,9 +36,9 @@
  */
 extern struct shadow_ops MODE_64_2_HANDLER;
 extern struct shadow_ops MODE_64_3_HANDLER;
+extern struct shadow_ops MODE_64_PAE_HANDLER;
 #if CONFIG_PAGING_LEVELS == 4
 extern struct shadow_ops MODE_64_4_HANDLER;
-extern struct shadow_ops MODE_64_PAE_HANDLER;
 #endif
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -65,10 +65,6 @@ typedef struct { intpte_t l4; } l4_pgent
 #define ESH_LOG(_f, _a...) ((void)0)
 #endif
 
-#define PAGING_L4      4UL
-#define PAGING_L3      3UL
-#define PAGING_L2      2UL
-#define PAGING_L1      1UL
 #define L_MASK  0xff
 
 #define PAE_PAGING_LEVELS   3
@@ -108,18 +104,14 @@ typedef struct { intpte_t lo; } pgentry_
 #define entry_has_changed(x,y,flags) \
         ( !!(((x).lo ^ (y).lo) & 
((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
 
+/******************************************************************************/
+/*
+ * The macro and inlines are for 32-bit PAE guest 
+ */
+#define PAE_PDPT_RESERVED   0x1e6 /* [8:5], [2,1] */
+
 #define PAE_SHADOW_SELF_ENTRY   259
 #define PAE_L3_PAGETABLE_ENTRIES   4
-
-/******************************************************************************/
-/*
- * The macro and inlines are for 32-bit PAE guest on 64-bit host
- */
-#define PAE_CR3_ALIGN       5
-#define PAE_CR3_IDX_MASK    0x7f
-#define PAE_CR3_IDX_NO      128
-
-#define PAE_PDPT_RESERVED   0x1e6 /* [8:5], [2,1] */
 
 
/******************************************************************************/
 static inline int  table_offset_64(unsigned long va, int level)
@@ -186,19 +178,10 @@ static inline int guest_table_offset_64(
     }
 }
 
-static inline unsigned long get_cr3_idxval(struct vcpu *v)
-{
-    unsigned long pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
-
-    return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
-}
-
-
 #define SH_GUEST_32PAE 1
 #else 
 #define guest_table_offset_64(va, level, index) \
             table_offset_64((va),(level))
-#define get_cr3_idxval(v) 0
 #define SH_GUEST_32PAE 0
 #endif
 
@@ -514,7 +497,10 @@ static inline void entry_general(
 
                 l1_p =(pgentry_64_t *)map_domain_page(smfn);
                 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
-                    entry_remove_flags(l1_p[i], _PAGE_RW);
+                {
+                    if ( mfn_is_page_table(entry_get_pfn(l1_p[i])) )
+                        entry_remove_flags(l1_p[i], _PAGE_RW);
+                }
 
                 unmap_domain_page(l1_p);
             }
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/shadow_ops.h
--- a/xen/include/asm-x86/shadow_ops.h  Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/shadow_ops.h  Fri Jun 23 15:33:25 2006 -0600
@@ -21,6 +21,14 @@
 
 #ifndef _XEN_SHADOW_OPS_H
 #define _XEN_SHADOW_OPS_H
+
+#define PAGING_L4      4UL
+#define PAGING_L3      3UL
+#define PAGING_L2      2UL
+#define PAGING_L1      1UL
+
+#define PAE_CR3_ALIGN       5
+#define PAE_CR3_IDX_MASK    0x7f
 
 #if defined( GUEST_PGENTRY_32 )
 
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/public/arch-x86_32.h  Fri Jun 23 15:33:25 2006 -0600
@@ -74,16 +74,23 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
  */
 #ifdef CONFIG_X86_PAE
 #define __HYPERVISOR_VIRT_START 0xF5800000
+#define __MACH2PHYS_VIRT_START  0xF5800000
+#define __MACH2PHYS_VIRT_END    0xF6800000
 #else
 #define __HYPERVISOR_VIRT_START 0xFC000000
+#define __MACH2PHYS_VIRT_START  0xFC000000
+#define __MACH2PHYS_VIRT_END    0xFC400000
 #endif
 
 #ifndef HYPERVISOR_VIRT_START
 #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
 #endif
 
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
 #ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
 #endif
 
 /* Maximum number of virtual CPUs in multi-processor guests. */
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/public/arch-x86_64.h  Fri Jun 23 15:33:25 2006 -0600
@@ -85,21 +85,25 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 
 #define __HYPERVISOR_VIRT_START 0xFFFF800000000000
 #define __HYPERVISOR_VIRT_END   0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START  0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END    0xFFFF804000000000
 
 #ifndef HYPERVISOR_VIRT_START
 #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
 #define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
 #endif
 
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
 /* Maximum number of virtual CPUs in multi-processor guests. */
 #define MAX_VIRT_CPUS 32
 
 #ifndef __ASSEMBLY__
-
-/* The machine->physical mapping table starts at this address, read-only. */
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
 
 /*
  * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/public/dom0_ops.h     Fri Jun 23 15:33:25 2006 -0600
@@ -513,6 +513,27 @@ struct dom0_hypercall_init {
 };
 typedef struct dom0_hypercall_init dom0_hypercall_init_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
+
+#define DOM0_DOMAIN_SETUP     49
+#define _XEN_DOMAINSETUP_hvm_guest 0
+#define XEN_DOMAINSETUP_hvm_guest  (1UL<<_XEN_DOMAINSETUP_hvm_guest)
+typedef struct dom0_domain_setup {
+    domid_t  domain;          /* domain to be affected */
+    unsigned long flags;      /* XEN_DOMAINSETUP_* */
+#ifdef __ia64__
+    unsigned long bp;         /* mpaddr of boot param area */
+    unsigned long maxmem;        /* Highest memory address for MDT.  */
+#endif
+} dom0_domain_setup_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_domain_setup_t);
+
+#define DOM0_SETTIMEOFFSET    50
+struct dom0_settimeoffset {
+    domid_t  domain;
+    int32_t  time_offset_seconds; /* applied to domain wallclock time */
+};
+typedef struct dom0_settimeoffset dom0_settimeoffset_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_settimeoffset_t);
 
 struct dom0_op {
     uint32_t cmd;
@@ -555,6 +576,8 @@ struct dom0_op {
         struct dom0_irq_permission    irq_permission;
         struct dom0_iomem_permission  iomem_permission;
         struct dom0_hypercall_init    hypercall_init;
+        struct dom0_domain_setup      domain_setup;
+        struct dom0_settimeoffset     settimeoffset;
         uint8_t                       pad[128];
     } u;
 };
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/memory.h
--- a/xen/include/public/memory.h       Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/public/memory.h       Fri Jun 23 15:33:25 2006 -0600
@@ -141,6 +141,20 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn
 DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
 
 /*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping     12
+struct xen_machphys_mapping {
+    unsigned long v_start, v_end; /* Start and end virtual addresses.   */
+    unsigned long max_mfn;        /* Maximum MFN that can be looked up. */
+};
+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
+
+/*
  * Sets the GPFN at which a particular page appears in the specified guest's
  * pseudophysical address space.
  * arg == addr of xen_add_to_physmap_t.
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/xen/console.h
--- a/xen/include/xen/console.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/xen/console.h Fri Jun 23 15:33:25 2006 -0600
@@ -15,7 +15,7 @@ long read_console_ring(XEN_GUEST_HANDLE(
 long read_console_ring(XEN_GUEST_HANDLE(char), u32 *, int);
 
 void init_console(void);
-void console_endboot(int disable_vga);
+void console_endboot(void);
 
 void console_force_unlock(void);
 void console_force_lock(void);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h     Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/xen/lib.h     Fri Jun 23 15:33:25 2006 -0600
@@ -82,6 +82,7 @@ unsigned long long parse_size_and_unit(c
 #define TAINT_UNSAFE_SMP                (1<<0)
 #define TAINT_MACHINE_CHECK             (1<<1)
 #define TAINT_BAD_PAGE                  (1<<2)
+#define TAINT_SYNC_CONSOLE              (1<<3)
 extern int tainted;
 #define TAINT_STRING_MAX_LEN            20
 extern char *print_tainted(char *str);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/xen/sched.h   Fri Jun 23 15:33:25 2006 -0600
@@ -159,6 +159,7 @@ struct domain
 
     /* OProfile support. */
     struct xenoprof *xenoprof;
+    int32_t time_offset_seconds;
 };
 
 struct domain_setup_info
diff -r 59d4c1863330 -r fdf25330e4a6 
patches/linux-2.6.16.13/ipv6-no-autoconf.patch
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/ipv6-no-autoconf.patch    Fri Jun 23 15:33:25 
2006 -0600
@@ -0,0 +1,23 @@
+ net/ipv6/addrconf.c |    2 ++
+ 1 files changed, 2 insertions(+)
+
+Index: build/net/ipv6/addrconf.c
+===================================================================
+--- build.orig/net/ipv6/addrconf.c
++++ build/net/ipv6/addrconf.c
+@@ -2462,6 +2462,7 @@ static void addrconf_dad_start(struct in
+       spin_lock_bh(&ifp->lock);
+ 
+       if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
++          !(dev->flags&IFF_MULTICAST) ||
+           !(ifp->flags&IFA_F_TENTATIVE)) {
+               ifp->flags &= ~IFA_F_TENTATIVE;
+               spin_unlock_bh(&ifp->lock);
+@@ -2546,6 +2547,7 @@ static void addrconf_dad_completed(struc
+       if (ifp->idev->cnf.forwarding == 0 &&
+           ifp->idev->cnf.rtr_solicits > 0 &&
+           (dev->flags&IFF_LOOPBACK) == 0 &&
++          (dev->flags & IFF_MULTICAST) &&
+           (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+               struct in6_addr all_routers;
+ 
diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/hw/piix4acpi.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ioemu/hw/piix4acpi.c        Fri Jun 23 15:33:25 2006 -0600
@@ -0,0 +1,481 @@
+/*
+ * PIIX4 ACPI controller emulation
+ *
+ * Winston liwen Wang, winston.l.wang@xxxxxxxxx
+ * Copyright (c) 2006 , Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "vl.h"
+#define FREQUENCE_PMTIMER  3753425
+/* acpi register bit define here  */
+
+/* PM1_STS                                             */
+#define TMROF_STS        (1 << 0)
+#define BM_STS                   (1 << 4)
+#define GBL_STS          (1 << 5)
+#define PWRBTN_STS       (1 << 8)
+#define RTC_STS          (1 << 10)
+#define PRBTNOR_STS       (1 << 11)
+#define WAK_STS          (1 << 15)
+/* PM1_EN                                              */
+#define TMROF_EN          (1 << 0)
+#define GBL_EN            (1 << 5)
+#define PWRBTN_EN         (1 << 8)
+#define RTC_EN           (1 << 10)
+/* PM1_CNT                                             */
+#define SCI_EN            (1 << 0)
+#define GBL_RLS           (1 << 2)
+#define SLP_EN           (1 << 13)
+
+/* Bits of PM1a register define here  */
+#define SLP_TYP_MASK    0x1C00
+#define SLP_VAL         0x1C00
+
+typedef struct AcpiDeviceState AcpiDeviceState;
+AcpiDeviceState *acpi_device_table;
+
+/* Bits of PM1a register define here  */
+typedef struct PMTState {
+    uint32_t count;
+    int irq;
+    uint64_t next_pm_time;
+    QEMUTimer *pm_timer;
+}PMTState;
+
+typedef struct PM1Event_BLK {
+    uint16_t pm1_status; /* pm1a_EVT_BLK */
+    uint16_t pm1_enable; /* pm1a_EVT_BLK+2 */
+}PM1Event_BLK;
+
+typedef struct PCIAcpiState {
+    PCIDevice dev;
+    uint16_t irq;
+    uint16_t pm1_status; /* pm1a_EVT_BLK */
+    uint16_t pm1_enable; /* pm1a_EVT_BLK+2 */
+    uint16_t pm1_control; /* pm1a_ECNT_BLK */
+    uint32_t pm1_timer; /* pmtmr_BLK */
+} PCIAcpiState;
+
+static PMTState *pmtimer_state;
+static PCIAcpiState *acpi_state;
+
+static void pmtimer_save(QEMUFile *f, void *opaque)
+{
+    PMTState *s = opaque;
+
+    qemu_put_be32s(f, &s->count);
+    qemu_put_be32s(f, &s->irq);
+    qemu_put_be64s(f, &s->next_pm_time);
+    qemu_put_timer(f, s->pm_timer);
+}
+
+static int pmtimer_load(QEMUFile *f, void *opaque, int version_id)
+{
+    PMTState *s = opaque;
+
+    if (version_id != 1)
+        return -EINVAL;
+    qemu_get_be32s(f, &s->count);
+    qemu_get_be32s(f, &s->irq);
+    qemu_get_be64s(f, &s->next_pm_time);
+    qemu_get_timer(f, s->pm_timer);
+    return 0;
+
+}
+
+static inline void acpi_set_irq(PCIAcpiState *s)
+{
+/* no real SCI event need for now, so comment the following line out */
+/*  pic_set_irq(s->irq, 1); */
+    printf("acpi_set_irq: s->irq %x \n",s->irq);
+}
+
+static void pm_timer_update(void *opaque)
+{
+    PMTState *s = opaque;
+    s->next_pm_time += muldiv64(1, ticks_per_sec,FREQUENCE_PMTIMER);
+    qemu_mod_timer(s->pm_timer, s->next_pm_time);
+    acpi_state->pm1_timer ++;
+
+    /* If pm timer is zero then reset it to zero. */
+    if (acpi_state->pm1_timer >= 0x1000000) {
+/*      printf("pm_timerupdate: timer overflow: %x \n", 
acpi_state->pm1_timer); */
+
+        acpi_state->pm1_timer = 0;
+        acpi_state->pm1_status =   acpi_state->pm1_status | TMROF_STS;
+        /* If TMROF_EN is set then send the irq. */
+        if ((acpi_state->pm1_enable & TMROF_EN) == TMROF_EN) {
+            acpi_set_irq(acpi_state);
+            acpi_state->pm1_enable = 0x00; /* only need one time...*/
+        }
+    }
+    s->count = acpi_state->pm1_timer;
+}
+
+static PMTState *pmtimer_init(void)
+{
+    PMTState *s;
+
+    s = qemu_mallocz(sizeof(PMTState));
+    if (!s)
+        return NULL;
+
+    /* s->irq = irq;    */
+
+    s->pm_timer = qemu_new_timer(vm_clock, pm_timer_update, s);
+
+    s->count = 0;
+    s->next_pm_time = qemu_get_clock(vm_clock) + muldiv64(1, 
ticks_per_sec,FREQUENCE_PMTIMER) + 1;
+    qemu_mod_timer(s->pm_timer, s->next_pm_time);
+
+    register_savevm("pm timer", 1, 1, pmtimer_save, pmtimer_load, s);
+    return s;
+}
+
+static void acpi_reset(PCIAcpiState *s)
+{
+    uint8_t *pci_conf;
+    pci_conf = s->dev.config;
+
+    pci_conf[0x42] = 0x00;
+    pci_conf[0x43] = 0x00;
+    s->irq = 9;
+    s->pm1_status = 0;
+    s->pm1_enable = 0x00;    /* TMROF_EN should cleared */
+    s->pm1_control = SCI_EN; /* SCI_EN */
+    s->pm1_timer = 0;
+}
+
+/*byte access  */
+static void acpiPm1Status_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+    
+    if ((val&TMROF_STS)==TMROF_STS)
+        s->pm1_status = s->pm1_status&!TMROF_STS;
+
+    if ((val&GBL_STS)==GBL_STS)
+        s->pm1_status = s->pm1_status&!GBL_STS;
+
+/*     printf("acpiPm1Status_writeb \n addr %x val:%x pm1_status:%x \n", addr, 
val,s->pm1_status); */
+}
+
+static uint32_t acpiPm1Status_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_status;
+/*         printf("acpiPm1Status_readb \n addr %x val:%x\n", addr, val); */
+
+   return val;
+}
+
+static void acpiPm1StatusP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+     s->pm1_status = (val<<8)||(s->pm1_status);
+/*     printf("acpiPm1StatusP1_writeb \n addr %x val:%x\n", addr, val); */
+}
+
+static uint32_t acpiPm1StatusP1_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_status)>>8;
+    printf("acpiPm1StatusP1_readb \n addr %x val:%x\n", addr, val);
+
+    return val;
+}
+
+static void acpiPm1Enable_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_enable = val;
+/*   printf("acpiPm1Enable_writeb \n addr %x val:%x\n", addr, val); */
+}
+
+static uint32_t acpiPm1Enable_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_enable)||0x1;
+/*  printf("acpiPm1Enable_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1EnableP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_enable = (val<<8)||(s->pm1_enable);
+/*    printf("acpiPm1EnableP1_writeb \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1EnableP1_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_enable)>>8;
+/*  printf("acpiPm1EnableP1_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1Control_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_control = val;
+/*  printf("acpiPm1Control_writeb \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Control_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_control;
+/*    printf("acpiPm1Control_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1ControlP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_control = (val<<8)||(s->pm1_control);
+/*    printf("acpiPm1ControlP1_writeb \n addr %x val:%x\n", addr, val); */
+
+    // Check for power off request
+
+    if (((val & SLP_EN) != 0) &&
+        ((val & SLP_TYP_MASK) == SLP_VAL)) {
+        s->pm1_timer=0x0; //clear ACPI timer
+        qemu_system_shutdown_request();
+    }
+}
+
+static uint32_t acpiPm1ControlP1_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_control)>>8;
+/*    printf("acpiPm1ControlP1_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+
+/* word access   */
+
+static void acpiPm1Status_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    if ((val&TMROF_STS)==TMROF_STS)
+        s->pm1_status = s->pm1_status&!TMROF_STS;
+
+    if ((val&GBL_STS)==GBL_STS)
+        s->pm1_status = s->pm1_status&!GBL_STS;
+
+/*    printf("acpiPm1Status_writew \n addr %x val:%x pm1_status:%x \n", addr, 
val,s->pm1_status); */
+}
+
+static uint32_t acpiPm1Status_readw(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_status;
+/*    printf("acpiPm1Status_readw \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1Enable_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_enable = val;
+/*    printf("acpiPm1Enable_writew \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Enable_readw(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_enable;
+/*    printf("acpiPm1Enable_readw \n addr %x val:%x\n", addr, val); */
+
+   return val;
+}
+
+static void acpiPm1Control_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_control = val;
+/*    printf("acpiPm1Control_writew \n addr %x val:%x\n", addr, val); */
+
+    // Check for power off request
+
+    if (((val & SLP_EN) != 0) &&
+        ((val & SLP_TYP_MASK) == SLP_VAL)) {
+        qemu_system_shutdown_request();
+    }
+
+}
+
+static uint32_t acpiPm1Control_readw(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_control;
+/*    printf("acpiPm1Control_readw \n addr %x val:%x\n", addr, val);  */
+
+    return val;
+}
+
+/* dword access */
+
+static void acpiPm1Event_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_status = val;
+    s->pm1_enable = val>>16;
+/*     printf("acpiPm1Event_writel \n addr %x val:%x \n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Event_readl(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_status|(s->pm1_enable<<16);
+/*    printf("acpiPm1Event_readl \n addr %x val:%x\n", addr, val);    */
+
+    return val;
+}
+
+static void acpiPm1Timer_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_timer = val;
+/*    printf("acpiPm1Timer_writel \n addr %x val:%x\n", addr, val); */
+}
+
+static uint32_t acpiPm1Timer_readl(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_timer;
+/*    printf("acpiPm1Timer_readl \n addr %x val:%x\n", addr, val); */
+    return val;
+}
+
+static void acpi_map(PCIDevice *pci_dev, int region_num,
+                    uint32_t addr, uint32_t size, int type)
+{
+    PCIAcpiState *d = (PCIAcpiState *)pci_dev;
+
+    printf("register acpi io\n");
+
+    /* Byte access */
+    register_ioport_write(addr, 1, 1, acpiPm1Status_writeb, d);
+    register_ioport_read(addr, 1, 1, acpiPm1Status_readb, d);
+    register_ioport_write(addr+1, 1, 1, acpiPm1StatusP1_writeb, d);
+    register_ioport_read(addr+1, 1, 1, acpiPm1StatusP1_readb, d);
+
+    register_ioport_write(addr + 2, 1, 1, acpiPm1Enable_writeb, d);
+    register_ioport_read(addr + 2, 1, 1, acpiPm1Enable_readb, d);
+    register_ioport_write(addr + 2 +1, 1, 1, acpiPm1EnableP1_writeb, d);
+    register_ioport_read(addr + 2 +1, 1, 1, acpiPm1EnableP1_readb, d);
+
+    register_ioport_write(addr + 4, 1, 1, acpiPm1Control_writeb, d);
+    register_ioport_read(addr + 4, 1, 1, acpiPm1Control_readb, d);
+    register_ioport_write(addr + 4 + 1, 1, 1, acpiPm1ControlP1_writeb, d);
+    register_ioport_read(addr + 4 +1, 1, 1, acpiPm1ControlP1_readb, d);
+
+    /* Word access */
+    register_ioport_write(addr, 2, 2, acpiPm1Status_writew, d);
+    register_ioport_read(addr, 2, 2, acpiPm1Status_readw, d);
+
+    register_ioport_write(addr + 2, 2, 2, acpiPm1Enable_writew, d);
+    register_ioport_read(addr + 2, 2, 2, acpiPm1Enable_readw, d);
+
+    register_ioport_write(addr + 4, 2, 2, acpiPm1Control_writew, d);
+    register_ioport_read(addr + 4, 2, 2, acpiPm1Control_readw, d);
+
+    /* DWord access */
+    register_ioport_write(addr, 4, 4, acpiPm1Event_writel, d);
+    register_ioport_read(addr, 4, 4, acpiPm1Event_readl, d);
+
+    register_ioport_write(addr + 8, 4, 4, acpiPm1Timer_writel, d);
+    register_ioport_read(addr + 8, 4, 4, acpiPm1Timer_readl, d);
+}
+
+/* PIIX4 acpi pci configuration space, func 3 */
+void pci_piix4_acpi_init(PCIBus *bus)
+{
+    PCIAcpiState *d;
+    uint8_t *pci_conf;
+
+    /* register a function 3 of PIIX4 */
+    d = (PCIAcpiState *)pci_register_device(
+        bus, "PIIX4 ACPI", sizeof(PCIAcpiState),
+        ((PCIDevice *)piix3_state)->devfn + 3, NULL, NULL);
+
+    acpi_state = d;
+    pci_conf = d->dev.config;
+    pci_conf[0x00] = 0x86;  /* Intel */
+    pci_conf[0x01] = 0x80;
+    pci_conf[0x02] = 0x13;
+    pci_conf[0x03] = 0x71;
+    pci_conf[0x08] = 0x01;  /* B0 stepping */
+    pci_conf[0x09] = 0x00;  /* base class */
+    pci_conf[0x0a] = 0x80;  /* Sub class */
+    pci_conf[0x0b] = 0x06;
+    pci_conf[0x0e] = 0x00;
+    pci_conf[0x3d] = 0x01;  /* Hardwired to PIRQA is used */
+
+    pci_register_io_region((PCIDevice *)d, 4, 0x10,
+                           PCI_ADDRESS_SPACE_IO, acpi_map);
+    pmtimer_state = pmtimer_init();
+    acpi_reset (d);
+}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/util/SSHTransport.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/util/SSHTransport.py     Fri Jun 23 15:33:25 2006 -0600
@@ -0,0 +1,102 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2006 Anthony Liguori <aliguori@xxxxxxxxxx>
+# Copyright (C) 2006 XenSource Inc.
+#============================================================================
+
+"""
+XML-RPC SSH transport.
+"""
+
+from xmlrpclib import getparser, Fault
+from subprocess import Popen, PIPE
+from getpass import getuser
+from fcntl import ioctl
+import errno
+import os
+import termios
+
+
+def getHTTPURI(uri):
+    (protocol, rest) = uri.split(':', 1)
+    if not rest.startswith('//'):
+        raise ValueError("Invalid ssh URL '%s'" % uri)
+    rest = rest[2:]
+    user = getuser()
+    path = 'RPC2'
+    if rest.find('@') != -1:
+        (user, rest) = rest.split('@', 1)
+    if rest.find('/') != -1:
+        (host, rest) = rest.split('/', 1)
+        if len(rest) > 0:
+            path = rest
+    else:
+        host = rest
+    transport = SSHTransport(host, user)
+    uri = 'http://%s/%s' % (host, path)
+    return transport, uri
+
+
+class SSHTransport(object):
+    def __init__(self, host, user, askpass=None):
+        self.host = host
+        self.user = user
+        self.askpass = askpass
+        self.ssh = None
+
+    def getssh(self):
+        if self.ssh == None:
+            if self.askpass:
+                f = open('/dev/tty', 'w')
+                try:
+                    os.environ['SSH_ASKPASS'] = self.askpass
+                    ioctl(f.fileno(), termios.TIOCNOTTY)
+                finally:
+                    f.close()
+
+            cmd = ['ssh', '%s@%s' % (self.user, self.host), 'xm serve']
+            try:
+                self.ssh = Popen(cmd, bufsize=0, stdin=PIPE, stdout=PIPE)
+            except OSError, (err, msg):
+                if err == errno.ENOENT:
+                    raise Fault(0, "ssh executable not found!")
+                raise
+        return self.ssh
+
+    def request(self, host, handler, request_body, verbose=0):
+        p, u = getparser()
+        ssh = self.getssh()
+        ssh.stdin.write("""POST /%s HTTP/1.1
+User-Agent: Xen
+Host: %s
+Content-Type: text/xml
+Content-Length: %d
+
+%s""" % (handler, host, len(request_body), request_body))
+        ssh.stdin.flush()
+
+        content_length = 0
+        line = ssh.stdout.readline()
+        if line.split()[1] != '200':
+            raise Fault(0, 'Server returned %s' % (' '.join(line[1:])))
+        
+        while line not in ['', '\r\n', '\n']:
+            if line.lower().startswith('content-length:'):
+                content_length = int(line[15:].strip())
+            line = ssh.stdout.readline()
+        content = ssh.stdout.read(content_length)
+        p.feed(content)
+        p.close()
+        return u.close()
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/Makefile
--- a/tools/blktap/Makefile     Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-MAJOR    = 3.0
-MINOR    = 0
-SONAME   = libblktap.so.$(MAJOR)
-
-XEN_ROOT = ../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-SUBDIRS :=
-SUBDIRS += ublkback
-#SUBDIRS += parallax
-
-BLKTAP_INSTALL_DIR = /usr/sbin
-
-INSTALL            = install
-INSTALL_PROG       = $(INSTALL) -m0755
-INSTALL_DIR        = $(INSTALL) -d -m0755
-
-INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
-
-LIBS     := -lpthread -lz
-
-SRCS     :=
-SRCS     += blktaplib.c xenbus.c blkif.c
-
-CFLAGS   += -Werror
-CFLAGS   += -Wno-unused
-CFLAGS   += -fno-strict-aliasing
-CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# get asprintf():
-CFLAGS   += -D _GNU_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-CFLAGS   += $(INCLUDES) 
-DEPS     = .*.d
-
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-IBINS   :=
-#IBINS   += blkdump
-
-LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
-
-.PHONY: all
-all: mk-symlinks libblktap.so #blkdump
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: install
-install: all
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/include
-       $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
-       #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: clean
-clean:
-       rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: rpm
-rpm: all
-       rm -rf staging
-       mkdir staging
-       mkdir staging/i386
-       rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
-               --define "_rpmdir$$PWD/staging" -bb rpm.spec
-       mv staging/i386/*.rpm .
-       rm -rf staging
-
-libblktap.so: $(OBJS) 
-       $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared         \
-             -L$(XEN_XENSTORE) -l xenstore                       \
-             -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
-       ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
-       ln -sf libblktap.so.$(MAJOR) $@
-
-blkdump: libblktap.so
-       $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \
-             -l blktap blkdump.c
-
-.PHONY: TAGS clean install mk-symlinks rpm
-
-.PHONY: TAGS
-TAGS:
-       etags -t $(SRCS) *.h
-
--include $(DEPS)
-
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/README
--- a/tools/blktap/README       Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-Block Tap User-level Interfaces
-Andrew Warfield
-andrew.warfield@xxxxxxxxxxxx
-February 8, 2005
-
-NOTE #1: The blktap is _experimental_ code.  It works for me.  Your
-mileage may vary.  Don't use it for anything important.  Please. ;)
-
-NOTE #2: All of the interfaces here are likely to change.  This is all
-early code, and I am checking it in because others want to play with
-it.  If you use it for anything, please let me know!
-
-Overview:
----------
-
-This directory contains a library and set of example applications for
-the block tap device.  The block tap hooks into the split block device
-interfaces above Xen allowing them to be extended.  This extension can
-be done in userspace with the help of a library.
-
-The tap can be installed either as an interposition domain in between
-a frontend and backend driver pair, or as a terminating backend, in
-which case it is responsible for serving all requests itself.
-
-There are two reasons that you might want to use the tap,
-corresponding to these configurations:
-
- 1. To examine or modify a stream of block requests while they are
-    in-flight (e.g. to encrypt data, or add data-driven watchpoints)
-
- 2. To prototype a new backend driver, serving requests from the tap
-    rather than passing them along to the XenLinux blkback driver.
-    (e.g. to forward block requests to a remote host)
-
-
-Interface:
-----------
-
-At the moment, the tap interface is similar in spirit to that of the
-Linux netfilter.  Requests are messages from a client (frontend)
-domain to a disk (backend) domain.  Responses are messages travelling
-back, acknowledging the completion of a request.  the library allows
-chains of functions to be attached to these events.  In addition,
-hooks may be attached to handle control messages, which signify things
-like connections from new domains.
-
-At present the control messages especially expose a lot of the
-underlying driver interfaces.  This may change in the future in order
-to simplify writing hooks.
-
-Here are the public interfaces:
-
-These allow hook functions to be chained:
-
- void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
- void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
- void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
-
-This allows a response to be injected, in the case where a request has
-been removed using BLKTAP_STOLEN.
-
- void blktap_inject_response(blkif_response_t *);
-
-These let you add file descriptors and handlers to the main poll loop:
-
- int  blktap_attach_poll(int fd, short events, int (*func)(int));
- void blktap_detach_poll(int fd);
-
-This starts the main poll loop:
-
- int  blktap_listen(void);
-
-Example:
---------
-
-blkimage.c uses an image on the local file system to serve requests to
-a domain.  Here's what it looks like:
-
----[blkimg.c]---
-
-/* blkimg.c
- *
- * file-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkimglib.h"
-
-
-int main(int argc, char *argv[])
-{
-    image_init();
-    
-    blktap_register_ctrl_hook("image_control", image_control);
-    blktap_register_request_hook("image_request", image_request);
-    blktap_listen();
-    
-    return 0;
-}
-
-----------------
-
-All of the real work is in blkimglib.c, but this illustrates the
-actual tap interface well enough.  image_control() will be called with
-all control messages.  image_request() handles requests.  As it reads
-from an on-disk image file, no requests are ever passed on to a
-backend, and so there will be no responses to process -- so there is
-nothing registered as a response hook.
-
-Other examples:
----------------
-
-Here is a list of other examples in the directory:
-
-Things that terminate a block request stream:
-
-  blkimg    - Use a image file/device to serve requests
-  blkgnbd   - Use a remote gnbd server to serve requests
-  blkaio    - Use libaio... (DOES NOT WORK)
-  
-Things that don't:
-
-  blkdump   - Print in-flight requests.
-  blkcow    - Really inefficient copy-on-write disks using libdb to store
-              writes.
-
-There are examples of plugging these things together, for instance
-blkcowgnbd is a read-only gnbd device with copy-on-write to a local
-file.
-
-TODO:
------
-
-- Make session tracking work.  At the moment these generally just handle a 
-  single front-end client at a time.
-
-- Integrate with Xend.  Need to cleanly pass a image identifier in the connect
-  message.
-
-- Make an asynchronous file-io terminator.  The libaio attempt is
-  tragically stalled because mapped foreign pages make pfn_valid fail
-  (they are VM_IO), and so cannot be passed to aio as targets.  A
-  better solution may be to tear the disk interfaces out of the real
-  backend and expose them somehow.
-
-- Make CoW suck less.
-
-- Do something more along the lines of dynamic linking for the
-  plugins, so thatthey don't all need a new main().
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/README.sept05
--- a/tools/blktap/README.sept05        Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-The blktap has been rewritten substantially based on the current
-blkback driver.  I've removed passthrough support, as this is broken
-by the move to grant tables and the lack of transitive grants.  A
-blktap VM is now only capable of terminating block requests in
-userspace.
-
-ublkback/ contains a _very_ initial cut at a user-level version of the block
-backend driver.  It gives a working example of how the current tap
-interfaces are used, in particular w.r.t. the vbd directories in
-xenstore.
-
-parallax/ contains fairly recent parallax code.  This does not run on
-the changed blktap interface, but should only be a couple of hours
-work to get going again.
-
-All of the tricky bits are done, but there is plenty of cleaning to
-do, and the top-level functionality is not here yet.  At the moment,
-the daemon ignores the pdev requested by the tools and opens the file 
-or device specified by TMP_IMAGE_FILE_NAME in ublkback.c.
-
-TODO:
-1. Fix to allow pdev in the store to specify the device to open.
-2. Add support (to tools as well) to mount arbitrary files...
-   just write the filename to mount into the store, instead of pdev.
-3. Reeximine blkif refcounting, it is almost certainly broken at the moment.
-   - creating a blkif should take a reference.
-   - each inflight request should take a reference on dequeue in blktaplib
-   - sending responses should drop refs.
-   - blkif should be implicitly freed when refcounts fall to 0.
-4. Modify the parallax req/rsp code as per ublkback to use the new tap 
-   interfaces. 
-5. Write a front end that allows parallax and normal mounts to coexist
-6. Allow blkback and blktap to run at the same time.
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blkdump.c
--- a/tools/blktap/blkdump.c    Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-/* blkdump.c
- *
- * show a running trace of block requests as they fly by.
- * 
- * (c) 2004 Andrew Warfield.
- */
- 
-#include <stdio.h>
-#include "blktaplib.h"
- 
-int request_print(blkif_request_t *req)
-{
-    int i;
-    
-    if ( (req->operation == BLKIF_OP_READ) ||
-         (req->operation == BLKIF_OP_WRITE) )
-    {
-        printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 
-                ID_TO_DOM(req->id), ID_TO_IDX(req->id), 
-                blkif_op_name[req->operation], 
-                req->nr_segments, req->handle, 
-                req->sector_number);
-        
-        
-        for (i=0; i < req->nr_segments; i++) {
-            printf("              (gref: 0x%8x start: %u stop: %u)\n",
-                   req->seg[i].gref,
-                   req->seg[i].first_sect,
-                   req->seg[i].last_sect);
-        }
-            
-    } else {
-        printf("Unknown request message type.\n");
-    }
-    
-    return BLKTAP_PASS;
-}
-
-int response_print(blkif_response_t *rsp)
-{   
-    if ( (rsp->operation == BLKIF_OP_READ) ||
-         (rsp->operation == BLKIF_OP_WRITE) )
-    {
-        printf("[%2u:%2u>%5s] (status: %d)\n", 
-                ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 
-                blkif_op_name[rsp->operation], 
-                rsp->status);
-            
-    } else {
-        printf("Unknown request message type.\n");
-    }
-    return BLKTAP_PASS;
-}
-
-int main(int argc, char *argv[])
-{
-    blktap_register_request_hook("request_print", request_print);
-    blktap_register_response_hook("response_print", response_print);
-    blktap_listen();
-    
-    return 0;
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blkif.c
--- a/tools/blktap/blkif.c      Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,212 +0,0 @@
-/*
- * blkif.c
- * 
- * The blkif interface for blktap.  A blkif describes an in-use virtual disk.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <err.h>
-
-#include "blktaplib.h"
-
-#if 1
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
-        blkif = blkif->hash_next;
-    return blkif;
-}
-
-blkif_t *alloc_blkif(domid_t domid)
-{
-    blkif_t *blkif;
-
-    blkif = (blkif_t *)malloc(sizeof(blkif_t));
-    if (!blkif)
-        return NULL;
-
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid = domid;
-
-    return blkif;
-}
-
-static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
-{
-    new_blkif_hook = fn;
-}
-
-int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
-               long int readonly)
-{
-    domid_t domid;
-    blkif_t **pblkif;
-    
-    if (blkif == NULL)
-        return -EINVAL;
-
-    domid = blkif->domid;
-    blkif->handle   = handle;
-    blkif->pdev     = pdev;
-    blkif->readonly = readonly;
-
-    /*
-     * Call out to the new_blkif_hook. The tap application should define this,
-     * and it should return having set blkif->ops
-     * 
-     */
-    if (new_blkif_hook == NULL)
-    {
-        warn("Probe detected a new blkif, but no new_blkif_hook!");
-        return -1;
-    }
-    new_blkif_hook(blkif);
-
-    /* Now wire it in. */
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTF("Could not create blkif: already exists\n");
-            return -1;
-        }
-        pblkif = &(*pblkif)->hash_next;
-    }
-    blkif->hash_next = NULL;
-    *pblkif = blkif;
-
-    return 0;
-}
-
-void free_blkif(blkif_t *blkif)
-{
-    blkif_t **pblkif, *curs;
-    
-    pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
-    while ( (curs = *pblkif) != NULL )
-    {
-        if ( blkif == curs )
-        {
-            *pblkif = curs->hash_next;
-        }
-        pblkif = &curs->hash_next;
-    }
-    free(blkif);
-}
-
-void blkif_register_request_hook(blkif_t *blkif, char *name, 
-                                 int (*rh)(blkif_t *, blkif_request_t *, int)) 
-{
-    request_hook_t *rh_ent, **c;
-    
-    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
-    if (!rh_ent) 
-    {
-        warn("couldn't allocate a new hook");
-        return;
-    }
-    
-    rh_ent->func  = rh;
-    rh_ent->next = NULL;
-    if (asprintf(&rh_ent->name, "%s", name) == -1)
-    {
-        free(rh_ent);
-        warn("couldn't allocate a new hook name");
-        return;
-    }
-    
-    c = &blkif->request_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = rh_ent;
-}
-
-void blkif_register_response_hook(blkif_t *blkif, char *name, 
-                                  int (*rh)(blkif_t *, blkif_response_t *, 
int)) 
-{
-    response_hook_t *rh_ent, **c;
-    
-    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
-    if (!rh_ent) 
-    { 
-        warn("couldn't allocate a new hook");
-        return;
-    }
-    
-    rh_ent->func  = rh;
-    rh_ent->next = NULL;
-    if (asprintf(&rh_ent->name, "%s", name) == -1)
-    {
-        free(rh_ent);
-        warn("couldn't allocate a new hook name");
-        return;
-    }
-    
-    c = &blkif->response_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = rh_ent;
-}
-
-void blkif_print_hooks(blkif_t *blkif)
-{
-    request_hook_t  *req_hook;
-    response_hook_t *rsp_hook;
-    
-    DPRINTF("Request Hooks:\n");
-    req_hook = blkif->request_hook_chain;
-    while (req_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", req_hook->func, req_hook->name);
-        req_hook = req_hook->next;
-    }
-    
-    DPRINTF("Response Hooks:\n");
-    rsp_hook = blkif->response_hook_chain;
-    while (rsp_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
-        rsp_hook = rsp_hook->next;
-    }
-}
-
-
-long int vbd_size(blkif_t *blkif)
-{
-    return 1000000000;
-}
-
-long int vbd_secsize(blkif_t *blkif)
-{
-    return 512;
-}
-
-unsigned vbd_info(blkif_t *blkif)
-{
-    return 0;
-}
-
-
-void __init_blkif(void)
-{    
-    memset(blkif_hash, 0, sizeof(blkif_hash));
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blktaplib.c
--- a/tools/blktap/blktaplib.c  Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,453 +0,0 @@
-/*
- * blktaplib.c
- * 
- * userspace interface routines for the blktap driver.
- *
- * (threadsafe(r) version) 
- *
- * (c) 2004 Andrew Warfield.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/user.h>
-#include <err.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <linux/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/ioctl.h>
-#include <string.h>
-#include <unistd.h>
-#include <pthread.h>
-#include <xs.h>
-                                                                     
-#define __COMPILING_BLKTAP_LIB
-#include "blktaplib.h"
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-#define DEBUG_RING_IDXS 0
-
-#define POLLRDNORM     0x040 
-
-#define BLKTAP_IOCTL_KICK 1
-
-
-void got_sig_bus();
-void got_sig_int();
-
-/* in kernel these are opposite, but we are a consumer now. */
-blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
-blkif_front_ring_t be_ring; 
-
-unsigned long mmap_vstart = 0;
-char *blktap_mem;
-int fd = 0;
-
-#define BLKTAP_RING_PAGES       1 /* Front */
-#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
-    
-int bad_count = 0;
-void bad(void)
-{
-    bad_count ++;
-    if (bad_count > 50) exit(0);
-}
-/*-----[ ID Manipulation from tap driver code ]--------------------------*/
-
-#define ACTIVE_RING_IDX unsigned short
-
-inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
-{
-    return ( (fe_dom << 16) | idx );
-}
-
-inline unsigned int ID_TO_IDX(unsigned long id) 
-{ 
-        return ( id & 0x0000ffff );
-}
-
-inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
-
-static int (*request_hook)(blkif_request_t *req) = NULL;
-static int (*response_hook)(blkif_response_t *req) = NULL;
-        
-/*-----[ Data to/from Backend (server) VM ]------------------------------*/
-
-/*
-
-inline int write_req_to_be_ring(blkif_request_t *req)
-{
-    blkif_request_t *req_d;
-    static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-    pthread_mutex_lock(&be_prod_mutex);
-    req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
-    memcpy(req_d, req, sizeof(blkif_request_t));
-    wmb();
-    be_ring.req_prod_pvt++;
-    pthread_mutex_unlock(&be_prod_mutex);
-    
-    return 0;
-}
-*/
-
-inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
-{
-    blkif_response_t *rsp_d;
-    static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-    pthread_mutex_lock(&fe_prod_mutex);
-    rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
-    memcpy(rsp_d, rsp, sizeof(blkif_response_t));
-    wmb();
-    fe_ring.rsp_prod_pvt++;
-    pthread_mutex_unlock(&fe_prod_mutex);
-
-    return 0;
-}
-
-static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
-{
-    response_hook_t  *rsp_hook;
-    
-    rsp_hook = blkif->response_hook_chain;
-    while (rsp_hook != NULL)
-    {
-        switch(rsp_hook->func(blkif, rsp, 1))
-        {
-        case BLKTAP_PASS:
-            break;
-        default:
-            printf("Only PASS is supported for resp hooks!\n");
-        }
-        rsp_hook = rsp_hook->next;
-    }
-}
-
-
-static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
-{
-    
-    apply_rsp_hooks(blkif, rsp);
-  
-    write_rsp_to_fe_ring(rsp);
-}
-
-void blktap_kick_responses(void)
-{
-    pthread_mutex_lock(&push_mutex);
-    
-    RING_PUSH_RESPONSES(&fe_ring);
-    ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-    
-    pthread_mutex_unlock(&push_mutex);
-}
-
-/*-----[ Polling fd listeners ]------------------------------------------*/
-
-#define MAX_POLLFDS 64
-
-typedef struct {
-    int (*func)(int fd);
-    struct pollfd *pfd;
-    int fd;
-    short events;
-    int active;
-} pollhook_t;
-
-static struct pollfd  pfd[MAX_POLLFDS+2]; /* tap and store are extra */
-static pollhook_t     pollhooks[MAX_POLLFDS];
-static unsigned int   ph_freelist[MAX_POLLFDS];
-static unsigned int   ph_cons, ph_prod;
-#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
-#define PH_IDX(x) (x % MAX_POLLFDS)
-
-int blktap_attach_poll(int fd, short events, int (*func)(int fd))
-{
-    pollhook_t *ph;
-    
-    if (nr_pollhooks() == MAX_POLLFDS) {
-        printf("Too many pollhooks!\n");
-        return -1;
-    }
-    
-    ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
-    
-    ph->func        = func;
-    ph->fd          = fd;
-    ph->events      = events;
-    ph->active      = 1;
-    
-    DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1, 
-            nr_pollhooks());
-    
-    return 0;
-}
-
-void blktap_detach_poll(int fd)
-{
-    int i;
-    
-    for (i=0; i<MAX_POLLFDS; i++)
-        if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
-            ph_freelist[PH_IDX(ph_prod++)] = i;
-            pollhooks[i].pfd->fd = -1;
-            pollhooks[i].active = 0;
-            break;
-        }
-        
-    DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i, 
-            nr_pollhooks());
-}
-
-void pollhook_init(void)
-{
-    int i;
-    
-    for (i=0; i < MAX_POLLFDS; i++) {
-        ph_freelist[i] = (i+1) % MAX_POLLFDS;
-        pollhooks[i].active = 0;
-    }
-    
-    ph_cons = 0;
-    ph_prod = MAX_POLLFDS;
-}
-
-void __attribute__ ((constructor)) blktaplib_init(void)
-{
-    pollhook_init();
-}
-
-/*-----[ The main listen loop ]------------------------------------------*/
-
-int blktap_listen(void)
-{
-    int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
-    struct xs_handle *h;
-    blkif_t *blkif;
-
-    /* comms rings: */
-    blkif_request_t  *req;
-    blkif_response_t *rsp;
-    blkif_sring_t    *sring;
-    RING_IDX          rp, i, pfd_count; 
-    
-    /* pending rings */
-    blkif_request_t req_pending[BLK_RING_SIZE];
-    /* blkif_response_t rsp_pending[BLK_RING_SIZE] */;
-    
-    /* handler hooks: */
-    request_hook_t   *req_hook;
-    response_hook_t  *rsp_hook;
-    
-    signal (SIGBUS, got_sig_bus);
-    signal (SIGINT, got_sig_int);
-    
-    __init_blkif();
-
-    fd = open("/dev/blktap", O_RDWR);
-    if (fd == -1)
-        err(-1, "open failed!");
-
-    blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 
-             PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
-    if ((int)blktap_mem == -1) 
-        err(-1, "mmap failed!");
-
-    /* assign the rings to the mapped memory */
-/*
-    sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
-    FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
-*/  
-    sring = (blkif_sring_t *)((unsigned long)blktap_mem);
-    BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
-
-    mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
-
-
-    /* Set up store connection and watch. */
-    h = xs_daemon_open();
-    if (h == NULL) 
-        err(-1, "xs_daemon_open");
-    
-    ret = add_blockdevice_probe_watch(h, "Domain-0");
-    if (ret != 0)
-        err(0, "adding device probewatch");
-    
-    ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
-
-    while(1) {
-        int ret;
-        
-        /* build the poll list */
-        pfd_count = 0;
-        for ( i=0; i < MAX_POLLFDS; i++ ) {
-            pollhook_t *ph = &pollhooks[i];
-            
-            if (ph->active) {
-                pfd[pfd_count].fd     = ph->fd;
-                pfd[pfd_count].events = ph->events;
-                ph->pfd               = &pfd[pfd_count];
-                pfd_count++;
-            }
-        }
-
-        tap_pfd = pfd_count++;
-        pfd[tap_pfd].fd = fd;
-        pfd[tap_pfd].events = POLLIN;
-
-        store_pfd = pfd_count++;
-        pfd[store_pfd].fd = xs_fileno(h);
-        pfd[store_pfd].events = POLLIN;
-        
-        if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
-            if (DEBUG_RING_IDXS)
-                ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
-            continue;
-        }
-
-        for (i=0; i < MAX_POLLFDS; i++) {
-            if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
-                pollhooks[i].func(pollhooks[i].pfd->fd);
-        }
-        
-        if (pfd[store_pfd].revents) {
-            ret = xs_fire_next_watch(h);
-        }
-
-        if (pfd[tap_pfd].revents) 
-        {    
-            /* empty the fe_ring */
-            notify_fe = 0;
-            notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
-            rp = fe_ring.sring->req_prod;
-            rmb();
-            for (i = fe_ring.req_cons; i != rp; i++)
-            {
-                int done = 0; 
-
-                req = RING_GET_REQUEST(&fe_ring, i);
-                memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
-                req = &req_pending[ID_TO_IDX(req->id)];
-
-                blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
-
-                if (blkif != NULL)
-                {
-                    req_hook = blkif->request_hook_chain;
-                    while (req_hook != NULL)
-                    {
-                        switch(req_hook->func(blkif, req, ((i+1) == rp)))
-                        {
-                        case BLKTAP_RESPOND:
-                            apply_rsp_hooks(blkif, (blkif_response_t *)req);
-                            write_rsp_to_fe_ring((blkif_response_t *)req);
-                            notify_fe = 1;
-                            done = 1;
-                            break;
-                        case BLKTAP_STOLEN:
-                            done = 1;
-                            break;
-                        case BLKTAP_PASS:
-                            break;
-                        default:
-                            printf("Unknown request hook return value!\n");
-                        }
-                        if (done) break;
-                        req_hook = req_hook->next;
-                    }
-                }
-
-                if (done == 0) 
-                {
-                    /* this was:  */
-                    /* write_req_to_be_ring(req); */
-
-                    unsigned long id = req->id;
-                    unsigned short operation = req->operation;
-                    printf("Unterminated request!\n");
-                    rsp = (blkif_response_t *)req;
-                    rsp->id = id;
-                    rsp->operation = operation;
-                    rsp->status = BLKIF_RSP_ERROR;
-                    write_rsp_to_fe_ring(rsp);
-                    notify_fe = 1;
-                    done = 1;
-                }
-
-            }
-            fe_ring.req_cons = i;
-
-            /* empty the be_ring */
-/*
-            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
-            rp = be_ring.sring->rsp_prod;
-            rmb();
-            for (i = be_ring.rsp_cons; i != rp; i++)
-            {
-
-                rsp = RING_GET_RESPONSE(&be_ring, i);
-                memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
-                rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
-
-                DPRINTF("copying a be request\n");
-
-                apply_rsp_hooks(rsp);
-                write_rsp_to_fe_ring(rsp);
-            }
-            be_ring.rsp_cons = i;
-*/
-            /* notify the domains */
-/*
-            if (notify_be) {
-                DPRINTF("notifying be\n");
-pthread_mutex_lock(&push_mutex);
-                RING_PUSH_REQUESTS(&be_ring);
-                ioctl(fd, BLKTAP_IOCTL_KICK_BE);
-pthread_mutex_unlock(&push_mutex);
-            }
-*/
-            if (notify_fe) {
-                DPRINTF("notifying fe\n");
-                pthread_mutex_lock(&push_mutex);
-                RING_PUSH_RESPONSES(&fe_ring);
-                ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-                pthread_mutex_unlock(&push_mutex);
-            }
-        }        
-    }
-
-
-    munmap(blktap_mem, PAGE_SIZE);
-
- mmap_failed:
-    close(fd);
-
- open_failed:
-    return 0;
-}
-
-void got_sig_bus() {
-    printf("Attempted to access a page that isn't.\n");
-    exit(-1);
-}
-
-void got_sig_int() {
-    DPRINTF("quitting -- returning to passthrough mode.\n");
-    if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
-    close(fd);
-    fd = 0;
-    exit(0);
-} 
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blktaplib.h
--- a/tools/blktap/blktaplib.h  Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,171 +0,0 @@
-/* blktaplib.h
- *
- * userland accessors to the block tap.
- *
- * Sept 2/05 -- I'm scaling this back to only support block remappings
- * to user in a backend domain.  Passthrough and interposition can be readded
- * once transitive grants are available.
- */
- 
-#ifndef __BLKTAPLIB_H__
-#define __BLKTAPLIB_H__
-
-#include <xenctrl.h>
-#include <sys/user.h>
-#include <xen/xen.h>
-#include <xen/io/blkif.h>
-#include <xen/io/ring.h>
-#include <xen/io/domain_controller.h>
-#include <xs.h>
-
-#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-
-/* /dev/xen/blktap resides at device number major=10, minor=202        */ 
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLK_RING_SIZE
-
-/* blktap IOCTLs:                                                      */
-#define BLKTAP_IOCTL_KICK_FE         1
-#define BLKTAP_IOCTL_KICK_BE         2
-#define BLKTAP_IOCTL_SETMODE         3
-#define BLKTAP_IOCTL_PRINT_IDXS      100   
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
-#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
-#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
-#define BLKTAP_MODE_COPY_FE          0x00000004
-#define BLKTAP_MODE_COPY_BE          0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
-           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
-           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
-           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) );
-/*
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
-        );
-*/
-}
-
-/* Return values for handling messages in hooks. */
-#define BLKTAP_PASS     0 /* Keep passing this request as normal. */
-#define BLKTAP_RESPOND  1 /* Request is now a reply.  Return it.  */
-#define BLKTAP_STOLEN   2 /* Hook has stolen request.             */
-
-//#define domid_t unsigned short
-
-inline unsigned int ID_TO_IDX(unsigned long id);
-inline domid_t ID_TO_DOM(unsigned long id);
-
-int  blktap_attach_poll(int fd, short events, int (*func)(int));
-void blktap_detach_poll(int fd);
-int  blktap_listen(void);
-
-struct blkif;
-
-typedef struct request_hook_st {
-    char *name;
-    int (*func)(struct blkif *, blkif_request_t *, int);
-    struct request_hook_st *next;
-} request_hook_t;
-
-typedef struct response_hook_st {
-    char *name;
-    int (*func)(struct blkif *, blkif_response_t *, int);
-    struct response_hook_st *next;
-} response_hook_t;
-
-struct blkif_ops {
-    long int (*get_size)(struct blkif *blkif);
-    long int (*get_secsize)(struct blkif *blkif);
-    unsigned (*get_info)(struct blkif *blkif);
-};
-
-typedef struct blkif {
-    domid_t domid;
-    long int handle;
-
-    long int pdev;
-    long int readonly;
-
-    enum { DISCONNECTED, CONNECTED } state;
-
-    struct blkif_ops *ops;
-    request_hook_t *request_hook_chain;
-    response_hook_t *response_hook_chain;
-
-    struct blkif *hash_next;
-
-    void *prv;  /* device-specific data */
-} blkif_t;
-
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-blkif_t *alloc_blkif(domid_t domid);
-int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
-               long int readonly);
-void free_blkif(blkif_t *blkif);
-void __init_blkif(void);
-
-
-/* xenstore/xenbus: */
-extern int add_blockdevice_probe_watch(struct xs_handle *h, 
-                                       const char *domname);
-int xs_fire_next_watch(struct xs_handle *h);
-
-
-void blkif_print_hooks(blkif_t *blkif);
-void blkif_register_request_hook(blkif_t *blkif, char *name, 
-                             int (*rh)(blkif_t *, blkif_request_t *, int));
-void blkif_register_response_hook(blkif_t *blkif, char *name, 
-                             int (*rh)(blkif_t *, blkif_response_t *, int));
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *);
-void blktap_kick_responses(void);
-
-/* this must match the underlying driver... */
-#define MAX_PENDING_REQS 64
-
-/* Accessing attached data page mappings */
-#define MMAP_PAGES                                              \
-    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg)                                   \
-    (mmap_vstart +                                              \
-     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
-     ((_seg) * PAGE_SIZE))
-
-extern unsigned long mmap_vstart;
-
-/* Defines that are only used by library clients */
-
-#ifndef __COMPILING_BLKTAP_LIB
-
-static char *blkif_op_name[] = {
-    [BLKIF_OP_READ]       = "READ",
-    [BLKIF_OP_WRITE]      = "WRITE",
-};
-
-#endif /* __COMPILING_BLKTAP_LIB */
-    
-#endif /* __BLKTAPLIB_H__ */
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/list.h
--- a/tools/blktap/list.h       Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-/*
- * list.h
- * 
- * This is a subset of linux's list.h intended to be used in user-space.
- * 
- */
-
-#ifndef __LIST_H__
-#define __LIST_H__
-
-#define LIST_POISON1  ((void *) 0x00100100)
-#define LIST_POISON2  ((void *) 0x00200200)
-
-struct list_head {
-        struct list_head *next, *prev;
-};
- 
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
- 
-#define LIST_HEAD(name) \
-        struct list_head name = LIST_HEAD_INIT(name)
-
-static inline void __list_add(struct list_head *new,
-                              struct list_head *prev,
-                              struct list_head *next)
-{
-        next->prev = new;
-        new->next = next;
-        new->prev = prev;
-        prev->next = new;
-}
-
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
-        __list_add(new, head, head->next);
-}
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
-        next->prev = prev;
-        prev->next = next;
-}
-static inline void list_del(struct list_head *entry)
-{
-        __list_del(entry->prev, entry->next);
-        entry->next = LIST_POISON1;
-        entry->prev = LIST_POISON2;
-}
-#define list_entry(ptr, type, member)                                   \
-        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-#define list_for_each_entry(pos, head, member)                          \
-        for (pos = list_entry((head)->next, typeof(*pos), member);      \
-             &pos->member != (head);                                    \
-             pos = list_entry(pos->member.next, typeof(*pos), member))
-
-#endif /* __LIST_H__ */
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/Makefile
--- a/tools/blktap/parallax/Makefile    Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-XEN_ROOT = ../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-PARALLAX_INSTALL_DIR   = /usr/sbin
-
-INSTALL         = install
-INSTALL_PROG    = $(INSTALL) -m0755
-INSTALL_DIR     = $(INSTALL) -d -m0755
-
-INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)
-
-LDFLAGS = -L.. -lpthread -lz -lblktap
-
-#PLX_SRCS := 
-PLX_SRCS := vdi.c 
-PLX_SRCS += radix.c 
-PLX_SRCS += snaplog.c
-PLX_SRCS += blockstore.c 
-PLX_SRCS += block-async.c
-PLX_SRCS += requests-async.c
-VDI_SRCS := $(PLX_SRCS)
-PLX_SRCS += parallax.c
-
-#VDI_TOOLS :=
-VDI_TOOLS := vdi_create
-VDI_TOOLS += vdi_list
-VDI_TOOLS += vdi_snap
-VDI_TOOLS += vdi_snap_list
-VDI_TOOLS += vdi_snap_delete
-VDI_TOOLS += vdi_fill
-VDI_TOOLS += vdi_tree
-VDI_TOOLS += vdi_validate
-
-CFLAGS   += -Werror
-CFLAGS   += -Wno-unused
-CFLAGS   += -fno-strict-aliasing
-CFLAGS   += $(INCLUDES)
-CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS     = .*.d
-
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-IBINS    = parallax $(VDI_TOOLS)
-
-.PHONY: all
-all: $(VDI_TOOLS) parallax blockstored
-
-.PHONY: install
-install: all
-       $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
-
-.PHONY: clean
-clean:
-       rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest
-
-parallax: $(PLX_SRCS)
-       $(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS)
-
-${VDI_TOOLS}: %: %.c $(VDI_SRCS)
-       $(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS)
-
--include $(DEPS)
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/README
--- a/tools/blktap/parallax/README      Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,177 +0,0 @@
-Parallax Quick Overview
-March 3, 2005
-
-This is intended to provide a quick set of instructions to let you
-guys play with the current parallax source.  In it's current form, the
-code will let you run an arbitrary number of VMs off of a single disk
-image, doing copy-on-write as they make updates.  Each domain is
-assigned a virtual disk image (VDI), which may be based on a snapshot
-of an existing image.  All of the VDI and snapshot management should
-currently work.
-
-The current implementation uses a single file as a blockstore for
-_everything_ this will soon be replaced by the fancier backend code
-and the local cache.  As it stands, Parallax will create
-"blockstore.dat" in the directory that you run it from, and use
-largefile support to make this grow to unfathomable girth.  So, you
-probably want to run the daemon off of a local disk, with a lot of
-free space.
-
-Here's how to get going:
-
-0. Setup:
----------
-
-Pick a local directory on a disk with lots of room.  You should be
-running from a privileged domain (e.g. dom0) with the blocktap
-configured in and block backend NOT.
-
-For convenience (for the moment) copy all of the vdi tools (vdi_*) and
-the parallax daemon from tools/blktap into this directory.
-
-1. Populate the blockstore:
----------------------------
-
-First you need to put at least one image into the blockstore.  You
-will need a disk image, either as a file or local partition.  My
-general approach has been to
-
-(a) make a really big sparse file with 
-
-        dd if=/dev/zero of=./image bs=4K count=1 seek=[big value]
-
-(b) put a filesystem into it
-
-        mkfs.ext3 ./image
-
-(c) mount it using loopback
-
-        mkdir ./mnt
-        mount -o loop ./image
-
-(d) cd into it and untar one of the image files from srg-roots.
-
-        cd mnt
-        tar ...
-
-NOTE: Beware if your system is FC3.  mkfs is not compatible with old
-versions of fedora, and so you don't have much choice but to install
-further fc3 images if you have used the fc3 version of mkfs.
-
-(e) unmount the image
-
-        cd ..
-        umount mnt
-
-(f) now, create a new VDI to hold the image 
-
-        ./vdi_create "My new FC3 VDI"
-
-(g) get the id of the new VDI.
-
-        ./vdi_list
-
-        |      0                     My new FC3 VDI
-
-(0 is the VDI id... create a few more if you want.)
-
-(h) hoover your image into the new VDI.
-
-        ./vdi_fill 0 ./image
-
-This will pull the entire image into the blockstore and set up a
-mapping tree for it for VDI 0.  Passing a device (i.e. /dev/sda3)
-should also work, but vdi_fill has NO notion of sparseness yet, so you
-are going to pump a block into the store for each block you read.
-
-vdi_fill will count up until it is done, and you should be ready to
-go.  If you want to be anal, you can use vdi_validate to test the VDI
-against the original image.
-
-2. Create some extra VDIs
--------------------------
-
-VDIs are actually a list of snapshots, and each snapshot is a full
-image of mappings.  So, to preserve an immutable copy of a current
-VDI, do this:
-
-(a) Snapshot your new VDI.
-
-        ./vdi_snap 0
-
-Snapshotting writes the current radix root to the VDI's snapshot log,
-and assigns it a new writable root.
-
-(b) look at the VDI's snapshot log.
-
-        ./vdi_snap_list 0
-
-        | 16   0      Thu Mar  3 19:27:48 2005 565111           31
-
-The first two columns constitute a snapshot id and represent the
-(block, offset) of the snapshot record.  The Date tells you when the
-snapshot was made, and 31 is the radix root node of the snapshot.
-
-(c) Create a new VDI, based on that snapshot, and look at the list.
-
-        ./vdi_create "FC3 - Copy 1" 16 0
-        ./vdi_list
-
-        |      0                     My new FC3 VDI
-        |      1                       FC3 - Copy 1
-
-NOTE: If you have Graphviz installed on your system, you can use
-vdi_tree to generate a postscript of your current set of VDIs and
-snapshots.
-
-
-Create as many VDIs as you need for the VMs that you want to run.
-
-3. Boot some VMs:
------------------
-
-Parallax currently uses a hack in xend to pass the VDI id, you need to
-modify the disk line of the VM config that is going to mount it.
-
-(a) set up your vm config, by using the following disk line:
-
-        disk = ['parallax:1,sda1,w,0' ]
-
-This example uses VDI 1 (from vdi_list above), presents it as sda1
-(writable), and uses dom 0 as the backend.  If you were running the
-daemon (and tap driver) in some domain other than 0, you would change
-this last parameter.
-
-NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so 
that it knows what to do with "parallax:".
-
-(b) Run parallax in the backend domain.
-
-        ./parallax
-
-(c) create your new domain.
-
-        xm create ...
-
----
-
-That's pretty much all there is to it at the moment.  Hope this is
-clear enough to get you going.  Now, a few serious caveats that will
-be sorted out in the almost immediate future:
-
-WARNINGS:
----------
-
-1. There is NO locking in the VDI tools at the moment, so I'd avoid
-running them in parallel, or more importantly, running them while the
-daemon is running.
-
-2. I doubt that xend will be very happy about restarting if you have
-parallax-using domains.  So if it dies while there are active parallax
-doms, you may need to reboot.
-
-3. I've turned off write-in-place.  So at the moment, EVERY block
-write is a log append on the blockstore.  I've been having some probs
-with the radix tree's marking of writable blocks after snapshots and
-will sort this out very soon.
-
-
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/block-async.c
--- a/tools/blktap/parallax/block-async.c       Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,393 +0,0 @@
-/* block-async.c
- * 
- * Asynchronous block wrappers for parallax.
- */
- 
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "block-async.h"
-#include "blockstore.h"
-#include "vdi.h"
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* We have a queue of outstanding I/O requests implemented as a 
- * circular producer-consumer ring with free-running buffers.
- * to allow reordering, this ring indirects to indexes in an 
- * ring of io_structs.
- * 
- * the block_* calls may either add an entry to this ring and return, 
- * or satisfy the request immediately and call the callback directly.
- * None of the io calls in parallax should be nested enough to worry 
- * about stack problems with this approach.
- */
-
-struct read_args {
-    uint64_t addr;
-};
-
-struct write_args {
-    uint64_t   addr;
-    char *block;
-};
-
-struct alloc_args {
-    char *block;
-};
- 
-struct pending_io_req {
-    enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;
-    union {
-        struct read_args  r;
-        struct write_args w;
-        struct alloc_args a;
-    } u;
-    io_cb_t cb;
-    void *param;
-};
-
-void radix_lock_init(struct radix_lock *r)
-{
-    int i;
-    
-    pthread_mutex_init(&r->lock, NULL);
-    for (i=0; i < 1024; i++) {
-        r->lines[i] = 0;
-        r->waiters[i] = NULL;
-        r->state[i] = ANY;
-    }
-}
-
-/* maximum outstanding I/O requests issued asynchronously */
-/* must be a power of 2.*/
-#define MAX_PENDING_IO 1024
-
-/* how many threads to concurrently issue I/O to the disk. */
-#define IO_POOL_SIZE   10
-
-static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];
-static int pending_io_list[MAX_PENDING_IO];
-static unsigned long io_prod = 0, io_cons = 0, io_free = 0;
-#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))
-#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)
-#define PENDING_IO_ENT(_x) \
-       (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])
-#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)
-#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)
-static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t  pending_io_cond = PTHREAD_COND_INITIALIZER;
-
-static void init_pending_io(void)
-{
-    int i;
-       
-    for (i=0; i<MAX_PENDING_IO; i++)
-        pending_io_list[i] = i;
-               
-} 
-
-void block_read(uint64_t addr, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-    
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);
-    req->op = IO_READ;
-    req->u.r.addr = addr;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-    
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);
-    req->op = IO_WRITE;
-    req->u.w.addr  = addr;
-    req->u.w.block = block;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-
-void block_alloc(char *block, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-       
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    req->op = IO_ALLOC;
-    req->u.a.block = block;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    pthread_mutex_lock(&r->lock);
-    
-    if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {
-        r->lines[row]++;
-        r->state[row] = READ;
-        DPRINTF("RLOCK  : %3d (row: %d)\n", r->lines[row], row);
-        pthread_mutex_unlock(&r->lock);
-        ret.type = IO_INT_T;
-        ret.u.i = 0;
-        cb(ret, param);
-    } else {
-        struct radix_wait **rwc;
-        struct radix_wait *rw = 
-            (struct radix_wait *) malloc (sizeof(struct radix_wait));
-        DPRINTF("RLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
-        rw->type  = RLOCK;
-        rw->param = param;
-        rw->cb    = cb;
-        rw->next  = NULL;
-        /* append to waiters list. */
-        rwc = &r->waiters[row];
-        while (*rwc != NULL) rwc = &(*rwc)->next;
-        *rwc = rw;
-        pthread_mutex_unlock(&r->lock);
-        return;
-    }
-}
-
-
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    pthread_mutex_lock(&r->lock);
-    
-    /* the second check here is redundant -- just here for debugging now. */
-    if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {
-        r->state[row] = STOP;
-        r->lines[row] = -1;
-        DPRINTF("WLOCK  : %3d (row: %d)\n", r->lines[row], row);
-        pthread_mutex_unlock(&r->lock);
-        ret.type = IO_INT_T;
-        ret.u.i = 0;
-        cb(ret, param);
-    } else {
-        struct radix_wait **rwc;
-        struct radix_wait *rw = 
-            (struct radix_wait *) malloc (sizeof(struct radix_wait));
-        DPRINTF("WLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
-        rw->type  = WLOCK;
-        rw->param = param;
-        rw->cb    = cb;
-        rw->next  = NULL;
-        /* append to waiters list. */
-        rwc = &r->waiters[row];
-        while (*rwc != NULL) rwc = &(*rwc)->next;
-        *rwc = rw;
-        pthread_mutex_unlock(&r->lock);
-        return;
-    }
-       
-}
-
-/* called with radix_lock locked and lock count of zero. */
-static void wake_waiters(struct radix_lock *r, int row)
-{
-    struct pending_io_req *req;
-    struct radix_wait *rw;
-    
-    if (r->lines[row] != 0) return;
-    if (r->waiters[row] == NULL) return; 
-    
-    if (r->waiters[row]->type == WLOCK) {
-
-        rw = r->waiters[row];
-        pthread_mutex_lock(&pending_io_lock);
-        assert(CAN_PRODUCE_PENDING_IO);
-        
-        req = PENDING_IO_ENT(io_prod++);
-        req->op    = IO_WWAKE;
-        req->cb    = rw->cb;
-        req->param = rw->param;
-        r->lines[row] = -1; /* write lock the row. */
-        r->state[row] = STOP;
-        r->waiters[row] = rw->next;
-        free(rw);
-        pthread_mutex_unlock(&pending_io_lock);
-    
-    } else /* RLOCK */ {
-
-        while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {
-            rw = r->waiters[row];
-            pthread_mutex_lock(&pending_io_lock);
-            assert(CAN_PRODUCE_PENDING_IO);
-            
-            req = PENDING_IO_ENT(io_prod++);
-            req->op    = IO_RWAKE;
-            req->cb    = rw->cb;
-            req->param = rw->param;
-            r->lines[row]++; /* read lock the row. */
-            r->state[row] = READ; 
-            r->waiters[row] = rw->next;
-            free(rw);
-            pthread_mutex_unlock(&pending_io_lock);
-        }
-
-        if (r->waiters[row] != NULL) /* There is a write queued still */
-            r->state[row] = STOP;
-    }  
-    
-    pthread_mutex_lock(&pending_io_lock);
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);
-}
-
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-       
-    pthread_mutex_lock(&r->lock);
-    assert(r->lines[row] > 0); /* try to catch misuse. */
-    r->lines[row]--;
-    if (r->lines[row] == 0) {
-        r->state[row] = ANY;
-        wake_waiters(r, row);
-    }
-    pthread_mutex_unlock(&r->lock);
-    cb(ret, param);
-}
-
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    
-    pthread_mutex_lock(&r->lock);
-    assert(r->lines[row] == -1); /* try to catch misuse. */
-    r->lines[row] = 0;
-    r->state[row] = ANY;
-    wake_waiters(r, row);
-    pthread_mutex_unlock(&r->lock);
-    cb(ret, param);
-}
-
-/* consumer calls */
-static void do_next_io_req(struct pending_io_req *req)
-{
-    struct io_ret          ret;
-    void  *param;
-    
-    switch (req->op) {
-    case IO_READ:
-        ret.type = IO_BLOCK_T;
-        ret.u.b  = readblock(req->u.r.addr);
-        break;
-    case IO_WRITE:
-        ret.type = IO_INT_T;
-        ret.u.i  = writeblock(req->u.w.addr, req->u.w.block);
-        DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);
-        break;
-    case IO_ALLOC:
-        ret.type = IO_ADDR_T;
-        ret.u.a  = allocblock(req->u.a.block);
-        break;
-    case IO_RWAKE:
-        DPRINTF("WAKE DEFERRED RLOCK!\n");
-        ret.type = IO_INT_T;
-        ret.u.i  = 0;
-        break;
-    case IO_WWAKE:
-        DPRINTF("WAKE DEFERRED WLOCK!\n");
-        ret.type = IO_INT_T;
-        ret.u.i  = 0;
-        break;
-    default:
-        DPRINTF("Unknown IO operation on pending list!\n");
-        return;
-    }
-    
-    param = req->param;
-    pthread_mutex_lock(&pending_io_lock);
-    pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);
-    pthread_mutex_unlock(&pending_io_lock);
-       
-    assert(req->cb != NULL);
-    req->cb(ret, param);
-    
-}
-
-void *io_thread(void *param) 
-{
-    int tid;
-    struct pending_io_req *req;
-    
-    /* Set this thread's tid. */
-    tid = *(int *)param;
-    free(param);
-    
-start:
-    pthread_mutex_lock(&pending_io_lock);
-    while (io_prod == io_cons) {
-        pthread_cond_wait(&pending_io_cond, &pending_io_lock);
-    }
-    
-    if (io_prod == io_cons) {
-        /* unnecessary wakeup. */
-        pthread_mutex_unlock(&pending_io_lock);
-        goto start;
-    }
-    
-    req = PENDING_IO_ENT(io_cons++);
-    pthread_mutex_unlock(&pending_io_lock);
-       
-    do_next_io_req(req);
-    
-    goto start;
-       
-}
-
-static pthread_t io_pool[IO_POOL_SIZE];
-void start_io_threads(void)
-
-{      
-    int i, tid=0;
-    
-    for (i=0; i < IO_POOL_SIZE; i++) {
-        int ret, *t;
-        t = (int *)malloc(sizeof(int));
-        *t = tid++;
-        ret = pthread_create(&io_pool[i], NULL, io_thread, t);
-        if (ret != 0) printf("Error starting thread %d\n", i);
-    }
-       
-}
-
-void init_block_async(void)
-{
-    init_pending_io();
-    start_io_threads();
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/block-async.h
--- a/tools/blktap/parallax/block-async.h       Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-/* block-async.h
- * 
- * Asynchronous block wrappers for parallax.
- */
- 
-#ifndef _BLOCKASYNC_H_
-#define _BLOCKASYNC_H_
-
-#include <assert.h>
-#include <xenctrl.h>
-#include "vdi.h"
-
-struct io_ret
-{
-    enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;
-    union {
-        uint64_t   a;
-        char *b;
-        int   i;
-    } u;
-};
-
-typedef void (*io_cb_t)(struct io_ret r, void *param);
-
-/* per-vdi lock structures to make sure requests run in a safe order. */
-struct radix_wait {
-    enum {RLOCK, WLOCK} type;
-    io_cb_t  cb;
-    void    *param;
-    struct radix_wait *next;
-};
-
-struct radix_lock {
-    pthread_mutex_t lock;
-    int                    lines[1024];
-    struct radix_wait     *waiters[1024];
-    enum {ANY, READ, STOP} state[1024];
-};
-void radix_lock_init(struct radix_lock *r);
-
-void block_read(uint64_t addr, io_cb_t cb, void *param);
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param);
-void block_alloc(char *block, io_cb_t cb, void *param);
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void init_block_async(void);
-
-static inline uint64_t IO_ADDR(struct io_ret r)
-{
-    assert(r.type == IO_ADDR_T);
-    return r.u.a;
-}
-
-static inline char *IO_BLOCK(struct io_ret r)
-{
-    assert(r.type == IO_BLOCK_T);
-    return r.u.b;
-}
-
-static inline int IO_INT(struct io_ret r)
-{
-    assert(r.type == IO_INT_T);
-    return r.u.i;
-}
-
-
-#endif //_BLOCKASYNC_H_
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/blockstore.c
--- a/tools/blktap/parallax/blockstore.c        Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1348 +0,0 @@
-/**************************************************************************
- * 
- * blockstore.c
- *
- * Simple block store interface
- *
- */
- 
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <stdarg.h>
-#include "blockstore.h"
-#include <pthread.h>
-
-//#define BLOCKSTORE_REMOTE
-//#define BSDEBUG
-
-#define RETRY_TIMEOUT 1000000 /* microseconds */
-
-/*****************************************************************************
- * Debugging
- */
-#ifdef BSDEBUG
-void DB(char *format, ...)
-{
-    va_list args;
-    fprintf(stderr, "[%05u] ", (int)pthread_getspecific(tid_key));
-    va_start(args, format);
-    vfprintf(stderr, format, args);
-    va_end(args);
-}
-#else
-#define DB(format, ...) (void)0
-#endif
-
-#ifdef BLOCKSTORE_REMOTE
-
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-
-/*****************************************************************************
- * Network state                                                             *
- *****************************************************************************/
-
-/* The individual disk servers we talks to. These will be referenced by
- * an integer index into bsservers[].
- */
-bsserver_t bsservers[MAX_SERVERS];
-
-/* The cluster map. This is indexed by an integer cluster number.
- */
-bscluster_t bsclusters[MAX_CLUSTERS];
-
-/* Local socket.
- */
-struct sockaddr_in sin_local;
-int bssock = 0;
-
-/*****************************************************************************
- * Notification                                                              *
- *****************************************************************************/
-
-typedef struct pool_thread_t_struct {
-    pthread_mutex_t ptmutex;
-    pthread_cond_t ptcv;
-    int newdata;
-} pool_thread_t;
-
-pool_thread_t pool_thread[READ_POOL_SIZE+1];
-
-#define RECV_NOTIFY(tid) { \
-    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
-    pool_thread[tid].newdata = 1; \
-    DB("CV Waking %u", tid); \
-    pthread_cond_signal(&(pool_thread[tid].ptcv)); \
-    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-#define RECV_AWAIT(tid) { \
-    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
-    if (pool_thread[tid].newdata) { \
-        pool_thread[tid].newdata = 0; \
-        DB("CV Woken %u", tid); \
-    } \
-    else { \
-        DB("CV Waiting %u", tid); \
-        pthread_cond_wait(&(pool_thread[tid].ptcv), \
-                          &(pool_thread[tid].ptmutex)); \
-    } \
-    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-
-/*****************************************************************************
- * Message queue management                                                  *
- *****************************************************************************/
-
-/* Protects the queue manipulation critcal regions.
- */
-pthread_mutex_t ptmutex_queue;
-#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue)
-#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue)
-
-pthread_mutex_t ptmutex_recv;
-#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv)
-#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv)
-
-/* A message queue entry. We allocate one of these for every request we send.
- * Asynchronous reply reception also used one of these.
- */
-typedef struct bsq_t_struct {
-    struct bsq_t_struct *prev;
-    struct bsq_t_struct *next;
-    int status;
-    int server;
-    int length;
-    struct msghdr msghdr;
-    struct iovec iov[2];
-    int tid;
-    struct timeval tv_sent;
-    bshdr_t message;
-    void *block;
-} bsq_t;
-
-#define BSQ_STATUS_MATCHED 1
-
-pthread_mutex_t ptmutex_luid;
-#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid)
-#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid)
-
-static uint64_t luid_cnt = 0x1000ULL;
-uint64_t new_luid(void) {
-    uint64_t luid;
-    ENTER_LUID_CR;
-    luid = luid_cnt++;
-    LEAVE_LUID_CR;
-    return luid;
-}
-
-/* Queue of outstanding requests.
- */
-bsq_t *bs_head = NULL;
-bsq_t *bs_tail = NULL;
-int bs_qlen = 0;
-
-/*
- */
-void queuedebug(char *msg) {
-    bsq_t *q;
-    ENTER_QUEUE_CR;
-    fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen);
-    for (q = bs_head; q; q = q->next) {
-        fprintf(stderr, "  luid=%016llx server=%u\n",
-                q->message.luid, q->server);
-    }
-    LEAVE_QUEUE_CR;
-}
-
-int enqueue(bsq_t *qe) {
-    ENTER_QUEUE_CR;
-    qe->next = NULL;
-    qe->prev = bs_tail;
-    if (!bs_head)
-        bs_head = qe;
-    else
-        bs_tail->next = qe;
-    bs_tail = qe;
-    bs_qlen++;
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("enqueue");
-#endif
-    return 0;
-}
-
-int dequeue(bsq_t *qe) {
-    bsq_t *q;
-    ENTER_QUEUE_CR;
-    for (q = bs_head; q; q = q->next) {
-        if (q == qe) {
-            if (q->prev)
-                q->prev->next = q->next;
-            else 
-                bs_head = q->next;
-            if (q->next)
-                q->next->prev = q->prev;
-            else
-                bs_tail = q->prev;
-            bs_qlen--;
-            goto found;
-        }
-    }
-
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("dequeue not found");
-#endif
-    return 0;
-
-    found:
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("dequeue not found");
-#endif
-    return 1;
-}
-
-bsq_t *queuesearch(bsq_t *qe) {
-    bsq_t *q;
-    ENTER_QUEUE_CR;
-    for (q = bs_head; q; q = q->next) {
-        if ((qe->server == q->server) &&
-            (qe->message.operation == q->message.operation) &&
-            (qe->message.luid == q->message.luid)) {
-
-            if ((q->message.operation == BSOP_READBLOCK) &&
-                ((q->message.flags & BSOP_FLAG_ERROR) == 0)) {
-                q->block = qe->block;
-                qe->block = NULL;
-            }
-            q->length = qe->length;
-            q->message.flags = qe->message.flags;
-            q->message.id = qe->message.id;
-            q->status |= BSQ_STATUS_MATCHED;
-
-            if (q->prev)
-                q->prev->next = q->next;
-            else 
-                bs_head = q->next;
-            if (q->next)
-                q->next->prev = q->prev;
-            else
-                bs_tail = q->prev;
-            q->next = NULL;
-            q->prev = NULL;
-            bs_qlen--;
-            goto found;
-        }
-    }
-
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("queuesearch not found");
-#endif
-    return NULL;
-
-    found:
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("queuesearch found");
-#endif
-    return q;
-}
-
-/*****************************************************************************
- * Network communication                                                     *
- *****************************************************************************/
-
-int send_message(bsq_t *qe) {
-    int rc;
-
-    qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin);
-    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    qe->msghdr.msg_iov = qe->iov;
-    if (qe->block)
-        qe->msghdr.msg_iovlen = 2;
-    else
-        qe->msghdr.msg_iovlen = 1;
-    qe->msghdr.msg_control = NULL;
-    qe->msghdr.msg_controllen = 0;
-    qe->msghdr.msg_flags = 0;
-
-    qe->iov[0].iov_base = (void *)&(qe->message);
-    qe->iov[0].iov_len = MSGBUFSIZE_ID;
-
-    if (qe->block) {
-        qe->iov[1].iov_base = qe->block;
-        qe->iov[1].iov_len = BLOCK_SIZE;
-    }
-
-    qe->message.luid = new_luid();
-
-    qe->status = 0;
-    qe->tid = (int)pthread_getspecific(tid_key);
-    if (enqueue(qe) < 0) {
-        fprintf(stderr, "Error enqueuing request.\n");
-        return -1;
-    }
-
-    gettimeofday(&(qe->tv_sent), NULL);
-    DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid);
-    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
-    //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0,
-    //           (struct sockaddr *)&(bsservers[qe->server].sin),
-    //           sizeof(struct sockaddr_in));
-    if (rc < 0)
-        return rc;
-
-    return rc;
-}
-
-int recv_message(bsq_t *qe) {
-    struct sockaddr_in from;
-    //int flen = sizeof(from);
-    int rc;
-
-    qe->msghdr.msg_name = &from;
-    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    qe->msghdr.msg_iov = qe->iov;
-    if (qe->block)
-        qe->msghdr.msg_iovlen = 2;
-    else
-        qe->msghdr.msg_iovlen = 1;
-    qe->msghdr.msg_control = NULL;
-    qe->msghdr.msg_controllen = 0;
-    qe->msghdr.msg_flags = 0;
-
-    qe->iov[0].iov_base = (void *)&(qe->message);
-    qe->iov[0].iov_len = MSGBUFSIZE_ID;
-    if (qe->block) {
-        qe->iov[1].iov_base = qe->block;
-        qe->iov[1].iov_len = BLOCK_SIZE;
-    }
-
-    rc = recvmsg(bssock, &(qe->msghdr), 0);
-
-    //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0,
-    //               (struct sockaddr *)&from, &flen);
-    return rc;
-}
-
-int get_server_number(struct sockaddr_in *sin) {
-    int i;
-
-#ifdef BSDEBUG2
-    fprintf(stderr,
-            "get_server_number(%u.%u.%u.%u/%u)\n",
-            (unsigned int)sin->sin_addr.s_addr & 0xff,
-            ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff,
-            ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff,
-            ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff,
-            (unsigned int)sin->sin_port);
-#endif
-
-    for (i = 0; i < MAX_SERVERS; i++) {
-        if (bsservers[i].hostname) {
-#ifdef BSDEBUG2
-            fprintf(stderr,
-                    "get_server_number check %u.%u.%u.%u/%u\n",
-                    (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff,
-                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff,
-                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 
16)&0xff,
-                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 
24)&0xff,
-                    (unsigned int)bsservers[i].sin.sin_port);
-#endif
-            if ((sin->sin_family == bsservers[i].sin.sin_family) &&
-                (sin->sin_port == bsservers[i].sin.sin_port) &&
-                (memcmp((void *)&(sin->sin_addr),
-                        (void *)&(bsservers[i].sin.sin_addr),
-                        sizeof(struct in_addr)) == 0)) {
-                return i;
-            }
-        }        
-    }
-
-    return -1;
-}
-
-void *rx_buffer = NULL;
-bsq_t rx_qe;
-bsq_t *recv_any(void) {
-    struct sockaddr_in from;
-    int rc;
-    
-    DB("ENTER recv_any\n");
-
-    rx_qe.msghdr.msg_name = &from;
-    rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    rx_qe.msghdr.msg_iov = rx_qe.iov;
-    if (!rx_buffer) {
-        rx_buffer = malloc(BLOCK_SIZE);
-        if (!rx_buffer) {
-            perror("recv_any malloc");
-            return NULL;
-        }
-    }
-    rx_qe.block = rx_buffer;
-    rx_buffer = NULL;
-    rx_qe.msghdr.msg_iovlen = 2;
-    rx_qe.msghdr.msg_control = NULL;
-    rx_qe.msghdr.msg_controllen = 0;
-    rx_qe.msghdr.msg_flags = 0;
-    
-    rx_qe.iov[0].iov_base = (void *)&(rx_qe.message);
-    rx_qe.iov[0].iov_len = MSGBUFSIZE_ID;
-    rx_qe.iov[1].iov_base = rx_qe.block;
-    rx_qe.iov[1].iov_len = BLOCK_SIZE;
-
-    rc = recvmsg(bssock, &(rx_qe.msghdr), 0);
-    if (rc < 0) {
-        perror("recv_any");
-        return NULL;
-    }
-
-    rx_qe.length = rc;    
-    rx_qe.server = get_server_number(&from);
-
-    DB("recv_any from %d luid=%016llx len=%u\n",
-       rx_qe.server, rx_qe.message.luid, rx_qe.length);
-
-    return &rx_qe;
-}
-
-void recv_recycle_buffer(bsq_t *q) {
-    if (q->block) {
-        rx_buffer = q->block;
-        q->block = NULL;
-    }
-}
-
-// cycle through reading any incoming, searching for a match in the
-// queue, until we have all we need.
-int wait_recv(bsq_t **reqs, int numreqs) {
-    bsq_t *q, *m;
-    unsigned int x, i;
-    int tid = (int)pthread_getspecific(tid_key);
-
-    DB("ENTER wait_recv %u\n", numreqs);
-
-    checkmatch:
-    x = 0xffffffff;
-    for (i = 0; i < numreqs; i++) {
-        x &= reqs[i]->status;
-    }
-    if ((x & BSQ_STATUS_MATCHED)) {
-        DB("LEAVE wait_recv\n");
-        return numreqs;
-    }
-
-    RECV_AWAIT(tid);
-
-    /*
-    rxagain:
-    ENTER_RECV_CR;
-    q = recv_any();
-    LEAVE_RECV_CR;
-    if (!q)
-        return -1;
-
-    m = queuesearch(q);
-    recv_recycle_buffer(q);
-    if (!m) {
-        fprintf(stderr, "Unmatched RX\n");
-        goto rxagain;
-    }
-    */
-
-    goto checkmatch;
-
-}
-
-/* retry
- */
-static int retry_count = 0;
-int retry(bsq_t *qe)
-{
-    int rc;
-    gettimeofday(&(qe->tv_sent), NULL);
-    DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid);
-    retry_count++;
-    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
-    if (rc < 0)
-        return rc;
-    return 0;
-}
-
-/* queue runner
- */
-void *queue_runner(void *arg)
-{
-    for (;;) {
-        struct timeval now;
-        long long nowus, sus;
-        bsq_t *q;
-        int r;
-
-        sleep(1);
-
-        gettimeofday(&now, NULL);
-        nowus = now.tv_usec + now.tv_sec * 1000000;
-        ENTER_QUEUE_CR;
-        r = retry_count;
-        for (q = bs_head; q; q = q->next) {
-            sus = q->tv_sent.tv_usec + q->tv_sent.tv_sec * 1000000;
-            if ((nowus - sus) > RETRY_TIMEOUT) {
-                if (retry(q) < 0) {
-                    fprintf(stderr, "Error on sendmsg retry.\n");
-                }
-            }
-        }
-        if (r != retry_count) {
-            fprintf(stderr, "RETRIES: %u %u\n", retry_count - r, retry_count);
-        }
-        LEAVE_QUEUE_CR;
-    }
-}
-
-/* receive loop
- */
-void *receive_loop(void *arg)
-{
-    bsq_t *q, *m;
-
-    for(;;) {
-        q = recv_any();
-        if (!q) {
-            fprintf(stderr, "recv_any error\n");
-        }
-        else {
-            m = queuesearch(q);
-            recv_recycle_buffer(q);
-            if (!m) {
-                fprintf(stderr, "Unmatched RX\n");
-            }
-            else {
-                DB("RX MATCH");
-                RECV_NOTIFY(m->tid);
-            }
-        }
-    }
-}
-pthread_t pthread_recv;
-
-/*****************************************************************************
- * Reading                                                                   *
- *****************************************************************************/
-
-void *readblock_indiv(int server, uint64_t id) {
-    void *block;
-    bsq_t *qe;
-    int len, rc;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("readblock qe malloc");
-        return NULL;
-    }
-    qe->block = NULL;
-    
-    /*
-    qe->block = malloc(BLOCK_SIZE);
-    if (!qe->block) {
-        perror("readblock qe malloc");
-        free((void *)qe);
-        return NULL;
-    }
-    */
-
-    qe->server = server;
-
-    qe->message.operation = BSOP_READBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = id;
-    qe->length = MSGBUFSIZE_ID;
-
-    if (send_message(qe) < 0) {
-        perror("readblock sendto");
-        goto err;
-    }
-    
-    /*len = recv_message(qe);
-    if (len < 0) {
-        perror("readblock recv");
-        goto err;
-    }*/
-
-    rc = wait_recv(&qe, 1);
-    if (rc < 0) {
-        perror("readblock recv");
-        goto err;
-    }
-
-    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "readblock server error\n");
-        goto err;
-    }
-    if (qe->length < MSGBUFSIZE_BLOCK) {
-        fprintf(stderr, "readblock recv short (%u)\n", len);
-        goto err;
-    }
-    /* if ((block = malloc(BLOCK_SIZE)) == NULL) {
-        perror("readblock malloc");
-        goto err;
-    }
-    memcpy(block, qe->message.block, BLOCK_SIZE);
-    */    
-    block = qe->block;
-
-    free((void *)qe);
-    return block;
-
-    err:
-    free(qe->block);
-    free((void *)qe);
-    return NULL;
-}
-
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *
- *   @return: pointer to block, NULL on error
- */
-void *readblock(uint64_t id) {
-    int map = (int)BSID_MAP(id);
-    uint64_t xid;
-    static int i = CLUSTER_MAX_REPLICAS - 1;
-    void *block = NULL;
-
-    /* special case for the "superblock" just use the first block on the
-     * first replica. (extend to blocks < 6 for vdi bug)
-     */
-    if (id < 6) {
-        block = readblock_indiv(bsclusters[map].servers[0], id);
-        goto out;
-    }
-
-    i++;
-    if (i >= CLUSTER_MAX_REPLICAS)
-        i = 0;
-    switch (i) {
-    case 0:
-        xid = BSID_REPLICA0(id);
-        break;
-    case 1:
-        xid = BSID_REPLICA1(id);
-        break;
-    case 2:
-        xid = BSID_REPLICA2(id);
-        break;
-    }
-    
-    block = readblock_indiv(bsclusters[map].servers[i], xid);
-
-    out:
-#ifdef BSDEBUG
-    if (block)
-        fprintf(stderr, "READ:  %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-                id,
-                (unsigned int)((unsigned char *)block)[0],
-                (unsigned int)((unsigned char *)block)[1],
-                (unsigned int)((unsigned char *)block)[2],
-                (unsigned int)((unsigned char *)block)[3],
-                (unsigned int)((unsigned char *)block)[4],
-                (unsigned int)((unsigned char *)block)[5],
-                (unsigned int)((unsigned char *)block)[6],
-                (unsigned int)((unsigned char *)block)[7]);
-    else
-        fprintf(stderr, "READ:  %016llx NULL\n", id);
-#endif
-    return block;
-}
-
-/*****************************************************************************
- * Writing                                                                   *
- *****************************************************************************/
-
-bsq_t *writeblock_indiv(int server, uint64_t id, void *block) {
-
-    bsq_t *qe;
-    int len;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("writeblock qe malloc");
-        goto err;
-    }
-    qe->server = server;
-
-    qe->message.operation = BSOP_WRITEBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = id;
-    //memcpy(qe->message.block, block, BLOCK_SIZE);
-    qe->block = block;
-    qe->length = MSGBUFSIZE_BLOCK;
-
-    if (send_message(qe) < 0) {
-        perror("writeblock sendto");
-        goto err;
-    }
-
-    return qe;
-
-    err:
-    free((void *)qe);
-    return NULL;
-}
-    
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-    
-    int map = (int)BSID_MAP(id);
-    int rep0 = bsclusters[map].servers[0];
-    int rep1 = bsclusters[map].servers[1];
-    int rep2 = bsclusters[map].servers[2];
-    bsq_t *reqs[3];
-    int rc;
-
-    reqs[0] = reqs[1] = reqs[2] = NULL;
-
-#ifdef BSDEBUG
-    fprintf(stderr,
-            "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-            id,
-            (unsigned int)((unsigned char *)block)[0],
-            (unsigned int)((unsigned char *)block)[1],
-            (unsigned int)((unsigned char *)block)[2],
-            (unsigned int)((unsigned char *)block)[3],
-            (unsigned int)((unsigned char *)block)[4],
-            (unsigned int)((unsigned char *)block)[5],
-            (unsigned int)((unsigned char *)block)[6],
-            (unsigned int)((unsigned char *)block)[7]);
-#endif
-
-    /* special case for the "superblock" just use the first block on the
-     * first replica. (extend to blocks < 6 for vdi bug)
-     */
-    if (id < 6) {
-        reqs[0] = writeblock_indiv(rep0, id, block);
-        if (!reqs[0])
-            return -1;
-        rc = wait_recv(reqs, 1);
-        return rc;
-    }
-
-    reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block);
-    if (!reqs[0])
-        goto err;
-    reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block);
-    if (!reqs[1])
-        goto err;
-    reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block);
-    if (!reqs[2])
-        goto err;
-
-    rc = wait_recv(reqs, 3);
-    if (rc < 0) {
-        perror("writeblock recv");
-        goto err;
-    }
-    if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "writeblock server0 error\n");
-        goto err;
-    }
-    if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "writeblock server1 error\n");
-        goto err;
-    }
-    if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "writeblock server2 error\n");
-        goto err;
-    }
-
-
-    free((void *)reqs[0]);
-    free((void *)reqs[1]);
-    free((void *)reqs[2]);
-    return 0;
-
-    err:
-    if (reqs[0]) {
-        dequeue(reqs[0]);
-        free((void *)reqs[0]);
-    }
-    if (reqs[1]) {
-        dequeue(reqs[1]);
-        free((void *)reqs[1]);
-    }
-    if (reqs[2]) {
-        dequeue(reqs[2]);
-        free((void *)reqs[2]);
-    }
-    return -1;
-}
-
-/*****************************************************************************
- * Allocation                                                                *
- *****************************************************************************/
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock(void *block) {
-    return allocblock_hint(block, 0);
-}
-
-bsq_t *allocblock_hint_indiv(int server, void *block, uint64_t hint) {
-    bsq_t *qe;
-    int len;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("allocblock_hint qe malloc");
-        goto err;
-    }
-    qe->server = server;
-
-    qe->message.operation = BSOP_ALLOCBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = hint;
-    //memcpy(qe->message.block, block, BLOCK_SIZE);
-    qe->block = block;
-    qe->length = MSGBUFSIZE_BLOCK;
-
-    if (send_message(qe) < 0) {
-        perror("allocblock_hint sendto");
-        goto err;
-    }
-    
-    return qe;
-
-    err:
-    free((void *)qe);
-    return NULL;
-}
-
-/**
- * allocblock_hint: write a new block to disk
- *   @block: pointer to block
- *   @hint: allocation hint
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
-    int map = (int)hint;
-    int rep0 = bsclusters[map].servers[0];
-    int rep1 = bsclusters[map].servers[1];
-    int rep2 = bsclusters[map].servers[2];
-    bsq_t *reqs[3];
-    int rc;
-    uint64_t id0, id1, id2;
-
-    reqs[0] = reqs[1] = reqs[2] = NULL;
-
-    DB("ENTER allocblock\n");
-
-    reqs[0] = allocblock_hint_indiv(rep0, block, hint);
-    if (!reqs[0])
-        goto err;
-    reqs[1] = allocblock_hint_indiv(rep1, block, hint);
-    if (!reqs[1])
-        goto err;
-    reqs[2] = allocblock_hint_indiv(rep2, block, hint);
-    if (!reqs[2])
-        goto err;
-
-    rc = wait_recv(reqs, 3);
-    if (rc < 0) {
-        perror("allocblock recv");
-        goto err;
-    }
-    if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "allocblock server0 error\n");
-        goto err;
-    }
-    if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "allocblock server1 error\n");
-        goto err;
-    }
-    if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "allocblock server2 error\n");
-        goto err;
-    }
-
-    id0 = reqs[0]->message.id;
-    id1 = reqs[1]->message.id;
-    id2 = reqs[2]->message.id;
-
-#ifdef BSDEBUG
-    fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-            BSID(map, id0, id1, id2),
-            (unsigned int)((unsigned char *)block)[0],
-            (unsigned int)((unsigned char *)block)[1],
-            (unsigned int)((unsigned char *)block)[2],
-            (unsigned int)((unsigned char *)block)[3],
-            (unsigned int)((unsigned char *)block)[4],
-            (unsigned int)((unsigned char *)block)[5],
-            (unsigned int)((unsigned char *)block)[6],
-            (unsigned int)((unsigned char *)block)[7]);
-#endif
-    
-    free((void *)reqs[0]);
-    free((void *)reqs[1]);
-    free((void *)reqs[2]);
-    return BSID(map, id0, id1, id2);
-
-    err:
-    if (reqs[0]) {
-        dequeue(reqs[0]);
-        free((void *)reqs[0]);
-    }
-    if (reqs[1]) {
-        dequeue(reqs[1]);
-        free((void *)reqs[1]);
-    }
-    if (reqs[2]) {
-        dequeue(reqs[2]);
-        free((void *)reqs[2]);
-    }
-    return 0;
-}
-
-#else /* /BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Local storage version                                                     *
- *****************************************************************************/
- 
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *
- *   @return: pointer to block, NULL on error
- */
-
-void *readblock(uint64_t id) {
-    void *block;
-    int block_fp;
-   
-//printf("readblock(%llu)\n", id); 
-    block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return NULL;
-    }
-    
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        printf ("%Ld ", id);
-        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
-        perror("readblock lseek");
-        goto err;
-    }
-    if ((block = malloc(BLOCK_SIZE)) == NULL) {
-        perror("readblock malloc");
-        goto err;
-    }
-    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("readblock read");
-        free(block);
-        goto err;
-    }
-    close(block_fp);
-    return block;
-    
-err:
-    close(block_fp);
-    return NULL;
-}
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-    
-    int block_fp;
-    
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-    }
-
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        perror("writeblock lseek");
-        goto err;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) < 0) {
-        perror("writeblock write");
-        goto err;
-    }
-    close(block_fp);
-    return 0;
-
-err:
-    close(block_fp);
-    return -1;
-}
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-
-uint64_t allocblock(void *block) {
-    uint64_t lb;
-    off64_t pos;
-    int block_fp;
-    
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return 0;
-    }
-
-    pos = lseek64(block_fp, 0, SEEK_END);
-    if (pos == (off64_t)-1) {
-        perror("allocblock lseek");
-        goto err;
-    }
-    if (pos % BLOCK_SIZE != 0) {
-        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
-        goto err;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("allocblock write");
-        goto err;
-    }
-    lb = pos / BLOCK_SIZE + 1;
-//printf("alloc(%Ld)\n", lb);
-    close(block_fp);
-    return lb;
-    
-err:
-    close(block_fp);
-    return 0;
-    
-}
-
-/**
- * allocblock_hint: write a new block to disk
- *   @block: pointer to block
- *   @hint: allocation hint
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
-    return allocblock(block);
-}
-
-#endif /* BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Memory management                                                         *
- *****************************************************************************/
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- *   @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
-    void *block = malloc(BLOCK_SIZE);
-    if (block == NULL) {
-        perror("newblock");
-        return NULL;
-    }
-    memset(block, 0, BLOCK_SIZE);
-    return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- *   @id: block id (zero if this is only in-memory)
- *   @block: block to be freed
- */
-void freeblock(void *block) {
-        free(block);
-}
-
-static freeblock_t *new_freeblock(void)
-{
-    freeblock_t *fb;
-    
-    fb = newblock();
-    
-    if (fb == NULL) return NULL;
-    
-    fb->magic = FREEBLOCK_MAGIC;
-    fb->next  = 0ULL;
-    fb->count = 0ULL;
-    memset(fb->list, 0, sizeof fb->list);
-    
-    return fb;
-}
-
-void releaseblock(uint64_t id)
-{
-    blockstore_super_t *bs_super;
-    freeblock_t *fl_current;
-    
-    /* get superblock */
-    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-    
-    /* get freeblock_current */
-    if (bs_super->freelist_current == 0ULL) 
-    {
-        fl_current = new_freeblock();
-        bs_super->freelist_current = allocblock(fl_current);
-        writeblock(BLOCKSTORE_SUPER, bs_super);
-    } else {
-        fl_current = readblock(bs_super->freelist_current);
-    }
-    
-    /* if full, chain to superblock and allocate new current */
-    
-    if (fl_current->count == FREEBLOCK_SIZE) {
-        fl_current->next = bs_super->freelist_full;
-        writeblock(bs_super->freelist_current, fl_current);
-        bs_super->freelist_full = bs_super->freelist_current;
-        freeblock(fl_current);
-        fl_current = new_freeblock();
-        bs_super->freelist_current = allocblock(fl_current);
-        writeblock(BLOCKSTORE_SUPER, bs_super);
-    }
-    
-    /* append id to current */
-    fl_current->list[fl_current->count++] = id;
-    writeblock(bs_super->freelist_current, fl_current);
-    
-    freeblock(fl_current);
-    freeblock(bs_super);
-    
-    
-}
-
-/* freelist debug functions: */
-void freelist_count(int print_each)
-{
-    blockstore_super_t *bs_super;
-    freeblock_t *fb;
-    uint64_t total = 0, next;
-    
-    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-    
-    if (bs_super->freelist_current == 0ULL) {
-        printf("freelist is empty!\n");
-        return;
-    }
-    
-    fb = readblock(bs_super->freelist_current);
-    printf("%Ld entires on current.\n", fb->count);
-    total += fb->count;
-    if (print_each == 1)
-    {
-        int i;
-        for (i=0; i< fb->count; i++)
-            printf("  %Ld\n", fb->list[i]);
-    }
-    
-    freeblock(fb);
-    
-    if (bs_super->freelist_full == 0ULL) {
-        printf("freelist_full is empty!\n");
-        return;
-    }
-    
-    next = bs_super->freelist_full;
-    for (;;) {
-        fb = readblock(next);
-        total += fb->count;
-        if (print_each == 1)
-        {
-            int i;
-            for (i=0; i< fb->count; i++)
-                printf("  %Ld\n", fb->list[i]);
-        }
-        next = fb->next;
-        freeblock(fb);
-        if (next == 0ULL) break;
-    }
-    printf("Total of %Ld ids on freelist.\n", total);
-}
-
-/*****************************************************************************
- * Initialisation                                                            *
- *****************************************************************************/
-
-int __init_blockstore(void)
-{
-    int i;
-    blockstore_super_t *bs_super;
-    uint64_t ret;
-    int block_fp;
-    
-#ifdef BLOCKSTORE_REMOTE
-    struct hostent *addr;
-
-    pthread_mutex_init(&ptmutex_queue, NULL);
-    pthread_mutex_init(&ptmutex_luid, NULL);
-    pthread_mutex_init(&ptmutex_recv, NULL);
-    /*pthread_mutex_init(&ptmutex_notify, NULL);*/
-    for (i = 0; i <= READ_POOL_SIZE; i++) {
-        pool_thread[i].newdata = 0;
-        pthread_mutex_init(&(pool_thread[i].ptmutex), NULL);
-        pthread_cond_init(&(pool_thread[i].ptcv), NULL);
-    }
-
-    bsservers[0].hostname = "firebug.cl.cam.ac.uk";
-    bsservers[1].hostname = "planb.cl.cam.ac.uk";
-    bsservers[2].hostname = "simcity.cl.cam.ac.uk";
-    bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/;
-    bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/;
-    bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/;
-    bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/;
-    bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/;
-    bsservers[8].hostname = NULL;
-    bsservers[9].hostname = NULL;
-    bsservers[10].hostname = NULL;
-    bsservers[11].hostname = NULL;
-    bsservers[12].hostname = NULL;
-    bsservers[13].hostname = NULL;
-    bsservers[14].hostname = NULL;
-    bsservers[15].hostname = NULL;
-
-    for (i = 0; i < MAX_SERVERS; i++) {
-        if (!bsservers[i].hostname)
-            continue;
-        addr = gethostbyname(bsservers[i].hostname);
-        if (!addr) {
-            perror("bad hostname");
-            return -1;
-        }
-        bsservers[i].sin.sin_family = addr->h_addrtype;
-        bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT);
-        bsservers[i].sin.sin_addr.s_addr = 
-            ((struct in_addr *)(addr->h_addr))->s_addr;
-    }
-
-    /* Cluster map
-     */
-    bsclusters[0].servers[0] = 0;
-    bsclusters[0].servers[1] = 1;
-    bsclusters[0].servers[2] = 2;
-    bsclusters[1].servers[0] = 1;
-    bsclusters[1].servers[1] = 2;
-    bsclusters[1].servers[2] = 3;
-    bsclusters[2].servers[0] = 2;
-    bsclusters[2].servers[1] = 3;
-    bsclusters[2].servers[2] = 4;
-    bsclusters[3].servers[0] = 3;
-    bsclusters[3].servers[1] = 4;
-    bsclusters[3].servers[2] = 5;
-    bsclusters[4].servers[0] = 4;
-    bsclusters[4].servers[1] = 5;
-    bsclusters[4].servers[2] = 6;
-    bsclusters[5].servers[0] = 5;
-    bsclusters[5].servers[1] = 6;
-    bsclusters[5].servers[2] = 7;
-    bsclusters[6].servers[0] = 6;
-    bsclusters[6].servers[1] = 7;
-    bsclusters[6].servers[2] = 0;
-    bsclusters[7].servers[0] = 7;
-    bsclusters[7].servers[1] = 0;
-    bsclusters[7].servers[2] = 1;
-
-    /* Local socket set up
-     */
-    bssock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (bssock < 0) {
-        perror("Bad socket");
-        return -1;
-    }
-    memset(&sin_local, 0, sizeof(sin_local));
-    sin_local.sin_family = AF_INET;
-    sin_local.sin_port = htons(BLOCKSTORED_PORT);
-    sin_local.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) {
-        perror("bind");
-        close(bssock);
-        return -1;
-    }
-
-    pthread_create(&pthread_recv, NULL, receive_loop, NULL);
-    pthread_create(&pthread_recv, NULL, queue_runner, NULL);
-
-#else /* /BLOCKSTORE_REMOTE */
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-        exit(-1);
-    }
-    
-    if (lseek(block_fp, 0, SEEK_END) == 0) {
-        bs_super = newblock();
-        bs_super->magic            = BLOCKSTORE_MAGIC;
-        bs_super->freelist_full    = 0LL;
-        bs_super->freelist_current = 0LL;
-        
-        ret = allocblock(bs_super);
-        
-        freeblock(bs_super);
-    } else {
-        bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-        if (bs_super->magic != BLOCKSTORE_MAGIC)
-        {
-            printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
-            exit(-1);
-        }
-        freeblock(bs_super);
-    }
-        
-    close(block_fp);
-        
-#endif /*  BLOCKSTORE_REMOTE */   
-    return 0;
-}
-
-void __exit_blockstore(void)
-{
-    int i;
-#ifdef BLOCKSTORE_REMOTE
-    pthread_mutex_destroy(&ptmutex_recv);
-    pthread_mutex_destroy(&ptmutex_luid);
-    pthread_mutex_destroy(&ptmutex_queue);
-    /*pthread_mutex_destroy(&ptmutex_notify);
-      pthread_cond_destroy(&ptcv_notify);*/
-    for (i = 0; i <= READ_POOL_SIZE; i++) {
-        pthread_mutex_destroy(&(pool_thread[i].ptmutex));
-        pthread_cond_destroy(&(pool_thread[i].ptcv));
-    }
-#endif
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/blockstore.h
--- a/tools/blktap/parallax/blockstore.h        Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-/**************************************************************************
- * 
- * blockstore.h
- *
- * Simple block store interface
- *
- */
- 
-#ifndef __BLOCKSTORE_H__
-#define __BLOCKSTORE_H__
-
-#include <netinet/in.h>
-#include <xenctrl.h>
-
-#define BLOCK_SIZE  4096
-#define BLOCK_SHIFT   12
-#define BLOCK_MASK  0xfffffffffffff000LL
-
-/* XXX SMH: where is the below supposed to be defined???? */
-#ifndef SECTOR_SHIFT 
-#define SECTOR_SHIFT   9 
-#endif
-
-#define FREEBLOCK_SIZE  (BLOCK_SIZE / sizeof(uint64_t)) - (3 * 
sizeof(uint64_t))
-#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
-
-typedef struct {
-    uint64_t magic;
-    uint64_t next;
-    uint64_t count;
-    uint64_t list[FREEBLOCK_SIZE];
-} freeblock_t; 
-
-#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL
-#define BLOCKSTORE_SUPER 1ULL
-
-typedef struct {
-    uint64_t magic;
-    uint64_t freelist_full;
-    uint64_t freelist_current;
-} blockstore_super_t;
-
-extern void *newblock();
-extern void *readblock(uint64_t id);
-extern uint64_t allocblock(void *block);
-extern uint64_t allocblock_hint(void *block, uint64_t hint);
-extern int writeblock(uint64_t id, void *block);
-
-/* Add this blockid to a freelist, to be recycled by the allocator. */
-extern void releaseblock(uint64_t id);
-
-/* this is a memory free() operation for block-sized allocations */
-extern void freeblock(void *block);
-extern int __init_blockstore(void);
-
-/* debug for freelist. */
-void freelist_count(int print_each);
-#define ALLOCFAIL (((uint64_t)(-1)))
-
-/* Distribution
- */
-#define BLOCKSTORED_PORT 9346
-
-struct bshdr_t_struct {
-    uint32_t            operation;
-    uint32_t            flags;
-    uint64_t            id;
-    uint64_t            luid;
-} __attribute__ ((packed));
-typedef struct bshdr_t_struct bshdr_t;
-
-struct bsmsg_t_struct {
-    bshdr_t        hdr;
-    unsigned char  block[BLOCK_SIZE];
-} __attribute__ ((packed));
-
-typedef struct bsmsg_t_struct bsmsg_t;
-
-#define MSGBUFSIZE_OP    sizeof(uint32_t)
-#define MSGBUFSIZE_FLAGS (sizeof(uint32_t) + sizeof(uint32_t))
-#define MSGBUFSIZE_ID    (sizeof(uint32_t) + sizeof(uint32_t) + 
sizeof(uint64_t) + sizeof(uint64_t))
-#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
-
-#define BSOP_READBLOCK  0x01
-#define BSOP_WRITEBLOCK 0x02
-#define BSOP_ALLOCBLOCK 0x03
-#define BSOP_FREEBLOCK  0x04
-
-#define BSOP_FLAG_ERROR 0x01
-
-#define BS_ALLOC_SKIP 10
-#define BS_ALLOC_HACK
-
-/* Remote hosts and cluster map - XXX need to generalise
- */
-
-/*
-
-  Interim ID format is
-
-  63 60 59                40 39                20 19                 0
-  +----+--------------------+--------------------+--------------------+
-  |map | replica 2          | replica 1          | replica 0          |
-  +----+--------------------+--------------------+--------------------+
-
-  The map is an index into a table detailing which machines form the
-  cluster.
-
- */
-
-#define BSID_REPLICA0(_id) ((_id)&0xfffffULL)
-#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL)
-#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL)
-#define BSID_MAP(_id)      (((_id)>>60)&0xfULL)
-
-#define BSID(_map, _rep0, _rep1, _rep2) ((((uint64_t)(_map))<<60) | \
-                                         (((uint64_t)(_rep2))<<40) | \
-                                         (((uint64_t)(_rep1))<<20) | 
((uint64_t)(_rep0)))
-
-typedef struct bsserver_t_struct {
-    char              *hostname;
-    struct sockaddr_in sin;
-} bsserver_t;
-
-#define MAX_SERVERS 16
-
-#define CLUSTER_MAX_REPLICAS 3
-typedef struct bscluster_t_struct {
-    int servers[CLUSTER_MAX_REPLICAS];
-} bscluster_t;
-
-#define MAX_CLUSTERS 16
-
-#endif /* __BLOCKSTORE_H__ */
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/blockstored.c
--- a/tools/blktap/parallax/blockstored.c       Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,275 +0,0 @@
-/**************************************************************************
- * 
- * blockstored.c
- *
- * Block store daemon.
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <errno.h>
-#include "blockstore.h"
-
-//#define BSDEBUG
-
-int readblock_into(uint64_t id, void *block);
-
-int open_socket(uint16_t port) {
-    
-    struct sockaddr_in sn;
-    int sock;
-
-    sock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (sock < 0) {
-        perror("Bad socket");
-        return -1;
-    }
-    memset(&sn, 0, sizeof(sn));
-    sn.sin_family = AF_INET;
-    sn.sin_port = htons(port);
-    sn.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
-        perror("bind");
-        close(sock);
-        return -1;
-    }
-
-    return sock;
-}
-
-static int block_fp = -1;
-static int bssock = -1;
-
-int send_reply(struct sockaddr_in *peer, void *buffer, int len) {
-
-    int rc;
-    
-#ifdef BSDEBUG
-    fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n",
-            len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t 
*)buffer)->hdr.id);
-#endif
-    rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, 
sizeof(*peer));
-    if (rc < 0) {
-        perror("send_reply");
-        return 1;
-    }
-
-
-    return 0;
-}
-
-static bsmsg_t msgbuf;
-
-void service_loop(void) {
-
-    for (;;) {
-        int rc, len;
-        struct sockaddr_in from;
-        size_t slen = sizeof(from);
-        uint64_t bid;
-
-        len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0,
-                       (struct sockaddr *)&from, &slen);
-
-        if (len < 0) {
-            perror("recvfrom");
-            continue;
-        }
-
-        if (len < MSGBUFSIZE_OP) {
-            fprintf(stderr, "Short packet.\n");
-            continue;
-        }
-
-#ifdef BSDEBUG
-        fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n",
-                len, msgbuf.hdr.operation, msgbuf.hdr.id);
-#endif
-
-        switch (msgbuf.hdr.operation) {
-        case BSOP_READBLOCK:
-            if (len < MSGBUFSIZE_ID) {
-                fprintf(stderr, "Short packet (readblock %u).\n", len);
-                continue;
-            }
-            rc = readblock_into(msgbuf.hdr.id, msgbuf.block);
-            if (rc < 0) {
-                fprintf(stderr, "readblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK);
-            break;
-        case BSOP_WRITEBLOCK:
-            if (len < MSGBUFSIZE_BLOCK) {
-                fprintf(stderr, "Short packet (writeblock %u).\n", len);
-                continue;
-            }
-            rc = writeblock(msgbuf.hdr.id, msgbuf.block);
-            if (rc < 0) {
-                fprintf(stderr, "writeblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-            break;
-        case BSOP_ALLOCBLOCK:
-            if (len < MSGBUFSIZE_BLOCK) {
-                fprintf(stderr, "Short packet (allocblock %u).\n", len);
-                continue;
-            }
-            bid = allocblock(msgbuf.block);
-            if (bid == ALLOCFAIL) {
-                fprintf(stderr, "allocblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.id = bid;
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-            break;
-        }
-
-    }
-}
- 
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *   @block: pointer to buffer to receive block
- *
- *   @return: 0 if OK, other on error
- */
-
-int readblock_into(uint64_t id, void *block) {
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
-        perror("readblock lseek");
-        return -1;
-    }
-    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("readblock read");
-        return -1;
-    }
-    return 0;
-}
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        perror("writeblock lseek");
-        return -1;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) < 0) {
-        perror("writeblock write");
-        return -1;
-    }
-    return 0;
-}
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-static uint64_t lastblock = 0;
-
-uint64_t allocblock(void *block) {
-    uint64_t lb;
-    off64_t pos;
-
-    retry:
-    pos = lseek64(block_fp, 0, SEEK_END);
-    if (pos == (off64_t)-1) {
-        perror("allocblock lseek");
-        return ALLOCFAIL;
-    }
-    if (pos % BLOCK_SIZE != 0) {
-        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
-        return ALLOCFAIL;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("allocblock write");
-        return ALLOCFAIL;
-    }
-    lb = pos / BLOCK_SIZE + 1;
-
-#ifdef BS_ALLOC_HACK
-    if (lb < BS_ALLOC_SKIP)
-        goto retry;
-#endif
-    
-    if (lb <= lastblock)
-        printf("[*** %Ld alredy allocated! ***]\n", lb);
-    
-    lastblock = lb;
-    return lb;
-}
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- *   @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
-    void *block = malloc(BLOCK_SIZE);
-    if (block == NULL) {
-        perror("newblock");
-        return NULL;
-    }
-    memset(block, 0, BLOCK_SIZE);
-    return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- *   @id: block id (zero if this is only in-memory)
- *   @block: block to be freed
- */
-void freeblock(void *block) {
-        free(block);
-}
-
-
-int main(int argc, char **argv)
-{
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-    }
-
-    bssock = open_socket(BLOCKSTORED_PORT);
-    if (bssock < 0) {
-        return -1;
-    }
-
-    service_loop();
-    
-    close(bssock);
-
-    return 0;
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/bstest.c
--- a/tools/blktap/parallax/bstest.c    Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-/**************************************************************************
- * 
- * bstest.c
- *
- * Block store daemon test program.
- *
- * usage: bstest <host>|X {r|w|a} ID 
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-#include <errno.h>
-#include "blockstore.h"
-
-int direct(char *host, uint32_t op, uint64_t id, int len) {
-    struct sockaddr_in sn, peer;
-    int sock;
-    bsmsg_t msgbuf;
-    int rc, slen;
-    struct hostent *addr;
-
-    addr = gethostbyname(host);
-    if (!addr) {
-        perror("bad hostname");
-        exit(1);
-    }
-    peer.sin_family = addr->h_addrtype;
-    peer.sin_port = htons(BLOCKSTORED_PORT);
-    peer.sin_addr.s_addr =  ((struct in_addr *)(addr->h_addr))->s_addr;
-    fprintf(stderr, "Sending to: %u.%u.%u.%u\n",
-            (unsigned int)(unsigned char)addr->h_addr[0],
-            (unsigned int)(unsigned char)addr->h_addr[1],
-            (unsigned int)(unsigned char)addr->h_addr[2],
-            (unsigned int)(unsigned char)addr->h_addr[3]);
-
-    sock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (sock < 0) {
-        perror("Bad socket");
-        exit(1);
-    }
-    memset(&sn, 0, sizeof(sn));
-    sn.sin_family = AF_INET;
-    sn.sin_port = htons(BLOCKSTORED_PORT);
-    sn.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
-        perror("bind");
-        close(sock);
-        exit(1);
-    }
-
-    memset((void *)&msgbuf, 0, sizeof(msgbuf));
-    msgbuf.operation = op;
-    msgbuf.id = id;
-
-    rc = sendto(sock, (void *)&msgbuf, len, 0,
-                (struct sockaddr *)&peer, sizeof(peer));
-    if (rc < 0) {
-        perror("sendto");
-        exit(1);
-    }
-
-    slen = sizeof(peer);
-    len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0,
-                   (struct sockaddr *)&peer, &slen);
-    if (len < 0) {
-        perror("recvfrom");
-        exit(1);
-    }
-
-    printf("Reply %u bytes:\n", len);
-    if (len >= MSGBUFSIZE_OP)
-        printf("  operation: %u\n", msgbuf.operation);
-    if (len >= MSGBUFSIZE_FLAGS)
-        printf("  flags: 0x%x\n", msgbuf.flags);
-    if (len >= MSGBUFSIZE_ID)
-        printf("  id: %llu\n", msgbuf.id);
-    if (len >= (MSGBUFSIZE_ID + 4))
-        printf("  data: %02x %02x %02x %02x...\n",
-               (unsigned int)msgbuf.block[0],
-               (unsigned int)msgbuf.block[1],
-               (unsigned int)msgbuf.block[2],
-               (unsigned int)msgbuf.block[3]);
-    
-    if (sock > 0)
-        close(sock);
-   
-    return 0;
-}
-
-int main (int argc, char **argv) {
-
-    uint32_t op = 0;
-    uint64_t id = 0;
-    int len = 0, rc;
-    void *block;
-
-    if (argc < 3) {
-        fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n");
-        return 1;
-    }
-
-    switch (argv[2][0]) {
-    case 'r':
-    case 'R':
-        op = BSOP_READBLOCK;
-        len = MSGBUFSIZE_ID;
-        break;
-    case 'w':
-    case 'W':
-        op = BSOP_WRITEBLOCK;
-        len = MSGBUFSIZE_BLOCK;
-        break;
-    case 'a':
-    case 'A':
-        op = BSOP_ALLOCBLOCK;
-        len = MSGBUFSIZE_BLOCK;
-        break;
-    default:
-        fprintf(stderr, "Unknown action '%s'.\n", argv[2]);
-        return 1;
-    }
-
-    if (argc >= 4)
-        id = atoll(argv[3]);
-
-    if (strcmp(argv[1], "X") == 0) {
-        rc = __init_blockstore();
-        if (rc < 0) {
-            fprintf(stderr, "blockstore init failed.\n");
-            return 1;
-        }
-        switch(op) {
-        case BSOP_READBLOCK:
-            block = readblock(id);
-            if (block) {
-                printf("data: %02x %02x %02x %02x...\n",
-                       (unsigned int)((unsigned char*)block)[0],
-                       (unsigned int)((unsigned char*)block)[1],
-                       (unsigned int)((unsigned char*)block)[2],
-                       (unsigned int)((unsigned char*)block)[3]);
-            }
-            break;
-        case BSOP_WRITEBLOCK:
-            block = malloc(BLOCK_SIZE);
-            if (!block) {
-                perror("bstest malloc");
-                return 1;
-            }
-            memset(block, 0, BLOCK_SIZE);
-            rc = writeblock(id, block);
-            if (rc != 0) {
-                printf("error\n");
-            }
-            else {
-                printf("OK\n");
-            }
-            break;
-        case BSOP_ALLOCBLOCK:
-            block = malloc(BLOCK_SIZE);
-            if (!block) {
-                perror("bstest malloc");
-                return 1;
-            }
-            memset(block, 0, BLOCK_SIZE);
-            id = allocblock_hint(block, id);
-            if (id == 0) {
-                printf("error\n");
-            }
-            else {
-                printf("ID: %llu\n", id);
-            }
-            break;
-        }
-    }
-    else {
-        direct(argv[1], op, id, len);
-    }
-
-
-    return 0;
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/parallax.c
--- a/tools/blktap/parallax/parallax.c  Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,608 +0,0 @@
-/**************************************************************************
- * 
- * parallax.c
- *
- * The Parallax Storage Server
- *
- */
- 
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "blktaplib.h"
-#include "blockstore.h"
-#include "vdi.h"
-#include "block-async.h"
-#include "requests-async.h"
-
-#define PARALLAX_DEV     61440
-#define SECTS_PER_NODE   8
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* ------[ session records ]----------------------------------------------- */
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-#define VDI_HASHSZ 16
-#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
-
-typedef struct blkif {
-    domid_t       domid;
-    unsigned int  handle;
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    vdi_t        *vdi_hash[VDI_HASHSZ];
-    struct blkif *hash_next;
-} blkif_t;
-
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    if ( handle != 0 )
-        printf("blktap/parallax don't currently support non-0 dev handles!\n");
-    
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
-        blkif = blkif->hash_next;
-    return blkif;
-}
-
-vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
-{
-    vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)];
-    
-    while ((vdi != NULL) && (vdi->vdevice != device))
-        vdi = vdi->next;
-    
-    return vdi;
-}
-
-/* ------[ control message handling ]-------------------------------------- */
-
-void blkif_create(blkif_be_create_t *create)
-{
-    domid_t       domid  = create->domid;
-    unsigned int  handle = create->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    DPRINTF("parallax (blkif_create): create is %p\n", create); 
-    
-    if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL )
-    {
-        DPRINTF("Could not create blkif: out of memory\n");
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid  = domid;
-    blkif->handle = handle;
-    blkif->status = DISCONNECTED;
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTF("Could not create blkif: already exists (%d,%d)\n",
-                domid, handle);
-            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
-            free(blkif);
-            return;
-        }
-        pblkif = &(*pblkif)->hash_next;
-    }
-
-    blkif->hash_next = *pblkif;
-    *pblkif = blkif;
-
-    DPRINTF("Successfully created blkif\n");
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_destroy(blkif_be_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    unsigned int  handle = destroy->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); 
-    
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif = *pblkif) != NULL )
-    {
-        if ( (blkif->domid == domid) && (blkif->handle == handle) )
-        {
-            if ( blkif->status != DISCONNECTED )
-                goto still_connected;
-            goto destroy;
-        }
-        pblkif = &blkif->hash_next;
-    }
-
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- still_connected:
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-    return;
-
- destroy:
-    *pblkif = blkif->hash_next;
-    free(blkif);
-    destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_create(blkif_be_vbd_create_t *create)
-{
-    blkif_t            *blkif;
-    vdi_t              *vdi, **vdip;
-    blkif_vdev_t        vdevice = create->vdevice;
-
-    DPRINTF("parallax (vbd_create): create=%p\n", create); 
-    
-    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
-    if ( blkif == NULL )
-    {
-        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 
-                create->domid, create->blkif_handle); 
-        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    /* VDI identifier is in grow->extent.sector_start */
-    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 
-            (unsigned long)create->dev_handle);
-
-    vdi = vdi_get(create->dev_handle);
-    if (vdi == NULL)
-    {
-        printf("parallax (vbd_create): VDI %lx not found.\n",
-               (unsigned long)create->dev_handle);
-        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
-        return;
-    }
-    
-    vdi->next = NULL;
-    vdi->vdevice = vdevice;
-    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
-    while (*vdip != NULL)
-        vdip = &(*vdip)->next;
-    *vdip = vdi;
-    
-    DPRINTF("blkif_create succeeded\n"); 
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
-{
-    blkif_t            *blkif;
-    vdi_t              *vdi, **vdip;
-    blkif_vdev_t        vdevice = destroy->vdevice;
-    
-    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
-    if ( blkif == NULL )
-    {
-        DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 
-                destroy->domid, destroy->blkif_handle); 
-        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.