[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Merged.



# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID 8af1199488d3636135f3adf3f7302d4a04e9004e
# Parent  25e3c8668f1f4769db8466b4af965a99503311ae
# Parent  299d6ff8fdb2604dde767af2a2bee985602e9a46
Merged.

diff -r 25e3c8668f1f -r 8af1199488d3 .hgignore
--- a/.hgignore Mon Jan  9 11:19:55 2006
+++ b/.hgignore Mon Jan  9 11:22:17 2006
@@ -181,6 +181,7 @@
 ^xen/TAGS$
 ^xen/arch/x86/asm-offsets\.s$
 ^xen/arch/x86/boot/mkelf32$
+^xen/arch/x86/xen\.lds$
 ^xen/ddb/.*$
 ^xen/include/asm$
 ^xen/include/asm-.*/asm-offsets\.h$
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Mon Jan  9 
11:22:17 2006
@@ -25,8 +25,9 @@
 
        xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
        xen_start_info->flags = s->arch.flags;
-       printk("Running on Xen! start_info_pfn=0x%lx lags=0x%x\n",
-               s->arch.start_info_pfn, xen_start_info->flags);
+       printk("Running on Xen! start_info_pfn=0x%lx nr_pages=%d flags=0x%x\n",
+               s->arch.start_info_pfn, xen_start_info->nr_pages,
+               xen_start_info->flags);
 
        evtchn_init();
        initialized = 1;
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/Makefile    Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/Makefile    Mon Jan  9 11:22:17 2006
@@ -77,8 +77,6 @@
        install -m0664 .config 
$(INSTALL_PATH)/boot/config-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
        install -m0664 System.map 
$(INSTALL_PATH)/boot/System.map-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
        ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) 
$(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(XENGUEST)$(INSTALL_SUFFIX)
-       mkdir -p $(INSTALL_PATH)/usr/include/xen/linux
-       install -m0644 $(srctree)/include/asm-xen/linux-public/*.h 
$(INSTALL_PATH)/usr/include/xen/linux
 
 archclean:
        @if [ -e arch/xen/arch ]; then $(MAKE) $(clean)=arch/xen/arch; fi;
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c     Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c     Mon Jan  9 
11:22:17 2006
@@ -39,8 +39,6 @@
 #ifdef CONFIG_XEN
 #include <asm/fixmap.h>
 #endif
-
-void (*pm_power_off)(void) = NULL;
 
 #ifdef CONFIG_X86_64
 
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Mon Jan  9 
11:22:17 2006
@@ -67,8 +67,11 @@
        op.u.add_memtype.pfn     = base;
        op.u.add_memtype.nr_pfns = size;
        op.u.add_memtype.type    = type;
-       if ((error = HYPERVISOR_dom0_op(&op)))
+       error = HYPERVISOR_dom0_op(&op);
+       if (error) {
+               BUG_ON(error > 0);
                return error;
+       }
 
        if (increment)
                ++usage_table[op.u.add_memtype.reg];
@@ -121,8 +124,12 @@
        if (--usage_table[reg] < 1) {
                op.cmd = DOM0_DEL_MEMTYPE;
                op.u.del_memtype.handle = 0;
-               op.u.add_memtype.reg    = reg;
-               (void)HYPERVISOR_dom0_op(&op);
+               op.u.del_memtype.reg    = reg;
+               error = HYPERVISOR_dom0_op(&op);
+               if (error) {
+                       BUG_ON(error > 0);
+                       goto out;
+               }
        }
        error = reg;
  out:
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Mon Jan  9 
11:22:17 2006
@@ -76,9 +76,7 @@
 EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(pm_idle);
-#ifdef CONFIG_ACPI_BOOT
 EXPORT_SYMBOL(pm_power_off);
-#endif
 EXPORT_SYMBOL(get_cmos_time);
 EXPORT_SYMBOL(cpu_khz);
 EXPORT_SYMBOL(apm_info);
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Mon Jan  9 11:22:17 2006
@@ -389,6 +389,30 @@
        return -ENOSYS;
 }
 
+static int __init
+gnttab_proc_init(void)
+{
+       /*
+        *  /proc/xen/grant : used by libxc to access grant tables
+        */
+       if ((grant_pde = create_xen_proc_entry("grant", 0600)) == NULL) {
+               WPRINTK("Unable to create grant xen proc entry\n");
+               return -1;
+       }
+
+       grant_file_ops.read   = grant_pde->proc_fops->read;
+       grant_file_ops.write  = grant_pde->proc_fops->write;
+
+       grant_pde->proc_fops  = &grant_file_ops;
+
+       grant_pde->read_proc  = &grant_read;
+       grant_pde->write_proc = &grant_write;
+
+       return 0;
+}
+
+device_initcall(gnttab_proc_init);
+
 #endif /* CONFIG_PROC_FS */
 
 int
@@ -446,29 +470,11 @@
        gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES;
        gnttab_free_head  = NR_RESERVED_ENTRIES;
 
-#ifdef CONFIG_PROC_FS
-       /*
-        *  /proc/xen/grant : used by libxc to access grant tables
-        */
-       if ((grant_pde = create_xen_proc_entry("grant", 0600)) == NULL) {
-               WPRINTK("Unable to create grant xen proc entry\n");
-               return -1;
-       }
-
-       grant_file_ops.read   = grant_pde->proc_fops->read;
-       grant_file_ops.write  = grant_pde->proc_fops->write;
-
-       grant_pde->proc_fops  = &grant_file_ops;
-
-       grant_pde->read_proc  = &grant_read;
-       grant_pde->write_proc = &grant_write;
-#endif
-
        printk("Grant table initialized\n");
        return 0;
 }
 
-__initcall(gnttab_init);
+core_initcall(gnttab_init);
 
 /*
  * Local variables:
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Mon Jan  9 11:22:17 2006
@@ -16,6 +16,13 @@
 #include <linux/cpu.h>
 #include <linux/kthread.h>
 #include <asm-xen/xencons.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+#endif
 
 #define SHUTDOWN_INVALID  -1
 #define SHUTDOWN_POWEROFF  0
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c Mon Jan  9 
11:22:17 2006
@@ -59,9 +59,7 @@
 EXPORT_SYMBOL(probe_irq_mask);
 EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(pm_idle);
-#ifdef CONFIG_ACPI_BOOT
 EXPORT_SYMBOL(pm_power_off);
-#endif
 EXPORT_SYMBOL(get_cmos_time);
 
 EXPORT_SYMBOL(__down_failed);
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Mon Jan  9 
11:22:17 2006
@@ -540,6 +540,9 @@
        pending_vaddrs        = kmalloc(sizeof(pending_vaddrs[0]) *
                                        mmap_pages, GFP_KERNEL);
        if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
+               kfree(pending_reqs);
+               kfree(pending_grant_handles);
+               kfree(pending_vaddrs);
                printk("%s: out of memory\n", __FUNCTION__);
                return -1;
        }
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Mon Jan  9 
11:22:17 2006
@@ -331,7 +331,12 @@
                return;
        }
 
-        xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+       err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+       if (err) {
+               xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
+                                info->xbdev->otherend);
+               return;
+       }
 
        (void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected); 
 
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Mon Jan  9 11:22:17 2006
@@ -208,7 +208,7 @@
 }
 
 struct vm_operations_struct blktap_vm_ops = {
-       nopage:   blktap_nopage,
+       .nopage = blktap_nopage,
 };
 
 /******************************************************************
@@ -225,7 +225,7 @@
        /* Allocate the fe ring. */
        sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
        if (sring == NULL)
-               goto fail_nomem;
+               return -ENOMEM;
 
        SetPageReserved(virt_to_page(sring));
     
@@ -233,9 +233,6 @@
        FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
 
        return 0;
-
- fail_nomem:
-       return -ENOMEM;
 }
 
 static int blktap_release(struct inode *inode, struct file *filp)
@@ -391,12 +388,12 @@
 }
 
 static struct file_operations blktap_fops = {
-       owner:    THIS_MODULE,
-       poll:     blktap_poll,
-       ioctl:    blktap_ioctl,
-       open:     blktap_open,
-       release:  blktap_release,
-       mmap:     blktap_mmap,
+       .owner   = THIS_MODULE,
+       .poll    = blktap_poll,
+       .ioctl   = blktap_ioctl,
+       .open    = blktap_open,
+       .release = blktap_release,
+       .mmap    = blktap_mmap,
 };
 
 
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c        Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c        Mon Jan  9 
11:22:17 2006
@@ -314,39 +314,31 @@
 {
        int sent, sz, work_done = 0;
 
-       if (xen_start_info->flags & SIF_INITDOMAIN) {
-               if (x_char) {
+       if (x_char) {
+               if (xen_start_info->flags & SIF_INITDOMAIN)
                        kcons_write_dom0(NULL, &x_char, 1);
-                       x_char = 0;
-                       work_done = 1;
-               }
-
-               while (wc != wp) {
-                       sz = wp - wc;
-                       if (sz > (wbuf_size - WBUF_MASK(wc)))
-                               sz = wbuf_size - WBUF_MASK(wc);
+               else
+                       while (x_char)
+                               if (xencons_ring_send(&x_char, 1) == 1)
+                                       break;
+               x_char = 0;
+               work_done = 1;
+       }
+
+       while (wc != wp) {
+               sz = wp - wc;
+               if (sz > (wbuf_size - WBUF_MASK(wc)))
+                       sz = wbuf_size - WBUF_MASK(wc);
+               if (xen_start_info->flags & SIF_INITDOMAIN) {
                        kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
                        wc += sz;
-                       work_done = 1;
-               }
-       } else {
-               while (x_char) {
-                       if (xencons_ring_send(&x_char, 1) == 1) {
-                               x_char = 0;
-                               work_done = 1;
-                       }
-               }
-
-               while (wc != wp) {
-                       sz = wp - wc;
-                       if (sz > (wbuf_size - WBUF_MASK(wc)))
-                               sz = wbuf_size - WBUF_MASK(wc);
+               } else {
                        sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
                        if (sent == 0)
                                break;
                        wc += sent;
-                       work_done = 1;
                }
+               work_done = 1;
        }
 
        if (work_done && (xencons_tty != NULL)) {
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Jan  9 11:22:17 2006
@@ -82,7 +82,7 @@
 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
 
 void netif_creditlimit(netif_t *netif);
-int  netif_disconnect(netif_t *netif);
+void netif_disconnect(netif_t *netif);
 
 netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
 void free_netif(netif_t *netif);
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Mon Jan  9 
11:22:17 2006
@@ -183,7 +183,7 @@
 int netif_map(netif_t *netif, unsigned long tx_ring_ref,
              unsigned long rx_ring_ref, unsigned int evtchn)
 {
-       int err;
+       int err = -ENOMEM;
        netif_tx_sring_t *txs;
        netif_rx_sring_t *rxs;
        evtchn_op_t op = {
@@ -196,24 +196,19 @@
                return 0;
 
        netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+       if (netif->tx_comms_area == NULL)
+               return -ENOMEM;
        netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
-       if (netif->tx_comms_area == NULL || netif->rx_comms_area == NULL)
-               return -ENOMEM;
+       if (netif->rx_comms_area == NULL)
+               goto err_rx;
 
        err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
-       if (err) {
-               free_vm_area(netif->tx_comms_area);
-               free_vm_area(netif->rx_comms_area);
-               return err;
-       }
+       if (err)
+               goto err_map;
 
        err = HYPERVISOR_event_channel_op(&op);
-       if (err) {
-               unmap_frontend_pages(netif);
-               free_vm_area(netif->tx_comms_area);
-               free_vm_area(netif->rx_comms_area);
-               return err;
-       }
+       if (err)
+               goto err_hypervisor;
 
        netif->evtchn = op.u.bind_interdomain.local_port;
 
@@ -241,19 +236,22 @@
        rtnl_unlock();
 
        return 0;
+err_hypervisor:
+       unmap_frontend_pages(netif);
+err_map:
+       free_vm_area(netif->rx_comms_area);
+err_rx:
+       free_vm_area(netif->tx_comms_area);
+       return err;
 }
 
 static void free_netif_callback(void *arg)
 {
        netif_t *netif = (netif_t *)arg;
 
-       /* Already disconnected? */
-       if (!netif->irq)
-               return;
-
-       unbind_from_irqhandler(netif->irq, netif);
-       netif->irq = 0;
-
+       if (netif->irq)
+               unbind_from_irqhandler(netif->irq, netif);
+       
        unregister_netdev(netif->dev);
 
        if (netif->tx.sring) {
@@ -290,10 +288,10 @@
 #endif
 }
 
-int netif_disconnect(netif_t *netif)
-{
-
-       if (netif->status == CONNECTED) {
+void netif_disconnect(netif_t *netif)
+{
+       switch (netif->status) {
+       case CONNECTED:
                rtnl_lock();
                netif->status = DISCONNECTING;
                wmb();
@@ -301,10 +299,14 @@
                        __netif_down(netif);
                rtnl_unlock();
                netif_put(netif);
-               return 0; /* Caller should not send response message. */
-       }
-
-       return 1;
+               break;
+       case DISCONNECTED:
+               BUG_ON(atomic_read(&netif->refcnt) != 0);
+               free_netif(netif);
+               break;
+       default:
+               BUG();
+       }
 }
 
 /*
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Mon Jan  9 
11:22:17 2006
@@ -14,6 +14,7 @@
 #include <asm-xen/balloon.h>
 #include <asm-xen/xen-public/memory.h>
 
+/*#define NETBE_DEBUG_INTERRUPT*/
 
 static void netif_idx_release(u16 pending_idx);
 static void netif_page_release(struct page *page);
@@ -727,6 +728,7 @@
        return notify;
 }
 
+#ifdef NETBE_DEBUG_INTERRUPT
 static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
 {
        struct list_head *ent;
@@ -758,6 +760,7 @@
 
        return IRQ_HANDLED;
 }
+#endif
 
 static int __init netback_init(void)
 {
@@ -794,6 +797,7 @@
 
        netif_xenbus_init();
 
+#ifdef NETBE_DEBUG_INTERRUPT
        (void)bind_virq_to_irqhandler(
                VIRQ_DEBUG,
                0,
@@ -801,6 +805,7 @@
                SA_SHIRQ, 
                "net-be-dbg",
                &netif_be_dbg);
+#endif
 
        return 0;
 }
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Jan  9 
11:22:17 2006
@@ -116,6 +116,8 @@
 #define RX_MAX_TARGET NET_RX_RING_SIZE
        int rx_min_target, rx_max_target, rx_target;
        struct sk_buff_head rx_batch;
+
+       struct timer_list rx_refill_timer;
 
        /*
         * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
@@ -517,6 +519,13 @@
 }
 
 
+static void rx_refill_timeout(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       netif_rx_schedule(dev);
+}
+
+
 static void network_alloc_rx_buffers(struct net_device *dev)
 {
        unsigned short id;
@@ -534,7 +543,7 @@
         * Allocate skbuffs greedily, even though we batch updates to the
         * receive ring. This creates a less bursty demand on the memory
         * allocator, so should reduce the chance of failed allocation requests
-        *  both for ourself and for other kernel subsystems.
+        * both for ourself and for other kernel subsystems.
         */
        batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
        for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
@@ -545,8 +554,15 @@
                skb = alloc_xen_skb(
                        ((PAGE_SIZE - sizeof(struct skb_shared_info)) &
                         (-SKB_DATA_ALIGN(1))) - 16);
-               if (skb == NULL)
-                       break;
+               if (skb == NULL) {
+                       /* Any skbuffs queued for refill? Force them out. */
+                       if (i != 0)
+                               goto refill;
+                       /* Could not allocate any skbuffs. Try again later. */
+                       mod_timer(&np->rx_refill_timer,
+                                 jiffies + (HZ/10));
+                       return;
+               }
                __skb_queue_tail(&np->rx_batch, skb);
        }
 
@@ -554,6 +570,12 @@
        if (i < (np->rx_target/2))
                return;
 
+       /* Adjust our fill target if we risked running out of buffers. */
+       if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
+           ((np->rx_target *= 2) > np->rx_max_target))
+               np->rx_target = np->rx_max_target;
+
+ refill:
        for (i = 0; ; i++) {
                if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
                        break;
@@ -608,11 +630,6 @@
        /* Above is a suitable barrier to ensure backend will see requests. */
        np->rx.req_prod_pvt = req_prod + i;
        RING_PUSH_REQUESTS(&np->rx);
-
-       /* Adjust our fill target if we risked running out of buffers. */
-       if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
-           ((np->rx_target *= 2) > np->rx_max_target))
-               np->rx_target = np->rx_max_target;
 }
 
 
@@ -1077,6 +1094,10 @@
        np->rx_min_target = RX_MIN_TARGET;
        np->rx_max_target = RX_MAX_TARGET;
 
+       init_timer(&np->rx_refill_timer);
+       np->rx_refill_timer.data = (unsigned long)netdev;
+       np->rx_refill_timer.function = rx_refill_timeout;
+
        /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
        for (i = 0; i <= NET_TX_RING_SIZE; i++) {
                np->tx_skbs[i] = (void *)((unsigned long) i+1);
@@ -1188,29 +1209,26 @@
 
        DPRINTK("%s\n", dev->nodename);
 
-       netif_free(info);
-       kfree(info);
+       netif_disconnect_backend(info);
+       free_netdev(info->netdev);
 
        return 0;
 }
 
 
-static void netif_free(struct netfront_info *info)
-{
-       netif_disconnect_backend(info);
-       close_netdev(info);
-}
-
-
 static void close_netdev(struct netfront_info *info)
 {
-       if (info->netdev) {
+       spin_lock_irq(&info->netdev->xmit_lock);
+       netif_stop_queue(info->netdev);
+       spin_unlock_irq(&info->netdev->xmit_lock);
+
 #ifdef CONFIG_PROC_FS
-               xennet_proc_delif(info->netdev);
+       xennet_proc_delif(info->netdev);
 #endif
-               unregister_netdev(info->netdev);
-               info->netdev = NULL;
-       }
+
+       del_timer_sync(&info->rx_refill_timer);
+
+       unregister_netdev(info->netdev);
 }
 
 
@@ -1219,21 +1237,28 @@
        /* Stop old i/f to prevent errors whilst we rebuild the state. */
        spin_lock_irq(&info->tx_lock);
        spin_lock(&info->rx_lock);
-       netif_stop_queue(info->netdev);
-       /* info->backend_state = BEST_DISCONNECTED; */
+       info->backend_state = BEST_DISCONNECTED;
        spin_unlock(&info->rx_lock);
        spin_unlock_irq(&info->tx_lock);
-    
+
+       if (info->irq)
+               unbind_from_irqhandler(info->irq, info->netdev);
+       info->evtchn = info->irq = 0;
+
        end_access(info->tx_ring_ref, info->tx.sring);
        end_access(info->rx_ring_ref, info->rx.sring);
        info->tx_ring_ref = GRANT_INVALID_REF;
        info->rx_ring_ref = GRANT_INVALID_REF;
        info->tx.sring = NULL;
        info->rx.sring = NULL;
-
-       if (info->irq)
-               unbind_from_irqhandler(info->irq, info->netdev);
-       info->evtchn = info->irq = 0;
+}
+
+
+static void netif_free(struct netfront_info *info)
+{
+       close_netdev(info);
+       netif_disconnect_backend(info);
+       free_netdev(info->netdev);
 }
 
 
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Mon Jan  9 
11:22:17 2006
@@ -111,7 +111,6 @@
        struct xenbus_dev_data *u = filp->private_data;
        struct xenbus_dev_transaction *trans;
        void *reply;
-       int err = 0;
 
        if ((len + u->len) > sizeof(u->u.buffer))
                return -EINVAL;
@@ -136,41 +135,36 @@
        case XS_RM:
        case XS_SET_PERMS:
                reply = xenbus_dev_request_and_reply(&u->u.msg);
-               if (IS_ERR(reply)) {
-                       err = PTR_ERR(reply);
-               } else {
-                       if (u->u.msg.type == XS_TRANSACTION_START) {
-                               trans = kmalloc(sizeof(*trans), GFP_KERNEL);
-                               trans->handle = (struct xenbus_transaction *)
-                                       simple_strtoul(reply, NULL, 0);
-                               list_add(&trans->list, &u->transactions);
-                       } else if (u->u.msg.type == XS_TRANSACTION_END) {
-                               list_for_each_entry(trans, &u->transactions,
-                                                   list)
-                                       if ((unsigned long)trans->handle ==
-                                           (unsigned long)u->u.msg.tx_id)
-                                               break;
-                               BUG_ON(&trans->list == &u->transactions);
-                               list_del(&trans->list);
-                               kfree(trans);
-                       }
-                       queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
-                       queue_reply(u, (char *)reply, u->u.msg.len);
-                       kfree(reply);
+               if (IS_ERR(reply))
+                       return PTR_ERR(reply);
+
+               if (u->u.msg.type == XS_TRANSACTION_START) {
+                       trans = kmalloc(sizeof(*trans), GFP_KERNEL);
+                       if (!trans)
+                               return -ENOMEM;
+                       trans->handle = (struct xenbus_transaction *)
+                               simple_strtoul(reply, NULL, 0);
+                       list_add(&trans->list, &u->transactions);
+               } else if (u->u.msg.type == XS_TRANSACTION_END) {
+                       list_for_each_entry(trans, &u->transactions, list)
+                               if ((unsigned long)trans->handle ==
+                                   (unsigned long)u->u.msg.tx_id)
+                                       break;
+                       BUG_ON(&trans->list == &u->transactions);
+                       list_del(&trans->list);
+                       kfree(trans);
                }
+               queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
+               queue_reply(u, (char *)reply, u->u.msg.len);
+               kfree(reply);
                break;
 
        default:
-               err = -EINVAL;
-               break;
+               return -EINVAL;
        }
 
-       if (err == 0) {
-               u->len = 0;
-               err = len;
-       }
-
-       return err;
+       u->len = 0;
+       return len;
 }
 
 static int xenbus_dev_open(struct inode *inode, struct file *filp)
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Mon Jan  9 
11:22:17 2006
@@ -542,14 +542,6 @@
                             const char *type,
                             const char *nodename)
 {
-#define CHECK_FAIL                             \
-       do {                                    \
-               if (err)                        \
-                       goto fail;              \
-       }                                       \
-       while (0)                               \
-
-
        int err;
        struct xenbus_device *xendev;
        size_t stringlen;
@@ -584,19 +576,18 @@
        xendev->dev.release = xenbus_dev_release;
 
        err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
-       CHECK_FAIL;
+       if (err)
+               goto fail;
 
        /* Register with generic device framework. */
        err = device_register(&xendev->dev);
-       CHECK_FAIL;
+       if (err)
+               goto fail;
 
        device_create_file(&xendev->dev, &dev_attr_nodename);
        device_create_file(&xendev->dev, &dev_attr_devtype);
 
        return 0;
-
-#undef CHECK_FAIL
-
 fail:
        xenbus_dev_free(xendev);
        return err;
diff -r 25e3c8668f1f -r 8af1199488d3 
linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Mon Jan  9 
11:19:55 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Mon Jan  9 
11:22:17 2006
@@ -372,7 +372,7 @@
     int ret;
     __asm__ __volatile__ ( ";; mov r14=%2 ; mov r15=%3 ; mov r2=%1 ; break 
0x1000 ;; mov %0=r8 ;;"
         : "=r" (ret)
-        : "i" (__HYPERVISOR_console_io), "r"(cmd), "r"(arg)
+        : "i" (__HYPERVISOR_memory_op), "r"(cmd), "r"(arg)
         : "r14","r15","r2","r8","memory" );
     return ret;
 }
diff -r 25e3c8668f1f -r 8af1199488d3 tools/Makefile
--- a/tools/Makefile    Mon Jan  9 11:19:55 2006
+++ b/tools/Makefile    Mon Jan  9 11:22:17 2006
@@ -12,6 +12,7 @@
 SUBDIRS += security
 SUBDIRS += console
 SUBDIRS += xenmon
+SUBDIRS += guest-headers
 ifeq ($(VTPM_TOOLS),y)
 SUBDIRS += vtpm_manager
 SUBDIRS += vtpm
diff -r 25e3c8668f1f -r 8af1199488d3 tools/Rules.mk
--- a/tools/Rules.mk    Mon Jan  9 11:19:55 2006
+++ b/tools/Rules.mk    Mon Jan  9 11:22:17 2006
@@ -35,6 +35,8 @@
 mk-symlinks:
        mkdir -p xen
        ( cd xen && ln -sf ../$(XEN_ROOT)/xen/include/public/*.h . )
+       mkdir -p xen/hvm
+       ( cd xen/hvm && ln -sf ../../$(XEN_ROOT)/xen/include/public/hvm/*.h . )
        mkdir -p xen/io
        ( cd xen/io && ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . )
        mkdir -p xen/linux
diff -r 25e3c8668f1f -r 8af1199488d3 tools/debugger/libxendebug/xendebug.c
--- a/tools/debugger/libxendebug/xendebug.c     Mon Jan  9 11:19:55 2006
+++ b/tools/debugger/libxendebug/xendebug.c     Mon Jan  9 11:22:17 2006
@@ -119,8 +119,8 @@
 
     if ( !ctxt->valid[vcpu] )
     {
-        if ( (rc = xc_domain_get_vcpu_context(xc_handle, domid, vcpu, 
-                                              &ctxt->context[vcpu])) )
+        if ( (rc = xc_vcpu_getcontext(xc_handle, domid, vcpu, 
+                                      &ctxt->context[vcpu])) )
             return NULL;
 
         ctxt->valid[vcpu] = true;
@@ -139,10 +139,10 @@
         return -EINVAL;
 
     op.interface_version = DOM0_INTERFACE_VERSION;
-    op.cmd = DOM0_SETDOMAININFO;
-    op.u.setdomaininfo.domain = ctxt->domid;
-    op.u.setdomaininfo.vcpu = vcpu;
-    op.u.setdomaininfo.ctxt = &ctxt->context[vcpu];
+    op.cmd = DOM0_SETVCPUCONTEXT;
+    op.u.setvcpucontext.domain = ctxt->domid;
+    op.u.setvcpucontext.vcpu = vcpu;
+    op.u.setvcpucontext.ctxt = &ctxt->context[vcpu];
 
     if ( (rc = mlock(&ctxt->context[vcpu], sizeof(vcpu_guest_context_t))) )
         return rc;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/examples/xmexample.vmx
--- a/tools/examples/xmexample.vmx      Mon Jan  9 11:19:55 2006
+++ b/tools/examples/xmexample.vmx      Mon Jan  9 11:22:17 2006
@@ -28,7 +28,13 @@
 
 #-----------------------------------------------------------------------------
 # the number of cpus guest platform has, default=1
-vcpus=1
+#vcpus=1
+
+# enable/disable vmx guest ACPI, default=0 (disabled)
+#acpi=0
+
+# enable/disable vmx guest APIC, default=0 (disabled)
+#apic=0
 
 # List of which CPUS this domain is allowed to use, default Xen picks
 #cpus = ""         # leave to Xen to pick
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/Makefile
--- a/tools/firmware/vmxassist/Makefile Mon Jan  9 11:19:55 2006
+++ b/tools/firmware/vmxassist/Makefile Mon Jan  9 11:22:17 2006
@@ -24,7 +24,7 @@
 # The emulator code lives in ROM space
 TEXTADDR=0x000D0000
 
-DEFINES=-DDEBUG -D_ACPI_ -DTEXTADDR=$(TEXTADDR)
+DEFINES=-DDEBUG -DTEXTADDR=$(TEXTADDR)
 XENINC=-I$(XEN_ROOT)/tools/libxc
 
 LD       = ld
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/acpi_madt.c
--- a/tools/firmware/vmxassist/acpi_madt.c      Mon Jan  9 11:19:55 2006
+++ b/tools/firmware/vmxassist/acpi_madt.c      Mon Jan  9 11:22:17 2006
@@ -17,30 +17,73 @@
  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  * Place - Suite 330, Boston, MA 02111-1307 USA.
  */
+
 #include "../acpi/acpi2_0.h"
 #include "../acpi/acpi_madt.h"
+
+#include <xen/hvm/hvm_info_table.h>
 
 #define NULL ((void*)0)
 
 extern int puts(const char *s);
 
-#define VCPU_NR_PAGE        0x0009F000
-#define VCPU_NR_OFFSET      0x00000800
-#define VCPU_MAGIC          0x76637075  /* "vcpu" */
+static struct hvm_info_table *table = NULL;
 
-/* xc_vmx_builder wrote vcpu block at 0x9F800. Return it. */
-static int
+static int validate_hvm_info(struct hvm_info_table *t)
+{
+       char signature[] = "HVM INFO";
+       uint8_t *ptr = (uint8_t *)t;
+       uint8_t sum = 0;
+       int i;
+
+       /* strncmp(t->signature, "HVM INFO", 8) */
+       for (i = 0; i < 8; i++) {
+               if (signature[i] != t->signature[i]) {
+                       puts("Bad hvm info signature\n");
+                       return 0;
+               }
+       }
+
+       for (i = 0; i < t->length; i++)
+               sum += ptr[i];
+
+       return (sum == 0);
+}
+
+/* xc_vmx_builder wrote hvm info at 0x9F800. Return it. */
+static struct hvm_info_table *
+get_hvm_info_table(void)
+{
+       struct hvm_info_table *t;
+       int i;
+
+       if (table != NULL)
+               return table;
+
+       t = (struct hvm_info_table *)HVM_INFO_PADDR;
+
+       if (!validate_hvm_info(t)) {
+               puts("Bad hvm info table\n");
+               return NULL;
+       }
+
+       table = t;
+
+       return table;
+}
+
+int
 get_vcpu_nr(void)
 {
-       unsigned int *vcpus;
+       struct hvm_info_table *t = get_hvm_info_table();
+       return (t ? t->nr_vcpus : 1); /* default 1 vcpu */
+}
 
-       vcpus = (unsigned int *)(VCPU_NR_PAGE + VCPU_NR_OFFSET);
-       if (vcpus[0] != VCPU_MAGIC) {
-               puts("Bad vcpus magic, set vcpu number to 1 by default.\n");
-               return 1;
-       }
-
-       return vcpus[1];
+int
+get_acpi_enabled(void)
+{
+       struct hvm_info_table *t = get_hvm_info_table();
+       return (t ? t->acpi_enabled : 0); /* default no acpi */
 }
 
 static void *
@@ -74,10 +117,10 @@
        return madt;
 }
 
-static void 
+static void
 set_checksum(void *start, int checksum_offset, int len)
 {
-       unsigned char sum = 0;  
+       unsigned char sum = 0;
        unsigned char *ptr;
 
        ptr = start;
@@ -89,9 +132,9 @@
        ptr[checksum_offset] = -sum;
 }
 
-static int 
+static int
 acpi_madt_set_local_apics(
-       int nr_vcpu, 
+       int nr_vcpu,
        ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt)
 {
        int i;
@@ -104,14 +147,14 @@
                madt->LocalApic[i].Length          = sizeof 
(ACPI_LOCAL_APIC_STRUCTURE);
                madt->LocalApic[i].AcpiProcessorId = i;
                madt->LocalApic[i].ApicId          = i;
-               madt->LocalApic[i].Flags           = 1; 
+               madt->LocalApic[i].Flags           = 1;
        }
 
        madt->Header.Header.Length =
-               sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) - 
+               sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) -
                (MAX_VIRT_CPUS - nr_vcpu)* sizeof(ACPI_LOCAL_APIC_STRUCTURE);
 
-       return 0;                            
+       return 0;
 }
 
 #define FIELD_OFFSET(TYPE,Field) ((unsigned int)(&(((TYPE *) 0)->Field)))
@@ -133,7 +176,7 @@
                madt, FIELD_OFFSET(ACPI_TABLE_HEADER, Checksum),
                madt->Header.Header.Length);
 
-       return 0;              
+       return 0;
 }
 
 /*
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/vm86.h
--- a/tools/firmware/vmxassist/vm86.h   Mon Jan  9 11:19:55 2006
+++ b/tools/firmware/vmxassist/vm86.h   Mon Jan  9 11:22:17 2006
@@ -24,7 +24,7 @@
 #include <stdint.h>
 #endif
 
-#include <xen/vmx_assist.h>
+#include <xen/hvm/vmx_assist.h>
 
 #define        NR_EXCEPTION_HANDLER    32
 #define        NR_INTERRUPT_HANDLERS   16
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/vmxloader.c
--- a/tools/firmware/vmxassist/vmxloader.c      Mon Jan  9 11:19:55 2006
+++ b/tools/firmware/vmxassist/vmxloader.c      Mon Jan  9 11:22:17 2006
@@ -24,12 +24,10 @@
 #include "machine.h"
 #include "roms.h"
 
-#ifdef _ACPI_
 #include "acpi.h"
 #include "../acpi/acpi2_0.h"  // for ACPI_PHYSICAL_ADDRESS
 int acpi_madt_update(unsigned char* acpi_start);
-#endif
-
+int get_acpi_enabled(void);
 
 /*
  * C runtime start off
@@ -120,18 +118,17 @@
                memcpy((void *)0xC0000,
                        vgabios_stdvga, sizeof(vgabios_stdvga));
        }
-#ifdef _ACPI_
-       puts("Loading ACPI ...\n");
 
-       acpi_madt_update(acpi);
-
-       if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
-               /* make sure acpi table does not overlap rombios
-                * currently acpi less than 8K will be OK.
-                */
-                memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi));
+       if (get_acpi_enabled() != 0) {
+               puts("Loading ACPI ...\n");
+               acpi_madt_update((unsigned char*)acpi);
+               if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
+                       /* make sure acpi table does not overlap rombios
+                        * currently acpi less than 8K will be OK.
+                        */
+                       memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, 
sizeof(acpi));
+               }
        }
-#endif
 
        puts("Loading VMXAssist ...\n");
        memcpy((void *)TEXTADDR, vmxassist, sizeof(vmxassist));
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/hw/i8254.c
--- a/tools/ioemu/hw/i8254.c    Mon Jan  9 11:19:55 2006
+++ b/tools/ioemu/hw/i8254.c    Mon Jan  9 11:22:17 2006
@@ -23,7 +23,7 @@
  */
 #include "vl.h"
 #include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
 
 //#define DEBUG_PIT
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/hw/i8259.c
--- a/tools/ioemu/hw/i8259.c    Mon Jan  9 11:19:55 2006
+++ b/tools/ioemu/hw/i8259.c    Mon Jan  9 11:22:17 2006
@@ -23,7 +23,7 @@
  */
 #include "vl.h"
 #include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
 
 /* debug PIC */
 //#define DEBUG_PIC
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/hw/i8259_stub.c
--- a/tools/ioemu/hw/i8259_stub.c       Mon Jan  9 11:19:55 2006
+++ b/tools/ioemu/hw/i8259_stub.c       Mon Jan  9 11:22:17 2006
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 #include "xenctrl.h"
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
 #include <stdio.h>
 #include "cpu.h"
 #include "cpu-all.h"
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Mon Jan  9 11:19:55 2006
+++ b/tools/ioemu/target-i386-dm/helper2.c      Mon Jan  9 11:22:17 2006
@@ -48,7 +48,7 @@
 #include <sys/ioctl.h>
 
 #include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
 #include <xen/linux/evtchn.h>
 
 #include "cpu.h"
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Mon Jan  9 11:19:55 2006
+++ b/tools/ioemu/vl.c  Mon Jan  9 11:22:17 2006
@@ -2948,6 +2948,7 @@
             case QEMU_OPTION_vcpus:
                 vcpus = atoi(optarg);
                 fprintf(logfile, "qemu: the number of cpus is %d\n", vcpus);
+                break;
             case QEMU_OPTION_pci:
                 pci_enabled = 1;
                 break;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_core.c     Mon Jan  9 11:22:17 2006
@@ -55,7 +55,7 @@
     }
  
     for (i = 0; i < info.max_vcpu_id; i++)
-        if (xc_domain_get_vcpu_context(xc_handle, domid,
+        if (xc_vcpu_getcontext(xc_handle, domid,
                                        i, &ctxt[nr_vcpus]) == 0)
             nr_vcpus++;
  
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_domain.c   Mon Jan  9 11:22:17 2006
@@ -58,16 +58,16 @@
     return do_dom0_op(xc_handle, &op);
 }
 
-int xc_domain_pincpu(int xc_handle,
-                     uint32_t domid, 
-                     int vcpu,
-                     cpumap_t cpumap)
-{
-    DECLARE_DOM0_OP;
-    op.cmd = DOM0_PINCPUDOMAIN;
-    op.u.pincpudomain.domain  = (domid_t)domid;
-    op.u.pincpudomain.vcpu    = vcpu;
-    op.u.pincpudomain.cpumap  = cpumap;
+int xc_vcpu_setaffinity(int xc_handle,
+                        uint32_t domid, 
+                        int vcpu,
+                        cpumap_t cpumap)
+{
+    DECLARE_DOM0_OP;
+    op.cmd = DOM0_SETVCPUAFFINITY;
+    op.u.setvcpuaffinity.domain  = (domid_t)domid;
+    op.u.setvcpuaffinity.vcpu    = vcpu;
+    op.u.setvcpuaffinity.cpumap  = cpumap;
     return do_dom0_op(xc_handle, &op);
 }
 
@@ -155,7 +155,7 @@
     return ret;
 }
 
-int xc_domain_get_vcpu_context(int xc_handle,
+int xc_vcpu_getcontext(int xc_handle,
                                uint32_t domid,
                                uint32_t vcpu,
                                vcpu_guest_context_t *ctxt)
@@ -345,10 +345,10 @@
     return do_dom0_op(xc_handle, &op);
 }
 
-int xc_domain_get_vcpu_info(int xc_handle,
-                            uint32_t domid,
-                            uint32_t vcpu,
-                            xc_vcpuinfo_t *info)
+int xc_vcpu_getinfo(int xc_handle,
+                    uint32_t domid,
+                    uint32_t vcpu,
+                    xc_vcpuinfo_t *info)
 {
     int rc;
     DECLARE_DOM0_OP;
@@ -380,18 +380,18 @@
     return do_dom0_op(xc_handle, &op);
 }
 
-int xc_domain_setinfo(int xc_handle,
-                      uint32_t domid,
-                      uint32_t vcpu,
-                      vcpu_guest_context_t *ctxt)
+int xc_vcpu_setcontext(int xc_handle,
+                       uint32_t domid,
+                       uint32_t vcpu,
+                       vcpu_guest_context_t *ctxt)
 {
     dom0_op_t op;
     int rc;
 
-    op.cmd = DOM0_SETDOMAININFO;
-    op.u.setdomaininfo.domain = domid;
-    op.u.setdomaininfo.vcpu = vcpu;
-    op.u.setdomaininfo.ctxt = ctxt;
+    op.cmd = DOM0_SETVCPUCONTEXT;
+    op.u.setvcpucontext.domain = domid;
+    op.u.setvcpucontext.vcpu = vcpu;
+    op.u.setvcpucontext.ctxt = ctxt;
 
     if ( (rc = mlock(ctxt, sizeof(*ctxt))) != 0 )
         return rc;
@@ -402,6 +402,38 @@
 
     return rc;
 
+}
+
+int xc_domain_irq_permission(int xc_handle,
+                             uint32_t domid,
+                             uint8_t pirq,
+                             uint8_t allow_access)
+{
+    dom0_op_t op;
+
+    op.cmd = DOM0_IRQ_PERMISSION;
+    op.u.irq_permission.domain = domid;
+    op.u.irq_permission.pirq = pirq;
+    op.u.irq_permission.allow_access = allow_access;
+
+    return do_dom0_op(xc_handle, &op);
+}
+
+int xc_domain_iomem_permission(int xc_handle,
+                               uint32_t domid,
+                               unsigned long first_pfn,
+                               unsigned long nr_pfns,
+                               uint8_t allow_access)
+{
+    dom0_op_t op;
+
+    op.cmd = DOM0_IOMEM_PERMISSION;
+    op.u.iomem_permission.domain = domid;
+    op.u.iomem_permission.first_pfn = first_pfn;
+       op.u.iomem_permission.nr_pfns = nr_pfns;
+    op.u.iomem_permission.allow_access = allow_access;
+
+    return do_dom0_op(xc_handle, &op);
 }
 
 /*
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c       Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_ia64_stubs.c       Mon Jan  9 11:22:17 2006
@@ -5,7 +5,7 @@
 #include <stdlib.h>
 #include <zlib.h>
 #include "xen/arch-ia64.h"
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
 
 /* this is a very ugly way of getting FPSR_DEFAULT.  struct ia64_fpreg is
  * mysteriously declared in two places: /usr/include/asm/fpu.h and
@@ -627,6 +627,7 @@
                  unsigned int control_evtchn,
                  unsigned int lapic,
                  unsigned int vcpus,
+                 unsigned int acpi,
                  unsigned int store_evtchn,
                  unsigned long *store_mfn)
 {
@@ -663,7 +664,7 @@
         goto error_out;
     }
 
-    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) ){
+    if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) ){
         PERROR("Could not get vcpu context");
         goto error_out;
     }
@@ -687,11 +688,11 @@
 
     memset( &launch_op, 0, sizeof(launch_op) );
 
-    launch_op.u.setdomaininfo.domain = (domid_t)domid;
-    launch_op.u.setdomaininfo.vcpu   = 0;
-    launch_op.u.setdomaininfo.ctxt   = ctxt;
-
-    launch_op.cmd = DOM0_SETDOMAININFO;
+    launch_op.u.setvcpucontext.domain = (domid_t)domid;
+    launch_op.u.setvcpucontext.vcpu   = 0;
+    launch_op.u.setvcpucontext.ctxt   = ctxt;
+
+    launch_op.cmd = DOM0_SETVCPUCONTEXT;
     rc = do_dom0_op(xc_handle, &launch_op);
     return rc;
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_linux_build.c      Mon Jan  9 11:22:17 2006
@@ -393,10 +393,14 @@
     start_info->store_evtchn = store_evtchn;
     start_info->console_mfn   = nr_pages - 1;
     start_info->console_evtchn = console_evtchn;
+    start_info->nr_pages       = nr_pages;     // FIXME?: nr_pages - 2 ????
     if ( initrd_len != 0 )
     {
         ctxt->initrd.start    = vinitrd_start;
         ctxt->initrd.size     = initrd_len;
+    } else {
+        ctxt->initrd.start    = 0;
+        ctxt->initrd.size     = 0;
     }
     strncpy((char *)ctxt->cmdline, cmdline, IA64_COMMAND_LINE_SIZE);
     ctxt->cmdline[IA64_COMMAND_LINE_SIZE-1] = '\0';
@@ -790,7 +794,7 @@
         goto error_out;
     }
 
-    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+    if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
     {
         PERROR("Could not get vcpu context");
         goto error_out;
@@ -893,11 +897,11 @@
 
     memset( &launch_op, 0, sizeof(launch_op) );
 
-    launch_op.u.setdomaininfo.domain = (domid_t)domid;
-    launch_op.u.setdomaininfo.vcpu   = 0;
-    launch_op.u.setdomaininfo.ctxt   = ctxt;
-
-    launch_op.cmd = DOM0_SETDOMAININFO;
+    launch_op.u.setvcpucontext.domain = (domid_t)domid;
+    launch_op.u.setvcpucontext.vcpu   = 0;
+    launch_op.u.setvcpucontext.ctxt   = ctxt;
+
+    launch_op.cmd = DOM0_SETVCPUCONTEXT;
     rc = xc_dom0_op(xc_handle, &launch_op);
     
     return rc;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_linux_restore.c    Mon Jan  9 11:22:17 2006
@@ -171,7 +171,7 @@
 
 
     /* Only have to worry about vcpu 0 even for SMP */
-    if (xc_domain_get_vcpu_context( xc_handle, dom, 0, &ctxt)) {
+    if (xc_vcpu_getcontext( xc_handle, dom, 0, &ctxt)) {
         ERR("Could not get vcpu context");
         goto out;
     }
@@ -735,10 +735,10 @@
 
     DPRINTF("Domain ready to be built.\n");
 
-    op.cmd = DOM0_SETDOMAININFO;
-    op.u.setdomaininfo.domain = (domid_t)dom;
-    op.u.setdomaininfo.vcpu   = 0;
-    op.u.setdomaininfo.ctxt   = &ctxt;
+    op.cmd = DOM0_SETVCPUCONTEXT;
+    op.u.setvcpucontext.domain = (domid_t)dom;
+    op.u.setvcpucontext.vcpu   = 0;
+    op.u.setvcpucontext.ctxt   = &ctxt;
     rc = xc_dom0_op(xc_handle, &op);
 
     if (rc != 0) {
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_linux_save.c       Mon Jan  9 11:22:17 2006
@@ -382,7 +382,7 @@
         return -1;
     }
 
-    if ( xc_domain_get_vcpu_context(xc_handle, dom, 0 /* XXX */, ctxt)) 
+    if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt)) 
         ERR("Could not get vcpu context");
 
 
@@ -643,7 +643,7 @@
     }
     
     /* Only have to worry about vcpu 0 even for SMP */
-    if (xc_domain_get_vcpu_context(xc_handle, dom, 0, &ctxt)) {
+    if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
         ERR("Could not get vcpu context");
         goto out;
     }
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_pagetab.c
--- a/tools/libxc/xc_pagetab.c  Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_pagetab.c  Mon Jan  9 11:22:17 2006
@@ -74,7 +74,7 @@
 #define pt_levels 4
 #endif
 
-    if (xc_domain_get_vcpu_context(xc_handle, dom, vcpu, &ctx) != 0) {
+    if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0) {
         fprintf(stderr, "failed to retreive vcpu context\n");
         goto out;
     }
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_ptrace.c   Mon Jan  9 11:22:17 2006
@@ -33,7 +33,7 @@
     if (online)
         *online = 0;
     if ( !(regs_valid & (1 << cpu)) ) { 
-        retval = xc_domain_get_vcpu_context(xc_handle, current_domid, 
+        retval = xc_vcpu_getcontext(xc_handle, current_domid, 
                                                cpu, &ctxt[cpu]);
         if ( retval ) 
             goto done;
@@ -43,8 +43,7 @@
        if ( online == NULL )
            goto done;
 
-       retval = xc_domain_get_vcpu_info(xc_handle, current_domid,
-                                        cpu, &info);
+       retval = xc_vcpu_getinfo(xc_handle, current_domid, cpu, &info);
        *online = info.online;
     
  done:
@@ -395,7 +394,7 @@
 
     case PTRACE_SETREGS:
         SET_XC_REGS(((struct gdb_regs *)data), ctxt[cpu].user_regs);
-        retval = xc_domain_setinfo(xc_handle, current_domid, cpu, &ctxt[cpu]);
+        retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]);
         if (retval)
             goto error_out;
         break;
@@ -405,7 +404,7 @@
          *  during single-stepping - but that just seems retarded
          */
         ctxt[cpu].user_regs.eflags |= PSL_T; 
-        retval = xc_domain_setinfo(xc_handle, current_domid, cpu, &ctxt[cpu]);
+        retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]);
         if ( retval )
         {
             perror("dom0 op failed");
@@ -423,8 +422,8 @@
                 /* Clear trace flag */
                 if ( ctxt[cpu].user_regs.eflags & PSL_T ) {
                     ctxt[cpu].user_regs.eflags &= ~PSL_T;
-                    retval = xc_domain_setinfo(xc_handle, current_domid, 
-                                               cpu, &ctxt[cpu]);
+                    retval = xc_vcpu_setcontext(xc_handle, current_domid, 
+                                                cpu, &ctxt[cpu]);
                     if ( retval ) {
                         perror("dom0 op failed");
                         goto error_out;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_vmx_build.c
--- a/tools/libxc/xc_vmx_build.c        Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_vmx_build.c        Mon Jan  9 11:22:17 2006
@@ -9,7 +9,8 @@
 #include <stdlib.h>
 #include <unistd.h>
 #include <zlib.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/hvm_info_table.h>
+#include <xen/hvm/ioreq.h>
 
 #define VMX_LOADER_ENTR_ADDR  0x00100000
 
@@ -33,9 +34,6 @@
 #define E820_MAP_NR_OFFSET  0x000001E8
 #define E820_MAP_OFFSET     0x000002D0
 
-#define VCPU_NR_PAGE        0x0009F000
-#define VCPU_NR_OFFSET      0x00000800
-
 struct e820entry {
     uint64_t addr;
     uint64_t size;
@@ -119,26 +117,50 @@
     return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
 }
 
+static void
+set_hvm_info_checksum(struct hvm_info_table *t)
+{
+    uint8_t *ptr = (uint8_t *)t, sum = 0;
+    unsigned int i;
+
+    t->checksum = 0;
+
+    for (i = 0; i < t->length; i++)
+        sum += *ptr++;
+
+    t->checksum = -sum;
+}
+
 /*
- * Use E820 reserved memory 0x9F800 to pass number of vcpus to vmxloader
- * vmxloader will use it to config ACPI MADT table
+ * Use E820 reserved memory 0x9F800 to pass HVM info to vmxloader
+ * vmxloader will use this info to set BIOS accordingly
  */
-#define VCPU_MAGIC      0x76637075  /* "vcpu" */
-static int set_vcpu_nr(int xc_handle, uint32_t dom,
-                        unsigned long *pfn_list, unsigned int vcpus)
-{
-    char         *va_map;
-    unsigned int *va_vcpus;
-
-    va_map = xc_map_foreign_range(xc_handle, dom,
-                                  PAGE_SIZE, PROT_READ|PROT_WRITE,
-                                  pfn_list[VCPU_NR_PAGE >> PAGE_SHIFT]);
+static int set_hvm_info(int xc_handle, uint32_t dom,
+                        unsigned long *pfn_list, unsigned int vcpus,
+                        unsigned int acpi, unsigned int apic)
+{
+    char *va_map;
+    struct hvm_info_table *va_hvm;
+
+    va_map = xc_map_foreign_range(
+        xc_handle,
+        dom,
+        PAGE_SIZE,
+        PROT_READ|PROT_WRITE,
+        pfn_list[HVM_INFO_PFN]);
+    
     if ( va_map == NULL )
         return -1;
 
-    va_vcpus = (unsigned int *)(va_map + VCPU_NR_OFFSET);
-    va_vcpus[0] = VCPU_MAGIC;
-    va_vcpus[1] = vcpus;
+    va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
+    memset(va_hvm, 0, sizeof(*va_hvm));
+    strncpy(va_hvm->signature, "HVM INFO", 8);
+    va_hvm->length       = sizeof(struct hvm_info_table);
+    va_hvm->acpi_enabled = acpi;
+    va_hvm->apic_enabled = apic;
+    va_hvm->nr_vcpus     = vcpus;
+
+    set_hvm_info_checksum(va_hvm);
 
     munmap(va_map, PAGE_SIZE);
 
@@ -279,8 +301,9 @@
                        vcpu_guest_context_t *ctxt,
                        unsigned long shared_info_frame,
                        unsigned int control_evtchn,
-                       unsigned int lapic,
                        unsigned int vcpus,
+                       unsigned int acpi,
+                       unsigned int apic,
                        unsigned int store_evtchn,
                        unsigned long *store_mfn)
 {
@@ -490,20 +513,14 @@
             goto error_out;
     }
 
-    if (set_vcpu_nr(xc_handle, dom, page_array, vcpus)) {
-        fprintf(stderr, "Couldn't set vcpu number for VMX guest.\n");
-        goto error_out;
-    }
-
-    *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
-    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
-        goto error_out;
-
-    shared_page_frame = (v_end - PAGE_SIZE) >> PAGE_SHIFT;
-
-    if ((e820_page = xc_map_foreign_range(
-        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-        page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0)
+    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) {
+        fprintf(stderr, "Couldn't set hvm info for VMX guest.\n");
+        goto error_out;
+    }
+
+    if ( (e820_page = xc_map_foreign_range(
+         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+         page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
         goto error_out;
     memset(e820_page, 0, PAGE_SIZE);
     e820_map_nr = build_e820map(e820_page, v_end);
@@ -518,25 +535,29 @@
     munmap(e820_page, PAGE_SIZE);
 
     /* shared_info page starts its life empty. */
-    if ((shared_info = xc_map_foreign_range(
-        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-        shared_info_frame)) == 0)
+    if ( (shared_info = xc_map_foreign_range(
+         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+         shared_info_frame)) == 0 )
         goto error_out;
     memset(shared_info, 0, sizeof(shared_info_t));
     /* Mask all upcalls... */
     for ( i = 0; i < MAX_VIRT_CPUS; i++ )
         shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
-
     munmap(shared_info, PAGE_SIZE);
 
     /* Populate the event channel port in the shared page */
-    if ((sp = (shared_iopage_t *) xc_map_foreign_range(
-        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-        page_array[shared_page_frame])) == 0)
+    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
+    if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
+         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+         shared_page_frame)) == 0 )
         goto error_out;
     memset(sp, 0, PAGE_SIZE);
     sp->sp_global.eport = control_evtchn;
     munmap(sp, PAGE_SIZE);
+
+    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
+    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
+        goto error_out;
 
     /* Send the page update requests down to the hypervisor. */
     if ( xc_finish_mmu_updates(xc_handle, mmu) )
@@ -559,7 +580,7 @@
     ctxt->user_regs.eax = 0;
     ctxt->user_regs.esp = 0;
     ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot 
cpu */
-    ctxt->user_regs.ecx = lapic;
+    ctxt->user_regs.ecx = 0;
     ctxt->user_regs.esi = 0;
     ctxt->user_regs.edi = 0;
     ctxt->user_regs.ebp = 0;
@@ -572,29 +593,6 @@
     free(mmu);
     free(page_array);
     return -1;
-}
-
-#define VMX_FEATURE_FLAG 0x20
-
-static int vmx_identify(void)
-{
-    int eax, ecx;
-
-    __asm__ __volatile__ (
-#if defined(__i386__)
-                          "push %%ebx; cpuid; pop %%ebx"
-#elif defined(__x86_64__)
-                          "push %%rbx; cpuid; pop %%rbx"
-#endif
-                          : "=a" (eax), "=c" (ecx)
-                          : "0" (1)
-                          : "dx");
-
-    if (!(ecx & VMX_FEATURE_FLAG)) {
-        return -1;
-    }
-
-    return 0;
 }
 
 int xc_vmx_build(int xc_handle,
@@ -602,8 +600,9 @@
                  int memsize,
                  const char *image_name,
                  unsigned int control_evtchn,
-                 unsigned int lapic,
                  unsigned int vcpus,
+                 unsigned int acpi,
+                 unsigned int apic,
                  unsigned int store_evtchn,
                  unsigned long *store_mfn)
 {
@@ -613,10 +612,18 @@
     unsigned long nr_pages;
     char         *image = NULL;
     unsigned long image_size;
-
-    if ( vmx_identify() < 0 )
-    {
-        PERROR("CPU doesn't support VMX Extensions");
+    xen_capabilities_info_t xen_caps;
+
+    if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
+    {
+        PERROR("Failed to get xen version info");
+        goto error_out;
+    }
+
+    if ( !strstr(xen_caps, "hvm") )
+    {
+        PERROR("CPU doesn't support VMX Extensions or "
+               "CPU VMX Extensions are not turned on");
         goto error_out;
     }
 
@@ -644,7 +651,7 @@
         goto error_out;
     }
 
-    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+    if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
     {
         PERROR("Could not get vcpu context");
         goto error_out;
@@ -659,7 +666,7 @@
 
     if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
                      ctxt, op.u.getdomaininfo.shared_info_frame, 
control_evtchn,
-                     lapic, vcpus, store_evtchn, store_mfn) < 0)
+                     vcpus, acpi, apic, store_evtchn, store_mfn) < 0)
     {
         ERROR("Error constructing guest OS");
         goto error_out;
@@ -701,11 +708,11 @@
 
     memset( &launch_op, 0, sizeof(launch_op) );
 
-    launch_op.u.setdomaininfo.domain = (domid_t)domid;
-    launch_op.u.setdomaininfo.vcpu   = 0;
-    launch_op.u.setdomaininfo.ctxt   = ctxt;
-
-    launch_op.cmd = DOM0_SETDOMAININFO;
+    launch_op.u.setvcpucontext.domain = (domid_t)domid;
+    launch_op.u.setvcpucontext.vcpu   = 0;
+    launch_op.u.setvcpucontext.ctxt   = ctxt;
+
+    launch_op.cmd = DOM0_SETVCPUCONTEXT;
     rc = xc_dom0_op(xc_handle, &launch_op);
 
     return rc;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xenctrl.h     Mon Jan  9 11:22:17 2006
@@ -181,10 +181,11 @@
  */
 int xc_domain_destroy(int xc_handle, 
                       uint32_t domid);
-int xc_domain_pincpu(int xc_handle,
-                     uint32_t domid,
-                     int vcpu,
-                     cpumap_t cpumap);
+
+int xc_vcpu_setaffinity(int xc_handle,
+                        uint32_t domid,
+                        int vcpu,
+                        cpumap_t cpumap);
 
 /**
  * This function will return information about one or more domains. It is
@@ -208,7 +209,7 @@
 
 
 /**
- * This function will set the vcpu context for the specified domain.
+ * This function will set the execution context for the specified vcpu.
  *
  * @parm xc_handle a handle to an open hypervisor interface
  * @parm domid the domain to set the vcpu context for
@@ -216,10 +217,10 @@
  * @parm ctxt pointer to the the cpu context with the values to set
  * @return the number of domains enumerated or -1 on error
  */
-int xc_domain_setinfo(int xc_handle,
-                      uint32_t domid,
-                      uint32_t vcpu,
-                      vcpu_guest_context_t *ctxt);
+int xc_vcpu_setcontext(int xc_handle,
+                       uint32_t domid,
+                       uint32_t vcpu,
+                       vcpu_guest_context_t *ctxt);
 /**
  * This function will return information about one or more domains, using a
  * single hypercall.  The domain information will be stored into the supplied
@@ -249,17 +250,16 @@
  *            domain
  * @return 0 on success, -1 on failure
  */
-int xc_domain_get_vcpu_context(int xc_handle,
+int xc_vcpu_getcontext(int xc_handle,
                                uint32_t domid,
                                uint32_t vcpu,
                                vcpu_guest_context_t *ctxt);
 
 typedef dom0_getvcpuinfo_t xc_vcpuinfo_t;
-int xc_domain_get_vcpu_info(int xc_handle,
-                            uint32_t domid,
-                            uint32_t vcpu,
-                            xc_vcpuinfo_t *info);
-
+int xc_vcpu_getinfo(int xc_handle,
+                    uint32_t domid,
+                    uint32_t vcpu,
+                    xc_vcpuinfo_t *info);
 
 int xc_domain_setcpuweight(int xc_handle,
                            uint32_t domid,
@@ -379,6 +379,17 @@
                                 uint32_t first_port,
                                 uint32_t nr_ports,
                                 uint32_t allow_access);
+
+int xc_domain_irq_permission(int xc_handle,
+                             uint32_t domid,
+                             uint8_t pirq,
+                             uint8_t allow_access);
+
+int xc_domain_iomem_permission(int xc_handle,
+                               uint32_t domid,
+                               unsigned long first_pfn,
+                               unsigned long nr_pfns,
+                               uint8_t allow_access);
 
 unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid, 
                                    unsigned long mfn);
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xenguest.h    Mon Jan  9 11:22:17 2006
@@ -56,8 +56,9 @@
                  int memsize,
                  const char *image_name,
                  unsigned int control_evtchn,
-                 unsigned int lapic,
                  unsigned int vcpus,
+                 unsigned int acpi,
+                 unsigned int apic,
                  unsigned int store_evtchn,
                  unsigned long *store_mfn);
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/lowlevel/xc/xc.c Mon Jan  9 11:22:17 2006
@@ -135,9 +135,9 @@
 }
 
 
-static PyObject *pyxc_domain_pincpu(XcObject *self,
-                                    PyObject *args,
-                                    PyObject *kwds)
+static PyObject *pyxc_vcpu_setaffinity(XcObject *self,
+                                       PyObject *args,
+                                       PyObject *kwds)
 {
     uint32_t dom;
     int vcpu = 0, i;
@@ -157,7 +157,7 @@
             cpumap |= (cpumap_t)1 << PyInt_AsLong(PyList_GetItem(cpulist, i));
     }
   
-    if ( xc_domain_pincpu(self->xc_handle, dom, vcpu, cpumap) != 0 )
+    if ( xc_vcpu_setaffinity(self->xc_handle, dom, vcpu, cpumap) != 0 )
         return PyErr_SetFromErrno(xc_error);
     
     Py_INCREF(zero);
@@ -297,7 +297,7 @@
                                       &dom, &vcpu) )
         return NULL;
 
-    rc = xc_domain_get_vcpu_info(self->xc_handle, dom, vcpu, &info);
+    rc = xc_vcpu_getinfo(self->xc_handle, dom, vcpu, &info);
     if ( rc < 0 )
         return PyErr_SetFromErrno(xc_error);
 
@@ -362,21 +362,23 @@
     uint32_t dom;
     char *image;
     int control_evtchn, store_evtchn;
+    int memsize;
     int vcpus = 1;
-    int lapic = 0;
-    int memsize;
+    int acpi = 0;
+    int apic = 0;
     unsigned long store_mfn = 0;
 
     static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
-                                "memsize", "image", "lapic", "vcpus", NULL };
-
-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisii", kwd_list,
+                                "memsize", "image", "vcpus", "acpi", "apic",
+                                NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisiii", kwd_list,
                                       &dom, &control_evtchn, &store_evtchn,
-                                      &memsize, &image, &lapic, &vcpus) )
+                                      &memsize, &image, &vcpus, &acpi, &apic) )
         return NULL;
 
     if ( xc_vmx_build(self->xc_handle, dom, memsize, image, control_evtchn,
-                      lapic, vcpus, store_evtchn, &store_mfn) != 0 )
+                      vcpus, acpi, apic, store_evtchn, &store_mfn) != 0 )
         return PyErr_SetFromErrno(xc_error);
 
     return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
@@ -774,6 +776,52 @@
     return zero;
 }
 
+static PyObject *pyxc_domain_irq_permission(PyObject *self,
+                                            PyObject *args,
+                                            PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+    uint32_t dom;
+    int pirq, allow_access, ret;
+
+    static char *kwd_list[] = { "dom", "pirq", "allow_access", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iii", kwd_list, 
+                                      &dom, &pirq, &allow_access) )
+        return NULL;
+
+    ret = xc_domain_irq_permission(
+        xc->xc_handle, dom, pirq, allow_access);
+    if ( ret != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_domain_iomem_permission(PyObject *self,
+                                               PyObject *args,
+                                               PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+    uint32_t dom;
+    unsigned long first_pfn, nr_pfns, allow_access, ret;
+
+    static char *kwd_list[] = { "dom", "first_pfn", "nr_pfns", "allow_access", 
NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "illi", kwd_list, 
+                                      &dom, &first_pfn, &nr_pfns, 
&allow_access) )
+        return NULL;
+
+    ret = xc_domain_iomem_permission(
+        xc->xc_handle, dom, first_pfn, nr_pfns, allow_access);
+    if ( ret != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
 
 static PyObject *dom_op(XcObject *self, PyObject *args,
                         int (*fn)(int, uint32_t))
@@ -842,8 +890,8 @@
       " dom [int]:    Identifier of domain to be destroyed.\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
-    { "domain_pincpu", 
-      (PyCFunction)pyxc_domain_pincpu, 
+    { "vcpu_setaffinity", 
+      (PyCFunction)pyxc_vcpu_setaffinity, 
       METH_VARARGS | METH_KEYWORDS, "\n"
       "Pin a VCPU to a specified set CPUs.\n"
       " dom [int]:     Identifier of domain to which VCPU belongs.\n"
@@ -1067,6 +1115,25 @@
       " dom          [int]: Identifier of domain to be allowed access.\n"
       " first_port   [int]: First IO port\n"
       " nr_ports     [int]: Number of IO ports\n"
+      " allow_access [int]: Non-zero means enable access; else disable 
access\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "domain_irq_permission",
+      (PyCFunction)pyxc_domain_irq_permission,
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Allow a domain access to a physical IRQ\n"
+      " dom          [int]: Identifier of domain to be allowed access.\n"
+      " pirq         [int]: The Physical IRQ\n"
+      " allow_access [int]: Non-zero means enable access; else disable 
access\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "domain_iomem_permission",
+      (PyCFunction)pyxc_domain_iomem_permission,
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Allow a domain access to a range of IO memory pages\n"
+      " dom          [int]: Identifier of domain to be allowed access.\n"
+      " first_pfn   [long]: First page of I/O Memory\n"
+      " nr_pfns     [long]: Number of pages of I/O Memory (>0)\n"
       " allow_access [int]: Non-zero means enable access; else disable 
access\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xend/XendDomain.py       Mon Jan  9 11:22:17 2006
@@ -443,7 +443,7 @@
         cpumap = map(lambda x: int(x),
                      cpumap.replace("[", "").replace("]", "").split(","))
         try:
-            return xc.domain_pincpu(dominfo.getDomid(), vcpu, cpumap)
+            return xc.vcpu_setaffinity(dominfo.getDomid(), vcpu, cpumap)
         except Exception, ex:
             raise XendError(str(ex))
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xend/XendDomainInfo.py   Mon Jan  9 11:22:17 2006
@@ -1179,7 +1179,7 @@
                 for v in range(0, self.info['max_vcpu_id']+1):
                     # pincpu takes a list of ints
                     cpu = [ int( cpus[v % len(cpus)] ) ]
-                    xc.domain_pincpu(self.domid, v, cpu)
+                    xc.vcpu_setaffinity(self.domid, v, cpu)
 
             m = self.image.getDomainMemory(self.info['memory'] * 1024)
             balloon.free(m)
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xend/image.py    Mon Jan  9 11:22:17 2006
@@ -189,11 +189,16 @@
     def configure(self, imageConfig, deviceConfig):
         ImageHandler.configure(self, imageConfig, deviceConfig)
 
+        info = xc.xeninfo()
+        if not 'hvm' in info['xen_caps']:
+            raise VmError("vmx: not an Intel VT platform, we stop creating!")
+
         self.dmargs = self.parseDeviceModelArgs(imageConfig, deviceConfig)
         self.device_model = sxp.child_value(imageConfig, 'device_model')
         if not self.device_model:
             raise VmError("vmx: missing device model")
         self.display = sxp.child_value(imageConfig, 'display')
+        self.xauthority = sxp.child_value(imageConfig, 'xauthority')
 
         self.vm.storeVm(("image/dmargs", " ".join(self.dmargs)),
                         ("image/device-model", self.device_model),
@@ -204,10 +209,8 @@
 
         self.dmargs += self.configVNC(imageConfig)
 
-        self.lapic = 0
-        lapic = sxp.child_value(imageConfig, 'lapic')
-        if not lapic is None:
-            self.lapic = int(lapic)
+        self.acpi = int(sxp.child_value(imageConfig, 'acpi', 0))
+        self.apic = int(sxp.child_value(imageConfig, 'apic', 0))
 
     def buildDomain(self):
         # Create an event channel
@@ -222,17 +225,18 @@
         log.debug("control_evtchn = %d", self.device_channel)
         log.debug("store_evtchn   = %d", store_evtchn)
         log.debug("memsize        = %d", self.vm.getMemoryTarget() / 1024)
-        log.debug("lapic          = %d", self.lapic)
         log.debug("vcpus          = %d", self.vm.getVCpuCount())
+        log.debug("acpi           = %d", self.acpi)
+        log.debug("apic           = %d", self.apic)
 
         return xc.vmx_build(dom            = self.vm.getDomid(),
                             image          = self.kernel,
                             control_evtchn = self.device_channel,
                             store_evtchn   = store_evtchn,
                             memsize        = self.vm.getMemoryTarget() / 1024,
-                            lapic          = self.lapic,
-                            vcpus          = self.vm.getVCpuCount())
-
+                            vcpus          = self.vm.getVCpuCount(),
+                            acpi           = self.acpi,
+                            apic           = self.apic)
 
     # Return a list of cmd line args to the device models based on the
     # xm config file
@@ -264,44 +268,44 @@
         nics = 0
         for (name, info) in deviceConfig:
             if name == 'vbd':
-               uname = sxp.child_value(info, 'uname')
-               typedev = sxp.child_value(info, 'dev')
-               (_, vbdparam) = string.split(uname, ':', 1)
-               if re.match('^ioemu:', typedev):
-                  (emtype, vbddev) = string.split(typedev, ':', 1)
-               else:
-                  emtype = 'vbd'
-                  vbddev = typedev
-               if emtype != 'ioemu':
-                  continue;
-               vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
-               if vbddev not in vbddev_list:
-                  raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
-               ret.append("-%s" % vbddev)
-               ret.append("%s" % vbdparam)
+                uname = sxp.child_value(info, 'uname')
+                typedev = sxp.child_value(info, 'dev')
+                (_, vbdparam) = string.split(uname, ':', 1)
+                if 'ioemu:' in typedev:
+                    (emtype, vbddev) = string.split(typedev, ':', 1)
+                else:
+                    emtype = 'vbd'
+                    vbddev = typedev
+                if emtype == 'vbd':
+                    continue;
+                vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
+                if vbddev not in vbddev_list:
+                    raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
+                ret.append("-%s" % vbddev)
+                ret.append("%s" % vbdparam)
             if name == 'vif':
-               type = sxp.child_value(info, 'type')
-               if type != 'ioemu':
-                   continue
-               nics += 1
-               if mac != None:
-                   continue
-               mac = sxp.child_value(info, 'mac')
-               bridge = sxp.child_value(info, 'bridge')
-               if mac == None:
-                   mac = randomMAC()
-               if bridge == None:
-                   bridge = 'xenbr0'
-               ret.append("-macaddr")
-               ret.append("%s" % mac)
-               ret.append("-bridge")
-               ret.append("%s" % bridge)
+                type = sxp.child_value(info, 'type')
+                if type != 'ioemu':
+                    continue
+                nics += 1
+                if mac != None:
+                    continue
+                mac = sxp.child_value(info, 'mac')
+                bridge = sxp.child_value(info, 'bridge')
+                if mac == None:
+                    mac = randomMAC()
+                if bridge == None:
+                    bridge = 'xenbr0'
+                ret.append("-macaddr")
+                ret.append("%s" % mac)
+                ret.append("-bridge")
+                ret.append("%s" % bridge)
             if name == 'vtpm':
-               instance = sxp.child_value(info, 'pref_instance')
-               ret.append("-instance")
-               ret.append("%s" % instance)
+                instance = sxp.child_value(info, 'pref_instance')
+                ret.append("-instance")
+                ret.append("%s" % instance)
         ret.append("-nics")
-        ret.append("%d" % nics) 
+        ret.append("%d" % nics)
         return ret
 
     def configVNC(self, config):
@@ -340,6 +344,8 @@
         env = dict(os.environ)
         if self.display:
             env['DISPLAY'] = self.display
+        if self.xauthority:
+            env['XAUTHORITY'] = self.xauthority
         log.info("spawning device models: %s %s", self.device_model, args)
         self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env)
         log.info("device model pid: %d", self.pid)
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xend/server/blkif.py     Mon Jan  9 11:22:17 2006
@@ -31,7 +31,7 @@
     """Block device interface controller. Handles all block devices
     for a domain.
     """
-    
+
     def __init__(self, vm):
         """Create a block device controller.
         """
@@ -40,9 +40,9 @@
 
     def getDeviceDetails(self, config):
         """@see DevController.getDeviceDetails"""
-        
+
         dev = sxp.child_value(config, 'dev')
-        if re.match('^ioemu:', dev):
+        if 'ioemu:' in dev:
             return (None,{},{})
 
         devid = blkif.blkdev_name_to_number(dev)
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xm/create.py     Mon Jan  9 11:22:17 2006
@@ -160,9 +160,13 @@
           fn=set_int, default=None,
           use="CPUS to run the domain on.")
 
-gopts.var('lapic', val='LAPIC',
+gopts.var('acpi', val='ACPI',
           fn=set_int, default=0,
-          use="Disable or enable local APIC of VMX domain.")
+          use="Disable or enable ACPI of VMX domain.")
+
+gopts.var('apic', val='APIC',
+          fn=set_int, default=0,
+          use="Disable or enable APIC of VMX domain.")
 
 gopts.var('vcpus', val='VCPUS',
           fn=set_int, default=1,
@@ -387,6 +391,10 @@
 gopts.var('display', val='DISPLAY',
           fn=set_value, default=None,
           use="X11 display to use")
+
+gopts.var('xauthority', val='XAUTHORITY',
+          fn=set_value, default=None,
+          use="X11 Authority to use")
 
 
 def err(msg):
@@ -526,7 +534,8 @@
     """
     args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb',
              'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'audio',
-             'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'lapic']
+             'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'acpi', 'apic',
+             'xauthority' ]
     for a in args:
         if (vals.__dict__[a]):
             config_image.append([a, vals.__dict__[a]])
@@ -801,6 +810,9 @@
     if not gopts.vals.display:
         gopts.vals.display = os.getenv("DISPLAY")
 
+    if not gopts.vals.xauthority:
+        gopts.vals.xauthority = os.getenv("XAUTHORITY")
+
     # Process remaining args as config variables.
     for arg in args:
         if '=' in arg:
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xm/main.py       Mon Jan  9 11:22:17 2006
@@ -390,7 +390,6 @@
 
 
 def xm_vcpu_list(args):
-    print 'Name                              ID  VCPU  CPU  State  Time(s)  
CPU Affinity'
 
     from xen.xend.XendClient import server
     if args:
@@ -400,6 +399,8 @@
         dominfo = map(
             lambda x: server.xend_domain_vcpuinfo(sxp.child_value(x, 'name')),
             doms)
+
+    print 'Name                              ID  VCPU  CPU  State  Time(s)  
CPU Affinity'
 
     for dom in dominfo:
         def get_info(n):
@@ -625,6 +626,8 @@
     server.xend_domain_cpu_sedf_set(dom, *v)
 
 def xm_info(args):
+    arg_check(args, "info", 0)
+
     from xen.xend.XendClient import server
     info = server.xend_node()
     
@@ -645,9 +648,12 @@
 
 
 def xm_top(args):
+    arg_check(args, "top", 0)
+
     os.execvp('xentop', ['xentop'])
 
 def xm_dmesg(args):
+    arg_check(args, "dmesg", 0)
     
     gopts = Opts(use="""[-c|--clear]
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Mon Jan  9 11:19:55 2006
+++ b/tools/tests/test_x86_emulator.c   Mon Jan  9 11:22:17 2006
@@ -92,7 +92,7 @@
     regs.ecx    = 0x12345678;
     cr2         = (unsigned long)&res;
     res         = 0x7FFFFFFF;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.eflags != 0xa94) ||
@@ -110,7 +110,7 @@
     regs.ecx    = 0x12345678UL;
 #endif
     cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.ecx != 0x8000000FUL) ||
@@ -125,7 +125,7 @@
     regs.eax    = 0x92345677UL;
     regs.ecx    = 0xAA;
     cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          (regs.eflags != 0x244) ||
@@ -141,7 +141,7 @@
     regs.eax    = 0xAABBCC77UL;
     regs.ecx    = 0xFF;
     cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          ((regs.eflags&0x240) != 0x200) ||
@@ -157,7 +157,7 @@
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
     cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0x12345678) || 
          (regs.eflags != 0x200) ||
@@ -174,7 +174,7 @@
     regs.eax    = 0x923456AAUL;
     regs.ecx    = 0xDDEEFF00L;
     cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0xDDEEFF00) || 
          (regs.eflags != 0x244) ||
@@ -193,7 +193,7 @@
     regs.edi    = (unsigned long)&res + 2;
     regs.error_code = 0; /* read fault */
     cr2         = regs.esi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0x44554455) ||
          (regs.eflags != 0x200) ||
@@ -211,7 +211,7 @@
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)&res;
     cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0x2233445D) ||
          ((regs.eflags&0x201) != 0x201) ||
@@ -229,7 +229,7 @@
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
     cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -243,7 +243,7 @@
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
     cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -260,7 +260,7 @@
     regs.ecx    = 0x12345678;
     cr2         = (unsigned long)&res;
     res         = 0x82;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) ||
          (res != 0x82) ||
          (regs.ecx != 0xFFFFFF82) ||
@@ -275,7 +275,7 @@
     regs.ecx    = 0x12345678;
     cr2         = (unsigned long)&res;
     res         = 0x1234aa82;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) ||
          (res != 0x1234aa82) ||
          (regs.ecx != 0xaa82) ||
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/README
--- a/tools/vtpm_manager/README Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/README Mon Jan  9 11:22:17 2006
@@ -53,11 +53,6 @@
 
 MANUAL_DM_LAUNCH             -> Must manually launch & kill VTPMs
 
-WELL_KNOWN_SRK_AUTH          -> Rather than randomly generating the password 
for the SRK,
-                                use a well known value. This is necessary for 
sharing use
-                                of the SRK across applications. Such as VTPM 
and Dom0
-                                measurement software.
-
 WELL_KNOWN_OWNER_AUTH        -> Rather than randomly generating the password 
for the owner,
                                 use a well known value. This is useful for 
debugging and for
                                 poor bios which do not support clearing TPM if 
OwnerAuth is
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/Rules.mk
--- a/tools/vtpm_manager/Rules.mk       Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/Rules.mk       Mon Jan  9 11:22:17 2006
@@ -56,8 +56,7 @@
 # Do not have manager launch DMs.
 #CFLAGS += -DMANUAL_DM_LAUNCH
 
-# Fixed SRK
-CFLAGS += -DWELL_KNOWN_SRK_AUTH
+# Fixed OwnerAuth
 #CFLAGS += -DWELL_KNOWN_OWNER_AUTH
 
 # TPM Hardware Device or TPM Simulator
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/securestorage.c
--- a/tools/vtpm_manager/manager/securestorage.c        Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/securestorage.c        Mon Jan  9 11:22:17 2006
@@ -65,7 +65,7 @@
   UINT32 i;
   struct pack_constbuf_t symkey_cipher32, data_cipher32;
   
-  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping[%d]: 0x", buffer_len(inbuf));
+  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping Input[%d]: 0x", 
buffer_len(inbuf));
   for (i=0; i< buffer_len(inbuf); i++)
     vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", inbuf->bytes[i]);
   vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
@@ -94,6 +94,12 @@
               BSG_TPM_SIZE32_DATA, &data_cipher32);
 
   vtpmloginfo(VTPM_LOG_VTPM, "Saved %d bytes of E(symkey) + %d bytes of 
E(data)\n", buffer_len(&symkey_cipher), buffer_len(&data_cipher));
+
+  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping Output[%d]: 0x", 
buffer_len(sealed_data));
+  for (i=0; i< buffer_len(sealed_data); i++)
+    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", sealed_data->bytes[i]);
+  vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
   goto egress;
 
  abort_egress:
@@ -125,7 +131,7 @@
 
   memset(&symkey, 0, sizeof(symkey_t));
 
-  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "envelope decrypting[%ld]: 0x", cipher_size);
+  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Envelope Decrypt Input[%ld]: 0x", 
cipher_size);
   for (i=0; i< cipher_size; i++)
     vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cipher[i]);
   vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
@@ -155,6 +161,11 @@
   
   // Decrypt State
   TPMTRY(TPM_DECRYPT_ERROR, Crypto_symcrypto_decrypt (&symkey, &data_cipher, 
unsealed_data) );
+
+  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Envelope Decrypte Output[%d]: 0x", 
buffer_len(unsealed_data));
+  for (i=0; i< buffer_len(unsealed_data); i++)
+    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", unsealed_data->bytes[i]);
+  vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
   
   goto egress;
   
@@ -291,124 +302,175 @@
   return status;
 }
 
+
 TPM_RESULT VTPM_SaveService(void) {
   TPM_RESULT status=TPM_SUCCESS;
   int fh, dmis=-1;
-  
-  BYTE *flat_global;
-  int flat_global_size, bytes_written;
+
+  BYTE *flat_boot_key, *flat_dmis, *flat_enc;
+  buffer_t clear_flat_global, enc_flat_global;
   UINT32 storageKeySize = buffer_len(&vtpm_globals->storageKeyWrap);
+  UINT32 bootKeySize = buffer_len(&vtpm_globals->bootKeyWrap);
   struct pack_buf_t storage_key_pack = {storageKeySize, 
vtpm_globals->storageKeyWrap.bytes};
-  
+  struct pack_buf_t boot_key_pack = {bootKeySize, 
vtpm_globals->bootKeyWrap.bytes};
+
   struct hashtable_itr *dmi_itr;
   VTPM_DMI_RESOURCE *dmi_res;
-  
-  UINT32 flat_global_full_size;
-  
-  // Global Values needing to be saved
-  flat_global_full_size = 3*sizeof(TPM_DIGEST) + // Auths
-    sizeof(UINT32) +       // storagekeysize
-    storageKeySize +       // storage key
-    hashtable_count(vtpm_globals->dmi_map) * // num DMIS
-    (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
-  
-  
-  flat_global = (BYTE *) malloc( flat_global_full_size);
-  
-  flat_global_size = BSG_PackList(flat_global, 4,
-                                 BSG_TPM_AUTHDATA, 
&vtpm_globals->owner_usage_auth,
-                                 BSG_TPM_AUTHDATA, 
&vtpm_globals->srk_usage_auth,
-                                 BSG_TPM_SECRET,   
&vtpm_globals->storage_key_usage_auth,
-                                 BSG_TPM_SIZE32_DATA, &storage_key_pack);
-  
+
+  UINT32 boot_key_size, flat_dmis_size;
+
+  // Initially fill these with buffer sizes for each data type. Later fill
+  // in actual size, once flattened.
+  boot_key_size =  sizeof(UINT32) +       // bootkeysize
+                   bootKeySize;           // boot key
+
+  TPMTRYRETURN(buffer_init(&clear_flat_global, 3*sizeof(TPM_DIGEST) + // Auths
+                                              sizeof(UINT32) +// storagekeysize
+                                              storageKeySize, NULL) ); // 
storage key
+
+  flat_dmis_size = (hashtable_count(vtpm_globals->dmi_map) - 1) * // num DMIS 
(-1 for Dom0)
+                   (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
+
+  flat_boot_key = (BYTE *) malloc( boot_key_size );
+  flat_enc = (BYTE *) malloc( sizeof(UINT32) );
+  flat_dmis = (BYTE *) malloc( flat_dmis_size );
+
+  boot_key_size = BSG_PackList(flat_boot_key, 1,
+                               BSG_TPM_SIZE32_DATA, &boot_key_pack);
+
+  BSG_PackList(clear_flat_global.bytes, 3,
+                BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+                BSG_TPM_SECRET,   &vtpm_globals->storage_key_usage_auth,
+                BSG_TPM_SIZE32_DATA, &storage_key_pack);
+
+  TPMTRYRETURN(envelope_encrypt(&clear_flat_global,
+                                &vtpm_globals->bootKey,
+                                &enc_flat_global) );
+
+  BSG_PackConst(buffer_len(&enc_flat_global), 4, flat_enc);
+
   // Per DMI values to be saved
   if (hashtable_count(vtpm_globals->dmi_map) > 0) {
-    
+
     dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
     do {
       dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
       dmis++;
 
       // No need to save dmi0.
-      if (dmi_res->dmi_id == 0)        
-       continue;
-      
-      
-      flat_global_size += BSG_PackList( flat_global + flat_global_size, 3,
-                                       BSG_TYPE_UINT32, &dmi_res->dmi_id,
-                                       BSG_TPM_DIGEST, 
&dmi_res->NVM_measurement,
-                                       BSG_TPM_DIGEST, 
&dmi_res->DMI_measurement);
-      
+      if (dmi_res->dmi_id == 0)
+        continue;
+
+
+      flat_dmis_size += BSG_PackList( flat_dmis + flat_dmis_size, 3,
+                                        BSG_TYPE_UINT32, &dmi_res->dmi_id,
+                                        BSG_TPM_DIGEST, 
&dmi_res->NVM_measurement,
+                                        BSG_TPM_DIGEST, 
&dmi_res->DMI_measurement);
+
     } while (hashtable_iterator_advance(dmi_itr));
   }
-  
-  //FIXME: Once we have a way to protect a TPM key, we should use it to 
-  //       encrypt this blob. BUT, unless there is a way to ensure the key is
-  //       not used by other apps, this encryption is useless.
+
   fh = open(STATE_FILE, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
   if (fh == -1) {
     vtpmlogerror(VTPM_LOG_VTPM, "Unable to open %s file for write.\n", 
STATE_FILE);
     status = TPM_IOERROR;
     goto abort_egress;
   }
-  
-  if ( (bytes_written = write(fh, flat_global, flat_global_size)) != 
flat_global_size ) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data. %d/%d bytes 
written.\n", bytes_written, flat_global_size);
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  vtpm_globals->DMI_table_dirty = FALSE; 
-  
+
+  if ( ( write(fh, flat_boot_key, boot_key_size) != boot_key_size ) ||
+       ( write(fh, flat_enc, sizeof(UINT32)) != sizeof(UINT32) ) ||
+       ( write(fh, enc_flat_global.bytes, buffer_len(&enc_flat_global)) != 
buffer_len(&enc_flat_global) ) ||
+       ( write(fh, flat_dmis, flat_dmis_size) != flat_dmis_size ) ) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Failed to completely write service data.\n");
+    status = TPM_IOERROR;
+    goto abort_egress;
+ }
+
+  vtpm_globals->DMI_table_dirty = FALSE;
+
   goto egress;
-  
+
  abort_egress:
  egress:
-  
-  free(flat_global);
+
+  free(flat_boot_key);
+  free(flat_enc);
+  buffer_free(&enc_flat_global);
+  free(flat_dmis);
   close(fh);
-  
+
   vtpmloginfo(VTPM_LOG_VTPM, "Saved VTPM Service state (status = %d, dmis = 
%d)\n", (int) status, dmis);
   return status;
 }
 
 TPM_RESULT VTPM_LoadService(void) {
-  
+
   TPM_RESULT status=TPM_SUCCESS;
   int fh, stat_ret, dmis=0;
   long fh_size = 0, step_size;
-  BYTE *flat_global=NULL;
-  struct pack_buf_t storage_key_pack;
-  UINT32 *dmi_id_key;
-  
+  BYTE *flat_table=NULL;
+  buffer_t  unsealed_data;
+  struct pack_buf_t storage_key_pack, boot_key_pack;
+  UINT32 *dmi_id_key, enc_size;
+
   VTPM_DMI_RESOURCE *dmi_res;
   struct stat file_stat;
-  
+
+  TPM_HANDLE boot_key_handle;
+  TPM_AUTHDATA boot_usage_auth;
+  memset(&boot_usage_auth, 0, sizeof(TPM_AUTHDATA));
+
   fh = open(STATE_FILE, O_RDONLY );
   stat_ret = fstat(fh, &file_stat);
-  if (stat_ret == 0) 
+  if (stat_ret == 0)
     fh_size = file_stat.st_size;
   else {
     status = TPM_IOERROR;
     goto abort_egress;
   }
-  
-  flat_global = (BYTE *) malloc(fh_size);
-  
-  if ((long) read(fh, flat_global, fh_size) != fh_size ) {
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  
+
+  flat_table = (BYTE *) malloc(fh_size);
+
+  if ((long) read(fh, flat_table, fh_size) != fh_size ) {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+
+  // Read Boot Key
+  step_size = BSG_UnpackList( flat_table, 2,
+                              BSG_TPM_SIZE32_DATA, &boot_key_pack,
+                              BSG_TYPE_UINT32, &enc_size);
+
+  TPMTRYRETURN(buffer_init(&vtpm_globals->bootKeyWrap, 0, 0) );
+  TPMTRYRETURN(buffer_append_raw(&vtpm_globals->bootKeyWrap, 
boot_key_pack.size, boot_key_pack.data) );
+
+  //Load Boot Key
+  TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
+                              TPM_SRK_KEYHANDLE,
+                              &vtpm_globals->bootKeyWrap,
+                              &SRK_AUTH,
+                              &boot_key_handle,
+                              &vtpm_globals->keyAuth,
+                              &vtpm_globals->bootKey,
+                              FALSE) );
+
+  TPMTRYRETURN( envelope_decrypt(enc_size,
+                                 flat_table + step_size,
+                                 vtpm_globals->manager_tcs_handle,
+                                 boot_key_handle,
+                                 (const TPM_AUTHDATA*) &boot_usage_auth,
+                                 &unsealed_data) );
+  step_size += enc_size;
+
   // Global Values needing to be saved
-  step_size = BSG_UnpackList( flat_global, 4,
-                             BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
-                             BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
-                             BSG_TPM_SECRET,   
&vtpm_globals->storage_key_usage_auth,
-                             BSG_TPM_SIZE32_DATA, &storage_key_pack);
-  
+  BSG_UnpackList( unsealed_data.bytes, 3,
+                  BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+                  BSG_TPM_SECRET,   &vtpm_globals->storage_key_usage_auth,
+                  BSG_TPM_SIZE32_DATA, &storage_key_pack);
+
   TPMTRYRETURN(buffer_init(&vtpm_globals->storageKeyWrap, 0, 0) );
   TPMTRYRETURN(buffer_append_raw(&vtpm_globals->storageKeyWrap, 
storage_key_pack.size, storage_key_pack.data) );
-  
+
   // Per DMI values to be saved
   while ( step_size < fh_size ){
     if (fh_size - step_size < (long) (sizeof(UINT32) + 2*sizeof(TPM_DIGEST))) {
@@ -417,35 +479,38 @@
     } else {
       dmi_res = (VTPM_DMI_RESOURCE *) malloc(sizeof(VTPM_DMI_RESOURCE));
       dmis++;
-      
+
       dmi_res->connected = FALSE;
-      
-      step_size += BSG_UnpackList(flat_global + step_size, 3,
-                                 BSG_TYPE_UINT32, &dmi_res->dmi_id, 
-                                 BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
-                                 BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
-      
+
+      step_size += BSG_UnpackList(flat_table + step_size, 3,
+                                 BSG_TYPE_UINT32, &dmi_res->dmi_id,
+                                 BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
+                                 BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
+
       // install into map
       dmi_id_key = (UINT32 *) malloc (sizeof(UINT32));
       *dmi_id_key = dmi_res->dmi_id;
       if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, dmi_res)) {
-       status = TPM_FAIL;
-       goto abort_egress;
+        status = TPM_FAIL;
+        goto abort_egress;
       }
-      
+
     }
-    
-  }
-  
+
+  }
+
   vtpmloginfo(VTPM_LOG_VTPM, "Loaded saved state (dmis = %d).\n", dmis);
   goto egress;
-  
+
  abort_egress:
   vtpmlogerror(VTPM_LOG_VTPM, "Failed to load service data with error = %s\n", 
tpm_get_error_name(status));
  egress:
-  
-  free(flat_global);
+
+  free(flat_table);
   close(fh);
-  
+
+  // TODO: Could be nice and evict BootKey. (Need to add EvictKey to VTSP.
+
   return status;
 }
+
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtpm_manager.c
--- a/tools/vtpm_manager/manager/vtpm_manager.c Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtpm_manager.c Mon Jan  9 11:22:17 2006
@@ -74,16 +74,15 @@
 #endif
 
 // --------------------------- Well Known Auths --------------------------
-#ifdef WELL_KNOWN_SRK_AUTH
-static BYTE FIXED_SRK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff,
+const TPM_AUTHDATA SRK_AUTH = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff,
                                   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff};
-#endif
 
 #ifdef WELL_KNOWN_OWNER_AUTH
 static BYTE FIXED_OWNER_AUTH[20] =  {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff,
                                   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff};
 #endif
-                                  
+
+
 // -------------------------- Hash table functions --------------------
 
 static unsigned int hashfunc32(void *ky) {
@@ -100,13 +99,7 @@
   
   TPM_RESULT status = TPM_SUCCESS;
   
-  // Generate Auth's for SRK & Owner
-#ifdef WELL_KNOWN_SRK_AUTH 
-  memcpy(vtpm_globals->srk_usage_auth, FIXED_SRK_AUTH, sizeof(TPM_AUTHDATA));
-#else    
-  Crypto_GetRandom(vtpm_globals->srk_usage_auth, sizeof(TPM_AUTHDATA) );  
-#endif
-  
+  // Generate Auth for Owner
 #ifdef WELL_KNOWN_OWNER_AUTH 
   memcpy(vtpm_globals->owner_usage_auth, FIXED_OWNER_AUTH, 
sizeof(TPM_AUTHDATA));
 #else    
@@ -116,14 +109,14 @@
   // Take Owership of TPM
   CRYPTO_INFO ek_cryptoInfo;
   
-  vtpmloginfo(VTPM_LOG_VTPM, "Attempting Pubek Read. NOTE: Failure is ok.\n");
   status = VTSP_ReadPubek(vtpm_globals->manager_tcs_handle, &ek_cryptoInfo);
   
   // If we can read PubEK then there is no owner and we should take it.
   if (status == TPM_SUCCESS) { 
+    vtpmloginfo(VTPM_LOG_VTPM, "Failed to readEK meaning TPM has an owner. 
Creating Keys off existing SRK.\n");
     TPMTRYRETURN(VTSP_TakeOwnership(vtpm_globals->manager_tcs_handle,
                                    (const 
TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth, 
-                                   (const 
TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+                                   &SRK_AUTH,
                                    &ek_cryptoInfo,
                                    &vtpm_globals->keyAuth)); 
   
@@ -142,7 +135,7 @@
   TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
                          TPM_ET_KEYHANDLE,
                          TPM_SRK_KEYHANDLE, 
-                         (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+                         &SRK_AUTH,
                          &sharedsecret, 
                          &osap) ); 
 
@@ -157,8 +150,43 @@
                                    &vtpm_globals->storageKeyWrap,
                                    &osap) );
   
-  vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
-  
+  // Generate boot key's auth
+  Crypto_GetRandom(  &vtpm_globals->storage_key_usage_auth, 
+                    sizeof(TPM_AUTHDATA) );
+  
+  TPM_AUTHDATA bootKeyWrapAuth;
+  memset(&bootKeyWrapAuth, 0, sizeof(bootKeyWrapAuth));
+  
+  TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
+                         TPM_ET_KEYHANDLE,
+                         TPM_SRK_KEYHANDLE, 
+                         &SRK_AUTH,
+                         &sharedsecret, 
+                         &osap) ); 
+
+  osap.fContinueAuthSession = FALSE;
+ 
+  // FIXME: This key protects the global secrets on disk. It should use TPM
+  //        PCR bindings to limit its use to legit configurations.
+  //        Current binds are open, implying a Trusted VM contains this code.
+  //        If this VM is not Trusted, use measurement and PCR bindings.
+  TPMTRYRETURN( VTSP_CreateWrapKey( vtpm_globals->manager_tcs_handle,
+                                   TPM_KEY_BIND,
+                                   (const TPM_AUTHDATA*)&bootKeyWrapAuth,
+                                   TPM_SRK_KEYHANDLE, 
+                                   (const TPM_AUTHDATA*)&sharedsecret,
+                                   &vtpm_globals->bootKeyWrap,
+                                   &osap) );
+
+  // Populate CRYPTO_INFO vtpm_globals->bootKey. This does not load it into 
the TPM
+  TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
+                              TPM_SRK_KEYHANDLE,
+                              &vtpm_globals->bootKeyWrap,
+                              NULL,
+                              NULL,
+                              NULL,
+                              &vtpm_globals->bootKey,
+                              TRUE ) );
   goto egress;
   
  abort_egress:
@@ -278,24 +306,26 @@
 #endif
 
     // Check status of rx_fh. If necessary attempt to re-open it.    
+    char* s = NULL;
     if (*rx_fh < 0) {
 #ifdef VTPM_MULTI_VM
-      *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+      s = VTPM_BE_DEV;
 #else
       if (threadType == BE_LISTENER_THREAD) 
   #ifdef DUMMY_BACKEND
-       *rx_fh = open("/tmp/in.fifo", O_RDWR);
+       s = "/tmp/in.fifo";
   #else
-        *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+      s = VTPM_BE_DEV;
   #endif
       else  // DMI Listener   
-       *rx_fh = open(VTPM_RX_FIFO, O_RDWR);
+       s = VTPM_RX_FIFO;
+      *rx_fh = open(s, O_RDWR);
 #endif    
     }
     
     // Respond to failures to open rx_fh
     if (*rx_fh < 0) {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh.\n");
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh for %s.\n", s);
 #ifdef VTPM_MULTI_VM
       return TPM_IOERROR; 
 #else
@@ -713,7 +743,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 TPM_RESULT VTPM_Init_Service() {
-  TPM_RESULT status = TPM_FAIL;   
+  TPM_RESULT status = TPM_FAIL, serviceStatus;   
   BYTE *randomsead;
   UINT32 randomsize;
 
@@ -737,7 +767,7 @@
   
   // Create new TCS Object
   vtpm_globals->manager_tcs_handle = 0;
-  
+ 
   TPMTRYRETURN(TCS_create());
   
   // Create TCS Context for service
@@ -756,17 +786,24 @@
   vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
 
        // If failed, create new Service.
-  if (VTPM_LoadService() != TPM_SUCCESS)
+  serviceStatus = VTPM_LoadService();
+  if (serviceStatus == TPM_IOERROR) {
+    vtpmloginfo(VTPM_LOG_VTPM, "Failed to read service file. Assuming first 
time initialization.\n");
     TPMTRYRETURN( VTPM_Create_Service() );    
+  } else if (serviceStatus != TPM_SUCCESS) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Failed to read existing service file");
+    exit(1);
+  }
 
   //Load Storage Key 
   TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
                              TPM_SRK_KEYHANDLE,
                              &vtpm_globals->storageKeyWrap,
-                             (const 
TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+                             &SRK_AUTH,
                              &vtpm_globals->storageKeyHandle,
                              &vtpm_globals->keyAuth,
-                             &vtpm_globals->storageKey) );
+                             &vtpm_globals->storageKey,
+                              FALSE ) );
 
   // Create entry for Dom0 for control messages
   TPMTRYRETURN( VTPM_Handle_New_DMI(NULL) );
@@ -797,12 +834,11 @@
                free (dmi_itr);
   }
   
-       
+  if ( (vtpm_globals->DMI_table_dirty) && (VTPM_SaveService() != TPM_SUCCESS) )
+    vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
+
   TCS_CloseContext(vtpm_globals->manager_tcs_handle);
-  
-  if ( (vtpm_globals->DMI_table_dirty) &&
-       (VTPM_SaveService() != TPM_SUCCESS) )
-    vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
+  TCS_destroy();
   
   hashtable_destroy(vtpm_globals->dmi_map, 1);
   free(vtpm_globals);
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtpmpriv.h
--- a/tools/vtpm_manager/manager/vtpmpriv.h     Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtpmpriv.h     Mon Jan  9 11:22:17 2006
@@ -108,6 +108,7 @@
   TCS_CONTEXT_HANDLE  manager_tcs_handle;     // TCS Handle used by manager
   TPM_HANDLE          storageKeyHandle;       // Key used by persistent store
   CRYPTO_INFO         storageKey;             // For software encryption
+  CRYPTO_INFO         bootKey;                // For saving table
   TCS_AUTH            keyAuth;                // OIAP session for storageKey 
   BOOL                DMI_table_dirty;        // Indicates that a command
                                               // has updated the DMI table
@@ -115,15 +116,17 @@
     
   // Persistent Data
   TPM_AUTHDATA        owner_usage_auth;       // OwnerAuth of real TPM
-  TPM_AUTHDATA        srk_usage_auth;         // SRK Auth of real TPM    
   buffer_t            storageKeyWrap;         // Wrapped copy of storageKey
+  TPM_AUTHDATA        srk_usage_auth;
+  TPM_AUTHDATA        storage_key_usage_auth; 
 
-  TPM_AUTHDATA        storage_key_usage_auth; 
-    
+  buffer_t            bootKeyWrap;            // Wrapped copy of boot key 
+
 }VTPM_GLOBALS;
 
-//Global dmi map
-extern VTPM_GLOBALS *vtpm_globals;
+// --------------------------- Global Values --------------------------
+extern VTPM_GLOBALS *vtpm_globals;   // Key info and DMI states
+extern const TPM_AUTHDATA SRK_AUTH;  // SRK Well Known Auth Value
 
 // ********************** Command Handler Prototypes ***********************
 TPM_RESULT VTPM_Handle_Load_NVM(       VTPM_DMI_RESOURCE *myDMI, 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtsp.c
--- a/tools/vtpm_manager/manager/vtsp.c Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtsp.c Mon Jan  9 11:22:17 2006
@@ -144,7 +144,10 @@
   if (memcmp (&hm, &(auth->HMAC), sizeof(TPM_DIGEST)) == 0)  // 0 indicates 
equality
     return (TPM_SUCCESS);
   else {
-    VTSP_OIAP( hContext, auth);
+    // If specified, reconnect the OIAP session.
+    // NOTE: This only works for TCS's that never have a 0 context. 
+    if (hContext) 
+      VTSP_OIAP( hContext, auth);
     return (TPM_AUTHFAIL);
   }
 }
@@ -157,6 +160,10 @@
   TPMTRYRETURN( TCSP_OIAP(hContext,
                          &auth->AuthHandle,
                          &auth->NonceEven) );
+
+  memset(&auth->HMAC, 0, sizeof(TPM_DIGEST));
+  auth->fContinueAuthSession = FALSE;
+
   goto egress;
   
  abort_egress:
@@ -195,6 +202,9 @@
                 BSG_TPM_NONCE, &nonceOddOSAP);
   
   Crypto_HMAC(sharedSecretText, sizeof(sharedSecretText), (BYTE *) usageAuth, 
TPM_DIGEST_SIZE, (BYTE *) sharedSecret);       
+
+  memset(&auth->HMAC, 0, sizeof(TPM_DIGEST));
+  auth->fContinueAuthSession = FALSE;
     
   goto egress;
   
@@ -287,9 +297,6 @@
   srkKeyInfo.parms = (BYTE *) &srkRSAkeyInfo;
   
   struct pack_buf_t srkText;
-  
-  // GenerateAuth new nonceOdd    
-  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
   
   //These values are accurate for an enc(AuthData).
   struct pack_buf_t encOwnerAuth, encSrkAuth;
@@ -383,9 +390,6 @@
   BYTE *paramText;        // Digest to make Auth.
   UINT32 paramTextSize;
     
-  // Generate HMAC   
-  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-  
   paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
   
   paramTextSize = BSG_PackList(paramText, 1,
@@ -504,9 +508,6 @@
   newKeyText.data = flatKey;
   newKeyText.size = flatKeySize;
   
-  // GenerateAuth new nonceOdd    
-  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-  
   // Generate HMAC
   paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
   
@@ -563,63 +564,66 @@
                         const TPM_AUTHDATA          *parentAuth,
                         TPM_HANDLE                  *newKeyHandle,
                         TCS_AUTH                    *auth,
-                        CRYPTO_INFO                 *cryptoinfo /*= NULL*/) {
-  
-  
-  vtpmloginfo(VTPM_LOG_VTSP, "Loading Key.\n%s","");
+                        CRYPTO_INFO                 *cryptoinfo,
+                        const BOOL                  skipTPMLoad) { 
+  
+  
+  vtpmloginfo(VTPM_LOG_VTSP, "Loading Key %s.\n", (!skipTPMLoad ? "into TPM" : 
"only into memory"));
   
   TPM_RESULT status = TPM_SUCCESS;
   TPM_COMMAND_CODE command = TPM_ORD_LoadKey;
-  
-  BYTE *paramText;        // Digest to make Auth.
+
+  BYTE *paramText=NULL;        // Digest to make Auth.
   UINT32 paramTextSize;
-  
-  if ((rgbWrappedKeyBlob == NULL) || (parentAuth == NULL) || 
-      (newKeyHandle==NULL) || (auth==NULL)) {
-    status = TPM_BAD_PARAMETER;
-    goto abort_egress;
-  }
-  
-  // Generate Extra TCS Parameters
-  TPM_HANDLE phKeyHMAC;
-  
-  // Generate HMAC
-  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-  
-  paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
-  
-  paramTextSize = BSG_PackList(paramText, 1,
-                              BSG_TPM_COMMAND_CODE, &command);
-  
-  memcpy(paramText + paramTextSize, rgbWrappedKeyBlob->bytes, 
buffer_len(rgbWrappedKeyBlob));
-  paramTextSize += buffer_len(rgbWrappedKeyBlob);
-  
-  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+
+  // SkipTPMLoad stops key from being loaded into TPM, but still generates 
CRYPTO_INFO for it
+  if (! skipTPMLoad) { 
+  
+    if ((rgbWrappedKeyBlob == NULL) || (parentAuth == NULL) || 
+        (newKeyHandle==NULL) || (auth==NULL)) {
+      status = TPM_BAD_PARAMETER;
+      goto abort_egress;
+    }
+  
+    // Generate Extra TCS Parameters
+    TPM_HANDLE phKeyHMAC;
+  
+    paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
+  
+    paramTextSize = BSG_PackList(paramText, 1,
+                                BSG_TPM_COMMAND_CODE, &command);
+  
+    memcpy(paramText + paramTextSize, rgbWrappedKeyBlob->bytes, 
buffer_len(rgbWrappedKeyBlob));
+    paramTextSize += buffer_len(rgbWrappedKeyBlob);
+  
+    TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
                              parentAuth, auth) );
   
-  // Call TCS
-  TPMTRYRETURN( TCSP_LoadKeyByBlob(  hContext,
-                                    hUnwrappingKey,
-                                    buffer_len(rgbWrappedKeyBlob),
-                                    rgbWrappedKeyBlob->bytes,
-                                    auth,
-                                    newKeyHandle,
-                                    &phKeyHMAC) );
-  
-  // Verify Auth
-  paramTextSize = BSG_PackList(paramText, 3,
-                              BSG_TPM_RESULT, &status,
-                              BSG_TPM_COMMAND_CODE, &command,
-                              BSG_TPM_HANDLE, newKeyHandle);
-  
-  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
-                           parentAuth, auth, 
-                           hContext) );
-  
-  // Unpack/return key structure
+    // Call TCS
+    TPMTRYRETURN( TCSP_LoadKeyByBlob(  hContext,
+                                      hUnwrappingKey,
+                                      buffer_len(rgbWrappedKeyBlob),
+                                      rgbWrappedKeyBlob->bytes,
+                                      auth,
+                                      newKeyHandle,
+                                      &phKeyHMAC) );
+  
+    // Verify Auth
+    paramTextSize = BSG_PackList(paramText, 3,
+                                BSG_TPM_RESULT, &status,
+                                BSG_TPM_COMMAND_CODE, &command,
+                                BSG_TPM_HANDLE, newKeyHandle);
+  
+    TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+                             parentAuth, auth, 
+                             hContext) );
+  } 
+  
+  // Build cryptoinfo structure for software crypto function. 
   if (cryptoinfo != NULL) {
     TPM_KEY newKey;
     
+    // Unpack/return key structure
     BSG_Unpack(BSG_TPM_KEY, rgbWrappedKeyBlob->bytes , &newKey);
     TPM_RSA_KEY_PARMS rsaKeyParms;
     
@@ -669,9 +673,6 @@
   struct pack_buf_t clear_data32;
   BYTE *clear_data_text;
   UINT32 clear_data_size;
-  
-  // Generate HMAC   
-  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
   
   struct pack_buf_t bound_data32 = {bound_data->size, bound_data->bytes};
   
@@ -781,6 +782,196 @@
   return TPM_SUCCESS;
 }
 
+TPM_RESULT VTSP_Seal(const TCS_CONTEXT_HANDLE    hContext,
+                     const TPM_KEY_HANDLE        keyHandle,
+                     const TPM_AUTHDATA          *sealDataAuth,
+                     const TPM_PCR_COMPOSITE     *pcrComp,
+                     const buffer_t              *inData,
+                     TPM_STORED_DATA             *sealedData,                  
                 
+                     const TPM_SECRET            *osapSharedSecret,
+                     TCS_AUTH                    *auth) {
+
+  TPM_RESULT status = TPM_SUCCESS;
+  TPM_COMMAND_CODE command = TPM_ORD_Seal;
+
+  BYTE *paramText;        // Digest to make Auth.
+  UINT32 paramTextSize;
+
+  // Generate PCR_Info Struct from Comp
+  TPM_PCR_INFO pcrInfo;
+  UINT32 pcrInfoSize, flatpcrSize;
+  BYTE flatpcr[3 +                          // PCR_Select = 3 1 byte banks
+               sizeof(UINT16) +             //              2 byte UINT16
+               sizeof(UINT32) +             // PCR_Comp   = 4 byte UINT32
+               24 * sizeof(TPM_PCRVALUE) ]; //              up to 24 PCRs
+
+  if (pcrComp != NULL) {
+      //printf("\n\tBinding to PCRs: ");
+      //for(int i = 0 ; i < pcrComp->select.sizeOfSelect ; i++)
+      //printf("%2.2x", pcrComp->select.pcrSelect[i]);
+
+      memcpy(&pcrInfo.pcrSelection, &pcrComp->select, 
sizeof(TPM_PCR_SELECTION));
+
+      flatpcrSize = BSG_Pack(BSG_TPM_PCR_COMPOSITE, (BYTE *) pcrComp, flatpcr);
+      Crypto_SHA1Full((BYTE *) flatpcr, flatpcrSize, (BYTE *) 
&(pcrInfo.digestAtRelease));
+      memset(&(pcrInfo.digestAtCreation), 0, sizeof(TPM_DIGEST));
+      pcrInfoSize = BSG_Pack(BSG_TPM_PCR_INFO, (BYTE *) &pcrInfo, flatpcr);
+  } else {
+      //printf("\n\tBinding to no PCRS.");
+      pcrInfoSize = 0;
+  }
+
+  // Calculate encUsageAuth
+  BYTE XORbuffer[sizeof(TPM_SECRET) + sizeof(TPM_NONCE)];
+  UINT32 XORbufferSize = sizeof(XORbuffer);
+  TPM_DIGEST XORKey;
+  TPM_ENCAUTH encAuth;
+
+  BSG_PackList( XORbuffer, 2,
+                BSG_TPM_SECRET, osapSharedSecret,
+                BSG_TPM_NONCE, &auth->NonceEven );
+
+  Crypto_SHA1Full(XORbuffer, XORbufferSize, (BYTE *) &XORKey);
+
+  int i;
+  for (i=0; i < TPM_DIGEST_SIZE; i++)
+    ((BYTE *) &encAuth)[i] = ((BYTE *) &XORKey)[i] ^ ((BYTE *) 
sealDataAuth)[i];
+
+  // Generate Extra TCS Parameters
+  UINT32 inDataSize = buffer_len(inData);
+  struct pack_buf_t inData_pack = {inDataSize, inData->bytes};
+  struct pack_buf_t pcrInfo_pack = {pcrInfoSize, flatpcr};
+
+  UINT32 sealedDataSize;
+  BYTE *flatSealedData=NULL;
+
+  paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
+
+  paramTextSize = BSG_PackList(paramText, 4,
+                               BSG_TPM_COMMAND_CODE, &command,
+                               BSG_TPM_ENCAUTH, &encAuth,
+                               BSG_TPM_SIZE32_DATA, &pcrInfo_pack,
+                               BSG_TPM_SIZE32_DATA, &inData_pack);
+
+  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+                              osapSharedSecret, auth) );
+
+  // Call TCS
+  TPMTRYRETURN( TCSP_Seal( hContext,
+                           keyHandle,
+                           encAuth,
+                           pcrInfoSize,
+                           flatpcr,
+                           inDataSize,
+                           inData->bytes,
+                           auth,
+                           &sealedDataSize,
+                           &flatSealedData) );
+
+  // Unpack/return key structure
+  BSG_Unpack( BSG_TPM_STORED_DATA, flatSealedData, sealedData );
+
+  paramTextSize = BSG_PackList(paramText, 3,
+                               BSG_TPM_RESULT, &status,
+                               BSG_TPM_COMMAND_CODE, &command,
+                               BSG_TPM_STORED_DATA, sealedData);
+
+  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+                            osapSharedSecret, auth,
+                            0) );
+
+
+  goto egress;
+
+ abort_egress:
+ egress:
+
+  if (flatSealedData)
+    TCS_FreeMemory( hContext, flatSealedData);
+
+  free(paramText);
+  return status;
+}
+
+
+TPM_RESULT VTSP_Unseal(const TCS_CONTEXT_HANDLE    hContext,
+                       const TPM_KEY_HANDLE        keyHandle,
+                       const TPM_STORED_DATA       *sealedData,
+                       const TPM_AUTHDATA          *key_usage_auth,
+                       const TPM_AUTHDATA          *data_usage_auth,
+                       buffer_t                    *outData,
+                       TCS_AUTH                    *auth,
+                       TCS_AUTH                    *dataAuth) {
+
+  TPM_RESULT status = TPM_SUCCESS;
+  TPM_COMMAND_CODE command = TPM_ORD_Unseal;
+
+  BYTE *paramText;        // Digest to make Auth.
+  UINT32 paramTextSize;
+
+  // Generate Extra TCS Parameters
+  UINT32 sealDataSize, clearDataSize;
+  BYTE *flatSealedData= (BYTE *) malloc(sizeof(TPM_VERSION) +
+                                        2 * sizeof(UINT32) +
+                                        sealedData->sealInfoSize +
+                                        sealedData->encDataSize),
+       *clearData=NULL;
+
+  sealDataSize = BSG_Pack(BSG_TPM_STORED_DATA, sealedData, flatSealedData );
+
+  paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
+
+  paramTextSize = BSG_PackList(paramText, 2,
+                               BSG_TPM_COMMAND_CODE, &command,
+                               BSG_TPM_STORED_DATA, sealedData);
+
+  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+                              key_usage_auth, auth) );
+
+  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+                              data_usage_auth, dataAuth) );
+  // Call TCS
+  TPMTRYRETURN( TCSP_Unseal(  hContext,
+                              keyHandle,
+                              sealDataSize,
+                              flatSealedData,
+                              auth,
+                              dataAuth,
+                              &clearDataSize,
+                              &clearData) );
+
+  // Verify Auth
+  struct pack_buf_t clearData_pack = {clearDataSize, clearData};
+
+  paramTextSize = BSG_PackList(paramText, 3,
+                               BSG_TPM_RESULT, &status,
+                               BSG_TPM_COMMAND_CODE, &command,
+                               BSG_TPM_SIZE32_DATA, &clearData_pack);
+
+  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+                            key_usage_auth, auth,
+                            hContext) );
+
+  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+                            data_usage_auth, dataAuth,
+                            hContext) );
+
+  // Unpack/return key structure
+  TPMTRYRETURN( buffer_init(outData, clearDataSize, clearData) );
+
+  goto egress;
+
+ abort_egress:
+ egress:
+
+  if (flatSealedData)
+    TCS_FreeMemory( hContext, clearData);
+
+  free(paramText);
+  return status;
+}
+
+
 // Function Reaches into unsupported TCS command, beware.
 TPM_RESULT VTSP_RawTransmit(const TCS_CONTEXT_HANDLE    hContext,
                             const buffer_t *inbuf,
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtsp.h
--- a/tools/vtpm_manager/manager/vtsp.h Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtsp.h Mon Jan  9 11:22:17 2006
@@ -86,7 +86,8 @@
                         const TPM_AUTHDATA          *parentAuth,
                         TPM_HANDLE                  *newKeyHandle,
                         TCS_AUTH                    *pAuth,
-                        CRYPTO_INFO                 *cryptoinfo);
+                        CRYPTO_INFO                 *cryptoinfo,
+                        const BOOL                  skipTPMLoad);
 
 TPM_RESULT VTSP_Unbind( const TCS_CONTEXT_HANDLE    hContext,
                         const TPM_KEY_HANDLE        key_handle,
@@ -99,4 +100,22 @@
             const buffer_t *inData, 
             buffer_t *outData);
                         
+TPM_RESULT VTSP_Seal(const TCS_CONTEXT_HANDLE    hContext,
+                     const TPM_KEY_HANDLE        keyHandle,
+                     const TPM_AUTHDATA          *sealDataAuth,
+                     const TPM_PCR_COMPOSITE     *pcrComp,
+                     const buffer_t              *inData,
+                     TPM_STORED_DATA             *sealedData,                  
                 
+                     const TPM_SECRET            *osapSharedSecret,
+                     TCS_AUTH                    *auth);
+
+TPM_RESULT VTSP_Unseal(const TCS_CONTEXT_HANDLE    hContext,
+                       const TPM_KEY_HANDLE        keyHandle,
+                       const TPM_STORED_DATA       *sealedData,
+                       const TPM_AUTHDATA          *key_usage_auth,
+                       const TPM_AUTHDATA          *data_usage_auth,
+                       buffer_t                    *outData,
+                       TCS_AUTH                    *auth,
+                       TCS_AUTH                    *dataAuth);
+
 #endif //_VTSP_H_
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/tcs/tcs.c
--- a/tools/vtpm_manager/tcs/tcs.c      Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/tcs/tcs.c      Mon Jan  9 11:22:17 2006
@@ -636,7 +636,7 @@
   TDDL_UINT32  OutLength = TCPA_MAX_BUFFER_LENGTH;
   
   // check input params
-  if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || 
*SealedData == NULL)
+  if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || 
SealedData == NULL)
     return TPM_BAD_PARAMETER;
   
   // Convert Byte Input parameter in the input byte stream InBuf
diff -r 25e3c8668f1f -r 8af1199488d3 tools/xentrace/Makefile
--- a/tools/xentrace/Makefile   Mon Jan  9 11:19:55 2006
+++ b/tools/xentrace/Makefile   Mon Jan  9 11:22:17 2006
@@ -15,24 +15,32 @@
 OBJS     = $(patsubst %.c,%.o,$(wildcard *.c))
 
 BIN      = xentrace tbctl setsize
-LIBBIN   = xenctx
+LIBBIN   = 
 SCRIPTS  = xentrace_format
 MAN1     = $(wildcard *.1)
 MAN8     = $(wildcard *.8)
+
+ifeq ($(XEN_TARGET_ARCH),x86_32)
+LIBBIN  += xenctx
+endif
+
+ifeq ($(XEN_TARGET_ARCH),x86_64)
+LIBBIN  += xenctx
+endif
 
 all: build
 build: $(BIN) $(LIBBIN)
 
 install: build
        [ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
-       [ -d $(DESTDIR)/usr/$(LIBDIR)/xen/bin ] || \
+       [ -z "$(LIBBIN)"] || [ -d $(DESTDIR)/usr/$(LIBDIR)/xen/bin ] || \
                $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
        [ -d $(DESTDIR)/usr/share/man/man1 ] || \
                $(INSTALL_DIR) $(DESTDIR)/usr/share/man/man1
        [ -d $(DESTDIR)/usr/share/man/man8 ] || \
                $(INSTALL_DIR) $(DESTDIR)/usr/share/man/man8
        $(INSTALL_PROG) $(BIN) $(SCRIPTS) $(DESTDIR)/usr/bin
-       $(INSTALL_PROG) $(LIBBIN) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
+       [ -z "$(LIBBIN)"] || $(INSTALL_PROG) $(LIBBIN) 
$(DESTDIR)/usr/$(LIBDIR)/xen/bin
        $(INSTALL_DATA) $(MAN1) $(DESTDIR)/usr/share/man/man1
        $(INSTALL_DATA) $(MAN8) $(DESTDIR)/usr/share/man/man8
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/xentrace/xenctx.c
--- a/tools/xentrace/xenctx.c   Mon Jan  9 11:19:55 2006
+++ b/tools/xentrace/xenctx.c   Mon Jan  9 11:22:17 2006
@@ -380,10 +380,10 @@
         exit(-1);
     }
 
-    ret = xc_domain_get_vcpu_context(xc_handle, domid, vcpu, &ctx);
+    ret = xc_vcpu_getcontext(xc_handle, domid, vcpu, &ctx);
     if (ret < 0) {
         xc_domain_unpause(xc_handle, domid);
-        perror("xc_domain_get_vcpu_context");
+        perror("xc_vcpu_getcontext");
         exit(-1);
     }
 
diff -r 25e3c8668f1f -r 8af1199488d3 
tools/xm-test/tests/network-attach/Makefile.am
--- a/tools/xm-test/tests/network-attach/Makefile.am    Mon Jan  9 11:19:55 2006
+++ b/tools/xm-test/tests/network-attach/Makefile.am    Mon Jan  9 11:22:17 2006
@@ -6,7 +6,7 @@
        03_network_attach_detach_multiple_pos.test  \
        04_network_attach_baddomain_neg.test
 
-XFAIL_TESTS = 03_network_attach_detach_multiple_pos.test
+XFAIL_TESTS = 
 
 EXTRA_DIST = $(TESTS) $(XFAIL_TESTS) network_utils.py
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile    Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/Makefile    Mon Jan  9 11:22:17 2006
@@ -23,6 +23,13 @@
        __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o                   \
        __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o
 
+# xen stack unwinder
+# unwind_decoder.c is included in unwind.c
+OBJS += unwind.o
+#unwind.o: CFLAGS += -DUNW_DEBUG=4
+
+OBJS += process-linux-xen.o
+
 # perfmon.o
 # unwind.o needed for kernel unwinding (rare)
 
@@ -31,11 +38,26 @@
 # remove following line if not privifying in memory
 # OBJS += privify.o
 
-default: $(OBJS) head.o xen.lds.s
-       $(LD) -r -o arch.o $(OBJS)
+default: $(TARGET)
+
+$(CURDIR)/arch.o: $(OBJS)
+       $(LD) -r -o $@ $(OBJS)
+
+$(TARGET)-syms: $(ALL_OBJS) head.o xen.lds.s
        $(LD) $(LDFLAGS) -T $(BASEDIR)/arch/$(TARGET_ARCH)/xen.lds.s -N \
-               -Map map.out head.o $(ALL_OBJS) -o $(TARGET)-syms
-       $(OBJCOPY) -R .note -R .comment -S $(TARGET)-syms $(TARGET)
+               -Map map.out head.o $(ALL_OBJS) -o $@
+       $(NM) -n $@ | $(BASEDIR)/tools/symbols > $(BASEDIR)/xen-syms.S
+       $(MAKE) $(BASEDIR)/xen-syms.o
+       $(LD) $(LDFLAGS) -T $(BASEDIR)/arch/$(TARGET_ARCH)/xen.lds.s -N \
+               -Map map.out head.o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
+       $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
+       $(MAKE) $(BASEDIR)/xen-syms.o
+       $(LD) $(LDFLAGS) -T $(BASEDIR)/arch/$(TARGET_ARCH)/xen.lds.s -N \
+               -Map map.out head.o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
+       rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o
+
+$(TARGET): $(TARGET)-syms
+       $(OBJCOPY) -R .note -R .comment -S $(TARGET)-syms $@
        $(NM) -n $(TARGET)-syms | grep -v '\( [aUw] \)\|\(__crc_\)\|\( 
\$[adt]\)'\
                 > $(BASEDIR)/System.map
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/entry.S
--- a/xen/arch/ia64/linux-xen/entry.S   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/entry.S   Mon Jan  9 11:22:17 2006
@@ -1417,7 +1417,6 @@
        br.cond.sptk.many rp                            // goes to 
ia64_leave_kernel
 END(ia64_prepare_handle_unaligned)
 
-#ifndef XEN
        //
        // unw_init_running(void (*callback)(info, arg), void *arg)
        //
@@ -1463,6 +1462,7 @@
        br.ret.sptk.many rp
 END(unw_init_running)
 
+#ifndef XEN
        .rodata
        .align 8
        .globl sys_call_table
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/mmio.c
--- a/xen/arch/ia64/vmx/mmio.c  Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/mmio.c  Mon Jan  9 11:22:17 2006
@@ -29,7 +29,7 @@
 #include <asm/vmx_vcpu.h>
 #include <asm/privop.h>
 #include <asm/types.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/mm.h>
 #include <asm/vmx.h>
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vlsapic.c
--- a/xen/arch/ia64/vmx/vlsapic.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vlsapic.c       Mon Jan  9 11:22:17 2006
@@ -218,7 +218,7 @@
  */
 void vtm_domain_out(VCPU *vcpu)
 {
-    if(!is_idle_task(vcpu->domain))
+    if(!is_idle_domain(vcpu->domain))
        rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
 }
 
@@ -230,7 +230,7 @@
 {
     vtime_t     *vtm;
 
-    if(!is_idle_task(vcpu->domain)) {
+    if(!is_idle_domain(vcpu->domain)) {
        vtm=&(vcpu->arch.arch_vmx.vtm);
        vtm_interruption_update(vcpu, vtm);
     }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vmx_init.c
--- a/xen/arch/ia64/vmx/vmx_init.c      Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vmx_init.c      Mon Jan  9 11:22:17 2006
@@ -42,7 +42,7 @@
 #include <xen/lib.h>
 #include <asm/vmmu.h>
 #include <public/arch-ia64.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/vmx_phy_mode.h>
 #include <asm/processor.h>
 #include <asm/vmx.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vmx_process.c
--- a/xen/arch/ia64/vmx/vmx_process.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vmx_process.c   Mon Jan  9 11:22:17 2006
@@ -53,6 +53,7 @@
 #define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
 
 
+extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
 extern void rnat_consumption (VCPU *vcpu);
 #define DOMN_PAL_REQUEST    0x110000
 
@@ -185,8 +186,11 @@
        }else if(iim == DOMN_PAL_REQUEST){
         pal_emul(current);
                vmx_vcpu_increment_iip(current);
-    }  else
+    } else {
+               if (iim == 0) 
+                       die_if_kernel("bug check", regs, iim);
                vmx_reflect_interruption(ifa,isr,iim,11,regs);
+    }
 }
 
 
@@ -227,7 +231,7 @@
        struct domain *d = current->domain;
        struct vcpu *v = current;
        // FIXME: Will this work properly if doing an RFI???
-       if (!is_idle_task(d) ) {        // always comes from guest
+       if (!is_idle_domain(d) ) {      // always comes from guest
                extern void vmx_dorfirfi(void);
                struct pt_regs *user_regs = vcpu_regs(current);
                if (local_softirq_pending())
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vmx_support.c
--- a/xen/arch/ia64/vmx/vmx_support.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vmx_support.c   Mon Jan  9 11:22:17 2006
@@ -21,7 +21,7 @@
  */
 #include <xen/config.h>
 #include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/vmx.h>
 #include <asm/vmx_vcpu.h>
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/dom_fw.c
--- a/xen/arch/ia64/xen/dom_fw.c        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/dom_fw.c        Mon Jan  9 11:22:17 2006
@@ -861,12 +861,16 @@
        bp->console_info.orig_x = 0;
        bp->console_info.orig_y = 24;
        bp->fpswa = 0;
-        bp->initrd_start = (dom0_start+dom0_size) -
-                (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
-        bp->initrd_size = ia64_boot_param->initrd_size;
-                printf(" initrd start %0xlx", bp->initrd_start);
-                printf(" initrd size %0xlx", bp->initrd_size);
-
-
+       if (d == dom0) {
+               bp->initrd_start = (dom0_start+dom0_size) -
+                 (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
+               bp->initrd_size = ia64_boot_param->initrd_size;
+       }
+       else {
+               bp->initrd_start = d->arch.initrd_start;
+               bp->initrd_size  = d->arch.initrd_len;
+       }
+       printf(" initrd start %0xlx", bp->initrd_start);
+       printf(" initrd size %0xlx", bp->initrd_size);
        return bp;
 }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/domain.c        Mon Jan  9 11:22:17 2006
@@ -19,6 +19,7 @@
 #include <xen/delay.h>
 #include <xen/softirq.h>
 #include <xen/mm.h>
+#include <xen/iocap.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -45,7 +46,7 @@
 #include <asm/vmx_vcpu.h>
 #include <asm/vmx_vpd.h>
 #include <asm/pal.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #define CONFIG_DOMAIN0_CONTIGUOUS
 unsigned long dom0_start = -1L;
@@ -181,7 +182,7 @@
        memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
 }
 
-void arch_do_createdomain(struct vcpu *v)
+int arch_do_createdomain(struct vcpu *v)
 {
        struct domain *d = v->domain;
        struct thread_info *ti = alloc_thread_info(v);
@@ -248,7 +249,9 @@
                }
        } else
                d->arch.mm = NULL;
-       printf ("arch_do_create_domain: domain=%p\n", d);
+       printf ("arch_do_create_domain: domain=%p\n", d);
+
+       return 0;
 }
 
 void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
@@ -291,16 +294,7 @@
        d->arch.cmdline      = c->cmdline;
        new_thread(v, regs->cr_iip, 0, 0);
 
-#ifdef CONFIG_IA64_SPLIT_CACHE
-    /* Sync d/i cache conservatively */
-    if (!running_on_sim) {
-        ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
-        if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
-            printk("PAL CACHE FLUSH failed for dom0.\n");
-        else
-            printk("Sync i/d cache for guest SUCC\n");
-    }
-#endif
+       sync_split_caches();
        v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
        if ( c->vcpu.privregs && copy_from_user(v->arch.privregs,
                           c->vcpu.privregs, sizeof(mapped_regs_t))) {
@@ -428,7 +422,7 @@
                {
                        p = alloc_domheap_page(d);
                        // zero out pages for security reasons
-                       memset(__va(page_to_phys(p)),0,PAGE_SIZE);
+                       if (p) memset(__va(page_to_phys(p)),0,PAGE_SIZE);
                }
                if (unlikely(!p)) {
 printf("map_new_domain_page: Can't alloc!!!! Aaaargh!\n");
@@ -763,7 +757,10 @@
  */
 void physdev_init_dom0(struct domain *d)
 {
-       set_bit(_DOMF_physdev_access, &d->domain_flags);
+       if (iomem_permit_access(d, 0UL, ~0UL))
+               BUG();
+       if (irqs_permit_access(d, 0, NR_PIRQS-1))
+               BUG();
 }
 
 unsigned int vmx_dom0 = 0;
@@ -912,9 +909,9 @@
        memset(si, 0, PAGE_SIZE);
        d->shared_info->arch.start_info_pfn = __pa(si) >> PAGE_SHIFT;
        sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
+       si->nr_pages     = d->tot_pages;
 
 #if 0
-       si->nr_pages     = d->tot_pages;
        si->shared_info  = virt_to_phys(d->shared_info);
        si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
        //si->pt_base      = vpt_start;
@@ -959,16 +956,7 @@
 
        new_thread(v, pkern_entry, 0, 0);
        physdev_init_dom0(d);
-#ifdef CONFIG_IA64_SPLIT_CACHE
-    /* Sync d/i cache conservatively */
-    if (!running_on_sim) {
-        ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
-        if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
-            printk("PAL CACHE FLUSH failed for dom0.\n");
-        else
-            printk("Sync i/d cache for guest SUCC\n");
-    }
-#endif
+       sync_split_caches();
 
        // FIXME: Hack for keyboard input
 #ifdef CLONE_DOMAIN0
@@ -1027,16 +1015,7 @@
 #endif
        new_thread(v, pkern_entry, 0, 0);
        printk("new_thread returns\n");
-#ifdef CONFIG_IA64_SPLIT_CACHE
-    /* Sync d/i cache conservatively */
-    if (!running_on_sim) {
-        ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
-        if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
-            printk("PAL CACHE FLUSH failed for dom0.\n");
-        else
-            printk("Sync i/d cache for guest SUCC\n");
-    }
-#endif
+       sync_split_caches();
        __set_bit(0x30, VCPU(v, delivery_mask));
 
        return 0;
@@ -1050,16 +1029,7 @@
                v->domain->domain_id);
        loaddomainelfimage(v->domain,v->domain->arch.image_start);
        new_thread(v, v->domain->arch.entry, 0, 0);
-#ifdef CONFIG_IA64_SPLIT_CACHE
-    /* Sync d/i cache conservatively */
-    if (!running_on_sim) {
-        ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
-        if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
-            printk("PAL CACHE FLUSH failed for dom0.\n");
-        else
-            printk("Sync i/d cache for guest SUCC\n");
-    }
-#endif
+       sync_split_caches();
 }
 #endif
 
@@ -1098,15 +1068,6 @@
 void domain_pend_keyboard_interrupt(int irq)
 {
        vcpu_pend_interrupt(dom0->vcpu[0],irq);
-}
-
-void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
-{
-       if ( v->processor == newcpu )
-               return;
-
-       set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
-       v->processor = newcpu;
 }
 
 void sync_vcpu_execstate(struct vcpu *v)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/hyperprivop.S   Mon Jan  9 11:22:17 2006
@@ -543,6 +543,13 @@
        extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p7,p0=r21,r0 ;;
 (p7)   br.spnt.few dispatch_break_fault ;;
+        movl r20=IA64_PSR_CPL ;; 
+        and r22=r20,r30 ;;
+        cmp.ne p7,p0=r22,r0
+(p7)    br.spnt.many 1f ;;
+        cmp.eq p7,p0=r17,r0
+(p7)    br.spnt.few dispatch_break_fault ;;
+1:
 #if 1 /* special handling in case running on simulator */
        movl r20=first_break;;
        ld4 r23=[r20];;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/irq.c   Mon Jan  9 11:22:17 2006
@@ -1377,9 +1377,6 @@
     irq_guest_action_t *action;
     unsigned long       flags;
     int                 rc = 0;
-
-    if ( !IS_CAPABLE_PHYSDEV(d->domain) )
-        return -EPERM;
 
     spin_lock_irqsave(&desc->lock, flags);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/ivt.S   Mon Jan  9 11:22:17 2006
@@ -839,6 +839,8 @@
        mov r17=cr.iim
        mov r31=pr
        ;;
+       cmp.eq p7,p0=r17,r0
+(p7)   br.spnt.few dispatch_break_fault ;;
        movl r18=XSI_PSR_IC
        ;;
        ld8 r19=[r18]
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/process.c
--- a/xen/arch/ia64/xen/process.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/process.c       Mon Jan  9 11:22:17 2006
@@ -33,6 +33,7 @@
 #include <xen/multicall.h>
 
 extern unsigned long vcpu_get_itir_on_fault(struct vcpu *, UINT64);
+extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
 
 extern unsigned long dom0_start, dom0_size;
 
@@ -64,26 +65,16 @@
 
 extern struct schedule_data schedule_data[NR_CPUS];
 
-void schedule_tail(struct vcpu *next)
-{
-       unsigned long rr7;
-       //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
-       //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
-
-    // TG: Real HACK FIXME.
-    // This is currently necessary because when a new domain is started, 
-    // the context_switch function of xen/common/schedule.c(__enter_scheduler)
-    // never returns.  Therefore, the lock must be released.
-    // schedule_tail is only called when a domain is started.
-    spin_unlock_irq(&schedule_data[current->processor].schedule_lock);
-
-       /* rr7 will be postponed to last point when resuming back to guest */
-    if(VMX_DOMAIN(current)){
-       vmx_load_all_rr(current);
-    }else{
-           load_region_regs(current);
-            vcpu_load_kernel_regs(current);
-    }
+void schedule_tail(struct vcpu *prev)
+{
+       context_saved(prev);
+
+       if (VMX_DOMAIN(current)) {
+               vmx_load_all_rr(current);
+       } else {
+               load_region_regs(current);
+               vcpu_load_kernel_regs(current);
+       }
 }
 
 void tdpfoo(void) { }
@@ -251,7 +242,7 @@
        struct domain *d = current->domain;
        struct vcpu *v = current;
        // FIXME: Will this work properly if doing an RFI???
-       if (!is_idle_task(d) && user_mode(regs)) {
+       if (!is_idle_domain(d) && user_mode(regs)) {
                //vcpu_poke_timer(v);
                if (vcpu_deliverable_interrupts(v))
                        reflect_extint(regs);
@@ -686,6 +677,8 @@
                        vcpu_increment_iip(current);
        }
        else {
+               if (iim == 0) 
+                       die_if_kernel("bug check", regs, iim);
                PSCB(v,iim) = iim;
                reflect_interruption(isr,regs,IA64_BREAK_VECTOR);
        }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c  Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/vcpu.c  Mon Jan  9 11:22:17 2006
@@ -1085,7 +1085,7 @@
        /* gloss over the wraparound problem for now... we know it exists
         * but it doesn't matter right now */
 
-       if (is_idle_task(vcpu->domain)) {
+       if (is_idle_domain(vcpu->domain)) {
 //             printf("****** vcpu_set_next_timer called during idle!!\n");
                vcpu_safe_set_itm(s);
                return;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/xenmisc.c       Mon Jan  9 11:22:17 2006
@@ -25,7 +25,6 @@
 int phys_proc_id[NR_CPUS];
 unsigned long loops_per_jiffy = (1<<12);       // from linux/init/main.c
 
-void unw_init(void) { printf("unw_init() skipped (NEED FOR KERNEL UNWIND)\n"); 
}
 void ia64_mca_init(void) { printf("ia64_mca_init() skipped (Machine check 
abort handling)\n"); }
 void ia64_mca_cpu_init(void *x) { }
 void ia64_patch_mckinley_e9(unsigned long a, unsigned long b) { }
@@ -180,11 +179,6 @@
 // from arch/ia64/traps.c
 ///////////////////////////////
 
-void show_registers(struct pt_regs *regs)
-{
-       printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n");
-}
-
 int is_kernel_text(unsigned long addr)
 {
        extern char _stext[], _etext[];
@@ -236,7 +230,13 @@
 
 void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ 
((noreturn)) */
 {
-       printk("die_if_kernel: called, not implemented\n");
+       if (user_mode(regs))
+               return;
+
+       printk("%s: %s %ld\n", __func__, str, err);
+       debugtrace_dump();
+       show_registers(regs);
+       domain_crash_synchronous();
 }
 
 long
@@ -320,18 +320,15 @@
        ia64_set_iva(&ia64_ivt);
        ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
                VHPT_ENABLED);
-       if (!is_idle_task(current->domain)) {
+       if (!is_idle_domain(current->domain)) {
                load_region_regs(current);
                vcpu_load_kernel_regs(current);
                    if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
        }
            if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
     }
-}
-
-void context_switch_finalise(struct vcpu *next)
-{
-       /* nothing to do */
+
+    context_saved(prev);
 }
 
 void continue_running(struct vcpu *same)
@@ -368,3 +365,23 @@
                goto loop;
        }
 }
+
+/* FIXME: for the forseeable future, all cpu's that enable VTi have split
+ *  caches and all cpu's that have split caches enable VTi.  This may
+ *  eventually be untrue though. */
+#define cpu_has_split_cache    vmx_enabled
+extern unsigned int vmx_enabled;
+
+void sync_split_caches(void)
+{
+       unsigned long ret, progress = 0;
+
+       if (cpu_has_split_cache) {
+               /* Sync d/i cache conservatively */
+               ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
+               if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
+                       printk("PAL CACHE FLUSH failed\n");
+               else printk("Sync i/d cache for guest SUCC\n");
+       }
+       else printk("sync_split_caches ignored for CPU with no split cache\n");
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/xensetup.c      Mon Jan  9 11:22:17 2006
@@ -26,7 +26,7 @@
 
 char saved_command_line[COMMAND_LINE_SIZE];
 
-struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
+struct vcpu *idle_domain[NR_CPUS] = { &idle0_vcpu };
 
 cpumask_t cpu_present_map;
 
@@ -382,8 +382,7 @@
         panic("Could not set up DOM0 guest OS\n");
 
     /* PIN domain0 on CPU 0.  */
-    dom0->vcpu[0]->cpumap=1;
-    set_bit(_VCPUF_cpu_pinned, &dom0->vcpu[0]->vcpu_flags);
+    dom0->vcpu[0]->cpu_affinity = cpumask_of_cpu(0);
 
 #ifdef CLONE_DOMAIN0
     {
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/xentime.c
--- a/xen/arch/ia64/xen/xentime.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/xentime.c       Mon Jan  9 11:22:17 2006
@@ -127,7 +127,7 @@
                        vcpu_wake(dom0->vcpu[0]);
                }
        }
-       if (!is_idle_task(current->domain))  {
+       if (!is_idle_domain(current->domain))  {
                if (vcpu_timer_expired(current)) {
                        vcpu_pend_timer(current);
                        // ensure another timer interrupt happens even if 
domain doesn't
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/Makefile     Mon Jan  9 11:22:17 2006
@@ -29,6 +29,7 @@
 endif
 
 OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
+OBJS := $(subst $(TARGET_SUBARCH)/xen.lds.o,,$(OBJS))
 
 ifneq ($(crash_debug),y)
 OBJS := $(patsubst cdb%.o,,$(OBJS))
@@ -43,21 +44,24 @@
 $(CURDIR)/arch.o: $(OBJS)
        $(LD) $(LDFLAGS) -r -o $@ $(OBJS)
 
-$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(TARGET_SUBARCH)/xen.lds
-       $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) xen.lds
+       $(LD) $(LDFLAGS) -T xen.lds -N \
            boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@
        $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
        $(MAKE) $(BASEDIR)/xen-syms.o
-       $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+       $(LD) $(LDFLAGS) -T xen.lds -N \
            boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
        $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
        $(MAKE) $(BASEDIR)/xen-syms.o
-       $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+       $(LD) $(LDFLAGS) -T xen.lds -N \
            boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
        rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o
 
 asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
        $(CC) $(CFLAGS) -S -o $@ $<
+
+xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS)
+       $(CC) $(CFLAGS) -P -E -Ui386 -D__ASSEMBLY__ -o $@ $<
 
 boot/mkelf32: boot/mkelf32.c
        $(HOSTCC) $(HOSTCFLAGS) -o $@ $<
@@ -73,5 +77,6 @@
        rm -f dm/*.o dm/*~ dm/core
        rm -f genapic/*.o genapic/*~ genapic/core
        rm -f cpu/*.o cpu/*~ cpu/core
+       rm -f xen.lds
 
 .PHONY: default clean
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/boot/x86_32.S        Mon Jan  9 11:22:17 2006
@@ -1,5 +1,6 @@
 #include <xen/config.h>
 #include <public/xen.h>
+#include <asm/asm_defns.h>
 #include <asm/desc.h>
 #include <asm/page.h>
 #include <asm/msr.h>
@@ -53,6 +54,7 @@
         mov     %ecx,%gs
         ljmp    $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
 1:      lss     stack_start-__PAGE_OFFSET,%esp
+        add     $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp
 
         /* Reset EFLAGS (subsumes CLI and CLD). */
        pushl   $0
@@ -98,7 +100,7 @@
 1:      stosl   /* low mappings cover as much physmem as possible */
         add     $4,%edi
         add     $(1<<L2_PAGETABLE_SHIFT),%eax
-        cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
+        cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
         jne     1b
 #else
         /* Initialize low and high mappings of all memory with 4MB pages */
@@ -111,7 +113,7 @@
         jne     1b
 1:      stosl   /* low mappings cover as much physmem as possible */
         add     $(1<<L2_PAGETABLE_SHIFT),%eax
-        cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
+        cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
         jne     1b
 #endif
         
@@ -189,7 +191,7 @@
 /*** STACK LOCATION ***/
         
 ENTRY(stack_start)
-        .long cpu0_stack + STACK_SIZE - 200 - __PAGE_OFFSET
+        .long cpu0_stack
         .long __HYPERVISOR_DS
         
 /*** DESCRIPTOR TABLES ***/
@@ -256,10 +258,6 @@
         .fill 1*PAGE_SIZE,1,0
 #endif
 
-#if (STACK_ORDER == 0)
-.section ".bss.page_aligned","w"
-#else
-.section ".bss.twopage_aligned","w"
-#endif
+.section ".bss.stack_aligned","w"
 ENTRY(cpu0_stack)
         .fill STACK_SIZE,1,0
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/boot/x86_64.S        Mon Jan  9 11:22:17 2006
@@ -1,5 +1,6 @@
 #include <xen/config.h>
 #include <public/xen.h>
+#include <asm/asm_defns.h>
 #include <asm/desc.h>
 #include <asm/page.h>
 #include <asm/msr.h>
@@ -121,7 +122,8 @@
         mov     %rcx,%cr4
 
         mov     stack_start(%rip),%rsp
-        
+        or      $(STACK_SIZE-CPUINFO_sizeof),%rsp
+
         /* Reset EFLAGS (subsumes CLI and CLD). */
         pushq   $0
         popf
@@ -140,7 +142,7 @@
         mov     %ecx,%ss
 
         lidt    idt_descr(%rip)
-                
+
         cmp     $(SECONDARY_CPU_FLAG),%ebx
         je      start_secondary
 
@@ -219,7 +221,7 @@
         .quad   idt_table
 
 ENTRY(stack_start)
-        .quad   cpu0_stack + STACK_SIZE - 200
+        .quad   cpu0_stack
 
 high_start:
         .quad   __high_start
@@ -265,10 +267,6 @@
         .org 0x4000 + PAGE_SIZE
         .code64
 
-#if (STACK_ORDER == 0)
-.section ".bss.page_aligned","w"
-#else
-.section ".bss.twopage_aligned","w"
-#endif
+.section ".bss.stack_aligned","w"
 ENTRY(cpu0_stack)
         .fill STACK_SIZE,1,0
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/dm/i8259.c
--- a/xen/arch/x86/dm/i8259.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/dm/i8259.c   Mon Jan  9 11:22:17 2006
@@ -29,7 +29,7 @@
 #include <xen/lib.h>
 #include <xen/errno.h>
 #include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/vmx.h>
 #include <asm/vmx_vpic.h>
 #include <asm/current.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/dm/vmx_vioapic.c
--- a/xen/arch/x86/dm/vmx_vioapic.c     Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/dm/vmx_vioapic.c     Mon Jan  9 11:22:17 2006
@@ -37,7 +37,7 @@
 #include <xen/lib.h>
 #include <xen/errno.h>
 #include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/vmx.h>
 #include <asm/vmx_vpic.h>
 #include <asm/current.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/dom0_ops.c   Mon Jan  9 11:22:17 2006
@@ -17,6 +17,7 @@
 #include <asm/msr.h>
 #include <xen/trace.h>
 #include <xen/console.h>
+#include <xen/iocap.h>
 #include <asm/shadow.h>
 #include <asm/irq.h>
 #include <asm/processor.h>
@@ -35,13 +36,13 @@
 
 static void write_msr_for(void *unused)
 {
-    if ( ((1 << current->processor) & msr_cpu_mask) )
+    if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
         (void)wrmsr_user(msr_addr, msr_lo, msr_hi);
 }
 
 static void read_msr_for(void *unused)
 {
-    if ( ((1 << current->processor) & msr_cpu_mask) )
+    if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
         (void)rdmsr_user(msr_addr, msr_lo, msr_hi);
 }
 
@@ -102,12 +103,27 @@
             op->u.add_memtype.nr_pfns,
             op->u.add_memtype.type,
             1);
+        if (ret > 0)
+        {
+            (void)__put_user(0, &u_dom0_op->u.add_memtype.handle);
+            (void)__put_user(ret, &u_dom0_op->u.add_memtype.reg);
+            ret = 0;
+        }
     }
     break;
 
     case DOM0_DEL_MEMTYPE:
     {
-        ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
+        if (op->u.del_memtype.handle == 0
+            /* mtrr/main.c otherwise does a lookup */
+            && (int)op->u.del_memtype.reg >= 0)
+        {
+            ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
+            if (ret > 0)
+                ret = 0;
+        }
+        else
+            ret = -EINVAL;
     }
     break;
 
@@ -141,7 +157,6 @@
         struct domain *d;
         unsigned int fp = op->u.ioport_permission.first_port;
         unsigned int np = op->u.ioport_permission.nr_ports;
-        unsigned int p;
 
         ret = -EINVAL;
         if ( (fp + np) > 65536 )
@@ -152,26 +167,12 @@
             op->u.ioport_permission.domain)) == NULL) )
             break;
 
-        ret = -ENOMEM;
-        if ( d->arch.iobmp_mask != NULL )
-        {
-            if ( (d->arch.iobmp_mask = xmalloc_array(
-                u8, IOBMP_BYTES)) == NULL )
-            {
-                put_domain(d);
-                break;
-            }
-            memset(d->arch.iobmp_mask, 0xFF, IOBMP_BYTES);
-        }
-
-        ret = 0;
-        for ( p = fp; p < (fp + np); p++ )
-        {
-            if ( op->u.ioport_permission.allow_access )
-                clear_bit(p, d->arch.iobmp_mask);
-            else
-                set_bit(p, d->arch.iobmp_mask);
-        }
+        if ( np == 0 )
+            ret = 0;
+        else if ( op->u.ioport_permission.allow_access )
+            ret = ioports_permit_access(d, fp, fp + np - 1);
+        else
+            ret = ioports_deny_access(d, fp, fp + np - 1);
 
         put_domain(d);
     }
@@ -193,7 +194,7 @@
         memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
         ret = 0;
         if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
-           ret = -EFAULT;
+            ret = -EFAULT;
     }
     break;
     
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/domain.c     Mon Jan  9 11:22:17 2006
@@ -20,6 +20,7 @@
 #include <xen/delay.h>
 #include <xen/softirq.h>
 #include <xen/grant_table.h>
+#include <xen/iocap.h>
 #include <asm/regs.h>
 #include <asm/mc146818rtc.h>
 #include <asm/system.h>
@@ -35,9 +36,7 @@
 #include <xen/console.h>
 #include <xen/elf.h>
 #include <asm/vmx.h>
-#include <asm/vmx_vmcs.h>
 #include <asm/msr.h>
-#include <asm/physdev.h>
 #include <xen/kernel.h>
 #include <xen/multicall.h>
 
@@ -47,17 +46,16 @@
 
 struct percpu_ctxt {
     struct vcpu *curr_vcpu;
-    unsigned int context_not_finalised;
     unsigned int dirty_segment_mask;
 } __cacheline_aligned;
 static struct percpu_ctxt percpu_ctxt[NR_CPUS];
 
-static void continue_idle_task(struct vcpu *v)
+static void continue_idle_domain(struct vcpu *v)
 {
     reset_stack_and_jump(idle_loop);
 }
 
-static void continue_nonidle_task(struct vcpu *v)
+static void continue_nonidle_domain(struct vcpu *v)
 {
     reset_stack_and_jump(ret_from_intr);
 }
@@ -93,12 +91,13 @@
 {
     struct vcpu *v = current;
 
-    ASSERT(is_idle_task(v->domain));
+    ASSERT(is_idle_domain(v->domain));
     percpu_ctxt[smp_processor_id()].curr_vcpu = v;
-    cpu_set(smp_processor_id(), v->domain->cpumask);
-    v->arch.schedule_tail = continue_idle_task;
-
-    idle_loop();
+    cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
+    cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);
+    v->arch.schedule_tail = continue_idle_domain;
+
+    reset_stack_and_jump(idle_loop);
 }
 
 static long no_idt[2];
@@ -185,11 +184,17 @@
 {
     struct pfn_info *page;
 
-    if ( d->tot_pages < 10 )
+    printk("Memory pages belonging to domain %u:\n", d->domain_id);
+
+    if ( d->tot_pages >= 10 )
+    {
+        printk("    DomPage list too long to display\n");
+    }
+    else
     {
         list_for_each_entry ( page, &d->page_list, list )
         {
-            printk("Page %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
+            printk("    DomPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
                    _p(page_to_phys(page)), _p(page_to_pfn(page)),
                    page->count_info, page->u.inuse.type_info);
         }
@@ -197,15 +202,10 @@
 
     list_for_each_entry ( page, &d->xenpage_list, list )
     {
-        printk("XenPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
+        printk("    XenPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
                _p(page_to_phys(page)), _p(page_to_pfn(page)),
                page->count_info, page->u.inuse.type_info);
     }
-
-    page = virt_to_page(d->shared_info);
-    printk("Shared_info@%p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
-           _p(page_to_phys(page)), _p(page_to_pfn(page)), page->count_info,
-           page->u.inuse.type_info);
 }
 
 struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
@@ -250,24 +250,36 @@
 #endif
 }
 
-void arch_do_createdomain(struct vcpu *v)
+int arch_do_createdomain(struct vcpu *v)
 {
     struct domain *d = v->domain;
     l1_pgentry_t gdt_l1e;
-    int vcpuid, pdpt_order;
+    int vcpuid, pdpt_order, rc;
 #ifdef __x86_64__
     int i;
 #endif
 
-    if ( is_idle_task(d) )
-        return;
-
-    v->arch.schedule_tail = continue_nonidle_task;
-
-    d->shared_info = alloc_xenheap_page();
+    if ( is_idle_domain(d) )
+        return 0;
+
+    d->arch.ioport_caps = 
+        rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+    if ( d->arch.ioport_caps == NULL )
+        return -ENOMEM;
+
+    if ( (d->shared_info = alloc_xenheap_page()) == NULL )
+        return -ENOMEM;
+
+    if ( (rc = ptwr_init(d)) != 0 )
+    {
+        free_xenheap_page(d->shared_info);
+        return rc;
+    }
+
+    v->arch.schedule_tail = continue_nonidle_domain;
+
     memset(d->shared_info, 0, PAGE_SIZE);
     v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id];
-    v->cpumap = CPUMAP_RUNANYWHERE;
     SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
 
     pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
@@ -308,25 +320,10 @@
                             __PAGE_HYPERVISOR);
 #endif
 
-    (void)ptwr_init(d);
-
     shadow_lock_init(d);
     INIT_LIST_HEAD(&d->arch.free_shadow_frames);
-}
-
-void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
-{
-    if ( v->processor == newcpu )
-        return;
-
-    set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
-    v->processor = newcpu;
-
-    if ( VMX_DOMAIN(v) )
-    {
-        __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
-        v->arch.schedule_tail = arch_vmx_do_relaunch;
-    }
+
+    return 0;
 }
 
 /* This is called by arch_final_setup_guest and do_boot_vcpu */
@@ -348,6 +345,8 @@
              ((c->user_regs.ss & 3) == 0) )
             return -EINVAL;
     }
+    else if ( !hvm_enabled )
+        return -EINVAL;
 
     clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
     if ( c->flags & VGCF_I387_VALID )
@@ -690,7 +689,7 @@
     struct vcpu          *p = percpu_ctxt[cpu].curr_vcpu;
     struct vcpu          *n = current;
 
-    if ( !is_idle_task(p->domain) )
+    if ( !is_idle_domain(p->domain) )
     {
         memcpy(&p->arch.guest_context.user_regs,
                stack_regs,
@@ -699,7 +698,7 @@
         save_segments(p);
     }
 
-    if ( !is_idle_task(n->domain) )
+    if ( !is_idle_domain(n->domain) )
     {
         memcpy(stack_regs,
                &n->arch.guest_context.user_regs,
@@ -725,7 +724,8 @@
     }
 
     if ( p->domain != n->domain )
-        cpu_set(cpu, n->domain->cpumask);
+        cpu_set(cpu, n->domain->domain_dirty_cpumask);
+    cpu_set(cpu, n->vcpu_dirty_cpumask);
 
     write_ptbase(n);
 
@@ -738,7 +738,8 @@
     }
 
     if ( p->domain != n->domain )
-        cpu_clear(cpu, p->domain->cpumask);
+        cpu_clear(cpu, p->domain->domain_dirty_cpumask);
+    cpu_clear(cpu, p->vcpu_dirty_cpumask);
 
     percpu_ctxt[cpu].curr_vcpu = n;
 }
@@ -748,28 +749,24 @@
 {
     unsigned int cpu = smp_processor_id();
 
-    ASSERT(!local_irq_is_enabled());
+    ASSERT(local_irq_is_enabled());
 
     set_current(next);
 
-    if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
-    {
+    if ( (percpu_ctxt[cpu].curr_vcpu != next) &&
+         !is_idle_domain(next->domain) )
+    {
+        /* This may happen if next has been migrated by the scheduler. */
+        if ( unlikely(!cpus_empty(next->vcpu_dirty_cpumask)) )
+        {
+            ASSERT(!cpu_isset(cpu, next->vcpu_dirty_cpumask));
+            sync_vcpu_execstate(next);
+            ASSERT(cpus_empty(next->vcpu_dirty_cpumask));
+        }
+
+        local_irq_disable();
         __context_switch();
-        percpu_ctxt[cpu].context_not_finalised = 1;
-    }
-}
-
-void context_switch_finalise(struct vcpu *next)
-{
-    unsigned int cpu = smp_processor_id();
-
-    ASSERT(local_irq_is_enabled());
-
-    if ( percpu_ctxt[cpu].context_not_finalised )
-    {
-        percpu_ctxt[cpu].context_not_finalised = 0;
-
-        BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
+        local_irq_enable();
 
         if ( VMX_DOMAIN(next) )
         {
@@ -783,6 +780,8 @@
         }
     }
 
+    context_saved(prev);
+
     schedule_tail(next);
     BUG();
 }
@@ -812,20 +811,11 @@
 
 void sync_vcpu_execstate(struct vcpu *v)
 {
-    unsigned int cpu = v->processor;
-
-    if ( !cpu_isset(cpu, v->domain->cpumask) )
-        return;
-
-    if ( cpu == smp_processor_id() )
-    {
+    if ( cpu_isset(smp_processor_id(), v->vcpu_dirty_cpumask) )
         (void)__sync_lazy_execstate();
-    }
-    else
-    {
-        /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
-        flush_tlb_mask(cpumask_of_cpu(cpu));
-    }
+
+    /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
+    flush_tlb_mask(v->vcpu_dirty_cpumask);
 }
 
 unsigned long __hypercall_create_continuation(
@@ -951,9 +941,7 @@
     struct vcpu *v;
     unsigned long pfn;
 
-    BUG_ON(!cpus_empty(d->cpumask));
-
-    physdev_destroy_state(d);
+    BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
 
     ptwr_destroy(d);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/domain_build.c       Mon Jan  9 11:22:17 2006
@@ -16,13 +16,13 @@
 #include <xen/kernel.h>
 #include <xen/domain.h>
 #include <xen/compile.h>
+#include <xen/iocap.h>
 #include <asm/regs.h>
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
-#include <asm/physdev.h>
 #include <asm/shadow.h>
 
 static long dom0_nrpages;
@@ -94,9 +94,9 @@
     return page;
 }
 
-static void process_dom0_ioports_disable()
+static void process_dom0_ioports_disable(void)
 {
-    unsigned long io_from, io_to, io_nr;
+    unsigned long io_from, io_to;
     char *t, *u, *s = opt_dom0_ioports_disable;
 
     if ( *s == '\0' )
@@ -126,8 +126,8 @@
         printk("Disabling dom0 access to ioport range %04lx-%04lx\n",
             io_from, io_to);
 
-        io_nr = io_to - io_from + 1;
-        physdev_modify_ioport_access_range(dom0, 0, io_from, io_nr);
+        if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
+            BUG();
     }
 }
 
@@ -183,7 +183,6 @@
     /* Machine address of next candidate page-table page. */
     unsigned long mpt_alloc;
 
-    extern void physdev_init_dom0(struct domain *);
     extern void translate_l2pgtable(
         struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn);
 
@@ -692,9 +691,6 @@
     zap_low_mappings(l2start);
     zap_low_mappings(idle_pg_table_l2);
 #endif
-    
-    /* DOM0 gets access to everything. */
-    physdev_init_dom0(d);
 
     init_domain_time(d);
 
@@ -746,19 +742,28 @@
         printk("dom0: shadow setup done\n");
     }
 
+    i = 0;
+
+    /* DOM0 is permitted full I/O capabilities. */
+    i |= ioports_permit_access(dom0, 0, 0xFFFF);
+    i |= iomem_permit_access(dom0, 0UL, ~0UL);
+    i |= irqs_permit_access(dom0, 0, NR_PIRQS-1);
+
     /*
      * Modify I/O port access permissions.
      */
     /* Master Interrupt Controller (PIC). */
-    physdev_modify_ioport_access_range(dom0, 0, 0x20, 2);
+    i |= ioports_deny_access(dom0, 0x20, 0x21);
     /* Slave Interrupt Controller (PIC). */
-    physdev_modify_ioport_access_range(dom0, 0, 0xA0, 2);
+    i |= ioports_deny_access(dom0, 0xA0, 0xA1);
     /* Interval Timer (PIT). */
-    physdev_modify_ioport_access_range(dom0, 0, 0x40, 4);
+    i |= ioports_deny_access(dom0, 0x40, 0x43);
     /* PIT Channel 2 / PC Speaker Control. */
-    physdev_modify_ioport_access_range(dom0, 0, 0x61, 1);
-    /* Command-line passed i/o ranges */
+    i |= ioports_deny_access(dom0, 0x61, 0x61);
+    /* Command-line I/O ranges. */
     process_dom0_ioports_disable();
+
+    BUG_ON(i != 0);
 
     return 0;
 }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/idle0_task.c
--- a/xen/arch/x86/idle0_task.c Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/idle0_task.c Mon Jan  9 11:22:17 2006
@@ -11,6 +11,7 @@
 
 struct vcpu idle0_vcpu = {
     processor:   0,
+    cpu_affinity:CPU_MASK_CPU0,
     domain:      &idle0_domain
 };
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/io_apic.c    Mon Jan  9 11:22:17 2006
@@ -1807,3 +1807,47 @@
 
     return 0;
 }
+
+void dump_ioapic_irq_info(void)
+{
+    struct irq_pin_list *entry;
+    struct IO_APIC_route_entry rte;
+    unsigned int irq, pin, printed = 0;
+    unsigned long flags;
+
+    for ( irq = 0; irq < NR_IRQS; irq++ )
+    {
+        entry = &irq_2_pin[irq];
+        if ( entry->pin == -1 )
+            continue;
+
+        if ( !printed++ )
+            printk("IO-APIC interrupt information:\n");
+
+        printk("    IRQ%3d Vec%3d:\n", irq, irq_to_vector(irq));
+
+        for ( ; ; )
+        {
+            pin = entry->pin;
+
+            printk("      Apic 0x%02x, Pin %2d: ", entry->apic, pin);
+
+            spin_lock_irqsave(&ioapic_lock, flags);
+            *(((int *)&rte) + 0) = io_apic_read(entry->apic, 0x10 + 2 * pin);
+            *(((int *)&rte) + 1) = io_apic_read(entry->apic, 0x11 + 2 * pin);
+            spin_unlock_irqrestore(&ioapic_lock, flags);
+
+            printk("vector=%u, delivery_mode=%u, dest_mode=%s, "
+                   "delivery_status=%d, polarity=%d, irr=%d, "
+                   "trigger=%s, mask=%d\n",
+                   rte.vector, rte.delivery_mode,
+                   rte.dest_mode ? "logical" : "physical",
+                   rte.delivery_status, rte.polarity, rte.irr,
+                   rte.trigger ? "level" : "edge", rte.mask);
+
+            if ( entry->next == 0 )
+                break;
+            entry = &irq_2_pin[entry->next];
+        }
+    }
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/irq.c        Mon Jan  9 11:22:17 2006
@@ -12,6 +12,7 @@
 #include <xen/irq.h>
 #include <xen/perfc.h>
 #include <xen/sched.h>
+#include <xen/keyhandler.h>
 #include <asm/current.h>
 #include <asm/smpboot.h>
 
@@ -198,19 +199,21 @@
 
 int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
 {
-    unsigned int        vector = irq_to_vector(irq);
-    struct domain      *d = v->domain;
-    irq_desc_t         *desc = &irq_desc[vector];
+    unsigned int        vector;
+    irq_desc_t         *desc;
     irq_guest_action_t *action;
     unsigned long       flags;
     int                 rc = 0;
     cpumask_t           cpumask = CPU_MASK_NONE;
 
-    if ( !IS_CAPABLE_PHYSDEV(d) )
-        return -EPERM;
-
+    if ( (irq < 0) || (irq >= NR_IRQS) )
+        return -EINVAL;
+
+    vector = irq_to_vector(irq);
     if ( vector == 0 )
-        return -EBUSY;
+        return -EINVAL;
+
+    desc = &irq_desc[vector];
 
     spin_lock_irqsave(&desc->lock, flags);
 
@@ -309,3 +312,71 @@
     spin_unlock_irqrestore(&desc->lock, flags);    
     return 0;
 }
+
+extern void dump_ioapic_irq_info(void);
+
+static void dump_irqs(unsigned char key)
+{
+    int i, irq, vector;
+    irq_desc_t *desc;
+    irq_guest_action_t *action;
+    struct domain *d;
+    unsigned long flags;
+
+    printk("Guest interrupt information:\n");
+
+    for ( irq = 0; irq < NR_IRQS; irq++ )
+    {
+        vector = irq_to_vector(irq);
+        if ( vector == 0 )
+            continue;
+
+        desc = &irq_desc[vector];
+
+        spin_lock_irqsave(&desc->lock, flags);
+
+        if ( desc->status & IRQ_GUEST )
+        {
+            action = (irq_guest_action_t *)desc->action;
+
+            printk("    IRQ%3d Vec%3d: type=%-15s status=%08x "
+                   "in-flight=%d domain-list=",
+                   irq, vector, desc->handler->typename,
+                   desc->status, action->in_flight);
+
+            for ( i = 0; i < action->nr_guests; i++ )
+            {
+                d = action->guest[i];
+                printk("%u(%c%c%c%c)",
+                       d->domain_id,
+                       (test_bit(d->pirq_to_evtchn[irq],
+                                 &d->shared_info->evtchn_pending[0]) ?
+                        'P' : '-'),
+                       (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_LONG,
+                                 &d->shared_info->vcpu_info[0].
+                                 evtchn_pending_sel) ?
+                        'S' : '-'),
+                       (test_bit(d->pirq_to_evtchn[irq],
+                                 &d->shared_info->evtchn_mask[0]) ?
+                        'M' : '-'),
+                       (test_bit(irq, &d->pirq_mask) ?
+                        'M' : '-'));
+                if ( i != action->nr_guests )
+                    printk(",");
+            }
+
+            printk("\n");
+        }
+
+        spin_unlock_irqrestore(&desc->lock, flags);
+    }
+
+    dump_ioapic_irq_info();
+}
+
+static int __init setup_dump_irqs(void)
+{
+    register_keyhandler('i', dump_irqs, "dump interrupt bindings");
+    return 0;
+}
+__initcall(setup_dump_irqs);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/mm.c Mon Jan  9 11:22:17 2006
@@ -96,6 +96,7 @@
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
 #include <xen/event.h>
+#include <xen/iocap.h>
 #include <asm/shadow.h>
 #include <asm/page.h>
 #include <asm/flushtlb.h>
@@ -437,7 +438,6 @@
     unsigned long mfn = l1e_get_pfn(l1e);
     struct pfn_info *page = pfn_to_page(mfn);
     int okay;
-    extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
 
     if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
         return 1;
@@ -455,8 +455,7 @@
         if ( d == dom_io )
             d = current->domain;
 
-        if ( (!IS_PRIV(d)) &&
-             (!IS_CAPABLE_PHYSDEV(d) || !domain_iomem_in_pfn(d, mfn)) )
+        if ( !iomem_access_permitted(d, mfn, mfn) )
         {
             MEM_LOG("Non-privileged attempt to map I/O space %08lx", mfn);
             return 0;
@@ -1458,7 +1457,8 @@
                      * was GDT/LDT) but those circumstances should be
                      * very rare.
                      */
-                    cpumask_t mask = page_get_owner(page)->cpumask;
+                    cpumask_t mask =
+                        page_get_owner(page)->domain_dirty_cpumask;
                     tlbflush_filter(mask, page->tlbflush_timestamp);
 
                     if ( unlikely(!cpus_empty(mask)) )
@@ -1620,7 +1620,7 @@
         if ( shadow_mode_enabled(d) )
             shadow_sync_all(d);
         if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
-            flush_tlb_mask(d->cpumask);
+            flush_tlb_mask(d->domain_dirty_cpumask);
         else
             local_flush_tlb();
     }
@@ -1692,7 +1692,7 @@
     struct domain *d, unsigned long vmask)
 {
     unsigned int vcpu_id;
-    cpumask_t    pmask;
+    cpumask_t    pmask = CPU_MASK_NONE;
     struct vcpu *v;
 
     while ( vmask != 0 )
@@ -1701,7 +1701,7 @@
         vmask &= ~(1UL << vcpu_id);
         if ( (vcpu_id < MAX_VIRT_CPUS) &&
              ((v = d->vcpu[vcpu_id]) != NULL) )
-            cpu_set(v->processor, pmask);
+            cpus_or(pmask, pmask, v->vcpu_dirty_cpumask);
     }
 
     return pmask;
@@ -1870,7 +1870,6 @@
                 break;
             }
             pmask = vcpumask_to_pcpumask(d, vmask);
-            cpus_and(pmask, pmask, d->cpumask);
             if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
                 flush_tlb_mask(pmask);
             else
@@ -1879,15 +1878,15 @@
         }
 
         case MMUEXT_TLB_FLUSH_ALL:
-            flush_tlb_mask(d->cpumask);
+            flush_tlb_mask(d->domain_dirty_cpumask);
             break;
     
         case MMUEXT_INVLPG_ALL:
-            flush_tlb_one_mask(d->cpumask, op.arg1.linear_addr);
+            flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr);
             break;
 
         case MMUEXT_FLUSH_CACHE:
-            if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
+            if ( unlikely(!cache_flush_permitted(d)) )
             {
                 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
                 okay = 0;
@@ -2498,7 +2497,7 @@
     l1_pgentry_t   val = l1e_from_intpte(val64);
     struct vcpu   *v   = current;
     struct domain *d   = v->domain;
-    unsigned int   cpu = v->processor;
+    unsigned int   cpu = smp_processor_id();
     unsigned long  vmask, bmap_ptr;
     cpumask_t      pmask;
     int            rc  = 0;
@@ -2549,13 +2548,12 @@
             local_flush_tlb();
             break;
         case UVMF_ALL:
-            flush_tlb_mask(d->cpumask);
+            flush_tlb_mask(d->domain_dirty_cpumask);
             break;
         default:
             if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
                 rc = -EFAULT;
             pmask = vcpumask_to_pcpumask(d, vmask);
-            cpus_and(pmask, pmask, d->cpumask);
             flush_tlb_mask(pmask);
             break;
         }
@@ -2570,13 +2568,12 @@
             local_flush_tlb_one(va);
             break;
         case UVMF_ALL:
-            flush_tlb_one_mask(d->cpumask, va);
+            flush_tlb_one_mask(d->domain_dirty_cpumask, va);
             break;
         default:
             if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
                 rc = -EFAULT;
             pmask = vcpumask_to_pcpumask(d, vmask);
-            cpus_and(pmask, pmask, d->cpumask);
             flush_tlb_one_mask(pmask, va);
             break;
         }
@@ -3019,7 +3016,7 @@
 
     /* Ensure that there are no stale writable mappings in any TLB. */
     /* NB. INVLPG is a serialising instruction: flushes pending updates. */
-    flush_tlb_one_mask(d->cpumask, l1va);
+    flush_tlb_one_mask(d->domain_dirty_cpumask, l1va);
     PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
                 PTWR_PRINT_WHICH, ptep, pte.l1);
 
@@ -3343,7 +3340,7 @@
     if ( which == PTWR_PT_ACTIVE )
     {
         l2e_remove_flags(*pl2e, _PAGE_PRESENT);
-        flush_tlb_mask(d->cpumask);
+        flush_tlb_mask(d->domain_dirty_cpumask);
     }
     
     /* Temporarily map the L1 page, and make a copy of it. */
@@ -3370,7 +3367,7 @@
 
  emulate:
     if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
-                           &ptwr_mem_emulator, BITS_PER_LONG/8) )
+                           &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
         return 0;
     perfc_incrc(ptwr_emulations);
     return EXCRET_fault_fixed;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/physdev.c    Mon Jan  9 11:22:17 2006
@@ -13,27 +13,6 @@
 
 extern int ioapic_guest_read(int apicid, int address, u32 *pval);
 extern int ioapic_guest_write(int apicid, int address, u32 pval);
-
-void physdev_modify_ioport_access_range(
-    struct domain *d, int enable, int port, int num)
-{
-    int i;
-    for ( i = port; i < (port + num); i++ )
-        (enable ? clear_bit : set_bit)(i, d->arch.iobmp_mask);
-}
-
-void physdev_destroy_state(struct domain *d)
-{
-    xfree(d->arch.iobmp_mask);
-    d->arch.iobmp_mask = NULL;
-}
-
-/* Check if a domain controls a device with IO memory within frame @pfn.
- * Returns: 1 if the domain should be allowed to map @pfn, 0 otherwise.  */
-int domain_iomem_in_pfn(struct domain *p, unsigned long pfn)
-{
-    return 0;
-}
 
 /*
  * Demuxing hypercall.
@@ -120,18 +99,6 @@
     return ret;
 }
 
-/* Domain 0 has read access to all devices. */
-void physdev_init_dom0(struct domain *d)
-{
-    /* Access to all I/O ports. */
-    d->arch.iobmp_mask = xmalloc_array(u8, IOBMP_BYTES);
-    BUG_ON(d->arch.iobmp_mask == NULL);
-    memset(d->arch.iobmp_mask, 0, IOBMP_BYTES);
-
-    set_bit(_DOMF_physdev_access, &d->domain_flags);
-}
-
-
 /*
  * Local variables:
  * mode: C
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/setup.c      Mon Jan  9 11:22:17 2006
@@ -92,7 +92,7 @@
 #endif
 EXPORT_SYMBOL(mmu_cr4_features);
 
-struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
+struct vcpu *idle_domain[NR_CPUS] = { &idle0_vcpu };
 
 int acpi_disabled;
 
@@ -138,131 +138,19 @@
         (*call)();
 }
 
-static void __init start_of_day(void)
-{
-    int i;
-    unsigned long vgdt, gdt_pfn;
-
-    early_cpu_init();
-
-    paging_init();
-
-    /* Unmap the first page of CPU0's stack. */
-    memguard_guard_stack(cpu0_stack);
-
-    open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
-
-    if ( opt_watchdog ) 
-        nmi_watchdog = NMI_LOCAL_APIC;
-
-    sort_exception_tables();
-
-    arch_do_createdomain(current);
-    
-    /*
-     * Map default GDT into its final positions in the idle page table. As
-     * noted in arch_do_createdomain(), we must map for every possible VCPU#.
-     */
-    vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
-    gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
-    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
-    {
-        map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
-        vgdt += 1 << PDPT_VCPU_VA_SHIFT;
-    }
-
-    find_smp_config();
-
-    smp_alloc_memory();
-
-    dmi_scan_machine();
-
-    generic_apic_probe();
-
-    acpi_boot_table_init();
-    acpi_boot_init();
-
-    if ( smp_found_config ) 
-        get_smp_config();
-
-    init_apic_mappings();
-
-    init_IRQ();
-
-    trap_init();
-
-    ac_timer_init();
-
-    early_time_init();
-
-    arch_init_memory();
-
-    scheduler_init();
-
-    identify_cpu(&boot_cpu_data);
-    if ( cpu_has_fxsr )
-        set_in_cr4(X86_CR4_OSFXSR);
-    if ( cpu_has_xmm )
-        set_in_cr4(X86_CR4_OSXMMEXCPT);
-
-    if ( opt_nosmp )
-    {
-        max_cpus = 0;
-        smp_num_siblings = 1;
-        boot_cpu_data.x86_num_cores = 1;
-    }
-
-    smp_prepare_cpus(max_cpus);
-
-    /* We aren't hotplug-capable yet. */
-    BUG_ON(!cpus_empty(cpu_present_map));
-    for_each_cpu ( i )
-        cpu_set(i, cpu_present_map);
-
-    /*
-     * Initialise higher-level timer functions. We do this fairly late
-     * (post-SMP) because the time bases and scale factors need to be updated 
-     * regularly, and SMP initialisation can cause a long delay with 
-     * interrupts not yet enabled.
-     */
-    init_xen_time();
-
-    initialize_keytable();
-
-    serial_init_postirq();
-
-    BUG_ON(!local_irq_is_enabled());
-
-    for_each_present_cpu ( i )
-    {
-        if ( num_online_cpus() >= max_cpus )
-            break;
-        if ( !cpu_online(i) )
-            __cpu_up(i);
-    }
-
-    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
-    smp_cpus_done(max_cpus);
-
-    do_initcalls();
-
-    schedulers_start();
-
-    watchdog_enable();
-}
-
 #define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
 
 static struct e820entry e820_raw[E820MAX];
 
 void __init __start_xen(multiboot_info_t *mbi)
 {
+    unsigned long vgdt, gdt_pfn;
     char *cmdline;
+    unsigned long _initrd_start = 0, _initrd_len = 0;
+    unsigned int initrdidx = 1;
     module_t *mod = (module_t *)__va(mbi->mods_addr);
     unsigned long nr_pages, modules_length;
     unsigned long initial_images_start, initial_images_end;
-    unsigned long _initrd_start = 0, _initrd_len = 0;
-    unsigned int initrdidx = 1;
     physaddr_t s, e;
     int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
     struct ns16550_defaults ns16550 = {
@@ -455,6 +343,12 @@
     BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
     BUG_ON(sizeof(vcpu_info_t) != 64);
 
+    /* __foo are defined in public headers. Check they match internal defs. */
+    BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
+#ifdef HYPERVISOR_VIRT_END
+    BUG_ON(__HYPERVISOR_VIRT_END   != HYPERVISOR_VIRT_END);
+#endif
+
     init_frametable();
 
     end_boot_allocator();
@@ -486,7 +380,113 @@
 
     early_boot = 0;
 
-    start_of_day();
+    early_cpu_init();
+
+    paging_init();
+
+    /* Unmap the first page of CPU0's stack. */
+    memguard_guard_stack(cpu0_stack);
+
+    open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
+
+    if ( opt_watchdog ) 
+        nmi_watchdog = NMI_LOCAL_APIC;
+
+    sort_exception_tables();
+
+    if ( arch_do_createdomain(current) != 0 )
+        BUG();
+
+    /*
+     * Map default GDT into its final positions in the idle page table. As
+     * noted in arch_do_createdomain(), we must map for every possible VCPU#.
+     */
+    vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
+    gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+    {
+        map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
+        vgdt += 1 << PDPT_VCPU_VA_SHIFT;
+    }
+
+    find_smp_config();
+
+    smp_alloc_memory();
+
+    dmi_scan_machine();
+
+    generic_apic_probe();
+
+    acpi_boot_table_init();
+    acpi_boot_init();
+
+    if ( smp_found_config ) 
+        get_smp_config();
+
+    init_apic_mappings();
+
+    init_IRQ();
+
+    trap_init();
+
+    ac_timer_init();
+
+    early_time_init();
+
+    arch_init_memory();
+
+    scheduler_init();
+
+    identify_cpu(&boot_cpu_data);
+    if ( cpu_has_fxsr )
+        set_in_cr4(X86_CR4_OSFXSR);
+    if ( cpu_has_xmm )
+        set_in_cr4(X86_CR4_OSXMMEXCPT);
+
+    if ( opt_nosmp )
+    {
+        max_cpus = 0;
+        smp_num_siblings = 1;
+        boot_cpu_data.x86_num_cores = 1;
+    }
+
+    smp_prepare_cpus(max_cpus);
+
+    /* We aren't hotplug-capable yet. */
+    BUG_ON(!cpus_empty(cpu_present_map));
+    for_each_cpu ( i )
+        cpu_set(i, cpu_present_map);
+
+    /*
+     * Initialise higher-level timer functions. We do this fairly late
+     * (post-SMP) because the time bases and scale factors need to be updated 
+     * regularly, and SMP initialisation can cause a long delay with 
+     * interrupts not yet enabled.
+     */
+    init_xen_time();
+
+    initialize_keytable();
+
+    serial_init_postirq();
+
+    BUG_ON(!local_irq_is_enabled());
+
+    for_each_present_cpu ( i )
+    {
+        if ( num_online_cpus() >= max_cpus )
+            break;
+        if ( !cpu_online(i) )
+            __cpu_up(i);
+    }
+
+    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
+    smp_cpus_done(max_cpus);
+
+    do_initcalls();
+
+    schedulers_start();
+
+    watchdog_enable();
 
     shadow_mode_init();
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/shadow.c     Mon Jan  9 11:22:17 2006
@@ -1800,7 +1800,7 @@
     }
 
     /* Other VCPUs mustn't use the revoked writable mappings. */
-    other_vcpus_mask = d->cpumask;
+    other_vcpus_mask = d->domain_dirty_cpumask;
     cpu_clear(smp_processor_id(), other_vcpus_mask);
     flush_tlb_mask(other_vcpus_mask);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/shadow32.c   Mon Jan  9 11:22:17 2006
@@ -2586,7 +2586,7 @@
     }
 
     /* Other VCPUs mustn't use the revoked writable mappings. */
-    other_vcpus_mask = d->cpumask;
+    other_vcpus_mask = d->domain_dirty_cpumask;
     cpu_clear(smp_processor_id(), other_vcpus_mask);
     flush_tlb_mask(other_vcpus_mask);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/smpboot.c    Mon Jan  9 11:22:17 2006
@@ -435,7 +435,7 @@
 
        extern void percpu_traps_init(void);
 
-       set_current(idle_task[cpu]);
+       set_current(idle_domain[cpu]);
        set_processor_id(cpu);
 
        percpu_traps_init();
@@ -763,7 +763,6 @@
 {
        struct domain *idle;
        struct vcpu *v;
-       void *stack;
        unsigned long boot_error;
        int timeout, cpu;
        unsigned long start_eip;
@@ -774,7 +773,7 @@
        if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
                panic("failed 'createdomain' for CPU %d", cpu);
 
-       v = idle_task[cpu] = idle->vcpu[0];
+       v = idle_domain[cpu] = idle->vcpu[0];
 
        set_bit(_DOMF_idle_domain, &idle->domain_flags);
 
@@ -786,16 +785,10 @@
        /* So we see what's up   */
        printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
 
-       stack = alloc_xenheap_pages(STACK_ORDER);
-#if defined(__i386__)
-       stack_start.esp = (void *)__pa(stack);
-#elif defined(__x86_64__)
-       stack_start.esp = stack;
-#endif
-       stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
+       stack_start.esp = alloc_xenheap_pages(STACK_ORDER);
 
        /* Debug build: detect stack overflow by setting up a guard page. */
-       memguard_guard_stack(stack);
+       memguard_guard_stack(stack_start.esp);
 
        /*
         * This grunge runs the startup process for
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/traps.c      Mon Jan  9 11:22:17 2006
@@ -41,6 +41,7 @@
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
 #include <xen/symbols.h>
+#include <xen/iocap.h>
 #include <asm/shadow.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -192,7 +193,8 @@
 
     /* Bounds for range of valid frame pointer. */
     low  = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
-    high = (low & ~(STACK_SIZE - 1)) + (STACK_SIZE - sizeof(struct cpu_info));
+    high = (low & ~(STACK_SIZE - 1)) + 
+        (STACK_SIZE - sizeof(struct cpu_info) - 2*sizeof(unsigned long));
 
     /* The initial frame pointer. */
     next = regs->ebp;
@@ -200,14 +202,14 @@
     for ( ; ; )
     {
         /* Valid frame pointer? */
-        if ( (next < low) || (next > high) )
+        if ( (next < low) || (next >= high) )
         {
             /*
              * Exception stack frames have a different layout, denoted by an
              * inverted frame pointer.
              */
             next = ~next;
-            if ( (next < low) || (next > high) )
+            if ( (next < low) || (next >= high) )
                 break;
             frame = (unsigned long *)next;
             next  = frame[0];
@@ -621,17 +623,7 @@
     unsigned int port, unsigned int bytes,
     struct vcpu *v, struct cpu_user_regs *regs)
 {
-    struct domain *d = v->domain;
-    u16 x;
-
-    if ( d->arch.iobmp_mask != NULL )
-    {
-        x = *(u16 *)(d->arch.iobmp_mask + (port >> 3));
-        if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
-            return 1;
-    }
-
-    return 0;
+    return ioports_access_permitted(v->domain, port, port + bytes - 1);
 }
 
 /* Check admin limits. Silently fail the access if it is disallowed. */
@@ -871,7 +863,7 @@
 
     case 0x09: /* WBINVD */
         /* Ignore the instruction if unprivileged. */
-        if ( !IS_CAPABLE_PHYSDEV(v->domain) )
+        if ( !cache_flush_permitted(v->domain) )
             DPRINTK("Non-physdev domain attempted WBINVD.\n");
         else
             wbinvd();
@@ -885,7 +877,8 @@
         switch ( modrm_reg )
         {
         case 0: /* Read CR0 */
-            *reg = v->arch.guest_context.ctrlreg[0];
+            *reg = (read_cr0() & ~X86_CR0_TS) |
+                v->arch.guest_context.ctrlreg[0];
             break;
 
         case 2: /* Read CR2 */
@@ -927,6 +920,11 @@
         switch ( modrm_reg )
         {
         case 0: /* Write CR0 */
+            if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
+            {
+                DPRINTK("Attempt to change unmodifiable CR0 flags.\n");
+                goto fail;
+            }
             (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
             break;
 
@@ -939,6 +937,14 @@
             LOCK_BIGLOCK(v->domain);
             (void)new_guest_cr3(*reg);
             UNLOCK_BIGLOCK(v->domain);
+            break;
+
+        case 4:
+            if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
+            {
+                DPRINTK("Attempt to change CR4 flags.\n");
+                goto fail;
+            }
             break;
 
         default:
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx.c        Mon Jan  9 11:22:17 2006
@@ -42,7 +42,7 @@
 #include <asm/shadow_64.h>
 #endif
 #include <public/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/vmx_vpic.h>
 #include <asm/vmx_vlapic.h>
 
@@ -53,7 +53,7 @@
 integer_param("vmx_debug", opt_vmx_debug_level);
 
 static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
+#define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
 
 static int vmx_switch_on;
 
@@ -65,11 +65,6 @@
     {
         struct domain *d = v->domain;
         struct vcpu *vc;
-
-        d->arch.vmx_platform.lapic_enable = 
v->arch.guest_context.user_regs.ecx;
-        v->arch.guest_context.user_regs.ecx = 0;
-        VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "lapic enable is %d.\n",
-                    d->arch.vmx_platform.lapic_enable);
 
         /* Initialize monitor page table */
         for_each_vcpu(d, vc)
@@ -95,7 +90,7 @@
 void vmx_relinquish_resources(struct vcpu *v)
 {
     struct vmx_virpit *vpit;
-    
+
     if ( !VMX_DOMAIN(v) )
         return;
 
@@ -1955,9 +1950,12 @@
 
 asmlinkage void trace_vmentry (void)
 {
-    TRACE_5D(TRC_VMENTRY,trace_values[current->processor][0],
-             
trace_values[current->processor][1],trace_values[current->processor][2],
-             
trace_values[current->processor][3],trace_values[current->processor][4]);
+    TRACE_5D(TRC_VMENTRY,
+             trace_values[smp_processor_id()][0],
+             trace_values[smp_processor_id()][1],
+             trace_values[smp_processor_id()][2],
+             trace_values[smp_processor_id()][3],
+             trace_values[smp_processor_id()][4]);
     TRACE_VMEXIT(0,9);
     TRACE_VMEXIT(1,9);
     TRACE_VMEXIT(2,9);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c      Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx_intercept.c      Mon Jan  9 11:22:17 2006
@@ -24,7 +24,7 @@
 #include <asm/vmx_vpit.h>
 #include <asm/vmx_intercept.h>
 #include <asm/vmx_vlapic.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <asm/current.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx_io.c     Mon Jan  9 11:22:17 2006
@@ -37,7 +37,7 @@
 #include <asm/shadow.h>
 #include <asm/vmx_vpic.h>
 #include <asm/vmx_vlapic.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #ifdef CONFIG_VMX
 #if defined (__i386__)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_platform.c
--- a/xen/arch/x86/vmx_platform.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx_platform.c       Mon Jan  9 11:22:17 2006
@@ -27,7 +27,7 @@
 #include <xen/trace.h>
 #include <asm/vmx.h>
 #include <asm/vmx_platform.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #include <xen/lib.h>
 #include <xen/sched.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_vlapic.c
--- a/xen/arch/x86/vmx_vlapic.c Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx_vlapic.c Mon Jan  9 11:22:17 2006
@@ -32,7 +32,7 @@
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <asm/current.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #ifdef CONFIG_VMX
 
@@ -62,7 +62,7 @@
 
 int vmx_apic_support(struct domain *d)
 {
-    return d->arch.vmx_platform.lapic_enable;
+    return d->arch.vmx_platform.apic_enabled;
 }
 
 s_time_t get_apictime_scheduled(struct vcpu *v)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx_vmcs.c   Mon Jan  9 11:22:17 2006
@@ -32,7 +32,7 @@
 #include <asm/flushtlb.h>
 #include <xen/event.h>
 #include <xen/kernel.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/hvm_info_table.h>
 #if CONFIG_PAGING_LEVELS >= 4
 #include <asm/shadow_64.h>
 #endif
@@ -206,35 +206,55 @@
               &d->shared_info->evtchn_mask[0]);
 }
 
-#define VCPU_NR_PAGE        0x0009F000
-#define VCPU_NR_OFFSET      0x00000800
-#define VCPU_MAGIC          0x76637075  /* "vcpu" */
-
-static void vmx_set_vcpu_nr(struct domain *d)
+static int validate_hvm_info(struct hvm_info_table *t)
+{
+    char signature[] = "HVM INFO";
+    uint8_t *ptr = (uint8_t *)t;
+    uint8_t sum = 0;
+    int i;
+
+    /* strncmp(t->signature, "HVM INFO", 8) */
+    for ( i = 0; i < 8; i++ ) {
+        if ( signature[i] != t->signature[i] ) {
+            printk("Bad hvm info signature\n");
+            return 0;
+        }
+    }
+
+    for ( i = 0; i < t->length; i++ )
+        sum += ptr[i];
+
+    return (sum == 0);
+}
+
+static void vmx_get_hvm_info(struct domain *d)
 {
     unsigned char *p;
     unsigned long mpfn;
-    unsigned int *vcpus;
-
-    mpfn = get_mfn_from_pfn(VCPU_NR_PAGE >> PAGE_SHIFT);
-    if (mpfn == INVALID_MFN) {
-        printk("Can not get vcpu number page mfn for VMX domain.\n");
+    struct hvm_info_table *t;
+
+    mpfn = get_mfn_from_pfn(HVM_INFO_PFN);
+    if ( mpfn == INVALID_MFN ) {
+        printk("Can not get hvm info page mfn for VMX domain.\n");
         domain_crash_synchronous();
     }
 
     p = map_domain_page(mpfn);
-    if (p == NULL) {
-        printk("Can not map vcpu number page for VMX domain.\n");
-        domain_crash_synchronous();
-    }
-
-    vcpus = (unsigned int *)(p + VCPU_NR_OFFSET);
-    if (vcpus[0] != VCPU_MAGIC) {
-        printk("Bad vcpus magic, set vcpu number to 1 by default.\n");
-        d->arch.vmx_platform.nr_vcpu = 1;
-    }
-
-    d->arch.vmx_platform.nr_vcpu = vcpus[1];
+    if ( p == NULL ) {
+        printk("Can not map hvm info page for VMX domain.\n");
+        domain_crash_synchronous();
+    }
+
+    t = (struct hvm_info_table *)(p + HVM_INFO_OFFSET);
+
+    if ( validate_hvm_info(t) ) {
+        d->arch.vmx_platform.nr_vcpus = t->nr_vcpus;
+        d->arch.vmx_platform.apic_enabled = t->apic_enabled;
+    } else {
+        printk("Bad hvm info table\n");
+        d->arch.vmx_platform.nr_vcpus = 1;
+        d->arch.vmx_platform.apic_enabled = 0;
+    }
 
     unmap_domain_page(p);
 }
@@ -244,10 +264,10 @@
     struct vmx_platform *platform;
 
     vmx_map_io_shared_page(d);
-    vmx_set_vcpu_nr(d);
+    vmx_get_hvm_info(d);
 
     platform = &d->arch.vmx_platform;
-    pic_init(&platform->vmx_pic,  pic_irq_request, 
+    pic_init(&platform->vmx_pic,  pic_irq_request,
              &platform->interrupt_request);
     register_pic_io_hook();
 
@@ -335,6 +355,8 @@
     __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
 
     v->arch.schedule_tail = arch_vmx_do_resume;
+    v->arch.arch_vmx.launch_cpu = smp_processor_id();
+
     /* init guest tsc to start from 0 */
     rdtscll(host_tsc);
     v->arch.arch_vmx.tsc_offset = 0 - host_tsc;
@@ -617,11 +639,21 @@
 
 void arch_vmx_do_resume(struct vcpu *v)
 {
-    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
-    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
-    vmx_do_resume(v);
-    reset_stack_and_jump(vmx_asm_do_resume);
+    if ( v->arch.arch_vmx.launch_cpu == smp_processor_id() )
+    {
+        load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
+        vmx_do_resume(v);
+        reset_stack_and_jump(vmx_asm_do_resume);
+    }
+    else
+    {
+        __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
+        load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
+        vmx_do_resume(v);
+        vmx_set_host_env(v);
+        v->arch.arch_vmx.launch_cpu = smp_processor_id();
+        reset_stack_and_jump(vmx_asm_do_relaunch);
+    }
 }
 
 void arch_vmx_do_launch(struct vcpu *v)
@@ -641,18 +673,6 @@
     }
     vmx_do_launch(v);
     reset_stack_and_jump(vmx_asm_do_launch);
-}
-
-void arch_vmx_do_relaunch(struct vcpu *v)
-{
-    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
-    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
-    vmx_do_resume(v);
-    vmx_set_host_env(v);
-    v->arch.schedule_tail = arch_vmx_do_resume;
-
-    reset_stack_and_jump(vmx_asm_do_relaunch);
 }
 
 #endif /* CONFIG_VMX */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/x86_emulate.c        Mon Jan  9 11:22:17 2006
@@ -371,6 +371,21 @@
    (_type)_x; \
 })
 
+/* Access/update address held in a register, based on addressing mode. */
+#define register_address(sel, reg)                                      \
+    ((ad_bytes == sizeof(unsigned long)) ? (reg) :                      \
+     ((mode == X86EMUL_MODE_REAL) ? /* implies ad_bytes == 2 */         \
+      (((unsigned long)(sel) << 4) + ((reg) & 0xffff)) :                \
+      ((reg) & ((1UL << (ad_bytes << 3)) - 1))))
+#define register_address_increment(reg, inc)                            \
+do {                                                                    \
+    if ( ad_bytes == sizeof(unsigned long) )                            \
+        (reg) += (inc);                                                 \
+    else                                                                \
+        (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) |             \
+                (((reg) + (inc)) & ((1UL << (ad_bytes << 3)) - 1));     \
+} while (0)
+
 void *
 decode_register(
     uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
@@ -420,32 +435,64 @@
 {
     uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
     uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
-    unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode;
-    unsigned int lock_prefix = 0, rep_prefix = 0, i;
+    uint16_t *seg = NULL; /* override segment */
+    unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
     int rc = 0;
     struct operand src, dst;
 
     /* Shadow copy of register state. Committed on successful emulation. */
     struct cpu_user_regs _regs = *regs;
 
+    switch ( mode )
+    {
+    case X86EMUL_MODE_REAL:
+    case X86EMUL_MODE_PROT16:
+        op_bytes = ad_bytes = 2;
+        break;
+    case X86EMUL_MODE_PROT32:
+        op_bytes = ad_bytes = 4;
+        break;
+#ifdef __x86_64__
+    case X86EMUL_MODE_PROT64:
+        op_bytes = 4;
+        ad_bytes = 8;
+        break;
+#endif
+    default:
+        return -1;
+    }
+
     /* Legacy prefixes. */
     for ( i = 0; i < 8; i++ )
     {
         switch ( b = insn_fetch(uint8_t, 1, _regs.eip) )
         {
         case 0x66: /* operand-size override */
-            op_bytes ^= 6;                    /* switch between 2/4 bytes */
+            op_bytes ^= 6;      /* switch between 2/4 bytes */
             break;
         case 0x67: /* address-size override */
-            ad_bytes ^= (mode == 8) ? 12 : 6; /* switch between 2/4/8 bytes */
+            if ( mode == X86EMUL_MODE_PROT64 )
+                ad_bytes ^= 12; /* switch between 4/8 bytes */
+            else
+                ad_bytes ^= 6;  /* switch between 2/4 bytes */
             break;
         case 0x2e: /* CS override */
+            seg = &_regs.cs;
+            break;
         case 0x3e: /* DS override */
+            seg = &_regs.ds;
+            break;
         case 0x26: /* ES override */
+            seg = &_regs.es;
+            break;
         case 0x64: /* FS override */
+            seg = &_regs.fs;
+            break;
         case 0x65: /* GS override */
+            seg = &_regs.gs;
+            break;
         case 0x36: /* SS override */
-            DPRINTF("Warning: ignoring a segment override.\n");
+            seg = &_regs.ss;
             break;
         case 0xf0: /* LOCK */
             lock_prefix = 1;
@@ -461,8 +508,12 @@
     }
  done_prefixes:
 
+    /* Not quite the same as 80386 real mode, but hopefully good enough. */
+    if ( (mode == X86EMUL_MODE_REAL) && (ad_bytes != 2) )
+        goto cannot_emulate;
+
     /* REX prefix. */
-    if ( (mode == 8) && ((b & 0xf0) == 0x40) )
+    if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) )
     {
         rex_prefix = b;
         if ( b & 8 )
@@ -674,7 +725,7 @@
         emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
         break;
     case 0x63: /* movsxd */
-        if ( mode != 8 ) /* x86/64 long mode only */
+        if ( mode != X86EMUL_MODE_PROT64 )
             goto cannot_emulate;
         dst.val = (int32_t)src.val;
         break;
@@ -721,12 +772,13 @@
         dst.val = src.val;
         break;
     case 0x8f: /* pop (sole member of Grp1a) */
-        /* 64-bit mode: POP defaults to 64-bit operands. */
-        if ( (mode == 8) && (dst.bytes == 4) )
+        /* 64-bit mode: POP always pops a 64-bit operand. */
+        if ( mode == X86EMUL_MODE_PROT64 )
             dst.bytes = 8;
-        if ( (rc = ops->read_std(_regs.esp, &dst.val, dst.bytes)) != 0 )
+        if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
+                                 &dst.val, dst.bytes)) != 0 )
             goto done;
-        _regs.esp += dst.bytes;
+        register_address_increment(_regs.esp, dst.bytes);
         break;
     case 0xc0 ... 0xc1: grp2: /* Grp2 */
         switch ( modrm_reg )
@@ -797,16 +849,17 @@
             emulate_1op("dec", dst, _regs.eflags);
             break;
         case 6: /* push */
-            /* 64-bit mode: PUSH defaults to 64-bit operands. */
-            if ( (mode == 8) && (dst.bytes == 4) )
+            /* 64-bit mode: PUSH always pushes a 64-bit operand. */
+            if ( mode == X86EMUL_MODE_PROT64 )
             {
                 dst.bytes = 8;
                 if ( (rc = ops->read_std((unsigned long)dst.ptr,
                                          &dst.val, 8)) != 0 )
                     goto done;
             }
-            _regs.esp -= dst.bytes;
-            if ( (rc = ops->write_std(_regs.esp, dst.val, dst.bytes)) != 0 )
+            register_address_increment(_regs.esp, -dst.bytes);
+            if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
+                                      dst.val, dst.bytes)) != 0 )
                 goto done;
             dst.val = dst.orig_val; /* skanky: disable writeback */
             break;
@@ -873,19 +926,22 @@
         {
             /* Write fault: destination is special memory. */
             dst.ptr = (unsigned long *)cr2;
-            if ( (rc = ops->read_std(_regs.esi - _regs.edi + cr2, 
+            if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
+                                                      _regs.esi),
                                      &dst.val, dst.bytes)) != 0 )
                 goto done;
         }
         else
         {
             /* Read fault: source is special memory. */
-            dst.ptr = (unsigned long *)(_regs.edi - _regs.esi + cr2);
+            dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
             if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
                 goto done;
         }
-        _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
-        _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+        register_address_increment(
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+        register_address_increment(
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xa6 ... 0xa7: /* cmps */
         DPRINTF("Urk! I don't handle CMPS.\n");
@@ -895,7 +951,8 @@
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.ptr   = (unsigned long *)cr2;
         dst.val   = _regs.eax;
-        _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+        register_address_increment(
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xac ... 0xad: /* lods */
         dst.type  = OP_REG;
@@ -903,7 +960,8 @@
         dst.ptr   = (unsigned long *)&_regs.eax;
         if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
             goto done;
-        _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+        register_address_increment(
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xae ... 0xaf: /* scas */
         DPRINTF("Urk! I don't handle SCAS.\n");
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/bitmap.c
--- a/xen/common/bitmap.c       Mon Jan  9 11:19:55 2006
+++ b/xen/common/bitmap.c       Mon Jan  9 11:22:17 2006
@@ -282,6 +282,111 @@
 #endif
 EXPORT_SYMBOL(__bitmap_weight);
 
+/*
+ * Bitmap printing & parsing functions: first version by Bill Irwin,
+ * second version by Paul Jackson, third by Joe Korty.
+ */
+
+#define CHUNKSZ                                32
+#define nbits_to_hold_value(val)       fls(val)
+#define roundup_power2(val,modulus)    (((val) + (modulus) - 1) & ~((modulus) - 1))
+#define unhex(c)                       (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
+#define BASEDEC 10             /* fancier cpuset lists input in decimal */
+
+/**
+ * bitmap_scnprintf - convert bitmap to an ASCII hex string.
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Exactly @nmaskbits bits are displayed.  Hex digits are grouped into
+ * comma-separated sets of eight digits per set.
+ */
+int bitmap_scnprintf(char *buf, unsigned int buflen,
+       const unsigned long *maskp, int nmaskbits)
+{
+       int i, word, bit, len = 0;
+       unsigned long val;
+       const char *sep = "";
+       int chunksz;
+       u32 chunkmask;
+
+       chunksz = nmaskbits & (CHUNKSZ - 1);
+       if (chunksz == 0)
+               chunksz = CHUNKSZ;
+
+       i = roundup_power2(nmaskbits, CHUNKSZ) - CHUNKSZ;
+       for (; i >= 0; i -= CHUNKSZ) {
+               chunkmask = ((1ULL << chunksz) - 1);
+               word = i / BITS_PER_LONG;
+               bit = i % BITS_PER_LONG;
+               val = (maskp[word] >> bit) & chunkmask;
+               len += scnprintf(buf+len, buflen-len, "%s%0*lx", sep,
+                       (chunksz+3)/4, val);
+               chunksz = CHUNKSZ;
+               sep = ",";
+       }
+       return len;
+}
+EXPORT_SYMBOL(bitmap_scnprintf);
+
+/*
+ * bscnl_emit(buf, buflen, rbot, rtop, bp)
+ *
+ * Helper routine for bitmap_scnlistprintf().  Write decimal number
+ * or range to buf, suppressing output past buf+buflen, with optional
+ * comma-prefix.  Return len of what would be written to buf, if it
+ * all fit.
+ */
+static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len)
+{
+       if (len > 0)
+               len += scnprintf(buf + len, buflen - len, ",");
+       if (rbot == rtop)
+               len += scnprintf(buf + len, buflen - len, "%d", rbot);
+       else
+               len += scnprintf(buf + len, buflen - len, "%d-%d", rbot, rtop);
+       return len;
+}
+
+/**
+ * bitmap_scnlistprintf - convert bitmap to list format ASCII string
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Output format is a comma-separated list of decimal numbers and
+ * ranges.  Consecutively set bits are shown as two hyphen-separated
+ * decimal numbers, the smallest and largest bit numbers set in
+ * the range.  Output format is compatible with the format
+ * accepted as input by bitmap_parselist().
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing '\0', as
+ * per ISO C99.
+ */
+int bitmap_scnlistprintf(char *buf, unsigned int buflen,
+       const unsigned long *maskp, int nmaskbits)
+{
+       int len = 0;
+       /* current bit is 'cur', most recently seen range is [rbot, rtop] */
+       int cur, rbot, rtop;
+
+       rbot = cur = find_first_bit(maskp, nmaskbits);
+       while (cur < nmaskbits) {
+               rtop = cur;
+               cur = find_next_bit(maskp, nmaskbits, cur+1);
+               if (cur >= nmaskbits || cur > rtop + 1) {
+                       len = bscnl_emit(buf, buflen, rbot, rtop, len);
+                       rbot = cur;
+               }
+       }
+       return len;
+}
+EXPORT_SYMBOL(bitmap_scnlistprintf);
+
 /**
  *     bitmap_find_free_region - find a contiguous aligned mem region
  *     @bitmap: an array of unsigned longs corresponding to the bitmap
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Mon Jan  9 11:19:55 2006
+++ b/xen/common/dom0_ops.c     Mon Jan  9 11:22:17 2006
@@ -16,6 +16,7 @@
 #include <xen/domain_page.h>
 #include <xen/trace.h>
 #include <xen/console.h>
+#include <xen/iocap.h>
 #include <asm/current.h>
 #include <public/dom0_ops.h>
 #include <public/sched_ctl.h>
@@ -109,13 +110,13 @@
     switch ( op->cmd )
     {
 
-    case DOM0_SETDOMAININFO:
-    {
-        struct domain *d = find_domain_by_id(op->u.setdomaininfo.domain);
+    case DOM0_SETVCPUCONTEXT:
+    {
+        struct domain *d = find_domain_by_id(op->u.setvcpucontext.domain);
         ret = -ESRCH;
         if ( d != NULL )
         {
-            ret = set_info_guest(d, &op->u.setdomaininfo);
+            ret = set_info_guest(d, &op->u.setvcpucontext);
             put_domain(d);
         }
     }
@@ -283,11 +284,12 @@
     }
     break;
 
-    case DOM0_PINCPUDOMAIN:
-    {
-        domid_t dom = op->u.pincpudomain.domain;
+    case DOM0_SETVCPUAFFINITY:
+    {
+        domid_t dom = op->u.setvcpuaffinity.domain;
         struct domain *d = find_domain_by_id(dom);
         struct vcpu *v;
+        cpumask_t new_affinity;
 
         if ( d == NULL )
         {
@@ -295,15 +297,15 @@
             break;
         }
         
-        if ( (op->u.pincpudomain.vcpu >= MAX_VIRT_CPUS) ||
-             !d->vcpu[op->u.pincpudomain.vcpu] )
+        if ( (op->u.setvcpuaffinity.vcpu >= MAX_VIRT_CPUS) ||
+             !d->vcpu[op->u.setvcpuaffinity.vcpu] )
         {
             ret = -EINVAL;
             put_domain(d);
             break;
         }
 
-        v = d->vcpu[op->u.pincpudomain.vcpu];
+        v = d->vcpu[op->u.setvcpuaffinity.vcpu];
         if ( v == NULL )
         {
             ret = -ESRCH;
@@ -318,22 +320,13 @@
             break;
         }
 
-        v->cpumap = op->u.pincpudomain.cpumap;
-
-        if ( v->cpumap == CPUMAP_RUNANYWHERE )
-        {
-            clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
-        }
-        else
-        {
-            /* pick a new cpu from the usable map */
-            int new_cpu;
-            new_cpu = (int)find_first_set_bit(v->cpumap) % num_online_cpus();
-            vcpu_pause(v);
-            vcpu_migrate_cpu(v, new_cpu);
-            set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
-            vcpu_unpause(v);
-        }
+        new_affinity = v->cpu_affinity;
+        memcpy(cpus_addr(new_affinity),
+               &op->u.setvcpuaffinity.cpumap,
+               min((int)BITS_TO_LONGS(NR_CPUS),
+                   (int)sizeof(op->u.setvcpuaffinity.cpumap)));
+
+        ret = vcpu_set_affinity(v, &new_affinity);
 
         put_domain(d);
     }
@@ -505,7 +498,11 @@
         op->u.getvcpuinfo.running  = test_bit(_VCPUF_running, &v->vcpu_flags);
         op->u.getvcpuinfo.cpu_time = v->cpu_time;
         op->u.getvcpuinfo.cpu      = v->processor;
-        op->u.getvcpuinfo.cpumap   = v->cpumap;
+        op->u.getvcpuinfo.cpumap   = 0;
+        memcpy(&op->u.getvcpuinfo.cpumap,
+               cpus_addr(v->cpu_affinity),
+               min((int)BITS_TO_LONGS(NR_CPUS),
+                   (int)sizeof(op->u.getvcpuinfo.cpumap)));
         ret = 0;
 
         if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )     
@@ -582,6 +579,7 @@
         }
     }
     break;
+
     case DOM0_SETDEBUGGING:
     {
         struct domain *d; 
@@ -596,6 +594,53 @@
             put_domain(d);
             ret = 0;
         }
+    }
+    break;
+
+    case DOM0_IRQ_PERMISSION:
+    {
+        struct domain *d;
+        unsigned int pirq = op->u.irq_permission.pirq;
+
+        ret = -EINVAL;
+        if ( pirq >= NR_PIRQS )
+            break;
+
+        ret = -ESRCH;
+        d = find_domain_by_id(op->u.irq_permission.domain);
+        if ( d == NULL )
+            break;
+
+        if ( op->u.irq_permission.allow_access )
+            ret = irq_permit_access(d, pirq);
+        else
+            ret = irq_deny_access(d, pirq);
+
+        put_domain(d);
+    }
+    break;
+
+    case DOM0_IOMEM_PERMISSION:
+    {
+        struct domain *d;
+        unsigned long pfn = op->u.iomem_permission.first_pfn;
+        unsigned long nr_pfns = op->u.iomem_permission.nr_pfns;
+
+        ret = -EINVAL;
+        if ( (pfn + nr_pfns - 1) < pfn ) /* wrap? */
+            break;
+
+        ret = -ESRCH;
+        d = find_domain_by_id(op->u.iomem_permission.domain);
+        if ( d == NULL )
+            break;
+
+        if ( op->u.iomem_permission.allow_access )
+            ret = iomem_permit_access(d, pfn, pfn + nr_pfns - 1);
+        else
+            ret = iomem_deny_access(d, pfn, pfn + nr_pfns - 1);
+
+        put_domain(d);
     }
     break;
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/domain.c
--- a/xen/common/domain.c       Mon Jan  9 11:19:55 2006
+++ b/xen/common/domain.c       Mon Jan  9 11:22:17 2006
@@ -16,6 +16,7 @@
 #include <xen/console.h>
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
+#include <xen/rangeset.h>
 #include <asm/debugger.h>
 #include <public/dom0_ops.h>
 #include <public/sched.h>
@@ -50,25 +51,24 @@
     else
         set_bit(_DOMF_ctrl_pause, &d->domain_flags);
 
-    if ( !is_idle_task(d) &&
+    if ( !is_idle_domain(d) &&
          ((evtchn_init(d) != 0) || (grant_table_create(d) != 0)) )
-    {
-        evtchn_destroy(d);
-        free_domain(d);
-        return NULL;
-    }
+        goto fail1;
     
     if ( (v = alloc_vcpu(d, 0, cpu)) == NULL )
-    {
-        grant_table_destroy(d);
-        evtchn_destroy(d);
-        free_domain(d);
-        return NULL;
-    }
-
-    arch_do_createdomain(v);
-    
-    if ( !is_idle_task(d) )
+        goto fail2;
+
+    rangeset_domain_initialise(d);
+
+    d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
+    d->irq_caps   = rangeset_new(d, "Interrupts", 0);
+
+    if ( (d->iomem_caps == NULL) ||
+         (d->irq_caps == NULL) ||
+         (arch_do_createdomain(v) != 0) )
+        goto fail3;
+
+    if ( !is_idle_domain(d) )
     {
         write_lock(&domlist_lock);
         pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
@@ -83,6 +83,15 @@
     }
 
     return d;
+
+ fail3:
+    rangeset_domain_destroy(d);
+ fail2:
+    grant_table_destroy(d);
+ fail1:
+    evtchn_destroy(d);
+    free_domain(d);
+    return NULL;
 }
 
 
@@ -164,20 +173,23 @@
 
     BUG_ON(d == NULL);
     BUG_ON(d == current->domain);
-    BUG_ON(!test_bit(_DOMF_shuttingdown, &d->domain_flags));
-    BUG_ON(test_bit(_DOMF_shutdown, &d->domain_flags));
+
+    LOCK_BIGLOCK(d);
 
     /* Make sure that every vcpu is descheduled before we finalise. */
     for_each_vcpu ( d, v )
         vcpu_sleep_sync(v);
-    BUG_ON(!cpus_empty(d->cpumask));
+    BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
 
     sync_pagetable_state(d);
 
-    set_bit(_DOMF_shutdown, &d->domain_flags);
-    clear_bit(_DOMF_shuttingdown, &d->domain_flags);
-
-    send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
+    /* Don't set DOMF_shutdown until execution contexts are sync'ed. */
+    if ( !test_and_set_bit(_DOMF_shutdown, &d->domain_flags) )
+        send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
+
+    UNLOCK_BIGLOCK(d);
+
+    put_domain(d);
 }
 
 static __init int domain_shutdown_finaliser_init(void)
@@ -213,16 +225,17 @@
 
     /* Mark the domain as shutting down. */
     d->shutdown_code = reason;
-    if ( !test_and_set_bit(_DOMF_shuttingdown, &d->domain_flags) )
-    {
-        /* This vcpu won the race to finalise the shutdown. */
-        domain_shuttingdown[smp_processor_id()] = d;
-        raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
-    }
 
     /* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
     for_each_vcpu ( d, v )
+    {
+        atomic_inc(&v->pausecnt);
         vcpu_sleep_nosync(v);
+    }
+
+    get_knownalive_domain(d);
+    domain_shuttingdown[smp_processor_id()] = d;
+    raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
 }
 
 
@@ -271,6 +284,8 @@
     *pd = d->next_in_hashbucket;
     write_unlock(&domlist_lock);
 
+    rangeset_domain_destroy(d);
+
     evtchn_destroy(d);
     grant_table_destroy(d);
 
@@ -346,11 +361,11 @@
  * of domains other than domain 0. ie. the domains that are being built by 
  * the userspace dom0 domain builder.
  */
-int set_info_guest(struct domain *d, dom0_setdomaininfo_t *setdomaininfo)
+int set_info_guest(struct domain *d, dom0_setvcpucontext_t *setvcpucontext)
 {
     int rc = 0;
     struct vcpu_guest_context *c = NULL;
-    unsigned long vcpu = setdomaininfo->vcpu;
+    unsigned long vcpu = setvcpucontext->vcpu;
     struct vcpu *v; 
 
     if ( (vcpu >= MAX_VIRT_CPUS) || ((v = d->vcpu[vcpu]) == NULL) )
@@ -363,7 +378,7 @@
         return -ENOMEM;
 
     rc = -EFAULT;
-    if ( copy_from_user(c, setdomaininfo->ctxt, sizeof(*c)) == 0 )
+    if ( copy_from_user(c, setvcpucontext->ctxt, sizeof(*c)) == 0 )
         rc = arch_set_info_guest(v, c);
 
     xfree(c);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Mon Jan  9 11:19:55 2006
+++ b/xen/common/event_channel.c        Mon Jan  9 11:22:17 2006
@@ -22,6 +22,7 @@
 #include <xen/sched.h>
 #include <xen/event.h>
 #include <xen/irq.h>
+#include <xen/iocap.h>
 #include <asm/current.h>
 
 #include <public/xen.h>
@@ -241,6 +242,9 @@
 
     if ( pirq >= ARRAY_SIZE(d->pirq_to_evtchn) )
         return -EINVAL;
+
+    if ( !irq_access_permitted(d, pirq) )
+        return -EPERM;
 
     spin_lock(&d->evtchn_lock);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/grant_table.c
--- a/xen/common/grant_table.c  Mon Jan  9 11:19:55 2006
+++ b/xen/common/grant_table.c  Mon Jan  9 11:22:17 2006
@@ -469,7 +469,7 @@
     for ( i = 0; i < count; i++ )
         (void)__gnttab_unmap_grant_ref(&uop[i]);
 
-    flush_tlb_mask(current->domain->cpumask);
+    flush_tlb_mask(current->domain->domain_dirty_cpumask);
 
     return 0;
 }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Mon Jan  9 11:19:55 2006
+++ b/xen/common/keyhandler.c   Mon Jan  9 11:22:17 2006
@@ -11,6 +11,7 @@
 #include <xen/sched.h>
 #include <xen/softirq.h>
 #include <xen/domain.h>
+#include <xen/rangeset.h>
 #include <asm/debugger.h>
 
 #define KEY_MAX 256
@@ -96,44 +97,60 @@
     machine_restart(NULL); 
 }
 
-static void do_task_queues(unsigned char key)
+static void cpuset_print(char *set, int size, cpumask_t mask)
+{
+    *set++ = '{';
+    set += cpulist_scnprintf(set, size-2, mask);
+    *set++ = '}';
+    *set++ = '\0';
+}
+
+static void dump_domains(unsigned char key)
 {
     struct domain *d;
     struct vcpu   *v;
     s_time_t       now = NOW();
-
-    printk("'%c' pressed -> dumping task queues (now=0x%X:%08X)\n", key,
+    char           cpuset[100];
+
+    printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key,
            (u32)(now>>32), (u32)now); 
 
     read_lock(&domlist_lock);
 
     for_each_domain ( d )
     {
-        printk("Xen: DOM %u, flags=%lx refcnt=%d nr_pages=%d "
-               "xenheap_pages=%d\n", d->domain_id, d->domain_flags,
-               atomic_read(&d->refcnt), d->tot_pages, d->xenheap_pages);
-        /* The handle is printed according to the OSF DCE UUID spec., even
-           though it is not necessarily such a thing, for ease of use when it
-           _is_ one of those. */
-        printk("     handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
+        printk("General information for domain %u:\n", d->domain_id);
+        cpuset_print(cpuset, sizeof(cpuset), d->domain_dirty_cpumask);
+        printk("    flags=%lx refcnt=%d nr_pages=%d xenheap_pages=%d "
+               "dirty_cpus=%s\n",
+               d->domain_flags, atomic_read(&d->refcnt),
+               d->tot_pages, d->xenheap_pages, cpuset);
+        printk("    handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
                "%02x%02x-%02x%02x%02x%02x%02x%02x\n",
                d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
                d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7],
                d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
                d->handle[12], d->handle[13], d->handle[14], d->handle[15]);
 
+        rangeset_domain_printk(d);
+
         dump_pageframe_info(d);
                
+        printk("VCPU information and callbacks for domain %u:\n",
+               d->domain_id);
         for_each_vcpu ( d, v ) {
-            printk("Guest: %p CPU %d [has=%c] flags=%lx "
-                   "upcall_pend = %02x, upcall_mask = %02x\n", v,
-                   v->processor,
+            printk("    VCPU%d: CPU%d [has=%c] flags=%lx "
+                   "upcall_pend = %02x, upcall_mask = %02x ",
+                   v->vcpu_id, v->processor,
                    test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F',
                    v->vcpu_flags,
                    v->vcpu_info->evtchn_upcall_pending, 
                    v->vcpu_info->evtchn_upcall_mask);
-            printk("Notifying guest... %d/%d\n", d->domain_id, v->vcpu_id); 
-            printk("port %d/%d stat %d %d %d\n",
+            cpuset_print(cpuset, sizeof(cpuset), v->vcpu_dirty_cpumask);
+            printk("dirty_cpus=%s ", cpuset);
+            cpuset_print(cpuset, sizeof(cpuset), v->cpu_affinity);
+            printk("cpu_affinity=%s\n", cpuset);
+            printk("    Notifying guest (virq %d, port %d, stat %d/%d/%d)\n",
                    VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG],
                    test_bit(v->virq_to_evtchn[VIRQ_DEBUG], 
                             &d->shared_info->evtchn_pending[0]),
@@ -191,7 +208,7 @@
     register_keyhandler(
         'L', reset_sched_histo, "reset sched latency histogram");
     register_keyhandler(
-        'q', do_task_queues, "dump task queues + guest state");
+        'q', dump_domains, "dump domain (and guest debug) info");
     register_keyhandler(
         'r', dump_runq,      "dump run queues");
     register_irq_keyhandler(
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/memory.c
--- a/xen/common/memory.c       Mon Jan  9 11:19:55 2006
+++ b/xen/common/memory.c       Mon Jan  9 11:22:17 2006
@@ -15,6 +15,7 @@
 #include <xen/sched.h>
 #include <xen/event.h>
 #include <xen/shadow.h>
+#include <xen/iocap.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 #include <public/memory.h>
@@ -35,7 +36,8 @@
          !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) )
         return 0;
 
-    if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current->domain) )
+    if ( (extent_order != 0) &&
+         !multipage_allocation_permitted(current->domain) )
     {
         DPRINTK("Only I/O-capable domains may allocate multi-page extents.\n");
         return 0;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Mon Jan  9 11:19:55 2006
+++ b/xen/common/page_alloc.c   Mon Jan  9 11:22:17 2006
@@ -615,7 +615,7 @@
             shadow_drop_references(d, &pg[i]);
             ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
             pg[i].tlbflush_timestamp  = tlbflush_current_time();
-            pg[i].u.free.cpumask      = d->cpumask;
+            pg[i].u.free.cpumask      = d->domain_dirty_cpumask;
             list_del(&pg[i].list);
         }
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/sched_bvt.c
--- a/xen/common/sched_bvt.c    Mon Jan  9 11:19:55 2006
+++ b/xen/common/sched_bvt.c    Mon Jan  9 11:22:17 2006
@@ -31,7 +31,8 @@
     struct list_head    run_list;         /* runqueue list pointers */
     u32                 avt;              /* actual virtual time */
     u32                 evt;              /* effective virtual time */
-    struct vcpu  *vcpu;
+    int                 migrated;         /* migrated to a new CPU */
+    struct vcpu         *vcpu;
     struct bvt_dom_info *inf;
 };
 
@@ -219,7 +220,7 @@
 
     einf->vcpu = v;
 
-    if ( is_idle_task(v->domain) )
+    if ( is_idle_domain(v->domain) )
     {
         einf->avt = einf->evt = ~0U;
         BUG_ON(__task_on_runqueue(v));
@@ -250,9 +251,11 @@
 
     /* Set the BVT parameters. AVT should always be updated 
        if CPU migration ocurred.*/
-    if ( einf->avt < CPU_SVT(cpu) || 
-         unlikely(test_bit(_VCPUF_cpu_migrated, &v->vcpu_flags)) )
+    if ( (einf->avt < CPU_SVT(cpu)) || einf->migrated )
+    {
         einf->avt = CPU_SVT(cpu);
+        einf->migrated = 0;
+    }
 
     /* Deal with warping here. */
     einf->evt = calc_evt(v, einf->avt);
@@ -265,7 +268,7 @@
         ((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
         ctx_allow;
 
-    if ( is_idle_task(curr->domain) || (einf->evt <= curr_evt) )
+    if ( is_idle_domain(curr->domain) || (einf->evt <= curr_evt) )
         cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
     else if ( schedule_data[cpu].s_timer.expires > r_time )
         set_ac_timer(&schedule_data[cpu].s_timer, r_time);
@@ -274,11 +277,27 @@
 
 static void bvt_sleep(struct vcpu *v)
 {
-    if ( test_bit(_VCPUF_running, &v->vcpu_flags) )
+    if ( schedule_data[v->processor].curr == v )
         cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
     else  if ( __task_on_runqueue(v) )
         __del_from_runqueue(v);
 }
+
+
+static int bvt_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+    if ( v == current )
+        return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
+
+    vcpu_pause(v);
+    v->cpu_affinity = *affinity;
+    v->processor = first_cpu(v->cpu_affinity);
+    EBVT_INFO(v)->migrated = 1;
+    vcpu_unpause(v);
+
+    return 0;
+}
+
 
 /**
  * bvt_free_task - free BVT private structures for a task
@@ -380,7 +399,7 @@
     ASSERT(prev_einf != NULL);
     ASSERT(__task_on_runqueue(prev));
 
-    if ( likely(!is_idle_task(prev->domain)) ) 
+    if ( likely(!is_idle_domain(prev->domain)) ) 
     {
         prev_einf->avt = calc_avt(prev, now);
         prev_einf->evt = calc_evt(prev, prev_einf->avt);
@@ -390,7 +409,7 @@
         
         __del_from_runqueue(prev);
         
-        if ( domain_runnable(prev) )
+        if ( vcpu_runnable(prev) )
             __add_to_runqueue_tail(prev);
     }
 
@@ -471,13 +490,13 @@
     }
 
     /* work out time for next run through scheduler */
-    if ( is_idle_task(next->domain) ) 
+    if ( is_idle_domain(next->domain) ) 
     {
         r_time = ctx_allow;
         goto sched_done;
     }
 
-    if ( (next_prime == NULL) || is_idle_task(next_prime->domain) )
+    if ( (next_prime == NULL) || is_idle_domain(next_prime->domain) )
     {
         /* We have only one runnable task besides the idle task. */
         r_time = 10 * ctx_allow;     /* RN: random constant */
@@ -557,6 +576,7 @@
     .dump_cpu_state = bvt_dump_cpu_state,
     .sleep          = bvt_sleep,
     .wake           = bvt_wake,
+    .set_affinity   = bvt_set_affinity
 };
 
 /*
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Mon Jan  9 11:19:55 2006
+++ b/xen/common/sched_sedf.c   Mon Jan  9 11:22:17 2006
@@ -325,21 +325,29 @@
     list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
 }
 
+
 /* Allocates memory for per domain private scheduling data*/
-static int sedf_alloc_task(struct vcpu *d) {
-    PRINT(2,"sedf_alloc_task was called, domain-id %i.%i\n",d->domain->domain_id,
-          d->vcpu_id);
-    if (d->domain->sched_priv == NULL) {
-        if ((d->domain->sched_priv = 
-             xmalloc(struct sedf_dom_info)) == NULL )
+static int sedf_alloc_task(struct vcpu *d)
+{
+    PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n",
+          d->domain->domain_id, d->vcpu_id);
+
+    if ( d->domain->sched_priv == NULL )
+    {
+        d->domain->sched_priv = xmalloc(struct sedf_dom_info);
+        if ( d->domain->sched_priv == NULL )
             return -1;
         memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
     }
-    if ((d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
+
+    if ( (d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
         return -1;
+
     memset(d->sched_priv, 0, sizeof(struct sedf_vcpu_info));
+
     return 0;
 }
+
 
 /* Setup the sedf_dom_info */
 static void sedf_add_task(struct vcpu *d)
@@ -363,14 +371,17 @@
         INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q));
     }
        
-    if (d->domain->domain_id==0) {
+    if ( d->domain->domain_id == 0 )
+    {
         /*set dom0 to something useful to boot the machine*/
         inf->period    = MILLISECS(20);
         inf->slice     = MILLISECS(15);
         inf->latency   = 0;
         inf->deadl_abs = 0;
         inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
-    } else {
+    }
+    else
+    {
         /*other domains run in best effort mode*/
         inf->period    = WEIGHT_PERIOD;
         inf->slice     = 0;
@@ -379,14 +390,18 @@
         inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
         inf->extraweight = 1;
     }
+
     inf->period_orig = inf->period; inf->slice_orig = inf->slice;
     INIT_LIST_HEAD(&(inf->list));
     INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
     INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
  
-    if (!is_idle_task(d->domain)) {
+    if ( !is_idle_domain(d->domain) )
+    {
         extraq_check(d);
-    } else {
+    }
+    else
+    {
         EDOM_INFO(d)->deadl_abs = 0;
         EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
     }
@@ -396,19 +411,28 @@
 static void sedf_free_task(struct domain *d)
 {
     int i;
+
     PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
+
     ASSERT(d->sched_priv != NULL);
     xfree(d->sched_priv);
  
-    for (i = 0; i < MAX_VIRT_CPUS; i++)
-        if ( d->vcpu[i] ) {
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+    {
+        if ( d->vcpu[i] )
+        {
             ASSERT(d->vcpu[i]->sched_priv != NULL);
             xfree(d->vcpu[i]->sched_priv);
         }
-}
-
-/* handles the rescheduling, bookkeeping of domains running in their realtime-time :)*/
-static inline void desched_edf_dom (s_time_t now, struct vcpu* d) {
+    }
+}
+
+/*
+ * Handles the rescheduling & bookkeeping of domains running in their
+ * guaranteed timeslice.
+ */
+static void desched_edf_dom(s_time_t now, struct vcpu* d)
+{
     struct sedf_vcpu_info* inf = EDOM_INFO(d);
     /*current domain is running in real time mode*/
  
@@ -418,27 +442,30 @@
 
     /*scheduling decisions, which don't remove the running domain
       from the runq*/
-    if ((inf->cputime < inf->slice) && sedf_runnable(d))
+    if ( (inf->cputime < inf->slice) && sedf_runnable(d) )
         return;
   
     __del_from_queue(d);
   
     /*manage bookkeeping (i.e. calculate next deadline,
       memorize overun-time of slice) of finished domains*/
-    if (inf->cputime >= inf->slice) {
+    if ( inf->cputime >= inf->slice )
+    {
         inf->cputime -= inf->slice;
   
-        if (inf->period < inf->period_orig) {
+        if ( inf->period < inf->period_orig )
+        {
             /*this domain runs in latency scaling or burst mode*/
 #if (UNBLOCK == UNBLOCK_BURST)
             /*if we are runnig in burst scaling wait for two periods
               before scaling periods up again*/ 
-            if (now - inf->unblock_abs >= 2 * inf->period)
+            if ( (now - inf->unblock_abs) >= (2 * inf->period) )
 #endif
             {
                 inf->period *= 2; inf->slice *= 2;
-                if ((inf->period > inf->period_orig) ||
-                    (inf->slice > inf->slice_orig)) {
+                if ( (inf->period > inf->period_orig) ||
+                     (inf->slice > inf->slice_orig) )
+                {
                     /*reset slice & period*/
                     inf->period = inf->period_orig;
                     inf->slice = inf->slice_orig;
@@ -450,36 +477,46 @@
     }
  
     /*add a runnable domain to the waitqueue*/
-    if (sedf_runnable(d))
+    if ( sedf_runnable(d) )
+    {
         __add_to_waitqueue_sort(d);
-    else {
+    }
+    else
+    {
         /*we have a blocked realtime task -> remove it from exqs too*/
 #if (EXTRA > EXTRA_OFF)
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-        if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
-#endif
-        if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
-#endif
-    }
+        if ( extraq_on(d, EXTRA_PEN_Q) )
+            extraq_del(d, EXTRA_PEN_Q);
+#endif
+        if ( extraq_on(d, EXTRA_UTIL_Q) )
+            extraq_del(d, EXTRA_UTIL_Q);
+#endif
+    }
+
     ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
     ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
                  sedf_runnable(d)));
 }
 
+
 /* Update all elements on the queues */
-static inline void update_queues(s_time_t now, struct list_head* runq, 
-                                 struct list_head* waitq) {
-    struct list_head     *cur,*tmp;
+static void update_queues(
+    s_time_t now, struct list_head *runq, struct list_head *waitq)
+{
+    struct list_head     *cur, *tmp;
     struct sedf_vcpu_info *curinf;
  
     PRINT(3,"Updating waitq..\n");
+
     /*check for the first elements of the waitqueue, whether their
       next period has already started*/
     list_for_each_safe(cur, tmp, waitq) {
         curinf = list_entry(cur, struct sedf_vcpu_info, list);
         PRINT(4,"\tLooking @ dom %i.%i\n",
               curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
-        if (PERIOD_BEGIN(curinf) <= now) {
+        if ( PERIOD_BEGIN(curinf) <= now )
+        {
             __del_from_queue(curinf->vcpu);
             __add_to_runqueue_sort(curinf->vcpu);
         }
@@ -488,13 +525,16 @@
     }
  
     PRINT(3,"Updating runq..\n");
+
     /*process the runq, find domains that are on
       the runqueue which shouldn't be there*/
     list_for_each_safe(cur, tmp, runq) {
         curinf = list_entry(cur,struct sedf_vcpu_info,list);
         PRINT(4,"\tLooking @ dom %i.%i\n",
               curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
-        if (unlikely(curinf->slice == 0)) {
+
+        if ( unlikely(curinf->slice == 0) )
+        {
             /*ignore domains with empty slice*/
             PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
                   curinf->vcpu->domain->domain_id,
@@ -504,7 +544,8 @@
             /*move them to their next period*/
             curinf->deadl_abs += curinf->period;
             /*ensure that the start of the next period is in the future*/
-            if (unlikely(PERIOD_BEGIN(curinf) < now)) {
+            if ( unlikely(PERIOD_BEGIN(curinf) < now) )
+            {
                 curinf->deadl_abs += 
                     (DIV_UP(now - PERIOD_BEGIN(curinf),
                            curinf->period)) * curinf->period;
@@ -513,8 +554,10 @@
             __add_to_waitqueue_sort(curinf->vcpu);
             continue;
         }
-        if (unlikely((curinf->deadl_abs < now) ||
-                     (curinf->cputime > curinf->slice))) {
+
+        if ( unlikely((curinf->deadl_abs < now) ||
+                      (curinf->cputime > curinf->slice)) )
+        {
             /*we missed the deadline or the slice was
               already finished... might hapen because
               of dom_adj.*/
@@ -550,6 +593,7 @@
     PRINT(3,"done updating the queues\n");
 }
 
+
 #if (EXTRA > EXTRA_OFF)
 /* removes a domain from the head of the according extraQ and
    requeues it at a specified position:
@@ -557,9 +601,10 @@
      weighted ext.: insert in sorted list by score
    if the domain is blocked / has regained its short-block-loss
    time it is not put on any queue */
-static inline void desched_extra_dom(s_time_t now, struct vcpu* d) {
+static void desched_extra_dom(s_time_t now, struct vcpu* d)
+{
     struct sedf_vcpu_info *inf = EDOM_INFO(d);
-    int    i    = extra_get_cur_q(inf);
+    int i = extra_get_cur_q(inf);
  
 #if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
     unsigned long         oldscore;
@@ -575,14 +620,15 @@
     extraq_del(d, i);
 
 #if (EXTRA == EXTRA_ROUNDR)
-    if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
+    if ( sedf_runnable(d) && (inf->status & EXTRA_AWARE) )
         /*add to the tail if it is runnable => round-robin*/
         extraq_add_tail(d, EXTRA_UTIL_Q);
 #elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
     /*update the score*/
-    oldscore      = inf->score[i];
+    oldscore = inf->score[i];
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-    if (i == EXTRA_PEN_Q) {
+    if ( i == EXTRA_PEN_Q )
+    {
         /*domain was running in L0 extraq*/
         /*reduce block lost, probably more sophistication here!*/
         /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
@@ -605,12 +651,13 @@
         inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
             inf->short_block_lost_tot;
         oldscore = 0;
-    } else
+    }
+    else
 #endif
     {
         /*domain was running in L1 extraq => score is inverse of
           utilization and is used somewhat incremental!*/
-        if (!inf->extraweight)
+        if ( !inf->extraweight )
             /*NB: use fixed point arithmetic with 10 bits*/
             inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
                 inf->slice;
@@ -619,24 +666,32 @@
               full (ie 100%) utilization is equivalent to 128 extraweight*/
             inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
     }
+
  check_extra_queues:
     /* Adding a runnable domain to the right queue and removing blocked ones*/
-    if (sedf_runnable(d)) {
+    if ( sedf_runnable(d) )
+    {
         /*add according to score: weighted round robin*/
         if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
             ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
             extraq_add_sort_update(d, i, oldscore);
     }
-    else {
+    else
+    {
         /*remove this blocked domain from the waitq!*/
         __del_from_queue(d);
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
         /*make sure that we remove a blocked domain from the other
           extraq too*/
-        if (i == EXTRA_PEN_Q) {
-            if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
-        } else {
-            if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
+        if ( i == EXTRA_PEN_Q )
+        {
+            if ( extraq_on(d, EXTRA_UTIL_Q) )
+                extraq_del(d, EXTRA_UTIL_Q);
+        }
+        else
+        {
+            if ( extraq_on(d, EXTRA_PEN_Q) )
+                extraq_del(d, EXTRA_PEN_Q);
         }
 #endif
     }
@@ -647,16 +702,21 @@
 }
 #endif
 
-static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
-                                                        s_time_t end_xt, struct list_head *extraq[], int cpu) {
+
+static struct task_slice sedf_do_extra_schedule(
+    s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
+{
     struct task_slice   ret;
     struct sedf_vcpu_info *runinf;
     ASSERT(end_xt > now);
+
     /* Enough time left to use for extratime? */
-    if (end_xt - now < EXTRA_QUANTUM)
+    if ( end_xt - now < EXTRA_QUANTUM )
         goto return_idle;
+
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-    if (!list_empty(extraq[EXTRA_PEN_Q])) {
+    if ( !list_empty(extraq[EXTRA_PEN_Q]) )
+    {
         /*we still have elements on the level 0 extraq 
           => let those run first!*/
         runinf   = list_entry(extraq[EXTRA_PEN_Q]->next, 
@@ -667,9 +727,12 @@
 #ifdef SEDF_STATS
         runinf->pen_extra_slices++;
 #endif
-    } else
-#endif
-        if (!list_empty(extraq[EXTRA_UTIL_Q])) {
+    }
+    else
+#endif
+    {
+        if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
+        {
             /*use elements from the normal extraqueue*/
             runinf   = list_entry(extraq[EXTRA_UTIL_Q]->next,
                                   struct sedf_vcpu_info,
@@ -680,6 +743,7 @@
         }
         else
             goto return_idle;
+    }
 
     ASSERT(ret.time > 0);
     ASSERT(sedf_runnable(ret.task));
@@ -692,6 +756,8 @@
     ASSERT(sedf_runnable(ret.task));
     return ret;
 }
+
+
 /* Main scheduling function
    Reasons for calling this function are:
    -timeslice for the current period used up
@@ -699,7 +765,7 @@
    -and various others ;) in general: determine which domain to run next*/
 static struct task_slice sedf_do_schedule(s_time_t now)
 {
-    int                   cpu      = current->processor;
+    int                   cpu      = smp_processor_id();
     struct list_head     *runq     = RUNQ(cpu);
     struct list_head     *waitq    = WAITQ(cpu);
 #if (EXTRA > EXTRA_OFF)
@@ -711,20 +777,21 @@
     struct task_slice      ret;
 
     /*idle tasks don't need any of the following stuf*/
-    if (is_idle_task(current->domain))
+    if ( is_idle_domain(current->domain) )
         goto check_waitq;
  
     /* create local state of the status of the domain, in order to avoid
        inconsistent state during scheduling decisions, because data for
-       domain_runnable is not protected by the scheduling lock!*/
-    if(!domain_runnable(current))
+       vcpu_runnable is not protected by the scheduling lock!*/
+    if ( !vcpu_runnable(current) )
         inf->status |= SEDF_ASLEEP;
  
-    if (inf->status & SEDF_ASLEEP)
+    if ( inf->status & SEDF_ASLEEP )
         inf->block_abs = now;
 
 #if (EXTRA > EXTRA_OFF)
-    if (unlikely(extra_runs(inf))) {
+    if ( unlikely(extra_runs(inf)) )
+    {
         /*special treatment of domains running in extra time*/
         desched_extra_dom(now, current);
     }
@@ -739,10 +806,12 @@
     /*now simply pick the first domain from the runqueue, which has the
       earliest deadline, because the list is sorted*/
  
-    if (!list_empty(runq)) {
+    if ( !list_empty(runq) )
+    {
         runinf   = list_entry(runq->next,struct sedf_vcpu_info,list);
         ret.task = runinf->vcpu;
-        if (!list_empty(waitq)) {
+        if ( !list_empty(waitq) )
+        {
             waitinf  = list_entry(waitq->next,
                                   struct sedf_vcpu_info,list);
             /*rerun scheduler, when scheduled domain reaches it's
@@ -751,14 +820,16 @@
             ret.time = MIN(now + runinf->slice - runinf->cputime,
                            PERIOD_BEGIN(waitinf)) - now;
         }
-        else {
+        else
+        {
             ret.time = runinf->slice - runinf->cputime;
         }
         CHECK(ret.time > 0);
         goto sched_done;
     }
  
-    if (!list_empty(waitq)) {
+    if ( !list_empty(waitq) )
+    {
         waitinf  = list_entry(waitq->next,struct sedf_vcpu_info, list);
         /*we could not find any suitable domain 
           => look for domains that are aware of extratime*/
@@ -771,7 +842,8 @@
 #endif
         CHECK(ret.time > 0);
     }
-    else {
+    else
+    {
         /*this could probably never happen, but one never knows...*/
         /*it can... imagine a second CPU, which is pure scifi ATM,
           but one never knows ;)*/
@@ -782,11 +854,13 @@
  sched_done: 
     /*TODO: Do something USEFUL when this happens and find out, why it
       still can happen!!!*/
-    if (ret.time<0) {
+    if ( ret.time < 0 )
+    {
         printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
                ret.time);
         ret.time = EXTRA_QUANTUM;
     }
+
     EDOM_INFO(ret.task)->sched_start_abs = now;
     CHECK(ret.time > 0);
     ASSERT(sedf_runnable(ret.task));
@@ -794,30 +868,36 @@
     return ret;
 }
 
-static void sedf_sleep(struct vcpu *d) {
-    PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, 
d->vcpu_id);
- 
-    if (is_idle_task(d->domain))
+
+static void sedf_sleep(struct vcpu *d)
+{
+    PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
+          d->domain->domain_id, d->vcpu_id);
+ 
+    if ( is_idle_domain(d->domain) )
         return;
 
     EDOM_INFO(d)->status |= SEDF_ASLEEP;
  
-    if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
+    if ( schedule_data[d->processor].curr == d )
+    {
         cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
     }
-    else  {
+    else
+    {
         if ( __task_on_queue(d) )
             __del_from_queue(d);
 #if (EXTRA > EXTRA_OFF)
-        if (extraq_on(d, EXTRA_UTIL_Q)) 
+        if ( extraq_on(d, EXTRA_UTIL_Q) ) 
             extraq_del(d, EXTRA_UTIL_Q);
 #endif
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-        if (extraq_on(d, EXTRA_PEN_Q))
+        if ( extraq_on(d, EXTRA_PEN_Q) )
             extraq_del(d, EXTRA_PEN_Q);
 #endif
     }
 }
+
 
 /* This function wakes up a domain, i.e. moves them into the waitqueue
  * things to mention are: admission control is taking place nowhere at
@@ -890,17 +970,21 @@
  *     -either behaviour can lead to missed deadlines in other domains as
  *      opposed to approaches 1,2a,2b
  */
-static inline void unblock_short_vcons
-(struct sedf_vcpu_info* inf, s_time_t now) {
+#if (UNBLOCK <= UNBLOCK_SHORT_RESUME)
+static void unblock_short_vcons(struct sedf_vcpu_info* inf, s_time_t now)
+{
     inf->deadl_abs += inf->period;
     inf->cputime = 0;
 }
-
-static inline void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
+#endif
+
+#if (UNBLOCK == UNBLOCK_SHORT_RESUME)
+static void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
 {
     /*treat blocked time as consumed by the domain*/
     inf->cputime += now - inf->block_abs; 
-    if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+    if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
+    {
         /*we don't have a reasonable amount of time in 
           our slice left :( => start in next period!*/
         unblock_short_vcons(inf, now);
@@ -910,8 +994,11 @@
         inf->short_cont++;
 #endif
 }
-static inline void unblock_short_extra_support (struct sedf_vcpu_info* inf,
-                                                s_time_t now) {
+#endif
+
+static void unblock_short_extra_support(
+    struct sedf_vcpu_info* inf, s_time_t now)
+{
     /*this unblocking scheme tries to support the domain, by assigning it
     a priority in extratime distribution according to the loss of time
     in this slice due to blocking*/
@@ -919,26 +1006,29 @@
  
     /*no more realtime execution in this period!*/
     inf->deadl_abs += inf->period;
-    if (likely(inf->block_abs)) {
+    if ( likely(inf->block_abs) )
+    {
         //treat blocked time as consumed by the domain*/
         /*inf->cputime += now - inf->block_abs;*/
         /*penalty is time the domain would have
           had if it continued to run */
         pen = (inf->slice - inf->cputime);
-        if (pen < 0) pen = 0;
+        if ( pen < 0 )
+            pen = 0;
         /*accumulate all penalties over the periods*/
         /*inf->short_block_lost_tot += pen;*/
         /*set penalty to the current value*/
         inf->short_block_lost_tot = pen;
         /*not sure which one is better.. but seems to work well...*/
   
-        if (inf->short_block_lost_tot) {
+        if ( inf->short_block_lost_tot )
+        {
             inf->score[0] = (inf->period << 10) /
                 inf->short_block_lost_tot;
 #ifdef SEDF_STATS
             inf->pen_extra_blocks++;
 #endif
-            if (extraq_on(inf->vcpu, EXTRA_PEN_Q))
+            if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
                 /*remove domain for possible resorting!*/
                 extraq_del(inf->vcpu, EXTRA_PEN_Q);
             else
@@ -951,36 +1041,53 @@
             extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
         }
     }
+
     /*give it a fresh slice in the next period!*/
     inf->cputime = 0;
 }
-static inline void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
+
+
+#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
+static void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
 {
     /* align to next future period */
     inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
         * inf->period;
     inf->cputime = 0;
 }
-
-static inline void unblock_long_cons_a (struct sedf_vcpu_info* inf,
-                                        s_time_t now) {
+#endif
+
+
+#if 0
+static void unblock_long_cons_a (struct sedf_vcpu_info* inf, s_time_t now)
+{
     /*treat the time the domain was blocked in the
-   CURRENT period as consumed by the domain*/
+     CURRENT period as consumed by the domain*/
     inf->cputime = (now - inf->deadl_abs) % inf->period; 
-    if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+    if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
+    {
         /*we don't have a reasonable amount of time in our slice
           left :( => start in next period!*/
         unblock_long_vcons(inf, now);
     }
 }
-static inline void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now) {
+#endif
+
+
+static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
+{
     /*Conservative 2b*/
     /*Treat the unblocking time as a start of a new period */
     inf->deadl_abs = now + inf->period;
     inf->cputime = 0;
 }
-static inline void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now) {
-    if (likely(inf->latency)) {
+
+
+#if (UNBLOCK == UNBLOCK_ATROPOS)
+static void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now)
+{
+    if ( likely(inf->latency) )
+    {
         /*scale the slice and period accordingly to the latency hint*/
         /*reduce period temporarily to the latency hint*/
         inf->period = inf->latency;
@@ -993,18 +1100,24 @@
         inf->deadl_abs = now + inf->period;
         inf->cputime = 0;
     } 
-    else {
+    else
+    {
         /*we don't have a latency hint.. use some other technique*/
         unblock_long_cons_b(inf, now);
     }
 }
+#endif
+
+
+#if (UNBLOCK == UNBLOCK_BURST)
 /*a new idea of dealing with short blocks: burst period scaling*/
-static inline void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
+static void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
 {
     /*treat blocked time as consumed by the domain*/
     inf->cputime += now - inf->block_abs;
  
-    if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
+    if ( (inf->cputime + EXTRA_QUANTUM) <= inf->slice )
+    {
         /*if we can still use some time in the current slice
           then use it!*/
 #ifdef SEDF_STATS
@@ -1012,10 +1125,12 @@
         inf->short_cont++;
 #endif
     }
-    else {
+    else
+    {
         /*we don't have a reasonable amount of time in
           our slice left => switch to burst mode*/
-        if (likely(inf->unblock_abs)) {
+        if ( likely(inf->unblock_abs) )
+        {
             /*set the period-length to the current blocking
               interval, possible enhancements: average over last
               blocking intervals, user-specified minimum,...*/
@@ -1030,17 +1145,23 @@
             /*set new (shorter) deadline*/
             inf->deadl_abs += inf->period;
         }
-        else {
+        else
+        {
             /*in case we haven't unblocked before
               start in next period!*/
             inf->cputime=0;
             inf->deadl_abs += inf->period;
         }
     }
+
     inf->unblock_abs = now;
 }
-static inline void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now) {
-    if (unlikely(inf->latency && (inf->period > inf->latency))) {
+
+
+static void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now)
+{
+    if ( unlikely(inf->latency && (inf->period > inf->latency)) )
+    {
         /*scale the slice and period accordingly to the latency hint*/
         inf->period = inf->latency;
         /*check for overflows on multiplication*/
@@ -1052,23 +1173,28 @@
         inf->deadl_abs = now + inf->period;
         inf->cputime = 0;
     }
-    else {
+    else
+    {
         /*we don't have a latency hint.. or we are currently in 
           "burst mode": use some other technique
           NB: this should be in fact the normal way of operation,
           when we are in sync with the device!*/
         unblock_long_cons_b(inf, now);
     }
+
     inf->unblock_abs = now;
 }
+#endif /* UNBLOCK == UNBLOCK_BURST */
+
 
 #define DOMAIN_EDF   1
 #define DOMAIN_EXTRA_PEN  2
 #define DOMAIN_EXTRA_UTIL  3
 #define DOMAIN_IDLE   4
-static inline int get_run_type(struct vcpu* d) {
+static inline int get_run_type(struct vcpu* d)
+{
     struct sedf_vcpu_info* inf = EDOM_INFO(d);
-    if (is_idle_task(d->domain))
+    if (is_idle_domain(d->domain))
         return DOMAIN_IDLE;
     if (inf->status & EXTRA_RUN_PEN)
         return DOMAIN_EXTRA_PEN;
@@ -1076,6 +1202,8 @@
         return DOMAIN_EXTRA_UTIL;
     return DOMAIN_EDF;
 }
+
+
 /*Compares two domains in the relation of whether the one is allowed to
   interrupt the others execution.
   It returns true (!=0) if a switch to the other domain is good.
@@ -1085,8 +1213,10 @@
   In the same class priorities are assigned as following:
    EDF: early deadline > late deadline
    L0 extra-time: lower score > higher score*/
-static inline int should_switch(struct vcpu* cur,
-                                struct vcpu* other, s_time_t now) {
+static inline int should_switch(struct vcpu *cur,
+                                struct vcpu *other,
+                                s_time_t now)
+{
     struct sedf_vcpu_info *cur_inf, *other_inf;
     cur_inf   = EDOM_INFO(cur);
     other_inf = EDOM_INFO(other);
@@ -1119,41 +1249,51 @@
     }
     return 1;
 }
-void sedf_wake(struct vcpu *d) {
+
+void sedf_wake(struct vcpu *d)
+{
     s_time_t              now = NOW();
     struct sedf_vcpu_info* inf = EDOM_INFO(d);
 
     PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
           d->vcpu_id);
 
-    if (unlikely(is_idle_task(d->domain)))
+    if ( unlikely(is_idle_domain(d->domain)) )
         return;
    
-    if ( unlikely(__task_on_queue(d)) ) {
+    if ( unlikely(__task_on_queue(d)) )
+    {
         PRINT(3,"\tdomain %i.%i is already in some queue\n",
               d->domain->domain_id, d->vcpu_id);
         return;
     }
+
     ASSERT(!sedf_runnable(d));
     inf->status &= ~SEDF_ASLEEP;
     ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
     ASSERT(!extraq_on(d, EXTRA_PEN_Q));
  
-    if (unlikely(inf->deadl_abs == 0))
+    if ( unlikely(inf->deadl_abs == 0) )
+    {
         /*initial setup of the deadline*/
         inf->deadl_abs = now + inf->slice;
+    }
   
-    PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
-          "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, 
inf->deadl_abs,
-          inf->period, now);
+    PRINT(3, "waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
+          "now= %"PRIu64")\n",
+          d->domain->domain_id, d->vcpu_id, inf->deadl_abs, inf->period, now);
+
 #ifdef SEDF_STATS 
     inf->block_tot++;
 #endif
-    if (unlikely(now < PERIOD_BEGIN(inf))) {
+
+    if ( unlikely(now < PERIOD_BEGIN(inf)) )
+    {
         PRINT(4,"extratime unblock\n");
         /* unblocking in extra-time! */
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-        if (inf->status & EXTRA_WANT_PEN_Q) {
+        if ( inf->status & EXTRA_WANT_PEN_Q )
+        {
             /*we have a domain that wants compensation
               for block penalty and did just block in
               its compensation time. Give it another
@@ -1163,8 +1303,10 @@
 #endif
         extraq_check_add_unblocked(d, 0);
     }  
-    else {  
-        if (now < inf->deadl_abs) {
+    else
+    {  
+        if ( now < inf->deadl_abs )
+        {
             PRINT(4,"short unblocking\n");
             /*short blocking*/
 #ifdef SEDF_STATS
@@ -1182,7 +1324,8 @@
 
             extraq_check_add_unblocked(d, 1);
         }
-        else {
+        else
+        {
             PRINT(4,"long unblocking\n");
             /*long unblocking*/
 #ifdef SEDF_STATS
@@ -1197,7 +1340,6 @@
             unblock_long_cons_c(inf, now);
 #elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
             unblock_long_cons_b(inf, now);
-            /*unblock_short_cons_c(inf, now);*/
 #elif (UNBLOCK == UNBLOCK_BURST)
             unblock_long_burst(inf, now);
 #endif
@@ -1205,26 +1347,33 @@
             extraq_check_add_unblocked(d, 1);
         }
     }
-    PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
-          "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, 
inf->deadl_abs,
+
+    PRINT(3, "woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
+          "now= %"PRIu64")\n",
+          d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
           inf->period, now);
-    if (PERIOD_BEGIN(inf) > now) {
+
+    if ( PERIOD_BEGIN(inf) > now )
+    {
         __add_to_waitqueue_sort(d);
         PRINT(3,"added to waitq\n");
     }
-    else {
+    else
+    {
         __add_to_runqueue_sort(d);
         PRINT(3,"added to runq\n");
     }
  
 #ifdef SEDF_STATS
     /*do some statistics here...*/
-    if (inf->block_abs != 0) {
+    if ( inf->block_abs != 0 )
+    {
         inf->block_time_tot += now - inf->block_abs;
         inf->penalty_time_tot +=
             PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
     }
 #endif
+
     /*sanity check: make sure each extra-aware domain IS on the util-q!*/
     ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
     ASSERT(__task_on_queue(d));
@@ -1234,27 +1383,48 @@
     ASSERT(d->processor >= 0);
     ASSERT(d->processor < NR_CPUS);
     ASSERT(schedule_data[d->processor].curr);
-    if (should_switch(schedule_data[d->processor].curr, d, now))
+
+    if ( should_switch(schedule_data[d->processor].curr, d, now) )
         cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
 }
 
-/*Print a lot of use-{full, less} information about a domains in the system*/
-static void sedf_dump_domain(struct vcpu *d) {
+
+static int sedf_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+    if ( v == current )
+        return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
+
+    vcpu_pause(v);
+    v->cpu_affinity = *affinity;
+    v->processor = first_cpu(v->cpu_affinity);
+    vcpu_unpause(v);
+
+    return 0;
+}
+
+
+/* Print a lot of useful information about a domains in the system */
+static void sedf_dump_domain(struct vcpu *d)
+{
     printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
            test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
-    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i 
xtr(%s)=%"PRIu64" ew=%hu",
+    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64
+           " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
            EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
-           EDOM_INFO(d)->weight, d->cpu_time, 
EDOM_INFO(d)->score[EXTRA_UTIL_Q],
+           EDOM_INFO(d)->weight, d->cpu_time,
+           EDOM_INFO(d)->score[EXTRA_UTIL_Q],
            (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
            EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
-    if (d->cpu_time !=0)
+    
+    if ( d->cpu_time != 0 )
         printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
                / d->cpu_time);
+
 #ifdef SEDF_STATS
-    if (EDOM_INFO(d)->block_time_tot!=0)
+    if ( EDOM_INFO(d)->block_time_tot != 0 )
         printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
                EDOM_INFO(d)->block_time_tot);
-    if (EDOM_INFO(d)->block_tot!=0)
+    if ( EDOM_INFO(d)->block_tot != 0 )
         printf("\n   blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
                "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
                EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
@@ -1271,7 +1441,8 @@
     printf("\n");
 }
 
-/*dumps all domains on hte specified cpu*/
+
+/* dumps all domains on hte specified cpu */
 static void sedf_dump_cpu_state(int i)
 {
     struct list_head      *list, *queue, *tmp;
@@ -1284,7 +1455,8 @@
     queue = RUNQ(i);
     printk("RUNQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
            (unsigned long) queue->next, (unsigned long) queue->prev);
-    list_for_each_safe ( list, tmp, queue ) {
+    list_for_each_safe ( list, tmp, queue )
+    {
         printk("%3d: ",loop++);
         d_inf = list_entry(list, struct sedf_vcpu_info, list);
         sedf_dump_domain(d_inf->vcpu);
@@ -1293,7 +1465,8 @@
     queue = WAITQ(i); loop = 0;
     printk("\nWAITQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
            (unsigned long) queue->next, (unsigned long) queue->prev);
-    list_for_each_safe ( list, tmp, queue ) {
+    list_for_each_safe ( list, tmp, queue )
+    {
         printk("%3d: ",loop++);
         d_inf = list_entry(list, struct sedf_vcpu_info, list);
         sedf_dump_domain(d_inf->vcpu);
@@ -1303,7 +1476,8 @@
     printk("\nEXTRAQ (penalty) rq %lx   n: %lx, p: %lx\n",
            (unsigned long)queue, (unsigned long) queue->next,
            (unsigned long) queue->prev);
-    list_for_each_safe ( list, tmp, queue ) {
+    list_for_each_safe ( list, tmp, queue )
+    {
         d_inf = list_entry(list, struct sedf_vcpu_info,
                            extralist[EXTRA_PEN_Q]);
         printk("%3d: ",loop++);
@@ -1314,7 +1488,8 @@
     printk("\nEXTRAQ (utilization) rq %lx   n: %lx, p: %lx\n",
            (unsigned long)queue, (unsigned long) queue->next,
            (unsigned long) queue->prev);
-    list_for_each_safe ( list, tmp, queue ) {
+    list_for_each_safe ( list, tmp, queue )
+    {
         d_inf = list_entry(list, struct sedf_vcpu_info,
                            extralist[EXTRA_UTIL_Q]);
         printk("%3d: ",loop++);
@@ -1323,69 +1498,93 @@
  
     loop = 0;
     printk("\nnot on Q\n");
-    for_each_domain(d)
+
+    for_each_domain ( d )
+    {
         for_each_vcpu(d, ed)
-    {
-        if (!__task_on_queue(ed) && (ed->processor == i)) {
-            printk("%3d: ",loop++);
-            sedf_dump_domain(ed);
-        }
-    }
-}
-/*Adjusts periods and slices of the domains accordingly to their weights*/
-static inline int sedf_adjust_weights(struct sched_adjdom_cmd *cmd) {
+        {
+            if ( !__task_on_queue(ed) && (ed->processor == i) )
+            {
+                printk("%3d: ",loop++);
+                sedf_dump_domain(ed);
+            }
+        }
+    }
+}
+
+
+/* Adjusts periods and slices of the domains accordingly to their weights. */
+static int sedf_adjust_weights(struct sched_adjdom_cmd *cmd)
+{
     struct vcpu *p;
     struct domain      *d;
     int                 sumw[NR_CPUS];
     s_time_t            sumt[NR_CPUS];
     int                 cpu;
  
-    for (cpu=0; cpu < NR_CPUS; cpu++) {
+    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
+    {
         sumw[cpu] = 0;
         sumt[cpu] = 0;
     }
-    /*sum up all weights*/
-    for_each_domain(d)
-        for_each_vcpu(d, p) {
-        if (EDOM_INFO(p)->weight)
-            sumw[p->processor] += EDOM_INFO(p)->weight;
-        else {
-            /*don't modify domains who don't have a weight, but sum
-              up the time they need, projected to a WEIGHT_PERIOD,
-              so that this time is not given to the weight-driven
-              domains*/
-            /*check for overflows*/
-            ASSERT((WEIGHT_PERIOD < ULONG_MAX) 
-                   && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
-            sumt[p->processor] += 
-                (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) / 
-                EDOM_INFO(p)->period_orig;
-        }
-    }
-    /*adjust all slices (and periods) to the new weight*/
-    for_each_domain(d) 
-        for_each_vcpu(d, p) {
-        if (EDOM_INFO(p)->weight) {
-            EDOM_INFO(p)->period_orig = 
-                EDOM_INFO(p)->period  = WEIGHT_PERIOD;
-            EDOM_INFO(p)->slice_orig  =
-                EDOM_INFO(p)->slice   = 
-                (EDOM_INFO(p)->weight *
-                 (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) / 
-                sumw[p->processor];
-        }
-    }
+
+    /* sum up all weights */
+    for_each_domain( d )
+    {
+        for_each_vcpu( d, p )
+        {
+            if ( EDOM_INFO(p)->weight )
+            {
+                sumw[p->processor] += EDOM_INFO(p)->weight;
+            }
+            else
+            {
+                /*don't modify domains who don't have a weight, but sum
+                  up the time they need, projected to a WEIGHT_PERIOD,
+                  so that this time is not given to the weight-driven
+                  domains*/
+                /*check for overflows*/
+                ASSERT((WEIGHT_PERIOD < ULONG_MAX) 
+                       && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
+                sumt[p->processor] += 
+                    (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) / 
+                    EDOM_INFO(p)->period_orig;
+            }
+        }
+    }
+
+    /* adjust all slices (and periods) to the new weight */
+    for_each_domain( d )
+    {
+        for_each_vcpu ( d, p )
+        {
+            if ( EDOM_INFO(p)->weight )
+            {
+                EDOM_INFO(p)->period_orig = 
+                    EDOM_INFO(p)->period  = WEIGHT_PERIOD;
+                EDOM_INFO(p)->slice_orig  =
+                    EDOM_INFO(p)->slice   = 
+                    (EDOM_INFO(p)->weight *
+                     (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) / 
+                    sumw[p->processor];
+            }
+        }
+    }
+
     return 0;
 }
 
+
 /* set or fetch domain scheduling parameters */
-static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd) {
+static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd)
+{
     struct vcpu *v;
 
     PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
           "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
           p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
           cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
+
     if ( cmd->direction == SCHED_INFO_PUT )
     {
         /*check for sane parameters*/
@@ -1458,6 +1657,7 @@
     .sleep          = sedf_sleep,
     .wake           = sedf_wake,
     .adjdom         = sedf_adjdom,
+    .set_affinity   = sedf_set_affinity
 };
 
 /*
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/schedule.c
--- a/xen/common/schedule.c     Mon Jan  9 11:19:55 2006
+++ b/xen/common/schedule.c     Mon Jan  9 11:22:17 2006
@@ -100,7 +100,9 @@
     v->vcpu_id = vcpu_id;
     v->processor = cpu_id;
     atomic_set(&v->pausecnt, 0);
-    v->cpumap = CPUMAP_RUNANYWHERE;
+
+    v->cpu_affinity = is_idle_domain(d) ?
+        cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
 
     d->vcpu[vcpu_id] = v;
 
@@ -143,7 +145,7 @@
     /* Initialise the per-domain timer. */
     init_ac_timer(&v->timer, dom_timer_fn, v, v->processor);
 
-    if ( is_idle_task(d) )
+    if ( is_idle_domain(d) )
     {
         schedule_data[v->processor].curr = v;
         schedule_data[v->processor].idle = v;
@@ -166,7 +168,7 @@
     unsigned long flags;
 
     spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
-    if ( likely(!domain_runnable(v)) )
+    if ( likely(!vcpu_runnable(v)) )
         SCHED_OP(sleep, v);
     spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
 
@@ -182,7 +184,7 @@
      * flag is cleared and the scheduler lock is released. We also check that
      * the domain continues to be unrunnable, in case someone else wakes it.
      */
-    while ( !domain_runnable(v) &&
+    while ( !vcpu_runnable(v) &&
             (test_bit(_VCPUF_running, &v->vcpu_flags) ||
              spin_is_locked(&schedule_data[v->processor].schedule_lock)) )
         cpu_relax();
@@ -195,15 +197,22 @@
     unsigned long flags;
 
     spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
-    if ( likely(domain_runnable(v)) )
+    if ( likely(vcpu_runnable(v)) )
     {
         SCHED_OP(wake, v);
         v->wokenup = NOW();
     }
-    clear_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
     spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
 
     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
+}
+
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+    if ( cpus_empty(*affinity) )
+        return -EINVAL;
+
+    return SCHED_OP(set_affinity, v, affinity);
 }
 
 /* Block the currently-executing domain until a pertinent event occurs. */
@@ -330,18 +339,23 @@
     do {
         succ = 0;
         __clear_cpu_bits(have_lock);
-        for_each_vcpu(d, v) {
+        for_each_vcpu ( d, v )
+        {
             cpu = v->processor;
-            if (!__get_cpu_bit(cpu, have_lock)) {
+            if ( !__get_cpu_bit(cpu, have_lock) )
+            {
                 /* if we don't have a lock on this CPU: acquire it*/
-                if (spin_trylock(&schedule_data[cpu].schedule_lock)) {
+                if ( spin_trylock(&schedule_data[cpu].schedule_lock) )
+                {
                     /*we have this lock!*/
                     __set_cpu_bit(cpu, have_lock);
                     succ = 1;
-                } else {
+                }
+                else
+                {
                     /*we didn,t get this lock -> free all other locks too!*/
-                    for (cpu = 0; cpu < NR_CPUS; cpu++)
-                        if (__get_cpu_bit(cpu, have_lock))
+                    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
+                        if ( __get_cpu_bit(cpu, have_lock) )
                             spin_unlock(&schedule_data[cpu].schedule_lock);
                     /* and start from the beginning! */
                     succ = 0;
@@ -354,8 +368,8 @@
 
     SCHED_OP(adjdom, d, cmd);
 
-    for (cpu = 0; cpu < NR_CPUS; cpu++)
-        if (__get_cpu_bit(cpu, have_lock))
+    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
+        if ( __get_cpu_bit(cpu, have_lock) )
             spin_unlock(&schedule_data[cpu].schedule_lock);
     __clear_cpu_bits(have_lock);
 
@@ -371,22 +385,20 @@
  */
 static void __enter_scheduler(void)
 {
-    struct vcpu *prev = current, *next = NULL;
-    int                 cpu = prev->processor;
-    s_time_t            now;
+    struct vcpu        *prev = current, *next = NULL;
+    int                 cpu = smp_processor_id();
+    s_time_t            now = NOW();
     struct task_slice   next_slice;
     s32                 r_time;     /* time for new dom to run */
 
+    ASSERT(!in_irq());
+
     perfc_incrc(sched_run);
-    
+
     spin_lock_irq(&schedule_data[cpu].schedule_lock);
-
-    now = NOW();
 
     rem_ac_timer(&schedule_data[cpu].s_timer);
     
-    ASSERT(!in_irq());
-
     prev->cpu_time += now - prev->lastschd;
 
     /* get policy-specific decision on scheduling... */
@@ -394,7 +406,7 @@
 
     r_time = next_slice.time;
     next = next_slice.task;
-    
+
     schedule_data[cpu].curr = next;
     
     next->lastschd = now;
@@ -411,11 +423,6 @@
              prev->domain->domain_id, now - prev->lastschd);
     TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
              next->domain->domain_id, now - next->wokenup, r_time);
-
-    clear_bit(_VCPUF_running, &prev->vcpu_flags);
-    set_bit(_VCPUF_running, &next->vcpu_flags);
-
-    perfc_incrc(sched_ctx);
 
     /*
      * Logic of wokenup field in domain struct:
@@ -425,10 +432,10 @@
      * also set here then a preempted runnable domain will get a screwed up
      * "waiting time" value next time it is scheduled.
      */
-    prev->wokenup = NOW();
+    prev->wokenup = now;
 
 #if defined(WAKE_HISTO)
-    if ( !is_idle_task(next->domain) && next->wokenup )
+    if ( !is_idle_domain(next->domain) && next->wokenup )
     {
         ulong diff = (ulong)(now - next->wokenup);
         diff /= (ulong)MILLISECS(1);
@@ -438,7 +445,7 @@
     next->wokenup = (s_time_t)0;
 #elif defined(BLOCKTIME_HISTO)
     prev->lastdeschd = now;
-    if ( !is_idle_task(next->domain) )
+    if ( !is_idle_domain(next->domain) )
     {
         ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
         if (diff <= BUCKETS-2)  schedule_data[cpu].hist[diff]++;
@@ -446,10 +453,16 @@
     }
 #endif
 
+    set_bit(_VCPUF_running, &next->vcpu_flags);
+
+    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+    perfc_incrc(sched_ctx);
+
     prev->sleep_tick = schedule_data[cpu].tick;
 
     /* Ensure that the domain has an up-to-date time base. */
-    if ( !is_idle_task(next->domain) )
+    if ( !is_idle_domain(next->domain) )
     {
         update_dom_time(next);
         if ( next->sleep_tick != schedule_data[cpu].tick )
@@ -461,17 +474,6 @@
              next->domain->domain_id, next->vcpu_id);
 
     context_switch(prev, next);
-
-    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
-    context_switch_finalise(next);
-}
-
-/* No locking needed -- pointer comparison is safe :-) */
-int idle_cpu(int cpu)
-{
-    struct vcpu *p = schedule_data[cpu].curr;
-    return p == idle_task[cpu];
 }
 
 
@@ -493,11 +495,11 @@
 static void t_timer_fn(void *unused)
 {
     struct vcpu  *v  = current;
-    unsigned int  cpu = v->processor;
+    unsigned int  cpu = smp_processor_id();
 
     schedule_data[cpu].tick++;
 
-    if ( !is_idle_task(v->domain) )
+    if ( !is_idle_domain(v->domain) )
     {
         update_dom_time(v);
         send_guest_virq(v, VIRQ_TIMER);
@@ -531,8 +533,8 @@
         init_ac_timer(&t_timer[i], t_timer_fn, NULL, i);
     }
 
-    schedule_data[0].curr = idle_task[0];
-    schedule_data[0].idle = idle_task[0];
+    schedule_data[0].curr = idle_domain[0];
+    schedule_data[0].idle = idle_domain[0];
 
     for ( i = 0; schedulers[i] != NULL; i++ )
     {
@@ -546,10 +548,10 @@
 
     printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
 
-    rc = SCHED_OP(alloc_task, idle_task[0]);
+    rc = SCHED_OP(alloc_task, idle_domain[0]);
     BUG_ON(rc < 0);
 
-    sched_add_domain(idle_task[0]);
+    sched_add_domain(idle_domain[0]);
 }
 
 /*
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/vsprintf.c
--- a/xen/common/vsprintf.c     Mon Jan  9 11:19:55 2006
+++ b/xen/common/vsprintf.c     Mon Jan  9 11:22:17 2006
@@ -12,11 +12,15 @@
 /* 
  * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@xxxxxxxxxxxxxx>
  * - changed to provide snprintf and vsnprintf functions
+ * So Feb  1 16:51:32 CET 2004 Juergen Quade <quade@xxxxxxx>
+ * - scnprintf and vscnprintf
  */
 
 #include <stdarg.h>
 #include <xen/ctype.h>
 #include <xen/lib.h>
+#include <asm/div64.h>
+#include <asm/page.h>
 
 /**
  * simple_strtoul - convert a string to an unsigned long
@@ -33,11 +37,14 @@
         if (*cp == '0') {
             base = 8;
             cp++;
-            if ((*cp == 'x') && isxdigit(cp[1])) {
+            if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
                 cp++;
                 base = 16;
             }
         }
+    } else if (base == 16) {
+        if (cp[0] == '0' && toupper(cp[1]) == 'X')
+            cp += 2;
     }
     while (isxdigit(*cp) &&
            (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
@@ -49,6 +56,8 @@
     return result;
 }
 
+EXPORT_SYMBOL(simple_strtoul);
+
 /**
  * simple_strtol - convert a string to a signed long
  * @cp: The start of the string
@@ -61,6 +70,8 @@
         return -simple_strtoul(cp+1,endp,base);
     return simple_strtoul(cp,endp,base);
 }
+
+EXPORT_SYMBOL(simple_strtol);
 
 /**
  * simple_strtoull - convert a string to an unsigned long long
@@ -77,11 +88,14 @@
         if (*cp == '0') {
             base = 8;
             cp++;
-            if ((*cp == 'x') && isxdigit(cp[1])) {
+            if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
                 cp++;
                 base = 16;
             }
         }
+    } else if (base == 16) {
+        if (cp[0] == '0' && toupper(cp[1]) == 'X')
+            cp += 2;
     }
     while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp)
                                                                ? toupper(*cp) 
: *cp)-'A'+10) < base) {
@@ -92,6 +106,8 @@
         *endp = (char *)cp;
     return result;
 }
+
+EXPORT_SYMBOL(simple_strtoull);
 
 /**
  * simple_strtoll - convert a string to a signed long long
@@ -123,25 +139,25 @@
 #define SPECIAL 32              /* 0x */
 #define LARGE   64              /* use 'ABCDEF' instead of 'abcdef' */
 
-static char * number(char * buf, char * end, long long num, int base, int 
size, int precision, int type)
+static char * number(char * buf, char * end, unsigned long long num, int base, 
int size, int precision, int type)
 {
     char c,sign,tmp[66];
     const char *digits;
-    const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-    const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    static const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+    static const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
     int i;
 
     digits = (type & LARGE) ? large_digits : small_digits;
     if (type & LEFT)
         type &= ~ZEROPAD;
     if (base < 2 || base > 36)
-        return buf;
+        return NULL;
     c = (type & ZEROPAD) ? '0' : ' ';
     sign = 0;
     if (type & SIGN) {
-        if (num < 0) {
+        if ((signed long long) num < 0) {
             sign = '-';
-            num = -num;
+            num = - (signed long long) num;
             size--;
         } else if (type & PLUS) {
             sign = '+';
@@ -160,6 +176,9 @@
     i = 0;
     if (num == 0)
         tmp[i++]='0';
+    else while (num != 0)
+        tmp[i++] = digits[do_div(num,base)];
+#if 0
     else 
     {
         /* XXX KAF: force unsigned mod and div. */
@@ -167,6 +186,7 @@
         unsigned int base2=(unsigned int)base;
         while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; }
     }
+#endif
     if (i > precision)
         precision = i;
     size -= precision;
@@ -222,14 +242,22 @@
 }
 
 /**
-* vsnprintf - Format a string and place it in a buffer
-* @buf: The buffer to place the result into
-* @size: The size of the buffer, including the trailing null space
-* @fmt: The format string to use
-* @args: Arguments for the format string
-*
-* Call this function if you are already dealing with a va_list.
-* You probably want snprintf instead.
+ * vsnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The return value is the number of characters which would
+ * be generated for the given input, excluding the trailing
+ * '\0', as per ISO C99. If you want to have the exact
+ * number of characters written into @buf as return value
+ * (not including the trailing '\0'), use vscnprintf. If the
+ * return is greater than or equal to @size, the resulting
+ * string is truncated.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want snprintf instead.
  */
 int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
@@ -247,6 +275,9 @@
     int qualifier;              /* 'h', 'l', or 'L' for integer fields */
                                 /* 'z' support added 23/7/1999 S.H.    */
                                 /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+    /* Reject out-of-range values early */
+    BUG_ON((int)size < 0);
 
     str = buf;
     end = buf + size - 1;
@@ -307,17 +338,14 @@
 
         /* get the conversion qualifier */
         qualifier = -1;
-        if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+        if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+            *fmt =='Z' || *fmt == 'z') {
             qualifier = *fmt;
             ++fmt;
             if (qualifier == 'l' && *fmt == 'l') {
                 qualifier = 'L';
                 ++fmt;
             }
-        }
-        if (*fmt == 'q') {
-            qualifier = 'L';
-            ++fmt;
         }
 
         /* default base */
@@ -345,7 +373,7 @@
 
         case 's':
             s = va_arg(args, char *);
-            if (!s)
+            if ((unsigned long)s < PAGE_SIZE)
                 s = "<NULL>";
 
             len = strnlen(s, precision);
@@ -386,7 +414,7 @@
             if (qualifier == 'l') {
                 long * ip = va_arg(args, long *);
                 *ip = (str - buf);
-            } else if (qualifier == 'Z') {
+            } else if (qualifier == 'Z' || qualifier == 'z') {
                 size_t * ip = va_arg(args, size_t *);
                 *ip = (str - buf);
             } else {
@@ -437,7 +465,7 @@
             num = va_arg(args, unsigned long);
             if (flags & SIGN)
                 num = (signed long) num;
-        } else if (qualifier == 'Z') {
+        } else if (qualifier == 'Z' || qualifier == 'z') {
             num = va_arg(args, size_t);
         } else if (qualifier == 'h') {
             num = (unsigned short) va_arg(args, int);
@@ -463,12 +491,43 @@
     return str-buf;
 }
 
+EXPORT_SYMBOL(vsnprintf);
+
+/**
+ * vscnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The return value is the number of characters which have been written into
+ * the @buf not including the trailing '\0'. If @size is <= 0 the function
+ * returns 0.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want scnprintf instead.
+ */
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+    int i;
+
+    i=vsnprintf(buf,size,fmt,args);
+    return (i >= size) ? (size - 1) : i;
+}
+
+EXPORT_SYMBOL(vscnprintf);
+
 /**
  * snprintf - Format a string and place it in a buffer
  * @buf: The buffer to place the result into
  * @size: The size of the buffer, including the trailing null space
  * @fmt: The format string to use
  * @...: Arguments for the format string
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing null,
+ * as per ISO C99.  If the return is greater than or equal to
+ * @size, the resulting string is truncated.
  */
 int snprintf(char * buf, size_t size, const char *fmt, ...)
 {
@@ -481,26 +540,61 @@
     return i;
 }
 
+EXPORT_SYMBOL(snprintf);
+
+/**
+ * scnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The return value is the number of characters written into @buf not including
+ * the trailing '\0'. If @size is <= 0 the function returns 0. If the return is
+ * greater than or equal to @size, the resulting string is truncated.
+ */
+
+int scnprintf(char * buf, size_t size, const char *fmt, ...)
+{
+    va_list args;
+    int i;
+
+    va_start(args, fmt);
+    i = vsnprintf(buf, size, fmt, args);
+    va_end(args);
+    return (i >= size) ? (size - 1) : i;
+}
+EXPORT_SYMBOL(scnprintf);
+
 /**
  * vsprintf - Format a string and place it in a buffer
  * @buf: The buffer to place the result into
  * @fmt: The format string to use
  * @args: Arguments for the format string
  *
+ * The function returns the number of characters written
+ * into @buf. Use vsnprintf or vscnprintf in order to avoid
+ * buffer overflows.
+ *
  * Call this function if you are already dealing with a va_list.
  * You probably want sprintf instead.
  */
 int vsprintf(char *buf, const char *fmt, va_list args)
 {
-    return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args);
-}
-
+    return vsnprintf(buf, INT_MAX, fmt, args);
+}
+
+EXPORT_SYMBOL(vsprintf);
 
 /**
  * sprintf - Format a string and place it in a buffer
  * @buf: The buffer to place the result into
  * @fmt: The format string to use
  * @...: Arguments for the format string
+ *
+ * The function returns the number of characters written
+ * into @buf. Use snprintf or scnprintf in order to avoid
+ * buffer overflows.
  */
 int sprintf(char * buf, const char *fmt, ...)
 {
@@ -508,11 +602,12 @@
     int i;
 
     va_start(args, fmt);
-    i=vsprintf(buf,fmt,args);
+    i=vsnprintf(buf, INT_MAX, fmt, args);
     va_end(args);
     return i;
 }
 
+EXPORT_SYMBOL(sprintf);
 
 /*
  * Local variables:
diff -r 25e3c8668f1f -r 8af1199488d3 xen/drivers/char/ns16550.c
--- a/xen/drivers/char/ns16550.c        Mon Jan  9 11:19:55 2006
+++ b/xen/drivers/char/ns16550.c        Mon Jan  9 11:22:17 2006
@@ -13,6 +13,7 @@
 #include <xen/irq.h>
 #include <xen/sched.h>
 #include <xen/serial.h>
+#include <xen/iocap.h>
 #include <asm/io.h>
 
 /*
@@ -233,11 +234,11 @@
 }
 
 #ifdef CONFIG_X86
-#include <asm/physdev.h>
 static void ns16550_endboot(struct serial_port *port)
 {
     struct ns16550 *uart = port->uart;
-    physdev_modify_ioport_access_range(dom0, 0, uart->io_base, 8);
+    if ( ioports_deny_access(dom0, uart->io_base, uart->io_base + 7) != 0 )
+        BUG();
 }
 #else
 #define ns16550_endboot NULL
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-ia64/domain.h     Mon Jan  9 11:22:17 2006
@@ -10,7 +10,7 @@
 #include <asm/vmx_platform.h>
 #include <xen/list.h>
 
-extern void arch_do_createdomain(struct vcpu *);
+extern int arch_do_createdomain(struct vcpu *);
 
 extern void domain_relinquish_resources(struct domain *);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/linux-xen/asm/pal.h
--- a/xen/include/asm-ia64/linux-xen/asm/pal.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-ia64/linux-xen/asm/pal.h  Mon Jan  9 11:22:17 2006
@@ -925,7 +925,11 @@
 ia64_pal_cache_flush (u64 cache_type, u64 invalidate, u64 *progress, u64 *vector)
 {
        struct ia64_pal_retval iprv;
+#ifdef XEN     /* fix a bug in Linux... PAL has changed */
+       PAL_CALL(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress);
+#else
        PAL_CALL_IC_OFF(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress);
+#endif
        if (vector)
                *vector = iprv.v0;
        *progress = iprv.v1;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/vmx.h
--- a/xen/include/asm-ia64/vmx.h        Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-ia64/vmx.h        Mon Jan  9 11:22:17 2006
@@ -23,7 +23,7 @@
 #define _ASM_IA64_VT_H
 
 #define RR7_SWITCH_SHIFT       12      /* 4k enough */
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 extern void identify_vmx_feature(void);
 extern unsigned int vmx_enabled;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/config.h      Mon Jan  9 11:22:17 2006
@@ -248,12 +248,10 @@
 
 #ifdef CONFIG_X86_PAE
 /* Hypervisor owns top 168MB of virtual address space. */
-# define __HYPERVISOR_VIRT_START  0xF5800000
-# define HYPERVISOR_VIRT_START   (0xF5800000UL)
+#define HYPERVISOR_VIRT_START   mk_unsigned_long(0xF5800000)
 #else
 /* Hypervisor owns top 64MB of virtual address space. */
-# define __HYPERVISOR_VIRT_START  0xFC000000
-# define HYPERVISOR_VIRT_START   (0xFC000000UL)
+#define HYPERVISOR_VIRT_START   mk_unsigned_long(0xFC000000)
 #endif
 
 #define L2_PAGETABLE_FIRST_XEN_SLOT \
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/current.h
--- a/xen/include/asm-x86/current.h     Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/current.h     Mon Jan  9 11:22:17 2006
@@ -49,7 +49,7 @@
 #define reset_stack_and_jump(__fn)              \
     __asm__ __volatile__ (                      \
         "mov %0,%%"__OP"sp; jmp "STR(__fn)      \
-        : : "r" (guest_cpu_user_regs()) )
+        : : "r" (guest_cpu_user_regs()) : "memory" )
 
 #define schedule_tail(_ed) (((_ed)->arch.schedule_tail)(_ed))
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/domain.h      Mon Jan  9 11:22:17 2006
@@ -24,8 +24,8 @@
     /* Writable pagetables. */
     struct ptwr_info ptwr[2];
 
-    /* I/O-port access bitmap mask. */
-    u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.      */
+    /* I/O-port admin-specified access capabilities. */
+    struct rangeset *ioport_caps;
 
     /* Shadow mode status and controls. */
     struct shadow_ops *ops;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/mm.h  Mon Jan  9 11:22:17 2006
@@ -336,11 +336,13 @@
 int  revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
 
 void cleanup_writable_pagetable(struct domain *d);
-#define sync_pagetable_state(d)                 \
-    do {                                        \
-        LOCK_BIGLOCK(d);                        \
-        cleanup_writable_pagetable(d);          \
-        UNLOCK_BIGLOCK(d);                      \
+#define sync_pagetable_state(d)                                 \
+    do {                                                        \
+        LOCK_BIGLOCK(d);                                        \
+        /* Avoid racing with ptwr_destroy(). */                 \
+        if ( !test_bit(_DOMF_dying, &(d)->domain_flags) )       \
+            cleanup_writable_pagetable(d);                      \
+        UNLOCK_BIGLOCK(d);                                      \
     } while ( 0 )
 
 int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/msr.h
--- a/xen/include/asm-x86/msr.h Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/msr.h Mon Jan  9 11:22:17 2006
@@ -12,7 +12,7 @@
        __asm__ __volatile__("rdmsr" \
                            : "=a" (a__), "=d" (b__) \
                            : "c" (msr)); \
-       val = a__ | (b__<<32); \
+       val = a__ | ((u64)b__<<32); \
 } while(0); 
 
 #define wrmsr(msr,val1,val2) \
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/processor.h   Mon Jan  9 11:22:17 2006
@@ -190,7 +190,7 @@
 #ifdef CONFIG_X86_HT
 extern void detect_ht(struct cpuinfo_x86 *c);
 #else
-static inline void detect_ht(struct cpuinfo_x86 *c) {}
+static always_inline void detect_ht(struct cpuinfo_x86 *c) {}
 #endif
 
 /*
@@ -209,7 +209,7 @@
 /*
  * CPUID functions returning a single datum
  */
-static inline unsigned int cpuid_eax(unsigned int op)
+static always_inline unsigned int cpuid_eax(unsigned int op)
 {
     unsigned int eax;
 
@@ -219,7 +219,7 @@
             : "bx", "cx", "dx");
     return eax;
 }
-static inline unsigned int cpuid_ebx(unsigned int op)
+static always_inline unsigned int cpuid_ebx(unsigned int op)
 {
     unsigned int eax, ebx;
 
@@ -229,7 +229,7 @@
             : "cx", "dx" );
     return ebx;
 }
-static inline unsigned int cpuid_ecx(unsigned int op)
+static always_inline unsigned int cpuid_ecx(unsigned int op)
 {
     unsigned int eax, ecx;
 
@@ -239,7 +239,7 @@
             : "bx", "dx" );
     return ecx;
 }
-static inline unsigned int cpuid_edx(unsigned int op)
+static always_inline unsigned int cpuid_edx(unsigned int op)
 {
     unsigned int eax, edx;
 
@@ -281,7 +281,7 @@
  */
 extern unsigned long mmu_cr4_features;
 
-static inline void set_in_cr4 (unsigned long mask)
+static always_inline void set_in_cr4 (unsigned long mask)
 {
     unsigned long dummy;
     mmu_cr4_features |= mask;
@@ -292,7 +292,7 @@
         : "=&r" (dummy) : "irg" (mask) );
 }
 
-static inline void clear_in_cr4 (unsigned long mask)
+static always_inline void clear_in_cr4 (unsigned long mask)
 {
     unsigned long dummy;
     mmu_cr4_features &= ~mask;
@@ -334,7 +334,7 @@
        outb((data), 0x23); \
 } while (0)
 
-static inline void __monitor(const void *eax, unsigned long ecx,
+static always_inline void __monitor(const void *eax, unsigned long ecx,
                unsigned long edx)
 {
        /* "monitor %eax,%ecx,%edx;" */
@@ -343,7 +343,7 @@
                : :"a" (eax), "c" (ecx), "d"(edx));
 }
 
-static inline void __mwait(unsigned long eax, unsigned long ecx)
+static always_inline void __mwait(unsigned long eax, unsigned long ecx)
 {
        /* "mwait %eax,%ecx;" */
        asm volatile(
@@ -460,7 +460,7 @@
 };
 
 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
+static always_inline void rep_nop(void)
 {
     __asm__ __volatile__ ( "rep;nop" : : : "memory" );
 }
@@ -471,7 +471,7 @@
 #ifdef         CONFIG_MPENTIUMIII
 
 #define ARCH_HAS_PREFETCH
-extern inline void prefetch(const void *x)
+extern always_inline void prefetch(const void *x)
 {
     __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
 }
@@ -482,12 +482,12 @@
 #define ARCH_HAS_PREFETCHW
 #define ARCH_HAS_SPINLOCK_PREFETCH
 
-extern inline void prefetch(const void *x)
+extern always_inline void prefetch(const void *x)
 {
     __asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
 }
 
-extern inline void prefetchw(const void *x)
+extern always_inline void prefetchw(const void *x)
 {
     __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
 }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/shadow.h      Mon Jan  9 11:22:17 2006
@@ -341,10 +341,10 @@
 #if SHADOW_VERBOSE_DEBUG
 #define SH_LOG(_f, _a...)                                               \
     printk("DOM%uP%u: SH_LOG(%d): " _f "\n",                            \
-       current->domain->domain_id , current->processor, __LINE__ , ## _a )
+       current->domain->domain_id , smp_processor_id(), __LINE__ , ## _a )
 #define SH_VLOG(_f, _a...)                                              \
     printk("DOM%uP%u: SH_VLOG(%d): " _f "\n",                           \
-           current->domain->domain_id, current->processor, __LINE__ , ## _a )
+           current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
 #else
 #define SH_LOG(_f, _a...) ((void)0)
 #define SH_VLOG(_f, _a...) ((void)0)
@@ -353,7 +353,7 @@
 #if SHADOW_VVERBOSE_DEBUG
 #define SH_VVLOG(_f, _a...)                                             \
     printk("DOM%uP%u: SH_VVLOG(%d): " _f "\n",                          \
-           current->domain->domain_id, current->processor, __LINE__ , ## _a )
+           current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
 #else
 #define SH_VVLOG(_f, _a...) ((void)0)
 #endif
@@ -361,7 +361,7 @@
 #if SHADOW_VVVERBOSE_DEBUG
 #define SH_VVVLOG(_f, _a...)                                            \
     printk("DOM%uP%u: SH_VVVLOG(%d): " _f "\n",                         \
-           current->domain->domain_id, current->processor, __LINE__ , ## _a )
+           current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
 #else
 #define SH_VVVLOG(_f, _a...) ((void)0)
 #endif
@@ -369,7 +369,7 @@
 #if FULLSHADOW_DEBUG
 #define FSH_LOG(_f, _a...)                                              \
     printk("DOM%uP%u: FSH_LOG(%d): " _f "\n",                           \
-           current->domain->domain_id, current->processor, __LINE__ , ## _a )
+           current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
 #else
 #define FSH_LOG(_f, _a...) ((void)0)
 #endif
@@ -591,7 +591,7 @@
         if ( need_flush )
         {
             perfc_incrc(update_hl2e_invlpg);
-            flush_tlb_one_mask(v->domain->cpumask,
+            flush_tlb_one_mask(v->domain->domain_dirty_cpumask,
                                &linear_pg_table[l1_linear_offset(va)]);
         }
     }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx.h Mon Jan  9 11:22:17 2006
@@ -26,7 +26,7 @@
 #include <asm/vmx_vmcs.h>
 #include <asm/i387.h>
 
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 extern int hvm_enabled;
 
@@ -38,7 +38,6 @@
 
 extern void arch_vmx_do_launch(struct vcpu *);
 extern void arch_vmx_do_resume(struct vcpu *);
-extern void arch_vmx_do_relaunch(struct vcpu *);
 
 extern unsigned int cpu_rev;
 
@@ -506,7 +505,7 @@
 
 static inline unsigned int vmx_get_vcpu_nr(struct domain *d)
 {
-    return d->arch.vmx_platform.nr_vcpu;
+    return d->arch.vmx_platform.nr_vcpus;
 }
 
 static inline shared_iopage_t *get_sp(struct domain *d)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_intercept.h
--- a/xen/include/asm-x86/vmx_intercept.h       Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_intercept.h       Mon Jan  9 11:22:17 2006
@@ -6,7 +6,7 @@
 #include <xen/lib.h>
 #include <xen/time.h>
 #include <xen/errno.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #define MAX_IO_HANDLER              8
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_platform.h
--- a/xen/include/asm-x86/vmx_platform.h        Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_platform.h        Mon Jan  9 11:22:17 2006
@@ -33,10 +33,10 @@
     (((size_reg) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
 
 #define operand_size(operand)   \
-      ((operand >> 24) & 0xFF)
+    ((operand >> 24) & 0xFF)
 
 #define operand_index(operand)  \
-      ((operand >> 16) & 0xFF)
+    ((operand >> 16) & 0xFF)
 
 /* for instruction.operand[].size */
 #define BYTE    1
@@ -81,13 +81,13 @@
 
 struct vmx_platform {
     unsigned long          shared_page_va;
-    unsigned int           nr_vcpu;
-    unsigned int           lapic_enable;
+    unsigned int           nr_vcpus;
+    unsigned int           apic_enabled;
 
     struct vmx_virpit      vmx_pit;
     struct vmx_io_handler  vmx_io_handler;
     struct vmx_virpic      vmx_pic;
-    struct vmx_vioapic      vmx_vioapic;
+    struct vmx_vioapic     vmx_vioapic;
     unsigned char          round_info[256];
     spinlock_t             round_robin_lock;
     int                    interrupt_request;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_vlapic.h
--- a/xen/include/asm-x86/vmx_vlapic.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_vlapic.h  Mon Jan  9 11:22:17 2006
@@ -21,7 +21,7 @@
 #define VMX_VLAPIC_H
 
 #include <asm/msr.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #if defined(__i386__) || defined(__x86_64__)
 static inline int __fls(uint32_t word)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h    Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_vmcs.h    Mon Jan  9 11:22:17 2006
@@ -23,7 +23,7 @@
 #include <asm/vmx_cpu.h>
 #include <asm/vmx_platform.h>
 #include <asm/vmx_vlapic.h>
-#include <public/vmx_assist.h>
+#include <public/hvm/vmx_assist.h>
 
 extern int start_vmx(void);
 extern void stop_vmx(void);
@@ -86,7 +86,8 @@
 #define PC_DEBUG_PORT   0x80
 
 struct arch_vmx_struct {
-    struct vmcs_struct      *vmcs;  /* VMCS pointer in virtual */
+    struct vmcs_struct      *vmcs;  /* VMCS pointer in virtual. */
+    unsigned int            launch_cpu; /* VMCS is valid on this CPU. */
     unsigned long           flags;  /* VMCS flags */
     unsigned long           cpu_cr0; /* copy of guest CR0 */
     unsigned long           cpu_shadow_cr0; /* copy of guest read shadow CR0 */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/x86_emulate.h
--- a/xen/include/asm-x86/x86_emulate.h Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/x86_emulate.h Mon Jan  9 11:22:17 2006
@@ -18,10 +18,11 @@
  * special treatment or emulation (*_emulated).
  * 
  * The emulator assumes that an instruction accesses only one 'emulated memory'
- * location, and that this is one of its data operands. Instruction fetches and
+ * location, that this location is the given linear faulting address (cr2), and
+ * that this is one of the instruction's data operands. Instruction fetches and
  * stack operations are assumed never to access emulated memory. The emulator
  * automatically deduces which operand of a string-move operation is accessing
- * emulated memory, and requires that the other operand accesses normal memory.
+ * emulated memory, and assumes that the other operand accesses normal memory.
  * 
  * NOTES:
  *  1. The emulator isn't very smart about emulated vs. standard memory.
@@ -36,6 +37,7 @@
  *     then immediately bail.
  *  3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
  *     cmpxchg8b_emulated need support 8-byte accesses.
+ *  4. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
  */
 /* Access completed successfully: continue emulation as normal. */
 #define X86EMUL_CONTINUE        0
@@ -141,14 +143,27 @@
 
 struct cpu_user_regs;
 
+/* Execution mode, passed to the emulator. */
+#define X86EMUL_MODE_REAL     0 /* Real mode.             */
+#define X86EMUL_MODE_PROT16   2 /* 16-bit protected mode. */
+#define X86EMUL_MODE_PROT32   4 /* 32-bit protected mode. */
+#define X86EMUL_MODE_PROT64   8 /* 64-bit (long) mode.    */
+
+/* Host execution mode. */
+#if defined(__i386__)
+#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
+#elif defined(__x86_64__)
+#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
+#endif
+
 /*
  * x86_emulate_memop: Emulate an instruction that faulted attempting to
  *                    read/write a 'special' memory area.
  *  @regs: Register state at time of fault.
- *  @cr2:  Linear faulting address.
+ *  @cr2:  Linear faulting address within an emulated/special memory area.
  *  @ops:  Interface to access special memory.
- *  @mode: Current execution mode, represented by the default size of memory
- *         addresses, in bytes. Valid values are 2, 4 and 8 (x86/64 only).
+ *  @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
+ * Returns -1 on failure, 0 on success.
  */
 extern int
 x86_emulate_memop(
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/arch-x86_32.h  Mon Jan  9 11:22:17 2006
@@ -49,10 +49,15 @@
  * machine->physical mapping table starts at this address, read-only.
  */
 #ifdef CONFIG_X86_PAE
-# define HYPERVISOR_VIRT_START (0xF5800000UL)
+#define __HYPERVISOR_VIRT_START 0xF5800000
 #else
-# define HYPERVISOR_VIRT_START (0xFC000000UL)
+#define __HYPERVISOR_VIRT_START 0xFC000000
 #endif
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#endif
+
 #ifndef machine_to_phys_mapping
 #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
 #endif
@@ -137,7 +142,7 @@
     unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
 } arch_vcpu_info_t;
 
-#endif
+#endif /* !__ASSEMBLY__ */
 
 #endif
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/arch-x86_64.h  Mon Jan  9 11:22:17 2006
@@ -59,9 +59,12 @@
 /* And the trap vector is... */
 #define TRAP_INSTR "syscall"
 
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END   0xFFFF880000000000
+
 #ifndef HYPERVISOR_VIRT_START
-#define HYPERVISOR_VIRT_START (0xFFFF800000000000UL)
-#define HYPERVISOR_VIRT_END   (0xFFFF880000000000UL)
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
 #endif
 
 /* Maximum number of virtual CPUs in multi-processor guests. */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/dom0_ops.h     Mon Jan  9 11:22:17 2006
@@ -94,14 +94,14 @@
     xen_domain_handle_t handle;
 } dom0_getdomaininfo_t;
 
-#define DOM0_SETDOMAININFO      13
+#define DOM0_SETVCPUCONTEXT   13
 typedef struct {
     /* IN variables. */
     domid_t               domain;
     uint32_t              vcpu;
     /* IN/OUT parameters */
     vcpu_guest_context_t *ctxt;
-} dom0_setdomaininfo_t;
+} dom0_setvcpucontext_t;
 
 #define DOM0_MSR              15
 typedef struct {
@@ -163,13 +163,13 @@
 /* 
  * Set which physical cpus a vcpu can execute on.
  */
-#define DOM0_PINCPUDOMAIN     20
+#define DOM0_SETVCPUAFFINITY  20
 typedef struct {
     /* IN variables. */
     domid_t   domain;
     uint32_t  vcpu;
     cpumap_t  cpumap;
-} dom0_pincpudomain_t;
+} dom0_setvcpuaffinity_t;
 
 /* Get trace buffers machine base address */
 #define DOM0_TBUFCONTROL       21
@@ -410,6 +410,21 @@
     uint8_t enable;
 } dom0_setdebugging_t;
 
+#define DOM0_IRQ_PERMISSION 46
+typedef struct {
+    domid_t domain;          /* domain to be affected */
+    uint8_t pirq;
+    uint8_t allow_access;    /* flag to specify enable/disable of IRQ access */
+} dom0_irq_permission_t;
+
+#define DOM0_IOMEM_PERMISSION 47
+typedef struct {
+    domid_t  domain;          /* domain to be affected */
+    unsigned long first_pfn;  /* first page (physical page number) in range */
+    unsigned long nr_pfns;    /* number of pages in range (>0) */
+    uint8_t allow_access;     /* allow (!0) or deny (0) access to range? */
+} dom0_iomem_permission_t;
+ 
 typedef struct {
     uint32_t cmd;
     uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
@@ -421,13 +436,13 @@
         dom0_getmemlist_t        getmemlist;
         dom0_schedctl_t          schedctl;
         dom0_adjustdom_t         adjustdom;
-        dom0_setdomaininfo_t     setdomaininfo;
+        dom0_setvcpucontext_t    setvcpucontext;
         dom0_getdomaininfo_t     getdomaininfo;
         dom0_getpageframeinfo_t  getpageframeinfo;
         dom0_msr_t               msr;
         dom0_settime_t           settime;
         dom0_readconsole_t       readconsole;
-        dom0_pincpudomain_t      pincpudomain;
+        dom0_setvcpuaffinity_t   setvcpuaffinity;
         dom0_tbufcontrol_t       tbufcontrol;
         dom0_physinfo_t          physinfo;
         dom0_sched_id_t          sched_id;
@@ -448,6 +463,8 @@
         dom0_max_vcpus_t         max_vcpus;
         dom0_setdomainhandle_t   setdomainhandle;        
         dom0_setdebugging_t      setdebugging;
+        dom0_irq_permission_t    irq_permission;
+        dom0_iomem_permission_t  iomem_permission;
         uint8_t                  pad[128];
     } u;
 } dom0_op_t;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/xen.h
--- a/xen/include/public/xen.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/xen.h  Mon Jan  9 11:22:17 2006
@@ -426,6 +426,15 @@
 
 typedef uint8_t xen_domain_handle_t[16];
 
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+
+#else /* __ASSEMBLY__ */
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __XEN_PUBLIC_XEN_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/bitmap.h
--- a/xen/include/xen/bitmap.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/bitmap.h  Mon Jan  9 11:22:17 2006
@@ -41,6 +41,8 @@
  * bitmap_weight(src, nbits)                   Hamming Weight: number set bits
  * bitmap_shift_right(dst, src, n, nbits)      *dst = *src >> n
  * bitmap_shift_left(dst, src, n, nbits)       *dst = *src << n
+ * bitmap_scnprintf(buf, len, src, nbits)      Print bitmap src to buf
+ * bitmap_scnlistprintf(buf, len, src, nbits)  Print bitmap src as list to buf
  */
 
 /*
@@ -93,6 +95,10 @@
                        const unsigned long *bitmap2, int bits);
 extern int __bitmap_weight(const unsigned long *bitmap, int bits);
 
+extern int bitmap_scnprintf(char *buf, unsigned int len,
+                       const unsigned long *src, int nbits);
+extern int bitmap_scnlistprintf(char *buf, unsigned int len,
+                       const unsigned long *src, int nbits);
 extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
 extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
 extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/compiler.h
--- a/xen/include/xen/compiler.h        Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/compiler.h        Mon Jan  9 11:22:17 2006
@@ -19,4 +19,10 @@
 #define __attribute_used__ __attribute__((__unused__))
 #endif
 
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define __must_check __attribute__((warn_unused_result))
+#else
+#define __must_check
+#endif
+
 #endif /* __LINUX_COMPILER_H */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/config.h
--- a/xen/include/xen/config.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/config.h  Mon Jan  9 11:22:17 2006
@@ -43,4 +43,13 @@
 #define __STR(...) #__VA_ARGS__
 #define STR(...) __STR(__VA_ARGS__)
 
+#ifndef __ASSEMBLY__
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+#else /* __ASSEMBLY__ */
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+#endif /* !__ASSEMBLY__ */
+
 #endif /* __XEN_CONFIG_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/cpumask.h
--- a/xen/include/xen/cpumask.h Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/cpumask.h Mon Jan  9 11:22:17 2006
@@ -8,8 +8,8 @@
  * See detailed comments in the file xen/bitmap.h describing the
  * data type on which these cpumasks are based.
  *
- * For details of cpumask_scnprintf() and cpumask_parse(),
- * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ * For details of cpumask_scnprintf() and cpulist_scnprintf(),
+ * see bitmap_scnprintf() and bitmap_scnlistprintf() in lib/bitmap.c.
  *
  * The available cpumask operations are:
  *
@@ -36,8 +36,8 @@
  * void cpus_shift_right(dst, src, n)  Shift right
  * void cpus_shift_left(dst, src, n)   Shift left
  *
- * int first_cpu(mask)                 Number lowest set bit, or >= NR_CPUS
- * int next_cpu(cpu, mask)             Next cpu past 'cpu', or >= NR_CPUS
+ * int first_cpu(mask)                 Number lowest set bit, or NR_CPUS
+ * int next_cpu(cpu, mask)             Next cpu past 'cpu', or NR_CPUS
  *
  * cpumask_t cpumask_of_cpu(cpu)       Return cpumask with bit 'cpu' set
  * CPU_MASK_ALL                                Initializer - all bits set
@@ -45,7 +45,7 @@
  * unsigned long *cpus_addr(mask)      Array of unsigned long's in mask
  *
  * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
- * int cpumask_parse(ubuf, ulen, mask) Parse ascii string as cpumask
+ * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing
  *
  * for_each_cpu_mask(cpu, mask)                for-loop cpu over mask
  *
@@ -207,13 +207,13 @@
 #define first_cpu(src) __first_cpu(&(src), NR_CPUS)
 static inline int __first_cpu(const cpumask_t *srcp, int nbits)
 {
-       return find_first_bit(srcp->bits, nbits);
+       return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
 }
 
 #define next_cpu(n, src) __next_cpu((n), &(src), NR_CPUS)
 static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits)
 {
-       return find_next_bit(srcp->bits, nbits, n+1);
+       return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
 }
 
 #define cpumask_of_cpu(cpu)                                            \
@@ -259,7 +259,6 @@
 
 #define cpus_addr(src) ((src).bits)
 
-/*
 #define cpumask_scnprintf(buf, len, src) \
                        __cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
 static inline int __cpumask_scnprintf(char *buf, int len,
@@ -268,14 +267,13 @@
        return bitmap_scnprintf(buf, len, srcp->bits, nbits);
 }
 
-#define cpumask_parse(ubuf, ulen, src) \
-                       __cpumask_parse((ubuf), (ulen), &(src), NR_CPUS)
-static inline int __cpumask_parse(const char __user *buf, int len,
-                                       cpumask_t *dstp, int nbits)
-{
-       return bitmap_parse(buf, len, dstp->bits, nbits);
-}
-*/
+#define cpulist_scnprintf(buf, len, src) \
+                       __cpulist_scnprintf((buf), (len), &(src), NR_CPUS)
+static inline int __cpulist_scnprintf(char *buf, int len,
+                                       const cpumask_t *srcp, int nbits)
+{
+       return bitmap_scnlistprintf(buf, len, srcp->bits, nbits);
+}
 
 #if NR_CPUS > 1
 #define for_each_cpu_mask(cpu, mask)           \
@@ -368,7 +366,7 @@
        for_each_cpu_mask(cpu, (mask))          \
                if (cpu_online(cpu))            \
                        break;                  \
-       min_t(int, NR_CPUS, cpu);               \
+       cpu;                                    \
 })
 
 #define for_each_cpu(cpu)        for_each_cpu_mask((cpu), cpu_possible_map)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/domain.h
--- a/xen/include/xen/domain.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/domain.h  Mon Jan  9 11:22:17 2006
@@ -13,12 +13,10 @@
 
 extern void free_vcpu_struct(struct vcpu *v);
 
-extern void arch_do_createdomain(struct vcpu *v);
+extern int arch_do_createdomain(struct vcpu *v);
 
-extern int  arch_set_info_guest(
+extern int arch_set_info_guest(
     struct vcpu *v, struct vcpu_guest_context *c);
-
-extern void vcpu_migrate_cpu(struct vcpu *v, int newcpu);
 
 extern void free_perdomain_pt(struct domain *d);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h     Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/lib.h     Mon Jan  9 11:22:17 2006
@@ -53,10 +53,16 @@
 /* vsprintf.c */
 extern int sprintf(char * buf, const char * fmt, ...)
     __attribute__ ((format (printf, 2, 3)));
-extern int vsprintf(char *buf, const char *, va_list);
+extern int vsprintf(char *buf, const char *, va_list)
+    __attribute__ ((format (printf, 2, 0)));
 extern int snprintf(char * buf, size_t size, const char * fmt, ...)
     __attribute__ ((format (printf, 3, 4)));
-extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+    __attribute__ ((format (printf, 3, 0)));
+extern int scnprintf(char * buf, size_t size, const char * fmt, ...)
+    __attribute__ ((format (printf, 3, 4)));
+extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+    __attribute__ ((format (printf, 3, 0)));
 
 long simple_strtol(
     const char *cp,char **endp, unsigned int base);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/sched-if.h        Mon Jan  9 11:22:17 2006
@@ -13,8 +13,8 @@
 
 struct schedule_data {
     spinlock_t          schedule_lock;  /* spinlock protecting curr        */
-    struct vcpu *curr;           /* current task                    */
-    struct vcpu *idle;           /* idle task for this cpu          */
+    struct vcpu        *curr;           /* current task                    */
+    struct vcpu        *idle;           /* idle task for this cpu          */
     void               *sched_priv;
     struct ac_timer     s_timer;        /* scheduling timer                */
     unsigned long       tick;           /* current periodic 'tick'         */
@@ -39,6 +39,7 @@
     void         (*rem_task)       (struct vcpu *);
     void         (*sleep)          (struct vcpu *);
     void         (*wake)           (struct vcpu *);
+    int          (*set_affinity)   (struct vcpu *, cpumask_t *);
     struct task_slice (*do_schedule) (s_time_t);
     int          (*control)        (struct sched_ctl_cmd *);
     int          (*adjdom)         (struct domain *,
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/sched.h   Mon Jan  9 11:22:17 2006
@@ -11,6 +11,7 @@
 #include <xen/time.h>
 #include <xen/ac_timer.h>
 #include <xen/grant_table.h>
+#include <xen/rangeset.h>
 #include <asm/domain.h>
 
 extern unsigned long volatile jiffies;
@@ -50,8 +51,6 @@
 int  evtchn_init(struct domain *d);
 void evtchn_destroy(struct domain *d);
 
-#define CPUMAP_RUNANYWHERE 0xFFFFFFFF
-
 struct vcpu 
 {
     int              vcpu_id;
@@ -79,7 +78,11 @@
 
     atomic_t         pausecnt;
 
-    cpumap_t         cpumap;        /* which cpus this domain can run on */
+    /* Bitmask of CPUs on which this VCPU may run. */
+    cpumask_t        cpu_affinity;
+
+    /* Bitmask of CPUs which are holding onto this VCPU's state. */
+    cpumask_t        vcpu_dirty_cpumask;
 
     struct arch_vcpu arch;
 };
@@ -109,6 +112,9 @@
 
     struct domain   *next_in_list;
     struct domain   *next_in_hashbucket;
+
+    struct list_head rangesets;
+    spinlock_t       rangesets_lock;
 
     /* Event channel information. */
     struct evtchn   *evtchn[NR_EVTCHN_BUCKETS];
@@ -125,6 +131,10 @@
     u16              pirq_to_evtchn[NR_PIRQS];
     u32              pirq_mask[NR_PIRQS/32];
 
+    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
+    struct rangeset *iomem_caps;
+    struct rangeset *irq_caps;
+
     unsigned long    domain_flags;
     unsigned long    vm_assist;
 
@@ -133,7 +143,7 @@
     struct vcpu *vcpu[MAX_VIRT_CPUS];
 
     /* Bitmask of CPUs which are holding onto this domain's state. */
-    cpumask_t        cpumask;
+    cpumask_t        domain_dirty_cpumask;
 
     struct arch_domain arch;
 
@@ -165,9 +175,9 @@
 extern struct domain idle0_domain;
 extern struct vcpu idle0_vcpu;
 
-extern struct vcpu *idle_task[NR_CPUS];
+extern struct vcpu *idle_domain[NR_CPUS];
 #define IDLE_DOMAIN_ID   (0x7FFFU)
-#define is_idle_task(_d) (test_bit(_DOMF_idle_domain, &(_d)->domain_flags))
+#define is_idle_domain(_d) (test_bit(_DOMF_idle_domain, &(_d)->domain_flags))
 
 struct vcpu *alloc_vcpu(
     struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
@@ -215,7 +225,7 @@
     unsigned long image_start, unsigned long image_len, 
     unsigned long initrd_start, unsigned long initrd_len,
     char *cmdline);
-extern int set_info_guest(struct domain *d, dom0_setdomaininfo_t *);
+extern int set_info_guest(struct domain *d, dom0_setvcpucontext_t *);
 
 struct domain *find_domain_by_id(domid_t dom);
 extern void domain_destruct(struct domain *d);
@@ -261,36 +271,27 @@
 extern void sync_vcpu_execstate(struct vcpu *v);
 
 /*
- * Called by the scheduler to switch to another VCPU. On entry, although
- * VCPUF_running is no longer asserted for @prev, its context is still running
- * on the local CPU and is not committed to memory. The local scheduler lock
- * is therefore still held, and interrupts are disabled, because the local CPU
- * is in an inconsistent state.
- * 
- * The callee must ensure that the local CPU is no longer running in @prev's
- * context, and that the context is saved to memory, before returning.
- * Alternatively, if implementing lazy context switching, it suffices to ensure
- * that invoking sync_vcpu_execstate() will switch and commit @prev's state.
+ * Called by the scheduler to switch to another VCPU. This function must
+ * call context_saved(@prev) when the local CPU is no longer running in
+ * @prev's context, and that context is saved to memory. Alternatively, if
+ * implementing lazy context switching, it suffices to ensure that invoking
+ * sync_vcpu_execstate() will switch and commit @prev's state.
  */
 extern void context_switch(
     struct vcpu *prev, 
     struct vcpu *next);
 
 /*
- * On some architectures (notably x86) it is not possible to entirely load
- * @next's context with interrupts disabled. These may implement a function to
- * finalise loading the new context after interrupts are re-enabled. This
- * function is not given @prev and is not permitted to access it.
- */
-extern void context_switch_finalise(
-    struct vcpu *next);
+ * As described above, context_switch() must call this function when the
+ * local CPU is no longer running in @prev's context, and @prev's context is
+ * saved to memory. Alternatively, if implementing lazy context switching,
+ * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
+ */
+#define context_saved(prev) (clear_bit(_VCPUF_running, &(prev)->vcpu_flags))
 
 /* Called by the scheduler to continue running the current VCPU. */
 extern void continue_running(
     struct vcpu *same);
-
-/* Is CPU 'cpu' idle right now? */
-int idle_cpu(int cpu);
 
 void startup_cpu_idle_loop(void);
 
@@ -356,17 +357,11 @@
  /* Currently running on a CPU? */
 #define _VCPUF_running         3
 #define VCPUF_running          (1UL<<_VCPUF_running)
- /* Disables auto-migration between CPUs. */
-#define _VCPUF_cpu_pinned      4
-#define VCPUF_cpu_pinned       (1UL<<_VCPUF_cpu_pinned)
- /* Domain migrated between CPUs. */
-#define _VCPUF_cpu_migrated    5
-#define VCPUF_cpu_migrated     (1UL<<_VCPUF_cpu_migrated)
  /* Initialization completed. */
-#define _VCPUF_initialised     6
+#define _VCPUF_initialised     4
 #define VCPUF_initialised      (1UL<<_VCPUF_initialised)
  /* VCPU is not-runnable */
-#define _VCPUF_down            7
+#define _VCPUF_down            5
 #define VCPUF_down             (1UL<<_VCPUF_down)
 
 /*
@@ -378,32 +373,25 @@
  /* Is this domain privileged? */
 #define _DOMF_privileged       1
 #define DOMF_privileged        (1UL<<_DOMF_privileged)
- /* May this domain do IO to physical devices? */
-#define _DOMF_physdev_access   2
-#define DOMF_physdev_access    (1UL<<_DOMF_physdev_access)
  /* Guest shut itself down for some reason. */
-#define _DOMF_shutdown         3
+#define _DOMF_shutdown         2
 #define DOMF_shutdown          (1UL<<_DOMF_shutdown)
- /* Guest is in process of shutting itself down (becomes DOMF_shutdown). */
-#define _DOMF_shuttingdown     4
-#define DOMF_shuttingdown      (1UL<<_DOMF_shuttingdown)
  /* Death rattle. */
-#define _DOMF_dying            5
+#define _DOMF_dying            3
 #define DOMF_dying             (1UL<<_DOMF_dying)
  /* Domain is paused by controller software. */
-#define _DOMF_ctrl_pause       6
+#define _DOMF_ctrl_pause       4
 #define DOMF_ctrl_pause        (1UL<<_DOMF_ctrl_pause)
  /* Domain is being debugged by controller software. */
-#define _DOMF_debugging        7
+#define _DOMF_debugging        5
 #define DOMF_debugging         (1UL<<_DOMF_debugging)
 
 
-static inline int domain_runnable(struct vcpu *v)
+static inline int vcpu_runnable(struct vcpu *v)
 {
     return ( (atomic_read(&v->pausecnt) == 0) &&
              !(v->vcpu_flags & (VCPUF_blocked|VCPUF_down)) &&
-             !(v->domain->domain_flags &
-               (DOMF_shutdown|DOMF_shuttingdown|DOMF_ctrl_pause)) );
+             !(v->domain->domain_flags & (DOMF_shutdown|DOMF_ctrl_pause)) );
 }
 
 void vcpu_pause(struct vcpu *v);
@@ -414,6 +402,8 @@
 void domain_unpause_by_systemcontroller(struct domain *d);
 void cpu_init(void);
 
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
+
 static inline void vcpu_unblock(struct vcpu *v)
 {
     if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )
@@ -422,8 +412,6 @@
 
 #define IS_PRIV(_d)                                         \
     (test_bit(_DOMF_privileged, &(_d)->domain_flags))
-#define IS_CAPABLE_PHYSDEV(_d)                              \
-    (test_bit(_DOMF_physdev_access, &(_d)->domain_flags))
 
 #define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/guest-headers/Makefile
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/tools/guest-headers/Makefile      Mon Jan  9 11:22:17 2006
@@ -0,0 +1,11 @@
+
+XEN_ROOT=../..
+linuxsparsetree = $(XEN_ROOT)/linux-2.6-xen-sparse
+
+check:
+
+install:
+       mkdir -p $(DESTDIR)/usr/include/xen/linux
+       install -m0644 $(linuxsparsetree)/include/asm-xen/linux-public/*.h 
$(DESTDIR)/usr/include/xen/linux
+
+clean:
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/process-linux-xen.c
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/process-linux-xen.c       Mon Jan  9 11:22:17 2006
@@ -0,0 +1,848 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * 04/11/17 Ashok Raj  <ashok.raj@xxxxxxxxx> Added CPU Hotplug Support
+ */
+#ifdef XEN
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <xen/symbols.h>
+#include <xen/smp.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/unwind.h>
+#else
+#define __KERNEL_SYSCALLS__    /* see <asm/unistd.h> */
+#include <linux/config.h>
+
+#include <linux/cpu.h>
+#include <linux/pm.h>
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/personality.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/thread_info.h>
+#include <linux/unistd.h>
+#include <linux/efi.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/kprobes.h>
+
+#include <asm/cpu.h>
+#include <asm/delay.h>
+#include <asm/elf.h>
+#include <asm/ia32.h>
+#include <asm/irq.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/unwind.h>
+#include <asm/user.h>
+
+#include "entry.h"
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#include "sigframe.h"
+
+void (*ia64_mark_idle)(int);
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
+
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
+#endif
+
+void
+ia64_do_show_stack (struct unw_frame_info *info, void *arg)
+{
+       unsigned long ip, sp, bsp;
+       char buf[128];                  /* don't make it so big that it 
overflows the stack! */
+
+       printk("\nCall Trace:\n");
+       do {
+               unw_get_ip(info, &ip);
+               if (ip == 0)
+                       break;
+
+               unw_get_sp(info, &sp);
+               unw_get_bsp(info, &bsp);
+               snprintf(buf, sizeof(buf),
+                        " [<%016lx>] %%s\n"
+                        "                                sp=%016lx 
bsp=%016lx\n",
+                        ip, sp, bsp);
+               print_symbol(buf, ip);
+       } while (unw_unwind(info) >= 0);
+}
+
+void
+show_stack (struct task_struct *task, unsigned long *sp)
+{
+       if (!task)
+               unw_init_running(ia64_do_show_stack, NULL);
+       else {
+               struct unw_frame_info info;
+
+               unw_init_from_blocked_task(&info, task);
+               ia64_do_show_stack(&info, NULL);
+       }
+}
+
+#ifndef XEN
+void
+dump_stack (void)
+{
+       show_stack(NULL, NULL);
+}
+
+EXPORT_SYMBOL(dump_stack);
+#endif
+
+#ifdef XEN
+void
+show_registers(struct pt_regs *regs)
+#else
+void
+show_regs (struct pt_regs *regs)
+#endif
+{
+       unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+#ifndef XEN
+       print_modules();
+       printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, 
smp_processor_id(), current->comm);
+       printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
+              regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
+#else
+       printk("\nCPU %d\n", smp_processor_id());
+       printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
+              regs->cr_ipsr, regs->cr_ifs, ip);
+#endif
+       print_symbol("ip is at %s\n", ip);
+       printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+              regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+       printk("rnat: %016lx bsps: %016lx pr  : %016lx\n",
+              regs->ar_rnat, regs->ar_bspstore, regs->pr);
+       printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+              regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+       printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+       printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0, regs->b6, 
regs->b7);
+       printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
+              regs->f6.u.bits[1], regs->f6.u.bits[0],
+              regs->f7.u.bits[1], regs->f7.u.bits[0]);
+       printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
+              regs->f8.u.bits[1], regs->f8.u.bits[0],
+              regs->f9.u.bits[1], regs->f9.u.bits[0]);
+       printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+              regs->f10.u.bits[1], regs->f10.u.bits[0],
+              regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+       printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1, regs->r2, 
regs->r3);
+       printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8, regs->r9, 
regs->r10);
+       printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, 
regs->r12, regs->r13);
+       printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, 
regs->r15, regs->r16);
+       printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, 
regs->r18, regs->r19);
+       printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, 
regs->r21, regs->r22);
+       printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, 
regs->r24, regs->r25);
+       printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, 
regs->r27, regs->r28);
+       printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, 
regs->r30, regs->r31);
+
+       if (user_mode(regs)) {
+               /* print the stacked registers */
+               unsigned long val, *bsp, ndirty;
+               int i, sof, is_nat = 0;
+
+               sof = regs->cr_ifs & 0x7f;      /* size of frame */
+               ndirty = (regs->loadrs >> 19);
+               bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, 
ndirty);
+               for (i = 0; i < sof; ++i) {
+                       get_user(val, (unsigned long __user *) 
ia64_rse_skip_regs(bsp, i));
+                       printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', 
val,
+                              ((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
+               }
+       } else
+               show_stack(NULL, NULL);
+}
+
+#ifndef XEN
+void
+do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long 
in_syscall)
+{
+       if (fsys_mode(current, &scr->pt)) {
+               /* defer signal-handling etc. until we return to 
privilege-level 0.  */
+               if (!ia64_psr(&scr->pt)->lp)
+                       ia64_psr(&scr->pt)->lp = 1;
+               return;
+       }
+
+#ifdef CONFIG_PERFMON
+       if (current->thread.pfm_needs_checking)
+               pfm_handle_work();
+#endif
+
+       /* deal with pending signal delivery */
+       if (test_thread_flag(TIF_SIGPENDING))
+               ia64_do_signal(oldset, scr, in_syscall);
+}
+
+static int pal_halt        = 1;
+static int can_do_pal_halt = 1;
+
+static int __init nohalt_setup(char * str)
+{
+       pal_halt = can_do_pal_halt = 0;
+       return 1;
+}
+__setup("nohalt", nohalt_setup);
+
+void
+update_pal_halt_status(int status)
+{
+       can_do_pal_halt = pal_halt && status;
+}
+
+/*
+ * We use this if we don't have any better idle routine..
+ */
+void
+default_idle (void)
+{
+       local_irq_enable();
+       while (!need_resched())
+               if (can_do_pal_halt)
+                       safe_halt();
+               else
+                       cpu_relax();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+       extern void ia64_cpu_local_tick (void);
+       unsigned int this_cpu = smp_processor_id();
+
+       /* Ack it */
+       __get_cpu_var(cpu_state) = CPU_DEAD;
+
+       max_xtp();
+       local_irq_disable();
+       idle_domain_exit();
+       ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
+       /*
+        * The above is a point of no-return, the processor is
+        * expected to be in SAL loop now.
+        */
+       BUG();
+}
+#else
+static inline void play_dead(void)
+{
+       BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void cpu_idle_wait(void)
+{
+       unsigned int cpu, this_cpu = get_cpu();
+       cpumask_t map;
+
+       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+       put_cpu();
+
+       cpus_clear(map);
+       for_each_online_cpu(cpu) {
+               per_cpu(cpu_idle_state, cpu) = 1;
+               cpu_set(cpu, map);
+       }
+
+       __get_cpu_var(cpu_idle_state) = 0;
+
+       wmb();
+       do {
+               ssleep(1);
+               for_each_online_cpu(cpu) {
+                       if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, 
cpu))
+                               cpu_clear(cpu, map);
+               }
+               cpus_and(map, map, cpu_online_map);
+       } while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+void __attribute__((noreturn))
+cpu_idle (void)
+{
+       void (*mark_idle)(int) = ia64_mark_idle;
+
+       /* endless idle loop with no priority at all */
+       while (1) {
+#ifdef CONFIG_SMP
+               if (!need_resched())
+                       min_xtp();
+#endif
+               while (!need_resched()) {
+                       void (*idle)(void);
+
+                       if (__get_cpu_var(cpu_idle_state))
+                               __get_cpu_var(cpu_idle_state) = 0;
+
+                       rmb();
+                       if (mark_idle)
+                               (*mark_idle)(1);
+
+                       idle = pm_idle;
+                       if (!idle)
+                               idle = default_idle;
+                       (*idle)();
+               }
+
+               if (mark_idle)
+                       (*mark_idle)(0);
+
+#ifdef CONFIG_SMP
+               normal_xtp();
+#endif
+               schedule();
+               check_pgt_cache();
+               if (cpu_is_offline(smp_processor_id()))
+                       play_dead();
+       }
+}
+
+void
+ia64_save_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+       unsigned long info;
+#endif
+
+       if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+               ia64_save_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+       if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+               pfm_save_regs(task);
+
+       info = __get_cpu_var(pfm_syst_info);
+       if (info & PFM_CPUINFO_SYST_WIDE)
+               pfm_syst_wide_update_task(task, info, 0);
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+       if (IS_IA32_PROCESS(ia64_task_regs(task)))
+               ia32_save_state(task);
+#endif
+}
+
+void
+ia64_load_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+       unsigned long info;
+#endif
+
+       if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+               ia64_load_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+       if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+               pfm_load_regs(task);
+
+       info = __get_cpu_var(pfm_syst_info);
+       if (info & PFM_CPUINFO_SYST_WIDE) 
+               pfm_syst_wide_update_task(task, info, 1);
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+       if (IS_IA32_PROCESS(ia64_task_regs(task)))
+               ia32_load_state(task);
+#endif
+}
+
+/*
+ * Copy the state of an ia-64 thread.
+ *
+ * We get here through the following  call chain:
+ *
+ *     from user-level:        from kernel:
+ *
+ *     <clone syscall>         <some kernel call frames>
+ *     sys_clone                  :
+ *     do_fork                 do_fork
+ *     copy_thread             copy_thread
+ *
+ * This means that the stack layout is as follows:
+ *
+ *     +---------------------+ (highest addr)
+ *     |   struct pt_regs    |
+ *     +---------------------+
+ *     | struct switch_stack |
+ *     +---------------------+
+ *     |                     |
+ *     |    memory stack     |
+ *     |                     | <-- sp (lowest addr)
+ *     +---------------------+
+ *
+ * Observe that we copy the unat values that are in pt_regs and switch_stack.  
Spilling an
+ * integer to address X causes bit N in ar.unat to be set to the NaT bit of 
the register,
+ * with N=(X & 0x1ff)/8.  Thus, copying the unat value preserves the NaT bits 
ONLY if the
+ * pt_regs structure in the parent is congruent to that of the child, modulo 
512.  Since
+ * the stack is page aligned and the page size is at least 4KB, this is always 
the case,
+ * so there is nothing to worry about.
+ */
+int
+copy_thread (int nr, unsigned long clone_flags,
+            unsigned long user_stack_base, unsigned long user_stack_size,
+            struct task_struct *p, struct pt_regs *regs)
+{
+       extern char ia64_ret_from_clone, ia32_ret_from_clone;
+       struct switch_stack *child_stack, *stack;
+       unsigned long rbs, child_rbs, rbs_size;
+       struct pt_regs *child_ptregs;
+       int retval = 0;
+
+#ifdef CONFIG_SMP
+       /*
+        * For SMP idle threads, fork_by_hand() calls do_fork with
+        * NULL regs.
+        */
+       if (!regs)
+               return 0;
+#endif
+
+       stack = ((struct switch_stack *) regs) - 1;
+
+       child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) 
- 1;
+       child_stack = (struct switch_stack *) child_ptregs - 1;
+
+       /* copy parent's switch_stack & pt_regs to child: */
+       memcpy(child_stack, stack, sizeof(*child_ptregs) + 
sizeof(*child_stack));
+
+       rbs = (unsigned long) current + IA64_RBS_OFFSET;
+       child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
+       rbs_size = stack->ar_bspstore - rbs;
+
+       /* copy the parent's register backing store to the child: */
+       memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+
+       if (likely(user_mode(child_ptregs))) {
+               if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs))
+                       child_ptregs->r13 = regs->r16;  /* see sys_clone2() in 
entry.S */
+               if (user_stack_base) {
+                       child_ptregs->r12 = user_stack_base + user_stack_size - 
16;
+                       child_ptregs->ar_bspstore = user_stack_base;
+                       child_ptregs->ar_rnat = 0;
+                       child_ptregs->loadrs = 0;
+               }
+       } else {
+               /*
+                * Note: we simply preserve the relative position of
+                * the stack pointer here.  There is no need to
+                * allocate a scratch area here, since that will have
+                * been taken care of by the caller of sys_clone()
+                * already.
+                */
+               child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* 
kernel sp */
+               child_ptregs->r13 = (unsigned long) p;          /* set 
`current' pointer */
+       }
+       child_stack->ar_bspstore = child_rbs + rbs_size;
+       if (IS_IA32_PROCESS(regs))
+               child_stack->b0 = (unsigned long) &ia32_ret_from_clone;
+       else
+               child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+       /* copy parts of thread_struct: */
+       p->thread.ksp = (unsigned long) child_stack - 16;
+
+       /* stop some PSR bits from being inherited.
+        * the psr.up/psr.pp bits must be cleared on fork but inherited on 
execve()
+        * therefore we must specify them explicitly here and not include them 
in
+        * IA64_PSR_BITS_TO_CLEAR.
+        */
+       child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+                                & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | 
IA64_PSR_UP));
+
+       /*
+        * NOTE: The calling convention considers all floating point
+        * registers in the high partition (fph) to be scratch.  Since
+        * the only way to get to this point is through a system call,
+        * we know that the values in fph are all dead.  Hence, there
+        * is no need to inherit the fph state from the parent to the
+        * child and all we have to do is to make sure that
+        * IA64_THREAD_FPH_VALID is cleared in the child.
+        *
+        * XXX We could push this optimization a bit further by
+        * clearing IA64_THREAD_FPH_VALID on ANY system call.
+        * However, it's not clear this is worth doing.  Also, it
+        * would be a slight deviation from the normal Linux system
+        * call behavior where scratch registers are preserved across
+        * system calls (unless used by the system call itself).
+        */
+#      define THREAD_FLAGS_TO_CLEAR    (IA64_THREAD_FPH_VALID | 
IA64_THREAD_DBG_VALID \
+                                        | IA64_THREAD_PM_VALID)
+#      define THREAD_FLAGS_TO_SET      0
+       p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
+                          | THREAD_FLAGS_TO_SET);
+       ia64_drop_fpu(p);       /* don't pick up stale state from a CPU's fph */
+#ifdef CONFIG_IA32_SUPPORT
+       /*
+        * If we're cloning an IA32 task then save the IA32 extra
+        * state from the current task to the new task
+        */
+       if (IS_IA32_PROCESS(ia64_task_regs(current))) {
+               ia32_save_state(p);
+               if (clone_flags & CLONE_SETTLS)
+                       retval = ia32_clone_tls(p, child_ptregs);
+
+               /* Copy partially mapped page list */
+               if (!retval)
+                       retval = ia32_copy_partial_page_list(p, clone_flags);
+       }
+#endif
+
+#ifdef CONFIG_PERFMON
+       if (current->thread.pfm_context)
+               pfm_inherit(p, child_ptregs);
+#endif
+       return retval;
+}
+
+static void
+do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void 
*arg)
+{
+       unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm;
+       elf_greg_t *dst = arg;
+       struct pt_regs *pt;
+       char nat;
+       int i;
+
+       memset(dst, 0, sizeof(elf_gregset_t));  /* don't leak any kernel bits 
to user-level */
+
+       if (unw_unwind_to_user(info) < 0)
+               return;
+
+       unw_get_sp(info, &sp);
+       pt = (struct pt_regs *) (sp + 16);
+
+       urbs_end = ia64_get_user_rbs_end(task, pt, &cfm);
+
+       if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0)
+               return;
+
+       ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) 
urbs_end),
+                 &ar_rnat);
+
+       /*
+        * coredump format:
+        *      r0-r31
+        *      NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+        *      predicate registers (p0-p63)
+        *      b0-b7
+        *      ip cfm user-mask
+        *      ar.rsc ar.bsp ar.bspstore ar.rnat
+        *      ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+        */
+
+       /* r0 is zero */
+       for (i = 1, mask = (1UL << i); i < 32; ++i) {
+               unw_get_gr(info, i, &dst[i], &nat);
+               if (nat)
+                       nat_bits |= mask;
+               mask <<= 1;
+       }
+       dst[32] = nat_bits;
+       unw_get_pr(info, &dst[33]);
+
+       for (i = 0; i < 8; ++i)
+               unw_get_br(info, i, &dst[34 + i]);
+
+       unw_get_rp(info, &ip);
+       dst[42] = ip + ia64_psr(pt)->ri;
+       dst[43] = cfm;
+       dst[44] = pt->cr_ipsr & IA64_PSR_UM;
+
+       unw_get_ar(info, UNW_AR_RSC, &dst[45]);
+       /*
+        * For bsp and bspstore, unw_get_ar() would return the kernel
+        * addresses, but we need the user-level addresses instead:
+        */
+       dst[46] = urbs_end;     /* note: by convention PT_AR_BSP points to the 
end of the urbs! */
+       dst[47] = pt->ar_bspstore;
+       dst[48] = ar_rnat;
+       unw_get_ar(info, UNW_AR_CCV, &dst[49]);
+       unw_get_ar(info, UNW_AR_UNAT, &dst[50]);
+       unw_get_ar(info, UNW_AR_FPSR, &dst[51]);
+       dst[52] = pt->ar_pfs;   /* UNW_AR_PFS is == to pt->cr_ifs for interrupt 
frames */
+       unw_get_ar(info, UNW_AR_LC, &dst[53]);
+       unw_get_ar(info, UNW_AR_EC, &dst[54]);
+       unw_get_ar(info, UNW_AR_CSD, &dst[55]);
+       unw_get_ar(info, UNW_AR_SSD, &dst[56]);
+}
+
+void
+do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void 
*arg)
+{
+       elf_fpreg_t *dst = arg;
+       int i;
+
+       memset(dst, 0, sizeof(elf_fpregset_t)); /* don't leak any "random" bits 
*/
+
+       if (unw_unwind_to_user(info) < 0)
+               return;
+
+       /* f0 is 0.0, f1 is 1.0 */
+
+       for (i = 2; i < 32; ++i)
+               unw_get_fr(info, i, dst + i);
+
+       ia64_flush_fph(task);
+       if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0)
+               memcpy(dst + 32, task->thread.fph, 96*16);
+}
+
+void
+do_copy_regs (struct unw_frame_info *info, void *arg)
+{
+       do_copy_task_regs(current, info, arg);
+}
+
+void
+do_dump_fpu (struct unw_frame_info *info, void *arg)
+{
+       do_dump_task_fpu(current, info, arg);
+}
+
+int
+dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
+{
+       struct unw_frame_info tcore_info;
+
+       if (current == task) {
+               unw_init_running(do_copy_regs, regs);
+       } else {
+               memset(&tcore_info, 0, sizeof(tcore_info));
+               unw_init_from_blocked_task(&tcore_info, task);
+               do_copy_task_regs(task, &tcore_info, regs);
+       }
+       return 1;
+}
+
+void
+ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
+{
+       unw_init_running(do_copy_regs, dst);
+}
+
+int
+dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
+{
+       struct unw_frame_info tcore_info;
+
+       if (current == task) {
+               unw_init_running(do_dump_fpu, dst);
+       } else {
+               memset(&tcore_info, 0, sizeof(tcore_info));
+               unw_init_from_blocked_task(&tcore_info, task);
+               do_dump_task_fpu(task, &tcore_info, dst);
+       }
+       return 1;
+}
+
+int
+dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
+{
+       unw_init_running(do_dump_fpu, dst);
+       return 1;       /* f0-f31 are always valid so we always return 1 */
+}
+
+long
+sys_execve (char __user *filename, char __user * __user *argv, char __user * 
__user *envp,
+           struct pt_regs *regs)
+{
+       char *fname;
+       int error;
+
+       fname = getname(filename);
+       error = PTR_ERR(fname);
+       if (IS_ERR(fname))
+               goto out;
+       error = do_execve(fname, argv, envp, regs);
+       putname(fname);
+out:
+       return error;
+}
+
+pid_t
+kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
+{
+       extern void start_kernel_thread (void);
+       unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
+       struct {
+               struct switch_stack sw;
+               struct pt_regs pt;
+       } regs;
+
+       memset(&regs, 0, sizeof(regs));
+       regs.pt.cr_iip = helper_fptr[0];        /* set entry point (IP) */
+       regs.pt.r1 = helper_fptr[1];            /* set GP */
+       regs.pt.r9 = (unsigned long) fn;        /* 1st argument */
+       regs.pt.r11 = (unsigned long) arg;      /* 2nd argument */
+       /* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't 
read.  */
+       regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+       regs.pt.cr_ifs = 1UL << 63;             /* mark as valid, empty frame */
+       regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
+       regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
+       regs.sw.pr = (1 << PRED_KERNEL_STACK);
+       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, 
NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/* This gets called from kernel_thread() via ia64_invoke_thread_helper().  */
+int
+kernel_thread_helper (int (*fn)(void *), void *arg)
+{
+#ifdef CONFIG_IA32_SUPPORT
+       if (IS_IA32_PROCESS(ia64_task_regs(current))) {
+               /* A kernel thread is always a 64-bit process. */
+               current->thread.map_base  = DEFAULT_MAP_BASE;
+               current->thread.task_size = DEFAULT_TASK_SIZE;
+               ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
+               ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
+       }
+#endif
+       return (*fn)(arg);
+}
+
+/*
+ * Flush thread state.  This is called when a thread does an execve().
+ */
+void
+flush_thread (void)
+{
+       /*
+        * Remove function-return probe instances associated with this task
+        * and put them back on the free list. Do not insert an exit probe for
+        * this function, it will be disabled by kprobe_flush_task if you do.
+        */
+       kprobe_flush_task(current);
+
+       /* drop floating-point and debug-register state if it exists: */
+       current->thread.flags &= ~(IA64_THREAD_FPH_VALID | 
IA64_THREAD_DBG_VALID);
+       ia64_drop_fpu(current);
+       if (IS_IA32_PROCESS(ia64_task_regs(current)))
+               ia32_drop_partial_page_list(current);
+}
+
+/*
+ * Clean up state associated with current thread.  This is called when
+ * the thread calls exit().
+ */
+void
+exit_thread (void)
+{
+
+       /*
+        * Remove function-return probe instances associated with this task
+        * and put them back on the free list. Do not insert an exit probe for
+        * this function, it will be disabled by kprobe_flush_task if you do.
+        */
+       kprobe_flush_task(current);
+
+       ia64_drop_fpu(current);
+#ifdef CONFIG_PERFMON
+       /* if needed, stop monitoring and flush state to perfmon context */
+       if (current->thread.pfm_context)
+               pfm_exit_thread(current);
+
+       /* free debug register resources */
+       if (current->thread.flags & IA64_THREAD_DBG_VALID)
+               pfm_release_debug_registers(current);
+#endif
+       if (IS_IA32_PROCESS(ia64_task_regs(current)))
+               ia32_drop_partial_page_list(current);
+}
+
+unsigned long
+get_wchan (struct task_struct *p)
+{
+       struct unw_frame_info info;
+       unsigned long ip;
+       int count = 0;
+
+       /*
+        * Note: p may not be a blocked task (it could be current or
+        * another process running on some other CPU.  Rather than
+        * trying to determine if p is really blocked, we just assume
+        * it's blocked and rely on the unwind routines to fail
+        * gracefully if the process wasn't really blocked after all.
+        * --davidm 99/12/15
+        */
+       unw_init_from_blocked_task(&info, p);
+       do {
+               if (unw_unwind(&info) < 0)
+                       return 0;
+               unw_get_ip(&info, &ip);
+               if (!in_sched_functions(ip))
+                       return ip;
+       } while (count++ < 16);
+       return 0;
+}
+
+void
+cpu_halt (void)
+{
+       pal_power_mgmt_info_u_t power_info[8];
+       unsigned long min_power;
+       int i, min_power_state;
+
+       if (ia64_pal_halt_info(power_info) != 0)
+               return;
+
+       min_power_state = 0;
+       min_power = power_info[0].pal_power_mgmt_info_s.power_consumption;
+       for (i = 1; i < 8; ++i)
+               if (power_info[i].pal_power_mgmt_info_s.im
+                   && power_info[i].pal_power_mgmt_info_s.power_consumption < 
min_power) {
+                       min_power = 
power_info[i].pal_power_mgmt_info_s.power_consumption;
+                       min_power_state = i;
+               }
+
+       while (1)
+               ia64_pal_halt(min_power_state);
+}
+
+void
+machine_restart (char *restart_cmd)
+{
+       (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL);
+}
+
+void
+machine_halt (void)
+{
+       cpu_halt();
+}
+
+void
+machine_power_off (void)
+{
+       if (pm_power_off)
+               pm_power_off();
+       machine_halt();
+}
+#endif // !XEN
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/unwind.c
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/unwind.c  Mon Jan  9 11:22:17 2006
@@ -0,0 +1,2332 @@
+/*
+ * Copyright (C) 1999-2004 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * Copyright (C) 2003 Fenghua Yu <fenghua.yu@xxxxxxxxx>
+ *     - Change pt_regs_off() to make it less dependant on pt_regs structure.
+ */
+/*
+ * This file implements call frame unwind support for the Linux
+ * kernel.  Parsing and processing the unwind information is
+ * time-consuming, so this implementation translates the unwind
+ * descriptors into unwind scripts.  These scripts are very simple
+ * (basically a sequence of assignments) and efficient to execute.
+ * They are cached for later re-use.  Each script is specific for a
+ * given instruction pointer address and the set of predicate values
+ * that the script depends on (most unwind descriptors are
+ * unconditional and scripts often do not depend on predicates at
+ * all).  This code is based on the unwind conventions described in
+ * the "IA-64 Software Conventions and Runtime Architecture" manual.
+ *
+ * SMP conventions:
+ *     o updates to the global unwind data (in structure "unw") are serialized
+ *       by the unw.lock spinlock
+ *     o each unwind script has its own read-write lock; a thread must acquire
+ *       a read lock before executing a script and must acquire a write lock
+ *       before modifying a script
+ *     o if both the unw.lock spinlock and a script's read-write lock must be
+ *       acquired, then the read-write lock must be acquired first.
+ */
+#ifdef XEN
+#include <xen/types.h>
+#include <xen/elf.h>
+#include <xen/kernel.h>
+#include <xen/sched.h>
+#include <xen/xmalloc.h>
+#include <xen/spinlock.h>
+
+// work around
+#ifdef CONFIG_SMP
+#define write_trylock(lock)    _raw_write_trylock(lock)
+#else
+#define write_trylock(lock)    ({1;})
+#endif
+
+#else
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#endif
+
+#include <asm/unwind.h>
+
+#include <asm/delay.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/ptrace_offsets.h>
+#include <asm/rse.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "entry.h"
+#include "unwind_i.h"
+
+#define UNW_LOG_CACHE_SIZE     7       /* each unw_script is ~256 bytes in 
size */
+#define UNW_CACHE_SIZE         (1 << UNW_LOG_CACHE_SIZE)
+
+#define UNW_LOG_HASH_SIZE      (UNW_LOG_CACHE_SIZE + 1)
+#define UNW_HASH_SIZE          (1 << UNW_LOG_HASH_SIZE)
+
+#define UNW_STATS      0       /* WARNING: this disabled interrupts for long 
time-spans!! */
+
+#ifdef UNW_DEBUG
+  static unsigned int unw_debug_level = UNW_DEBUG;
+#  define UNW_DEBUG_ON(n)      unw_debug_level >= n
+   /* Do not code a printk level, not all debug lines end in newline */
+#  define UNW_DPRINT(n, ...)  if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__)
+#  define inline
+#else /* !UNW_DEBUG */
+#  define UNW_DEBUG_ON(n)  0
+#  define UNW_DPRINT(n, ...)
+#endif /* UNW_DEBUG */
+
+#if UNW_STATS
+# define STAT(x...)    x
+#else
+# define STAT(x...)
+#endif
+
+#ifdef XEN
+#define alloc_reg_state()      xmalloc(struct unw_reg_state)
+#define free_reg_state(usr)    xfree(usr)
+#define alloc_labeled_state()  xmalloc(struct unw_labeled_state)
+#define free_labeled_state(usr)        xfree(usr)
+#else
+#define alloc_reg_state()      kmalloc(sizeof(struct unw_reg_state), 
GFP_ATOMIC)
+#define free_reg_state(usr)    kfree(usr)
+#define alloc_labeled_state()  kmalloc(sizeof(struct unw_labeled_state), 
GFP_ATOMIC)
+#define free_labeled_state(usr)        kfree(usr)
+#endif
+
+typedef unsigned long unw_word;
+typedef unsigned char unw_hash_index_t;
+
+/*
+ * All global unwind state lives in this one structure; updates are
+ * serialized by unw.lock (see the SMP conventions at the top of this
+ * file).  unw.r0 doubles as the write-sink for the read-only r0
+ * pseudo-register (see read_only()).
+ */
+static struct {
+       spinlock_t lock;                        /* spinlock for unwind data */
+
+       /* list of unwind tables (one per load-module) */
+       struct unw_table *tables;
+
+       unsigned long r0;                       /* constant 0 for r0 */
+
+       /* table of registers that prologues can save (and order in which 
they're saved): */
+       const unsigned char save_order[8];
+
+       /* maps a preserved register index (preg_index) to corresponding 
switch_stack offset: */
+       unsigned short sw_off[sizeof(struct unw_frame_info) / 8];
+
+       unsigned short lru_head;                /* index of least-recently used 
script */
+       unsigned short lru_tail;                /* index of most-recently used 
script */
+
+       /* index into unw_frame_info for preserved register i */
+       unsigned short preg_index[UNW_NUM_REGS];
+
+       /* byte offset of scratch register rN within struct pt_regs; -1 = no slot */
+       short pt_regs_offsets[32];
+
+       /* unwind table for the kernel: */
+       struct unw_table kernel_table;
+
+       /* unwind table describing the gate page (kernel code that is mapped 
into user space): */
+       size_t gate_table_size;
+       unsigned long *gate_table;
+
+       /* hash table that maps instruction pointer to script index: */
+       unsigned short hash[UNW_HASH_SIZE];
+
+       /* script cache: */
+       struct unw_script cache[UNW_CACHE_SIZE];
+
+# ifdef UNW_DEBUG
+       const char *preg_name[UNW_NUM_REGS];
+# endif
+# if UNW_STATS
+       struct {
+               struct {
+                       int lookups;
+                       int hinted_hits;
+                       int normal_hits;
+                       int collision_chain_traversals;
+               } cache;
+               struct {
+                       unsigned long build_time;
+                       unsigned long run_time;
+                       unsigned long parse_time;
+                       int builds;
+                       int news;
+                       int collisions;
+                       int runs;
+               } script;
+               struct {
+                       unsigned long init_time;
+                       unsigned long unwind_time;
+                       int inits;
+                       int unwinds;
+               } api;
+       } stat;
+# endif
+} unw = {
+       .tables = &unw.kernel_table,
+       .lock = SPIN_LOCK_UNLOCKED,
+       .save_order = {
+               UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
+               UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
+       },
+       /* preg_index entries are 8-byte word indices into unw_frame_info */
+       .preg_index = {
+               offsetof(struct unw_frame_info, pri_unat_loc)/8,        /* 
PRI_UNAT_GR */
+               offsetof(struct unw_frame_info, pri_unat_loc)/8,        /* 
PRI_UNAT_MEM */
+               offsetof(struct unw_frame_info, bsp_loc)/8,
+               offsetof(struct unw_frame_info, bspstore_loc)/8,
+               offsetof(struct unw_frame_info, pfs_loc)/8,
+               offsetof(struct unw_frame_info, rnat_loc)/8,
+               offsetof(struct unw_frame_info, psp)/8,
+               offsetof(struct unw_frame_info, rp_loc)/8,
+               offsetof(struct unw_frame_info, r4)/8,
+               offsetof(struct unw_frame_info, r5)/8,
+               offsetof(struct unw_frame_info, r6)/8,
+               offsetof(struct unw_frame_info, r7)/8,
+               offsetof(struct unw_frame_info, unat_loc)/8,
+               offsetof(struct unw_frame_info, pr_loc)/8,
+               offsetof(struct unw_frame_info, lc_loc)/8,
+               offsetof(struct unw_frame_info, fpsr_loc)/8,
+               offsetof(struct unw_frame_info, b1_loc)/8,
+               offsetof(struct unw_frame_info, b2_loc)/8,
+               offsetof(struct unw_frame_info, b3_loc)/8,
+               offsetof(struct unw_frame_info, b4_loc)/8,
+               offsetof(struct unw_frame_info, b5_loc)/8,
+               offsetof(struct unw_frame_info, f2_loc)/8,
+               offsetof(struct unw_frame_info, f3_loc)/8,
+               offsetof(struct unw_frame_info, f4_loc)/8,
+               offsetof(struct unw_frame_info, f5_loc)/8,
+               offsetof(struct unw_frame_info, fr_loc[16 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[17 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[18 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[19 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[20 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[21 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[22 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[23 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[24 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[25 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[26 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[27 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[28 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[29 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[30 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[31 - 16])/8,
+       },
+       .pt_regs_offsets = {
+               [0] = -1,
+               offsetof(struct pt_regs,  r1),
+               offsetof(struct pt_regs,  r2),
+               offsetof(struct pt_regs,  r3),
+               [4] = -1, [5] = -1, [6] = -1, [7] = -1,
+               offsetof(struct pt_regs,  r8),
+               offsetof(struct pt_regs,  r9),
+               offsetof(struct pt_regs, r10),
+               offsetof(struct pt_regs, r11),
+               offsetof(struct pt_regs, r12),
+               offsetof(struct pt_regs, r13),
+               offsetof(struct pt_regs, r14),
+               offsetof(struct pt_regs, r15),
+               offsetof(struct pt_regs, r16),
+               offsetof(struct pt_regs, r17),
+               offsetof(struct pt_regs, r18),
+               offsetof(struct pt_regs, r19),
+               offsetof(struct pt_regs, r20),
+               offsetof(struct pt_regs, r21),
+               offsetof(struct pt_regs, r22),
+               offsetof(struct pt_regs, r23),
+               offsetof(struct pt_regs, r24),
+               offsetof(struct pt_regs, r25),
+               offsetof(struct pt_regs, r26),
+               offsetof(struct pt_regs, r27),
+               offsetof(struct pt_regs, r28),
+               offsetof(struct pt_regs, r29),
+               offsetof(struct pt_regs, r30),
+               offsetof(struct pt_regs, r31),
+       },
+       .hash = { [0 ... UNW_HASH_SIZE - 1] = -1 },
+#ifdef UNW_DEBUG
+       .preg_name = {
+               "pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs", 
"ar.rnat", "psp", "rp",
+               "r4", "r5", "r6", "r7",
+               "ar.unat", "pr", "ar.lc", "ar.fpsr",
+               "b1", "b2", "b3", "b4", "b5",
+               "f2", "f3", "f4", "f5",
+               "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+               "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
+       }
+#endif
+};
+
+static inline int
+read_only (void *addr)
+{
+       return (unsigned long) ((char *) addr - (char *) &unw.r0) < 
sizeof(unw.r0);
+}
+
+/*
+ * Returns offset of rREG in struct pt_regs.
+ */
+static inline unsigned long
+pt_regs_off (unsigned long reg)
+{
+       short off = -1;
+
+       if (reg < ARRAY_SIZE(unw.pt_regs_offsets))
+               off = unw.pt_regs_offsets[reg];
+
+       if (off < 0) {
+               UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", 
__FUNCTION__, reg);
+               off = 0;
+       }
+       return (unsigned long) off;
+}
+
+static inline struct pt_regs *
+get_scratch_regs (struct unw_frame_info *info)
+{
+       if (!info->pt) {
+               /* This should not happen with valid unwind info.  */
+               UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting 
info->pt\n", __FUNCTION__);
+               if (info->flags & UNW_FLAG_INTERRUPT_FRAME)
+                       info->pt = (unsigned long) ((struct pt_regs *) 
info->psp - 1);
+               else
+                       info->pt = info->sp - 16;
+       }
+       UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __FUNCTION__, info->sp, 
info->pt);
+       return (struct pt_regs *) info->pt;
+}
+
+/* Unwind accessors.  */
+
+/*
+ * Read (write==0) or write (write!=0) general register REGNUM of the
+ * frame described by INFO, together with its NaT bit (*nat).  Handles
+ * r0 (read-only zero), preserved r4-r7 (via unwind info, tracking the
+ * NaT through value/memory-stack/register-stack encodings), scratch
+ * registers (via pt_regs + primary UNAT), and stacked r32+ (via the
+ * RSE backing store).  Returns 0 on success, -1 on error.
+ */
+int
+unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, 
char *nat, int write)
+{
+       unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat;
+       struct unw_ireg *ireg;
+       struct pt_regs *pt;
+
+       if ((unsigned) regnum - 1 >= 127) {
+               if (regnum == 0 && !write) {
+                       *val = 0;       /* read r0 always returns 0 */
+                       *nat = 0;
+                       return 0;
+               }
+               UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n",
+                          __FUNCTION__, regnum);
+               return -1;
+       }
+
+       if (regnum < 32) {
+               if (regnum >= 4 && regnum <= 7) {
+                       /* access a preserved register */
+                       ireg = &info->r4 + (regnum - 4);
+                       addr = ireg->loc;
+                       if (addr) {
+                               nat_addr = addr + ireg->nat.off;
+                               switch (ireg->nat.type) {
+                                     case UNW_NAT_VAL:
+                                       /* simulate getf.sig/setf.sig */
+                                       if (write) {
+                                               if (*nat) {
+                                                       /* write NaTVal and be 
done with it */
+                                                       addr[0] = 0;
+                                                       addr[1] = 0x1fffe;
+                                                       return 0;
+                                               }
+                                               addr[1] = 0x1003e;
+                                       } else {
+                                               /* NOTE(review): the write path above stores
+                                                  exponent 0x1fffe for NaTVal but this read
+                                                  check compares against 0x1ffe — looks
+                                                  inconsistent; confirm against the ia64 SDM
+                                                  spill format before changing. */
+                                               if (addr[0] == 0 && addr[1] == 
0x1ffe) {
+                                                       /* return NaT and be 
done with it */
+                                                       *val = 0;
+                                                       *nat = 1;
+                                                       return 0;
+                                               }
+                                       }
+                                       /* fall through */
+                                     case UNW_NAT_NONE:
+                                       dummy_nat = 0;
+                                       nat_addr = &dummy_nat;
+                                       break;
+
+                                     case UNW_NAT_MEMSTK:
+                                       /* NaT bit index derived from the spill address */
+                                       nat_mask = (1UL << ((long) addr & 
0x1f8)/8);
+                                       break;
+
+                                     case UNW_NAT_REGSTK:
+                                       /* NaT lives in the RNAT slot of the backing store */
+                                       nat_addr = ia64_rse_rnat_addr(addr);
+                                       if ((unsigned long) addr < 
info->regstk.limit
+                                           || (unsigned long) addr >= 
info->regstk.top)
+                                       {
+                                               UNW_DPRINT(0, "unwind.%s: %p 
outside of regstk "
+                                                       "[0x%lx-0x%lx)\n",
+                                                       __FUNCTION__, (void *) 
addr,
+                                                       info->regstk.limit,
+                                                       info->regstk.top);
+                                               return -1;
+                                       }
+                                       if ((unsigned long) nat_addr >= 
info->regstk.top)
+                                               nat_addr = &info->sw->ar_rnat;
+                                       nat_mask = (1UL << 
ia64_rse_slot_num(addr));
+                                       break;
+                               }
+                       } else {
+                               /* not saved anywhere: read from switch_stack */
+                               addr = &info->sw->r4 + (regnum - 4);
+                               nat_addr = &info->sw->ar_unat;
+                               nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+                       }
+               } else {
+                       /* access a scratch register */
+                       pt = get_scratch_regs(info);
+                       addr = (unsigned long *) ((unsigned long)pt + 
pt_regs_off(regnum));
+                       if (info->pri_unat_loc)
+                               nat_addr = info->pri_unat_loc;
+                       else
+                               nat_addr = &info->sw->caller_unat;
+                       nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+               }
+       } else {
+               /* access a stacked register */
+               addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 
32);
+               nat_addr = ia64_rse_rnat_addr(addr);
+               if ((unsigned long) addr < info->regstk.limit
+                   || (unsigned long) addr >= info->regstk.top)
+               {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to access 
register outside "
+                                  "of rbs\n",  __FUNCTION__);
+                       return -1;
+               }
+               if ((unsigned long) nat_addr >= info->regstk.top)
+                       nat_addr = &info->sw->ar_rnat;
+               nat_mask = (1UL << ia64_rse_slot_num(addr));
+       }
+
+       if (write) {
+               if (read_only(addr)) {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to write 
read-only location\n",
+                               __FUNCTION__);
+               } else {
+                       *addr = *val;
+                       if (*nat)
+                               *nat_addr |= nat_mask;
+                       else
+                               *nat_addr &= ~nat_mask;
+               }
+       } else {
+               if ((*nat_addr & nat_mask) == 0) {
+                       *val = *addr;
+                       *nat = 0;
+               } else {
+                       *val = 0;       /* if register is a NaT, *addr may 
contain kernel data! */
+                       *nat = 1;
+               }
+       }
+       return 0;
+}
+EXPORT_SYMBOL(unw_access_gr);
+
+int
+unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, 
int write)
+{
+       unsigned long *addr;
+       struct pt_regs *pt;
+
+       switch (regnum) {
+               /* scratch: */
+             case 0: pt = get_scratch_regs(info); addr = &pt->b0; break;
+             case 6: pt = get_scratch_regs(info); addr = &pt->b6; break;
+             case 7: pt = get_scratch_regs(info); addr = &pt->b7; break;
+
+               /* preserved: */
+             case 1: case 2: case 3: case 4: case 5:
+               addr = *(&info->b1_loc + (regnum - 1));
+               if (!addr)
+                       addr = &info->sw->b1 + (regnum - 1);
+               break;
+
+             default:
+               UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n",
+                          __FUNCTION__, regnum);
+               return -1;
+       }
+       if (write)
+               if (read_only(addr)) {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to write 
read-only location\n",
+                               __FUNCTION__);
+               } else
+                       *addr = *val;
+       else
+               *val = *addr;
+       return 0;
+}
+EXPORT_SYMBOL(unw_access_br);
+
+int
+unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg 
*val, int write)
+{
+       struct ia64_fpreg *addr = NULL;
+       struct pt_regs *pt;
+
+       if ((unsigned) (regnum - 2) >= 126) {
+               UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n",
+                          __FUNCTION__, regnum);
+               return -1;
+       }
+
+       if (regnum <= 5) {
+               addr = *(&info->f2_loc + (regnum - 2));
+               if (!addr)
+                       addr = &info->sw->f2 + (regnum - 2);
+       } else if (regnum <= 15) {
+               if (regnum <= 11) {
+                       pt = get_scratch_regs(info);
+                       addr = &pt->f6  + (regnum - 6);
+               }
+               else
+                       addr = &info->sw->f12 + (regnum - 12);
+       } else if (regnum <= 31) {
+               addr = info->fr_loc[regnum - 16];
+               if (!addr)
+                       addr = &info->sw->f16 + (regnum - 16);
+       } else {
+               struct task_struct *t = info->task;
+
+               if (write)
+                       ia64_sync_fph(t);
+               else
+                       ia64_flush_fph(t);
+#ifdef XEN
+               addr = t->arch._thread.fph + (regnum - 32);
+#else
+               addr = t->thread.fph + (regnum - 32);
+#endif
+       }
+
+       if (write)
+               if (read_only(addr)) {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to write 
read-only location\n",
+                               __FUNCTION__);
+               } else
+                       *addr = *val;
+       else
+               *val = *addr;
+       return 0;
+}
+EXPORT_SYMBOL(unw_access_fr);
+
+int
+unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, 
int write)
+{
+       unsigned long *addr;
+       struct pt_regs *pt;
+
+       switch (regnum) {
+             case UNW_AR_BSP:
+               addr = info->bsp_loc;
+               if (!addr)
+                       addr = &info->sw->ar_bspstore;
+               break;
+
+             case UNW_AR_BSPSTORE:
+               addr = info->bspstore_loc;
+               if (!addr)
+                       addr = &info->sw->ar_bspstore;
+               break;
+
+             case UNW_AR_PFS:
+               addr = info->pfs_loc;
+               if (!addr)
+                       addr = &info->sw->ar_pfs;
+               break;
+
+             case UNW_AR_RNAT:
+               addr = info->rnat_loc;
+               if (!addr)
+                       addr = &info->sw->ar_rnat;
+               break;
+
+             case UNW_AR_UNAT:
+               addr = info->unat_loc;
+               if (!addr)
+                       addr = &info->sw->caller_unat;
+               break;
+
+             case UNW_AR_LC:
+               addr = info->lc_loc;
+               if (!addr)
+                       addr = &info->sw->ar_lc;
+               break;
+
+             case UNW_AR_EC:
+               if (!info->cfm_loc)
+                       return -1;
+               if (write)
+                       *info->cfm_loc =
+                               (*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 
0x3f) << 52);
+               else
+                       *val = (*info->cfm_loc >> 52) & 0x3f;
+               return 0;
+
+             case UNW_AR_FPSR:
+               addr = info->fpsr_loc;
+               if (!addr)
+                       addr = &info->sw->ar_fpsr;
+               break;
+
+             case UNW_AR_RSC:
+               pt = get_scratch_regs(info);
+               addr = &pt->ar_rsc;
+               break;
+
+             case UNW_AR_CCV:
+               pt = get_scratch_regs(info);
+               addr = &pt->ar_ccv;
+               break;
+
+             case UNW_AR_CSD:
+               pt = get_scratch_regs(info);
+               addr = &pt->ar_csd;
+               break;
+
+             case UNW_AR_SSD:
+               pt = get_scratch_regs(info);
+               addr = &pt->ar_ssd;
+               break;
+
+             default:
+               UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n",
+                          __FUNCTION__, regnum);
+               return -1;
+       }
+
+       if (write) {
+               if (read_only(addr)) {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to write 
read-only location\n",
+                               __FUNCTION__);
+               } else
+                       *addr = *val;
+       } else
+               *val = *addr;
+       return 0;
+}
+EXPORT_SYMBOL(unw_access_ar);
+
+int
+unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write)
+{
+       unsigned long *addr;
+
+       addr = info->pr_loc;
+       if (!addr)
+               addr = &info->sw->pr;
+
+       if (write) {
+               if (read_only(addr)) {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to write 
read-only location\n",
+                               __FUNCTION__);
+               } else
+                       *addr = *val;
+       } else
+               *val = *addr;
+       return 0;
+}
+EXPORT_SYMBOL(unw_access_pr);
+
+
+/* Routines to manipulate the state stack.  */
+
+static inline void
+push (struct unw_state_record *sr)
+{
+       struct unw_reg_state *rs;
+
+       rs = alloc_reg_state();
+       if (!rs) {
+               printk(KERN_ERR "unwind: cannot stack reg state!\n");
+               return;
+       }
+       memcpy(rs, &sr->curr, sizeof(*rs));
+       sr->curr.next = rs;
+}
+
+static void
+pop (struct unw_state_record *sr)
+{
+       struct unw_reg_state *rs = sr->curr.next;
+
+       if (!rs) {
+               printk(KERN_ERR "unwind: stack underflow!\n");
+               return;
+       }
+       memcpy(&sr->curr, rs, sizeof(*rs));
+       free_reg_state(rs);
+}
+
+/* Make a copy of the state stack.  Non-recursive to avoid stack overflows.  */
+static struct unw_reg_state *
+dup_state_stack (struct unw_reg_state *rs)
+{
+       struct unw_reg_state *copy, *prev = NULL, *first = NULL;
+
+       while (rs) {
+               copy = alloc_reg_state();
+               if (!copy) {
+                       printk(KERN_ERR "unwind.dup_state_stack: out of 
memory\n");
+                       return NULL;
+               }
+               memcpy(copy, rs, sizeof(*copy));
+               if (first)
+                       prev->next = copy;
+               else
+                       first = copy;
+               rs = rs->next;
+               prev = copy;
+       }
+       return first;
+}
+
+/* Free all stacked register states (but not RS itself).  */
+static void
+free_state_stack (struct unw_reg_state *rs)
+{
+       struct unw_reg_state *p, *next;
+
+       for (p = rs->next; p != NULL; p = next) {
+               next = p->next;
+               free_reg_state(p);
+       }
+       rs->next = NULL;
+}
+
+/* Unwind decoder routines */
+
+static enum unw_register_index __attribute_const__
+decode_abreg (unsigned char abreg, int memory)
+{
+       switch (abreg) {
+             case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04);
+             case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22);
+             case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30);
+             case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41);
+             case 0x60: return UNW_REG_PR;
+             case 0x61: return UNW_REG_PSP;
+             case 0x62: return memory ? UNW_REG_PRI_UNAT_MEM : 
UNW_REG_PRI_UNAT_GR;
+             case 0x63: return UNW_REG_RP;
+             case 0x64: return UNW_REG_BSP;
+             case 0x65: return UNW_REG_BSPSTORE;
+             case 0x66: return UNW_REG_RNAT;
+             case 0x67: return UNW_REG_UNAT;
+             case 0x68: return UNW_REG_FPSR;
+             case 0x69: return UNW_REG_PFS;
+             case 0x6a: return UNW_REG_LC;
+             default:
+               break;
+       }
+       UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __FUNCTION__, abreg);
+       return UNW_REG_LC;
+}
+
+static void
+set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned 
long val)
+{
+       reg->val = val;
+       reg->where = where;
+       if (reg->when == UNW_WHEN_NEVER)
+               reg->when = when;
+}
+
+static void
+alloc_spill_area (unsigned long *offp, unsigned long regsize,
+                 struct unw_reg_info *lo, struct unw_reg_info *hi)
+{
+       struct unw_reg_info *reg;
+
+       for (reg = hi; reg >= lo; --reg) {
+               if (reg->where == UNW_WHERE_SPILL_HOME) {
+                       reg->where = UNW_WHERE_PSPREL;
+                       *offp -= regsize;
+                       reg->val = *offp;
+               }
+       }
+}
+
+static inline void
+spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, 
unw_word t)
+{
+       struct unw_reg_info *reg;
+
+       for (reg = *regp; reg <= lim; ++reg) {
+               if (reg->where == UNW_WHERE_SPILL_HOME) {
+                       reg->when = t;
+                       *regp = reg + 1;
+                       return;
+               }
+       }
+       UNW_DPRINT(0, "unwind.%s: excess spill!\n",  __FUNCTION__);
+}
+
+/*
+ * Finalize the current prologue region: resolve implicit GR save
+ * locations, decode the imask to time-stamp fp/gr/br saves, and lay
+ * out the memory-stack spill area.  Called when the next region
+ * header is seen (see desc_prologue()).
+ */
+static inline void
+finish_prologue (struct unw_state_record *sr)
+{
+       struct unw_reg_info *reg;
+       unsigned long off;
+       int i;
+
+       /*
+        * First, resolve implicit register save locations (see Section 
"11.4.2.3 Rules
+        * for Using Unwind Descriptors", rule 3):
+        */
+       for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) {
+               reg = sr->curr.reg + unw.save_order[i];
+               if (reg->where == UNW_WHERE_GR_SAVE) {
+                       reg->where = UNW_WHERE_GR;
+                       reg->val = sr->gr_save_loc++;
+               }
+       }
+
+       /*
+        * Next, compute when the fp, general, and branch registers get
+        * saved.  This must come before alloc_spill_area() because
+        * we need to know which registers are spilled to their home
+        * locations.
+        */
+       if (sr->imask) {
+               /* imask: 2 bits per instruction, MSB first:
+                  0 = no save, 1 = fp save, 2 = gr save, 3 = br save */
+               unsigned char kind, mask = 0, *cp = sr->imask;
+               int t;
+               static const unsigned char limit[3] = {
+                       UNW_REG_F31, UNW_REG_R7, UNW_REG_B5
+               };
+               struct unw_reg_info *(regs[3]);
+
+               regs[0] = sr->curr.reg + UNW_REG_F2;
+               regs[1] = sr->curr.reg + UNW_REG_R4;
+               regs[2] = sr->curr.reg + UNW_REG_B1;
+
+               for (t = 0; t < sr->region_len; ++t) {
+                       if ((t & 3) == 0)
+                               mask = *cp++;
+                       kind = (mask >> 2*(3-(t & 3))) & 3;
+                       if (kind > 0)
+                               spill_next_when(&regs[kind - 1], sr->curr.reg + 
limit[kind - 1],
+                                               sr->region_start + t);
+               }
+       }
+       /*
+        * Next, lay out the memory stack spill area:
+        */
+       if (sr->any_spills) {
+               off = sr->spill_offset;
+               alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, 
sr->curr.reg + UNW_REG_F31);
+               alloc_spill_area(&off,  8, sr->curr.reg + UNW_REG_B1, 
sr->curr.reg + UNW_REG_B5);
+               alloc_spill_area(&off,  8, sr->curr.reg + UNW_REG_R4, 
sr->curr.reg + UNW_REG_R7);
+       }
+}
+
+/*
+ * Region header descriptors.
+ */
+
+/*
+ * Process a region-header descriptor (prologue or body region of
+ * length RLEN).  Finishes the previous prologue, stops once the
+ * target instruction has been covered, unwinds any pending epilogue
+ * state, and for a prologue region records which of rp/ar.pfs/psp/pr
+ * (MASK, MSB first) were saved to GRs starting at GRSAVE.
+ */
+static void
+desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char 
grsave,
+              struct unw_state_record *sr)
+{
+       int i, region_start;
+
+       if (!(sr->in_body || sr->first_region))
+               finish_prologue(sr);
+       sr->first_region = 0;
+
+       /* check if we're done: */
+       if (sr->when_target < sr->region_start + sr->region_len) {
+               sr->done = 1;
+               return;
+       }
+
+       region_start = sr->region_start + sr->region_len;
+
+       /* leave any regions that ended with the previous epilogue */
+       for (i = 0; i < sr->epilogue_count; ++i)
+               pop(sr);
+       sr->epilogue_count = 0;
+       sr->epilogue_start = UNW_WHEN_NEVER;
+
+       sr->region_start = region_start;
+       sr->region_len = rlen;
+       sr->in_body = body;
+
+       if (!body) {
+               /* new prologue: stack the current state and reset per-region info */
+               push(sr);
+
+               for (i = 0; i < 4; ++i) {
+                       if (mask & 0x8)
+                               set_reg(sr->curr.reg + unw.save_order[i], 
UNW_WHERE_GR,
+                                       sr->region_start + sr->region_len - 1, 
grsave++);
+                       mask <<= 1;
+               }
+               sr->gr_save_loc = grsave;
+               sr->any_spills = 0;
+               sr->imask = NULL;
+               sr->spill_offset = 0x10;        /* default to psp+16 */
+       }
+}
+
+/*
+ * Prologue descriptors.
+ */
+
+static inline void
+desc_abi (unsigned char abi, unsigned char context, struct unw_state_record 
*sr)
+{
+       if (abi == 3 && context == 'i') {
+               sr->flags |= UNW_FLAG_INTERRUPT_FRAME;
+               UNW_DPRINT(3, "unwind.%s: interrupt frame\n",  __FUNCTION__);
+       }
+       else
+               UNW_DPRINT(0, "unwind%s: ignoring 
unwabi(abi=0x%x,context=0x%x)\n",
+                               __FUNCTION__, abi, context);
+}
+
+static inline void
+desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record 
*sr)
+{
+       int i;
+
+       for (i = 0; i < 5; ++i) {
+               if (brmask & 1)
+                       set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR,
+                               sr->region_start + sr->region_len - 1, gr++);
+               brmask >>= 1;
+       }
+}
+
+static inline void
+desc_br_mem (unsigned char brmask, struct unw_state_record *sr)
+{
+       int i;
+
+       for (i = 0; i < 5; ++i) {
+               if (brmask & 1) {
+                       set_reg(sr->curr.reg + UNW_REG_B1 + i, 
UNW_WHERE_SPILL_HOME,
+                               sr->region_start + sr->region_len - 1, 0);
+                       sr->any_spills = 1;
+               }
+               brmask >>= 1;
+       }
+}
+
/*
 * General registers r4-r7 (GRMASK, bit 0 = r4) and floating-point
 * registers (FRMASK: bits 0-3 = f2-f5, bits 4-19 = f16-f31) were
 * spilled to the memory spill area.
 */
static inline void
desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr)
{
	int i;

	for (i = 0; i < 4; ++i) {
		if ((grmask & 1) != 0) {
			set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
				sr->region_start + sr->region_len - 1, 0);
			sr->any_spills = 1;
		}
		grmask >>= 1;
	}
	for (i = 0; i < 20; ++i) {
		if ((frmask & 1) != 0) {
			/* mask bits 0-3 map to f2-f5, 4-19 to f16-f31 */
			int base = (i < 4) ? UNW_REG_F2 : UNW_REG_F16 - 4;
			set_reg(sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME,
				sr->region_start + sr->region_len - 1, 0);
			sr->any_spills = 1;
		}
		frmask >>= 1;
	}
}
+
+static inline void
+desc_fr_mem (unsigned char frmask, struct unw_state_record *sr)
+{
+       int i;
+
+       for (i = 0; i < 4; ++i) {
+               if ((frmask & 1) != 0) {
+                       set_reg(sr->curr.reg + UNW_REG_F2 + i, 
UNW_WHERE_SPILL_HOME,
+                               sr->region_start + sr->region_len - 1, 0);
+                       sr->any_spills = 1;
+               }
+               frmask >>= 1;
+       }
+}
+
+static inline void
+desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record 
*sr)
+{
+       int i;
+
+       for (i = 0; i < 4; ++i) {
+               if ((grmask & 1) != 0)
+                       set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR,
+                               sr->region_start + sr->region_len - 1, gr++);
+               grmask >>= 1;
+       }
+}
+
+static inline void
+desc_gr_mem (unsigned char grmask, struct unw_state_record *sr)
+{
+       int i;
+
+       for (i = 0; i < 4; ++i) {
+               if ((grmask & 1) != 0) {
+                       set_reg(sr->curr.reg + UNW_REG_R4 + i, 
UNW_WHERE_SPILL_HOME,
+                               sr->region_start + sr->region_len - 1, 0);
+                       sr->any_spills = 1;
+               }
+               grmask >>= 1;
+       }
+}
+
/*
 * Fixed-size memory stack frame: from time T (clamped to this region)
 * on, the previous sp is the current sp plus 16*SIZE bytes.
 */
static inline void
desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr)
{
	set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE,
		sr->region_start + min_t(int, t, sr->region_len - 1), 16*size);
}
+
/*
 * Variable-size memory stack frame: record only the time at which psp
 * was modified (clamped to this region); its save location is set by
 * other descriptors.
 */
static inline void
desc_mem_stack_v (unw_word t, struct unw_state_record *sr)
{
	sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1);
}
+
/* Preserved register REG was saved to general register DST at the end
   of the current region. */
static inline void
desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr)
{
	set_reg(sr->curr.reg + reg, UNW_WHERE_GR, sr->region_start + sr->region_len - 1, dst);
}
+
/*
 * Preserved register REG was saved at psp-relative offset 0x10-4*PSPOFF
 * (the descriptor encodes the offset in 4-byte words below psp+16).
 */
static inline void
desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr)
{
	set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, sr->region_start + sr->region_len - 1,
		0x10 - 4*pspoff);
}
+
/*
 * Preserved register REG was saved at sp-relative offset 4*SPOFF (the
 * descriptor encodes the offset in 4-byte words).
 */
static inline void
desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr)
{
	set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, sr->region_start + sr->region_len - 1,
		4*spoff);
}
+
/* Record which branch register holds the return link (used as the
   default for rp when no save of rp is described). */
static inline void
desc_rp_br (unsigned char dst, struct unw_state_record *sr)
{
	sr->return_link_reg = dst;
}
+
/*
 * Record the time at which register REGNUM was saved (clamped to this
 * region).  If no explicit save location has been seen yet, default it
 * to the generic GR save area.
 */
static inline void
desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr)
{
	struct unw_reg_info *reg = sr->curr.reg + regnum;

	if (reg->where == UNW_WHERE_NONE)
		reg->where = UNW_WHERE_GR_SAVE;
	reg->when = sr->region_start + min_t(int, t, sr->region_len - 1);
}
+
/* Set the base of the memory spill area: PSPOFF is a word offset below
   psp+16, matching the psprel encoding used elsewhere. */
static inline void
desc_spill_base (unw_word pspoff, struct unw_state_record *sr)
{
	sr->spill_offset = 0x10 - 4*pspoff;
}
+
/*
 * Remember the spill mask (2 bits per instruction slot in the region)
 * for later use by finish_prologue, and return the address just past
 * it: 2*region_len bits, rounded up to whole bytes.
 */
static inline unsigned char *
desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr)
{
	sr->imask = imaskp;
	return imaskp + (2*sr->region_len + 7)/8;
}
+
+/*
+ * Body descriptors.
+ */
/*
 * Body epilogue descriptor: sp is restored T slots before the region
 * ends, and ECOUNT+1 nested prologue states are popped at that point
 * (see desc_prologue and the epilogue handling in build_script).
 */
static inline void
desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr)
{
	sr->epilogue_start = sr->region_start + sr->region_len - 1 - t;
	sr->epilogue_count = ecount + 1;
}
+
/*
 * Restore the register state previously snapshotted under LABEL.  The
 * saved copy stays usable because its state stack is duplicated rather
 * than taken over.  An unknown label is logged and ignored.
 */
static inline void
desc_copy_state (unw_word label, struct unw_state_record *sr)
{
	struct unw_labeled_state *ls;

	for (ls = sr->labeled_states; ls; ls = ls->next) {
		if (ls->label == label) {
			/* drop the current stack before overwriting sr->curr */
			free_state_stack(&sr->curr);
			memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr));
			sr->curr.next = dup_state_stack(ls->saved_state.next);
			return;
		}
	}
	printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label);
}
+
/*
 * Snapshot the current register state under LABEL so that a later
 * copy_state descriptor can restore it.  The state stack is deep-copied
 * so the snapshot is independent of further mutations.
 */
static inline void
desc_label_state (unw_word label, struct unw_state_record *sr)
{
	struct unw_labeled_state *ls;

	ls = alloc_labeled_state();
	if (!ls) {
		printk(KERN_ERR "unwind.desc_label_state(): out of memory\n");
		return;
	}
	ls->label = label;
	memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state));
	ls->saved_state.next = dup_state_stack(sr->curr.next);

	/* insert into list of labeled states: */
	ls->next = sr->labeled_states;
	sr->labeled_states = ls;
}
+
+/*
+ * General descriptors.
+ */
+
/*
 * Return non-zero if a descriptor taking effect at time T (clamped to
 * this region) and predicated on qualifying predicate QP applies to
 * the unwind target.  A predicate that is consulted gets added to
 * pr_mask, so cached scripts are reused only under matching predicate
 * values.
 */
static inline int
desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr)
{
	if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1))
		return 0;
	if (qp > 0) {
		if ((sr->pr_val & (1UL << qp)) == 0)
			return 0;
		sr->pr_mask |= (1UL << qp);
	}
	return 1;
}
+
+static inline void
+desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct 
unw_state_record *sr)
+{
+       struct unw_reg_info *r;
+
+       if (!desc_is_active(qp, t, sr))
+               return;
+
+       r = sr->curr.reg + decode_abreg(abreg, 0);
+       r->where = UNW_WHERE_NONE;
+       r->when = UNW_WHEN_NEVER;
+       r->val = 0;
+}
+
/*
 * Predicated spill of register ABREG to another register: X selects a
 * branch-register target; otherwise the top bit of YTREG selects a
 * floating-point register and a clear top bit a general register.  The
 * low 7 bits of YTREG give the target register number.
 */
static inline void
desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x,
		     unsigned char ytreg, struct unw_state_record *sr)
{
	enum unw_where where = UNW_WHERE_GR;
	struct unw_reg_info *r;

	if (!desc_is_active(qp, t, sr))
		return;

	if (x)
		where = UNW_WHERE_BR;
	else if (ytreg & 0x80)
		where = UNW_WHERE_FR;

	r = sr->curr.reg + decode_abreg(abreg, 0);
	r->where = where;
	r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
	r->val = (ytreg & 0x7f);
}
+
/*
 * Predicated spill of register ABREG to a psp-relative location; the
 * word offset PSPOFF is decoded the same way as in desc_reg_psprel.
 */
static inline void
desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff,
		     struct unw_state_record *sr)
{
	struct unw_reg_info *r;

	if (!desc_is_active(qp, t, sr))
		return;

	r = sr->curr.reg + decode_abreg(abreg, 1);
	r->where = UNW_WHERE_PSPREL;
	r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
	r->val = 0x10 - 4*pspoff;
}
+
+static inline void
+desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, 
unw_word spoff,
+                      struct unw_state_record *sr)
+{
+       struct unw_reg_info *r;
+
+       if (!desc_is_active(qp, t, sr))
+               return;
+
+       r = sr->curr.reg + decode_abreg(abreg, 1);
+       r->where = UNW_WHERE_SPREL;
+       r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+       r->val = 4*spoff;
+}
+
/*
 * Glue between the generic unwind-descriptor decoder (unwind_decoder.c,
 * included below) and the desc_*() handlers above: the decoder invokes
 * these UNW_DEC_* hooks as it parses each record.  The first argument
 * (fmt/f) names the descriptor format and is ignored by the handlers.
 */
#define UNW_DEC_BAD_CODE(code)			printk(KERN_ERR "unwind: unknown code 0x%02x\n", \
						       code);

/*
 * region headers:
 */
#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg)	desc_prologue(0,r,m,gr,arg)
#define UNW_DEC_PROLOGUE(fmt,b,r,arg)		desc_prologue(b,r,0,32,arg)
/*
 * prologue descriptors:
 */
#define UNW_DEC_ABI(fmt,a,c,arg)		desc_abi(a,c,arg)
#define UNW_DEC_BR_GR(fmt,b,g,arg)		desc_br_gr(b,g,arg)
#define UNW_DEC_BR_MEM(fmt,b,arg)		desc_br_mem(b,arg)
#define UNW_DEC_FRGR_MEM(fmt,g,f,arg)		desc_frgr_mem(g,f,arg)
#define UNW_DEC_FR_MEM(fmt,f,arg)		desc_fr_mem(f,arg)
#define UNW_DEC_GR_GR(fmt,m,g,arg)		desc_gr_gr(m,g,arg)
#define UNW_DEC_GR_MEM(fmt,m,arg)		desc_gr_mem(m,arg)
#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg)	desc_mem_stack_f(t,s,arg)
#define UNW_DEC_MEM_STACK_V(fmt,t,arg)		desc_mem_stack_v(t,arg)
#define UNW_DEC_REG_GR(fmt,r,d,arg)		desc_reg_gr(r,d,arg)
#define UNW_DEC_REG_PSPREL(fmt,r,o,arg)		desc_reg_psprel(r,o,arg)
#define UNW_DEC_REG_SPREL(fmt,r,o,arg)		desc_reg_sprel(r,o,arg)
#define UNW_DEC_REG_WHEN(fmt,r,t,arg)		desc_reg_when(r,t,arg)
#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)	desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg)
#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)	desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg)
#define UNW_DEC_PRIUNAT_GR(fmt,r,arg)		desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg)
#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg)	desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg)
#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg)	desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg)
#define UNW_DEC_RP_BR(fmt,d,arg)		desc_rp_br(d,arg)
#define UNW_DEC_SPILL_BASE(fmt,o,arg)		desc_spill_base(o,arg)
#define UNW_DEC_SPILL_MASK(fmt,m,arg)		(m = desc_spill_mask(m,arg))
/*
 * body descriptors:
 */
#define UNW_DEC_EPILOGUE(fmt,t,c,arg)		desc_epilogue(t,c,arg)
#define UNW_DEC_COPY_STATE(fmt,l,arg)		desc_copy_state(l,arg)
#define UNW_DEC_LABEL_STATE(fmt,l,arg)		desc_label_state(l,arg)
/*
 * general unwind descriptors:
 */
#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg)	desc_spill_reg_p(p,t,a,x,y,arg)
#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg)	desc_spill_reg_p(0,t,a,x,y,arg)
#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg)	desc_spill_psprel_p(p,t,a,o,arg)
#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg)	desc_spill_psprel_p(0,t,a,o,arg)
#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg)	desc_spill_sprel_p(p,t,a,o,arg)
#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg)	desc_spill_sprel_p(0,t,a,o,arg)
#define UNW_DEC_RESTORE_P(f,p,t,a,arg)		desc_restore_p(p,t,a,arg)
#define UNW_DEC_RESTORE(f,t,a,arg)		desc_restore_p(0,t,a,arg)

#include "unwind_decoder.c"
+
+
+/* Unwind scripts. */
+
+static inline unw_hash_index_t
+hash (unsigned long ip)
+{
+#      define hashmagic        0x9e3779b97f4a7c16UL    /* based on 
(sqrt(5)/2-1)*2^64 */
+
+       return (ip >> 4)*hashmagic >> (64 - UNW_LOG_HASH_SIZE);
+#undef hashmagic
+}
+
/*
 * Check whether SCRIPT was built for address IP with predicate values
 * PR (only the predicates in script->pr_mask are compared).  On a
 * match, returns 1 with script->lock still held for reading - the
 * caller is responsible for releasing it; on a mismatch, returns 0
 * with the lock already dropped.
 */
static inline long
cache_match (struct unw_script *script, unsigned long ip, unsigned long pr)
{
	read_lock(&script->lock);
	if (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0)
		/* keep the read lock... */
		return 1;
	read_unlock(&script->lock);
	return 0;
}
+
/*
 * Find a cached unwind script for INFO's ip/predicates.  The per-frame
 * hint is tried first, then the hash table's collision chain.  On a
 * hit the script is returned with its read lock held (taken by
 * cache_match()); on a miss NULL is returned, telling the caller to
 * build a fresh script.
 */
static inline struct unw_script *
script_lookup (struct unw_frame_info *info)
{
	struct unw_script *script = unw.cache + info->hint;
	unsigned short index;
	unsigned long ip, pr;

	if (UNW_DEBUG_ON(0))
		return NULL;	/* Always regenerate scripts in debug mode */

	STAT(++unw.stat.cache.lookups);

	ip = info->ip;
	pr = info->pr;

	if (cache_match(script, ip, pr)) {
		STAT(++unw.stat.cache.hinted_hits);
		return script;
	}

	index = unw.hash[hash(ip)];
	if (index >= UNW_CACHE_SIZE)
		return NULL;

	/* walk the collision chain rooted at the hash bucket */
	script = unw.cache + index;
	while (1) {
		if (cache_match(script, ip, pr)) {
			/* update hint; no locking required as single-word writes are atomic */
			STAT(++unw.stat.cache.normal_hits);
			unw.cache[info->prev_script].hint = script - unw.cache;
			return script;
		}
		if (script->coll_chain >= UNW_HASH_SIZE)
			return NULL;
		script = unw.cache + script->coll_chain;
		STAT(++unw.stat.cache.collision_chain_traversals);
	}
}
+
/*
 * Allocate a script-cache slot for address IP by recycling the
 * least-recently-used entry.  On returning, a write lock for the
 * SCRIPT is still being held.  Returns NULL if the LRU entry's lock
 * cannot be taken without blocking.
 */
static inline struct unw_script *
script_new (unsigned long ip)
{
	struct unw_script *script, *prev, *tmp;
	unw_hash_index_t index;
	unsigned short head;

	STAT(++unw.stat.script.news);

	/*
	 * Can't (easily) use cmpxchg() here because of ABA problem
	 * that is intrinsic in cmpxchg()...
	 */
	/* take the entry at the head of the LRU chain */
	head = unw.lru_head;
	script = unw.cache + head;
	unw.lru_head = script->lru_chain;

	/*
	 * We'd deadlock here if we interrupted a thread that is holding a read lock on
	 * script->lock.  Thus, if the write_trylock() fails, we simply bail out.  The
	 * alternative would be to disable interrupts whenever we hold a read-lock, but
	 * that seems silly.
	 */
	if (!write_trylock(&script->lock))
		return NULL;

	/* re-insert script at the tail of the LRU chain: */
	unw.cache[unw.lru_tail].lru_chain = head;
	unw.lru_tail = head;

	/* remove the old script from the hash table (if it's there): */
	if (script->ip) {
		index = hash(script->ip);
		tmp = unw.cache + unw.hash[index];
		prev = NULL;
		while (1) {
			if (tmp == script) {
				/* unlink from the collision chain */
				if (prev)
					prev->coll_chain = tmp->coll_chain;
				else
					unw.hash[index] = tmp->coll_chain;
				break;
			} else
				prev = tmp;
			if (tmp->coll_chain >= UNW_CACHE_SIZE)
			/* old script wasn't in the hash-table */
				break;
			tmp = unw.cache + tmp->coll_chain;
		}
	}

	/* enter new script in the hash table */
	index = hash(ip);
	script->coll_chain = unw.hash[index];
	unw.hash[index] = script - unw.cache;

	script->ip = ip;	/* set new IP while we're holding the locks */

	STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions);

	script->flags = 0;
	script->hint = 0;
	script->count = 0;
	return script;
}
+
/* Record the predicate mask/values the finished SCRIPT depends on so
   that cache_match() can later validate reuse. */
static void
script_finalize (struct unw_script *script, struct unw_state_record *sr)
{
	script->pr_mask = sr->pr_mask;
	script->pr_val = sr->pr_val;
	/*
	 * We could down-grade our write-lock on script->lock here but
	 * the rwlock API doesn't offer atomic lock downgrading, so
	 * we'll just keep the write-lock and release it later when
	 * we're done using the script.
	 */
}
+
+static inline void
+script_emit (struct unw_script *script, struct unw_insn insn)
+{
+       if (script->count >= UNW_MAX_SCRIPT_LEN) {
+               UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u 
instructions!\n",
+                       __FUNCTION__, UNW_MAX_SCRIPT_LEN);
+               return;
+       }
+       script->insn[script->count++] = insn;
+}
+
/*
 * Emit the script instruction that recovers the NaT bit of preserved
 * general register I, chosen according to where the register itself
 * was saved.
 */
static inline void
emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script)
{
	struct unw_reg_info *r = sr->curr.reg + i;
	enum unw_insn_opcode opc;
	struct unw_insn insn;
	unsigned long val = 0;

	switch (r->where) {
	      case UNW_REG_R4 <= 0 ? UNW_WHERE_GR : UNW_WHERE_GR:
		if (r->val >= 32) {
			/* register got spilled to a stacked register */
			opc = UNW_INSN_SETNAT_TYPE;
			val = UNW_NAT_REGSTK;
		} else
			/* register got spilled to a scratch register */
			opc = UNW_INSN_SETNAT_MEMSTK;
		break;

	      case UNW_WHERE_FR:
		opc = UNW_INSN_SETNAT_TYPE;
		val = UNW_NAT_VAL;
		break;

	      case UNW_WHERE_BR:
		/* branch registers have no NaT bit to restore */
		opc = UNW_INSN_SETNAT_TYPE;
		val = UNW_NAT_NONE;
		break;

	      case UNW_WHERE_PSPREL:
	      case UNW_WHERE_SPREL:
		opc = UNW_INSN_SETNAT_MEMSTK;
		break;

	      default:
		UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n",
			   __FUNCTION__, r->where);
		return;
	}
	insn.opc = opc;
	insn.dst = unw.preg_index[i];
	insn.val = val;
	script_emit(script, insn);
}
+
/*
 * Translate the state-record entry for register I into one or more
 * script instructions that locate (or reconstruct) the register's
 * saved value.  Registers that were never saved, or were saved only
 * after the unwind target, need no instruction.
 */
static void
compile_reg (struct unw_state_record *sr, int i, struct unw_script *script)
{
	struct unw_reg_info *r = sr->curr.reg + i;
	enum unw_insn_opcode opc;
	unsigned long val, rval;
	struct unw_insn insn;
	long need_nat_info;

	if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target)
		return;

	opc = UNW_INSN_MOVE;
	val = rval = r->val;
	/* only the preserved GRs r4-r7 carry NaT bits to track */
	need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7);

	switch (r->where) {
	      case UNW_WHERE_GR:
		if (rval >= 32) {
			/* saved to a stacked register */
			opc = UNW_INSN_MOVE_STACKED;
			val = rval - 32;
		} else if (rval >= 4 && rval <= 7) {
			/* saved to another tracked preserved register */
			if (need_nat_info) {
				opc = UNW_INSN_MOVE2;
				need_nat_info = 0;
			}
			val = unw.preg_index[UNW_REG_R4 + (rval - 4)];
		} else if (rval == 0) {
			opc = UNW_INSN_MOVE_CONST;
			val = 0;
		} else {
			/* register got spilled to a scratch register */
			opc = UNW_INSN_MOVE_SCRATCH;
			val = pt_regs_off(rval);
		}
		break;

	      case UNW_WHERE_FR:
		if (rval <= 5)
			val = unw.preg_index[UNW_REG_F2  + (rval -  2)];
		else if (rval >= 16 && rval <= 31)
			val = unw.preg_index[UNW_REG_F16 + (rval - 16)];
		else {
			/* f6-f11 live in pt_regs; anything else is an error */
			opc = UNW_INSN_MOVE_SCRATCH;
			if (rval <= 11)
				val = offsetof(struct pt_regs, f6) + 16*(rval - 6);
			else
				UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n",
					   __FUNCTION__, rval);
		}
		break;

	      case UNW_WHERE_BR:
		if (rval >= 1 && rval <= 5)
			val = unw.preg_index[UNW_REG_B1 + (rval - 1)];
		else {
			/* b0, b6, b7 are scratch and live in pt_regs */
			opc = UNW_INSN_MOVE_SCRATCH;
			if (rval == 0)
				val = offsetof(struct pt_regs, b0);
			else if (rval == 6)
				val = offsetof(struct pt_regs, b6);
			else
				val = offsetof(struct pt_regs, b7);
		}
		break;

	      case UNW_WHERE_SPREL:
		opc = UNW_INSN_ADD_SP;
		break;

	      case UNW_WHERE_PSPREL:
		opc = UNW_INSN_ADD_PSP;
		break;

	      default:
		UNW_DPRINT(0, "unwind%s: register %u has unexpected `where' value of %u\n",
			   __FUNCTION__, i, r->where);
		break;
	}
	insn.opc = opc;
	insn.dst = unw.preg_index[i];
	insn.val = val;
	script_emit(script, insn);
	if (need_nat_info)
		emit_nat_info(sr, i, script);

	if (i == UNW_REG_PSP) {
		/*
		 * info->psp must contain the _value_ of the previous
		 * sp, not it's save location.  We get this by
		 * dereferencing the value we just stored in
		 * info->psp:
		 */
		insn.opc = UNW_INSN_LOAD;
		insn.dst = insn.val = unw.preg_index[UNW_REG_PSP];
		script_emit(script, insn);
	}
}
+
+static inline const struct unw_table_entry *
+lookup (struct unw_table *table, unsigned long rel_ip)
+{
+       const struct unw_table_entry *e = NULL;
+       unsigned long lo, hi, mid;
+
+       /* do a binary search for right entry: */
+       for (lo = 0, hi = table->length; lo < hi; ) {
+               mid = (lo + hi) / 2;
+               e = &table->array[mid];
+               if (rel_ip < e->start_offset)
+                       hi = mid;
+               else if (rel_ip >= e->end_offset)
+                       lo = mid + 1;
+               else
+                       break;
+       }
+       if (rel_ip < e->start_offset || rel_ip >= e->end_offset)
+               return NULL;
+       return e;
+}
+
/*
 * Build an unwind script that unwinds from state OLD_STATE to the
 * entrypoint of the function that called OLD_STATE.  Returns the new
 * script with its write lock held (see script_new), or NULL if no
 * cache slot could be locked.
 */
static inline struct unw_script *
build_script (struct unw_frame_info *info)
{
	const struct unw_table_entry *e = NULL;
	struct unw_script *script = NULL;
	struct unw_labeled_state *ls, *next;
	unsigned long ip = info->ip;
	struct unw_state_record sr;
	struct unw_table *table;
	struct unw_reg_info *r;
	struct unw_insn insn;
	u8 *dp, *desc_end;
	u64 hdr;
	int i;
	STAT(unsigned long start, parse_start;)

	STAT(++unw.stat.script.builds; start = ia64_get_itc());

	/* build state record */
	memset(&sr, 0, sizeof(sr));
	for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
		r->when = UNW_WHEN_NEVER;
	sr.pr_val = info->pr;

	UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __FUNCTION__, ip);
	/* grab a cache slot (returned write-locked) */
	script = script_new(ip);
	if (!script) {
		UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n",  __FUNCTION__);
		STAT(unw.stat.script.build_time += ia64_get_itc() - start);
		return NULL;
	}
	unw.cache[info->prev_script].hint = script - unw.cache;

	/* search the kernels and the modules' unwind tables for IP: */

	STAT(parse_start = ia64_get_itc());

	for (table = unw.tables; table; table = table->next) {
		if (ip >= table->start && ip < table->end) {
			e = lookup(table, ip - table->segment_base);
			break;
		}
	}
	if (!e) {
		/* no info, return default unwinder (leaf proc, no mem stack, no saved regs)  */
		UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n",
			__FUNCTION__, ip, unw.cache[info->prev_script].ip);
		sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
		sr.curr.reg[UNW_REG_RP].when = -1;
		sr.curr.reg[UNW_REG_RP].val = 0;
		compile_reg(&sr, UNW_REG_RP, script);
		script_finalize(script, &sr);
		STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
		STAT(unw.stat.script.build_time += ia64_get_itc() - start);
		return script;
	}

	/* target time in instruction-slot units: 3 slots per 16-byte bundle,
	   plus the slot number carried in ip's low 4 bits */
	sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16
			  + (ip & 0xfUL));
	hdr = *(u64 *) (table->segment_base + e->info_offset);
	dp =   (u8 *)  (table->segment_base + e->info_offset + 8);
	desc_end = dp + 8*UNW_LENGTH(hdr);

	/* feed the unwind descriptors through the decoder */
	while (!sr.done && dp < desc_end)
		dp = unw_decode(dp, sr.in_body, &sr);

	if (sr.when_target > sr.epilogue_start) {
		/*
		 * sp has been restored and all values on the memory stack below
		 * psp also have been restored.
		 */
		sr.curr.reg[UNW_REG_PSP].val = 0;
		sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE;
		sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER;
		for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
			if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10)
			    || r->where == UNW_WHERE_SPREL)
			{
				r->val = 0;
				r->where = UNW_WHERE_NONE;
				r->when = UNW_WHEN_NEVER;
			}
	}

	script->flags = sr.flags;

	/*
	 * If RP did't get saved, generate entry for the return link
	 * register.
	 */
	if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) {
		sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
		sr.curr.reg[UNW_REG_RP].when = -1;
		sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg;
		UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n",
			   __FUNCTION__, ip, sr.curr.reg[UNW_REG_RP].where,
			   sr.curr.reg[UNW_REG_RP].val);
	}

#ifdef UNW_DEBUG
	/* dump the final state record for debugging */
	UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n",
		__FUNCTION__, table->segment_base + e->start_offset, sr.when_target);
	for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) {
		if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) {
			UNW_DPRINT(1, "  %s <- ", unw.preg_name[r - sr.curr.reg]);
			switch (r->where) {
			      case UNW_WHERE_GR:     UNW_DPRINT(1, "r%lu", r->val); break;
			      case UNW_WHERE_FR:     UNW_DPRINT(1, "f%lu", r->val); break;
			      case UNW_WHERE_BR:     UNW_DPRINT(1, "b%lu", r->val); break;
			      case UNW_WHERE_SPREL:  UNW_DPRINT(1, "[sp+0x%lx]", r->val); break;
			      case UNW_WHERE_PSPREL: UNW_DPRINT(1, "[psp+0x%lx]", r->val); break;
			      case UNW_WHERE_NONE:
				UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r - sr.curr.reg], r->val);
				break;

			      default:
				UNW_DPRINT(1, "BADWHERE(%d)", r->where);
				break;
			}
			UNW_DPRINT(1, "\t\t%d\n", r->when);
		}
	}
#endif

	STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);

	/* translate state record into unwinder instructions: */

	/*
	 * First, set psp if we're dealing with a fixed-size frame;
	 * subsequent instructions may depend on this value.
	 */
	if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when
	    && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE)
	    && sr.curr.reg[UNW_REG_PSP].val != 0) {
		/* new psp is sp plus frame size */
		insn.opc = UNW_INSN_ADD;
		insn.dst = offsetof(struct unw_frame_info, psp)/8;
		insn.val = sr.curr.reg[UNW_REG_PSP].val;	/* frame size */
		script_emit(script, insn);
	}

	/* determine where the primary UNaT is: */
	if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
		i = UNW_REG_PRI_UNAT_MEM;
	else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when)
		i = UNW_REG_PRI_UNAT_GR;
	else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
		i = UNW_REG_PRI_UNAT_MEM;
	else
		i = UNW_REG_PRI_UNAT_GR;

	compile_reg(&sr, i, script);

	for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i)
		compile_reg(&sr, i, script);

	/* free labeled register states & stack: */

	STAT(parse_start = ia64_get_itc());
	for (ls = sr.labeled_states; ls; ls = next) {
		next = ls->next;
		free_state_stack(&ls->saved_state);
		free_labeled_state(ls);
	}
	free_state_stack(&sr.curr);
	STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);

	script_finalize(script, &sr);
	STAT(unw.stat.script.build_time += ia64_get_itc() - start);
	return script;
}
+
+/*
+ * Apply the unwinding actions represented by OPS and update SR to
+ * reflect the state that existed upon entry to the function that this
+ * unwinder represents.
+ */
+static inline void
+run_script (struct unw_script *script, struct unw_frame_info *state)
+{
+       struct unw_insn *ip, *limit, next_insn;
+       unsigned long opc, dst, val, off;
+       unsigned long *s = (unsigned long *) state;
+       STAT(unsigned long start;)
+
+       STAT(++unw.stat.script.runs; start = ia64_get_itc());
+       state->flags = script->flags;
+       ip = script->insn;
+       limit = script->insn + script->count;
+       next_insn = *ip;
+
+       while (ip++ < limit) {
+               opc = next_insn.opc;
+               dst = next_insn.dst;
+               val = next_insn.val;
+               next_insn = *ip;
+
+         redo:
+               switch (opc) {
+                     case UNW_INSN_ADD:
+                       s[dst] += val;
+                       break;
+
+                     case UNW_INSN_MOVE2:
+                       if (!s[val])
+                               goto lazy_init;
+                       s[dst+1] = s[val+1];
+                       s[dst] = s[val];
+                       break;
+
+                     case UNW_INSN_MOVE:
+                       if (!s[val])
+                               goto lazy_init;
+                       s[dst] = s[val];
+                       break;
+
+                     case UNW_INSN_MOVE_SCRATCH:
+                       if (state->pt) {
+                               s[dst] = (unsigned long) 
get_scratch_regs(state) + val;
+                       } else {
+                               s[dst] = 0;
+                               UNW_DPRINT(0, "unwind.%s: no state->pt, 
dst=%ld, val=%ld\n",
+                                          __FUNCTION__, dst, val);
+                       }
+                       break;
+
+                     case UNW_INSN_MOVE_CONST:
+                       if (val == 0)
+                               s[dst] = (unsigned long) &unw.r0;
+                       else {
+                               s[dst] = 0;
+                               UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST 
bad val=%ld\n",
+                                          __FUNCTION__, val);
+                       }
+                       break;
+
+
+                     case UNW_INSN_MOVE_STACKED:
+                       s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned 
long *)state->bsp,
+                                                                   val);
+                       break;
+
+                     case UNW_INSN_ADD_PSP:
+                       s[dst] = state->psp + val;
+                       break;
+
+                     case UNW_INSN_ADD_SP:
+                       s[dst] = state->sp + val;
+                       break;
+
+                     case UNW_INSN_SETNAT_MEMSTK:
+                       if (!state->pri_unat_loc)
+                               state->pri_unat_loc = &state->sw->caller_unat;
+                       /* register off. is a multiple of 8, so the least 3 
bits (type) are 0 */
+                       s[dst+1] = ((unsigned long) state->pri_unat_loc - 
s[dst]) | UNW_NAT_MEMSTK;
+                       break;
+
+                     case UNW_INSN_SETNAT_TYPE:
+                       s[dst+1] = val;
+                       break;
+
+                     case UNW_INSN_LOAD:
+#ifdef UNW_DEBUG
+                       if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) 
!= 0
+#ifndef XEN
+                           || s[val] < TASK_SIZE
+#endif
+                               )
+                       {
+                               UNW_DPRINT(0, "unwind.%s: rejecting bad 
psp=0x%lx\n",
+                                          __FUNCTION__, s[val]);
+                               break;
+                       }
+#endif
+                       s[dst] = *(unsigned long *) s[val];
+                       break;
+               }
+       }
+       STAT(unw.stat.script.run_time += ia64_get_itc() - start);
+       return;
+
+  lazy_init:
+       off = unw.sw_off[val];
+       s[val] = (unsigned long) state->sw + off;
+       if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct 
switch_stack, r7))
+               /*
+                * We're initializing a general register: init NaT info, too.  
Note that
+                * the offset is a multiple of 8 which gives us the 3 bits 
needed for
+                * the type field.
+                */
+               s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | 
UNW_NAT_MEMSTK;
+       goto redo;
+}
+
+static int
+find_save_locs (struct unw_frame_info *info)
+{
+       int have_write_lock = 0;
+       struct unw_script *scr;
+       unsigned long flags = 0;
+
+       if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf))
+#ifndef XEN
+           || info->ip < TASK_SIZE
+#endif
+               ) {
+               /* don't let obviously bad addresses pollute the cache */
+               /* FIXME: should really be level 0 but it occurs too often. KAO 
*/
+               UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", 
__FUNCTION__, info->ip);
+               info->rp_loc = NULL;
+               return -1;
+       }
+
+       scr = script_lookup(info);
+       if (!scr) {
+               spin_lock_irqsave(&unw.lock, flags);
+               scr = build_script(info);
+               if (!scr) {
+                       spin_unlock_irqrestore(&unw.lock, flags);
+                       UNW_DPRINT(0,
+                                  "unwind.%s: failed to locate/build unwind 
script for ip %lx\n",
+                                  __FUNCTION__, info->ip);
+                       return -1;
+               }
+               have_write_lock = 1;
+       }
+       info->hint = scr->hint;
+       info->prev_script = scr - unw.cache;
+
+       run_script(scr, info);
+
+       if (have_write_lock) {
+               write_unlock(&scr->lock);
+               spin_unlock_irqrestore(&unw.lock, flags);
+       } else
+               read_unlock(&scr->lock);
+       return 0;
+}
+
+int
+unw_unwind (struct unw_frame_info *info)
+{
+       unsigned long prev_ip, prev_sp, prev_bsp;
+       unsigned long ip, pr, num_regs;
+       STAT(unsigned long start, flags;)
+       int retval;
+
+       STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = 
ia64_get_itc());
+
+       prev_ip = info->ip;
+       prev_sp = info->sp;
+       prev_bsp = info->bsp;
+
+       /* restore the ip */
+       if (!info->rp_loc) {
+               /* FIXME: should really be level 0 but it occurs too often. KAO 
*/
+               UNW_DPRINT(1, "unwind.%s: failed to locate return link 
(ip=0x%lx)!\n",
+                          __FUNCTION__, info->ip);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; 
local_irq_restore(flags));
+               return -1;
+       }
+       ip = info->ip = *info->rp_loc;
+       if (ip < GATE_ADDR) {
+               UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", 
__FUNCTION__, ip);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; 
local_irq_restore(flags));
+               return -1;
+       }
+
+       /* restore the cfm: */
+       if (!info->pfs_loc) {
+               UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", 
__FUNCTION__);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; 
local_irq_restore(flags));
+               return -1;
+       }
+       info->cfm_loc = info->pfs_loc;
+
+       /* restore the bsp: */
+       pr = info->pr;
+       num_regs = 0;
+       if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) {
+               info->pt = info->sp + 16;
+               if ((pr & (1UL << PRED_NON_SYSCALL)) != 0)
+                       num_regs = *info->cfm_loc & 0x7f;               /* size 
of frame */
+               info->pfs_loc =
+                       (unsigned long *) (info->pt + offsetof(struct pt_regs, 
ar_pfs));
+               UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", 
__FUNCTION__, info->pt);
+       } else
+               num_regs = (*info->cfm_loc >> 7) & 0x7f;        /* size of 
locals */
+       info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) 
info->bsp, -num_regs);
+       if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) {
+               UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range 
[0x%lx-0x%lx]\n",
+                       __FUNCTION__, info->bsp, info->regstk.limit, 
info->regstk.top);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; 
local_irq_restore(flags));
+               return -1;
+       }
+
+       /* restore the sp: */
+       info->sp = info->psp;
+       if (info->sp < info->memstk.top || info->sp > info->memstk.limit) {
+               UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range 
[0x%lx-0x%lx]\n",
+                       __FUNCTION__, info->sp, info->memstk.top, 
info->memstk.limit);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; 
local_irq_restore(flags));
+               return -1;
+       }
+
+       if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == 
prev_bsp) {
+               UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here 
(ip=0x%lx)\n",
+                          __FUNCTION__, ip);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; 
local_irq_restore(flags));
+               return -1;
+       }
+
+       /* as we unwind, the saved ar.unat becomes the primary unat: */
+       info->pri_unat_loc = info->unat_loc;
+
+       /* finally, restore the predicates: */
+       unw_get_pr(info, &info->pr);
+
+       retval = find_save_locs(info);
+       STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; 
local_irq_restore(flags));
+       return retval;
+}
+EXPORT_SYMBOL(unw_unwind);
+
+int
+unw_unwind_to_user (struct unw_frame_info *info)
+{
+       unsigned long ip, sp, pr = 0;
+
+       while (unw_unwind(info) >= 0) {
+               unw_get_sp(info, &sp);
+               if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
+                   < IA64_PT_REGS_SIZE) {
+                       UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel 
stack\n",
+                                  __FUNCTION__);
+                       break;
+               }
+               if (unw_is_intr_frame(info) &&
+                   (pr & (1UL << PRED_USER_STACK)))
+                       return 0;
+               if (unw_get_pr (info, &pr) < 0) {
+                       unw_get_rp(info, &ip);
+                       UNW_DPRINT(0, "unwind.%s: failed to read "
+                                  "predicate register (ip=0x%lx)\n",
+                               __FUNCTION__, ip);
+                       return -1;
+               }
+       }
+       unw_get_ip(info, &ip);
+       UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
+                  __FUNCTION__, ip);
+       return -1;
+}
+EXPORT_SYMBOL(unw_unwind_to_user);
+
+static void
+init_frame_info (struct unw_frame_info *info, struct task_struct *t,
+                struct switch_stack *sw, unsigned long stktop)
+{
+       unsigned long rbslimit, rbstop, stklimit;
+       STAT(unsigned long start, flags;)
+
+       STAT(local_irq_save(flags); ++unw.stat.api.inits; start = 
ia64_get_itc());
+
+       /*
+        * Subtle stuff here: we _could_ unwind through the switch_stack frame 
but we
+        * don't want to do that because it would be slow as each preserved 
register would
+        * have to be processed.  Instead, what we do here is zero out the 
frame info and
+        * start the unwind process at the function that created the 
switch_stack frame.
+        * When a preserved value in switch_stack needs to be accessed, 
run_script() will
+        * initialize the appropriate pointer on demand.
+        */
+       memset(info, 0, sizeof(*info));
+
+       rbslimit = (unsigned long) t + IA64_RBS_OFFSET;
+       rbstop   = sw->ar_bspstore;
+       if (rbstop - (unsigned long) t >= IA64_STK_OFFSET)
+               rbstop = rbslimit;
+
+       stklimit = (unsigned long) t + IA64_STK_OFFSET;
+       if (stktop <= rbstop)
+               stktop = rbstop;
+
+       info->regstk.limit = rbslimit;
+       info->regstk.top   = rbstop;
+       info->memstk.limit = stklimit;
+       info->memstk.top   = stktop;
+       info->task = t;
+       info->sw  = sw;
+       info->sp = info->psp = stktop;
+       info->pr = sw->pr;
+       UNW_DPRINT(3, "unwind.%s:\n"
+                  "  task   0x%lx\n"
+                  "  rbs = [0x%lx-0x%lx)\n"
+                  "  stk = [0x%lx-0x%lx)\n"
+                  "  pr     0x%lx\n"
+                  "  sw     0x%lx\n"
+                  "  sp     0x%lx\n",
+                  __FUNCTION__, (unsigned long) t, rbslimit, rbstop, stktop, 
stklimit,
+                  info->pr, (unsigned long) info->sw, info->sp);
+       STAT(unw.stat.api.init_time += ia64_get_itc() - start; 
local_irq_restore(flags));
+}
+
+void
+unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, 
struct switch_stack *sw)
+{
+       unsigned long sol;
+
+       init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16);
+       info->cfm_loc = &sw->ar_pfs;
+       sol = (*info->cfm_loc >> 7) & 0x7f;
+       info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) 
info->regstk.top, -sol);
+       info->ip = sw->b0;
+       UNW_DPRINT(3, "unwind.%s:\n"
+                  "  bsp    0x%lx\n"
+                  "  sol    0x%lx\n"
+                  "  ip     0x%lx\n",
+                  __FUNCTION__, info->bsp, sol, info->ip);
+       find_save_locs(info);
+}
+
+EXPORT_SYMBOL(unw_init_frame_info);
+
+void
+unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t)
+{
+#ifdef XEN
+       struct switch_stack *sw = (struct switch_stack *) (t->arch._thread.ksp 
+ 16);
+#else
+       struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16);
+#endif
+
+       UNW_DPRINT(1, "unwind.%s\n", __FUNCTION__);
+       unw_init_frame_info(info, t, sw);
+}
+EXPORT_SYMBOL(unw_init_from_blocked_task);
+
+static void
+init_unwind_table (struct unw_table *table, const char *name, unsigned long 
segment_base,
+                  unsigned long gp, const void *table_start, const void 
*table_end)
+{
+       const struct unw_table_entry *start = table_start, *end = table_end;
+
+       table->name = name;
+       table->segment_base = segment_base;
+       table->gp = gp;
+       table->start = segment_base + start[0].start_offset;
+       table->end = segment_base + end[-1].end_offset;
+       table->array = start;
+       table->length = end - start;
+}
+
+#ifndef XEN
+void *
+unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned 
long gp,
+                     const void *table_start, const void *table_end)
+{
+       const struct unw_table_entry *start = table_start, *end = table_end;
+       struct unw_table *table;
+       unsigned long flags;
+
+       if (end - start <= 0) {
+               UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty 
unwind table\n",
+                          __FUNCTION__);
+               return NULL;
+       }
+
+       table = kmalloc(sizeof(*table), GFP_USER);
+       if (!table)
+               return NULL;
+
+       init_unwind_table(table, name, segment_base, gp, table_start, 
table_end);
+
+       spin_lock_irqsave(&unw.lock, flags);
+       {
+               /* keep kernel unwind table at the front (it's searched most 
commonly): */
+               table->next = unw.tables->next;
+               unw.tables->next = table;
+       }
+       spin_unlock_irqrestore(&unw.lock, flags);
+
+       return table;
+}
+
+void
+unw_remove_unwind_table (void *handle)
+{
+       struct unw_table *table, *prev;
+       struct unw_script *tmp;
+       unsigned long flags;
+       long index;
+
+       if (!handle) {
+               UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove 
non-existent unwind table\n",
+                          __FUNCTION__);
+               return;
+       }
+
+       table = handle;
+       if (table == &unw.kernel_table) {
+               UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind 
table is a "
+                          "no-can-do!\n", __FUNCTION__);
+               return;
+       }
+
+       spin_lock_irqsave(&unw.lock, flags);
+       {
+               /* first, delete the table: */
+
+               for (prev = (struct unw_table *) &unw.tables; prev; prev = 
prev->next)
+                       if (prev->next == table)
+                               break;
+               if (!prev) {
+                       UNW_DPRINT(0, "unwind.%s: failed to find unwind table 
%p\n",
+                                  __FUNCTION__, (void *) table);
+                       spin_unlock_irqrestore(&unw.lock, flags);
+                       return;
+               }
+               prev->next = table->next;
+       }
+       spin_unlock_irqrestore(&unw.lock, flags);
+
+       /* next, remove hash table entries for this table */
+
+       for (index = 0; index <= UNW_HASH_SIZE; ++index) {
+               tmp = unw.cache + unw.hash[index];
+               if (unw.hash[index] >= UNW_CACHE_SIZE
+                   || tmp->ip < table->start || tmp->ip >= table->end)
+                       continue;
+
+               write_lock(&tmp->lock);
+               {
+                       if (tmp->ip >= table->start && tmp->ip < table->end) {
+                               unw.hash[index] = tmp->coll_chain;
+                               tmp->ip = 0;
+                       }
+               }
+               write_unlock(&tmp->lock);
+       }
+
+       kfree(table);
+}
+
+static int __init
+create_gate_table (void)
+{
+       const struct unw_table_entry *entry, *start, *end;
+       unsigned long *lp, segbase = GATE_ADDR;
+       size_t info_size, size;
+       char *info;
+       Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + 
GATE_EHDR->e_phoff);
+       int i;
+
+       for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr)
+               if (phdr->p_type == PT_IA_64_UNWIND) {
+                       punw = phdr;
+                       break;
+               }
+
+       if (!punw) {
+               printk("%s: failed to find gate DSO's unwind table!\n", 
__FUNCTION__);
+               return 0;
+       }
+
+       start = (const struct unw_table_entry *) punw->p_vaddr;
+       end = (struct unw_table_entry *) ((char *) start + punw->p_memsz);
+       size  = 0;
+
+       unw_add_unwind_table("linux-gate.so", segbase, 0, start, end);
+
+       for (entry = start; entry < end; ++entry)
+               size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + 
entry->info_offset));
+       size += 8;      /* reserve space for "end of table" marker */
+
+       unw.gate_table = kmalloc(size, GFP_KERNEL);
+       if (!unw.gate_table) {
+               unw.gate_table_size = 0;
+               printk(KERN_ERR "%s: unable to create unwind data for gate 
page!\n", __FUNCTION__);
+               return 0;
+       }
+       unw.gate_table_size = size;
+
+       lp = unw.gate_table;
+       info = (char *) unw.gate_table + size;
+
+       for (entry = start; entry < end; ++entry, lp += 3) {
+               info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + 
entry->info_offset));
+               info -= info_size;
+               memcpy(info, (char *) segbase + entry->info_offset, info_size);
+
+               lp[0] = segbase + entry->start_offset;          /* start */
+               lp[1] = segbase + entry->end_offset;            /* end */
+               lp[2] = info - (char *) unw.gate_table;         /* info */
+       }
+       *lp = 0;        /* end-of-table marker */
+       return 0;
+}
+
+__initcall(create_gate_table);
+#endif // !XEN
+
+void __init
+unw_init (void)
+{
+       extern char __gp[];
+       extern void unw_hash_index_t_is_too_narrow (void);
+       long i, off;
+
+       if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE)
+               unw_hash_index_t_is_too_narrow();
+
+       unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT);
+       unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE);
+       unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS);
+       unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0);
+       unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT);
+       unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR);
+       unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC);
+       unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR);
+       for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8)
+               unw.sw_off[unw.preg_index[i]] = off;
+       for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8)
+               unw.sw_off[unw.preg_index[i]] = off;
+       for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16)
+               unw.sw_off[unw.preg_index[i]] = off;
+       for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16)
+               unw.sw_off[unw.preg_index[i]] = off;
+
+       for (i = 0; i < UNW_CACHE_SIZE; ++i) {
+               if (i > 0)
+                       unw.cache[i].lru_chain = (i - 1);
+               unw.cache[i].coll_chain = -1;
+               rwlock_init(&unw.cache[i].lock);
+       }
+       unw.lru_head = UNW_CACHE_SIZE - 1;
+       unw.lru_tail = 0;
+
+       init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned 
long) __gp,
+                         __start_unwind, __end_unwind);
+}
+
+/*
+ * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
+ *
+ *     This system call has been deprecated.  The new and improved way to get
+ *     at the kernel's unwind info is via the gate DSO.  The address of the
+ *     ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR.
+ *
+ * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
+ *
+ * This system call copies the unwind data into the buffer pointed to by BUF 
and returns
+ * the size of the unwind data.  If BUF_SIZE is smaller than the size of the 
unwind data
+ * or if BUF is NULL, nothing is copied, but the system call still returns the 
size of the
+ * unwind data.
+ *
+ * The first portion of the unwind data contains an unwind table and rest 
contains the
+ * associated unwind info (in no particular order).  The unwind table consists 
of a table
+ * of entries of the form:
+ *
+ *     u64 start;      (64-bit address of start of function)
+ *     u64 end;        (64-bit address of end of function)
+ *     u64 info;       (BUF-relative offset to unwind info)
+ *
+ * The end of the unwind table is indicated by an entry with a START address 
of zero.
+ *
+ * Please see the IA-64 Software Conventions and Runtime Architecture manual 
for details
+ * on the format of the unwind info.
+ *
+ * ERRORS
+ *     EFAULT  BUF points outside your accessible address space.
+ */
+asmlinkage long
+sys_getunwind (void __user *buf, size_t buf_size)
+{
+       if (buf && buf_size >= unw.gate_table_size)
+               if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0)
+                       return -EFAULT;
+       return unw.gate_table_size;
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/unwind_decoder.c
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/unwind_decoder.c  Mon Jan  9 11:22:17 2006
@@ -0,0 +1,459 @@
+/*
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * Generic IA-64 unwind info decoder.
+ *
+ * This file is used both by the Linux kernel and objdump.  Please keep
+ * the two copies of this file in sync.
+ *
+ * You need to customize the decoder by defining the following
+ * macros/constants before including this file:
+ *
+ *  Types:
+ *     unw_word        Unsigned integer type with at least 64 bits 
+ *
+ *  Register names:
+ *     UNW_REG_BSP
+ *     UNW_REG_BSPSTORE
+ *     UNW_REG_FPSR
+ *     UNW_REG_LC
+ *     UNW_REG_PFS
+ *     UNW_REG_PR
+ *     UNW_REG_RNAT
+ *     UNW_REG_PSP
+ *     UNW_REG_RP
+ *     UNW_REG_UNAT
+ *
+ *  Decoder action macros:
+ *     UNW_DEC_BAD_CODE(code)
+ *     UNW_DEC_ABI(fmt,abi,context,arg)
+ *     UNW_DEC_BR_GR(fmt,brmask,gr,arg)
+ *     UNW_DEC_BR_MEM(fmt,brmask,arg)
+ *     UNW_DEC_COPY_STATE(fmt,label,arg)
+ *     UNW_DEC_EPILOGUE(fmt,t,ecount,arg)
+ *     UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg)
+ *     UNW_DEC_FR_MEM(fmt,frmask,arg)
+ *     UNW_DEC_GR_GR(fmt,grmask,gr,arg)
+ *     UNW_DEC_GR_MEM(fmt,grmask,arg)
+ *     UNW_DEC_LABEL_STATE(fmt,label,arg)
+ *     UNW_DEC_MEM_STACK_F(fmt,t,size,arg)
+ *     UNW_DEC_MEM_STACK_V(fmt,t,arg)
+ *     UNW_DEC_PRIUNAT_GR(fmt,r,arg)
+ *     UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)
+ *     UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)
+ *     UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg)
+ *     UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg)
+ *     UNW_DEC_PROLOGUE(fmt,body,rlen,arg)
+ *     UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg)
+ *     UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg)
+ *     UNW_DEC_REG_REG(fmt,src,dst,arg)
+ *     UNW_DEC_REG_SPREL(fmt,reg,spoff,arg)
+ *     UNW_DEC_REG_WHEN(fmt,reg,t,arg)
+ *     UNW_DEC_RESTORE(fmt,t,abreg,arg)
+ *     UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg)
+ *     UNW_DEC_SPILL_BASE(fmt,pspoff,arg)
+ *     UNW_DEC_SPILL_MASK(fmt,imaskp,arg)
+ *     UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg)
+ *     UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ *     UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg)
+ *     UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg)
+ *     UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg)
+ *     UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ */
+
+static unw_word
+unw_decode_uleb128 (unsigned char **dpp)
+{
+  unsigned shift = 0;
+  unw_word byte, result = 0;
+  unsigned char *bp = *dpp;
+
+  while (1)
+    {
+      byte = *bp++;
+      result |= (byte & 0x7f) << shift;
+      if ((byte & 0x80) == 0)
+       break;
+      shift += 7;
+    }
+  *dpp = bp;
+  return result;
+}
+
+static unsigned char *
+unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, abreg;
+  unw_word t, off;
+
+  byte1 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  off = unw_decode_uleb128 (&dp);
+  abreg = (byte1 & 0x7f);
+  if (byte1 & 0x80)
+         UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg);
+  else
+         UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, abreg, x, ytreg;
+  unw_word t;
+
+  byte1 = *dp++; byte2 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  abreg = (byte1 & 0x7f);
+  ytreg = byte2;
+  x = (byte1 >> 7) & 1;
+  if ((byte1 & 0x80) == 0 && ytreg == 0)
+    UNW_DEC_RESTORE(X2, t, abreg, arg);
+  else
+    UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, abreg, qp;
+  unw_word t, off;
+
+  byte1 = *dp++; byte2 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  off = unw_decode_uleb128 (&dp);
+
+  qp = (byte1 & 0x3f);
+  abreg = (byte2 & 0x7f);
+
+  if (byte1 & 0x80)
+    UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg);
+  else
+    UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg;
+  unw_word t;
+
+  byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+
+  qp = (byte1 & 0x3f);
+  abreg = (byte2 & 0x7f);
+  x = (byte2 >> 7) & 1;
+  ytreg = byte3;
+
+  if ((byte2 & 0x80) == 0 && byte3 == 0)
+    UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg);
+  else
+    UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  int body = (code & 0x20) != 0;
+  unw_word rlen;
+
+  rlen = (code & 0x1f);
+  UNW_DEC_PROLOGUE(R1, body, rlen, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, mask, grsave;
+  unw_word rlen;
+
+  byte1 = *dp++;
+
+  mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+  grsave = (byte1 & 0x7f);
+  rlen = unw_decode_uleb128 (&dp);
+  UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word rlen;
+
+  rlen = unw_decode_uleb128 (&dp);
+  UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char brmask = (code & 0x1f);
+
+  UNW_DEC_BR_MEM(P1, brmask, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg)
+{
+  if ((code & 0x10) == 0)
+    {
+      unsigned char byte1 = *dp++;
+
+      UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1),
+                   (byte1 & 0x7f), arg);
+    }
+  else if ((code & 0x08) == 0)
+    {
+      unsigned char byte1 = *dp++, r, dst;
+
+      r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+      dst = (byte1 & 0x7f);
+      switch (r)
+       {
+       case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break;
+       case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break;
+       case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break;
+       case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break;
+       case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break;
+       case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break;
+       case 6: UNW_DEC_RP_BR(P3, dst, arg); break;
+       case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break;
+       case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break;
+       case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break;
+       case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break;
+       case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break;
+       default: UNW_DEC_BAD_CODE(r); break;
+       }
+    }
+  else if ((code & 0x7) == 0)
+    UNW_DEC_SPILL_MASK(P4, dp, arg);
+  else if ((code & 0x7) == 1)
+    {
+      unw_word grmask, frmask, byte1, byte2, byte3;
+
+      byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+      grmask = ((byte1 >> 4) & 0xf);
+      frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3;
+      UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg);
+    }
+  else
+    UNW_DEC_BAD_CODE(code);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg)
+{
+  int gregs = (code & 0x10) != 0;
+  unsigned char mask = (code & 0x0f);
+
+  if (gregs)
+    UNW_DEC_GR_MEM(P6, mask, arg);
+  else
+    UNW_DEC_FR_MEM(P6, mask, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char r, byte1, byte2;
+  unw_word t, size;
+
+  if ((code & 0x10) == 0)
+    {
+      r = (code & 0xf);
+      t = unw_decode_uleb128 (&dp);
+      switch (r)
+       {
+       case 0:
+         size = unw_decode_uleb128 (&dp);
+         UNW_DEC_MEM_STACK_F(P7, t, size, arg);
+         break;
+
+       case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break;
+       case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break;
+       case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break;
+       case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break;
+       case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break;
+       case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break;
+       case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break;
+       case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break;
+       case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break;
+       case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break;
+       case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break;
+       case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break;
+       case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break;
+       case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break;
+       case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break;
+       default: UNW_DEC_BAD_CODE(r); break;
+       }
+    }
+  else
+    {
+      switch (code & 0xf)
+       {
+       case 0x0: /* p8 */
+         {
+           r = *dp++;
+           t = unw_decode_uleb128 (&dp);
+           switch (r)
+             {
+             case  1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break;
+             case  2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break;
+             case  3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break;
+             case  4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break;
+             case  5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break;
+             case  6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break;
+             case  7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break;
+             case  8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break;
+             case  9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break;
+             case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break;
+             case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+             case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+             case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break;
+             case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break;
+             case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break;
+             case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break;
+             case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break;
+             case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break;
+             case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break;
+             default: UNW_DEC_BAD_CODE(r); break;
+           }
+         }
+         break;
+
+       case 0x1:
+         byte1 = *dp++; byte2 = *dp++;
+         UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg);
+         break;
+
+       case 0xf: /* p10 */
+         byte1 = *dp++; byte2 = *dp++;
+         UNW_DEC_ABI(P10, byte1, byte2, arg);
+         break;
+
+       case 0x9:
+         return unw_decode_x1 (dp, code, arg);
+
+       case 0xa:
+         return unw_decode_x2 (dp, code, arg);
+
+       case 0xb:
+         return unw_decode_x3 (dp, code, arg);
+
+       case 0xc:
+         return unw_decode_x4 (dp, code, arg);
+
+       default:
+         UNW_DEC_BAD_CODE(code);
+         break;
+       }
+    }
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word label = (code & 0x1f);
+
+  if ((code & 0x20) != 0)
+    UNW_DEC_COPY_STATE(B1, label, arg);
+  else
+    UNW_DEC_LABEL_STATE(B1, label, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word t;
+
+  t = unw_decode_uleb128 (&dp);
+  UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word t, ecount, label;
+
+  if ((code & 0x10) == 0)
+    {
+      t = unw_decode_uleb128 (&dp);
+      ecount = unw_decode_uleb128 (&dp);
+      UNW_DEC_EPILOGUE(B3, t, ecount, arg);
+    }
+  else if ((code & 0x07) == 0)
+    {
+      label = unw_decode_uleb128 (&dp);
+      if ((code & 0x08) != 0)
+       UNW_DEC_COPY_STATE(B4, label, arg);
+      else
+       UNW_DEC_LABEL_STATE(B4, label, arg);
+    }
+  else
+    switch (code & 0x7)
+      {
+      case 1: return unw_decode_x1 (dp, code, arg);
+      case 2: return unw_decode_x2 (dp, code, arg);
+      case 3: return unw_decode_x3 (dp, code, arg);
+      case 4: return unw_decode_x4 (dp, code, arg);
+      default: UNW_DEC_BAD_CODE(code); break;
+      }
+  return dp;
+}
+
+typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *);
+
+static unw_decoder unw_decode_table[2][8] =
+{
+  /* prologue table: */
+  {
+    unw_decode_r1,     /* 0 */
+    unw_decode_r1,
+    unw_decode_r2,
+    unw_decode_r3,
+    unw_decode_p1,     /* 4 */
+    unw_decode_p2_p5,
+    unw_decode_p6,
+    unw_decode_p7_p10
+  },
+  {
+    unw_decode_r1,     /* 0 */
+    unw_decode_r1,
+    unw_decode_r2,
+    unw_decode_r3,
+    unw_decode_b1,     /* 4 */
+    unw_decode_b1,
+    unw_decode_b2,
+    unw_decode_b3_x4
+  }
+};
+
+/*
+ * Decode one descriptor and return address of next descriptor.
+ */
+static inline unsigned char *
+unw_decode (unsigned char *dp, int inside_body, void *arg)
+{
+  unw_decoder decoder;
+  unsigned char code;
+
+  code = *dp++;
+  decoder = unw_decode_table[inside_body][code >> 5];
+  dp = (*decoder) (dp, code, arg);
+  return dp;
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/unwind_i.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/unwind_i.h        Mon Jan  9 11:22:17 2006
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * Kernel unwind support.
+ */
+
+#define UNW_VER(x)             ((x) >> 48)
+#define UNW_FLAG_MASK          0x0000ffff00000000
+#define UNW_FLAG_OSMASK                0x0000f00000000000
+#define UNW_FLAG_EHANDLER(x)   ((x) & 0x0000000100000000L)
+#define UNW_FLAG_UHANDLER(x)   ((x) & 0x0000000200000000L)
+#define UNW_LENGTH(x)          ((x) & 0x00000000ffffffffL)
+
+enum unw_register_index {
+       /* primary unat: */
+       UNW_REG_PRI_UNAT_GR,
+       UNW_REG_PRI_UNAT_MEM,
+
+       /* register stack */
+       UNW_REG_BSP,                                    /* register stack pointer */
+       UNW_REG_BSPSTORE,
+       UNW_REG_PFS,                                    /* previous function state */
+       UNW_REG_RNAT,
+       /* memory stack */
+       UNW_REG_PSP,                                    /* previous memory stack pointer */
+       /* return pointer: */
+       UNW_REG_RP,
+
+       /* preserved registers: */
+       UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7,
+       UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR,
+       UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5,
+       UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5,
+       UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19,
+       UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23,
+       UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27,
+       UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31,
+       UNW_NUM_REGS
+};
+
+struct unw_info_block {
+       u64 header;
+       u64 desc[0];            /* unwind descriptors */
+       /* personality routine and language-specific data follow behind descriptors */
+};
+
+struct unw_table {
+       struct unw_table *next;         /* must be first member! */
+       const char *name;
+       unsigned long gp;               /* global pointer for this load-module */
+       unsigned long segment_base;     /* base for offsets in the unwind table entries */
+       unsigned long start;
+       unsigned long end;
+       const struct unw_table_entry *array;
+       unsigned long length;
+};
+
+enum unw_where {
+       UNW_WHERE_NONE,                 /* register isn't saved at all */
+       UNW_WHERE_GR,                   /* register is saved in a general register */
+       UNW_WHERE_FR,                   /* register is saved in a floating-point register */
+       UNW_WHERE_BR,                   /* register is saved in a branch register */
+       UNW_WHERE_SPREL,                /* register is saved on memstack (sp-relative) */
+       UNW_WHERE_PSPREL,               /* register is saved on memstack (psp-relative) */
+       /*
+        * At the end of each prologue these locations get resolved to
+        * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively:
+        */
+       UNW_WHERE_SPILL_HOME,           /* register is saved in its spill home */
+       UNW_WHERE_GR_SAVE               /* register is saved in next general register */
+};
+
+#define UNW_WHEN_NEVER 0x7fffffff
+
+struct unw_reg_info {
+       unsigned long val;              /* save location: register number or offset */
+       enum unw_where where;           /* where the register gets saved */
+       int when;                       /* when the register gets saved */
+};
+
+struct unw_reg_state {
+       struct unw_reg_state *next;             /* next (outer) element on state stack */
+       struct unw_reg_info reg[UNW_NUM_REGS];  /* register save locations */
+};
+
+struct unw_labeled_state {
+       struct unw_labeled_state *next;         /* next labeled state (or NULL) */
+       unsigned long label;                    /* label for this state */
+       struct unw_reg_state saved_state;
+};
+
+struct unw_state_record {
+       unsigned int first_region : 1;  /* is this the first region? */
+       unsigned int done : 1;          /* are we done scanning descriptors? */
+       unsigned int any_spills : 1;    /* got any register spills? */
+       unsigned int in_body : 1;       /* are we inside a body (as opposed to a prologue)? */
+       unsigned long flags;            /* see UNW_FLAG_* in unwind.h */
+
+       u8 *imask;                      /* imask of spill_mask record or NULL */
+       unsigned long pr_val;           /* predicate values */
+       unsigned long pr_mask;          /* predicate mask */
+       long spill_offset;              /* psp-relative offset for spill base */
+       int region_start;
+       int region_len;
+       int epilogue_start;
+       int epilogue_count;
+       int when_target;
+
+       u8 gr_save_loc;                 /* next general register to use for saving a register */
+       u8 return_link_reg;             /* branch register in which the return link is passed */
+
+       struct unw_labeled_state *labeled_states;       /* list of all labeled states */
+       struct unw_reg_state curr;      /* current state */
+};
+
+enum unw_nat_type {
+       UNW_NAT_NONE,           /* NaT not represented */
+       UNW_NAT_VAL,            /* NaT represented by NaT value (fp reg) */
+       UNW_NAT_MEMSTK,         /* NaT value is in unat word at offset OFF  */
+       UNW_NAT_REGSTK          /* NaT is in rnat */
+};
+
+enum unw_insn_opcode {
+       UNW_INSN_ADD,                   /* s[dst] += val */
+       UNW_INSN_ADD_PSP,               /* s[dst] = (s.psp + val) */
+       UNW_INSN_ADD_SP,                /* s[dst] = (s.sp + val) */
+       UNW_INSN_MOVE,                  /* s[dst] = s[val] */
+       UNW_INSN_MOVE2,                 /* s[dst] = s[val]; s[dst+1] = s[val+1] */
+       UNW_INSN_MOVE_STACKED,          /* s[dst] = ia64_rse_skip(*s.bsp, val) */
+       UNW_INSN_SETNAT_MEMSTK,         /* s[dst+1].nat.type = MEMSTK;
+                                          s[dst+1].nat.off = *s.pri_unat - s[dst] */
+       UNW_INSN_SETNAT_TYPE,           /* s[dst+1].nat.type = val */
+       UNW_INSN_LOAD,                  /* s[dst] = *s[val] */
+       UNW_INSN_MOVE_SCRATCH,          /* s[dst] = scratch reg "val" */
+       UNW_INSN_MOVE_CONST,            /* s[dst] = constant reg "val" */
+};
+
+struct unw_insn {
+       unsigned int opc        :  4;
+       unsigned int dst        :  9;
+       signed int val          : 19;
+};
+
+/*
+ * Preserved general static registers (r4-r7) give rise to two script
+ * instructions; everything else yields at most one instruction; at
+ * the end of the script, the psp gets popped, accounting for one more
+ * instruction.
+ */
+#define UNW_MAX_SCRIPT_LEN     (UNW_NUM_REGS + 5)
+
+struct unw_script {
+       unsigned long ip;               /* ip this script is for */
+       unsigned long pr_mask;          /* mask of predicates script depends on */
+       unsigned long pr_val;           /* predicate values this script is for */
+       rwlock_t lock;
+       unsigned int flags;             /* see UNW_FLAG_* in unwind.h */
+       unsigned short lru_chain;       /* used for least-recently-used chain */
+       unsigned short coll_chain;      /* used for hash collisions */
+       unsigned short hint;            /* hint for next script to try (or -1) */
+       unsigned short count;           /* number of instructions in script */
+       struct unw_insn insn[UNW_MAX_SCRIPT_LEN];
+};
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_32/xen.lds.S
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/x86_32/xen.lds.S     Mon Jan  9 11:22:17 2006
@@ -0,0 +1,85 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>
+ * Modified for i386 Xen by Keir Fraser
+ */
+
+#include <xen/config.h>
+#include <asm/page.h>
+#undef ENTRY
+#undef ALIGN
+
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(start)
+PHDRS
+{
+  text PT_LOAD ;
+}
+SECTIONS
+{
+  . = 0xFF000000 + 0x100000;
+  _text = .;                   /* Text and read-only data */
+  .text : {
+       *(.text)
+       *(.fixup)
+       *(.gnu.warning)
+       } :text =0x9090
+  .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
+
+  _etext = .;                  /* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) } :text
+
+  . = ALIGN(32);               /* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) } :text
+  __stop___ex_table = .;
+
+  . = ALIGN(32);               /* Pre-exception table */
+  __start___pre_ex_table = .;
+  __pre_ex_table : { *(__pre_ex_table) } :text
+  __stop___pre_ex_table = .;
+
+  .data : {                    /* Data */
+       *(.data)
+       CONSTRUCTORS
+       } :text
+
+  . = ALIGN(4096);             /* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) } :text
+  .data.init : { *(.data.init) } :text
+  . = ALIGN(32);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) } :text
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : { *(.initcall.init) } :text
+  __initcall_end = .;
+  . = ALIGN(STACK_SIZE);
+  __init_end = .;
+
+  __bss_start = .;             /* BSS */
+  .bss : {
+       *(.bss.stack_aligned)
+       *(.bss.page_aligned)
+       *(.bss)
+       } :text
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+       *(.text.exit)
+       *(.data.exit)
+       *(.exitcall.exit)
+       }
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_64/xen.lds.S
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/x86_64/xen.lds.S     Mon Jan  9 11:22:17 2006
@@ -0,0 +1,83 @@
+/* Excerpts written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx> */
+/* Modified for x86-64 Xen by Keir Fraser */
+
+#include <xen/config.h>
+#include <asm/page.h>
+#undef ENTRY
+#undef ALIGN
+
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(start)
+PHDRS
+{
+  text PT_LOAD ;
+}
+SECTIONS
+{
+  . = 0xFFFF830000100000;
+  _text = .;                   /* Text and read-only data */
+  .text : {
+       *(.text)
+       *(.fixup)
+       *(.gnu.warning)
+       } :text = 0x9090
+  .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
+
+  _etext = .;                  /* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) } :text
+
+  . = ALIGN(32);               /* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) } :text
+  __stop___ex_table = .;
+
+  . = ALIGN(32);                /* Pre-exception table */
+  __start___pre_ex_table = .;
+  __pre_ex_table : { *(__pre_ex_table) } :text
+  __stop___pre_ex_table = .;
+
+  .data : {                    /* Data */
+       *(.data)
+       CONSTRUCTORS
+       } :text
+
+  . = ALIGN(4096);             /* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) } :text
+  .data.init : { *(.data.init) } :text
+  . = ALIGN(32);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) } :text
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : { *(.initcall.init) } :text
+  __initcall_end = .;
+  . = ALIGN(STACK_SIZE);
+  __init_end = .;
+
+  __bss_start = .;             /* BSS */
+  .bss : {
+       *(.bss.stack_aligned)
+       *(.bss.page_aligned)
+       *(.bss)
+       } :text
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+       *(.text.exit)
+       *(.data.exit)
+       *(.exitcall.exit)
+       }
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/rangeset.c
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/common/rangeset.c     Mon Jan  9 11:22:17 2006
@@ -0,0 +1,399 @@
+/******************************************************************************
+ * rangeset.c
+ * 
+ * Creation, maintenance and automatic destruction of per-domain sets of
+ * numeric ranges.
+ * 
+ * Copyright (c) 2005, K A Fraser
+ */
+
+#include <xen/sched.h>
+#include <xen/rangeset.h>
+
+/* An inclusive range [s,e] and pointer to next range in ascending order. */
+struct range {
+    struct list_head list;
+    unsigned long s, e;
+};
+
+struct rangeset {
+    /* Owning domain and threaded list of rangesets. */
+    struct list_head rangeset_list;
+    struct domain   *domain;
+
+    /* Ordered list of ranges contained in this set, and protecting lock. */
+    struct list_head range_list;
+    spinlock_t       lock;
+
+    /* Pretty-printing name. */
+    char             name[32];
+
+    /* RANGESETF flags. */
+    unsigned int     flags;
+};
+
+/*****************************
+ * Private range functions hide the underlying linked-list implementation.
+ */
+
+/* Find highest range lower than or containing s. NULL if no such range. */
+static struct range *find_range(
+    struct rangeset *r, unsigned long s)
+{
+    struct range *x = NULL, *y;
+
+    list_for_each_entry ( y, &r->range_list, list )
+    {
+        if ( y->s > s )
+            break;
+        x = y;
+    }
+
+    return x;
+}
+
+/* Return the lowest range in the set r, or NULL if r is empty. */
+static struct range *first_range(
+    struct rangeset *r)
+{
+    if ( list_empty(&r->range_list) )
+        return NULL;
+    return list_entry(r->range_list.next, struct range, list);
+}
+
+/* Return range following x in ascending order, or NULL if x is the highest. */
+static struct range *next_range(
+    struct rangeset *r, struct range *x)
+{
+    if ( x->list.next == &r->range_list )
+        return NULL;
+    return list_entry(x->list.next, struct range, list);
+}
+
+/* Insert range y after range x in r. Insert as first range if x is NULL. */
+static void insert_range(
+    struct rangeset *r, struct range *x, struct range *y)
+{
+    list_add(&y->list, (x != NULL) ? &x->list : &r->range_list);
+}
+
+/* Remove a range from its list and free it. */
+static void destroy_range(
+    struct range *x)
+{
+    list_del(&x->list);
+    xfree(x);
+}
+
+/*****************************
+ * Core public functions
+ */
+
+int rangeset_add_range(
+    struct rangeset *r, unsigned long s, unsigned long e)
+{
+    struct range *x, *y;
+    int rc = 0;
+
+    spin_lock(&r->lock);
+
+    x = find_range(r, s);
+    y = find_range(r, e);
+
+    if ( x == y )
+    {
+        if ( (x == NULL) || ((x->e < s) && ((x->e + 1) != s)) )
+        {
+            x = xmalloc(struct range);
+            if ( x == NULL )
+            {
+                rc = -ENOMEM;
+                goto out;
+            }
+
+            x->s = s;
+            x->e = e;
+
+            insert_range(r, y, x);
+        }
+        else if ( x->e < e )
+            x->e = e;
+    }
+    else
+    {
+        if ( x == NULL )
+        {
+            x = first_range(r);
+            x->s = s;
+        }
+        else if ( (x->e < s) && ((x->e + 1) != s) )
+        {
+            x = next_range(r, x);
+            x->s = s;
+        }
+        
+        x->e = (y->e > e) ? y->e : e;
+
+        for ( ; ; )
+        {
+            y = next_range(r, x);
+            if ( (y == NULL) || (y->e > x->e) )
+                break;
+            destroy_range(y);
+        }
+    }
+
+    y = next_range(r, x);
+    if ( (y != NULL) && ((x->e + 1) == y->s) )
+    {
+        x->e = y->e;
+        destroy_range(y);
+    }
+
+ out:
+    spin_unlock(&r->lock);
+    return rc;
+}
+
+int rangeset_remove_range(
+    struct rangeset *r, unsigned long s, unsigned long e)
+{
+    struct range *x, *y, *t;
+    int rc = 0;
+
+    spin_lock(&r->lock);
+
+    x = find_range(r, s);
+    y = find_range(r, e);
+
+    if ( x == y )
+    {
+        if ( (x == NULL) || (x->e < s) )
+            goto out;
+
+        if ( (x->s < s) && (x->e > e) )
+        {
+            y = xmalloc(struct range);
+            if ( y == NULL )
+            {
+                rc = -ENOMEM;
+                goto out;
+            }
+
+            y->s = e + 1;
+            y->e = x->e;
+            x->e = s - 1;
+
+            insert_range(r, x, y);
+        }
+        else if ( (x->s == s) && (x->e <= e) )
+            destroy_range(x);
+        else if ( x->s == s )
+            x->s = e + 1;
+        else if ( x->e <= e )
+            x->e = s - 1;
+    }
+    else
+    {
+        if ( x == NULL )
+            x = first_range(r);
+
+        if ( x->s < s )
+        {
+            x->e = s - 1;
+            x = next_range(r, x);
+        }
+
+        while ( x != y )
+        {
+            t = x;
+            x = next_range(r, x);
+            destroy_range(t);
+        }
+
+        x->s = e + 1;
+        if ( x->s > x->e )
+            destroy_range(x);
+    }
+
+ out:
+    spin_unlock(&r->lock);
+    return rc;
+}
+
+int rangeset_contains_range(
+    struct rangeset *r, unsigned long s, unsigned long e)
+{
+    struct range *x;
+    int contains;
+
+    spin_lock(&r->lock);
+    x = find_range(r, s);
+    contains = (x && (x->e >= e));
+    spin_unlock(&r->lock);
+
+    return contains;
+}
+
+int rangeset_add_singleton(
+    struct rangeset *r, unsigned long s)
+{
+    return rangeset_add_range(r, s, s);
+}
+
+int rangeset_remove_singleton(
+    struct rangeset *r, unsigned long s)
+{
+    return rangeset_remove_range(r, s, s);
+}
+
+int rangeset_contains_singleton(
+    struct rangeset *r, unsigned long s)
+{
+    return rangeset_contains_range(r, s, s);
+}
+
+int rangeset_is_empty(
+    struct rangeset *r)
+{
+    return list_empty(&r->range_list);
+}
+
+struct rangeset *rangeset_new(
+    struct domain *d, char *name, unsigned int flags)
+{
+    struct rangeset *r;
+
+    r = xmalloc(struct rangeset);
+    if ( r == NULL )
+        return NULL;
+
+    spin_lock_init(&r->lock);
+    INIT_LIST_HEAD(&r->range_list);
+
+    BUG_ON(flags & ~RANGESETF_prettyprint_hex);
+    r->flags = flags;
+
+    if ( name != NULL )
+    {
+        strncpy(r->name, name, sizeof(r->name));
+        r->name[sizeof(r->name)-1] = '\0';
+    }
+    else
+    {
+        sprintf(r->name, "(no name)");
+    }
+
+    if ( (r->domain = d) != NULL )
+    {
+        spin_lock(&d->rangesets_lock);
+        list_add(&r->rangeset_list, &d->rangesets);
+        spin_unlock(&d->rangesets_lock);
+    }
+
+    return r;
+}
+
+void rangeset_destroy(
+    struct rangeset *r)
+{
+    struct range *x;
+
+    if ( r == NULL )
+        return;
+
+    if ( r->domain != NULL )
+    {
+        spin_lock(&r->domain->rangesets_lock);
+        list_del(&r->rangeset_list);
+        spin_unlock(&r->domain->rangesets_lock);
+    }
+
+    while ( (x = first_range(r)) != NULL )
+        destroy_range(x);
+
+    xfree(r);
+}
+
+void rangeset_domain_initialise(
+    struct domain *d)
+{
+    INIT_LIST_HEAD(&d->rangesets);
+    spin_lock_init(&d->rangesets_lock);
+}
+
+void rangeset_domain_destroy(
+    struct domain *d)
+{
+    struct rangeset *r;
+
+    while ( !list_empty(&d->rangesets) )
+    {
+        r = list_entry(d->rangesets.next, struct rangeset, rangeset_list);
+
+        BUG_ON(r->domain != d);
+        r->domain = NULL;
+        list_del(&r->rangeset_list);
+
+        rangeset_destroy(r);
+    }
+}
+
+/*****************************
+ * Pretty-printing functions
+ */
+
+static void print_limit(struct rangeset *r, unsigned long s)
+{
+    printk((r->flags & RANGESETF_prettyprint_hex) ? "%lx" : "%lu", s);
+}
+
+void rangeset_printk(
+    struct rangeset *r)
+{
+    int nr_printed = 0;
+    struct range *x;
+
+    spin_lock(&r->lock);
+
+    printk("%-10s {", r->name);
+
+    for ( x = first_range(r); x != NULL; x = next_range(r, x) )
+    {
+        if ( nr_printed++ )
+            printk(",");
+        printk(" ");
+        print_limit(r, x->s);
+        if ( x->s != x->e )
+        {
+            printk("-");
+            print_limit(r, x->e);
+        }
+    }
+
+    printk(" }");
+
+    spin_unlock(&r->lock);
+}
+
+void rangeset_domain_printk(
+    struct domain *d)
+{
+    struct rangeset *r;
+
+    printk("Rangesets belonging to domain %u:\n", d->domain_id);
+
+    spin_lock(&d->rangesets_lock);
+
+    if ( list_empty(&d->rangesets) )
+        printk("    None\n");
+
+    list_for_each_entry ( r, &d->rangesets, rangeset_list )
+    {
+        printk("    ");
+        rangeset_printk(r);
+        printk("\n");
+    }
+
+    spin_unlock(&d->rangesets_lock);
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/iocap.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-ia64/iocap.h      Mon Jan  9 11:22:17 2006
@@ -0,0 +1,10 @@
+/******************************************************************************
+ * iocap.h
+ * 
+ * Architecture-specific per-domain I/O capabilities.
+ */
+
+#ifndef __IA64_IOCAP_H__
+#define __IA64_IOCAP_H__
+
+#endif /* __IA64_IOCAP_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/iocap.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/iocap.h       Mon Jan  9 11:22:17 2006
@@ -0,0 +1,20 @@
+/******************************************************************************
+ * iocap.h
+ * 
+ * Architecture-specific per-domain I/O capabilities.
+ */
+
+#ifndef __X86_IOCAP_H__
+#define __X86_IOCAP_H__
+
+#define ioports_permit_access(d, s, e)                  \
+    rangeset_add_range((d)->arch.ioport_caps, s, e)
+#define ioports_deny_access(d, s, e)                    \
+    rangeset_remove_range((d)->arch.ioport_caps, s, e)
+#define ioports_access_permitted(d, s, e)               \
+    rangeset_contains_range((d)->arch.ioport_caps, s, e)
+
+#define cache_flush_permitted(d)                       \
+    (!rangeset_is_empty((d)->iomem_caps))
+
+#endif /* __X86_IOCAP_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/hvm/hvm_info_table.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/hvm/hvm_info_table.h   Mon Jan  9 11:22:17 2006
@@ -0,0 +1,24 @@
+/******************************************************************************
+ * hvm/hvm_info_table.h
+ * 
+ * HVM parameter and information table, written into guest memory map.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+
+#define HVM_INFO_PFN         0x09F
+#define HVM_INFO_OFFSET      0x800
+#define HVM_INFO_PADDR       ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET)
+
+struct hvm_info_table {
+    char        signature[8]; /* "HVM INFO" */
+    uint32_t    length;
+    uint8_t     checksum;
+    uint8_t     acpi_enabled;
+    uint8_t     apic_enabled;
+    uint8_t     pad[1];
+    uint32_t    nr_vcpus;
+};
+
+#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/hvm/ioreq.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/hvm/ioreq.h    Mon Jan  9 11:22:17 2006
@@ -0,0 +1,90 @@
+/*
+ * ioreq.h: I/O request definitions for device models
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef _IOREQ_H_
+#define _IOREQ_H_
+
+#define IOREQ_READ      1
+#define IOREQ_WRITE     0
+
+#define STATE_INVALID           0
+#define STATE_IOREQ_READY       1
+#define STATE_IOREQ_INPROCESS   2
+#define STATE_IORESP_READY      3
+#define STATE_IORESP_HOOK       4
+
+#define IOREQ_TYPE_PIO          0 /* pio */
+#define IOREQ_TYPE_COPY         1 /* mmio ops */
+#define IOREQ_TYPE_AND          2
+#define IOREQ_TYPE_OR           3
+#define IOREQ_TYPE_XOR          4
+
+/*
+ * VMExit dispatcher should cooperate with instruction decoder to
+ * prepare this structure and notify service OS and DM by sending
+ * virq
+ */
+typedef struct {
+    uint64_t addr;          /*  physical address            */
+    uint64_t size;          /*  size in bytes               */
+    uint64_t count;         /*  for rep prefixes            */
+    union {
+        uint64_t data;      /*  data                        */
+        void    *pdata;     /*  pointer to data             */
+    } u;
+    uint8_t state:4;
+    uint8_t pdata_valid:1;  /* if 1, use pdata above        */
+    uint8_t dir:1;          /*  1=read, 0=write             */
+    uint8_t df:1;
+    uint8_t type;           /* I/O type                     */
+} ioreq_t;
+
+#define MAX_VECTOR      256
+#define BITS_PER_BYTE   8
+#define INTR_LEN        (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t)))
+#define INTR_LEN_32     (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t)))
+
+typedef struct {
+    uint16_t    pic_elcr;
+    uint16_t    pic_irr;
+    uint16_t    pic_last_irr;
+    uint16_t    pic_clear_irr;
+    int         eport; /* Event channel port */
+} global_iodata_t;
+
+typedef struct {
+    ioreq_t     vp_ioreq;
+} vcpu_iodata_t;
+
+typedef struct {
+    global_iodata_t sp_global;
+    vcpu_iodata_t   vcpu_iodata[1];
+} shared_iopage_t;
+
+#endif /* _IOREQ_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/hvm/vmx_assist.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/hvm/vmx_assist.h       Mon Jan  9 11:22:17 2006
@@ -0,0 +1,97 @@
+/*
+ * vmx_assist.h: Context definitions for the VMXASSIST world switch.
+ *
+ * Leendert van Doorn, leendert@xxxxxxxxxxxxxx
+ * Copyright (c) 2005, International Business Machines Corporation.
+ */
+
+#ifndef _VMX_ASSIST_H_
+#define _VMX_ASSIST_H_
+
+#define VMXASSIST_BASE         0xD0000
+#define VMXASSIST_MAGIC        0x17101966
+#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8)
+
+#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12)
+#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4)
+
+#ifndef __ASSEMBLY__
+
+union vmcs_arbytes {
+    struct arbyte_fields {
+        unsigned int seg_type : 4,
+            s         : 1,
+            dpl       : 2,
+            p         : 1, 
+            reserved0 : 4,
+            avl       : 1,
+            reserved1 : 1,     
+            default_ops_size: 1,
+            g         : 1,
+            null_bit  : 1, 
+            reserved2 : 15;
+    } fields;
+    unsigned int bytes;
+};
+
+/*
+ * World switch state
+ */
+typedef struct vmx_assist_context {
+    uint32_t  eip;        /* execution pointer */
+    uint32_t  esp;        /* stack pointer */
+    uint32_t  eflags;     /* flags register */
+    uint32_t  cr0;
+    uint32_t  cr3;        /* page table directory */
+    uint32_t  cr4;
+    uint32_t  idtr_limit; /* idt */
+    uint32_t  idtr_base;
+    uint32_t  gdtr_limit; /* gdt */
+    uint32_t  gdtr_base;
+    uint32_t  cs_sel;     /* cs selector */
+    uint32_t  cs_limit;
+    uint32_t  cs_base;
+    union vmcs_arbytes cs_arbytes;
+    uint32_t  ds_sel;     /* ds selector */
+    uint32_t  ds_limit;
+    uint32_t  ds_base;
+    union vmcs_arbytes ds_arbytes;
+    uint32_t  es_sel;     /* es selector */
+    uint32_t  es_limit;
+    uint32_t  es_base;
+    union vmcs_arbytes es_arbytes;
+    uint32_t  ss_sel;     /* ss selector */
+    uint32_t  ss_limit;
+    uint32_t  ss_base;
+    union vmcs_arbytes ss_arbytes;
+    uint32_t  fs_sel;     /* fs selector */
+    uint32_t  fs_limit;
+    uint32_t  fs_base;
+    union vmcs_arbytes fs_arbytes;
+    uint32_t  gs_sel;     /* gs selector */
+    uint32_t  gs_limit;
+    uint32_t  gs_base;
+    union vmcs_arbytes gs_arbytes;
+    uint32_t  tr_sel;     /* task selector */
+    uint32_t  tr_limit;
+    uint32_t  tr_base;
+    union vmcs_arbytes tr_arbytes;
+    uint32_t  ldtr_sel;   /* ldtr selector */
+    uint32_t  ldtr_limit;
+    uint32_t  ldtr_base;
+    union vmcs_arbytes ldtr_arbytes;
+} vmx_assist_context_t;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _VMX_ASSIST_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/iocap.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/iocap.h   Mon Jan  9 11:22:17 2006
@@ -0,0 +1,34 @@
+/******************************************************************************
+ * iocap.h
+ * 
+ * Per-domain I/O capabilities.
+ */
+
+#ifndef __XEN_IOCAP_H__
+#define __XEN_IOCAP_H__
+
+#include <xen/rangeset.h>
+#include <asm/iocap.h>
+
+#define iomem_permit_access(d, s, e)                    \
+    rangeset_add_range((d)->iomem_caps, s, e)
+#define iomem_deny_access(d, s, e)                      \
+    rangeset_remove_range((d)->iomem_caps, s, e)
+#define iomem_access_permitted(d, s, e)                 \
+    rangeset_contains_range((d)->iomem_caps, s, e)
+
+#define irq_permit_access(d, i)                         \
+    rangeset_add_singleton((d)->irq_caps, i)
+#define irq_deny_access(d, i)                           \
+    rangeset_remove_singleton((d)->irq_caps, i)
+#define irqs_permit_access(d, s, e)                     \
+    rangeset_add_range((d)->irq_caps, s, e)
+#define irqs_deny_access(d, s, e)                       \
+    rangeset_remove_range((d)->irq_caps, s, e)
+#define irq_access_permitted(d, i)                      \
+    rangeset_contains_singleton((d)->irq_caps, i)
+
+#define multipage_allocation_permitted(d)               \
+    (!rangeset_is_empty((d)->iomem_caps))
+
+#endif /* __XEN_IOCAP_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/rangeset.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/rangeset.h        Mon Jan  9 11:22:17 2006
@@ -0,0 +1,71 @@
+/******************************************************************************
+ * rangeset.h
+ * 
+ * Creation, maintenance and automatic destruction of per-domain sets of
+ * numeric ranges.
+ * 
+ * Copyright (c) 2005, K A Fraser
+ */
+
+#ifndef __XEN_RANGESET_H__
+#define __XEN_RANGESET_H__
+
+struct domain;
+struct rangeset;
+
+/*
+ * Initialise/destroy per-domain rangeset information.
+ * 
+ * It is invalid to create or destroy a rangeset belonging to a domain @d
+ * before rangeset_domain_initialise(d) returns or after calling
+ * rangeset_domain_destroy(d).
+ */
+void rangeset_domain_initialise(
+    struct domain *d);
+void rangeset_domain_destroy(
+    struct domain *d);
+
+/*
+ * Create/destroy a rangeset. Optionally attach to specified domain @d for
+ * auto-destruction when the domain dies. A name may be specified, for use
+ * in debug pretty-printing, and various RANGESETF flags (defined below).
+ * 
+ * It is invalid to perform any operation on a rangeset @r after calling
+ * rangeset_destroy(r).
+ */
+struct rangeset *rangeset_new(
+    struct domain *d, char *name, unsigned int flags);
+void rangeset_destroy(
+    struct rangeset *r);
+
+/* Flags for passing to rangeset_new(). */
+ /* Pretty-print range limits in hexadecimal. */
+#define _RANGESETF_prettyprint_hex 0
+#define RANGESETF_prettyprint_hex  (1U << _RANGESETF_prettyprint_hex)
+
+int __must_check rangeset_is_empty(
+    struct rangeset *r);
+
+/* Add/remove/query a numeric range. */
+int __must_check rangeset_add_range(
+    struct rangeset *r, unsigned long s, unsigned long e);
+int __must_check rangeset_remove_range(
+    struct rangeset *r, unsigned long s, unsigned long e);
+int __must_check rangeset_contains_range(
+    struct rangeset *r, unsigned long s, unsigned long e);
+
+/* Add/remove/query a single number. */
+int __must_check rangeset_add_singleton(
+    struct rangeset *r, unsigned long s);
+int __must_check rangeset_remove_singleton(
+    struct rangeset *r, unsigned long s);
+int __must_check rangeset_contains_singleton(
+    struct rangeset *r, unsigned long s);
+
+/* Rangeset pretty printing. */
+void rangeset_printk(
+    struct rangeset *r);
+void rangeset_domain_printk(
+    struct domain *d);
+
+#endif /* __XEN_RANGESET_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_32/xen.lds
--- a/xen/arch/x86/x86_32/xen.lds       Mon Jan  9 11:19:55 2006
+++ /dev/null   Mon Jan  9 11:22:17 2006
@@ -1,79 +0,0 @@
-/* ld script to make i386 Linux kernel
- * Written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>
- * Modified for i386 Xen by Keir Fraser
- */
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(start)
-PHDRS
-{
-  text PT_LOAD ;
-}
-SECTIONS
-{
-  . = 0xFF000000 + 0x100000;
-  _text = .;                   /* Text and read-only data */
-  .text : {
-       *(.text)
-       *(.fixup)
-       *(.gnu.warning)
-       } :text =0x9090
-  .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
-
-  _etext = .;                  /* End of text section */
-
-  .rodata : { *(.rodata) *(.rodata.*) } :text
-
-  . = ALIGN(32);               /* Exception table */
-  __start___ex_table = .;
-  __ex_table : { *(__ex_table) } :text
-  __stop___ex_table = .;
-
-  . = ALIGN(32);               /* Pre-exception table */
-  __start___pre_ex_table = .;
-  __pre_ex_table : { *(__pre_ex_table) } :text
-  __stop___pre_ex_table = .;
-
-  .data : {                    /* Data */
-       *(.data)
-       CONSTRUCTORS
-       } :text
-
-  . = ALIGN(4096);             /* Init code and data */
-  __init_begin = .;
-  .text.init : { *(.text.init) } :text
-  .data.init : { *(.data.init) } :text
-  . = ALIGN(32);
-  __setup_start = .;
-  .setup.init : { *(.setup.init) } :text
-  __setup_end = .;
-  __initcall_start = .;
-  .initcall.init : { *(.initcall.init) } :text
-  __initcall_end = .;
-  . = ALIGN(8192);
-  __init_end = .;
-
-  __bss_start = .;             /* BSS */
-  .bss : {
-       *(.bss.twopage_aligned)
-       *(.bss.page_aligned)
-       *(.bss)
-       } :text
-  _end = . ;
-
-  /* Sections to be discarded */
-  /DISCARD/ : {
-       *(.text.exit)
-       *(.data.exit)
-       *(.exitcall.exit)
-       }
-
-  /* Stabs debugging sections.  */
-  .stab 0 : { *(.stab) }
-  .stabstr 0 : { *(.stabstr) }
-  .stab.excl 0 : { *(.stab.excl) }
-  .stab.exclstr 0 : { *(.stab.exclstr) }
-  .stab.index 0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  .comment 0 : { *(.comment) }
-}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_64/xen.lds
--- a/xen/arch/x86/x86_64/xen.lds       Mon Jan  9 11:19:55 2006
+++ /dev/null   Mon Jan  9 11:22:17 2006
@@ -1,77 +0,0 @@
-/* Excerpts written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx> */
-/* Modified for x86-64 Xen by Keir Fraser */
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
-OUTPUT_ARCH(i386:x86-64)
-ENTRY(start)
-PHDRS
-{
-  text PT_LOAD ;
-}
-SECTIONS
-{
-  . = 0xFFFF830000100000;
-  _text = .;                   /* Text and read-only data */
-  .text : {
-       *(.text)
-       *(.fixup)
-       *(.gnu.warning)
-       } :text = 0x9090
-  .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
-
-  _etext = .;                  /* End of text section */
-
-  .rodata : { *(.rodata) *(.rodata.*) } :text
-
-  . = ALIGN(32);               /* Exception table */
-  __start___ex_table = .;
-  __ex_table : { *(__ex_table) } :text
-  __stop___ex_table = .;
-
-  . = ALIGN(32);                /* Pre-exception table */
-  __start___pre_ex_table = .;
-  __pre_ex_table : { *(__pre_ex_table) } :text
-  __stop___pre_ex_table = .;
-
-  .data : {                    /* Data */
-       *(.data)
-       CONSTRUCTORS
-       } :text
-
-  . = ALIGN(4096);             /* Init code and data */
-  __init_begin = .;
-  .text.init : { *(.text.init) } :text
-  .data.init : { *(.data.init) } :text
-  . = ALIGN(32);
-  __setup_start = .;
-  .setup.init : { *(.setup.init) } :text
-  __setup_end = .;
-  __initcall_start = .;
-  .initcall.init : { *(.initcall.init) } :text
-  __initcall_end = .;
-  . = ALIGN(8192);
-  __init_end = .;
-
-  __bss_start = .;             /* BSS */
-  .bss : {
-       *(.bss.twopage_aligned)
-       *(.bss.page_aligned)
-       *(.bss)
-       } :text
-  _end = . ;
-
-  /* Sections to be discarded */
-  /DISCARD/ : {
-       *(.text.exit)
-       *(.data.exit)
-       *(.exitcall.exit)
-       }
-
-  /* Stabs debugging sections.  */
-  .stab 0 : { *(.stab) }
-  .stabstr 0 : { *(.stabstr) }
-  .stab.excl 0 : { *(.stab.excl) }
-  .stab.exclstr 0 : { *(.stab.exclstr) }
-  .stab.index 0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  .comment 0 : { *(.comment) }
-}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/physdev.h
--- a/xen/include/asm-x86/physdev.h     Mon Jan  9 11:19:55 2006
+++ /dev/null   Mon Jan  9 11:22:17 2006
@@ -1,17 +0,0 @@
-/******************************************************************************
- * physdev.h
- */
-
-#ifndef __XEN_PHYSDEV_H__
-#define __XEN_PHYSDEV_H__
-
-#include <public/physdev.h>
-
-void physdev_modify_ioport_access_range(
-    struct domain *d, int enable, int port, int num );
-void physdev_destroy_state(struct domain *d);
-int domain_iomem_in_pfn(struct domain *p, unsigned long pfn);
-long do_physdev_op(physdev_op_t *uop);
-void physdev_init_dom0(struct domain *d);
-
-#endif /* __XEN_PHYSDEV_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/io/ioreq.h
--- a/xen/include/public/io/ioreq.h     Mon Jan  9 11:19:55 2006
+++ /dev/null   Mon Jan  9 11:22:17 2006
@@ -1,91 +0,0 @@
-/*
- * ioreq.h: I/O request definitions for device models
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#ifndef _IOREQ_H_
-#define _IOREQ_H_
-
-#define IOREQ_READ      1
-#define IOREQ_WRITE     0
-
-#define STATE_INVALID           0
-#define STATE_IOREQ_READY       1
-#define STATE_IOREQ_INPROCESS   2
-#define STATE_IORESP_READY      3
-#define STATE_IORESP_HOOK       4
-
-#define IOREQ_TYPE_PIO          0 /* pio */
-#define IOREQ_TYPE_COPY         1 /* mmio ops */
-#define IOREQ_TYPE_AND          2
-#define IOREQ_TYPE_OR           3
-#define IOREQ_TYPE_XOR          4
-
-/*
- * VMExit dispatcher should cooperate with instruction decoder to
- * prepare this structure and notify service OS and DM by sending
- * virq 
- */
-typedef struct {
-    uint64_t addr;   /*  physical address            */
-    uint64_t size;   /*  size in bytes               */
-    uint64_t count;  /*  for rep prefixes            */
-    union {
-        uint64_t data;           /*  data                        */
-        void    *pdata;          /*  pointer to data             */
-    } u;
-    uint8_t state:4;
-    uint8_t pdata_valid:1; /* if 1, use pdata above  */
-    uint8_t dir:1;   /*  1=read, 0=write             */
-    uint8_t df:1;
-    uint8_t type;    /* I/O type                     */
-} ioreq_t;
-
-#define MAX_VECTOR      256
-#define BITS_PER_BYTE   8
-#define INTR_LEN        (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t)))
-#define INTR_LEN_32     (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t)))
-
-typedef struct {
-    uint16_t  pic_elcr;
-    uint16_t   pic_irr;
-    uint16_t   pic_last_irr;
-    uint16_t   pic_clear_irr;
-    int      eport; /* Event channel port */
-} global_iodata_t;
-
-typedef struct {
-    ioreq_t       vp_ioreq;
-    unsigned long vp_intr[INTR_LEN];
-} vcpu_iodata_t;
-
-typedef struct {
-    global_iodata_t sp_global;
-    vcpu_iodata_t   vcpu_iodata[1];
-} shared_iopage_t;
-
-#endif /* _IOREQ_H_ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/vmx_assist.h
--- a/xen/include/public/vmx_assist.h   Mon Jan  9 11:19:55 2006
+++ /dev/null   Mon Jan  9 11:22:17 2006
@@ -1,97 +0,0 @@
-/*
- * vmx_assist.h: Context definitions for the VMXASSIST world switch.
- *
- * Leendert van Doorn, leendert@xxxxxxxxxxxxxx
- * Copyright (c) 2005, International Business Machines Corporation.
- */
-
-#ifndef _VMX_ASSIST_H_
-#define _VMX_ASSIST_H_
-
-#define VMXASSIST_BASE         0xD0000
-#define VMXASSIST_MAGIC        0x17101966
-#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8)
-
-#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12)
-#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4)
-
-#ifndef __ASSEMBLY__
-
-union vmcs_arbytes {
-    struct arbyte_fields {
-        unsigned int seg_type : 4,
-            s         : 1,
-            dpl       : 2,
-            p         : 1, 
-            reserved0 : 4,
-            avl       : 1,
-            reserved1 : 1,     
-            default_ops_size: 1,
-            g         : 1,
-            null_bit  : 1, 
-            reserved2 : 15;
-    } fields;
-    unsigned int bytes;
-};
-
-/*
- * World switch state
- */
-typedef struct vmx_assist_context {
-    uint32_t  eip;        /* execution pointer */
-    uint32_t  esp;        /* stack pointer */
-    uint32_t  eflags;     /* flags register */
-    uint32_t  cr0;
-    uint32_t  cr3;        /* page table directory */
-    uint32_t  cr4;
-    uint32_t  idtr_limit; /* idt */
-    uint32_t  idtr_base;
-    uint32_t  gdtr_limit; /* gdt */
-    uint32_t  gdtr_base;
-    uint32_t  cs_sel;     /* cs selector */
-    uint32_t  cs_limit;
-    uint32_t  cs_base;
-    union vmcs_arbytes cs_arbytes;
-    uint32_t  ds_sel;     /* ds selector */
-    uint32_t  ds_limit;
-    uint32_t  ds_base;
-    union vmcs_arbytes ds_arbytes;
-    uint32_t  es_sel;     /* es selector */
-    uint32_t  es_limit;
-    uint32_t  es_base;
-    union vmcs_arbytes es_arbytes;
-    uint32_t  ss_sel;     /* ss selector */
-    uint32_t  ss_limit;
-    uint32_t  ss_base;
-    union vmcs_arbytes ss_arbytes;
-    uint32_t  fs_sel;     /* fs selector */
-    uint32_t  fs_limit;
-    uint32_t  fs_base;
-    union vmcs_arbytes fs_arbytes;
-    uint32_t  gs_sel;     /* gs selector */
-    uint32_t  gs_limit;
-    uint32_t  gs_base;
-    union vmcs_arbytes gs_arbytes;
-    uint32_t  tr_sel;     /* task selector */
-    uint32_t  tr_limit;
-    uint32_t  tr_base;
-    union vmcs_arbytes tr_arbytes;
-    uint32_t  ldtr_sel;   /* ldtr selector */
-    uint32_t  ldtr_limit;
-    uint32_t  ldtr_base;
-    union vmcs_arbytes ldtr_arbytes;
-} vmx_assist_context_t;
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _VMX_ASSIST_H_ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.