[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1175627091 21600
# Node ID f378c424e0ced4cbc584e5c6125d065f1cc05d0c
# Parent  fc9e2f7920c95229caaf5ad8fc44965dd891f600
# Parent  7e431ea834a877b1f0c90bdb1e6f1346da4e81cc
merge with xen-unstable.hg
---
 README                                           |   22 
 docs/src/user.tex                                |    4 
 linux-2.6-xen-sparse/arch/ia64/Kconfig           |    9 
 linux-2.6-xen-sparse/drivers/xen/Kconfig         |   16 
 linux-2.6-xen-sparse/drivers/xen/Makefile        |    7 
 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c |   38 
 linux-2.6-xen-sparse/drivers/xen/core/Makefile   |    3 
 linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile |    1 
 linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c |  973 +++++++++++++++++++++++
 linux-2.6-xen-sparse/drivers/xen/util.c          |   22 
 linux-2.6-xen-sparse/include/linux/mm.h          |    4 
 linux-2.6-xen-sparse/include/xen/driver_util.h   |    3 
 linux-2.6-xen-sparse/include/xen/public/gntdev.h |  105 ++
 linux-2.6-xen-sparse/mm/memory.c                 |    9 
 tools/blktap/drivers/qcow2raw.c                  |    9 
 tools/examples/xmexample.hvm                     |    4 
 tools/ioemu/target-i386-dm/helper2.c             |   33 
 tools/ioemu/vl.c                                 |    3 
 tools/ioemu/vl.h                                 |    6 
 tools/ioemu/xenstore.c                           |   69 +
 tools/libxc/ia64/xc_ia64_linux_restore.c         |   51 -
 tools/libxc/xc_core.c                            |    4 
 tools/libxc/xc_core_x86.c                        |   12 
 tools/libxc/xc_hvm_restore.c                     |   14 
 tools/libxc/xc_hvm_save.c                        |    7 
 tools/libxc/xc_linux.c                           |  156 +++
 tools/libxc/xc_linux_restore.c                   |   85 +-
 tools/libxc/xc_linux_save.c                      |   66 -
 tools/libxc/xc_resume.c                          |    4 
 tools/libxc/xenctrl.h                            |   59 +
 tools/libxc/xenguest.h                           |    9 
 tools/libxc/xg_private.h                         |    9 
 tools/python/xen/lowlevel/scf/scf.c              |    2 
 tools/python/xen/xend/XendCheckpoint.py          |   13 
 tools/python/xen/xend/XendConfig.py              |    3 
 tools/python/xen/xend/XendDomainInfo.py          |    8 
 tools/python/xen/xend/balloon.py                 |   18 
 tools/python/xen/xend/image.py                   |    3 
 tools/python/xen/xend/osdep.py                   |   50 +
 tools/python/xen/xend/server/SrvServer.py        |    4 
 tools/python/xen/xend/server/relocate.py         |    8 
 tools/python/xen/xm/create.py                    |    6 
 tools/python/xen/xm/main.py                      |    8 
 tools/python/xen/xm/xenapi_create.py             |    1 
 tools/xcutils/xc_restore.c                       |   33 
 tools/xenstat/xentop/xentop.c                    |    2 
 xen/arch/x86/hvm/hvm.c                           |    9 
 xen/arch/x86/hvm/intercept.c                     |   38 
 xen/arch/x86/hvm/io.c                            |   11 
 xen/arch/x86/hvm/platform.c                      |   20 
 xen/arch/x86/hvm/rtc.c                           |    8 
 xen/arch/x86/hvm/svm/vmcb.c                      |   28 
 xen/arch/x86/hvm/vmx/vmcs.c                      |    2 
 xen/arch/x86/hvm/vmx/vmx.c                       |   13 
 xen/arch/x86/mm.c                                |    3 
 xen/arch/x86/mm/hap/hap.c                        |   68 -
 xen/arch/x86/mm/shadow/multi.c                   |    4 
 xen/arch/x86/setup.c                             |    4 
 xen/arch/x86/time.c                              |    2 
 xen/arch/x86/traps.c                             |   17 
 xen/arch/x86/x86_32/traps.c                      |    7 
 xen/arch/x86/x86_64/traps.c                      |   10 
 xen/common/domain.c                              |  121 ++
 xen/common/domctl.c                              |    5 
 xen/common/page_alloc.c                          |   12 
 xen/common/symbols.c                             |   12 
 xen/drivers/char/console.c                       |    8 
 xen/include/asm-x86/domain.h                     |    1 
 xen/include/asm-x86/hvm/io.h                     |    2 
 xen/include/asm-x86/hvm/support.h                |    1 
 xen/include/asm-x86/hvm/vmx/vmcs.h               |    1 
 xen/include/asm-x86/processor.h                  |    8 
 xen/include/asm-x86/time.h                       |    5 
 xen/include/public/hvm/ioreq.h                   |    1 
 xen/include/xen/sched.h                          |   12 
 75 files changed, 2055 insertions(+), 353 deletions(-)

diff -r fc9e2f7920c9 -r f378c424e0ce README
--- a/README    Fri Mar 30 17:18:42 2007 -0600
+++ b/README    Tue Apr 03 13:04:51 2007 -0600
@@ -177,3 +177,25 @@ 5. To rebuild a kernel with a modified c
    an initial ram disk, just like a native system e.g.
     # depmod 2.6.16-xen
     # mkinitrd -v -f --with=aacraid --with=sd_mod --with=scsi_mod 
initrd-2.6.16-xen.img 2.6.16-xen
+
+
+Python Runtime Libraries
+========================
+
+Xend (the Xen daemon) has the following runtime dependencies:
+
+    * Python 2.3 or later.
+      In many distros, the XML-aspects to the standard library
+      (xml.dom.minidom etc) are broken out into a separate python-xml package.
+      This is also required.
+
+          URL:    http://www.python.org/
+          Debian: python, python-xml
+
+    * For optional SSL support, pyOpenSSL:
+          URL:    http://pyopenssl.sourceforge.net/
+          Debian: python-pyopenssl
+
+    * For optional PAM support, PyPAM:
+          URL:    http://www.pangalactic.org/PyPAM/
+          Debian: python-pam
diff -r fc9e2f7920c9 -r f378c424e0ce docs/src/user.tex
--- a/docs/src/user.tex Fri Mar 30 17:18:42 2007 -0600
+++ b/docs/src/user.tex Tue Apr 03 13:04:51 2007 -0600
@@ -3250,6 +3250,10 @@ editing \path{grub.conf}.
 \item [ dma\_emergency\_pool=xxx ] Specify lower bound on size of DMA
   pool below which ordinary allocations will fail rather than fall
   back to allocating from the DMA pool.
+\item [ hap ] Instruct Xen to detect hardware-assisted paging support, such
+  as AMD-V's nested paging or Intel\textregistered VT's extended paging. If 
+  available, Xen will use hardware-assisted paging instead of shadow paging 
+  for guest memory management.
 \end{description}
 
 In addition, the following options may be specified on the Xen command
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/arch/ia64/Kconfig
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig    Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig    Tue Apr 03 13:04:51 2007 -0600
@@ -576,15 +576,6 @@ source "crypto/Kconfig"
 # override default values of drivers/xen/Kconfig
 #
 if XEN
-config XEN_UTIL
-       default n
-
-config XEN_BALLOON
-       default y
-
-config XEN_REBOOT
-       default y
-
 config XEN_SMPBOOT
        default n
 endif
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/Kconfig
--- a/linux-2.6-xen-sparse/drivers/xen/Kconfig  Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig  Tue Apr 03 13:04:51 2007 -0600
@@ -253,22 +253,6 @@ config NO_IDLE_HZ
        bool
        default y
 
-config XEN_UTIL
-       bool
-       default y
-
-config XEN_BALLOON
-       bool
-       default y
-
-config XEN_DEVMEM
-       bool
-       default y
-
-config XEN_REBOOT
-       bool
-       default y
-
 config XEN_SMPBOOT
        bool
        default y
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Tue Apr 03 13:04:51 2007 -0600
@@ -3,10 +3,11 @@ obj-y += evtchn/
 obj-y  += evtchn/
 obj-y  += privcmd/
 obj-y  += xenbus/
+obj-y  += gntdev/
+obj-y  += balloon/
+obj-y  += char/
 
-obj-$(CONFIG_XEN_UTIL)                 += util.o
-obj-$(CONFIG_XEN_BALLOON)              += balloon/
-obj-$(CONFIG_XEN_DEVMEM)               += char/
+obj-y  += util.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
 obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
diff -r fc9e2f7920c9 -r f378c424e0ce 
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Fri Mar 30 17:18:42 
2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Tue Apr 03 13:04:51 
2007 -0600
@@ -44,6 +44,7 @@
 #include <asm/hypervisor.h>
 #include "common.h"
 #include <xen/balloon.h>
+#include <xen/driver_util.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
@@ -55,30 +56,6 @@
 
 #define MAX_TAP_DEV 256     /*the maximum number of tapdisk ring devices    */
 #define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */
-
-
-struct class *xen_class;
-EXPORT_SYMBOL_GPL(xen_class);
-
-/*
- * Setup the xen class.  This should probably go in another file, but
- * since blktap is the only user of it so far, it gets to keep it.
- */
-int setup_xen_class(void)
-{
-       int ret;
-
-       if (xen_class)
-               return 0;
-
-       xen_class = class_create(THIS_MODULE, "xen");
-       if ((ret = IS_ERR(xen_class))) {
-               xen_class = NULL;
-               return ret;
-       }
-
-       return 0;
-}
 
 /*
  * The maximum number of requests that can be outstanding at any time
@@ -347,6 +324,7 @@ static const struct file_operations blkt
 
 static tap_blkif_t *get_next_free_dev(void)
 {
+       struct class *class;
        tap_blkif_t *info;
        int minor;
 
@@ -409,9 +387,10 @@ found:
                wmb();
                tapfds[minor] = info;
 
-               class_device_create(xen_class, NULL,
-                                   MKDEV(blktap_major, minor), NULL,
-                                   "blktap%d", minor);
+               if ((class = get_xen_class()) != NULL)
+                       class_device_create(class, NULL,
+                                           MKDEV(blktap_major, minor), NULL,
+                                           "blktap%d", minor);
        }
 
 out:
@@ -1487,6 +1466,7 @@ static int __init blkif_init(void)
 static int __init blkif_init(void)
 {
        int i, ret;
+       struct class *class;
 
        if (!is_running_on_xen())
                return -ENODEV;
@@ -1522,7 +1502,7 @@ static int __init blkif_init(void)
        DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
 
        /* Make sure the xen class exists */
-       if (!setup_xen_class()) {
+       if ((class = get_xen_class()) != NULL) {
                /*
                 * This will allow udev to create the blktap ctrl device.
                 * We only want to create blktap0 first.  We don't want
@@ -1530,7 +1510,7 @@ static int __init blkif_init(void)
                 * We only create the device when a request of a new device is
                 * made.
                 */
-               class_device_create(xen_class, NULL,
+               class_device_create(class, NULL,
                                    MKDEV(blktap_major, 0), NULL,
                                    "blktap0");
        } else {
diff -r fc9e2f7920c9 -r f378c424e0ce 
linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Fri Mar 30 17:18:42 
2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Tue Apr 03 13:04:51 
2007 -0600
@@ -2,12 +2,11 @@
 # Makefile for the linux kernel.
 #
 
-obj-y := evtchn.o gnttab.o features.o
+obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o
 
 obj-$(CONFIG_PROC_FS)          += xen_proc.o
 obj-$(CONFIG_SYSFS)            += hypervisor_sysfs.o
 obj-$(CONFIG_HOTPLUG_CPU)      += cpu_hotplug.o
 obj-$(CONFIG_XEN_SYSFS)                += xen_sysfs.o
-obj-$(CONFIG_XEN_REBOOT)       += reboot.o machine_reboot.o
 obj-$(CONFIG_XEN_SMPBOOT)      += smpboot.o
 obj-$(CONFIG_KEXEC)            += machine_kexec.o
diff -r fc9e2f7920c9 -r f378c424e0ce 
linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile  Tue Apr 03 13:04:51 
2007 -0600
@@ -0,0 +1,1 @@
+obj-y  := gntdev.o
diff -r fc9e2f7920c9 -r f378c424e0ce 
linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c  Tue Apr 03 13:04:51 
2007 -0600
@@ -0,0 +1,973 @@
+/******************************************************************************
+ * gntdev.c
+ * 
+ * Device for accessing (in user-space) pages that have been granted by other
+ * domains.
+ *
+ * Copyright (c) 2006-2007, D G Murray.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <asm/atomic.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <xen/gnttab.h>
+#include <asm/hypervisor.h>
+#include <xen/balloon.h>
+#include <xen/evtchn.h>
+#include <xen/driver_util.h>
+
+#include <linux/types.h>
+#include <xen/public/gntdev.h>
+
+
+#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@xxxxxxxxxxxx>"
+#define DRIVER_DESC   "User-space granted page access driver"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+
+#define MAX_GRANTS 128
+
+/* A slot can be in one of three states:
+ *
+ * 0. GNTDEV_SLOT_INVALID:
+ *    This slot is not associated with a grant reference, and is therefore free
+ *    to be overwritten by a new grant reference.
+ *
+ * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
+ *    This slot is associated with a grant reference (via the 
+ *    IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
+ *
+ * 2. GNTDEV_SLOT_MAPPED:
+ *    This slot is associated with a grant reference, and has been mmap()-ed.
+ */
+typedef enum gntdev_slot_state {
+       GNTDEV_SLOT_INVALID = 0,
+       GNTDEV_SLOT_NOT_YET_MAPPED,
+       GNTDEV_SLOT_MAPPED
+} gntdev_slot_state_t;
+
+#define GNTDEV_INVALID_HANDLE    -1
+#define GNTDEV_FREE_LIST_INVALID -1
+/* Each opened instance of gntdev is associated with a list of grants,
+ * represented by an array of elements of the following type,
+ * gntdev_grant_info_t.
+ */
+typedef struct gntdev_grant_info {
+       gntdev_slot_state_t state;
+       union {
+               uint32_t free_list_index;
+               struct {
+                       domid_t domid;
+                       grant_ref_t ref;
+                       grant_handle_t kernel_handle;
+                       grant_handle_t user_handle;
+                       uint64_t dev_bus_addr;
+               } valid;
+       } u;
+} gntdev_grant_info_t;
+
+/* Private data structure, which is stored in the file pointer for files
+ * associated with this device.
+ */
+typedef struct gntdev_file_private_data {
+  
+       /* Array of grant information. */
+       gntdev_grant_info_t grants[MAX_GRANTS];
+
+       /* Read/write semaphore used to protect the grants array. */
+       struct rw_semaphore grants_sem;
+
+       /* An array of indices of free slots in the grants array.
+        * N.B. An entry in this list may temporarily have the value
+        * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
+        * from the list by the contiguous allocator, but the list has not yet
+        * been compressed. However, this is not visible across invocations of
+        * the device.
+        */
+       int32_t free_list[MAX_GRANTS];
+       
+       /* The number of free slots in the grants array. */
+       uint32_t free_list_size;
+
+       /* Read/write semaphore used to protect the free list. */
+       struct rw_semaphore free_list_sem;
+       
+       /* Index of the next slot after the most recent contiguous allocation, 
+        * for use in a next-fit allocator.
+        */
+       uint32_t next_fit_index;
+
+       /* Used to map grants into the kernel, before mapping them into user
+        * space.
+        */
+       struct page **foreign_pages;
+
+} gntdev_file_private_data_t;
+
+/* Module lifecycle operations. */
+static int __init gntdev_init(void);
+static void __exit gntdev_exit(void);
+
+module_init(gntdev_init);
+module_exit(gntdev_exit);
+
+/* File operations. */
+static int gntdev_open(struct inode *inode, struct file *flip);
+static int gntdev_release(struct inode *inode, struct file *flip);
+static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
+static int gntdev_ioctl (struct inode *inode, struct file *flip,
+                        unsigned int cmd, unsigned long arg);
+
+static struct file_operations gntdev_fops = {
+       .owner = THIS_MODULE,
+       .open = gntdev_open,
+       .release = gntdev_release,
+       .mmap = gntdev_mmap,
+       .ioctl = gntdev_ioctl
+};
+
+/* VM operations. */
+static void gntdev_vma_close(struct vm_area_struct *vma);
+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
+                             pte_t *ptep, int is_fullmm);
+
+static struct vm_operations_struct gntdev_vmops = {
+       .close = gntdev_vma_close,
+       .zap_pte = gntdev_clear_pte
+};
+
+/* Global variables. */
+
+/* The driver major number, for use when unregistering the driver. */
+static int gntdev_major;
+
+#define GNTDEV_NAME "gntdev"
+
+/* Memory mapping functions
+ * ------------------------
+ *
+ * Every granted page is mapped into both kernel and user space, and the two
+ * following functions return the respective virtual addresses of these pages.
+ *
+ * When shadow paging is disabled, the granted page is mapped directly into
+ * user space; when it is enabled, it is mapped into the kernel and remapped
+ * into user space using vm_insert_page() (see gntdev_mmap(), below).
+ */
+
+/* Returns the virtual address (in user space) of the @page_index'th page
+ * in the given VM area.
+ */
+static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
+                                           int page_index)
+{
+       return (unsigned long) vma->vm_start + (page_index << PAGE_SHIFT);
+}
+
+/* Returns the virtual address (in kernel space) of the @slot_index'th page
+ * mapped by the gntdev instance that owns the given private data struct.
+ */
+static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
+                                             int slot_index)
+{
+       unsigned long pfn;
+       void *kaddr;
+       pfn = page_to_pfn(priv->foreign_pages[slot_index]);
+       kaddr = pfn_to_kaddr(pfn);
+       return (unsigned long) kaddr;
+}
+
+/* Helper functions. */
+
+/* Adds information about a grant reference to the list of grants in the file's
+ * private data structure. Returns non-zero on failure. On success, sets the
+ * value of *offset to the offset that should be mmap()-ed in order to map the
+ * grant reference.
+ */
+static int add_grant_reference(struct file *flip,
+                              struct ioctl_gntdev_grant_ref *op,
+                              uint64_t *offset)
+{
+       gntdev_file_private_data_t *private_data 
+               = (gntdev_file_private_data_t *) flip->private_data;
+
+       uint32_t slot_index;
+
+       if (unlikely(private_data->free_list_size == 0)) {
+               return -ENOMEM;
+       }
+
+       slot_index = private_data->free_list[--private_data->free_list_size];
+
+       /* Copy the grant information into file's private data. */
+       private_data->grants[slot_index].state = GNTDEV_SLOT_NOT_YET_MAPPED;
+       private_data->grants[slot_index].u.valid.domid = op->domid;
+       private_data->grants[slot_index].u.valid.ref = op->ref;
+
+       /* The offset is calculated as the index of the chosen entry in the
+        * file's private data's array of grant information. This is then
+        * shifted to give an offset into the virtual "file address space".
+        */
+       *offset = slot_index << PAGE_SHIFT;
+
+       return 0;
+}
+
+/* Adds the @count grant references to the contiguous range in the slot array
+ * beginning at @first_slot. It is assumed that @first_slot was returned by a
+ * previous invocation of find_contiguous_free_range(), during the same
+ * invocation of the driver.
+ */
+static int add_grant_references(struct file *flip,
+                               int count,
+                               struct ioctl_gntdev_grant_ref *ops,
+                               uint32_t first_slot)
+{
+       gntdev_file_private_data_t *private_data 
+               = (gntdev_file_private_data_t *) flip->private_data;
+       int i;
+       
+       for (i = 0; i < count; ++i) {
+
+               /* First, mark the slot's entry in the free list as invalid. */
+               int free_list_index = 
+                       private_data->grants[first_slot+i].u.free_list_index;
+               private_data->free_list[free_list_index] = 
+                       GNTDEV_FREE_LIST_INVALID;
+
+               /* Now, update the slot. */
+               private_data->grants[first_slot+i].state = 
+                       GNTDEV_SLOT_NOT_YET_MAPPED;
+               private_data->grants[first_slot+i].u.valid.domid =
+                       ops[i].domid;
+               private_data->grants[first_slot+i].u.valid.ref = ops[i].ref;
+       }
+
+       return 0;       
+}
+
+/* Scans through the free list for @flip, removing entries that are marked as
+ * GNTDEV_SLOT_INVALID. This will reduce the recorded size of the free list to
+ * the number of valid entries.
+ */
+static void compress_free_list(struct file *flip) 
+{
+       gntdev_file_private_data_t *private_data 
+               = (gntdev_file_private_data_t *) flip->private_data;
+       int i, j = 0, old_size;
+       
+       old_size = private_data->free_list_size;
+       for (i = 0; i < old_size; ++i) {
+               if (private_data->free_list[i] != GNTDEV_FREE_LIST_INVALID) {
+                       private_data->free_list[j] = 
+                               private_data->free_list[i];
+                       ++j;
+               } else {
+                       --private_data->free_list_size;
+               }
+       }
+}
+
+/* Searches the grant array in the private data of @flip for a range of
+ * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state.
+ *
+ * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
+ */
+static int find_contiguous_free_range(struct file *flip,
+                                     uint32_t num_slots) 
+{
+       gntdev_file_private_data_t *private_data 
+               = (gntdev_file_private_data_t *) flip->private_data;
+       
+       int i;
+       int start_index = private_data->next_fit_index;
+       int range_start = 0, range_length;
+
+       if (private_data->free_list_size < num_slots) {
+               return -ENOMEM;
+       }
+
+       /* First search from the start_index to the end of the array. */
+       range_length = 0;
+       for (i = start_index; i < MAX_GRANTS; ++i) {
+               if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
+                       if (range_length == 0) {
+                               range_start = i;
+                       }
+                       ++range_length;
+                       if (range_length == num_slots) {
+                               return range_start;
+                       }
+               }
+       }
+       
+       /* Now search from the start of the array to the start_index. */
+       range_length = 0;
+       for (i = 0; i < start_index; ++i) {
+               if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
+                       if (range_length == 0) {
+                               range_start = i;
+                       }
+                       ++range_length;
+                       if (range_length == num_slots) {
+                               return range_start;
+                       }
+               }
+       }
+       
+       return -ENOMEM;
+}
+
+/* Interface functions. */
+
+/* Initialises the driver. Called when the module is loaded. */
+static int __init gntdev_init(void)
+{
+       struct class *class;
+       struct class_device *device;
+
+       if (!is_running_on_xen()) {
+               printk(KERN_ERR "You must be running Xen to use gntdev\n");
+               return -ENODEV;
+       }
+
+       gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
+       if (gntdev_major < 0)
+       {
+               printk(KERN_ERR "Could not register gntdev device\n");
+               return -ENOMEM;
+       }
+
+       /* Note that if the sysfs code fails, we will still initialise the
+        * device, and output the major number so that the device can be
+        * created manually using mknod.
+        */
+       if ((class = get_xen_class()) == NULL) {
+               printk(KERN_ERR "Error setting up xen_class\n");
+               printk(KERN_ERR "gntdev created with major number = %d\n", 
+                      gntdev_major);
+               return 0;
+       }
+
+       device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
+                                    NULL, GNTDEV_NAME);
+       if (IS_ERR(device)) {
+               printk(KERN_ERR "Error creating gntdev device in xen_class\n");
+               printk(KERN_ERR "gntdev created with major number = %d\n",
+                      gntdev_major);
+               return 0;
+       }
+
+       return 0;
+}
+
+/* Cleans up and unregisters the driver. Called when the driver is unloaded.
+ */
+static void __exit gntdev_exit(void)
+{
+       struct class *class;
+       if ((class = get_xen_class()) != NULL)
+               class_device_destroy(class, MKDEV(gntdev_major, 0));
+       unregister_chrdev(gntdev_major, GNTDEV_NAME);
+}
+
+/* Called when the device is opened. */
+static int gntdev_open(struct inode *inode, struct file *flip)
+{
+       gntdev_file_private_data_t *private_data;
+       int i;
+
+       try_module_get(THIS_MODULE);
+
+       /* Allocate space for the per-instance private data. */
+       private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
+       if (!private_data)
+               goto nomem_out;
+
+       /* Allocate space for the kernel-mapping of granted pages. */
+       private_data->foreign_pages = 
+               alloc_empty_pages_and_pagevec(MAX_GRANTS);
+       if (!private_data->foreign_pages)
+               goto nomem_out2;
+
+       /* Initialise the free-list, which contains all slots at first.
+        */
+       for (i = 0; i < MAX_GRANTS; ++i) {
+               private_data->free_list[MAX_GRANTS - i - 1] = i;
+               private_data->grants[i].state = GNTDEV_SLOT_INVALID;
+               private_data->grants[i].u.free_list_index = MAX_GRANTS - i - 1;
+       }
+       private_data->free_list_size = MAX_GRANTS;
+       private_data->next_fit_index = 0;
+
+       init_rwsem(&private_data->grants_sem);
+       init_rwsem(&private_data->free_list_sem);
+
+       flip->private_data = private_data;
+
+       return 0;
+
+nomem_out2:
+       kfree(private_data);
+nomem_out:
+       return -ENOMEM;
+}
+
+/* Called when the device is closed.
+ */
+static int gntdev_release(struct inode *inode, struct file *flip)
+{
+       if (flip->private_data) {
+               gntdev_file_private_data_t *private_data = 
+                       (gntdev_file_private_data_t *) flip->private_data;
+               if (private_data->foreign_pages) {
+                       free_empty_pages_and_pagevec
+                               (private_data->foreign_pages, MAX_GRANTS);
+               }
+               kfree(private_data);
+       }
+       module_put(THIS_MODULE);
+       return 0;
+}
+
+/* Called when an attempt is made to mmap() the device. The private data from
+ * @flip contains the list of grant references that can be mapped. The vm_pgoff
+ * field of @vma contains the index into that list that refers to the grant
+ * reference that will be mapped. Only mappings that are a multiple of
+ * PAGE_SIZE are handled.
+ */
+static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma) 
+{
+       struct gnttab_map_grant_ref op;
+       unsigned long slot_index = vma->vm_pgoff;
+       unsigned long kernel_vaddr, user_vaddr;
+       uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+       uint64_t ptep;
+       int ret;
+       int flags;
+       int i;
+       struct page *page;
+       gntdev_file_private_data_t *private_data = flip->private_data;
+
+       if (unlikely(!private_data)) {
+               printk(KERN_ERR "File's private data is NULL.\n");
+               return -EINVAL;
+       }
+
+       if (unlikely((size <= 0) || (size + slot_index) > MAX_GRANTS)) {
+               printk(KERN_ERR "Invalid number of pages or offset"
+                      "(num_pages = %d, first_slot = %ld).\n",
+                      size, slot_index);
+               return -ENXIO;
+       }
+
+       if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
+               printk(KERN_ERR "Writable mappings must be shared.\n");
+               return -EINVAL;
+       }
+
+       /* Slots must be in the NOT_YET_MAPPED state. */
+       down_write(&private_data->grants_sem);
+       for (i = 0; i < size; ++i) {
+               if (private_data->grants[slot_index + i].state != 
+                   GNTDEV_SLOT_NOT_YET_MAPPED) {
+                       printk(KERN_ERR "Slot (index = %ld) is in the wrong "
+                              "state (%d).\n", slot_index + i, 
+                              private_data->grants[slot_index + i].state);
+                       up_write(&private_data->grants_sem);
+                       return -EINVAL;
+               }
+       }
+
+       /* Install the hook for unmapping. */
+       vma->vm_ops = &gntdev_vmops;
+    
+       /* The VM area contains pages from another VM. */
+       vma->vm_flags |= VM_FOREIGN;
+       vma->vm_private_data = kzalloc(size * sizeof(struct page_struct *), 
+                                      GFP_KERNEL);
+       if (vma->vm_private_data == NULL) {
+               printk(KERN_ERR "Couldn't allocate mapping structure for VM "
+                      "area.\n");
+               return -ENOMEM;
+       }
+
+       /* This flag prevents Bad PTE errors when the memory is unmapped. */
+       vma->vm_flags |= VM_RESERVED;
+
+       /* This flag prevents this VM area being copied on a fork(). A better
+        * behaviour might be to explicitly carry out the appropriate mappings
+        * on fork(), but I don't know if there's a hook for this.
+        */
+       vma->vm_flags |= VM_DONTCOPY;
+
+#ifdef CONFIG_X86
+       /* This flag ensures that the page tables are not unpinned before the
+        * VM area is unmapped. Therefore Xen still recognises the PTE as
+        * belonging to an L1 pagetable, and the grant unmap operation will
+        * succeed, even if the process does not exit cleanly.
+        */
+       vma->vm_mm->context.has_foreign_mappings = 1;
+#endif
+
+       for (i = 0; i < size; ++i) {
+
+               flags = GNTMAP_host_map;
+               if (!(vma->vm_flags & VM_WRITE))
+                       flags |= GNTMAP_readonly;
+
+               kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
+               user_vaddr = get_user_vaddr(vma, i);
+               page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT);
+
+               gnttab_set_map_op(&op, kernel_vaddr, flags,   
+                                 private_data->grants[slot_index+i]
+                                 .u.valid.ref, 
+                                 private_data->grants[slot_index+i]
+                                 .u.valid.domid);
+
+               /* Carry out the mapping of the grant reference. */
+               ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 
+                                               &op, 1);
+               BUG_ON(ret);
+               if (op.status) {
+                       printk(KERN_ERR "Error mapping the grant reference "
+                              "into the kernel (%d). domid = %d; ref = %d\n",
+                              op.status,
+                              private_data->grants[slot_index+i]
+                              .u.valid.domid,
+                              private_data->grants[slot_index+i]
+                              .u.valid.ref);
+                       goto undo_map_out;
+               }
+
+               /* Store a reference to the page that will be mapped into user
+                * space.
+                */
+               ((struct page **) vma->vm_private_data)[i] = page;
+
+               /* Mark mapped page as reserved. */
+               SetPageReserved(page);
+
+               /* Record the grant handle, for use in the unmap operation. */
+               private_data->grants[slot_index+i].u.valid.kernel_handle = 
+                       op.handle;
+               private_data->grants[slot_index+i].u.valid.dev_bus_addr = 
+                       op.dev_bus_addr;
+               
+               private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED;
+               private_data->grants[slot_index+i].u.valid.user_handle =
+                       GNTDEV_INVALID_HANDLE;
+
+               /* Now perform the mapping to user space. */
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+
+                       /* NOT USING SHADOW PAGE TABLES. */
+                       /* In this case, we map the grant(s) straight into user
+                        * space.
+                        */
+
+                       /* Get the machine address of the PTE for the user 
+                        *  page.
+                        */
+                       if ((ret = create_lookup_pte_addr(vma->vm_mm, 
+                                                         vma->vm_start 
+                                                         + (i << PAGE_SHIFT), 
+                                                         &ptep)))
+                       {
+                               printk(KERN_ERR "Error obtaining PTE pointer "
+                                      "(%d).\n", ret);
+                               goto undo_map_out;
+                       }
+                       
+                       /* Configure the map operation. */
+               
+                       /* The reference is to be used by host CPUs. */
+                       flags = GNTMAP_host_map;
+                       
+                       /* Specifies a user space mapping. */
+                       flags |= GNTMAP_application_map;
+                       
+                       /* The map request contains the machine address of the
+                        * PTE to update.
+                        */
+                       flags |= GNTMAP_contains_pte;
+                       
+                       if (!(vma->vm_flags & VM_WRITE))
+                               flags |= GNTMAP_readonly;
+
+                       gnttab_set_map_op(&op, ptep, flags, 
+                                         private_data->grants[slot_index+i]
+                                         .u.valid.ref, 
+                                         private_data->grants[slot_index+i]
+                                         .u.valid.domid);
+
+                       /* Carry out the mapping of the grant reference. */
+                       ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+                                                       &op, 1);
+                       BUG_ON(ret);
+                       if (op.status) {
+                               printk(KERN_ERR "Error mapping the grant "
+                                      "reference into user space (%d). domid "
+                                      "= %d; ref = %d\n", op.status,
+                                      private_data->grants[slot_index+i].u
+                                      .valid.domid,
+                                      private_data->grants[slot_index+i].u
+                                      .valid.ref);
+                               goto undo_map_out;
+                       }
+                       
+                       /* Record the grant handle, for use in the unmap 
+                        * operation. 
+                        */
+                       private_data->grants[slot_index+i].u.
+                               valid.user_handle = op.handle;
+
+                       /* Update p2m structure with the new mapping. */
+                       set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
+                                           FOREIGN_FRAME(private_data->
+                                                         grants[slot_index+i]
+                                                         .u.valid.dev_bus_addr
+                                                         >> PAGE_SHIFT));
+               } else {
+                       /* USING SHADOW PAGE TABLES. */
+                       /* In this case, we simply insert the page into the VM
+                        * area. */
+                       ret = vm_insert_page(vma, user_vaddr, page);
+               }
+
+       }
+
+       up_write(&private_data->grants_sem);
+       return 0;
+
+undo_map_out:
+       /* If we have a mapping failure, the unmapping will be taken care of
+        * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
+        * All we need to do here is free the vma_private_data.
+        */
+       kfree(vma->vm_private_data);
+
+       /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
+        * to NULL on failure. However, we need this in gntdev_clear_pte() to
+        * unmap the grants. Therefore, we smuggle a reference to the file's
+        * private data in the VM area's private data pointer.
+        */
+       vma->vm_private_data = private_data;
+       
+       up_write(&private_data->grants_sem);
+
+       return -ENOMEM;
+}
+
+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
+                             pte_t *ptep, int is_fullmm)
+{
+       int slot_index, ret;
+       pte_t copy;
+       struct gnttab_unmap_grant_ref op;
+       gntdev_file_private_data_t *private_data;
+
+       /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
+        * to NULL on failure. However, we need this in gntdev_clear_pte() to
+        * unmap the grants. Therefore, we smuggle a reference to the file's
+        * private data in the VM area's private data pointer.
+        */
+       if (vma->vm_file) {
+               private_data = (gntdev_file_private_data_t *)
+                       vma->vm_file->private_data;
+       } else if (vma->vm_private_data) {
+               private_data = (gntdev_file_private_data_t *)
+                       vma->vm_private_data;
+       } else {
+               private_data = NULL; /* gcc warning */
+               BUG();
+       }
+
+       /* Copy the existing value of the PTE for returning. */
+       copy = *ptep;
+
+       /* Calculate the grant relating to this PTE. */
+       slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
+
+       /* Only unmap grants if the slot has been mapped. This could be being
+        * called from a failing mmap().
+        */
+       if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) {
+
+               /* First, we clear the user space mapping, if it has been made.
+                */
+               if (private_data->grants[slot_index].u.valid.user_handle !=
+                   GNTDEV_INVALID_HANDLE && 
+                   !xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* NOT USING SHADOW PAGE TABLES. */
+                       gnttab_set_unmap_op(&op, virt_to_machine(ptep), 
+                                           GNTMAP_contains_pte,
+                                           private_data->grants[slot_index]
+                                           .u.valid.user_handle);
+                       ret = HYPERVISOR_grant_table_op(
+                               GNTTABOP_unmap_grant_ref, &op, 1);
+                       BUG_ON(ret);
+                       if (op.status)
+                               printk("User unmap grant status = %d\n", 
+                                      op.status);
+               } else {
+                       /* USING SHADOW PAGE TABLES. */
+                       pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+               }
+
+               /* Finally, we unmap the grant from kernel space. */
+               gnttab_set_unmap_op(&op, 
+                                   get_kernel_vaddr(private_data, slot_index),
+                                   GNTMAP_host_map, 
+                                   private_data->grants[slot_index].u.valid
+                                   .kernel_handle);
+               ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 
+                                               &op, 1);
+               BUG_ON(ret);
+               if (op.status)
+                       printk("Kernel unmap grant status = %d\n", op.status);
+
+
+               /* Return slot to the not-yet-mapped state, so that it may be
+                * mapped again, or removed by a subsequent ioctl.
+                */
+               private_data->grants[slot_index].state = 
+                       GNTDEV_SLOT_NOT_YET_MAPPED;
+
+               /* Invalidate the physical to machine mapping for this page. */
+               set_phys_to_machine(__pa(get_kernel_vaddr(private_data, 
+                                                         slot_index)) 
+                                   >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+
+       } else {
+               pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+       }
+
+       return copy;
+}
+
+/* "Destructor" for a VM area.
+ */
+static void gntdev_vma_close(struct vm_area_struct *vma) {
+       if (vma->vm_private_data) {
+               kfree(vma->vm_private_data);
+       }
+}
+
+/* Called when an ioctl is made on the device.
+ */
+static int gntdev_ioctl(struct inode *inode, struct file *flip,
+                       unsigned int cmd, unsigned long arg)
+{
+       int rc = 0;
+       gntdev_file_private_data_t *private_data = 
+               (gntdev_file_private_data_t *) flip->private_data;
+
+       switch (cmd) {
+       case IOCTL_GNTDEV_MAP_GRANT_REF:
+       {
+               struct ioctl_gntdev_map_grant_ref op;
+               down_write(&private_data->grants_sem);
+               down_write(&private_data->free_list_sem);
+
+               if ((rc = copy_from_user(&op, (void __user *) arg, 
+                                        sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto map_out;
+               }
+               if (unlikely(op.count <= 0)) {
+                       rc = -EINVAL;
+                       goto map_out;
+               }
+
+               if (op.count == 1) {
+                       if ((rc = add_grant_reference(flip, &op.refs[0],
+                                                     &op.index)) < 0) {
+                               printk(KERN_ERR "Adding grant reference "
+                                      "failed (%d).\n", rc);
+                               goto map_out;
+                       }
+               } else {
+                       struct ioctl_gntdev_grant_ref *refs, *u;
+                       refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
+                       if (!refs) {
+                               rc = -ENOMEM;
+                               goto map_out;
+                       }
+                       u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
+                       if ((rc = copy_from_user(refs,
+                                                (void __user *)u,
+                                                sizeof(*refs) * op.count))) {
+                               printk(KERN_ERR "Copying refs from user failed"
+                                      " (%d).\n", rc);
+                               rc = -EINVAL;
+                               goto map_out;
+                       }
+                       if ((rc = find_contiguous_free_range(flip, op.count))
+                           < 0) {
+                               printk(KERN_ERR "Finding contiguous range "
+                                      "failed (%d).\n", rc);
+                               kfree(refs);
+                               goto map_out;
+                       }
+                       op.index = rc << PAGE_SHIFT;
+                       if ((rc = add_grant_references(flip, op.count,
+                                                      refs, rc))) {
+                               printk(KERN_ERR "Adding grant references "
+                                      "failed (%d).\n", rc);
+                               kfree(refs);
+                               goto map_out;
+                       }
+                       compress_free_list(flip);
+                       kfree(refs);
+               }
+               if ((rc = copy_to_user((void __user *) arg, 
+                                      &op, 
+                                      sizeof(op)))) {
+                       printk(KERN_ERR "Copying result back to user failed "
+                              "(%d)\n", rc);
+                       rc = -EFAULT;
+                       goto map_out;
+               }
+       map_out:
+               up_write(&private_data->grants_sem);
+               up_write(&private_data->free_list_sem);
+               return rc;
+       }
+       case IOCTL_GNTDEV_UNMAP_GRANT_REF:
+       {
+               struct ioctl_gntdev_unmap_grant_ref op;
+               int i, start_index;
+
+               down_write(&private_data->grants_sem);
+               down_write(&private_data->free_list_sem);
+
+               if ((rc = copy_from_user(&op, 
+                                        (void __user *) arg, 
+                                        sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto unmap_out;
+               }
+
+               start_index = op.index >> PAGE_SHIFT;
+
+               /* First, check that all pages are in the NOT_YET_MAPPED
+                * state.
+                */
+               for (i = 0; i < op.count; ++i) {
+                       if (unlikely
+                           (private_data->grants[start_index + i].state
+                            != GNTDEV_SLOT_NOT_YET_MAPPED)) {
+                               if (private_data->grants[start_index + i].state
+                                   == GNTDEV_SLOT_INVALID) {
+                                       printk(KERN_ERR
+                                              "Tried to remove an invalid "
+                                              "grant at offset 0x%x.",
+                                              (start_index + i) 
+                                              << PAGE_SHIFT);
+                                       rc = -EINVAL;
+                               } else {
+                                       printk(KERN_ERR
+                                              "Tried to remove a grant which "
+                                              "is currently mmap()-ed at "
+                                              "offset 0x%x.",
+                                              (start_index + i) 
+                                              << PAGE_SHIFT);
+                                       rc = -EBUSY;
+                               }
+                               goto unmap_out;
+                       }
+               }
+
+               /* Unmap pages and add them to the free list.
+                */
+               for (i = 0; i < op.count; ++i) {
+                       private_data->grants[start_index+i].state = 
+                               GNTDEV_SLOT_INVALID;
+                       private_data->grants[start_index+i].u.free_list_index =
+                               private_data->free_list_size;
+                       private_data->free_list[private_data->free_list_size] =
+                               start_index + i;
+                       ++private_data->free_list_size;
+               }
+               compress_free_list(flip);
+
+       unmap_out:
+               up_write(&private_data->grants_sem);
+               up_write(&private_data->free_list_sem);
+               return rc;
+       }
+       case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
+       {
+               struct ioctl_gntdev_get_offset_for_vaddr op;
+               struct vm_area_struct *vma;
+               unsigned long vaddr;
+
+               if ((rc = copy_from_user(&op, 
+                                        (void __user *) arg, 
+                                        sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto get_offset_out;
+               }
+               vaddr = (unsigned long)op.vaddr;
+
+               down_read(&current->mm->mmap_sem);              
+               vma = find_vma(current->mm, vaddr);
+               if (vma == NULL) {
+                       rc = -EFAULT;
+                       goto get_offset_unlock_out;
+               }
+               if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) {
+                       printk(KERN_ERR "The vaddr specified does not belong "
+                              "to a gntdev instance: %#lx\n", vaddr);
+                       rc = -EFAULT;
+                       goto get_offset_unlock_out;
+               }
+               if (vma->vm_start != vaddr) {
+                       printk(KERN_ERR "The vaddr specified in an "
+                              "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
+                              "the start of the VM area. vma->vm_start = "
+                              "%#lx; vaddr = %#lx\n",
+                              vma->vm_start, vaddr);
+                       rc = -EFAULT;
+                       goto get_offset_unlock_out;
+               }
+               op.offset = vma->vm_pgoff << PAGE_SHIFT;
+               op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+               up_read(&current->mm->mmap_sem);
+               if ((rc = copy_to_user((void __user *) arg, 
+                                      &op, 
+                                      sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto get_offset_out;
+               }
+               goto get_offset_out;
+       get_offset_unlock_out:
+               up_read(&current->mm->mmap_sem);
+       get_offset_out:
+               return rc;
+       }
+       default:
+               return -ENOIOCTLCMD;
+       }
+
+       return 0;
+}
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/util.c
--- a/linux-2.6-xen-sparse/drivers/xen/util.c   Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/util.c   Tue Apr 03 13:04:51 2007 -0600
@@ -4,6 +4,26 @@
 #include <linux/vmalloc.h>
 #include <asm/uaccess.h>
 #include <xen/driver_util.h>
+
+struct class *get_xen_class(void)
+{
+       static struct class *xen_class;
+
+       if (xen_class)
+               return xen_class;
+
+       xen_class = class_create(THIS_MODULE, "xen");
+       if (IS_ERR(xen_class)) {
+               printk("Failed to create xen sysfs class.\n");
+               xen_class = NULL;
+       }
+
+       return xen_class;
+}
+EXPORT_SYMBOL_GPL(get_xen_class);
+
+/* Todo: merge ia64 ('auto-translate physmap') versions of these functions. */
+#ifndef __ia64__
 
 static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
 {
@@ -46,3 +66,5 @@ void free_vm_area(struct vm_struct *area
        kfree(area);
 }
 EXPORT_SYMBOL_GPL(free_vm_area);
+
+#endif /* !__ia64__ */
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/include/linux/mm.h
--- a/linux-2.6-xen-sparse/include/linux/mm.h   Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/include/linux/mm.h   Tue Apr 03 13:04:51 2007 -0600
@@ -205,6 +205,10 @@ struct vm_operations_struct {
        /* notification that a previously read-only page is about to become
         * writable, if an error is returned it will cause a SIGBUS */
        int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
+       /* Area-specific function for clearing the PTE at @ptep. Returns the
+        * original value of @ptep. */
+       pte_t (*zap_pte)(struct vm_area_struct *vma, 
+                        unsigned long addr, pte_t *ptep, int is_fullmm);
 #ifdef CONFIG_NUMA
        int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
        struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
diff -r fc9e2f7920c9 -r f378c424e0ce 
linux-2.6-xen-sparse/include/xen/driver_util.h
--- a/linux-2.6-xen-sparse/include/xen/driver_util.h    Fri Mar 30 17:18:42 
2007 -0600
+++ b/linux-2.6-xen-sparse/include/xen/driver_util.h    Tue Apr 03 13:04:51 
2007 -0600
@@ -3,9 +3,12 @@
 #define __ASM_XEN_DRIVER_UTIL_H__
 
 #include <linux/vmalloc.h>
+#include <linux/device.h>
 
 /* Allocate/destroy a 'vmalloc' VM area. */
 extern struct vm_struct *alloc_vm_area(unsigned long size);
 extern void free_vm_area(struct vm_struct *area);
 
+extern struct class *get_xen_class(void);
+
 #endif /* __ASM_XEN_DRIVER_UTIL_H__ */
diff -r fc9e2f7920c9 -r f378c424e0ce 
linux-2.6-xen-sparse/include/xen/public/gntdev.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/xen/public/gntdev.h  Tue Apr 03 13:04:51 
2007 -0600
@@ -0,0 +1,105 @@
+/******************************************************************************
+ * gntdev.h
+ * 
+ * Interface to /dev/xen/gntdev.
+ * 
+ * Copyright (c) 2007, D G Murray
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTDEV_H__
+#define __LINUX_PUBLIC_GNTDEV_H__
+
+struct ioctl_gntdev_grant_ref {
+       /* The domain ID of the grant to be mapped. */
+       uint32_t domid;
+       /* The grant reference of the grant to be mapped. */
+       uint32_t ref;
+};
+
+/*
+ * Inserts the grant references into the mapping table of an instance
+ * of gntdev. N.B. This does not perform the mapping, which is deferred
+ * until mmap() is called with @index as the offset.
+ */
+#define IOCTL_GNTDEV_MAP_GRANT_REF \
+_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
+struct ioctl_gntdev_map_grant_ref {
+       /* IN parameters */
+       /* The number of grants to be mapped. */
+       uint32_t count;
+       uint32_t pad;
+       /* OUT parameters */
+       /* The offset to be used on a subsequent call to mmap(). */
+       uint64_t index;
+       /* Variable IN parameter. */
+       /* Array of grant references, of size @count. */
+       struct ioctl_gntdev_grant_ref refs[1];
+};
+
+/*
+ * Removes the grant references from the mapping table of an instance of
+ * gntdev. N.B. munmap() must be called on the relevant virtual address(es)
+ * before this ioctl is called, or an error will result.
+ */
+#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
+_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))       
+struct ioctl_gntdev_unmap_grant_ref {
+       /* IN parameters */
+       /* The offset that was returned by the corresponding map operation. */
+       uint64_t index;
+       /* The number of pages to be unmapped. */
+       uint32_t count;
+       uint32_t pad;
+};
+
+/*
+ * Returns the offset in the driver's address space that corresponds
+ * to @vaddr. This can be used to perform a munmap(), followed by an
+ * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by
+ * the caller. The number of pages that were allocated at the same time as
+ * @vaddr is returned in @count.
+ *
+ * N.B. Where more than one page has been mapped into a contiguous range, the
+ *      supplied @vaddr must correspond to the start of the range; otherwise
+ *      an error will result. It is only possible to munmap() the entire
+ *      contiguously-allocated range at once, and not any subrange thereof.
+ */
+#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
+_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr))
+struct ioctl_gntdev_get_offset_for_vaddr {
+       /* IN parameters */
+       /* The virtual address of the first mapped page in a range. */
+       uint64_t vaddr;
+       /* OUT parameters */
+       /* The offset that was used in the initial mmap() operation. */
+       uint64_t offset;
+       /* The number of pages mapped in the VM area that begins at @vaddr. */
+       uint32_t count;
+       uint32_t pad;
+};
+
+#endif /* __LINUX_PUBLIC_GNTDEV_H__ */
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c  Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/mm/memory.c  Tue Apr 03 13:04:51 2007 -0600
@@ -659,8 +659,12 @@ static unsigned long zap_pte_range(struc
                                     page->index > details->last_index))
                                        continue;
                        }
-                       ptent = ptep_get_and_clear_full(mm, addr, pte,
-                                                       tlb->fullmm);
+                       if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte))
+                               ptent = vma->vm_ops->zap_pte(vma, addr, pte,
+                                                            tlb->fullmm);
+                       else
+                               ptent = ptep_get_and_clear_full(mm, addr, pte,
+                                                               tlb->fullmm);
                        tlb_remove_tlb_entry(tlb, pte, addr);
                        if (unlikely(!page))
                                continue;
@@ -755,6 +759,7 @@ static unsigned long unmap_page_range(st
                details = NULL;
 
        BUG_ON(addr >= end);
+
        tlb_start_vma(tlb, vma);
        pgd = pgd_offset(vma->vm_mm, addr);
        do {
diff -r fc9e2f7920c9 -r f378c424e0ce tools/blktap/drivers/qcow2raw.c
--- a/tools/blktap/drivers/qcow2raw.c   Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/blktap/drivers/qcow2raw.c   Tue Apr 03 13:04:51 2007 -0600
@@ -51,7 +51,6 @@
 #define BLOCK_PROCESSSZ 4096
 
 static int maxfds, *qcowio_fd, *aio_fd, running = 1, complete = 0; 
-static int read_complete = 0, write_complete = 0;
 static int returned_read_events = 0, returned_write_events = 0;
 static int submit_events = 0;
 static uint32_t read_idx = 0, write_idx = 0;
@@ -109,8 +108,6 @@ static int send_write_responses(struct d
        written += BLOCK_PROCESSSZ;
        returned_write_events++;
        write_idx = idx;
-       if (complete && (returned_write_events == submit_events)) 
-               write_complete = 1;
 
        debug_output(written, dd->td_state->size << 9);
        free(private);
@@ -126,8 +123,6 @@ static int send_read_responses(struct di
        
        returned_read_events++;
        read_idx = idx;
-       if (complete && (returned_read_events == submit_events)) 
-               read_complete = 1;
        
        ret = ddaio.drv->td_queue_write(&ddaio, idx, BLOCK_PROCESSSZ>>9, 
private, 
                                        send_write_responses, idx, private);
@@ -136,7 +131,7 @@ static int send_read_responses(struct di
                return 0;
        }
 
-       if ( (complete && returned_read_events == submit_events) || 
+       if ( (returned_read_events == submit_events) || 
             (returned_read_events % 10 == 0) ) {
                ddaio.drv->td_submit(&ddaio);
        }
@@ -299,6 +294,7 @@ int main(int argc, char *argv[])
                        }
                
                        /*Attempt to read 4k sized blocks*/
+                       submit_events++;
                        ret = ddqcow.drv->td_queue_read(&ddqcow, i>>9,
                                                        BLOCK_PROCESSSZ>>9, 
buf, 
                                                        send_read_responses, 
i>>9, buf);
@@ -309,7 +305,6 @@ int main(int argc, char *argv[])
                                exit(-1);
                        } else {
                                i += BLOCK_PROCESSSZ;
-                               submit_events++;
                        }
 
                        if (i >= ddqcow.td_state->size<<9) {
diff -r fc9e2f7920c9 -r f378c424e0ce tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/examples/xmexample.hvm      Tue Apr 03 13:04:51 2007 -0600
@@ -180,6 +180,10 @@ serial='pty'
 
 
 #-----------------------------------------------------------------------------
+#    set the real time clock offset in seconds [default=0 i.e. same as dom0]
+#rtc_timeoffset=3600
+
+#-----------------------------------------------------------------------------
 #    start in full screen
 #full-screen=1   
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/target-i386-dm/helper2.c      Tue Apr 03 13:04:51 2007 -0600
@@ -73,6 +73,8 @@ int vcpus = 1;
 int vcpus = 1;
 
 int xc_handle;
+
+long time_offset = 0;
 
 shared_iopage_t *shared_page = NULL;
 
@@ -439,6 +441,34 @@ void cpu_ioreq_xor(CPUState *env, ioreq_
     req->data = tmp1;
 }
 
+void timeoffset_get()
+{
+    char *p;
+
+    p = xenstore_vm_read(domid, "rtc/timeoffset", NULL);
+    if (!p)
+       return;
+
+    if (sscanf(p, "%ld", &time_offset) == 1)
+       fprintf(logfile, "Time offset set %ld\n", time_offset);
+    else
+       time_offset = 0;
+
+    xc_domain_set_time_offset(xc_handle, domid, time_offset);
+
+    free(p);
+}
+
+void cpu_ioreq_timeoffset(CPUState *env, ioreq_t *req)
+{
+    char b[64];
+
+    time_offset += (ulong)req->data;
+
+    sprintf(b, "%ld", time_offset);
+    xenstore_vm_write(domid, "rtc/timeoffset", b);
+}
+
 void cpu_ioreq_xchg(CPUState *env, ioreq_t *req)
 {
     unsigned long tmp1;
@@ -478,6 +508,9 @@ void __handle_ioreq(CPUState *env, ioreq
     case IOREQ_TYPE_XCHG:
         cpu_ioreq_xchg(env, req);
         break;
+    case IOREQ_TYPE_TIMEOFFSET:
+       cpu_ioreq_timeoffset(env, req);
+       break;
     default:
         hw_error("Invalid ioreq type 0x%x\n", req->type);
     }
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/vl.c  Tue Apr 03 13:04:51 2007 -0600
@@ -6670,6 +6670,9 @@ int main(int argc, char **argv)
     }
     free(page_array);
 #endif
+
+    timeoffset_get();
+
 #else  /* !CONFIG_DM */
 
     phys_ram_base = qemu_vmalloc(phys_ram_size);
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/vl.h  Tue Apr 03 13:04:51 2007 -0600
@@ -1276,6 +1276,12 @@ int xenstore_unsubscribe_from_hotplug_st
                                              const char *inst,
                                              const char *token);
 
+int xenstore_vm_write(int domid, char *key, char *val);
+char *xenstore_vm_read(int domid, char *key, int *len);
+
+/* helper2.c */
+extern long time_offset;
+void timeoffset_get(void);
 
 /* xen_platform.c */
 void pci_xen_platform_init(PCIBus *bus);
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/xenstore.c
--- a/tools/ioemu/xenstore.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/xenstore.c    Tue Apr 03 13:04:51 2007 -0600
@@ -567,3 +567,72 @@ int xenstore_unsubscribe_from_hotplug_st
 
     return rc;
 }
+
+char *xenstore_vm_read(int domid, char *key, int *len)
+{
+    char *buf = NULL, *path = NULL, *value = NULL;
+
+    if (xsh == NULL)
+       goto out;
+
+    path = xs_get_domain_path(xsh, domid);
+    if (path == NULL) {
+       fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
+       goto out;
+    }
+
+    pasprintf(&buf, "%s/vm", path);
+    free(path);
+    path = xs_read(xsh, XBT_NULL, buf, NULL);
+    if (path == NULL) {
+       fprintf(logfile, "xs_read(%s): read error\n", buf);
+       goto out;
+    }
+
+    pasprintf(&buf, "%s/%s", path, key);
+    value = xs_read(xsh, XBT_NULL, buf, len);
+    if (value == NULL) {
+       fprintf(logfile, "xs_read(%s): read error\n", buf);
+       goto out;
+    }
+
+ out:
+    free(path);
+    free(buf);
+    return value;
+}
+
+int xenstore_vm_write(int domid, char *key, char *value)
+{
+    char *buf = NULL, *path = NULL;
+    int rc = -1;
+
+    if (xsh == NULL)
+       goto out;
+
+    path = xs_get_domain_path(xsh, domid);
+    if (path == NULL) {
+       fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
+       goto out;
+    }
+
+    pasprintf(&buf, "%s/vm", path);
+    free(path);
+    path = xs_read(xsh, XBT_NULL, buf, NULL);
+    if (path == NULL) {
+       fprintf(logfile, "xs_read(%s): read error\n", buf);
+       goto out;
+    }
+
+    pasprintf(&buf, "%s/%s", path, key);
+    rc = xs_write(xsh, XBT_NULL, buf, value, strlen(value));
+    if (rc) {
+       fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
+       goto out;
+    }
+
+ out:
+    free(path);
+    free(buf);
+    return rc;
+}
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/ia64/xc_ia64_linux_restore.c
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c  Tue Apr 03 13:04:51 2007 -0600
@@ -14,8 +14,14 @@
 
 #define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
 
-/* total number of pages used by the current guest */
-static unsigned long max_pfn;
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
+
+/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
+static unsigned long nr_pfns;
+
+/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */
+static unsigned long max_nr_pfns;
 
 static ssize_t
 read_exact(int fd, void *buf, size_t count)
@@ -57,9 +63,9 @@ read_page(int xc_handle, int io_fd, uint
 
 int
 xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
-                 unsigned long nr_pfns, unsigned int store_evtchn,
-                 unsigned long *store_mfn, unsigned int console_evtchn,
-                 unsigned long *console_mfn)
+                 unsigned long p2msize, unsigned long maxnrpfns,
+                 unsigned int store_evtchn, unsigned long *store_mfn,
+                 unsigned int console_evtchn, unsigned long *console_mfn)
 {
     DECLARE_DOMCTL;
     int rc = 1, i;
@@ -79,10 +85,13 @@ xc_linux_restore(int xc_handle, int io_f
     /* A temporary mapping of the guest's start_info page. */
     start_info_t *start_info;
 
-    max_pfn = nr_pfns;
-
-    DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn);
-
+    p2m_size = p2msize;
+    max_nr_pfns = maxnrpfns;
+
+    /* For info only */
+    nr_pfns = 0;
+
+    DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
 
     if (!read_exact(io_fd, &ver, sizeof(unsigned long))) {
        ERROR("Error when reading version");
@@ -99,29 +108,29 @@ xc_linux_restore(int xc_handle, int io_f
         return 1;
     }
 
-    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) {
         errno = ENOMEM;
         goto out;
     }
 
     /* Get pages.  */
-    page_array = malloc(max_pfn * sizeof(unsigned long));
+    page_array = malloc(p2m_size * sizeof(unsigned long));
     if (page_array == NULL) {
         ERROR("Could not allocate memory");
         goto out;
     }
 
-    for ( i = 0; i < max_pfn; i++ )
+    for ( i = 0; i < p2m_size; i++ )
         page_array[i] = i;
 
-    if ( xc_domain_memory_populate_physmap(xc_handle, dom, max_pfn,
+    if ( xc_domain_memory_populate_physmap(xc_handle, dom, p2m_size,
                                            0, 0, page_array) )
     {
         ERROR("Failed to allocate memory for %ld KB to dom %d.\n",
-              PFN_TO_KB(max_pfn), dom);
-        goto out;
-    }
-    DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(max_pfn));
+              PFN_TO_KB(p2m_size), dom);
+        goto out;
+    }
+    DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(p2m_size));
 
     if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) 
{
         ERROR("read: domain setup");
@@ -131,9 +140,9 @@ xc_linux_restore(int xc_handle, int io_f
     /* Build firmware (will be overwritten).  */
     domctl.domain = (domid_t)dom;
     domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query;
-    domctl.u.arch_setup.bp = ((nr_pfns - 3) << PAGE_SHIFT)
+    domctl.u.arch_setup.bp = ((p2m_size - 3) << PAGE_SHIFT)
                            + sizeof (start_info_t);
-    domctl.u.arch_setup.maxmem = (nr_pfns - 3) << PAGE_SHIFT;
+    domctl.u.arch_setup.maxmem = (p2m_size - 3) << PAGE_SHIFT;
     
     domctl.cmd = XEN_DOMCTL_arch_setup;
     if (xc_domctl(xc_handle, &domctl))
@@ -157,8 +166,6 @@ xc_linux_restore(int xc_handle, int io_f
         }
        if (gmfn == INVALID_MFN)
                break;
-
-       //DPRINTF("xc_linux_restore: page %lu/%lu at %lx\n", gmfn, max_pfn, 
pfn);
 
        if (read_page(xc_handle, io_fd, dom, gmfn) < 0)
                goto out;
@@ -281,7 +288,7 @@ xc_linux_restore(int xc_handle, int io_f
     /* Uncanonicalise the suspend-record frame number and poke resume rec. */
     start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                       PROT_READ | PROT_WRITE, gmfn);
-    start_info->nr_pages = max_pfn;
+    start_info->nr_pages = p2m_size;
     start_info->shared_info = shared_info_frame << PAGE_SHIFT;
     start_info->flags = 0;
     *store_mfn = start_info->store_mfn;
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_core.c     Tue Apr 03 13:04:51 2007 -0600
@@ -312,7 +312,7 @@ xc_domain_dumpcore_via_callback(int xc_h
 
     int auto_translated_physmap;
     xen_pfn_t *p2m = NULL;
-    unsigned long max_pfn = 0;
+    unsigned long p2m_size = 0;
     struct xen_dumpcore_p2m *p2m_array = NULL;
 
     uint64_t *pfn_array = NULL;
@@ -396,7 +396,7 @@ xc_domain_dumpcore_via_callback(int xc_h
         }
 
         sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo,
-                                   &p2m, &max_pfn);
+                                   &p2m, &p2m_size);
         if ( sts != 0 )
             goto out;
     }
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_core_x86.c
--- a/tools/libxc/xc_core_x86.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_core_x86.c Tue Apr 03 13:04:51 2007 -0600
@@ -38,7 +38,7 @@ xc_core_arch_memory_map_get(int xc_handl
                             xc_core_memory_map_t **mapp,
                             unsigned int *nr_entries)
 {
-    unsigned long max_pfn = max_gpfn(xc_handle, info->domid);
+    unsigned long p2m_size = max_gpfn(xc_handle, info->domid);
     xc_core_memory_map_t *map;
 
     map = malloc(sizeof(*map));
@@ -49,7 +49,7 @@ xc_core_arch_memory_map_get(int xc_handl
     }
 
     map->addr = 0;
-    map->size = max_pfn << PAGE_SHIFT;
+    map->size = p2m_size << PAGE_SHIFT;
 
     *mapp = map;
     *nr_entries = 1;
@@ -65,13 +65,13 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
     xen_pfn_t *live_p2m_frame_list_list = NULL;
     xen_pfn_t *live_p2m_frame_list = NULL;
     uint32_t dom = info->domid;
-    unsigned long max_pfn = max_gpfn(xc_handle, info->domid);
+    unsigned long p2m_size = max_gpfn(xc_handle, info->domid);
     int ret = -1;
     int err;
 
-    if ( max_pfn < info->nr_pages  )
+    if ( p2m_size < info->nr_pages  )
     {
-        ERROR("max_pfn < nr_pages -1 (%lx < %lx", max_pfn, info->nr_pages - 1);
+        ERROR("p2m_size < nr_pages -1 (%lx < %lx", p2m_size, info->nr_pages - 
1);
         goto out;
     }
 
@@ -106,7 +106,7 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
         goto out;
     }
 
-    *pfnp = max_pfn;
+    *pfnp = p2m_size;
 
     ret = 0;
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_hvm_restore.c
--- a/tools/libxc/xc_hvm_restore.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_hvm_restore.c      Tue Apr 03 13:04:51 2007 -0600
@@ -95,7 +95,7 @@ int xc_hvm_restore(int xc_handle, int io
     unsigned long pfn_array_size = max_pfn + 1;
 
     /* Number of pages of memory the guest has.  *Not* the same as max_pfn. */
-    unsigned long nr_pages = max_pfn + 1;
+    unsigned long nr_pages = max_pfn;
     /* MMIO hole doesn't contain RAM */
     if ( nr_pages >= HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT ) 
         nr_pages -= HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; 
@@ -270,7 +270,6 @@ int xc_hvm_restore(int xc_handle, int io
 
     }/*while 1*/
     
-/*    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);*/
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
 
@@ -279,13 +278,22 @@ int xc_hvm_restore(int xc_handle, int io
     else
         shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
 
+    /* Ensure we clear these pages */
+    if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
+         xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
+         xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) ) {
+        rc = -1;
+        goto out;
+    }
+
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
 
     /* caculate the store_mfn , wrong val cause hang when introduceDomain */
     *store_mfn = (v_end >> PAGE_SHIFT) - 2;
-    DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n", 
*store_mfn, v_end);
+    DPRINTF("hvm restore: calculate new store_mfn=0x%lx, v_end=0x%llx.\n", 
+            *store_mfn, v_end);
 
     if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) {
         ERROR("error read nr vcpu !\n");
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_hvm_save.c
--- a/tools/libxc/xc_hvm_save.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_hvm_save.c Tue Apr 03 13:04:51 2007 -0600
@@ -332,10 +332,10 @@ int xc_hvm_save(int xc_handle, int io_fd
 
     unsigned long total_sent    = 0;
 
-    DPRINTF("xc_hvm_save:dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, 
live=%d, debug=%d.\n",
-            dom, max_iters, max_factor, flags,
+    DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
+            "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags,
             live, debug);
-
+    
     /* If no explicit control parameters given, use defaults */
     if(!max_iters)
         max_iters = DEF_MAX_ITERS;
@@ -382,7 +382,6 @@ int xc_hvm_save(int xc_handle, int io_fd
         ERROR("HVM: Could not read magic PFN parameters");
         goto out;
     }
-
     DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, "
             "nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages); 
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_linux.c    Tue Apr 03 13:04:51 2007 -0600
@@ -2,6 +2,9 @@
  *
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * xc_gnttab functions:
+ * Copyright (c) 2007, D G Murray <Derek.Murray@xxxxxxxxxxxx>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -13,6 +16,7 @@
 
 #include <xen/memory.h>
 #include <xen/sys/evtchn.h>
+#include <xen/sys/gntdev.h>
 #include <unistd.h>
 #include <fcntl.h>
 
@@ -361,6 +365,158 @@ void discard_file_cache(int fd, int flus
 
  out:
     errno = saved_errno;
+}
+
+#define GNTTAB_DEV_NAME "/dev/xen/gntdev"
+
+int xc_gnttab_open(void)
+{
+    struct stat st;
+    int fd;
+    int devnum;
+    
+    devnum = xc_find_device_number("gntdev");
+    
+    /* Make sure any existing device file links to correct device. */
+    if ( (lstat(GNTTAB_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
+         (st.st_rdev != devnum) )
+        (void)unlink(GNTTAB_DEV_NAME);
+    
+reopen:
+    if ( (fd = open(GNTTAB_DEV_NAME, O_RDWR)) == -1 )
+    {
+        if ( (errno == ENOENT) &&
+             ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
+             (mknod(GNTTAB_DEV_NAME, S_IFCHR|0600, devnum) == 0) )
+            goto reopen;
+        
+        PERROR("Could not open grant table interface");
+        return -1;
+    }
+    
+    return fd;
+}
+
+int xc_gnttab_close(int xcg_handle)
+{
+    return close(xcg_handle);
+}
+
+void *xc_gnttab_map_grant_ref(int xcg_handle,
+                              uint32_t domid,
+                              uint32_t ref,
+                              int prot)
+{
+    struct ioctl_gntdev_map_grant_ref map;
+    void *addr;
+    
+    map.count = 1;
+    map.refs[0].domid = domid;
+    map.refs[0].ref   = ref;
+
+    if ( ioctl(xcg_handle, IOCTL_GNTDEV_MAP_GRANT_REF, &map) )
+        return NULL;
+    
+    addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, xcg_handle, map.index);
+    if ( addr == MAP_FAILED )
+    {
+        int saved_errno = errno;
+        struct ioctl_gntdev_unmap_grant_ref unmap_grant;
+        /* Unmap the driver slots used to store the grant information. */
+        unmap_grant.index = map.index;
+        unmap_grant.count = 1;
+        ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
+        errno = saved_errno;
+        return NULL;
+    }
+    
+    return addr;
+}
+
+void *xc_gnttab_map_grant_refs(int xcg_handle,
+                               uint32_t count,
+                               uint32_t *domids,
+                               uint32_t *refs,
+                               int prot)
+{
+    struct ioctl_gntdev_map_grant_ref *map;
+    void *addr = NULL;
+    int i;
+    
+    map = malloc(sizeof(*map) +
+                 (count-1) * sizeof(struct ioctl_gntdev_map_grant_ref));
+    if ( map == NULL )
+        return NULL;
+
+    for ( i = 0; i < count; i++ )
+    {
+        map->refs[i].domid = domids[i];
+        map->refs[i].ref   = refs[i];
+    }
+
+    map->count = count;
+    
+    if ( ioctl(xcg_handle, IOCTL_GNTDEV_MAP_GRANT_REF, map) )
+        goto out;
+
+    addr = mmap(NULL, PAGE_SIZE * count, prot, MAP_SHARED, xcg_handle,
+                map->index);
+    if ( addr == MAP_FAILED )
+    {
+        int saved_errno = errno;
+        struct ioctl_gntdev_unmap_grant_ref unmap_grant;
+        /* Unmap the driver slots used to store the grant information. */
+        unmap_grant.index = map->index;
+        unmap_grant.count = count;
+        ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
+        errno = saved_errno;
+        addr = NULL;
+    }
+
+ out:
+    free(map);
+    return addr;
+}
+
+int xc_gnttab_munmap(int xcg_handle,
+                     void *start_address,
+                     uint32_t count)
+{
+    struct ioctl_gntdev_get_offset_for_vaddr get_offset;
+    struct ioctl_gntdev_unmap_grant_ref unmap_grant;
+    int rc;
+
+    if ( start_address == NULL )
+    {
+        errno = EINVAL;
+        return -1;
+    }
+
+    /* First, it is necessary to get the offset which was initially used to
+     * mmap() the pages.
+     */
+    get_offset.vaddr = (unsigned long)start_address;
+    if ( (rc = ioctl(xcg_handle, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR, 
+                     &get_offset)) )
+        return rc;
+
+    if ( get_offset.count != count )
+    {
+        errno = EINVAL;
+        return -1;
+    }
+
+    /* Next, unmap the memory. */
+    if ( (rc = munmap(start_address, count * getpagesize())) )
+        return rc;
+    
+    /* Finally, unmap the driver slots used to store the grant information. */
+    unmap_grant.index = get_offset.offset;
+    unmap_grant.count = count;
+    if ( (rc = ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant)) )
+        return rc;
+
+    return 0;
 }
 
 /*
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_linux_restore.c    Tue Apr 03 13:04:51 2007 -0600
@@ -22,8 +22,14 @@ static unsigned long hvirt_start;
 /* #levels of page tables used by the current guest */
 static unsigned int pt_levels;
 
-/* total number of pages used by the current guest */
-static unsigned long max_pfn;
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
+
+/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
+static unsigned long nr_pfns;
+
+/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */
+static unsigned long max_nr_pfns;
 
 /* Live mapping of the table mapping each PFN to its current MFN. */
 static xen_pfn_t *live_p2m = NULL;
@@ -33,7 +39,6 @@ static xen_pfn_t *p2m = NULL;
 
 /* A table of P2M mappings in the current region */
 static xen_pfn_t *p2m_batch = NULL;
-
 
 static ssize_t
 read_exact(int fd, void *buf, size_t count)
@@ -85,11 +90,11 @@ static int uncanonicalize_pagetable(int 
         
         pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
         
-        if(pfn >= max_pfn) {
+        if(pfn >= p2m_size) {
             /* This "page table page" is probably not one; bail. */
             ERROR("Frame number in type %lu page table is out of range: "
-                  "i=%d pfn=0x%lx max_pfn=%lu",
-                  type >> 28, i, pfn, max_pfn);
+                  "i=%d pfn=0x%lx p2m_size=%lu",
+                  type >> 28, i, pfn, p2m_size);
             return 0;
         }
         
@@ -138,8 +143,9 @@ static int uncanonicalize_pagetable(int 
     return 1;
 }
 
-int xc_linux_restore(int xc_handle, int io_fd,
-                     uint32_t dom, unsigned long nr_pfns,
+
+int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
+                     unsigned long p2msize, unsigned long maxnrpfns,
                      unsigned int store_evtchn, unsigned long *store_mfn,
                      unsigned int console_evtchn, unsigned long *console_mfn)
 {
@@ -191,9 +197,13 @@ int xc_linux_restore(int xc_handle, int 
     unsigned int max_vcpu_id = 0;
     int new_ctxt_format = 0;
 
-    max_pfn = nr_pfns;
-
-    DPRINTF("xc_linux_restore start: max_pfn = %lx\n", max_pfn);
+    p2m_size    = p2msize;
+    max_nr_pfns = maxnrpfns;
+
+    /* For info only */
+    nr_pfns = 0;
+
+    DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
 
     /*
      * XXX For now, 32bit dom0's can only save/restore 32bit domUs
@@ -294,8 +304,8 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     /* We want zeroed memory so use calloc rather than malloc. */
-    p2m        = calloc(max_pfn, sizeof(xen_pfn_t));
-    pfn_type   = calloc(max_pfn, sizeof(unsigned long));
+    p2m        = calloc(p2m_size, sizeof(xen_pfn_t));
+    pfn_type   = calloc(p2m_size, sizeof(unsigned long));
     region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
     p2m_batch  = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
 
@@ -325,13 +335,13 @@ int xc_linux_restore(int xc_handle, int 
     }
     shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
 
-    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) {
         errno = ENOMEM;
         goto out;
     }
 
     /* Mark all PFNs as invalid; we allocate on demand */
-    for ( pfn = 0; pfn < max_pfn; pfn++ )
+    for ( pfn = 0; pfn < p2m_size; pfn++ )
         p2m[pfn] = INVALID_P2M_ENTRY;
 
     if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
@@ -352,7 +362,7 @@ int xc_linux_restore(int xc_handle, int 
 
         int j, nr_mfns = 0; 
 
-        this_pc = (n * 100) / max_pfn;
+        this_pc = (n * 100) / p2m_size;
         if ( (this_pc - prev_pc) >= 5 )
         {
             PPRINTF("\b\b\b\b%3d%%", this_pc);
@@ -436,6 +446,7 @@ int xc_linux_restore(int xc_handle, int 
                 if (p2m[pfn] == INVALID_P2M_ENTRY) {
                     /* We just allocated a new mfn above; update p2m */
                     p2m[pfn] = p2m_batch[nr_mfns++]; 
+                    nr_pfns++; 
                 }
 
                 /* setup region_mfn[] for batch map */
@@ -465,7 +476,7 @@ int xc_linux_restore(int xc_handle, int 
                 /* a bogus/unmapped page: skip it */
                 continue;
 
-            if ( pfn > max_pfn )
+            if ( pfn > p2m_size )
             {
                 ERROR("pfn out of range");
                 goto out;
@@ -518,7 +529,7 @@ int xc_linux_restore(int xc_handle, int 
             else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
             {
                 ERROR("Bogus page type %lx page table is out of range: "
-                    "i=%d max_pfn=%lu", pagetype, i, max_pfn);
+                    "i=%d p2m_size=%lu", pagetype, i, p2m_size);
                 goto out;
 
             }
@@ -598,7 +609,7 @@ int xc_linux_restore(int xc_handle, int 
         int j, k;
         
         /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
-        for ( i = 0; i < max_pfn; i++ )
+        for ( i = 0; i < p2m_size; i++ )
         {
             if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
                   XEN_DOMCTL_PFINFO_L3TAB) &&
@@ -646,7 +657,7 @@ int xc_linux_restore(int xc_handle, int 
         /* Second pass: find all L1TABs and uncanonicalize them */
         j = 0;
 
-        for ( i = 0; i < max_pfn; i++ )
+        for ( i = 0; i < p2m_size; i++ )
         {
             if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
                   XEN_DOMCTL_PFINFO_L1TAB) )
@@ -655,7 +666,7 @@ int xc_linux_restore(int xc_handle, int 
                 j++;
             }
 
-            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
+            if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) {
 
                 if (!(region_base = xc_map_foreign_batch(
                           xc_handle, dom, PROT_READ | PROT_WRITE,
@@ -689,7 +700,7 @@ int xc_linux_restore(int xc_handle, int 
      * will barf when doing the type-checking.
      */
     nr_pins = 0;
-    for ( i = 0; i < max_pfn; i++ )
+    for ( i = 0; i < p2m_size; i++ )
     {
         if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
             continue;
@@ -736,7 +747,7 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     DPRINTF("\b\b\b\b100%%\n");
-    DPRINTF("Memory reloaded.\n");
+    DPRINTF("Memory reloaded (%ld pages of max %ld)\n", nr_pfns, max_nr_pfns);
 
     /* Get the list of PFNs that are not in the psuedo-phys map */
     {
@@ -808,7 +819,7 @@ int xc_linux_restore(int xc_handle, int 
              * resume record.
              */
             pfn = ctxt.user_regs.edx;
-            if ((pfn >= max_pfn) ||
+            if ((pfn >= p2m_size) ||
                 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
                 ERROR("Suspend record frame number is bad");
                 goto out;
@@ -816,7 +827,7 @@ int xc_linux_restore(int xc_handle, int 
             ctxt.user_regs.edx = mfn = p2m[pfn];
             start_info = xc_map_foreign_range(
                 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
-            start_info->nr_pages = max_pfn;
+            start_info->nr_pages = p2m_size;
             start_info->shared_info = shared_info_frame << PAGE_SHIFT;
             start_info->flags = 0;
             *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
@@ -835,7 +846,7 @@ int xc_linux_restore(int xc_handle, int 
 
         for (j = 0; (512*j) < ctxt.gdt_ents; j++) {
             pfn = ctxt.gdt_frames[j];
-            if ((pfn >= max_pfn) ||
+            if ((pfn >= p2m_size) ||
                 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
                 ERROR("GDT frame number is bad");
                 goto out;
@@ -846,16 +857,16 @@ int xc_linux_restore(int xc_handle, int 
         /* Uncanonicalise the page table base pointer. */
         pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
 
-        if (pfn >= max_pfn) {
-            ERROR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
-                  pfn, max_pfn, pfn_type[pfn]);
+        if (pfn >= p2m_size) {
+            ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
+                  pfn, p2m_size, pfn_type[pfn]);
             goto out;
         }
 
         if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
              ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
             ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
-                  pfn, max_pfn, pfn_type[pfn],
+                  pfn, p2m_size, pfn_type[pfn],
                   (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
             goto out;
         }
@@ -867,16 +878,16 @@ int xc_linux_restore(int xc_handle, int 
         {
             pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
 
-            if (pfn >= max_pfn) {
-                ERROR("User PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
-                      pfn, max_pfn, pfn_type[pfn]);
+            if (pfn >= p2m_size) {
+                ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
+                      pfn, p2m_size, pfn_type[pfn]);
                 goto out;
             }
 
             if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
                  ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
                 ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
-                      pfn, max_pfn, pfn_type[pfn],
+                      pfn, p2m_size, pfn_type[pfn],
                       (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
                 goto out;
             }
@@ -915,7 +926,7 @@ int xc_linux_restore(int xc_handle, int 
     /* Uncanonicalise the pfn-to-mfn table frame-number list. */
     for (i = 0; i < P2M_FL_ENTRIES; i++) {
         pfn = p2m_frame_list[i];
-        if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
+        if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
             ERROR("PFN-to-MFN frame number is bad");
             goto out;
         }
@@ -930,8 +941,8 @@ int xc_linux_restore(int xc_handle, int 
         goto out;
     }
 
-    memcpy(live_p2m, p2m, P2M_SIZE);
-    munmap(live_p2m, P2M_SIZE);
+    memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
+    munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
 
     DPRINTF("Domain ready to be built.\n");
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_linux_save.c       Tue Apr 03 13:04:51 2007 -0600
@@ -25,7 +25,7 @@
 **
 */
 #define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
-#define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns   */
+#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
 
 
 /* max mfn of the whole machine */
@@ -37,8 +37,8 @@ static unsigned long hvirt_start;
 /* #levels of page tables used by the current guest */
 static unsigned int pt_levels;
 
-/* total number of pages used by the current guest */
-static unsigned long max_pfn;
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
 
 /* Live mapping of the table mapping each PFN to its current MFN. */
 static xen_pfn_t *live_p2m = NULL;
@@ -57,7 +57,7 @@ static unsigned long m2p_mfn0;
  */
 #define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
 (((_mfn) < (max_mfn)) &&                        \
- ((mfn_to_pfn(_mfn) < (max_pfn)) &&               \
+ ((mfn_to_pfn(_mfn) < (p2m_size)) &&               \
   (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
 
 
@@ -79,7 +79,7 @@ static unsigned long m2p_mfn0;
 */
 
 #define BITS_PER_LONG (sizeof(unsigned long) * 8)
-#define BITMAP_SIZE   ((max_pfn + BITS_PER_LONG - 1) / 8)
+#define BITMAP_SIZE   ((p2m_size + BITS_PER_LONG - 1) / 8)
 
 #define BITMAP_ENTRY(_nr,_bmap) \
    ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
@@ -343,7 +343,7 @@ static int print_stats(int xc_handle, ui
 }
 
 
-static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn,
+static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
                           unsigned long *arr, int runs)
 {
     long long start, now;
@@ -356,7 +356,7 @@ static int analysis_phase(int xc_handle,
         int i;
 
         xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
-                          arr, max_pfn, NULL, 0, NULL);
+                          arr, p2m_size, NULL, 0, NULL);
         DPRINTF("#Flush\n");
         for ( i = 0; i < 40; i++ ) {
             usleep(50000);
@@ -682,7 +682,7 @@ int xc_linux_save(int xc_handle, int io_
     /* base of the region in which domain memory is mapped */
     unsigned char *region_base = NULL;
 
-    /* power of 2 order of max_pfn */
+    /* power of 2 order of p2m_size */
     int order_nr;
 
     /* bitmap of pages:
@@ -730,7 +730,7 @@ int xc_linux_save(int xc_handle, int io_
         goto out;
     }
 
-    max_pfn = live_shinfo->arch.max_pfn;
+    p2m_size = live_shinfo->arch.max_pfn;
 
     live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
                                                    live_shinfo);
@@ -777,7 +777,7 @@ int xc_linux_save(int xc_handle, int io_
     memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
 
     /* Canonicalise the pfn-to-mfn table frame-number list. */
-    for (i = 0; i < max_pfn; i += fpp) {
+    for (i = 0; i < p2m_size; i += fpp) {
         if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) {
             ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
             ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
@@ -813,12 +813,12 @@ int xc_linux_save(int xc_handle, int io_
     }
 
     /* pretend we sent all the pages last iteration */
-    sent_last_iter = max_pfn;
-
-
-    /* calculate the power of 2 order of max_pfn, e.g.
+    sent_last_iter = p2m_size;
+
+
+    /* calculate the power of 2 order of p2m_size, e.g.
        15->4 16->4 17->5 */
-    for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++)
+    for (i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++)
         continue;
 
     /* Setup to_send / to_fix and to_skip bitmaps */
@@ -844,7 +844,7 @@ int xc_linux_save(int xc_handle, int io_
         return 1;
     }
 
-    analysis_phase(xc_handle, dom, max_pfn, to_skip, 0);
+    analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
 
     /* We want zeroed memory so use calloc rather than malloc. */
     pfn_type   = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
@@ -867,7 +867,7 @@ int xc_linux_save(int xc_handle, int io_
     {
         int err=0;
         unsigned long mfn;
-        for (i = 0; i < max_pfn; i++) {
+        for (i = 0; i < p2m_size; i++) {
 
             mfn = live_p2m[i];
             if((mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i)) {
@@ -882,8 +882,8 @@ int xc_linux_save(int xc_handle, int io_
 
     /* Start writing out the saved-domain record. */
 
-    if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
-        ERROR("write: max_pfn");
+    if (!write_exact(io_fd, &p2m_size, sizeof(unsigned long))) {
+        ERROR("write: p2m_size");
         goto out;
     }
 
@@ -929,9 +929,9 @@ int xc_linux_save(int xc_handle, int io_
 
         DPRINTF("Saving memory pages: iter %d   0%%", iter);
 
-        while( N < max_pfn ){
-
-            unsigned int this_pc = (N * 100) / max_pfn;
+        while( N < p2m_size ){
+
+            unsigned int this_pc = (N * 100) / p2m_size;
 
             if ((this_pc - prev_pc) >= 5) {
                 DPRINTF("\b\b\b\b%3d%%", this_pc);
@@ -942,7 +942,7 @@ int xc_linux_save(int xc_handle, int io_
                but this is fast enough for the moment. */
             if (!last_iter && xc_shadow_control(
                     xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK,
-                    to_skip, max_pfn, NULL, 0, NULL) != max_pfn) {
+                    to_skip, p2m_size, NULL, 0, NULL) != p2m_size) {
                 ERROR("Error peeking shadow bitmap");
                 goto out;
             }
@@ -950,9 +950,9 @@ int xc_linux_save(int xc_handle, int io_
 
             /* load pfn_type[] with the mfn of all the pages we're doing in
                this batch. */
-            for (batch = 0; batch < MAX_BATCH_SIZE && N < max_pfn ; N++) {
-
-                int n = permute(N, max_pfn, order_nr);
+            for (batch = 0; batch < MAX_BATCH_SIZE && N < p2m_size ; N++) {
+
+                int n = permute(N, p2m_size, order_nr);
 
                 if (debug) {
                     DPRINTF("%d pfn= %08lx mfn= %08lx %d  [mfn]= %08lx\n",
@@ -1123,7 +1123,7 @@ int xc_linux_save(int xc_handle, int io_
             print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
 
             DPRINTF("Total pages sent= %ld (%.2fx)\n",
-                    total_sent, ((float)total_sent)/max_pfn );
+                    total_sent, ((float)total_sent)/p2m_size );
             DPRINTF("(of which %ld were fixups)\n", needed_to_fix  );
         }
 
@@ -1150,7 +1150,7 @@ int xc_linux_save(int xc_handle, int io_
             if (((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
                 (iter >= max_iters) ||
                 (sent_this_iter+skip_this_iter < 50) ||
-                (total_sent > max_pfn*max_factor)) {
+                (total_sent > p2m_size*max_factor)) {
                 DPRINTF("Start last iteration\n");
                 last_iter = 1;
 
@@ -1168,7 +1168,7 @@ int xc_linux_save(int xc_handle, int io_
 
             if (xc_shadow_control(xc_handle, dom, 
                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
-                                  max_pfn, NULL, 0, &stats) != max_pfn) {
+                                  p2m_size, NULL, 0, &stats) != p2m_size) {
                 ERROR("Error flushing shadow PT");
                 goto out;
             }
@@ -1220,7 +1220,7 @@ int xc_linux_save(int xc_handle, int io_
         unsigned int i,j;
         unsigned long pfntab[1024];
 
-        for (i = 0, j = 0; i < max_pfn; i++) {
+        for (i = 0, j = 0; i < p2m_size; i++) {
             if (!is_mapped(live_p2m[i]))
                 j++;
         }
@@ -1230,13 +1230,13 @@ int xc_linux_save(int xc_handle, int io_
             goto out;
         }
 
-        for (i = 0, j = 0; i < max_pfn; ) {
+        for (i = 0, j = 0; i < p2m_size; ) {
 
             if (!is_mapped(live_p2m[i]))
                 pfntab[j++] = i;
 
             i++;
-            if (j == 1024 || i == max_pfn) {
+            if (j == 1024 || i == p2m_size) {
                 if(!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) {
                     ERROR("Error when writing to state file (6b) (errno %d)",
                           errno);
@@ -1333,7 +1333,7 @@ int xc_linux_save(int xc_handle, int io_
         munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
 
     if (live_p2m)
-        munmap(live_p2m, P2M_SIZE);
+        munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
 
     if (live_m2p)
         munmap(live_m2p, M2P_SIZE(max_mfn));
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_resume.c
--- a/tools/libxc/xc_resume.c   Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_resume.c   Tue Apr 03 13:04:51 2007 -0600
@@ -46,7 +46,7 @@ static int xc_domain_resume_any(int xc_h
     xc_dominfo_t info;
     int i, rc = -1;
 #if defined(__i386__) || defined(__x86_64__)
-    unsigned long mfn, max_pfn = 0;
+    unsigned long mfn, p2m_size = 0;
     vcpu_guest_context_t ctxt;
     start_info_t *start_info;
     shared_info_t *shinfo = NULL;
@@ -74,7 +74,7 @@ static int xc_domain_resume_any(int xc_h
         goto out;
     }
 
-    max_pfn = shinfo->arch.max_pfn;
+    p2m_size = shinfo->arch.max_pfn;
 
     p2m_frame_list_list =
         xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_READ,
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xenctrl.h     Tue Apr 03 13:04:51 2007 -0600
@@ -4,6 +4,9 @@
  * A library for low-level access to the Xen control interfaces.
  *
  * Copyright (c) 2003-2004, K A Fraser.
+ *
+ * xc_gnttab functions:
+ * Copyright (c) 2007, D G Murray <Derek.Murray@xxxxxxxxxxxx>
  */
 
 #ifndef XENCTRL_H
@@ -740,6 +743,62 @@ evtchn_port_t xc_evtchn_pending(int xce_
  */
 int xc_evtchn_unmask(int xce_handle, evtchn_port_t port);
 
+/**************************
+ * GRANT TABLE OPERATIONS *
+ **************************/
+
+/*
+ * Return a handle to the grant table driver, or -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_gnttab_open(void);
+
+/*
+ * Close a handle previously allocated with xc_gnttab_open().
+ */
+int xc_gnttab_close(int xcg_handle);
+
+/*
+ * Memory maps a grant reference from one domain to a local address range.
+ * Mappings should be unmapped with xc_gnttab_munmap.  Returns NULL on failure.
+ *
+ * @parm xcg_handle a handle on an open grant table interface
+ * @parm domid the domain to map memory from
+ * @parm ref the grant reference ID to map
+ * @parm prot same flag as in mmap()
+ */
+void *xc_gnttab_map_grant_ref(int xcg_handle,
+                              uint32_t domid,
+                              uint32_t ref,
+                              int prot);
+
+/**
+ * Memory maps one or more grant references from one or more domains to a
+ * contiguous local address range. Mappings should be unmapped with
+ * xc_gnttab_munmap.  Returns NULL on failure.
+ *
+ * @parm xcg_handle a handle on an open grant table interface
+ * @parm count the number of grant references to be mapped
+ * @parm domids an array of @count domain IDs by which the corresponding @refs
+ *              were granted
+ * @parm refs an array of @count grant references to be mapped
+ * @parm prot same flag as in mmap()
+ */
+void *xc_gnttab_map_grant_refs(int xcg_handle,
+                               uint32_t count,
+                               uint32_t *domids,
+                               uint32_t *refs,
+                               int prot);
+
+/*
+ * Unmaps the @count pages starting at @start_address, which were mapped by a
+ * call to xc_gnttab_map_grant_ref or xc_gnttab_map_grant_refs. Returns zero
+ * on success, otherwise sets errno and returns non-zero.
+ */
+int xc_gnttab_munmap(int xcg_handle,
+                     void *start_address,
+                     uint32_t count);
+
 int xc_hvm_set_pci_intx_level(
     int xc_handle, domid_t dom,
     uint8_t domain, uint8_t bus, uint8_t device, uint8_t intx,
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xenguest.h    Tue Apr 03 13:04:51 2007 -0600
@@ -43,15 +43,16 @@ int xc_hvm_save(int xc_handle, int io_fd
  * @parm xc_handle a handle to an open hypervisor interface
  * @parm fd the file descriptor to restore a domain from
  * @parm dom the id of the domain
- * @parm nr_pfns the number of pages
+ * @parm p2m_size number of pages the guest has (i.e. number entries in P2M)
+ * @parm max_nr_pfns domains maximum real memory allocation, in pages
  * @parm store_evtchn the store event channel for this domain to use
  * @parm store_mfn returned with the mfn of the store page
  * @return 0 on success, -1 on failure
  */
 int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
-                     unsigned long nr_pfns, unsigned int store_evtchn,
-                     unsigned long *store_mfn, unsigned int console_evtchn,
-                     unsigned long *console_mfn);
+                     unsigned long p2m_size, unsigned long max_nr_pfns,
+                     unsigned int store_evtchn, unsigned long *store_mfn,
+                     unsigned int console_evtchn, unsigned long *console_mfn);
 
 /**
  * This function will restore a saved hvm domain running unmodified guest.
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xg_private.h  Tue Apr 03 13:04:51 2007 -0600
@@ -148,17 +148,16 @@ typedef l4_pgentry_64_t l4_pgentry_t;
 
 #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
 
-/* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
-#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(xen_pfn_t)), PAGE_SHIFT)
-
 /* Number of xen_pfn_t in a page */
 #define fpp             (PAGE_SIZE/sizeof(xen_pfn_t))
 
+/* XXX SMH: following 3 skanky macros rely on variable p2m_size being set */
+
 /* Number of entries in the pfn_to_mfn_frame_list_list */
-#define P2M_FLL_ENTRIES (((max_pfn)+(fpp*fpp)-1)/(fpp*fpp))
+#define P2M_FLL_ENTRIES (((p2m_size)+(fpp*fpp)-1)/(fpp*fpp))
 
 /* Number of entries in the pfn_to_mfn_frame_list */
-#define P2M_FL_ENTRIES  (((max_pfn)+fpp-1)/fpp)
+#define P2M_FL_ENTRIES  (((p2m_size)+fpp-1)/fpp)
 
 /* Size in bytes of the pfn_to_mfn_frame_list     */
 #define P2M_FL_SIZE     ((P2M_FL_ENTRIES)*sizeof(unsigned long))
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/lowlevel/scf/scf.c
--- a/tools/python/xen/lowlevel/scf/scf.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/lowlevel/scf/scf.c       Tue Apr 03 13:04:51 2007 -0600
@@ -26,7 +26,7 @@
 #include <libscf.h>
 #include <stdio.h>
 
-#define        XEND_FMRI "svc:/system/xen/xend:default"
+#define        XEND_FMRI "svc:/system/xctl/xend:default"
 #define        XEND_PG "config"
 
 static PyObject *scf_exc;
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/XendCheckpoint.py   Tue Apr 03 13:04:51 2007 -0600
@@ -187,6 +187,7 @@ def restore(xd, fd, dominfo = None, paus
     assert console_port
 
     nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 
+    max_nr_pfns = (dominfo.getMemoryMaximum() + 3) / 4 
 
     # if hvm, pass mem size to calculate the store_mfn
     image_cfg = dominfo.info.get('image', {})
@@ -203,17 +204,17 @@ def restore(xd, fd, dominfo = None, paus
     try:
         l = read_exact(fd, sizeof_unsigned_long,
                        "not a valid guest state file: pfn count read")
-        max_pfn = unpack("L", l)[0]    # native sizeof long
-
-        if max_pfn > 16*1024*1024:     # XXX 
+        p2m_size = unpack("L", l)[0]    # native sizeof long
+
+        if p2m_size > 16*1024*1024:     # XXX 
             raise XendError(
                 "not a valid guest state file: pfn count out of range")
 
         shadow = dominfo.info['shadow_memory']
         log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, "
-                  "nr_pfns=0x%x.", dominfo.info['shadow_memory'],
+                  "p2m_size=0x%x.", dominfo.info['shadow_memory'],
                   dominfo.info['memory_static_max'],
-                  dominfo.info['memory_static_min'], nr_pfns)
+                  dominfo.info['memory_static_min'], p2m_size)
 
         balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024)
 
@@ -221,7 +222,7 @@ def restore(xd, fd, dominfo = None, paus
         dominfo.info['shadow_memory'] = shadow_cur
 
         cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
-                        fd, dominfo.getDomid(), max_pfn,
+                        fd, dominfo.getDomid(), p2m_size, max_nr_pfns, 
                         store_port, console_port, int(is_hvm), pae, apic])
         log.debug("[xc_restore]: %s", string.join(cmd))
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/XendConfig.py       Tue Apr 03 13:04:51 2007 -0600
@@ -118,7 +118,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
 # Platform configuration keys.
 XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display', 
                         'fda', 'fdb', 'keymap', 'isa', 'localtime',
-                        'nographic', 'pae', 'serial', 'sdl',
+                        'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
                         'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
                         'vncconsole', 'vncdisplay', 'vnclisten',
                         'vncpasswd', 'vncunused', 'xauthority']
@@ -203,6 +203,7 @@ LEGACY_CFG_TYPES = {
     'on_xend_stop':  str,
     'on_xend_start': str,
     'online_vcpus':  int,
+    'rtc/timeoffset': str,
 }
 
 # Values that should be stored in xenstore's /vm/<uuid> that is used
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Tue Apr 03 13:04:51 2007 -0600
@@ -859,7 +859,8 @@ class XendDomainInfo:
         # Check whether values in the configuration have
         # changed in Xenstore.
         
-        cfg_vm = ['name', 'on_poweroff', 'on_reboot', 'on_crash']
+        cfg_vm = ['name', 'on_poweroff', 'on_reboot', 'on_crash',
+                  'rtc/timeoffset']
         
         vm_details = self._readVMDetails([(k,XendConfig.LEGACY_CFG_TYPES[k])
                                            for k in cfg_vm])
@@ -888,6 +889,11 @@ class XendDomainInfo:
             self.info.update_with_image_sxp(sxp.from_string(image_sxp))
             changed = True
 
+        # Check if the rtc offset has changed
+        if vm_details.get("rtc/timeoffset", 0) != self.info["platform"].get("rtc_timeoffset", 0):
+            self.info["platform"]["rtc_timeoffset"] = vm_details.get("rtc/timeoffset", 0)
+            changed = True
+ 
         if changed:
             # Update the domain section of the store, as this contains some
             # parameters derived from the VM configuration.
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/balloon.py  Tue Apr 03 13:04:51 2007 -0600
@@ -25,9 +25,7 @@ import XendOptions
 import XendOptions
 from XendLogging import log
 from XendError import VmError
-
-
-PROC_XEN_BALLOON = '/proc/xen/balloon'
+import osdep
 
 RETRY_LIMIT = 20
 RETRY_LIMIT_INCR = 5
@@ -51,19 +49,7 @@ def _get_proc_balloon(label):
     """Returns the value for the named label.  Returns None if the label was
        not found or the value was non-numeric."""
 
-    f = file(PROC_XEN_BALLOON, 'r')
-    try:
-        for line in f:
-            keyvalue = line.split(':')
-            if keyvalue[0] == label:
-                values = keyvalue[1].split()
-                if values[0].isdigit():
-                    return int(values[0])
-                else:
-                    return None
-        return None
-    finally:
-        f.close()
+    return osdep.lookup_balloon_stat(label)
 
 def get_dom0_current_alloc():
     """Returns the current memory allocation (in KiB) of dom0."""
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/image.py    Tue Apr 03 13:04:51 2007 -0600
@@ -256,9 +256,12 @@ class HVMImageHandler(ImageHandler):
         self.xauthority = vmConfig['platform'].get('xauthority')
         self.vncconsole = vmConfig['platform'].get('vncconsole')
 
+        rtc_timeoffset = vmConfig['platform'].get('rtc_timeoffset')
+
         self.vm.storeVm(("image/dmargs", " ".join(self.dmargs)),
                         ("image/device-model", self.device_model),
                         ("image/display", self.display))
+        self.vm.storeVm(("rtc/timeoffset", rtc_timeoffset))
 
         self.pid = None
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/osdep.py
--- a/tools/python/xen/xend/osdep.py    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/osdep.py    Tue Apr 03 13:04:51 2007 -0600
@@ -41,6 +41,55 @@ _vif_script = {
     "SunOS": "vif-vnic"
 }
 
+def _linux_balloon_stat(label):
+    """Returns the value for the named label, or None if an error occurs."""
+
+    PROC_XEN_BALLOON = '/proc/xen/balloon'
+    f = file(PROC_XEN_BALLOON, 'r')
+    try:
+        for line in f:
+            keyvalue = line.split(':')
+            if keyvalue[0] == label:
+                values = keyvalue[1].split()
+                if values[0].isdigit():
+                    return int(values[0])
+                else:
+                    return None
+        return None
+    finally:
+        f.close()
+
+def _solaris_balloon_stat(label):
+    """Returns the value for the named label, or None if an error occurs."""
+
+    import fcntl
+    import array
+    DEV_XEN_BALLOON = '/dev/xen/balloon'
+    BLN_IOCTL_CURRENT = 0x4201
+    BLN_IOCTL_TARGET = 0x4202
+    BLN_IOCTL_LOW = 0x4203
+    BLN_IOCTL_HIGH = 0x4204
+    BLN_IOCTL_LIMIT = 0x4205
+    label_to_ioctl = { 'Current allocation'    : BLN_IOCTL_CURRENT,
+                       'Requested target'      : BLN_IOCTL_TARGET,
+                       'Low-mem balloon'       : BLN_IOCTL_LOW,
+                       'High-mem balloon'      : BLN_IOCTL_HIGH,
+                       'Xen hard limit'        : BLN_IOCTL_LIMIT }
+
+    f = file(DEV_XEN_BALLOON, 'r')
+    try:
+        values = array.array('L', [0])
+        if fcntl.ioctl(f.fileno(), label_to_ioctl[label], values, 1) == 0:
+            return values[0]
+        else:
+            return None
+    finally:
+        f.close()
+
+_balloon_stat = {
+    "SunOS": _solaris_balloon_stat
+}
+
 def _get(var, default=None):
     return var.get(os.uname()[0], default)
 
@@ -49,3 +98,4 @@ pygrub_path = _get(_pygrub_path, "/usr/b
 pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub")
 netback_type = _get(_netback_type, "netfront")
 vif_script = _get(_vif_script, "vif-bridge")
+lookup_balloon_stat = _get(_balloon_stat, _linux_balloon_stat)
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/server/SrvServer.py Tue Apr 03 13:04:51 2007 -0600
@@ -212,8 +212,8 @@ def _loadConfig(servers, root, reload):
                     if server_cfg[1] in [XendAPI.AUTH_PAM, XendAPI.AUTH_NONE]:
                         auth_method = server_cfg[1]
 
-                if len(server_cfg) > 2:
-                    hosts_allowed = server_cfg[2] or None
+                if len(server_cfg) > 2 and len(server_cfg[2]):
+                    hosts_allowed = map(re.compile, server_cfg[2].split(' '))
 
                 if len(server_cfg) > 4:
                     # SSL key and cert file
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/server/relocate.py  Tue Apr 03 13:04:51 2007 -0600
@@ -106,8 +106,12 @@ class RelocationProtocol(protocol.Protoc
     def op_receive(self, name, _):
         if self.transport:
             self.send_reply(["ready", name])
-            XendDomain.instance().domain_restore_fd(
-                self.transport.sock.fileno())
+            try:
+                XendDomain.instance().domain_restore_fd(
+                    self.transport.sock.fileno())
+            except:
+                self.send_error()
+                self.close()
         else:
             log.error(name + ": no transport")
             raise XendError(name + ": no transport")
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xm/create.py     Tue Apr 03 13:04:51 2007 -0600
@@ -185,6 +185,10 @@ gopts.var('cpus', val='CPUS',
 gopts.var('cpus', val='CPUS',
           fn=set_value, default=None,
           use="CPUS to run the domain on.")
+
+gopts.var('rtc_timeoffset', val='RTC_TIMEOFFSET',
+          fn=set_value, default="0",
+          use="Set RTC offset.")
 
 gopts.var('pae', val='PAE',
           fn=set_int, default=1,
@@ -717,7 +721,7 @@ def configure_hvm(config_image, vals):
     args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb',
              'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
              'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
-             'sdl', 'display', 'xauthority',
+             'sdl', 'display', 'xauthority', 'rtc_timeoffset',
              'acpi', 'apic', 'usb', 'usbdevice', 'keymap' ]
     for a in args:
         if a in vals.__dict__ and vals.__dict__[a] is not None:
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xm/main.py       Tue Apr 03 13:04:51 2007 -0600
@@ -929,10 +929,10 @@ def xm_label_list(doms):
             if security.active_policy not in ['INACTIVE', 'NULL', 'DEFAULT']:
                 if not d['seclabel']:
                     d['seclabel'] = 'ERROR'
-                elif security.active_policy in ['DEFAULT']:
-                    d['seclabel'] = 'DEFAULT'
-                else:
-                    d['seclabel'] = 'INACTIVE'
+            elif security.active_policy in ['DEFAULT']:
+                d['seclabel'] = 'DEFAULT'
+            else:
+                d['seclabel'] = 'INACTIVE'
 
             output.append((format % d, d['seclabel']))
         
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py      Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xm/xenapi_create.py      Tue Apr 03 13:04:51 2007 -0600
@@ -20,7 +20,6 @@
 
 from xen.xm.main import server, get_default_SR
 from xml.dom.minidom import parse, getDOMImplementation
-from xml.dom.ext import PrettyPrint
 from xml.parsers.xmlproc import xmlproc, xmlval, xmldtd
 from xen.xend import sxp
 from xen.xend.XendAPIConstants import XEN_API_ON_NORMAL_EXIT, \
diff -r fc9e2f7920c9 -r f378c424e0ce tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c        Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/xcutils/xc_restore.c        Tue Apr 03 13:04:51 2007 -0600
@@ -18,15 +18,14 @@ int
 int
 main(int argc, char **argv)
 {
-    unsigned int xc_fd, io_fd, domid, max_pfn, store_evtchn, console_evtchn;
+    unsigned int xc_fd, io_fd, domid, store_evtchn, console_evtchn;
     unsigned int hvm, pae, apic;
     int ret;
-    unsigned long store_mfn, console_mfn;
+    unsigned long p2m_size, max_nr_pfns, store_mfn, console_mfn;
 
-    if (argc != 9)
-       errx(1,
-            "usage: %s iofd domid max_pfn store_evtchn console_evtchn hvm pae 
apic",
-            argv[0]);
+    if (argc != 10)
+        errx(1, "usage: %s iofd domid p2m_size max_nr_pfns store_evtchn "
+             "console_evtchn hvm pae apic", argv[0]);
 
     xc_fd = xc_interface_open();
     if (xc_fd < 0)
@@ -34,19 +33,21 @@ main(int argc, char **argv)
 
     io_fd = atoi(argv[1]);
     domid = atoi(argv[2]);
-    max_pfn = atoi(argv[3]);
-    store_evtchn = atoi(argv[4]);
-    console_evtchn = atoi(argv[5]);
-    hvm  = atoi(argv[6]);
-    pae  = atoi(argv[7]);
-    apic = atoi(argv[8]);
+    p2m_size = atoi(argv[3]);
+    max_nr_pfns = atoi(argv[4]);
+    store_evtchn = atoi(argv[5]);
+    console_evtchn = atoi(argv[6]);
+    hvm  = atoi(argv[7]);
+    pae  = atoi(argv[8]);
+    apic = atoi(argv[9]);
 
     if (hvm) {
-        ret = xc_hvm_restore(xc_fd, io_fd, domid, max_pfn, store_evtchn,
+        ret = xc_hvm_restore(xc_fd, io_fd, domid, max_nr_pfns, store_evtchn,
                 &store_mfn, pae, apic);
-    } else 
-        ret = xc_linux_restore(xc_fd, io_fd, domid, max_pfn, store_evtchn,
-                &store_mfn, console_evtchn, &console_mfn);
+    } else
+        ret = xc_linux_restore(xc_fd, io_fd, domid, p2m_size,
+                               max_nr_pfns, store_evtchn, &store_mfn,
+                               console_evtchn, &console_mfn);
 
     if (ret == 0) {
        printf("store-mfn %li\n", store_mfn);
diff -r fc9e2f7920c9 -r f378c424e0ce tools/xenstat/xentop/xentop.c
--- a/tools/xenstat/xentop/xentop.c     Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/xenstat/xentop/xentop.c     Tue Apr 03 13:04:51 2007 -0600
@@ -984,6 +984,8 @@ static void top(void)
 
        if(!batch)
        do_bottom_line();
+
+       free(domains);
 }
 
 int main(int argc, char **argv)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Tue Apr 03 13:04:51 2007 -0600
@@ -59,9 +59,6 @@ struct hvm_function_table hvm_funcs __re
 /* I/O permission bitmap is globally shared by all HVM guests. */
 char __attribute__ ((__section__ (".bss.page_aligned")))
     hvm_io_bitmap[3*PAGE_SIZE];
-/* MSR permission bitmap is globally shared by all HVM guests. */
-char __attribute__ ((__section__ (".bss.page_aligned")))
-    hvm_msr_bitmap[PAGE_SIZE];
 
 void hvm_enable(struct hvm_function_table *fns)
 {
@@ -74,9 +71,6 @@ void hvm_enable(struct hvm_function_tabl
      */
     memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
     clear_bit(0x80, hvm_io_bitmap);
-
-    /* All MSR accesses are intercepted by default. */
-    memset(hvm_msr_bitmap, ~0, sizeof(hvm_msr_bitmap));
 
     hvm_funcs   = *fns;
     hvm_enabled = 1;
@@ -378,6 +372,9 @@ void hvm_send_assist_req(struct vcpu *v)
 void hvm_send_assist_req(struct vcpu *v)
 {
     ioreq_t *p;
+
+    if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
+        return; /* implicitly bins the i/o operation */
 
     p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq;
     if ( unlikely(p->state != STATE_IOREQ_NONE) )
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/intercept.c      Tue Apr 03 13:04:51 2007 -0600
@@ -155,28 +155,13 @@ static inline void hvm_mmio_access(struc
     }
 }
 
-int hvm_buffered_io_intercept(ioreq_t *p)
+int hvm_buffered_io_send(ioreq_t *p)
 {
     struct vcpu *v = current;
     spinlock_t  *buffered_io_lock;
     buffered_iopage_t *buffered_iopage =
         (buffered_iopage_t *)(v->domain->arch.hvm_domain.buffered_io_va);
     unsigned long tmp_write_pointer = 0;
-    int i;
-
-    /* ignore READ ioreq_t! */
-    if ( p->dir == IOREQ_READ )
-        return 0;
-
-    for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
-        if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
-             p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
-                                     hvm_buffered_io_ranges[i]->length )
-            break;
-    }
-
-    if ( i == HVM_BUFFERED_IO_RANGE_NR )
-        return 0;
 
     buffered_io_lock = &v->domain->arch.hvm_domain.buffered_io_lock;
     spin_lock(buffered_io_lock);
@@ -205,6 +190,27 @@ int hvm_buffered_io_intercept(ioreq_t *p
     return 1;
 }
 
+int hvm_buffered_io_intercept(ioreq_t *p)
+{
+    int i;
+
+    /* ignore READ ioreq_t! */
+    if ( p->dir == IOREQ_READ )
+        return 0;
+
+    for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
+        if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
+             p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
+                                     hvm_buffered_io_ranges[i]->length )
+            break;
+    }
+
+    if ( i == HVM_BUFFERED_IO_RANGE_NR )
+        return 0;
+
+    return hvm_buffered_io_send(p);
+}
+
 int hvm_mmio_intercept(ioreq_t *p)
 {
     struct vcpu *v = current;
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/io.c     Tue Apr 03 13:04:51 2007 -0600
@@ -771,10 +771,11 @@ void hvm_io_assist(struct vcpu *v)
     struct cpu_user_regs *regs;
     struct hvm_io_op *io_opp;
     unsigned long gmfn;
+    struct domain *d = v->domain;
 
     io_opp = &v->arch.hvm_vcpu.io_op;
     regs   = &io_opp->io_context;
-    vio    = get_vio(v->domain, v->vcpu_id);
+    vio    = get_vio(d, v->vcpu_id);
 
     p = &vio->vp_ioreq;
     if ( p->state != STATE_IORESP_READY )
@@ -797,11 +798,13 @@ void hvm_io_assist(struct vcpu *v)
     memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
 
     /* Has memory been dirtied? */
-    if ( p->dir == IOREQ_READ && p->data_is_ptr )
+    if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
     {
         gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
-        mark_dirty(v->domain, gmfn);
-    }
+        mark_dirty(d, gmfn);
+    }
+
+    vcpu_end_shutdown_deferral(v);
 }
 
 /*
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/platform.c       Tue Apr 03 13:04:51 2007 -0600
@@ -921,6 +921,26 @@ static void send_mmio_req(unsigned char 
     hvm_send_assist_req(v);
 }
 
+void send_timeoffset_req(unsigned long timeoff)
+{
+    ioreq_t p[1];
+
+    if ( timeoff == 0 )
+        return;
+
+    memset(p, 0, sizeof(*p));
+
+    p->type = IOREQ_TYPE_TIMEOFFSET;
+    p->size = 4;
+    p->dir = IOREQ_WRITE;
+    p->data = timeoff;
+
+    p->state = STATE_IOREQ_READY;
+
+    if ( !hvm_buffered_io_send(p) )
+        printk("Unsuccessful timeoffset update\n");
+}
+
 static void mmio_operands(int type, unsigned long gpa,
                           struct hvm_io_op *mmio_op,
                           unsigned char op_size)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/rtc.c
--- a/xen/arch/x86/hvm/rtc.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/rtc.c    Tue Apr 03 13:04:51 2007 -0600
@@ -157,6 +157,10 @@ static void rtc_set_time(RTCState *s)
 static void rtc_set_time(RTCState *s)
 {
     struct tm *tm = &s->current_tm;
+    unsigned long before, after; /* XXX s_time_t */
+      
+    before = mktime(tm->tm_year, tm->tm_mon, tm->tm_mday,
+                   tm->tm_hour, tm->tm_min, tm->tm_sec);
     
     tm->tm_sec = from_bcd(s, s->hw.cmos_data[RTC_SECONDS]);
     tm->tm_min = from_bcd(s, s->hw.cmos_data[RTC_MINUTES]);
@@ -168,6 +172,10 @@ static void rtc_set_time(RTCState *s)
     tm->tm_mday = from_bcd(s, s->hw.cmos_data[RTC_DAY_OF_MONTH]);
     tm->tm_mon = from_bcd(s, s->hw.cmos_data[RTC_MONTH]) - 1;
     tm->tm_year = from_bcd(s, s->hw.cmos_data[RTC_YEAR]) + 100;
+
+    after = mktime(tm->tm_year, tm->tm_mon, tm->tm_mday,
+                   tm->tm_hour, tm->tm_min, tm->tm_sec);
+    send_timeoffset_req(after - before);
 }
 
 static void rtc_copy_date(RTCState *s)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Tue Apr 03 13:04:51 2007 -0600
@@ -79,6 +79,30 @@ struct host_save_area *alloc_host_save_a
     return hsa;
 }
 
+static void disable_intercept_for_msr(char *msr_bitmap, u32 msr)
+{
+    /*
+     * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
+     */
+    if ( msr <= 0x1fff )
+    {
+        __clear_bit(msr*2, msr_bitmap + 0x000); 
+        __clear_bit(msr*2+1, msr_bitmap + 0x000); 
+    }
+    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+    {
+        msr &= 0x1fff;
+        __clear_bit(msr*2, msr_bitmap + 0x800);
+        __clear_bit(msr*2+1, msr_bitmap + 0x800);
+    } 
+    else if ( (msr >= 0xc001000) && (msr <= 0xc0011fff) )
+    {
+        msr &= 0x1fff;
+        __clear_bit(msr*2, msr_bitmap + 0x1000);
+        __clear_bit(msr*2+1, msr_bitmap + 0x1000);
+    }
+}
+
 static int construct_vmcb(struct vcpu *v)
 {
     struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
@@ -114,6 +138,10 @@ static int construct_vmcb(struct vcpu *v
     if ( arch_svm->msrpm == NULL )
         return -ENOMEM;
     memset(arch_svm->msrpm, 0xff, MSRPM_SIZE);
+
+    disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_FS_BASE);
+    disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_GS_BASE);
+
     vmcb->msrpm_base_pa = (u64)virt_to_maddr(arch_svm->msrpm);
     vmcb->iopm_base_pa  = (u64)virt_to_maddr(hvm_io_bitmap);
 
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Tue Apr 03 13:04:51 2007 -0600
@@ -289,7 +289,7 @@ static void construct_vmcs(struct vcpu *
     v->arch.hvm_vcpu.u.vmx.exec_control = vmx_cpu_based_exec_control;
 
     if ( cpu_has_vmx_msr_bitmap )
-        __vmwrite(MSR_BITMAP, virt_to_maddr(hvm_msr_bitmap));
+        __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
 
     /* I/O access bitmap. */
     __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Tue Apr 03 13:04:51 2007 -0600
@@ -51,6 +51,8 @@
 #include <public/hvm/save.h>
 #include <asm/hvm/trace.h>
 
+char *vmx_msr_bitmap;
+
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
 
@@ -1005,14 +1007,14 @@ static void disable_intercept_for_msr(u3
      */
     if ( msr <= 0x1fff )
     {
-        __clear_bit(msr, hvm_msr_bitmap + 0x000); /* read-low */
-        __clear_bit(msr, hvm_msr_bitmap + 0x800); /* write-low */
+        __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
+        __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
     }
     else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
     {
         msr &= 0x1fff;
-        __clear_bit(msr, hvm_msr_bitmap + 0x400); /* read-high */
-        __clear_bit(msr, hvm_msr_bitmap + 0xc00); /* write-high */
+        __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
+        __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
     }
 }
 
@@ -1105,6 +1107,9 @@ int start_vmx(void)
     if ( cpu_has_vmx_msr_bitmap )
     {
         printk("VMX: MSR intercept bitmap enabled\n");
+        vmx_msr_bitmap = alloc_xenheap_page();
+        BUG_ON(vmx_msr_bitmap == NULL);
+        memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
         disable_intercept_for_msr(MSR_FS_BASE);
         disable_intercept_for_msr(MSR_GS_BASE);
     }
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/mm.c Tue Apr 03 13:04:51 2007 -0600
@@ -806,7 +806,8 @@ void put_page_from_l1e(l1_pgentry_t l1e,
      * (Note that the undestroyable active grants are not a security hole in
      * Xen. All active grants can safely be cleaned up when the domain dies.)
      */
-    if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) && !d->is_shutdown && 
!d->is_dying )
+    if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
+         !d->is_shutting_down && !d->is_dying )
     {
         MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
                 l1e_get_intpte(l1e));
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/mm/hap/hap.c Tue Apr 03 13:04:51 2007 -0600
@@ -52,7 +52,7 @@
 /************************************************/
 /*             HAP SUPPORT FUNCTIONS            */
 /************************************************/
-mfn_t hap_alloc(struct domain *d, unsigned long backpointer)
+mfn_t hap_alloc(struct domain *d)
 {
     struct page_info *sp = NULL;
     void *p;
@@ -82,43 +82,43 @@ void hap_free(struct domain *d, mfn_t sm
     list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
 }
 
-static int hap_alloc_p2m_pages(struct domain *d)
-{
-    struct page_info *pg;
-
-    ASSERT(hap_locked_by_me(d));
-
-    pg = mfn_to_page(hap_alloc(d, 0));
-    d->arch.paging.hap.p2m_pages += 1;
-    d->arch.paging.hap.total_pages -= 1;
-    
-    page_set_owner(pg, d);
-    pg->count_info = 1;
-    list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist);
-
-    return 1;
-}
-
 struct page_info * hap_alloc_p2m_page(struct domain *d)
 {
-    struct list_head *entry;
     struct page_info *pg;
     mfn_t mfn;
     void *p;
 
     hap_lock(d);
-    
-    if ( list_empty(&d->arch.paging.hap.p2m_freelist) && 
-         !hap_alloc_p2m_pages(d) ) {
-        hap_unlock(d);
-        return NULL;
-    }
-    entry = d->arch.paging.hap.p2m_freelist.next;
-    list_del(entry);
-    
+
+#if CONFIG_PAGING_LEVELS == 3
+    /* Under PAE mode, top-level P2M table should be allocated below 4GB space
+     * because the size of h_cr3 is only 32-bit. We use alloc_domheap_pages to 
+     * force this requirement. This page will be de-allocated in 
+     * hap_free_p2m_page(), like other P2M pages.
+    */
+    if ( d->arch.paging.hap.p2m_pages == 0 ) 
+    {
+       pg = alloc_domheap_pages(NULL, 0, MEMF_bits(32));
+       d->arch.paging.hap.p2m_pages += 1;
+    }
+    else
+#endif
+    {
+       pg = mfn_to_page(hap_alloc(d));
+       
+       d->arch.paging.hap.p2m_pages += 1;
+       d->arch.paging.hap.total_pages -= 1;
+    }  
+
+    if ( pg == NULL ) {
+       hap_unlock(d);
+       return NULL;
+    }   
+
     hap_unlock(d);
 
-    pg = list_entry(entry, struct page_info, list);
+    page_set_owner(pg, d);
+    pg->count_info = 1;
     mfn = page_to_mfn(pg);
     p = hap_map_domain_page(mfn);
     clear_page(p);
@@ -141,6 +141,7 @@ void hap_free_p2m_page(struct domain *d,
     page_set_owner(pg, NULL); 
     free_domheap_pages(pg, 0);
     d->arch.paging.hap.p2m_pages--;
+    ASSERT( d->arch.paging.hap.p2m_pages >= 0 );
 }
 
 /* Return the size of the pool, rounded up to the nearest MB */
@@ -320,7 +321,7 @@ mfn_t hap_make_monitor_table(struct vcpu
 #if CONFIG_PAGING_LEVELS == 4
     {
         mfn_t m4mfn;
-        m4mfn = hap_alloc(d, 0);
+        m4mfn = hap_alloc(d);
         hap_install_xen_entries_in_l4(v, m4mfn, m4mfn);
         return m4mfn;
     }
@@ -331,12 +332,12 @@ mfn_t hap_make_monitor_table(struct vcpu
         l2_pgentry_t *l2e;
         int i;
 
-        m3mfn = hap_alloc(d, 0);
+        m3mfn = hap_alloc(d);
 
         /* Install a monitor l2 table in slot 3 of the l3 table.
          * This is used for all Xen entries, including linear maps
          */
-        m2mfn = hap_alloc(d, 0);
+        m2mfn = hap_alloc(d);
         l3e = hap_map_domain_page(m3mfn);
         l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
         hap_install_xen_entries_in_l2h(v, m2mfn);
@@ -357,7 +358,7 @@ mfn_t hap_make_monitor_table(struct vcpu
     {
         mfn_t m2mfn;
         
-        m2mfn = hap_alloc(d, 0);
+        m2mfn = hap_alloc(d);
         hap_install_xen_entries_in_l2(v, m2mfn, m2mfn);
     
         return m2mfn;
@@ -390,7 +391,6 @@ void hap_domain_init(struct domain *d)
 {
     hap_lock_init(d);
     INIT_LIST_HEAD(&d->arch.paging.hap.freelists);
-    INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist);
 }
 
 /* return 0 for success, -errno for failure */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c    Tue Apr 03 13:04:51 2007 -0600
@@ -2823,8 +2823,8 @@ static int sh_page_fault(struct vcpu *v,
          * are OK, this can only have been caused by a failed
          * shadow_set_l*e(), which will have crashed the guest.
          * Get out of the fault handler immediately. */
-        ASSERT(d->is_shutdown);
-        unmap_walk(v, &gw); 
+        ASSERT(d->is_shutting_down);
+        unmap_walk(v, &gw);
         shadow_unlock(d);
         return 0;
     }
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/setup.c      Tue Apr 03 13:04:51 2007 -0600
@@ -591,8 +591,6 @@ void __init __start_xen(multiboot_info_t
 
     numa_initmem_init(0, max_page);
 
-    end_boot_allocator();
-
     /* Initialise the Xen heap, skipping RAM holes. */
     nr_pages = 0;
     for ( i = 0; i < e820.nr_map; i++ )
@@ -617,6 +615,8 @@ void __init __start_xen(multiboot_info_t
     printk("Xen heap: %luMB (%lukB)\n", 
            nr_pages >> (20 - PAGE_SHIFT),
            nr_pages << (PAGE_SHIFT - 10));
+
+    end_boot_allocator();
 
     early_boot = 0;
 
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/time.c       Tue Apr 03 13:04:51 2007 -0600
@@ -573,7 +573,7 @@ static void init_platform_timer(void)
  * machines were long is 32-bit! (However, as time_t is signed, we
  * will already get problems at other places on 2038-01-19 03:14:08)
  */
-static inline unsigned long
+unsigned long
 mktime (unsigned int year, unsigned int mon,
         unsigned int day, unsigned int hour,
         unsigned int min, unsigned int sec)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/traps.c      Tue Apr 03 13:04:51 2007 -0600
@@ -285,23 +285,32 @@ void show_xen_trace()
     show_trace(&regs);
 }
 
-void show_stack_overflow(unsigned long esp)
+void show_stack_overflow(unsigned int cpu, unsigned long esp)
 {
 #ifdef MEMORY_GUARD
-    unsigned long esp_top;
+    unsigned long esp_top, esp_bottom;
     unsigned long *stack, addr;
 
-    esp_top = (esp | (STACK_SIZE - 1)) - DEBUG_STACK_SIZE;
+    esp_bottom = (esp | (STACK_SIZE - 1)) + 1;
+    esp_top    = esp_bottom - DEBUG_STACK_SIZE;
+
+    printk("Valid stack range: %p-%p, sp=%p, tss.esp0=%p\n",
+           (void *)esp_top, (void *)esp_bottom, (void *)esp,
+           (void *)init_tss[cpu].esp0);
 
     /* Trigger overflow trace if %esp is within 512 bytes of the guard page. */
     if ( ((unsigned long)(esp - esp_top) > 512) &&
          ((unsigned long)(esp_top - esp) > 512) )
+    {
+        printk("No stack overflow detected. Skipping stack trace.\n");
         return;
+    }
 
     if ( esp < esp_top )
         esp = esp_top;
 
-    printk("Xen stack overflow:\n   ");
+    printk("Xen stack overflow (dumping trace %p-%p):\n   ",
+           (void *)esp, (void *)esp_bottom);
 
     stack = (unsigned long *)esp;
     while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c       Tue Apr 03 13:04:51 2007 -0600
@@ -139,7 +139,7 @@ void show_page_walk(unsigned long addr)
     unmap_domain_page(l1t);
 }
 
-#define DOUBLEFAULT_STACK_SIZE 1024
+#define DOUBLEFAULT_STACK_SIZE 2048
 static struct tss_struct doublefault_tss;
 static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
 
@@ -167,7 +167,7 @@ asmlinkage void do_double_fault(void)
            tss->esi, tss->edi, tss->ebp, tss->esp);
     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
            tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
-    show_stack_overflow(tss->esp);
+    show_stack_overflow(cpu, tss->esp);
 
     panic("DOUBLE FAULT -- system shutdown\n");
 }
@@ -268,8 +268,7 @@ void __init percpu_traps_init(void)
     tss->ds     = __HYPERVISOR_DS;
     tss->es     = __HYPERVISOR_DS;
     tss->ss     = __HYPERVISOR_DS;
-    tss->esp    = (unsigned long)
-        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+    tss->esp    = (unsigned long)&doublefault_stack[DOUBLEFAULT_STACK_SIZE];
     tss->__cr3  = __pa(idle_pg_table);
     tss->cs     = __HYPERVISOR_CS;
     tss->eip    = (unsigned long)do_double_fault;
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c       Tue Apr 03 13:04:51 2007 -0600
@@ -171,7 +171,7 @@ asmlinkage void do_double_fault(struct c
     printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
            regs->r12, regs->r13, regs->r14);
     printk("r15: %016lx\n", regs->r15);
-    show_stack_overflow(regs->rsp);
+    show_stack_overflow(cpu, regs->rsp);
 
     panic("DOUBLE FAULT -- system shutdown\n");
 }
@@ -270,18 +270,18 @@ void __init percpu_traps_init(void)
     stack_bottom = (char *)get_stack_bottom();
     stack        = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
 
-    /* Double-fault handler has its own per-CPU 1kB stack. */
-    init_tss[cpu].ist[0] = (unsigned long)&stack[1024];
+    /* Double-fault handler has its own per-CPU 2kB stack. */
+    init_tss[cpu].ist[0] = (unsigned long)&stack[2048];
 
     /* NMI handler has its own per-CPU 1kB stack. */
-    init_tss[cpu].ist[1] = (unsigned long)&stack[2048];
+    init_tss[cpu].ist[1] = (unsigned long)&stack[3072];
 
     /*
      * Trampoline for SYSCALL entry from long mode.
      */
 
     /* Skip the NMI and DF stacks. */
-    stack = &stack[2048];
+    stack = &stack[3072];
     wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
 
     /* movq %rsp, saversp(%rip) */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/domain.c
--- a/xen/common/domain.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/domain.c       Tue Apr 03 13:04:51 2007 -0600
@@ -59,6 +59,7 @@ struct domain *alloc_domain(domid_t domi
     atomic_set(&d->refcnt, 1);
     spin_lock_init(&d->big_lock);
     spin_lock_init(&d->page_alloc_lock);
+    spin_lock_init(&d->shutdown_lock);
     INIT_LIST_HEAD(&d->page_list);
     INIT_LIST_HEAD(&d->xenpage_list);
 
@@ -83,6 +84,45 @@ void free_domain(struct domain *d)
     xfree(d);
 }
 
+static void __domain_finalise_shutdown(struct domain *d)
+{
+    struct vcpu *v;
+
+    BUG_ON(!spin_is_locked(&d->shutdown_lock));
+
+    if ( d->is_shut_down )
+        return;
+
+    for_each_vcpu ( d, v )
+        if ( !v->paused_for_shutdown )
+            return;
+
+    d->is_shut_down = 1;
+
+    for_each_vcpu ( d, v )
+        vcpu_sleep_nosync(v);
+
+    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+}
+
+static void vcpu_check_shutdown(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+
+    spin_lock(&d->shutdown_lock);
+
+    if ( d->is_shutting_down )
+    {
+        if ( !v->paused_for_shutdown )
+            atomic_inc(&v->pause_count);
+        v->paused_for_shutdown = 1;
+        v->defer_shutdown = 0;
+        __domain_finalise_shutdown(d);
+    }
+
+    spin_unlock(&d->shutdown_lock);
+}
+
 struct vcpu *alloc_vcpu(
     struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
 {
@@ -121,6 +161,9 @@ struct vcpu *alloc_vcpu(
     d->vcpu[vcpu_id] = v;
     if ( vcpu_id != 0 )
         d->vcpu[v->vcpu_id-1]->next_in_list = v;
+
+    /* Must be called after making new vcpu visible to for_each_vcpu(). */
+    vcpu_check_shutdown(v);
 
     return v;
 }
@@ -286,7 +329,7 @@ void domain_kill(struct domain *d)
 
 void __domain_crash(struct domain *d)
 {
-    if ( d->is_shutdown )
+    if ( d->is_shutting_down )
     {
         /* Print nothing: the domain is already shutting down. */
     }
@@ -335,16 +378,73 @@ void domain_shutdown(struct domain *d, u
     if ( d->domain_id == 0 )
         dom0_shutdown(reason);
 
-    atomic_inc(&d->pause_count);
-    if ( !xchg(&d->is_shutdown, 1) )
-        d->shutdown_code = reason;
-    else
-        domain_unpause(d);
+    spin_lock(&d->shutdown_lock);
+
+    if ( d->is_shutting_down )
+    {
+        spin_unlock(&d->shutdown_lock);
+        return;
+    }
+
+    d->is_shutting_down = 1;
+    d->shutdown_code = reason;
+
+    smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */
 
     for_each_vcpu ( d, v )
-        vcpu_sleep_nosync(v);
-
-    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+    {
+        if ( v->defer_shutdown )
+            continue;
+        atomic_inc(&v->pause_count);
+        v->paused_for_shutdown = 1;
+    }
+
+    __domain_finalise_shutdown(d);
+
+    spin_unlock(&d->shutdown_lock);
+}
+
+void domain_resume(struct domain *d)
+{
+    struct vcpu *v;
+
+    /*
+     * Some code paths assume that shutdown status does not get reset under
+     * their feet (e.g., some assertions make this assumption).
+     */
+    domain_pause(d);
+
+    spin_lock(&d->shutdown_lock);
+
+    d->is_shutting_down = d->is_shut_down = 0;
+
+    for_each_vcpu ( d, v )
+    {
+        if ( v->paused_for_shutdown )
+            vcpu_unpause(v);
+        v->paused_for_shutdown = 0;
+    }
+
+    spin_unlock(&d->shutdown_lock);
+
+    domain_unpause(d);
+}
+
+int vcpu_start_shutdown_deferral(struct vcpu *v)
+{
+    v->defer_shutdown = 1;
+    smp_mb(); /* set deferral status /then/ check for shutdown */
+    if ( unlikely(v->domain->is_shutting_down) )
+        vcpu_check_shutdown(v);
+    return v->defer_shutdown;
+}
+
+void vcpu_end_shutdown_deferral(struct vcpu *v)
+{
+    v->defer_shutdown = 0;
+    smp_mb(); /* clear deferral status /then/ check for shutdown */
+    if ( unlikely(v->domain->is_shutting_down) )
+        vcpu_check_shutdown(v);
 }
 
 void domain_pause_for_debugger(void)
@@ -425,7 +525,6 @@ void vcpu_pause_nosync(struct vcpu *v)
 
 void vcpu_unpause(struct vcpu *v)
 {
-    ASSERT(v != current);
     if ( atomic_dec_and_test(&v->pause_count) )
         vcpu_wake(v);
 }
@@ -445,8 +544,6 @@ void domain_unpause(struct domain *d)
 void domain_unpause(struct domain *d)
 {
     struct vcpu *v;
-
-    ASSERT(d != current->domain);
 
     if ( atomic_dec_and_test(&d->pause_count) )
         for_each_vcpu( d, v )
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/domctl.c
--- a/xen/common/domctl.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/domctl.c       Tue Apr 03 13:04:51 2007 -0600
@@ -115,7 +115,7 @@ void getdomaininfo(struct domain *d, str
 
     info->flags = flags |
         (d->is_dying                ? XEN_DOMINF_dying    : 0) |
-        (d->is_shutdown             ? XEN_DOMINF_shutdown : 0) |
+        (d->is_shut_down            ? XEN_DOMINF_shutdown : 0) |
         (d->is_paused_by_controller ? XEN_DOMINF_paused   : 0) |
         d->shutdown_code << XEN_DOMINF_shutdownshift;
 
@@ -287,8 +287,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         if ( d == NULL )
             break;
 
-        if ( xchg(&d->is_shutdown, 0) )
-            domain_unpause(d);
+        domain_resume(d);
         rcu_unlock_domain(d);
         ret = 0;
     }
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/page_alloc.c   Tue Apr 03 13:04:51 2007 -0600
@@ -512,6 +512,14 @@ void init_heap_pages(
 
     ASSERT(zone < NR_ZONES);
 
+    if ( unlikely(avail[0] == NULL) )
+    {
+        /* Start-of-day memory node 0 initialisation. */
+        init_heap_block(&_heap0);
+        _heap[0] = &_heap0;
+        avail[0] = avail0;
+    }
+
     if ( likely(page_to_mfn(pg) != 0) )
         nid_prev = phys_to_nid(page_to_maddr(pg-1));
     else
@@ -569,10 +577,6 @@ void end_boot_allocator(void)
 {
     unsigned long i;
     int curr_free, next_free;
-
-    init_heap_block(&_heap0);
-    _heap[0] = &_heap0;
-    avail[0] = avail0;
 
     /* Pages that are free now go to the domain sub-allocator. */
     if ( (curr_free = next_free = !allocated_in_map(first_valid_mfn)) )
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/symbols.c
--- a/xen/common/symbols.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/symbols.c      Tue Apr 03 13:04:51 2007 -0600
@@ -16,6 +16,7 @@
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <xen/string.h>
+#include <xen/spinlock.h>
 
 extern unsigned long symbols_addresses[];
 extern unsigned long symbols_num_syms;
@@ -140,12 +141,15 @@ void __print_symbol(const char *fmt, uns
 void __print_symbol(const char *fmt, unsigned long address)
 {
     const char *name;
-    unsigned long offset, size;
-    char namebuf[KSYM_NAME_LEN+1];
+    unsigned long offset, size, flags;
 
+    static DEFINE_SPINLOCK(lock);
+    static char namebuf[KSYM_NAME_LEN+1];
 #define BUFFER_SIZE sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + \
                        2*(BITS_PER_LONG*3/10) + 1
-    char buffer[BUFFER_SIZE];
+    static char buffer[BUFFER_SIZE];
+
+    spin_lock_irqsave(&lock, flags);
 
     name = symbols_lookup(address, &size, &offset, namebuf);
 
@@ -155,4 +159,6 @@ void __print_symbol(const char *fmt, uns
         snprintf(buffer, BUFFER_SIZE, "%s+%#lx/%#lx", name, offset, size);
 
     printk(fmt, buffer);
+
+    spin_unlock_irqrestore(&lock, flags);
 }
diff -r fc9e2f7920c9 -r f378c424e0ce xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/drivers/char/console.c        Tue Apr 03 13:04:51 2007 -0600
@@ -858,19 +858,20 @@ void panic(const char *fmt, ...)
 void panic(const char *fmt, ...)
 {
     va_list args;
-    char buf[128];
     unsigned long flags;
     static DEFINE_SPINLOCK(lock);
+    static char buf[128];
     
     debugtrace_dump();
+
+    /* Protects buf[] and ensure multi-line message prints atomically. */
+    spin_lock_irqsave(&lock, flags);
 
     va_start(args, fmt);
     (void)vsnprintf(buf, sizeof(buf), fmt, args);
     va_end(args);
 
-    /* Spit out multiline message in one go. */
     console_start_sync();
-    spin_lock_irqsave(&lock, flags);
     printk("\n****************************************\n");
     printk("Panic on CPU %d:\n", smp_processor_id());
     printk(buf);
@@ -879,6 +880,7 @@ void panic(const char *fmt, ...)
         printk("Manual reset required ('noreboot' specified)\n");
     else
         printk("Reboot in five seconds...\n");
+
     spin_unlock_irqrestore(&lock, flags);
 
     debugger_trap_immediate();
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/domain.h      Tue Apr 03 13:04:51 2007 -0600
@@ -115,7 +115,6 @@ struct hap_domain {
     const char       *locker_function;
     
     struct list_head  freelists;
-    struct list_head  p2m_freelist;
     unsigned int      total_pages;  /* number of pages allocated */
     unsigned int      free_pages;   /* number of pages on freelists */
     unsigned int      p2m_pages;    /* number of pages allocates to p2m */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/hvm/io.h
--- a/xen/include/asm-x86/hvm/io.h      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/hvm/io.h      Tue Apr 03 13:04:51 2007 -0600
@@ -127,6 +127,7 @@ static inline int hvm_portio_intercept(i
 }
 
 extern int hvm_mmio_intercept(ioreq_t *p);
+extern int hvm_buffered_io_send(ioreq_t *p);
 extern int hvm_buffered_io_intercept(ioreq_t *p);
 
 static inline int register_portio_handler(
@@ -145,6 +146,7 @@ static inline int irq_masked(unsigned lo
 
 extern void send_pio_req(unsigned long port, unsigned long count, int size,
                          paddr_t value, int dir, int df, int value_is_ptr);
+void send_timeoffset_req(unsigned long timeoff);
 extern void handle_mmio(unsigned long gpa);
 extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
 extern void hvm_io_assist(struct vcpu *v);
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/hvm/support.h Tue Apr 03 13:04:51 2007 -0600
@@ -215,7 +215,6 @@ int hvm_load(struct domain *d, hvm_domai
 /* End of save/restore */
 
 extern char hvm_io_bitmap[];
-extern char hvm_msr_bitmap[];
 extern int hvm_enabled;
 
 void hvm_enable(struct hvm_function_table *);
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h        Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h        Tue Apr 03 13:04:51 2007 -0600
@@ -121,6 +121,7 @@ extern u32 vmx_vmentry_control;
 
 #define cpu_has_vmx_msr_bitmap \
     (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
+extern char *vmx_msr_bitmap;
 
 /* VMCS Encordings */
 enum vmcs_field {
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/processor.h   Tue Apr 03 13:04:51 2007 -0600
@@ -413,9 +413,9 @@ struct tss_struct {
 struct tss_struct {
     unsigned short     back_link,__blh;
 #ifdef __x86_64__
-    u64 rsp0;
-    u64 rsp1;
-    u64 rsp2;
+    union { u64 rsp0, esp0; };
+    union { u64 rsp1, esp1; };
+    union { u64 rsp2, esp2; };
     u64 reserved1;
     u64 ist[7];
     u64 reserved2;
@@ -553,7 +553,7 @@ extern always_inline void prefetchw(cons
 
 void show_stack(struct cpu_user_regs *regs);
 void show_xen_trace(void);
-void show_stack_overflow(unsigned long esp);
+void show_stack_overflow(unsigned int cpu, unsigned long esp);
 void show_registers(struct cpu_user_regs *regs);
 void show_execution_state(struct cpu_user_regs *regs);
 void show_page_walk(unsigned long addr);
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h        Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/time.h        Tue Apr 03 13:04:51 2007 -0600
@@ -16,4 +16,9 @@ static inline cycles_t get_cycles(void)
     return c;
 }
 
+unsigned long
+mktime (unsigned int year, unsigned int mon,
+        unsigned int day, unsigned int hour,
+        unsigned int min, unsigned int sec);
+
 #endif /* __X86_TIME_H__ */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/public/hvm/ioreq.h
--- a/xen/include/public/hvm/ioreq.h    Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/public/hvm/ioreq.h    Tue Apr 03 13:04:51 2007 -0600
@@ -39,6 +39,7 @@
 #define IOREQ_TYPE_XOR          4
 #define IOREQ_TYPE_XCHG         5
 #define IOREQ_TYPE_ADD          6
+#define IOREQ_TYPE_TIMEOFFSET   7
 
 /*
  * VMExit dispatcher should cooperate with instruction decoder to
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/xen/sched.h   Tue Apr 03 13:04:51 2007 -0600
@@ -114,6 +114,10 @@ struct vcpu
     bool_t           nmi_pending;
     /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
     bool_t           nmi_masked;
+    /* Require shutdown to be deferred for some asynchronous operation? */
+    bool_t           defer_shutdown;
+    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
+    bool_t           paused_for_shutdown;
 
     unsigned long    pause_flags;
     atomic_t         pause_count;
@@ -193,7 +197,9 @@ struct domain
     bool_t           is_paused_by_controller;
 
     /* Guest has shut down (inc. reason code)? */
-    bool_t           is_shutdown;
+    spinlock_t       shutdown_lock;
+    bool_t           is_shutting_down; /* in process of shutting down? */
+    bool_t           is_shut_down;     /* fully shut down? */
     int              shutdown_code;
 
     atomic_t         pause_count;
@@ -331,7 +337,11 @@ void domain_destroy(struct domain *d);
 void domain_destroy(struct domain *d);
 void domain_kill(struct domain *d);
 void domain_shutdown(struct domain *d, u8 reason);
+void domain_resume(struct domain *d);
 void domain_pause_for_debugger(void);
+
+int vcpu_start_shutdown_deferral(struct vcpu *v);
+void vcpu_end_shutdown_deferral(struct vcpu *v);
 
 /*
  * Mark specified domain as crashed. This function always returns, even if the

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.