[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1182365367 21600
# Node ID 810885428743660169e7382ec9596373ca6ce48f
# Parent  c20bc60f9243d08199cb0a9a837cbe11c6b3dcdc
# Parent  005dd6b1cf8e0008aba7984b828274a40e8d7d95
merge with xen-unstable.hg
---
 docs/src/user.tex                       |    1 
 tools/blktap/drivers/Makefile           |    1 
 tools/blktap/drivers/block-aio.c        |   49 ++++-----
 tools/blktap/drivers/block-qcow.c       |   48 ++++-----
 tools/blktap/drivers/tapaio.c           |  164 ++++++++++++++++++++++++++++++++
 tools/blktap/drivers/tapaio.h           |   58 +++++++++++
 tools/examples/init.d/xendomains        |   33 ++++--
 tools/ioemu/block-raw.c                 |    2 
 tools/ioemu/target-i386-dm/exec-dm.c    |   42 ++++++--
 tools/ioemu/vl.c                        |   12 ++
 tools/libxc/xc_core.c                   |    2 
 tools/python/xen/xend/XendDomainInfo.py |    2 
 tools/python/xen/xend/server/blkif.py   |    5 
 xen/arch/ia64/xen/domain.c              |    9 -
 xen/arch/ia64/xen/xenmem.c              |    2 
 xen/arch/x86/apic.c                     |    4 
 xen/arch/x86/boot/cmdline.S             |   40 +++++--
 xen/arch/x86/boot/trampoline.S          |   11 --
 xen/arch/x86/boot/video.S               |   59 ++++++-----
 xen/arch/x86/boot/video.h               |    9 -
 xen/arch/x86/boot/x86_32.S              |    4 
 xen/arch/x86/boot/x86_64.S              |    2 
 xen/arch/x86/domain.c                   |   37 +++++--
 xen/arch/x86/domain_build.c             |    6 -
 xen/arch/x86/flushtlb.c                 |    4 
 xen/arch/x86/hvm/hvm.c                  |   21 ++--
 xen/arch/x86/hvm/irq.c                  |   81 ++++++++-------
 xen/arch/x86/hvm/svm/asid.c             |   72 +++++++-------
 xen/arch/x86/hvm/svm/intr.c             |  146 ++++++++++++++++------------
 xen/arch/x86/hvm/svm/svm.c              |   60 +++++------
 xen/arch/x86/hvm/svm/vmcb.c             |    6 -
 xen/arch/x86/hvm/vioapic.c              |   34 +++---
 xen/arch/x86/hvm/vlapic.c               |    9 -
 xen/arch/x86/hvm/vmx/intr.c             |  106 ++++++++++----------
 xen/arch/x86/hvm/vmx/vmcs.c             |    2 
 xen/arch/x86/hvm/vmx/vmx.c              |   59 ++++++++---
 xen/arch/x86/hvm/vpic.c                 |    3 
 xen/arch/x86/hvm/vpt.c                  |   40 ++++---
 xen/arch/x86/mm.c                       |   10 -
 xen/arch/x86/setup.c                    |   10 +
 xen/arch/x86/traps.c                    |   14 ++
 xen/arch/x86/x86_32/traps.c             |    1 
 xen/arch/x86/x86_64/compat_kexec.S      |   65 +++++++++++-
 xen/arch/x86/x86_64/traps.c             |    1 
 xen/common/compat/memory.c              |    7 +
 xen/common/domctl.c                     |    4 
 xen/common/grant_table.c                |   12 +-
 xen/common/kernel.c                     |   10 -
 xen/common/kexec.c                      |    4 
 xen/common/perfc.c                      |    2 
 xen/drivers/char/console.c              |    2 
 xen/drivers/video/vga.c                 |    3 
 xen/include/asm-ia64/guest_access.h     |   25 ++--
 xen/include/asm-x86/event.h             |    1 
 xen/include/asm-x86/guest_access.h      |   68 +++++++------
 xen/include/asm-x86/hvm/hvm.h           |   33 +++++-
 xen/include/asm-x86/hvm/irq.h           |   12 +-
 xen/include/asm-x86/hvm/support.h       |    1 
 xen/include/asm-x86/hvm/svm/asid.h      |    1 
 xen/include/asm-x86/hvm/vcpu.h          |    4 
 xen/include/asm-x86/hvm/vlapic.h        |    2 
 xen/include/asm-x86/hvm/vmx/vmx.h       |   13 +-
 xen/include/asm-x86/hvm/vpic.h          |    2 
 xen/include/asm-x86/hvm/vpt.h           |    3 
 xen/include/xen/compat.h                |   62 +++++++-----
 xen/include/xen/xencomm.h               |   43 ++++----
 66 files changed, 1080 insertions(+), 580 deletions(-)

diff -r c20bc60f9243 -r 810885428743 docs/src/user.tex
--- a/docs/src/user.tex Wed Jun 20 12:47:52 2007 -0600
+++ b/docs/src/user.tex Wed Jun 20 12:49:27 2007 -0600
@@ -3178,6 +3178,7 @@ editing \path{grub.conf}.
   \begin{description}
   \item[ ask ] Display a vga menu allowing manual selection of video
   mode.
+  \item[ current ] Use existing vga mode without modification.
   \item[ text-$<$mode$>$ ] Select text-mode resolution, where mode is
   one of 80x25, 80x28, 80x30, 80x34, 80x43, 80x50, 80x60.
   \item[ gfx-$<$mode$>$ ] Select VESA graphics mode
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile     Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/Makefile     Wed Jun 20 12:49:27 2007 -0600
@@ -35,6 +35,7 @@ BLK-OBJS  += block-ram.o
 BLK-OBJS  += block-ram.o
 BLK-OBJS  += block-qcow.o
 BLK-OBJS  += aes.o
+BLK-OBJS  += tapaio.o
 
 all: $(IBIN) qcow-util
 
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/block-aio.c
--- a/tools/blktap/drivers/block-aio.c  Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/block-aio.c  Wed Jun 20 12:49:27 2007 -0600
@@ -43,14 +43,7 @@
 #include <sys/ioctl.h>
 #include <linux/fs.h>
 #include "tapdisk.h"
-
-
-/**
- * We used a kernel patch to return an fd associated with the AIO context
- * so that we can concurrently poll on synchronous and async descriptors.
- * This is signalled by passing 1 as the io context to io_setup.
- */
-#define REQUEST_ASYNC_FD 1
+#include "tapaio.h"
 
 #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
 
@@ -65,14 +58,13 @@ struct tdaio_state {
        int fd;
        
        /* libaio state */
-       io_context_t       aio_ctx;
+       tap_aio_context_t  aio_ctx;
        struct iocb        iocb_list  [MAX_AIO_REQS];
        struct iocb       *iocb_free  [MAX_AIO_REQS];
        struct pending_aio pending_aio[MAX_AIO_REQS];
        int                iocb_free_count;
        struct iocb       *iocb_queue[MAX_AIO_REQS];
        int                iocb_queued;
-       int                poll_fd; /* NB: we require aio_poll support */
        struct io_event    aio_events[MAX_AIO_REQS];
 };
 
@@ -148,7 +140,7 @@ static inline void init_fds(struct disk_
        for(i = 0; i < MAX_IOFD; i++) 
                dd->io_fd[i] = 0;
 
-       dd->io_fd[0] = prv->poll_fd;
+       dd->io_fd[0] = prv->aio_ctx.pollfd;
 }
 
 /* Open the disk file and initialize aio state. */
@@ -162,12 +154,9 @@ int tdaio_open (struct disk_driver *dd, 
        /* Initialize AIO */
        prv->iocb_free_count = MAX_AIO_REQS;
        prv->iocb_queued     = 0;
-       
-       prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
-       prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
-
-       if (prv->poll_fd < 0) {
-               ret = prv->poll_fd;
+
+       ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
+       if (ret < 0) {
                 if (ret == -EAGAIN) {
                         DPRINTF("Couldn't setup AIO context.  If you are "
                                 "trying to concurrently use a large number "
@@ -176,9 +165,7 @@ int tdaio_open (struct disk_driver *dd, 
                                 "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
                                 "aio-max-nr')\n");
                 } else {
-                        DPRINTF("Couldn't get fd for AIO poll support.  This "
-                                "is probably because your kernel does not "
-                                "have the aio-poll patch applied.\n");
+                        DPRINTF("Couldn't setup AIO context.\n");
                 }
                goto done;
        }
@@ -286,7 +273,7 @@ int tdaio_submit(struct disk_driver *dd)
        if (!prv->iocb_queued)
                return 0;
 
-       ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+       ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, 
prv->iocb_queue);
        
        /* XXX: TODO: Handle error conditions here. */
        
@@ -300,7 +287,7 @@ int tdaio_close(struct disk_driver *dd)
 {
        struct tdaio_state *prv = (struct tdaio_state *)dd->private;
        
-       io_destroy(prv->aio_ctx);
+       io_destroy(prv->aio_ctx.aio_ctx);
        close(prv->fd);
 
        return 0;
@@ -308,15 +295,13 @@ int tdaio_close(struct disk_driver *dd)
 
 int tdaio_do_callbacks(struct disk_driver *dd, int sid)
 {
-       int ret, i, rsp = 0;
+       int i, nr_events, rsp = 0;
        struct io_event *ep;
        struct tdaio_state *prv = (struct tdaio_state *)dd->private;
 
-       /* Non-blocking test for completed io. */
-       ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
-                          NULL);
-                       
-       for (ep=prv->aio_events,i=ret; i-->0; ep++) {
+       nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+       for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
                struct iocb        *io  = ep->obj;
                struct pending_aio *pio;
                
@@ -327,6 +312,14 @@ int tdaio_do_callbacks(struct disk_drive
 
                prv->iocb_free[prv->iocb_free_count++] = io;
        }
+
+       if (nr_events) {
+               nr_events = tap_aio_more_events(&prv->aio_ctx);
+               goto repeat;
+       }
+
+       tap_aio_continue(&prv->aio_ctx);
+
        return rsp;
 }
 
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/block-qcow.c Wed Jun 20 12:49:27 2007 -0600
@@ -38,6 +38,7 @@
 #include "bswap.h"
 #include "aes.h"
 #include "tapdisk.h"
+#include "tapaio.h"
 
 #if 1
 #define ASSERT(_p) \
@@ -52,9 +53,6 @@
     (uint64_t)( \
         (l + (s - 1)) - ((l + (s - 1)) % s)); \
 })
-
-/******AIO DEFINES******/
-#define REQUEST_ASYNC_FD 1
 
 struct pending_aio {
         td_callback_t cb;
@@ -145,7 +143,7 @@ struct tdqcow_state {
        AES_KEY aes_encrypt_key;       /*AES key*/
        AES_KEY aes_decrypt_key;       /*AES key*/
         /* libaio state */
-        io_context_t        aio_ctx;
+        tap_aio_context_t   aio_ctx;
         int                 max_aio_reqs;
         struct iocb        *iocb_list;
         struct iocb       **iocb_free;
@@ -153,7 +151,6 @@ struct tdqcow_state {
         int                 iocb_free_count;
         struct iocb       **iocb_queue;
         int                 iocb_queued;
-        int                 poll_fd;      /* NB: we require aio_poll support */
         struct io_event    *aio_events;
 };
 
@@ -179,7 +176,7 @@ static void free_aio_state(struct disk_d
 
 static int init_aio_state(struct disk_driver *dd)
 {
-        int i;
+       int i, ret;
        struct td_state     *bs = dd->td_state;
        struct tdqcow_state  *s = (struct tdqcow_state *)dd->private;
         long     ioidx;
@@ -216,12 +213,9 @@ static int init_aio_state(struct disk_dr
                 goto fail;
         }
 
-        /*Signal kernel to create Poll FD for Asyc completion events*/
-        s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;   
-        s->poll_fd = io_setup(s->max_aio_reqs, &s->aio_ctx);
-
-       if (s->poll_fd < 0) {
-                if (s->poll_fd == -EAGAIN) {
+       ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs);
+       if (ret < 0) {
+                if (ret == -EAGAIN) {
                         DPRINTF("Couldn't setup AIO context.  If you are "
                                 "trying to concurrently use a large number "
                                 "of blktap-based disks, you may need to "
@@ -229,9 +223,7 @@ static int init_aio_state(struct disk_dr
                                 "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
                                 "aio-max-nr')\n");
                 } else {
-                        DPRINTF("Couldn't get fd for AIO poll support.  This "
-                                "is probably because your kernel does not "
-                                "have the aio-poll patch applied.\n");
+                        DPRINTF("Couldn't setup AIO context.\n");
                 }
                goto fail;
        }
@@ -845,7 +837,7 @@ static inline void init_fds(struct disk_
        for(i = 0; i < MAX_IOFD; i++) 
                dd->io_fd[i] = 0;
 
-       dd->io_fd[0] = s->poll_fd;
+       dd->io_fd[0] = s->aio_ctx.pollfd;
 }
 
 /* Open the disk file and initialize qcow state. */
@@ -1144,7 +1136,7 @@ int tdqcow_submit(struct disk_driver *dd
        if (!prv->iocb_queued)
                return 0;
 
-       ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+       ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, 
prv->iocb_queue);
 
         /* XXX: TODO: Handle error conditions here. */
 
@@ -1172,7 +1164,7 @@ int tdqcow_close(struct disk_driver *dd)
                close(fd);
        }
 
-       io_destroy(s->aio_ctx);
+       io_destroy(s->aio_ctx.aio_ctx);
        free(s->name);
        free(s->l1_table);
        free(s->l2_cache);
@@ -1184,17 +1176,15 @@ int tdqcow_close(struct disk_driver *dd)
 
 int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
 {
-        int ret, i, rsp = 0,*ptr;
+        int ret, i, nr_events, rsp = 0,*ptr;
         struct io_event *ep;
         struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
 
         if (sid > MAX_IOFD) return 1;
-       
-       /* Non-blocking test for completed io. */
-        ret = io_getevents(prv->aio_ctx, 0, prv->max_aio_reqs, prv->aio_events,
-                           NULL);
-
-        for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
+
+        nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+        for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
                 struct iocb        *io  = ep->obj;
                 struct pending_aio *pio;
 
@@ -1215,6 +1205,14 @@ int tdqcow_do_callbacks(struct disk_driv
 
                 prv->iocb_free[prv->iocb_free_count++] = io;
         }
+
+        if (nr_events) {
+                nr_events = tap_aio_more_events(&prv->aio_ctx);
+                goto repeat;
+        }
+
+        tap_aio_continue(&prv->aio_ctx);
+
         return rsp;
 }
 
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/tapaio.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.c     Wed Jun 20 12:49:27 2007 -0600
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "tapaio.h"
+#include "tapdisk.h"
+#include <unistd.h>
+
+/**
+ * We used a kernel patch to return an fd associated with the AIO context
+ * so that we can concurrently poll on synchronous and async descriptors.
+ * This is signalled by passing 1 as the io context to io_setup.
+ */
+#define REQUEST_ASYNC_FD 1
+
+/*
+ * If we don't have any way to do epoll on aio events in a normal kernel,
+ * wait for aio events in a separate thread and return completion status
+ * that via a pipe that can be waited on normally.
+ *
+ * To keep locking problems between the completion thread and the submit
+ * thread to a minimum, there's a handshake which allows only one thread
+ * to be doing work on the completion queue at a time:
+ *
+ * 1) main thread sends completion thread a command via the command pipe;
+ * 2) completion thread waits for aio events and returns the number
+ *    received on the completion pipe
+ * 3) main thread processes the received ctx->aio_events events
+ * 4) loop back to 1) to let the completion thread refill the aio_events
+ *    buffer.
+ *
+ * This workaround needs to disappear once the kernel provides a single
+ * mechanism for waiting on both aio and normal fd wakeups.
+ */
+static void *
+tap_aio_completion_thread(void *arg)
+{
+       tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
+       int command;
+       int nr_events;
+       int rc;
+
+       while (1) {
+               rc = read(ctx->command_fd[0], &command, sizeof(command));
+
+               do {
+                       rc = io_getevents(ctx->aio_ctx, 1,
+                                         ctx->max_aio_events, ctx->aio_events,
+                                         NULL);
+                       if (rc) {
+                               nr_events = rc;
+                               rc = write(ctx->completion_fd[1], &nr_events,
+                                          sizeof(nr_events));
+                       }
+               } while (!rc);
+       }
+}
+
+void
+tap_aio_continue(tap_aio_context_t *ctx)
+{
+        int cmd = 0;
+
+        if (!ctx->poll_in_thread)
+                return;
+
+        if (write(ctx->command_fd[1], &cmd, sizeof(cmd)) < 0)
+                DPRINTF("Cannot write to command pipe\n");
+}
+
+int
+tap_aio_setup(tap_aio_context_t *ctx,
+              struct io_event *aio_events,
+              int max_aio_events)
+{
+        int ret;
+
+        ctx->aio_events = aio_events;
+        ctx->max_aio_events = max_aio_events;
+        ctx->poll_in_thread = 0;
+
+        ctx->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
+        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+        if (ret < 0 && ret != -EINVAL)
+                return ret;
+        else if (ret > 0) {
+                ctx->pollfd = ret;
+                return ctx->pollfd;
+        }
+
+        ctx->aio_ctx = (io_context_t) 0;
+        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+        if (ret < 0)
+                return ret;
+
+        if ((ret = pipe(ctx->command_fd)) < 0) {
+                DPRINTF("Unable to create command pipe\n");
+                return -1;
+        }
+        if ((ret = pipe(ctx->completion_fd)) < 0) {
+                DPRINTF("Unable to create completion pipe\n");
+                return -1;
+        }
+
+        if ((ret = pthread_create(&ctx->aio_thread, NULL,
+                                  tap_aio_completion_thread, ctx)) != 0) {
+                DPRINTF("Unable to create completion thread\n");
+                return -1;
+        }
+
+        ctx->pollfd = ctx->completion_fd[0];
+        ctx->poll_in_thread = 1;
+
+        tap_aio_continue(ctx);
+
+        return 0;
+}
+
+int
+tap_aio_get_events(tap_aio_context_t *ctx)
+{
+        int nr_events = 0;
+
+        if (!ctx->poll_in_thread)
+                nr_events = io_getevents(ctx->aio_ctx, 1,
+                                         ctx->max_aio_events, ctx->aio_events, 
NULL);
+        else
+                read(ctx->completion_fd[0], &nr_events, sizeof(nr_events));
+
+        return nr_events;
+}
+
+int tap_aio_more_events(tap_aio_context_t *ctx)
+{
+        return io_getevents(ctx->aio_ctx, 0,
+                            ctx->max_aio_events, ctx->aio_events, NULL);
+}
+
+
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/tapaio.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.h     Wed Jun 20 12:49:27 2007 -0600
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __TAPAIO_H__
+#define __TAPAIO_H__
+
+#include <pthread.h>
+#include <libaio.h>
+
+struct tap_aio_context {
+        io_context_t     aio_ctx;
+
+        struct io_event *aio_events;
+        int              max_aio_events;
+
+        pthread_t        aio_thread;
+        int              command_fd[2];
+        int              completion_fd[2];
+        int              pollfd;
+        unsigned int     poll_in_thread : 1;
+};
+
+typedef struct tap_aio_context tap_aio_context_t;
+
+int  tap_aio_setup      (tap_aio_context_t *ctx,
+                         struct io_event *aio_events,
+                         int max_aio_events);
+void tap_aio_continue   (tap_aio_context_t *ctx);
+int  tap_aio_get_events (tap_aio_context_t *ctx);
+int  tap_aio_more_events(tap_aio_context_t *ctx);
+
+#endif /* __TAPAIO_H__ */
diff -r c20bc60f9243 -r 810885428743 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains  Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/examples/init.d/xendomains  Wed Jun 20 12:49:27 2007 -0600
@@ -182,25 +182,31 @@ rdnames()
 
 parseln()
 {
-    name=`echo "$1" | cut -c0-17`
-    name=${name%% *}
-    rest=`echo "$1" | cut -c18- `
-    read id mem cpu vcpu state tm < <(echo "$rest")
+    if [[ "$1" =~ "\(domain" ]]; then
+        name=;id=
+    else if [[ "$1" =~ "\(name" ]]; then
+        name=$(echo $1 | sed -e 's/^.*(name \(.*\))$/\1/')
+    else if [[ "$1" =~ "\(domid" ]]; then
+        id=$(echo $1 | sed -e 's/^.*(domid \(.*\))$/\1/')
+    fi; fi; fi
+
+    [ -n "$name" -a -n "$id" ] && return 0 || return 1
 }
 
 is_running()
 {
     rdname $1
     RC=1
+    name=;id=
     while read LN; do
-       parseln "$LN"
+       parseln "$LN" || continue
        if test $id = 0; then continue; fi
        case $name in 
            ($NM)
                RC=0
                ;;
        esac
-    done < <(xm list | grep -v '^Name')
+    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
     return $RC
 }
 
@@ -267,13 +273,14 @@ start()
 
 all_zombies()
 {
+    name=;id=
     while read LN; do
-       parseln "$LN"
+       parseln "$LN" || continue
        if test $id = 0; then continue; fi
        if test "$state" != "-b---d" -a "$state" != "-----d"; then
            return 1;
        fi
-    done < <(xm list | grep -v '^Name')
+    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
     return 0
 }
 
@@ -309,8 +316,9 @@ stop()
        rdnames
     fi
     echo -n "Shutting down Xen domains:"
+    name=;id=
     while read LN; do
-       parseln "$LN"
+       parseln "$LN" || continue
        if test $id = 0; then continue; fi
        echo -n " $name"
        if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
@@ -384,7 +392,7 @@ stop()
            fi
            kill $WDOG_PID >/dev/null 2>&1
        fi
-    done < <(xm list | grep -v '^Name')
+    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
 
     # NB. this shuts down ALL Xen domains (politely), not just the ones in
     # AUTODIR/*
@@ -409,15 +417,16 @@ stop()
 
 check_domain_up()
 {
+    name=;id=
     while read LN; do
-       parseln "$LN"
+       parseln "$LN" || continue
        if test $id = 0; then continue; fi
        case $name in 
            ($1)
                return 0
                ;;
        esac
-    done < <(xm list | grep -v "^Name")
+    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
     return 1
 }
 
diff -r c20bc60f9243 -r 810885428743 tools/ioemu/block-raw.c
--- a/tools/ioemu/block-raw.c   Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/block-raw.c   Wed Jun 20 12:49:27 2007 -0600
@@ -166,7 +166,7 @@ typedef struct RawAIOCB {
     struct RawAIOCB *next;
 } RawAIOCB;
 
-static int aio_sig_num = SIGUSR2;
+const int aio_sig_num = SIGUSR2;
 static RawAIOCB *first_aio; /* AIO issued */
 static int aio_initialized = 0;
 
diff -r c20bc60f9243 -r 810885428743 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c      Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/target-i386-dm/exec-dm.c      Wed Jun 20 12:49:27 2007 -0600
@@ -443,19 +443,40 @@ extern unsigned long logdirty_bitmap_siz
  * Forcing a word-sized read/write prevents the guest from seeing a partially
  * written word-sized atom.
  */
-void memcpy_words(void *dst, void *src, size_t n)
-{
-    while (n >= sizeof(long)) {
-        *((long *)dst) = *((long *)src);
-        dst = ((long *)dst) + 1;
-        src = ((long *)src) + 1;
-        n -= sizeof(long);
-    }
-
-    if (n & 4) {
+#if defined(__x86_64__) || defined(__i386__)
+static void memcpy_words(void *dst, void *src, size_t n)
+{
+    asm (
+        "   movl %%edx,%%ecx \n"
+#ifdef __x86_64
+        "   shrl $3,%%ecx    \n"
+        "   andl $7,%%edx    \n"
+        "   rep  movsq       \n"
+        "   test $4,%%edx    \n"
+        "   jz   1f          \n"
+        "   movsl            \n"
+#else /* __i386__ */
+        "   shrl $2,%%ecx    \n"
+        "   andl $3,%%edx    \n"
+        "   rep  movsl       \n"
+#endif
+        "1: test $2,%%edx    \n"
+        "   jz   1f          \n"
+        "   movsw            \n"
+        "1: test $1,%%edx    \n"
+        "   jz   1f          \n"
+        "   movsb            \n"
+        "1:                  \n"
+        : : "S" (src), "D" (dst), "d" (n) : "ecx" );
+}
+#else
+static void memcpy_words(void *dst, void *src, size_t n)
+{
+    while (n >= sizeof(uint32_t)) {
         *((uint32_t *)dst) = *((uint32_t *)src);
         dst = ((uint32_t *)dst) + 1;
         src = ((uint32_t *)src) + 1;
+        n -= sizeof(uint32_t);
     }
 
     if (n & 2) {
@@ -470,6 +491,7 @@ void memcpy_words(void *dst, void *src, 
         src = ((uint8_t *)src) + 1;
     }
 }
+#endif
 
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, 
                             int len, int is_write)
diff -r c20bc60f9243 -r 810885428743 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/vl.c  Wed Jun 20 12:49:27 2007 -0600
@@ -7059,6 +7059,18 @@ int main(int argc, char **argv)
 #endif
 
     char qemu_dm_logfilename[128];
+    
+    /* Ensure that SIGUSR2 is blocked by default when a new thread is created,
+       then only the threads that use the signal unblock it -- this fixes a
+       race condition in Qcow support where the AIO signal is misdelivered.  */
+    {
+        extern const int aio_sig_num;
+        sigset_t set;
+
+        sigemptyset(&set);
+        sigaddset(&set, aio_sig_num);
+        sigprocmask(SIG_BLOCK, &set, NULL);
+    }
 
     LIST_INIT (&vm_change_state_head);
 #ifndef _WIN32
diff -r c20bc60f9243 -r 810885428743 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/libxc/xc_core.c     Wed Jun 20 12:49:27 2007 -0600
@@ -156,7 +156,7 @@ struct xc_core_section_headers {
     Elf64_Shdr  *shdrs;
 };
 #define SHDR_INIT       16
-#define SHDR_INC        4
+#define SHDR_INC        4U
 
 static struct xc_core_section_headers*
 xc_core_shdr_init(void)
diff -r c20bc60f9243 -r 810885428743 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Wed Jun 20 12:49:27 2007 -0600
@@ -983,7 +983,7 @@ class XendDomainInfo:
                 self.info['VCPUs_live'] = vcpus
                 self._writeDom(self._vcpuDomDetails())
         else:
-            self.info['VCPUs_live'] = vcpus
+            self.info['VCPUs_max'] = vcpus
             xen.xend.XendDomain.instance().managed_config_save(self)
         log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
                  vcpus)
diff -r c20bc60f9243 -r 810885428743 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/python/xen/xend/server/blkif.py     Wed Jun 20 12:49:27 2007 -0600
@@ -98,6 +98,11 @@ class BlkifController(DevController):
 
         if (dev_type == 'cdrom' and new_front['device-type'] == 'cdrom' and
             dev == new_back['dev'] and mode == 'r'):
+            # dummy device
+            self.writeBackend(devid,
+                              'type', new_back['type'],
+                              'params', '')
+            # new backend-device
             self.writeBackend(devid,
                               'type', new_back['type'],
                               'params', new_back['params'])
diff -r c20bc60f9243 -r 810885428743 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/ia64/xen/domain.c        Wed Jun 20 12:49:27 2007 -0600
@@ -1146,9 +1146,8 @@ static void __init loaddomainelfimage(st
                        dom_imva = __va_ul(page_to_maddr(p));
                        if (filesz > 0) {
                                if (filesz >= PAGE_SIZE)
-                                       memcpy((void *) dom_imva,
-                                              (void *) elfaddr,
-                                              PAGE_SIZE);
+                                       copy_page((void *) dom_imva,
+                                                 (void *) elfaddr);
                                else {
                                        // copy partial page
                                        memcpy((void *) dom_imva,
@@ -1166,7 +1165,7 @@ static void __init loaddomainelfimage(st
                        }
                        else if (memsz > 0) {
                                 /* always zero out entire page */
-                               memset((void *) dom_imva, 0, PAGE_SIZE);
+                               clear_page((void *) dom_imva);
                        }
                        memsz -= PAGE_SIZE;
                        filesz -= PAGE_SIZE;
@@ -1367,7 +1366,7 @@ int __init construct_dom0(struct domain 
        if (start_info_page == NULL)
                panic("can't allocate start info page");
        si = page_to_virt(start_info_page);
-       memset(si, 0, PAGE_SIZE);
+       clear_page(si);
        snprintf(si->magic, sizeof(si->magic), "xen-%i.%i-ia64",
                xen_major_version(), xen_minor_version());
        si->nr_pages     = max_pages;
diff -r c20bc60f9243 -r 810885428743 xen/arch/ia64/xen/xenmem.c
--- a/xen/arch/ia64/xen/xenmem.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/ia64/xen/xenmem.c        Wed Jun 20 12:49:27 2007 -0600
@@ -90,7 +90,7 @@ alloc_dir_page(void)
                panic("Not enough memory for virtual frame table!\n");
        ++table_size;
        dir = mfn << PAGE_SHIFT;
-       memset(__va(dir), 0, PAGE_SIZE);
+       clear_page(__va(dir));
        return dir;
 }
 
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/apic.c       Wed Jun 20 12:49:27 2007 -0600
@@ -817,7 +817,7 @@ void __init init_apic_mappings(void)
      */
     if (!smp_found_config && detect_init_APIC()) {
         apic_phys = __pa(alloc_xenheap_page());
-        memset(__va(apic_phys), 0, PAGE_SIZE);
+        clear_page(__va(apic_phys));
     } else
         apic_phys = mp_lapic_addr;
 
@@ -852,7 +852,7 @@ void __init init_apic_mappings(void)
             } else {
 fake_ioapic_page:
                 ioapic_phys = __pa(alloc_xenheap_page());
-                memset(__va(ioapic_phys), 0, PAGE_SIZE);
+                clear_page(__va(ioapic_phys));
             }
             set_fixmap_nocache(idx, ioapic_phys);
             apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/cmdline.S
--- a/xen/arch/x86/boot/cmdline.S       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/cmdline.S       Wed Jun 20 12:49:27 2007 -0600
@@ -119,30 +119,31 @@ 3:      pop     %edi
         ret
 
 .Lfind_option:
-        push    %ebx
-        push    4+8(%esp)
-        push    4+8(%esp)
+        mov     4(%esp),%eax
+        dec     %eax
+        push    %ebx
+1:      pushl   4+8(%esp)
+        inc     %eax
+        push    %eax
         call    .Lstrstr
         add     $8,%esp
         test    %eax,%eax
         jz      3f
         cmp     %eax,4+4(%esp)
-        je      1f
+        je      2f
         cmpb    $' ',-1(%eax)
-        jne     2f
-1:      mov     %eax,%ebx
-        push    4+8(%esp)
+        jne     1b
+2:      mov     %eax,%ebx
+        pushl   4+8(%esp)
         call    .Lstrlen
         add     $4,%esp
-        xchg    %eax,%ebx
-        add     %eax,%ebx
+        xadd    %eax,%ebx
         cmpb    $'\0',(%ebx)
         je      3f
         cmpb    $' ',(%ebx)
         je      3f
         cmpb    $'=',(%ebx)
-        je      3f
-2:      xor     %eax,%eax
+        jne     1b
 3:      pop     %ebx
         ret
 
@@ -297,7 +298,7 @@ 1:      lodsw
         call    .Lstr_prefix
         add     $8,%esp
         test    %eax,%eax
-        jnz     .Lcmdline_exit
+        jnz     .Lparse_vga_current
 
         /* We have 'vga=mode-<mode>'. */
         add     $5,%ebx
@@ -305,6 +306,19 @@ 1:      lodsw
         call    .Latoi
         add     $4,%esp
         mov     %ax,bootsym_phys(boot_vid_mode)
+        jmp     .Lcmdline_exit
+
+.Lparse_vga_current:
+        /* Check for 'vga=current'. */
+        push    %ebx
+        pushl   $sym_phys(.Lvga_current)
+        call    .Lstr_prefix
+        add     $8,%esp
+        test    %eax,%eax
+        jnz     .Lcmdline_exit
+
+        /* We have 'vga=current'. */
+        movw    $VIDEO_CURRENT_MODE,bootsym_phys(boot_vid_mode)
 
 .Lcmdline_exit:
         popa
@@ -328,6 +342,8 @@ 1:      lodsw
         .asciz  "gfx-"
 .Lvga_mode:
         .asciz  "mode-"
+.Lvga_current:
+        .asciz  "current"
 .Lno_rm_opt:
         .asciz  "no-real-mode"
 .Ledid_opt:
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/trampoline.S
--- a/xen/arch/x86/boot/trampoline.S    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/trampoline.S    Wed Jun 20 12:49:27 2007 -0600
@@ -13,12 +13,11 @@ trampoline_realmode_entry:
         cli
         lidt    bootsym(idt_48)
         lgdt    bootsym(gdt_48)
+        mov     $1,%bl                    # EBX != 0 indicates we are an AP
         xor     %ax, %ax
         inc     %ax
         lmsw    %ax                       # CR0.PE = 1 (enter protected mode)
-        mov     $1,%bl                    # EBX != 0 indicates we are an AP
-        jmp     1f
-1:      ljmpl   $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
+        ljmpl   $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
 
 idt_48: .word   0, 0, 0 # base = limit = 0
 gdt_48: .word   6*8-1
@@ -135,10 +134,9 @@ trampoline_boot_cpu_entry:
         ljmp    $BOOT_PSEUDORM_CS,$bootsym(1f)
         .code16
 1:      mov     %eax,%cr0                 # CR0.PE = 0 (leave protected mode)
-        jmp     1f
 
         /* Load proper real-mode values into %cs, %ds, %es and %ss. */
-1:      ljmp    $(BOOT_TRAMPOLINE>>4),$bootsym(1f)
+        ljmp    $(BOOT_TRAMPOLINE>>4),$bootsym(1f)
 1:      mov     $(BOOT_TRAMPOLINE>>4),%ax
         mov     %ax,%ds
         mov     %ax,%es
@@ -166,10 +164,9 @@ 1:      mov     $(BOOT_TRAMPOLINE>>4),%a
         xor     %ax,%ax
         inc     %ax
         lmsw    %ax                       # CR0.PE = 1 (enter protected mode)
-        jmp     1f
 
         /* Load proper protected-mode values into all segment registers. */
-1:      ljmpl   $BOOT_CS32,$bootsym_phys(1f)
+        ljmpl   $BOOT_CS32,$bootsym_phys(1f)
         .code32
 1:      mov     $BOOT_DS,%eax
         mov     %eax,%ds
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/video.S
--- a/xen/arch/x86/boot/video.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/video.S Wed Jun 20 12:49:27 2007 -0600
@@ -15,7 +15,10 @@
 
 #include "video.h"
 
-#define modelist (0x3000)
+/* Scratch space layout. */
+#define modelist       (0x3000)
+#define vesa_glob_info (modelist + 1024)
+#define vesa_mode_info (vesa_glob_info + 1024)
 
 /* Retrieve Extended Display Identification Data. */
 #define CONFIG_FIRMWARE_EDID
@@ -109,7 +112,7 @@ mopar2: movb    %al, _param(PARAM_VIDEO_
 
 # Fetching of VESA frame buffer parameters
 mopar_gr:
-        leaw    modelist+1024, %di
+        leaw    vesa_mode_info, %di
         movb    $0x23, _param(PARAM_HAVE_VGA)
         movw    16(%di), %ax
         movw    %ax, _param(PARAM_LFB_LINELENGTH)
@@ -128,9 +131,7 @@ mopar_gr:
         movl    %eax, _param(PARAM_LFB_COLORS+4)
 
 # get video mem size
-        leaw    modelist+1024, %di
-        movw    $0x4f00, %ax
-        int     $0x10
+        leaw    vesa_glob_info, %di
         xorl    %eax, %eax
         movw    18(%di), %ax
         movl    %eax, _param(PARAM_LFB_SIZE)
@@ -183,7 +184,10 @@ dac_done:
 
         movw    %es, _param(PARAM_VESAPM_SEG)
         movw    %di, _param(PARAM_VESAPM_OFF)
-no_pm:  ret
+
+no_pm:  pushw   %ds
+        popw    %es
+        ret
 
 # The video mode menu
 mode_menu:
@@ -428,17 +432,13 @@ setmenu:
         jmp     mode_set
 
 check_vesa:
-#ifdef CONFIG_FIRMWARE_EDID
-        leaw    modelist+1024, %di
+        leaw    vesa_glob_info, %di
         movw    $0x4f00, %ax
         int     $0x10
         cmpw    $0x004f, %ax
         jnz     setbad
 
-        movw    4(%di), %ax
-        movw    %ax, bootsym(vbe_version)
-#endif
-        leaw    modelist+1024, %di
+        leaw    vesa_mode_info, %di
         subb    $VIDEO_FIRST_VESA>>8, %bh
         movw    %bx, %cx                # Get mode information structure
         movw    $0x4f01, %ax
@@ -447,7 +447,7 @@ check_vesa:
         cmpw    $0x004f, %ax
         jnz     setbad
 
-        movb    (%di), %al              # Check capabilities.
+        movb    (%di), %al              # Check mode attributes.
         andb    $0x99, %al
         cmpb    $0x99, %al
         jnz     _setbad                 # Doh! No linear frame buffer.
@@ -530,6 +530,7 @@ spec_inits:
         .word   bootsym(set_8pixel)
         .word   bootsym(set_80x43)
         .word   bootsym(set_80x28)
+        .word   bootsym(set_current)
         .word   bootsym(set_80x30)
         .word   bootsym(set_80x34)
         .word   bootsym(set_80x60)
@@ -575,6 +576,7 @@ set14:  movw    $0x1111, %ax            
         movb    $0x01, %ah              # Define cursor scan lines 11-12
         movw    $0x0b0c, %cx
         int     $0x10
+set_current:
         stc
         ret
 
@@ -695,33 +697,34 @@ vga_modes_end:
 # Detect VESA modes.
 vesa_modes:
         movw    %di, %bp                # BP=original mode table end
-        addw    $0x200, %di             # Buffer space
+        leaw    vesa_glob_info, %di
         movw    $0x4f00, %ax            # VESA Get card info call
         int     $0x10
+        movw    %di, %si
         movw    %bp, %di
         cmpw    $0x004f, %ax            # Successful?
         jnz     ret0
         
-        cmpw    $0x4556, 0x200(%di)     # 'VE'
+        cmpw    $0x4556, (%si)          # 'VE'
         jnz     ret0
         
-        cmpw    $0x4153, 0x202(%di)     # 'SA'
+        cmpw    $0x4153, 2(%si)         # 'SA'
         jnz     ret0
         
         movw    $bootsym(vesa_name), bootsym(card_name) # Set name to "VESA 
VGA"
         pushw   %gs
-        lgsw    0x20e(%di), %si         # GS:SI=mode list
+        lgsw    0xe(%si), %si           # GS:SI=mode list
         movw    $128, %cx               # Iteration limit
 vesa1:
         gs;     lodsw
-        cmpw    $0xffff, %ax                        # End of the table?
+        cmpw    $0xffff, %ax            # End of the table?
         jz      vesar
         
-        cmpw    $0x0080, %ax                        # Check validity of mode ID
+        cmpw    $0x0080, %ax            # Check validity of mode ID
         jc      vesa2
         
-        orb     %ah, %ah        # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
-        jz      vesan                # Certain BIOSes report 0x80-0xff!
+        orb     %ah, %ah                # Valid IDs 0x0000-0x007f/0x0100-0x07ff
+        jz      vesan                   # Certain BIOSes report 0x80-0xff!
 
         cmpw    $0x0800, %ax
         jnc     vesae
@@ -891,8 +894,13 @@ store_edid:
         cmpb    $1, bootsym(opt_edid)   # EDID disabled on cmdline (edid=no)?
         je      .Lno_edid
 
-        cmpw    $0x0200, bootsym(vbe_version)  # only do EDID on >= VBE2.0
-        jl      .Lno_edid
+        leaw    vesa_glob_info, %di
+        movw    $0x4f00, %ax
+        int     $0x10
+        cmpw    $0x004f, %ax
+        jne     .Lno_edid
+        cmpw    $0x0200, 4(%di)         # only do EDID on >= VBE2.0
+        jb      .Lno_edid
 
         xorw    %di, %di                # Report Capability
         pushw   %di
@@ -901,6 +909,8 @@ store_edid:
         xorw    %bx, %bx
         xorw    %cx, %cx
         int     $0x10
+        pushw   %ds
+        popw    %es
         cmpw    $0x004f, %ax            # Call failed?
         jne     .Lno_edid
 
@@ -920,8 +930,6 @@ store_edid:
         movw    $0x01, %bx
         movw    $0x00, %cx
         movw    $0x00, %dx
-        pushw   %ds
-        popw    %es
         movw    $bootsym(boot_edid_info), %di
         int     $0x10
 
@@ -940,7 +948,6 @@ card_name:      .word   0       # Pointe
 card_name:      .word   0       # Pointer to adapter name
 graphic_mode:   .byte   0       # Graphic mode with a linear frame buffer
 dac_size:       .byte   6       # DAC bit depth
-vbe_version:    .word   0       # VBE bios version
 
 # Status messages
 keymsg:         .ascii  "Press <RETURN> to see video modes available,"
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/video.h
--- a/xen/arch/x86/boot/video.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/video.h Wed Jun 20 12:49:27 2007 -0600
@@ -16,10 +16,11 @@
 #define VIDEO_80x50         0x0f01
 #define VIDEO_80x43         0x0f02
 #define VIDEO_80x28         0x0f03
-#define VIDEO_80x30         0x0f04
-#define VIDEO_80x34         0x0f05
-#define VIDEO_80x60         0x0f06
-#define VIDEO_LAST_SPECIAL  0x0f07
+#define VIDEO_CURRENT_MODE  0x0f04
+#define VIDEO_80x30         0x0f05
+#define VIDEO_80x34         0x0f06
+#define VIDEO_80x60         0x0f07
+#define VIDEO_LAST_SPECIAL  0x0f08
 
 #define ASK_VGA             0xfffd
 #define VIDEO_VESA_BY_SIZE  0xffff
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/x86_32.S        Wed Jun 20 12:49:27 2007 -0600
@@ -30,9 +30,7 @@ 1:      mov     %eax,(%edi)
         loop    1b
                 
         /* Pass off the Multiboot info structure to C land. */
-        mov     multiboot_ptr,%eax
-        add     $__PAGE_OFFSET,%eax
-        push    %eax
+        pushl   multiboot_ptr
         call    __start_xen
         ud2     /* Force a panic (invalid opcode). */
 
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/x86_64.S        Wed Jun 20 12:49:27 2007 -0600
@@ -51,8 +51,6 @@ 1:      movq    %rax,(%rdi)
 
         /* Pass off the Multiboot info structure to C land. */
         mov     multiboot_ptr(%rip),%edi
-        lea     start-0x100000(%rip),%rax
-        add     %rax,%rdi
         call    __start_xen
         ud2     /* Force a panic (invalid opcode). */
 
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/domain.c     Wed Jun 20 12:49:27 2007 -0600
@@ -232,26 +232,28 @@ static int setup_compat_l4(struct vcpu *
     l4_pgentry_t *l4tab;
     int rc;
 
-    if ( !pg )
+    if ( pg == NULL )
         return -ENOMEM;
 
     /* This page needs to look like a pagetable so that it can be shadowed */
     pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated;
 
     l4tab = copy_page(page_to_virt(pg), idle_pg_table);
+    l4tab[0] = l4e_empty();
     l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_page(pg, __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
         l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3),
                        __PAGE_HYPERVISOR);
+
+    if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
+    {
+        free_domheap_page(pg);
+        return rc;
+    }
+
     v->arch.guest_table = pagetable_from_page(pg);
     v->arch.guest_table_user = v->arch.guest_table;
-
-    if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
-    {
-        free_domheap_page(pg);
-        return rc;
-    }
 
     return 0;
 }
@@ -318,11 +320,11 @@ int switch_compat(struct domain *d)
     gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
     for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
     {
+        if ( (d->vcpu[vcpuid] != NULL) &&
+             (setup_compat_l4(d->vcpu[vcpuid]) != 0) )
+            goto undo_and_fail;
         d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
                                  FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
-        if (d->vcpu[vcpuid]
-            && setup_compat_l4(d->vcpu[vcpuid]) != 0)
-            return -ENOMEM;
     }
 
     d->arch.physaddr_bitsize =
@@ -330,6 +332,19 @@ int switch_compat(struct domain *d)
         + (PAGE_SIZE - 2);
 
     return 0;
+
+ undo_and_fail:
+    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
+    release_arg_xlat_area(d);
+    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
+    while ( vcpuid-- != 0 )
+    {
+        if ( d->vcpu[vcpuid] != NULL )
+            release_compat_l4(d->vcpu[vcpuid]);
+        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
+                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
+    }
+    return -ENOMEM;
 }
 
 #else
@@ -461,7 +476,7 @@ int arch_domain_create(struct domain *d)
         if ( (d->shared_info = alloc_xenheap_page()) == NULL )
             goto fail;
 
-        memset(d->shared_info, 0, PAGE_SIZE);
+        clear_page(d->shared_info);
         share_xen_page_with_guest(
             virt_to_page(d->shared_info), d, XENSHARE_writable);
     }
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/domain_build.c       Wed Jun 20 12:49:27 2007 -0600
@@ -505,7 +505,7 @@ int __init construct_dom0(
     v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
 #else
     l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
-    memcpy(l2tab, idle_pg_table, PAGE_SIZE);
+    copy_page(l2tab, idle_pg_table);
     l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
         l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
     v->arch.guest_table = pagetable_from_paddr((unsigned long)l2start);
@@ -645,7 +645,7 @@ int __init construct_dom0(
             panic("Not enough RAM for domain 0 PML4.\n");
         l4start = l4tab = page_to_virt(page);
     }
-    memcpy(l4tab, idle_pg_table, PAGE_SIZE);
+    copy_page(l4tab, idle_pg_table);
     l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
@@ -823,7 +823,7 @@ int __init construct_dom0(
 
     /* Set up start info area. */
     si = (start_info_t *)vstartinfo_start;
-    memset(si, 0, PAGE_SIZE);
+    clear_page(si);
     si->nr_pages = nr_pages;
 
     si->shared_info = virt_to_maddr(d->shared_info);
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/flushtlb.c
--- a/xen/arch/x86/flushtlb.c   Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/flushtlb.c   Wed Jun 20 12:49:27 2007 -0600
@@ -80,6 +80,8 @@ void write_cr3(unsigned long cr3)
 
     t = pre_flush();
 
+    hvm_flush_guest_tlbs();
+
 #ifdef USER_MAPPINGS_ARE_GLOBAL
     __pge_off();
     __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
@@ -103,6 +105,8 @@ void local_flush_tlb(void)
 
     t = pre_flush();
 
+    hvm_flush_guest_tlbs();
+
 #ifdef USER_MAPPINGS_ARE_GLOBAL
     __pge_off();
     __pge_on();
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Wed Jun 20 12:49:27 2007 -0600
@@ -831,11 +831,24 @@ void hvm_update_guest_cr3(struct vcpu *v
     hvm_funcs.update_guest_cr3(v);
 }
 
+static void hvm_latch_shinfo_size(struct domain *d)
+{
+    /*
+     * Called from operations which are among the very first executed by
+     * PV drivers on initialisation or after save/restore. These are sensible
+     * points at which to sample the execution mode of the guest and latch
+     * 32- or 64-bit format for shared state.
+     */
+    if ( current->domain == d )
+        d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
+}
+
 /* Initialise a hypercall transfer page for a VMX domain using
    paravirtualised drivers. */
 void hvm_hypercall_page_initialise(struct domain *d,
                                    void *hypercall_page)
 {
+    hvm_latch_shinfo_size(d);
     hvm_funcs.init_hypercall_page(d, hypercall_page);
 }
 
@@ -1065,13 +1078,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
                 break;
             case HVM_PARAM_CALLBACK_IRQ:
                 hvm_set_callback_via(d, a.value);
-                /*
-                 * Since this operation is one of the very first executed
-                 * by PV drivers on initialisation or after save/restore, it
-                 * is a sensible point at which to sample the execution mode of
-                 * the guest and latch 32- or 64-bit format for shared state.
-                 */
-                d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
+                hvm_latch_shinfo_size(d);
                 break;
             }
             d->arch.hvm_domain.params[a.index] = a.value;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/irq.c    Wed Jun 20 12:49:27 2007 -0600
@@ -285,43 +285,49 @@ void hvm_set_callback_via(struct domain 
     }
 }
 
-int cpu_has_pending_irq(struct vcpu *v)
+enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v)
 {
     struct hvm_domain *plat = &v->domain->arch.hvm_domain;
 
-    /* APIC */
+    if ( unlikely(v->arch.hvm_vcpu.nmi_pending) )
+        return hvm_intack_nmi;
+
     if ( vlapic_has_interrupt(v) != -1 )
-        return 1;
-
-    /* PIC */
+        return hvm_intack_lapic;
+
     if ( !vlapic_accept_pic_intr(v) )
-        return 0;
-
-    return plat->vpic[0].int_output;
-}
-
-int cpu_get_interrupt(struct vcpu *v, int *type)
-{
-    int vector;
-
-    if ( (vector = cpu_get_apic_interrupt(v, type)) != -1 )
-        return vector;
-
-    if ( (v->vcpu_id == 0) &&
-         ((vector = cpu_get_pic_interrupt(v, type)) != -1) )
-        return vector;
-
-    return -1;
-}
-
-int get_isa_irq_vector(struct vcpu *v, int isa_irq, int type)
+        return hvm_intack_none;
+
+    return plat->vpic[0].int_output ? hvm_intack_pic : hvm_intack_none;
+}
+
+int hvm_vcpu_ack_pending_irq(struct vcpu *v, enum hvm_intack type, int *vector)
+{
+    switch ( type )
+    {
+    case hvm_intack_nmi:
+        return test_and_clear_bool(v->arch.hvm_vcpu.nmi_pending);
+    case hvm_intack_lapic:
+        return ((*vector = cpu_get_apic_interrupt(v)) != -1);
+    case hvm_intack_pic:
+        ASSERT(v->vcpu_id == 0);
+        return ((*vector = cpu_get_pic_interrupt(v)) != -1);
+    default:
+        break;
+    }
+
+    return 0;
+}
+
+int get_isa_irq_vector(struct vcpu *v, int isa_irq, enum hvm_intack src)
 {
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
 
-    if ( type == APIC_DM_EXTINT )
+    if ( src == hvm_intack_pic )
         return (v->domain->arch.hvm_domain.vpic[isa_irq >> 3].irq_base
                 + (isa_irq & 7));
 
+    ASSERT(src == hvm_intack_lapic);
     return domain_vioapic(v->domain)->redirtbl[gsi].fields.vector;
 }
 
@@ -337,19 +343,20 @@ int is_isa_irq_masked(struct vcpu *v, in
             domain_vioapic(v->domain)->redirtbl[gsi].fields.mask);
 }
 
-/*
- * TODO: 1. Should not need special treatment of event-channel events.
- *       2. Should take notice of interrupt shadows (or clear them).
- */
 int hvm_local_events_need_delivery(struct vcpu *v)
 {
-    int pending;
-
-    pending = (vcpu_info(v, evtchn_upcall_pending) || cpu_has_pending_irq(v));
-    if ( unlikely(pending) )
-        pending = hvm_interrupts_enabled(v); 
-
-    return pending;
+    enum hvm_intack type;
+
+    /* TODO: Get rid of event-channel special case. */
+    if ( vcpu_info(v, evtchn_upcall_pending) )
+        type = hvm_intack_pic;
+    else
+        type = hvm_vcpu_has_pending_irq(v);
+
+    if ( likely(type == hvm_intack_none) )
+        return 0;
+
+    return hvm_interrupts_enabled(v, type);
 }
 
 #if 0 /* Keep for debugging */
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/asid.c
--- a/xen/arch/x86/hvm/svm/asid.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/asid.c       Wed Jun 20 12:49:27 2007 -0600
@@ -60,7 +60,7 @@ struct svm_asid_data {
    u64 core_asid_generation;
    u32 next_asid;
    u32 max_asid;
-   u32 erratum170;
+   u32 erratum170:1;
 };
 
 static DEFINE_PER_CPU(struct svm_asid_data, svm_asid_data);
@@ -140,25 +140,21 @@ void svm_asid_init_vcpu(struct vcpu *v)
 }
 
 /*
- * Increase the Generation to make free ASIDs.  Flush physical TLB and give
- * ASID.
- */
-static void svm_asid_handle_inc_generation(struct vcpu *v)
-{
-    struct svm_asid_data *data = svm_asid_core_data();
-
-    if ( likely(data->core_asid_generation <  SVM_ASID_LAST_GENERATION) )
-    {
-        /* Handle ASID overflow. */
+ * Increase the Generation to make free ASIDs, and indirectly cause a 
+ * TLB flush of all ASIDs on the next vmrun.
+ */
+void svm_asid_inc_generation(void)
+{
+    struct svm_asid_data *data = svm_asid_core_data();
+
+    if ( likely(data->core_asid_generation < SVM_ASID_LAST_GENERATION) )
+    {
+        /* Move to the next generation.  We can't flush the TLB now
+         * because you need to vmrun to do that, and current might not
+         * be a HVM vcpu, but the first HVM vcpu that runs after this 
+         * will pick up ASID 1 and flush the TLBs. */
         data->core_asid_generation++;
-        data->next_asid = SVM_ASID_FIRST_GUEST_ASID + 1;
-
-        /* Handle VCPU. */
-        v->arch.hvm_svm.vmcb->guest_asid = SVM_ASID_FIRST_GUEST_ASID;
-        v->arch.hvm_svm.asid_generation  = data->core_asid_generation;
-
-        /* Trigger flush of physical TLB. */
-        v->arch.hvm_svm.vmcb->tlb_control = 1;
+        data->next_asid = SVM_ASID_FIRST_GUEST_ASID;
         return;
     }
 
@@ -168,11 +164,12 @@ static void svm_asid_handle_inc_generati
      * this core (flushing TLB always). So correctness is established; it
      * only runs a bit slower.
      */
-    printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n");
-    data->erratum170 = 1;
-    data->core_asid_generation = SVM_ASID_INVALID_GENERATION;
-
-    svm_asid_init_vcpu(v);
+    if ( !data->erratum170 )
+    {
+        printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n");
+        data->erratum170 = 1;
+        data->core_asid_generation = SVM_ASID_INVALID_GENERATION;
+    }
 }
 
 /*
@@ -202,18 +199,21 @@ asmlinkage void svm_asid_handle_vmrun(vo
         return;
     }
 
-    /* Different ASID generations trigger fetching of a fresh ASID. */
-    if ( likely(data->next_asid <= data->max_asid) )
-    {
-        /* There is a free ASID. */
-        v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++;
-        v->arch.hvm_svm.asid_generation  = data->core_asid_generation;
-        v->arch.hvm_svm.vmcb->tlb_control = 0;
-        return;
-    }
-
-    /* Slow path, may cause TLB flush. */
-    svm_asid_handle_inc_generation(v);
+    /* If there are no free ASIDs, need to go to a new generation */
+    if ( unlikely(data->next_asid > data->max_asid) )
+        svm_asid_inc_generation();
+
+    /* Now guaranteed to be a free ASID. */
+    v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++;
+    v->arch.hvm_svm.asid_generation  = data->core_asid_generation;
+
+    /* When we assign ASID 1, flush all TLB entries.  We need to do it 
+     * here because svm_asid_inc_generation() can be called at any time, 
+     * but the TLB flush can only happen on vmrun. */
+    if ( v->arch.hvm_svm.vmcb->guest_asid == SVM_ASID_FIRST_GUEST_ASID )
+        v->arch.hvm_svm.vmcb->tlb_control = 1;
+    else
+        v->arch.hvm_svm.vmcb->tlb_control = 0;
 }
 
 void svm_asid_inv_asid(struct vcpu *v)
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c       Wed Jun 20 12:49:27 2007 -0600
@@ -15,7 +15,6 @@
  * You should have received a copy of the GNU General Public License along with
  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
  */
 
 #include <xen/config.h>
@@ -39,100 +38,119 @@
 #include <xen/domain_page.h>
 #include <asm/hvm/trace.h>
 
-/*
- * Most of this code is copied from vmx_io.c and modified 
- * to be suitable for SVM.
- */
-
-static inline int svm_inject_extint(struct vcpu *v, int trap)
+static void svm_inject_dummy_vintr(struct vcpu *v)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     vintr_t intr = vmcb->vintr;
 
-    /* Update only relevant fields */    
     intr.fields.irq = 1;
     intr.fields.intr_masking = 1;
-    intr.fields.vector = trap;
+    intr.fields.vector = 0;
     intr.fields.prio = 0xF;
     intr.fields.ign_tpr = 1;
     vmcb->vintr = intr;
+}
+    
+static void svm_inject_nmi(struct vcpu *v)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    eventinj_t event;
 
-    return 0;
+    event.bytes = 0;
+    event.fields.v = 1;
+    event.fields.type = EVENTTYPE_NMI;
+    event.fields.vector = 2;
+
+    ASSERT(vmcb->eventinj.fields.v == 0);
+    vmcb->eventinj = event;
+}
+    
+static void svm_inject_extint(struct vcpu *v, int vector)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    eventinj_t event;
+
+    event.bytes = 0;
+    event.fields.v = 1;
+    event.fields.type = EVENTTYPE_INTR;
+    event.fields.vector = vector;
+
+    ASSERT(vmcb->eventinj.fields.v == 0);
+    vmcb->eventinj = event;
 }
     
 asmlinkage void svm_intr_assist(void) 
 {
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    int intr_type = APIC_DM_EXTINT;
-    int intr_vector = -1;
+    enum hvm_intack intr_source;
+    int intr_vector;
 
     /*
-     * Previous Interrupt delivery caused this intercept?
+     * Previous event delivery caused this intercept?
      * This will happen if the injection is latched by the processor (hence
-     * clearing vintr.fields.irq) but then subsequently a fault occurs (e.g.,
-     * due to lack of shadow mapping of guest IDT or guest-kernel stack).
-     * 
-     * NB. Exceptions that fault during delivery are lost. This needs to be
-     * fixed but we'll usually get away with it since faults are usually
-     * idempotent. But this isn't the case for e.g. software interrupts!
+     * clearing vintr.fields.irq or eventinj.v) but then subsequently a fault
+     * occurs (e.g., due to lack of shadow mapping of guest IDT or guest-kernel
+     * stack).
      */
-    if ( vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0) )
+    if ( vmcb->exitintinfo.fields.v )
     {
-        intr_vector = vmcb->exitintinfo.fields.vector;
+        vmcb->eventinj = vmcb->exitintinfo;
         vmcb->exitintinfo.bytes = 0;
         HVMTRACE_1D(REINJ_VIRQ, v, intr_vector);
-        svm_inject_extint(v, intr_vector);
         return;
     }
 
-    /*
-     * Previous interrupt still pending? This occurs if we return from VMRUN
-     * very early in the entry-to-guest process. Usually this is because an
-     * external physical interrupt was pending when we executed VMRUN.
-     */
-    if ( vmcb->vintr.fields.irq )
-        return;
-
-    /* Crank the handle on interrupt state and check for new interrrupts. */
+    /* Crank the handle on interrupt state. */
     pt_update_irq(v);
     hvm_set_callback_irq_level();
-    if ( !cpu_has_pending_irq(v) )
-        return;
 
-    /*
-     * If the guest can't take an interrupt right now, create a 'fake'
-     * virtual interrupt on to intercept as soon as the guest _can_ take
-     * interrupts.  Do not obtain the next interrupt from the vlapic/pic
-     * if unable to inject.
-     *
-     * Also do this if there is an exception pending.  This is because
-     * the delivery of the exception can arbitrarily delay the injection
-     * of the vintr (for example, if the exception is handled via an
-     * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
-     * - the vTPR could be modified upwards, so we need to wait until the
-     *   exception is delivered before we can safely decide that an
-     *   interrupt is deliverable; and
-     * - the guest might look at the APIC/PIC state, so we ought not to have 
-     *   cleared the interrupt out of the IRR.
-     */
-    if ( irq_masked(vmcb->rflags) || vmcb->interrupt_shadow 
-         || vmcb->eventinj.fields.v )  
+    do {
+        intr_source = hvm_vcpu_has_pending_irq(v);
+        if ( likely(intr_source == hvm_intack_none) )
+            return;
+
+        /*
+         * If the guest can't take an interrupt right now, create a 'fake'
+         * virtual interrupt on to intercept as soon as the guest _can_ take
+         * interrupts.  Do not obtain the next interrupt from the vlapic/pic
+         * if unable to inject.
+         *
+         * Also do this if there is an injection already pending. This is
+         * because the event delivery can arbitrarily delay the injection
+         * of the vintr (for example, if the exception is handled via an
+         * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
+         * - the vTPR could be modified upwards, so we need to wait until the
+         *   exception is delivered before we can safely decide that an
+         *   interrupt is deliverable; and
+         * - the guest might look at the APIC/PIC state, so we ought not to
+         *   have cleared the interrupt out of the IRR.
+         *
+         * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt
+         * shadow. This is hard to do without hardware support. We should also
+         * track 'NMI blocking' from NMI injection until IRET. This can be done
+         * quite easily in software by intercepting the unblocking IRET.
+         */
+        if ( !hvm_interrupts_enabled(v, intr_source) ||
+             vmcb->eventinj.fields.v )
+        {
+            vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
+            HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
+            svm_inject_dummy_vintr(v);
+            return;
+        }
+    } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
+
+    if ( intr_source == hvm_intack_nmi )
     {
-        vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
-        HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
-        svm_inject_extint(v, 0x0); /* actual vector doesn't matter */
-        return;
+        svm_inject_nmi(v);
     }
-
-    /* Okay, we can deliver the interrupt: grab it and update PIC state. */
-    intr_vector = cpu_get_interrupt(v, &intr_type);
-    BUG_ON(intr_vector < 0);
-
-    HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
-    svm_inject_extint(v, intr_vector);
-
-    pt_intr_post(v, intr_vector, intr_type);
+    else
+    {
+        HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
+        svm_inject_extint(v, intr_vector);
+        pt_intr_post(v, intr_vector, intr_source);
+    }
 }
 
 /*
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Wed Jun 20 12:49:27 2007 -0600
@@ -312,26 +312,8 @@ int svm_vmcb_save(struct vcpu *v, struct
     c->sysenter_esp = vmcb->sysenter_esp;
     c->sysenter_eip = vmcb->sysenter_eip;
 
-    /* Save any event/interrupt that was being injected when we last
-     * exited.  Although there are three(!) VMCB fields that can contain
-     * active events, we only need to save at most one: because the
-     * intr_assist logic never delivers an IRQ when any other event is
-     * active, we know that the only possible collision is if we inject
-     * a fault while exitintinfo contains a valid event (the delivery of
-     * which caused the last exit).  In that case replaying just the
-     * first event should cause the same behaviour when we restore. */
-    if ( vmcb->vintr.fields.irq 
-         && /* Check it's not a fake interrupt (see svm_intr_assist()) */
-         !(vmcb->general1_intercepts & GENERAL1_INTERCEPT_VINTR) )
-    {
-        c->pending_vector = vmcb->vintr.fields.vector;
-        c->pending_type = 0; /* External interrupt */
-        c->pending_error_valid = 0;
-        c->pending_reserved = 0;
-        c->pending_valid = 1;
-        c->error_code = 0;
-    }
-    else if ( vmcb->exitintinfo.fields.v )
+    /* Save any event/interrupt that was being injected when we last exited. */
+    if ( vmcb->exitintinfo.fields.v )
     {
         c->pending_event = vmcb->exitintinfo.bytes & 0xffffffff;
         c->error_code = vmcb->exitintinfo.fields.errorcode;
@@ -569,10 +551,15 @@ static inline void svm_restore_dr(struct
         __restore_debug_registers(v);
 }
 
-static int svm_interrupts_enabled(struct vcpu *v)
-{
-    unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
-    return !irq_masked(eflags); 
+static int svm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    if ( type == hvm_intack_nmi )
+        return !vmcb->interrupt_shadow;
+
+    ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
+    return !irq_masked(vmcb->rflags) && !vmcb->interrupt_shadow; 
 }
 
 static int svm_guest_x86_mode(struct vcpu *v)
@@ -596,6 +583,14 @@ static void svm_update_guest_cr3(struct 
 static void svm_update_guest_cr3(struct vcpu *v)
 {
     v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
+}
+
+static void svm_flush_guest_tlbs(void)
+{
+    /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
+     * next VMRUN.  (If ASIDs are disabled, the whole TLB is flushed on
+     * VMRUN anyway). */
+    svm_asid_inc_generation();
 }
 
 static void svm_update_vtpr(struct vcpu *v, unsigned long value)
@@ -770,8 +765,6 @@ static void svm_init_hypercall_page(stru
 {
     char *p;
     int i;
-
-    memset(hypercall_page, 0, PAGE_SIZE);
 
     for ( i = 0; i < (PAGE_SIZE / 32); i++ )
     {
@@ -948,6 +941,7 @@ static struct hvm_function_table svm_fun
     .get_segment_register = svm_get_segment_register,
     .update_host_cr3      = svm_update_host_cr3,
     .update_guest_cr3     = svm_update_guest_cr3,
+    .flush_guest_tlbs     = svm_flush_guest_tlbs,
     .update_vtpr          = svm_update_vtpr,
     .stts                 = svm_stts,
     .set_tsc_offset       = svm_set_tsc_offset,
@@ -957,7 +951,7 @@ static struct hvm_function_table svm_fun
     .event_injection_faulted = svm_event_injection_faulted
 };
 
-void svm_npt_detect(void)
+static void svm_npt_detect(void)
 {
     u32 eax, ebx, ecx, edx;
 
@@ -1017,6 +1011,9 @@ int start_svm(struct cpuinfo_x86 *c)
 
     hvm_enable(&svm_function_table);
 
+    if ( opt_hap_enabled )
+        printk("SVM: Nested paging enabled.\n");
+        
     return 1;
 }
 
@@ -1477,7 +1474,7 @@ static void svm_io_instruction(struct vc
 
     /* Copy current guest state into io instruction state structure. */
     memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
-    hvm_store_cpu_guest_regs(v, regs, NULL);
+    svm_store_cpu_guest_regs(v, regs, NULL);
 
     info.bytes = vmcb->exitinfo1;
 
@@ -2148,11 +2145,14 @@ static inline void svm_do_msr_access(
 
 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
 {
+    enum hvm_intack type = hvm_vcpu_has_pending_irq(current);
+
     __update_guest_eip(vmcb, 1);
 
     /* Check for interrupt not handled or new interrupt. */
-    if ( (vmcb->rflags & X86_EFLAGS_IF) &&
-         (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) ) {
+    if ( vmcb->eventinj.fields.v ||
+         ((type != hvm_intack_none) && svm_interrupts_enabled(current, type)) )
+    {
         HVMTRACE_1D(HLT, current, /*int pending=*/ 1);
         return;
     }
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Wed Jun 20 12:49:27 2007 -0600
@@ -56,7 +56,7 @@ struct vmcb_struct *alloc_vmcb(void)
         return NULL;
     }
 
-    memset(vmcb, 0, PAGE_SIZE);
+    clear_page(vmcb);
     return vmcb;
 }
 
@@ -72,11 +72,11 @@ struct host_save_area *alloc_host_save_a
     hsa = alloc_xenheap_page();
     if ( hsa == NULL )
     {
-        printk(XENLOG_WARNING "Warning: failed to allocate vmcb.\n");
+        printk(XENLOG_WARNING "Warning: failed to allocate hsa.\n");
         return NULL;
     }
 
-    memset(hsa, 0, PAGE_SIZE);
+    clear_page(hsa);
     return hsa;
 }
 
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vioapic.c        Wed Jun 20 12:49:27 2007 -0600
@@ -254,17 +254,11 @@ static void ioapic_inj_irq(
     HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d",
                 vector, trig_mode, delivery_mode);
 
-    switch ( delivery_mode )
-    {
-    case dest_Fixed:
-    case dest_LowestPrio:
-        if ( vlapic_set_irq(target, vector, trig_mode) )
-            vcpu_kick(vlapic_vcpu(target));
-        break;
-    default:
-        gdprintk(XENLOG_WARNING, "error delivery mode %d\n", delivery_mode);
-        break;
-    }
+    ASSERT((delivery_mode == dest_Fixed) ||
+           (delivery_mode == dest_LowestPrio));
+
+    if ( vlapic_set_irq(target, vector, trig_mode) )
+        vcpu_kick(vlapic_vcpu(target));
 }
 
 static uint32_t ioapic_get_delivery_bitmask(
@@ -368,7 +362,6 @@ static void vioapic_deliver(struct hvm_h
     }
 
     case dest_Fixed:
-    case dest_ExtINT:
     {
         uint8_t bit;
         for ( bit = 0; deliver_bitmask != 0; bit++ )
@@ -393,10 +386,21 @@ static void vioapic_deliver(struct hvm_h
         break;
     }
 
-    case dest_SMI:
     case dest_NMI:
-    case dest_INIT:
-    case dest__reserved_2:
+    {
+        uint8_t bit;
+        for ( bit = 0; deliver_bitmask != 0; bit++ )
+        {
+            if ( !(deliver_bitmask & (1 << bit)) )
+                continue;
+            deliver_bitmask &= ~(1 << bit);
+            if ( ((v = vioapic_domain(vioapic)->vcpu[bit]) != NULL) &&
+                 !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
+                vcpu_kick(v);
+        }
+        break;
+    }
+
     default:
         gdprintk(XENLOG_WARNING, "Unsupported delivery mode %d\n",
                  delivery_mode);
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vlapic.c Wed Jun 20 12:49:27 2007 -0600
@@ -294,7 +294,8 @@ static int vlapic_accept_irq(struct vcpu
         break;
 
     case APIC_DM_NMI:
-        gdprintk(XENLOG_WARNING, "Ignoring guest NMI\n");
+        if ( !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
+            vcpu_kick(v);
         break;
 
     case APIC_DM_INIT:
@@ -747,7 +748,7 @@ int vlapic_has_interrupt(struct vcpu *v)
     return highest_irr;
 }
 
-int cpu_get_apic_interrupt(struct vcpu *v, int *mode)
+int cpu_get_apic_interrupt(struct vcpu *v)
 {
     int vector = vlapic_has_interrupt(v);
     struct vlapic *vlapic = vcpu_vlapic(v);
@@ -757,8 +758,6 @@ int cpu_get_apic_interrupt(struct vcpu *
  
     vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
     vlapic_clear_irr(vector, vlapic);
-
-    *mode = APIC_DM_FIXED;
     return vector;
 }
 
@@ -935,7 +934,7 @@ int vlapic_init(struct vcpu *v)
        return -ENOMEM;
     }
 
-    memset(vlapic->regs, 0, PAGE_SIZE);
+    clear_page(vlapic->regs);
 
     vlapic_reset(vlapic);
 
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/intr.c       Wed Jun 20 12:49:27 2007 -0600
@@ -102,8 +102,8 @@ static void update_tpr_threshold(struct 
 
 asmlinkage void vmx_intr_assist(void)
 {
-    int has_ext_irq, intr_vector, intr_type = 0;
-    unsigned long eflags, intr_shadow;
+    int intr_vector;
+    enum hvm_intack intr_source;
     struct vcpu *v = current;
     unsigned int idtv_info_field;
     unsigned long inst_len;
@@ -114,65 +114,67 @@ asmlinkage void vmx_intr_assist(void)
 
     update_tpr_threshold(vcpu_vlapic(v));
 
-    has_ext_irq = cpu_has_pending_irq(v);
+    do {
+        intr_source = hvm_vcpu_has_pending_irq(v);
 
-    if ( unlikely(v->arch.hvm_vmx.vector_injected) )
-    {
-        v->arch.hvm_vmx.vector_injected = 0;
-        if ( unlikely(has_ext_irq) )
-            enable_irq_window(v);
-        return;
-    }
+        if ( unlikely(v->arch.hvm_vmx.vector_injected) )
+        {
+            v->arch.hvm_vmx.vector_injected = 0;
+            if ( unlikely(intr_source != hvm_intack_none) )
+                enable_irq_window(v);
+            return;
+        }
 
-    /* This could be moved earlier in the VMX resume sequence. */
-    idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
-    if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
-    {
-        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+        /* This could be moved earlier in the VMX resume sequence. */
+        idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
+        if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
+        {
+            __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+
+            /*
+             * Safe: the length will only be interpreted for software
+             * exceptions and interrupts. If we get here then delivery of some
+             * event caused a fault, and this always results in defined
+             * VM_EXIT_INSTRUCTION_LEN.
+             */
+            inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
+            __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
+
+            if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
+                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                          __vmread(IDT_VECTORING_ERROR_CODE));
+            if ( unlikely(intr_source != hvm_intack_none) )
+                enable_irq_window(v);
+
+            HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
+            return;
+        }
+
+        if ( likely(intr_source == hvm_intack_none) )
+            return;
 
         /*
-         * Safe: the length will only be interpreted for software exceptions
-         * and interrupts. If we get here then delivery of some event caused a
-         * fault, and this always results in defined VM_EXIT_INSTRUCTION_LEN.
+         * TODO: Better NMI handling. Shouldn't wait for EFLAGS.IF==1, but
+         * should wait for exit from 'NMI blocking' window (NMI injection to
+         * next IRET). This requires us to use the new 'virtual NMI' support.
          */
-        inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
-        __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
+        if ( !hvm_interrupts_enabled(v, intr_source) )
+        {
+            enable_irq_window(v);
+            return;
+        }
+    } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
 
-        if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
-            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
-                      __vmread(IDT_VECTORING_ERROR_CODE));
-        if ( unlikely(has_ext_irq) )
-            enable_irq_window(v);
-
-        HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
-        return;
+    if ( intr_source == hvm_intack_nmi )
+    {
+        vmx_inject_nmi(v);
     }
-
-    if ( likely(!has_ext_irq) )
-        return;
-
-    intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
-    if ( unlikely(intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS)) )
+    else
     {
-        enable_irq_window(v);
-        HVM_DBG_LOG(DBG_LEVEL_1, "interruptibility");
-        return;
+        HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
+        vmx_inject_extint(v, intr_vector);
+        pt_intr_post(v, intr_vector, intr_source);
     }
-
-    eflags = __vmread(GUEST_RFLAGS);
-    if ( irq_masked(eflags) )
-    {
-        enable_irq_window(v);
-        return;
-    }
-
-    intr_vector = cpu_get_interrupt(v, &intr_type);
-    BUG_ON(intr_vector < 0);
-
-    HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
-    vmx_inject_extint(v, intr_vector, VMX_DELIVER_NO_ERROR_CODE);
-
-    pt_intr_post(v, intr_vector, intr_type);
 }
 
 /*
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Wed Jun 20 12:49:27 2007 -0600
@@ -158,7 +158,7 @@ static struct vmcs_struct *vmx_alloc_vmc
         return NULL;
     }
 
-    memset(vmcs, 0, PAGE_SIZE);
+    clear_page(vmcs);
     vmcs->vmcs_revision_id = vmcs_revision_id;
 
     return vmcs;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Jun 20 12:49:27 2007 -0600
@@ -1070,8 +1070,6 @@ static void vmx_init_hypercall_page(stru
     char *p;
     int i;
 
-    memset(hypercall_page, 0, PAGE_SIZE);
-
     for ( i = 0; i < (PAGE_SIZE / 32); i++ )
     {
         p = (char *)(hypercall_page + (i * 32));
@@ -1115,16 +1113,26 @@ static int vmx_nx_enabled(struct vcpu *v
     return v->arch.hvm_vmx.efer & EFER_NX;
 }
 
-static int vmx_interrupts_enabled(struct vcpu *v) 
-{
-    unsigned long eflags = __vmread(GUEST_RFLAGS); 
-    return !irq_masked(eflags); 
-}
-
+static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+    unsigned long intr_shadow, eflags;
+
+    ASSERT(v == current);
+
+    intr_shadow  = __vmread(GUEST_INTERRUPTIBILITY_INFO);
+    intr_shadow &= VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS;
+
+    if ( type == hvm_intack_nmi )
+        return !intr_shadow;
+
+    ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
+    eflags = __vmread(GUEST_RFLAGS);
+    return !irq_masked(eflags) && !intr_shadow;
+}
 
 static void vmx_update_host_cr3(struct vcpu *v)
 {
-    ASSERT( (v == current) || !vcpu_runnable(v) );
+    ASSERT((v == current) || !vcpu_runnable(v));
     vmx_vmcs_enter(v);
     __vmwrite(HOST_CR3, v->arch.cr3);
     vmx_vmcs_exit(v);
@@ -1132,12 +1140,18 @@ static void vmx_update_host_cr3(struct v
 
 static void vmx_update_guest_cr3(struct vcpu *v)
 {
-    ASSERT( (v == current) || !vcpu_runnable(v) );
+    ASSERT((v == current) || !vcpu_runnable(v));
     vmx_vmcs_enter(v);
     __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
     vmx_vmcs_exit(v);
 }
 
+static void vmx_flush_guest_tlbs(void)
+{
+    /* No tagged TLB support on VMX yet.  The fact that we're in Xen 
+     * at all means any guest will have a clean TLB when it's next run,
+     * because VMRESUME will flush it for us. */
+}
 
 static void vmx_inject_exception(
     unsigned int trapnr, int errcode, unsigned long cr2)
@@ -1205,6 +1219,7 @@ static struct hvm_function_table vmx_fun
     .get_segment_register = vmx_get_segment_register,
     .update_host_cr3      = vmx_update_host_cr3,
     .update_guest_cr3     = vmx_update_guest_cr3,
+    .flush_guest_tlbs     = vmx_flush_guest_tlbs,
     .update_vtpr          = vmx_update_vtpr,
     .stts                 = vmx_stts,
     .set_tsc_offset       = vmx_set_tsc_offset,
@@ -1837,7 +1852,7 @@ static void vmx_io_instruction(unsigned 
 
     /* Copy current guest state into io instruction state structure. */
     memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
-    hvm_store_cpu_guest_regs(current, regs, NULL);
+    vmx_store_cpu_guest_regs(current, regs, NULL);
 
     HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, "
                 "exit_qualification = %lx",
@@ -2549,7 +2564,8 @@ static inline int vmx_do_msr_read(struct
 
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
 
-    switch (ecx) {
+    switch ( ecx )
+    {
     case MSR_IA32_TIME_STAMP_COUNTER:
         msr_content = hvm_get_guest_time(v);
         break;
@@ -2565,6 +2581,8 @@ static inline int vmx_do_msr_read(struct
     case MSR_IA32_APICBASE:
         msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
         break;
+    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+        goto gp_fault;
     default:
         if ( long_mode_do_msr_read(regs) )
             goto done;
@@ -2576,8 +2594,8 @@ static inline int vmx_do_msr_read(struct
             regs->edx = edx;
             goto done;
         }
-        vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
-        return 0;
+
+        goto gp_fault;
     }
 
     regs->eax = msr_content & 0xFFFFFFFF;
@@ -2589,6 +2607,10 @@ done:
                 ecx, (unsigned long)regs->eax,
                 (unsigned long)regs->edx);
     return 1;
+
+gp_fault:
+    vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+    return 0;
 }
 
 static int vmx_alloc_vlapic_mapping(struct domain *d)
@@ -2667,7 +2689,8 @@ static inline int vmx_do_msr_write(struc
     msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
     HVMTRACE_2D(MSR_WRITE, v, ecx, msr_content);
 
-    switch (ecx) {
+    switch ( ecx )
+    {
     case MSR_IA32_TIME_STAMP_COUNTER:
         hvm_set_guest_time(v, msr_content);
         pt_reset(v);
@@ -2684,6 +2707,8 @@ static inline int vmx_do_msr_write(struc
     case MSR_IA32_APICBASE:
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
         break;
+    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+        goto gp_fault;
     default:
         if ( !long_mode_do_msr_write(regs) )
             wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
@@ -2691,6 +2716,10 @@ static inline int vmx_do_msr_write(struc
     }
 
     return 1;
+
+gp_fault:
+    vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+    return 0;
 }
 
 static void vmx_do_hlt(void)
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vpic.c
--- a/xen/arch/x86/hvm/vpic.c   Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vpic.c   Wed Jun 20 12:49:27 2007 -0600
@@ -499,7 +499,7 @@ void vpic_irq_negative_edge(struct domai
         vpic_update_int_output(vpic);
 }
 
-int cpu_get_pic_interrupt(struct vcpu *v, int *type)
+int cpu_get_pic_interrupt(struct vcpu *v)
 {
     int irq, vector;
     struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0];
@@ -512,6 +512,5 @@ int cpu_get_pic_interrupt(struct vcpu *v
         return -1;
 
     vector = vpic[irq >> 3].irq_base + (irq & 7);
-    *type = APIC_DM_EXTINT;
     return vector;
 }
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vpt.c    Wed Jun 20 12:49:27 2007 -0600
@@ -155,7 +155,8 @@ void pt_update_irq(struct vcpu *v)
     }
 }
 
-static struct periodic_time *is_pt_irq(struct vcpu *v, int vector, int type)
+static struct periodic_time *is_pt_irq(
+    struct vcpu *v, int vector, enum hvm_intack src)
 {
     struct list_head *head = &v->arch.hvm_vcpu.tm_list;
     struct periodic_time *pt;
@@ -174,7 +175,7 @@ static struct periodic_time *is_pt_irq(s
             return pt;
         }
 
-        vec = get_isa_irq_vector(v, pt->irq, type);
+        vec = get_isa_irq_vector(v, pt->irq, src);
 
         /* RTC irq need special care */
         if ( (vector != vec) || (pt->irq == 8 && !is_rtc_periodic_irq(rtc)) )
@@ -186,7 +187,7 @@ static struct periodic_time *is_pt_irq(s
     return NULL;
 }
 
-void pt_intr_post(struct vcpu *v, int vector, int type)
+void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src)
 {
     struct periodic_time *pt;
     time_cb *cb;
@@ -194,7 +195,7 @@ void pt_intr_post(struct vcpu *v, int ve
 
     spin_lock(&v->arch.hvm_vcpu.tm_lock);
 
-    pt = is_pt_irq(v, vector, type);
+    pt = is_pt_irq(v, vector, src);
     if ( pt == NULL )
     {
         spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -227,13 +228,10 @@ void pt_reset(struct vcpu *v)
 
     list_for_each_entry ( pt, head, list )
     {
-        if ( pt->enabled )
-        {
-            pt->pending_intr_nr = 0;
-            pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
-            pt->scheduled = NOW() + pt->period;
-            set_timer(&pt->timer, pt->scheduled);
-        }
+        pt->pending_intr_nr = 0;
+        pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
+        pt->scheduled = NOW() + pt->period;
+        set_timer(&pt->timer, pt->scheduled);
     }
 
     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -247,10 +245,7 @@ void pt_migrate(struct vcpu *v)
     spin_lock(&v->arch.hvm_vcpu.tm_lock);
 
     list_for_each_entry ( pt, head, list )
-    {
-        if ( pt->enabled )
-            migrate_timer(&pt->timer, v->processor);
-    }
+        migrate_timer(&pt->timer, v->processor);
 
     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
 }
@@ -263,8 +258,9 @@ void create_periodic_time(
 
     spin_lock(&v->arch.hvm_vcpu.tm_lock);
 
-    init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
     pt->enabled = 1;
+    pt->pending_intr_nr = 0;
+
     if ( period < 900000 ) /* < 0.9 ms */
     {
         gdprintk(XENLOG_WARNING,
@@ -283,6 +279,8 @@ void create_periodic_time(
     pt->priv = data;
 
     list_add(&pt->list, &v->arch.hvm_vcpu.tm_list);
+
+    init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
     set_timer(&pt->timer, pt->scheduled);
 
     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -295,8 +293,12 @@ void destroy_periodic_time(struct period
 
     pt_lock(pt);
     pt->enabled = 0;
-    pt->pending_intr_nr = 0;
     list_del(&pt->list);
+    pt_unlock(pt);
+
+    /*
+     * pt_timer_fn() can run until this kill_timer() returns. We must do this
+     * outside pt_lock() otherwise we can deadlock with pt_timer_fn().
+     */
     kill_timer(&pt->timer);
-    pt_unlock(pt);
-}
+}
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/mm.c Wed Jun 20 12:49:27 2007 -0600
@@ -2942,7 +2942,7 @@ long do_set_gdt(XEN_GUEST_HANDLE(ulong) 
     if ( entries > FIRST_RESERVED_GDT_ENTRY )
         return -EINVAL;
     
-    if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) )
+    if ( copy_from_guest(frames, frame_list, nr_pages) )
         return -EFAULT;
 
     LOCK_BIGLOCK(current->domain);
@@ -3123,7 +3123,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
         else if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL )
             return -ESRCH;
 
-        rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer,
+        rc = copy_from_guest(d->arch.e820, fmap.map.buffer,
                              fmap.map.nr_entries) ? -EFAULT : 0;
         d->arch.nr_e820 = fmap.map.nr_entries;
 
@@ -3144,7 +3144,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
             return -EFAULT;
 
         map.nr_entries = min(map.nr_entries, d->arch.nr_e820);
-        if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) ||
+        if ( copy_to_guest(map.buffer, d->arch.e820, map.nr_entries) ||
              copy_to_guest(arg, &map, 1) )
             return -EFAULT;
 
@@ -3168,7 +3168,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
         buffer = guest_handle_cast(memmap.buffer, e820entry_t);
 
         count = min((unsigned int)e820.nr_map, memmap.nr_entries);
-        if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
+        if ( copy_to_guest(buffer, e820.map, count) < 0 )
             return -EFAULT;
 
         memmap.nr_entries = count;
@@ -3181,7 +3181,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
 
     case XENMEM_machphys_mapping:
     {
-        struct xen_machphys_mapping mapping = {
+        static const struct xen_machphys_mapping mapping = {
             .v_start = MACH2PHYS_VIRT_START,
             .v_end   = MACH2PHYS_VIRT_END,
             .max_mfn = MACH2PHYS_NR_ENTRIES - 1
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/setup.c      Wed Jun 20 12:49:27 2007 -0600
@@ -295,14 +295,14 @@ static struct e820map __initdata boot_e8
 /* Reserve area (@s,@e) in the temporary bootstrap e820 map. */
 static void __init reserve_in_boot_e820(unsigned long s, unsigned long e)
 {
-    unsigned long rs, re;
+    uint64_t rs, re;
     int i;
 
     for ( i = 0; i < boot_e820.nr_map; i++ )
     {
         /* Have we found the e820 region that includes the specified range? */
         rs = boot_e820.map[i].addr;
-        re = boot_e820.map[i].addr + boot_e820.map[i].size;
+        re = rs + boot_e820.map[i].size;
         if ( (s < rs) || (e > re) )
             continue;
 
@@ -402,7 +402,7 @@ void init_done(void)
     startup_cpu_idle_loop();
 }
 
-void __init __start_xen(multiboot_info_t *mbi)
+void __init __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
     char __cmdline[] = "", *cmdline = __cmdline;
@@ -410,6 +410,7 @@ void __init __start_xen(multiboot_info_t
     unsigned int initrdidx = 1;
     char *_policy_start = NULL;
     unsigned long _policy_len = 0;
+    multiboot_info_t *mbi = __va(mbi_p);
     module_t *mod = (module_t *)__va(mbi->mods_addr);
     unsigned long nr_pages, modules_length;
     int i, e820_warn = 0, bytes = 0;
@@ -678,6 +679,9 @@ void __init __start_xen(multiboot_info_t
             barrier();
             move_memory(e, 0, __pa(&_end) - xen_phys_start);
 
+            /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
+            memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
+
             /* Walk initial pagetables, relocating page directory entries. */
             pl4e = __va(__pa(idle_pg_table));
             for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/traps.c      Wed Jun 20 12:49:27 2007 -0600
@@ -462,7 +462,17 @@ int rdmsr_hypervisor_regs(
     if ( idx > 0 )
         return 0;
 
-    *eax = *edx = 0;
+    switch ( idx )
+    {
+    case 0:
+    {
+        *eax = *edx = 0;
+        break;
+    }
+    default:
+        BUG();
+    }
+
     return 1;
 }
 
@@ -1130,7 +1140,7 @@ static inline int guest_io_okay(
          * read as 0xff (no access allowed).
          */
         TOGGLE_MODE();
-        switch ( __copy_from_guest_offset(&x.bytes[0], v->arch.iobmp,
+        switch ( __copy_from_guest_offset(x.bytes, v->arch.iobmp,
                                           port>>3, 2) )
         {
         default: x.bytes[0] = ~0;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c       Wed Jun 20 12:49:27 2007 -0600
@@ -513,6 +513,7 @@ static void hypercall_page_initialise_ri
 
 void hypercall_page_initialise(struct domain *d, void *hypercall_page)
 {
+    memset(hypercall_page, 0xCC, PAGE_SIZE);
     if ( is_hvm_domain(d) )
         hvm_hypercall_page_initialise(d, hypercall_page);
     else if ( supervisor_mode_kernel )
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_64/compat_kexec.S
--- a/xen/arch/x86/x86_64/compat_kexec.S        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_64/compat_kexec.S        Wed Jun 20 12:49:27 2007 -0600
@@ -1,5 +1,11 @@
 /*
  * Compatibility kexec handler.
+ */
+
+/*
+ * NOTE: We rely on Xen not relocating itself above the 4G boundary. This is
+ * currently true but if it ever changes then compat_pg_table will
+ * need to be moved back below 4G at run time.
  */
 
 #include <xen/config.h>
@@ -8,7 +14,20 @@
 #include <asm/msr.h>
 #include <asm/page.h>
 
-#define SYM_PHYS(sym)       ((sym) - __XEN_VIRT_START)
+/* The unrelocated physical address of a symbol. */
+#define SYM_PHYS(sym)          ((sym) - __XEN_VIRT_START)
+
+/* Load physical address of symbol into register and relocate it. */
+#define RELOCATE_SYM(sym,reg)  mov $SYM_PHYS(sym), reg ; \
+                               add xen_phys_start(%rip), reg
+
+/*
+ * Relocate a physical address in memory. Size of temporary register
+ * determines size of the value to relocate.
+ */
+#define RELOCATE_MEM(addr,reg) mov addr(%rip), reg ; \
+                               add xen_phys_start(%rip), reg ; \
+                               mov reg, addr(%rip)
 
         .text
 
@@ -31,20 +50,35 @@ 1:      dec %r9
         test %r9,%r9
         jnz 1b
 
-        mov $SYM_PHYS(compat_page_list),%rdx
+        RELOCATE_SYM(compat_page_list,%rdx)
+
+        /* Relocate compatibility mode entry point address. */
+        RELOCATE_MEM(compatibility_mode_far,%eax)
+
+        /* Relocate compat_pg_table. */
+        RELOCATE_MEM(compat_pg_table,     %rax)
+        RELOCATE_MEM(compat_pg_table+0x8, %rax)
+        RELOCATE_MEM(compat_pg_table+0x10,%rax)
+        RELOCATE_MEM(compat_pg_table+0x18,%rax)
 
         /*
          * Setup an identity mapped region in PML4[0] of idle page
          * table.
          */
-        lea l3_identmap(%rip),%rax
-        sub %rbx,%rax
+        RELOCATE_SYM(l3_identmap,%rax)
         or  $0x63,%rax
         mov %rax, idle_pg_table(%rip)
 
         /* Switch to idle page table. */
-        movq $SYM_PHYS(idle_pg_table), %rax
+        RELOCATE_SYM(idle_pg_table,%rax)
         movq %rax, %cr3
+
+        /* Switch to identity mapped compatibility stack. */
+        RELOCATE_SYM(compat_stack,%rax)
+        movq %rax, %rsp
+
+        /* Save xen_phys_start for 32 bit code. */
+        movq xen_phys_start(%rip), %rbx
 
         /* Jump to low identity mapping in compatibility mode. */
         ljmp *compatibility_mode_far(%rip)
@@ -54,7 +88,26 @@ compatibility_mode_far:
         .long SYM_PHYS(compatibility_mode)
         .long __HYPERVISOR_CS32
 
+        /*
+         * We use 5 words of stack for the arguments passed to the kernel. The
+         * kernel only uses 1 word before switching to its own stack. Allocate
+         * 16 words to give "plenty" of room.
+         */
+        .fill 16,4,0
+compat_stack:
+
         .code32
+
+#undef RELOCATE_SYM
+#undef RELOCATE_MEM
+
+/*
+ * Load physical address of symbol into register and relocate it. %rbx
+ * contains xen_phys_start(%rip) saved before jump to compatibility
+ * mode.
+ */
+#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \
+                              add %ebx, reg
 
 compatibility_mode:
         /* Setup some sane segments. */
@@ -78,7 +131,7 @@ compatibility_mode:
         movl %eax, %cr0
 
         /* Switch to 32 bit page table. */
-        movl  $SYM_PHYS(compat_pg_table), %eax
+        RELOCATE_SYM(compat_pg_table, %eax)
         movl  %eax, %cr3
 
         /* Clear MSR_EFER[LME], disabling long mode */
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c       Wed Jun 20 12:49:27 2007 -0600
@@ -510,6 +510,7 @@ static void hypercall_page_initialise_ri
 
 void hypercall_page_initialise(struct domain *d, void *hypercall_page)
 {
+    memset(hypercall_page, 0xCC, PAGE_SIZE);
     if ( is_hvm_domain(d) )
         hvm_hypercall_page_initialise(d, hypercall_page);
     else if ( !is_pv_32bit_domain(d) )
diff -r c20bc60f9243 -r 810885428743 xen/common/compat/memory.c
--- a/xen/common/compat/memory.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/compat/memory.c        Wed Jun 20 12:49:27 2007 -0600
@@ -258,7 +258,8 @@ int compat_memory_op(unsigned int cmd, X
                     compat_pfn_t pfn = nat.rsrv->extent_start.p[start_extent];
 
                     BUG_ON(pfn != nat.rsrv->extent_start.p[start_extent]);
-                    if ( __copy_to_compat_offset(cmp.rsrv.extent_start, start_extent, &pfn, 1) )
+                    if ( __copy_to_compat_offset(cmp.rsrv.extent_start,
+                                                 start_extent, &pfn, 1) )
                     {
                         if ( split >= 0 )
                         {
@@ -275,6 +276,10 @@ int compat_memory_op(unsigned int cmd, X
                         break;
                     }
                 }
+
+                /* Bail if there was an error. */
+                if ( (split >= 0) && (end_extent != nat.rsrv->nr_extents) )
+                    split = 0;
             }
             else
                 start_extent = end_extent;
diff -r c20bc60f9243 -r 810885428743 xen/common/domctl.c
--- a/xen/common/domctl.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/domctl.c       Wed Jun 20 12:49:27 2007 -0600
@@ -43,7 +43,7 @@ void cpumask_to_xenctl_cpumap(
 
     bitmap_long_to_byte(bytemap, cpus_addr(*cpumask), NR_CPUS);
 
-    copy_to_guest(xenctl_cpumap->bitmap, &bytemap[0], copy_bytes);
+    copy_to_guest(xenctl_cpumap->bitmap, bytemap, copy_bytes);
 
     for ( i = copy_bytes; i < guest_bytes; i++ )
         copy_to_guest_offset(xenctl_cpumap->bitmap, i, &zero, 1);
@@ -63,7 +63,7 @@ void xenctl_cpumap_to_cpumask(
     if ( guest_handle_is_null(xenctl_cpumap->bitmap) )
         return;
 
-    copy_from_guest(&bytemap[0], xenctl_cpumap->bitmap, copy_bytes);
+    copy_from_guest(bytemap, xenctl_cpumap->bitmap, copy_bytes);
 
     bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, NR_CPUS);
 }
diff -r c20bc60f9243 -r 810885428743 xen/common/grant_table.c
--- a/xen/common/grant_table.c  Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/grant_table.c  Wed Jun 20 12:49:27 2007 -0600
@@ -148,7 +148,7 @@ get_maptrack_handle(
                 return -1;
             }
 
-            memset(new_mt, 0, PAGE_SIZE);
+            clear_page(new_mt);
 
             new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
 
@@ -624,7 +624,7 @@ gnttab_grow_table(struct domain *d, unsi
     {
         if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
             goto active_alloc_failed;
-        memset(gt->active[i], 0, PAGE_SIZE);
+        clear_page(gt->active[i]);
     }
 
     /* Shared */
@@ -632,7 +632,7 @@ gnttab_grow_table(struct domain *d, unsi
     {
         if ( (gt->shared[i] = alloc_xenheap_page()) == NULL )
             goto shared_alloc_failed;
-        memset(gt->shared[i], 0, PAGE_SIZE);
+        clear_page(gt->shared[i]);
     }
 
     /* Share the new shared frames with the recipient domain */
@@ -1365,7 +1365,7 @@ grant_table_create(
     {
         if ( (t->active[i] = alloc_xenheap_page()) == NULL )
             goto no_mem_2;
-        memset(t->active[i], 0, PAGE_SIZE);
+        clear_page(t->active[i]);
     }
 
     /* Tracking of mapped foreign frames table */
@@ -1375,7 +1375,7 @@ grant_table_create(
     memset(t->maptrack, 0, max_nr_maptrack_frames() * sizeof(t->maptrack[0]));
     if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
         goto no_mem_3;
-    memset(t->maptrack[0], 0, PAGE_SIZE);
+    clear_page(t->maptrack[0]);
     t->maptrack_limit = PAGE_SIZE / sizeof(struct grant_mapping);
     for ( i = 0; i < t->maptrack_limit; i++ )
         t->maptrack[0][i].ref = i+1;
@@ -1389,7 +1389,7 @@ grant_table_create(
     {
         if ( (t->shared[i] = alloc_xenheap_page()) == NULL )
             goto no_mem_4;
-        memset(t->shared[i], 0, PAGE_SIZE);
+        clear_page(t->shared[i]);
     }
 
     for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
diff -r c20bc60f9243 -r 810885428743 xen/common/kernel.c
--- a/xen/common/kernel.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/kernel.c       Wed Jun 20 12:49:27 2007 -0600
@@ -142,7 +142,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
     {
         xen_extraversion_t extraversion;
         safe_strcpy(extraversion, xen_extra_version());
-        if ( copy_to_guest(arg, (char *)extraversion, sizeof(extraversion)) )
+        if ( copy_to_guest(arg, extraversion, ARRAY_SIZE(extraversion)) )
             return -EFAULT;
         return 0;
     }
@@ -167,7 +167,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
         memset(info, 0, sizeof(info));
         arch_get_xen_caps(&info);
 
-        if ( copy_to_guest(arg, (char *)info, sizeof(info)) )
+        if ( copy_to_guest(arg, info, ARRAY_SIZE(info)) )
             return -EFAULT;
         return 0;
     }
@@ -187,7 +187,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
     {
         xen_changeset_info_t chgset;
         safe_strcpy(chgset, xen_changeset());
-        if ( copy_to_guest(arg, (char *)chgset, sizeof(chgset)) )
+        if ( copy_to_guest(arg, chgset, ARRAY_SIZE(chgset)) )
             return -EFAULT;
         return 0;
     }
@@ -229,8 +229,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
 
     case XENVER_guest_handle:
     {
-        if ( copy_to_guest(arg, (char *)current->domain->handle,
-                           sizeof(current->domain->handle)) )
+        if ( copy_to_guest(arg, current->domain->handle,
+                           ARRAY_SIZE(current->domain->handle)) )
             return -EFAULT;
         return 0;
     }    
diff -r c20bc60f9243 -r 810885428743 xen/common/kexec.c
--- a/xen/common/kexec.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/kexec.c        Wed Jun 20 12:49:27 2007 -0600
@@ -169,7 +169,11 @@ static int kexec_get(reserve)(xen_kexec_
 
 static int kexec_get(xen)(xen_kexec_range_t *range)
 {
+#ifdef CONFIG_X86_64
+    range->start = xenheap_phys_start;
+#else
     range->start = virt_to_maddr(_start);
+#endif
     range->size = (unsigned long)xenheap_phys_end - (unsigned long)range->start;
     return 0;
 }
diff -r c20bc60f9243 -r 810885428743 xen/common/perfc.c
--- a/xen/common/perfc.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/perfc.c        Wed Jun 20 12:49:27 2007 -0600
@@ -227,7 +227,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
     }
     BUG_ON(v != perfc_nbr_vals);
 
-    if ( copy_to_guest(desc, (xen_sysctl_perfc_desc_t *)perfc_d, NR_PERFCTRS) )
+    if ( copy_to_guest(desc, perfc_d, NR_PERFCTRS) )
         return -EFAULT;
     if ( copy_to_guest(val, perfc_vals, perfc_nbr_vals) )
         return -EFAULT;
diff -r c20bc60f9243 -r 810885428743 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/drivers/char/console.c        Wed Jun 20 12:49:27 2007 -0600
@@ -326,7 +326,7 @@ static long guest_console_write(XEN_GUES
                 CONSOLEIO_write, count, buffer);
 
         kcount = min_t(int, count, sizeof(kbuf)-1);
-        if ( copy_from_guest((char *)kbuf, buffer, kcount) )
+        if ( copy_from_guest(kbuf, buffer, kcount) )
             return -EFAULT;
         kbuf[kcount] = '\0';
 
diff -r c20bc60f9243 -r 810885428743 xen/drivers/video/vga.c
--- a/xen/drivers/video/vga.c   Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/drivers/video/vga.c   Wed Jun 20 12:49:27 2007 -0600
@@ -32,6 +32,9 @@ static unsigned char *video;
  * 
  *   'vga=ask':
  *      display a vga menu of available modes
+ * 
+ *   'vga=current':
+ *      use the current vga mode without modification
  * 
  *   'vga=text-80x<rows>':
  *      text mode, where <rows> is one of {25,28,30,34,43,50,60}
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-ia64/guest_access.h
--- a/xen/include/asm-ia64/guest_access.h       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-ia64/guest_access.h       Wed Jun 20 12:49:27 2007 -0600
@@ -76,28 +76,31 @@ extern int xencomm_handle_is_null(void *
     __copy_field_from_guest(ptr, hnd, field)
 
 #define __copy_to_guest_offset(hnd, idx, ptr, nr) ({                    \
-    const typeof(ptr) _d = (hnd).p;                                     \
-    const typeof(ptr) _s = (ptr);                                       \
+    const typeof(*(ptr)) *_s = (ptr);                                   \
+    void *_d = (hnd).p;                                                 \
+    ((void)((hnd).p == (ptr)));                                         \
     xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
 })
 
 #define __copy_field_to_guest(hnd, ptr, field) ({                   \
-    const int _off = offsetof(typeof(*ptr), field);                 \
-    const typeof(ptr) _d = (hnd).p;                                 \
+    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
     const typeof(&(ptr)->field) _s = &(ptr)->field;                 \
+    void *_d = (hnd).p;                                             \
+    ((void)(&(hnd).p->field == &(ptr)->field));                     \
     xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off);               \
 })
 
-#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({                     \
-    const typeof(ptr) _s = (hnd).p;                                        \
-    const typeof(ptr) _d = (ptr);                                          \
-    xencomm_copy_from_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx));  \
+#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({                    \
+    const typeof(*(ptr)) *_s = (hnd).p;                                   \
+    typeof(*(ptr)) *_d = (ptr);                                           \
+    xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \
 })
 
 #define __copy_field_from_guest(ptr, hnd, field) ({                 \
-    const int _off = offsetof(typeof(*ptr), field);                 \
-    const typeof(ptr) _s = (hnd).p;                                 \
-    const typeof(&(ptr)->field) _d = &(ptr)->field;                 \
+    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
+    const void *_s = (hnd).p;                                       \
+    typeof(&(ptr)->field) _d = &(ptr)->field;                       \
+    ((void)(&(hnd).p->field == &(ptr)->field));                     \
     xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off);             \
 })
 
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/event.h       Wed Jun 20 12:49:27 2007 -0600
@@ -10,7 +10,6 @@
 #define __ASM_EVENT_H__
 
 #include <xen/shared.h>
-#include <asm/hvm/irq.h> /* cpu_has_pending_irq() */
 
 static inline void vcpu_kick(struct vcpu *v)
 {
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/guest_access.h
--- a/xen/include/asm-x86/guest_access.h        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/guest_access.h        Wed Jun 20 12:49:27 2007 -0600
@@ -32,11 +32,12 @@
  * specifying an offset into the guest array.
  */
 #define copy_to_guest_offset(hnd, off, ptr, nr) ({      \
-    typeof(ptr) _x = (hnd).p;                           \
-    const typeof(ptr) _y = (ptr);                       \
+    const typeof(*(ptr)) *_s = (ptr);                   \
+    char (*_d)[sizeof(*_s)] = (void *)(hnd).p;          \
+    ((void)((hnd).p == (ptr)));                         \
     is_hvm_vcpu(current) ?                              \
-    copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) :  \
-    copy_to_user(_x+(off), _y, sizeof(*_x)*(nr));       \
+    copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) :  \
+    copy_to_user(_d+(off), _s, sizeof(*_s)*(nr));       \
 })
 
 /*
@@ -44,29 +45,30 @@
  * specifying an offset into the guest array.
  */
 #define copy_from_guest_offset(ptr, hnd, off, nr) ({    \
-    const typeof(ptr) _x = (hnd).p;                     \
-    typeof(ptr) _y = (ptr);                             \
+    const typeof(*(ptr)) *_s = (hnd).p;                 \
+    typeof(*(ptr)) *_d = (ptr);                         \
     is_hvm_vcpu(current) ?                              \
-    copy_from_user_hvm(_y, _x+(off), sizeof(*_x)*(nr)) :\
-    copy_from_user(_y, _x+(off), sizeof(*_x)*(nr));     \
+    copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\
+    copy_from_user(_d, _s+(off), sizeof(*_d)*(nr));     \
 })
 
 /* Copy sub-field of a structure to guest context via a guest handle. */
 #define copy_field_to_guest(hnd, ptr, field) ({         \
-    typeof(&(ptr)->field) _x = &(hnd).p->field;         \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;     \
+    const typeof(&(ptr)->field) _s = &(ptr)->field;     \
+    void *_d = &(hnd).p->field;                         \
+    ((void)(&(hnd).p->field == &(ptr)->field));         \
     is_hvm_vcpu(current) ?                              \
-    copy_to_user_hvm(_x, _y, sizeof(*_x)) :             \
-    copy_to_user(_x, _y, sizeof(*_x));                  \
+    copy_to_user_hvm(_d, _s, sizeof(*_s)) :             \
+    copy_to_user(_d, _s, sizeof(*_s));                  \
 })
 
 /* Copy sub-field of a structure from guest context via a guest handle. */
 #define copy_field_from_guest(ptr, hnd, field) ({       \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;   \
-    typeof(&(ptr)->field) _y = &(ptr)->field;           \
+    const typeof(&(ptr)->field) _s = &(hnd).p->field;   \
+    typeof(&(ptr)->field) _d = &(ptr)->field;           \
     is_hvm_vcpu(current) ?                              \
-    copy_from_user_hvm(_y, _x, sizeof(*_x)) :           \
-    copy_from_user(_y, _x, sizeof(*_x));                \
+    copy_from_user_hvm(_d, _s, sizeof(*_d)) :           \
+    copy_from_user(_d, _s, sizeof(*_d));                \
 })
 
 /*
@@ -78,35 +80,37 @@
      array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
 
 #define __copy_to_guest_offset(hnd, off, ptr, nr) ({    \
-    typeof(ptr) _x = (hnd).p;                           \
-    const typeof(ptr) _y = (ptr);                       \
+    const typeof(*(ptr)) *_s = (ptr);                   \
+    char (*_d)[sizeof(*_s)] = (void *)(hnd).p;          \
+    ((void)((hnd).p == (ptr)));                         \
     is_hvm_vcpu(current) ?                              \
-    copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) :  \
-    __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr));     \
+    copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) :  \
+    __copy_to_user(_d+(off), _s, sizeof(*_s)*(nr));     \
 })
 
 #define __copy_from_guest_offset(ptr, hnd, off, nr) ({  \
-    const typeof(ptr) _x = (hnd).p;                     \
-    typeof(ptr) _y = (ptr);                             \
+    const typeof(*(ptr)) *_s = (hnd).p;                 \
+    typeof(*(ptr)) *_d = (ptr);                         \
     is_hvm_vcpu(current) ?                              \
-    copy_from_user_hvm(_y, _x+(off),sizeof(*_x)*(nr)) : \
-    __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr));   \
+    copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\
+    __copy_from_user(_d, _s+(off), sizeof(*_d)*(nr));   \
 })
 
 #define __copy_field_to_guest(hnd, ptr, field) ({       \
-    typeof(&(ptr)->field) _x = &(hnd).p->field;         \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;     \
+    const typeof(&(ptr)->field) _s = &(ptr)->field;     \
+    void *_d = &(hnd).p->field;                         \
+    ((void)(&(hnd).p->field == &(ptr)->field));         \
     is_hvm_vcpu(current) ?                              \
-    copy_to_user_hvm(_x, _y, sizeof(*_x)) :             \
-    __copy_to_user(_x, _y, sizeof(*_x));                \
+    copy_to_user_hvm(_d, _s, sizeof(*_s)) :             \
+    __copy_to_user(_d, _s, sizeof(*_s));                \
 })
 
 #define __copy_field_from_guest(ptr, hnd, field) ({     \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;   \
-    typeof(&(ptr)->field) _y = &(ptr)->field;           \
+    const typeof(&(ptr)->field) _s = &(hnd).p->field;   \
+    typeof(&(ptr)->field) _d = &(ptr)->field;           \
     is_hvm_vcpu(current) ?                              \
-    copy_from_user_hvm(_y, _x, sizeof(*_x)) :           \
-    __copy_from_user(_y, _x, sizeof(*_x));              \
+    copy_from_user_hvm(_d, _s, sizeof(*_d)) :           \
+    __copy_from_user(_d, _s, sizeof(*_d));              \
 })
 
 #endif /* __ASM_X86_GUEST_ACCESS_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h     Wed Jun 20 12:49:27 2007 -0600
@@ -55,6 +55,14 @@ typedef struct segment_register {
     u64        base;
 } __attribute__ ((packed)) segment_register_t;
 
+/* Interrupt acknowledgement sources. */
+enum hvm_intack {
+    hvm_intack_none,
+    hvm_intack_pic,
+    hvm_intack_lapic,
+    hvm_intack_nmi
+};
+
 /*
  * The hardware virtual machine (HVM) interface abstracts away from the
  * x86/x86_64 CPU virtualization assist specifics. Currently this interface
@@ -106,7 +114,7 @@ struct hvm_function_table {
     int (*long_mode_enabled)(struct vcpu *v);
     int (*pae_enabled)(struct vcpu *v);
     int (*nx_enabled)(struct vcpu *v);
-    int (*interrupts_enabled)(struct vcpu *v);
+    int (*interrupts_enabled)(struct vcpu *v, enum hvm_intack);
     int (*guest_x86_mode)(struct vcpu *v);
     unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
     unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg);
@@ -124,6 +132,13 @@ struct hvm_function_table {
     void (*update_guest_cr3)(struct vcpu *v);
 
     /*
+     * Called to ensure than all guest-specific mappings in a tagged TLB
+     * are flushed; does *not* flush Xen's TLB entries, and on
+     * processors without a tagged TLB it will be a noop.
+     */
+    void (*flush_guest_tlbs)(void);
+
+    /*
      * Reflect the virtual APIC's value in the guest's V_TPR register
      */
     void (*update_vtpr)(struct vcpu *v, unsigned long value);
@@ -148,6 +163,7 @@ struct hvm_function_table {
 };
 
 extern struct hvm_function_table hvm_funcs;
+extern int hvm_enabled;
 
 int hvm_domain_initialise(struct domain *d);
 void hvm_domain_relinquish_resources(struct domain *d);
@@ -191,16 +207,16 @@ hvm_long_mode_enabled(struct vcpu *v)
 #define hvm_long_mode_enabled(v) (v,0)
 #endif
 
- static inline int
+static inline int
 hvm_pae_enabled(struct vcpu *v)
 {
     return hvm_funcs.pae_enabled(v);
 }
 
 static inline int
-hvm_interrupts_enabled(struct vcpu *v)
-{
-    return hvm_funcs.interrupts_enabled(v);
+hvm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+    return hvm_funcs.interrupts_enabled(v, type);
 }
 
 static inline int
@@ -230,6 +246,13 @@ hvm_update_vtpr(struct vcpu *v, unsigned
 }
 
 void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3);
+
+static inline void 
+hvm_flush_guest_tlbs(void)
+{
+    if ( hvm_enabled )
+        hvm_funcs.flush_guest_tlbs();
+}
 
 void hvm_hypercall_page_initialise(struct domain *d,
                                    void *hypercall_page);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h     Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/irq.h     Wed Jun 20 12:49:27 2007 -0600
@@ -24,10 +24,10 @@
 
 #include <xen/types.h>
 #include <xen/spinlock.h>
+#include <asm/hvm/hvm.h>
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vioapic.h>
 #include <public/hvm/save.h>
-
 
 struct hvm_irq {
     /*
@@ -58,7 +58,6 @@ struct hvm_irq {
             HVMIRQ_callback_gsi,
             HVMIRQ_callback_pci_intx
         } callback_via_type;
-        uint32_t pad; /* So the next field will be aligned */
     };
     union {
         uint32_t gsi;
@@ -115,9 +114,12 @@ void hvm_set_callback_irq_level(void);
 void hvm_set_callback_irq_level(void);
 void hvm_set_callback_via(struct domain *d, uint64_t via);
 
-int cpu_get_interrupt(struct vcpu *v, int *type);
-int cpu_has_pending_irq(struct vcpu *v);
-int get_isa_irq_vector(struct vcpu *vcpu, int irq, int type);
+/* Check/Acknowledge next pending interrupt. */
+enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
+int hvm_vcpu_ack_pending_irq(
+    struct vcpu *v, enum hvm_intack type, int *vector);
+
+int get_isa_irq_vector(struct vcpu *vcpu, int irq, enum hvm_intack src);
 int is_isa_irq_masked(struct vcpu *v, int isa_irq);
 
 #endif /* __ASM_X86_HVM_IRQ_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/support.h Wed Jun 20 12:49:27 2007 -0600
@@ -215,7 +215,6 @@ int hvm_load(struct domain *d, hvm_domai
 /* End of save/restore */
 
 extern char hvm_io_bitmap[];
-extern int hvm_enabled;
 
 void hvm_enable(struct hvm_function_table *);
 void hvm_disable(void);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/svm/asid.h
--- a/xen/include/asm-x86/hvm/svm/asid.h        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/asid.h        Wed Jun 20 12:49:27 2007 -0600
@@ -30,6 +30,7 @@ void svm_asid_init(struct cpuinfo_x86 *c
 void svm_asid_init(struct cpuinfo_x86 *c);
 void svm_asid_init_vcpu(struct vcpu *v);
 void svm_asid_inv_asid(struct vcpu *v);
+void svm_asid_inc_generation(void);
 
 /*
  * ASID related, guest triggered events.
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vcpu.h    Wed Jun 20 12:49:27 2007 -0600
@@ -30,11 +30,13 @@
 
 struct hvm_vcpu {
     unsigned long       hw_cr3;     /* value we give to HW to use */
-    unsigned long       ioflags;
     struct hvm_io_op    io_op;
     struct vlapic       vlapic;
     s64                 cache_tsc_offset;
     u64                 guest_time;
+
+    /* Is an NMI pending for delivery to this VCPU core? */
+    bool_t              nmi_pending; /* NB. integrate flag with save/restore */
 
     /* Lock and list for virtual platform timers. */
     spinlock_t          tm_lock;
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h  Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vlapic.h  Wed Jun 20 12:49:27 2007 -0600
@@ -76,7 +76,7 @@ int vlapic_find_highest_irr(struct vlapi
 int vlapic_find_highest_irr(struct vlapic *vlapic);
 
 int vlapic_has_interrupt(struct vcpu *v);
-int cpu_get_apic_interrupt(struct vcpu *v, int *mode);
+int cpu_get_apic_interrupt(struct vcpu *v);
 
 int  vlapic_init(struct vcpu *v);
 void vlapic_destroy(struct vcpu *v);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Wed Jun 20 12:49:27 2007 -0600
@@ -336,9 +336,16 @@ static inline void vmx_inject_sw_excepti
                            instruction_len);
 }
 
-static inline void vmx_inject_extint(struct vcpu *v, int trap, int error_code)
-{
-    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code, 0);
+static inline void vmx_inject_extint(struct vcpu *v, int trap)
+{
+    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR,
+                           VMX_DELIVER_NO_ERROR_CODE, 0);
+}
+
+static inline void vmx_inject_nmi(struct vcpu *v)
+{
+    __vmx_inject_exception(v, 2, INTR_TYPE_NMI,
+                           VMX_DELIVER_NO_ERROR_CODE, 0);
 }
 
 #endif /* __ASM_X86_HVM_VMX_VMX_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vpic.h
--- a/xen/include/asm-x86/hvm/vpic.h    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vpic.h    Wed Jun 20 12:49:27 2007 -0600
@@ -32,7 +32,7 @@ void vpic_irq_positive_edge(struct domai
 void vpic_irq_positive_edge(struct domain *d, int irq);
 void vpic_irq_negative_edge(struct domain *d, int irq);
 void vpic_init(struct domain *d);
-int cpu_get_pic_interrupt(struct vcpu *v, int *type);
+int cpu_get_pic_interrupt(struct vcpu *v);
 int is_periodic_irq(struct vcpu *v, int irq, int type);
 
 #endif  /* __ASM_X86_HVM_VPIC_H__ */  
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h     Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vpt.h     Wed Jun 20 12:49:27 2007 -0600
@@ -29,6 +29,7 @@
 #include <xen/timer.h>
 #include <xen/list.h>
 #include <asm/hvm/vpic.h>
+#include <asm/hvm/irq.h>
 #include <public/hvm/save.h>
 
 struct HPETState;
@@ -119,7 +120,7 @@ void pt_freeze_time(struct vcpu *v);
 void pt_freeze_time(struct vcpu *v);
 void pt_thaw_time(struct vcpu *v);
 void pt_update_irq(struct vcpu *v);
-void pt_intr_post(struct vcpu *v, int vector, int type);
+void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src);
 void pt_reset(struct vcpu *v);
 void pt_migrate(struct vcpu *v);
 void create_periodic_time(
diff -r c20bc60f9243 -r 810885428743 xen/include/xen/compat.h
--- a/xen/include/xen/compat.h  Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/xen/compat.h  Wed Jun 20 12:49:27 2007 -0600
@@ -44,9 +44,10 @@
  * specifying an offset into the guest array.
  */
 #define copy_to_compat_offset(hnd, off, ptr, nr) ({                  \
-    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
-    const typeof(*(ptr)) *const _y = (ptr);                          \
-    copy_to_user(_x + (off), _y, sizeof(*_x) * (nr));                \
+    const typeof(*(ptr)) *_s = (ptr);                                \
+    char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c;           \
+    ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr)));     \
+    copy_to_user(_d + (off), _s, sizeof(*_s) * (nr));                \
 })
 
 /*
@@ -54,9 +55,9 @@
  * specifying an offset into the guest array.
  */
 #define copy_from_compat_offset(ptr, hnd, off, nr) ({                \
-    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
-    const typeof(ptr) _y = (ptr);                                    \
-    copy_from_user(_y, _x + (off), sizeof(*_x) * (nr));              \
+    const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
+    typeof(*(ptr)) *_d = (ptr);                                      \
+    copy_from_user(_d, _s + (off), sizeof(*_d) * (nr));              \
 })
 
 #define copy_to_compat(hnd, ptr, nr)                                 \
@@ -67,16 +68,19 @@
 
 /* Copy sub-field of a structure to guest context via a compat handle. */
 #define copy_field_to_compat(hnd, ptr, field) ({                     \
-    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
-    const typeof((ptr)->field) *const _y = &(ptr)->field;            \
-    copy_to_user(_x, _y, sizeof(*_x));                               \
+    const typeof(&(ptr)->field) _s = &(ptr)->field;                  \
+    void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;   \
+    ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field ==    \
+            &(ptr)->field));                                         \
+    copy_to_user(_d, _s, sizeof(*_s));                               \
 })
 
 /* Copy sub-field of a structure from guest context via a compat handle. */
 #define copy_field_from_compat(ptr, hnd, field) ({                   \
-    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
-    typeof((ptr)->field) *const _y = &(ptr)->field;                  \
-    copy_from_user(_y, _x, sizeof(*_x));                             \
+    const typeof(&(ptr)->field) _s =                                 \
+        &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;          \
+    typeof(&(ptr)->field) _d = &(ptr)->field;                        \
+    copy_from_user(_d, _s, sizeof(*_d));                             \
 })
 
 /*
@@ -84,18 +88,20 @@
  * Allows use of faster __copy_* functions.
  */
 #define compat_handle_okay(hnd, nr)                                  \
-    compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr), sizeof(**(hnd)._))
+    compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr),        \
+                           sizeof(**(hnd)._))
 
 #define __copy_to_compat_offset(hnd, off, ptr, nr) ({                \
-    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
-    const typeof(*(ptr)) *const _y = (ptr);                          \
-    __copy_to_user(_x + (off), _y, sizeof(*_x) * (nr));              \
+    const typeof(*(ptr)) *_s = (ptr);                                \
+    char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c;           \
+    ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr)));     \
+    __copy_to_user(_d + (off), _s, sizeof(*_s) * (nr));              \
 })
 
 #define __copy_from_compat_offset(ptr, hnd, off, nr) ({              \
-    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
-    const typeof(ptr) _y = (ptr);                                    \
-    __copy_from_user(_y, _x + (off), sizeof(*_x) * (nr));            \
+    const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
+    typeof(*(ptr)) *_d = (ptr);                                      \
+    __copy_from_user(_d, _s + (off), sizeof(*_d) * (nr));            \
 })
 
 #define __copy_to_compat(hnd, ptr, nr)                               \
@@ -105,15 +111,18 @@
     __copy_from_compat_offset(ptr, hnd, 0, nr)
 
 #define __copy_field_to_compat(hnd, ptr, field) ({                   \
-    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
-    const typeof((ptr)->field) *const _y = &(ptr)->field;            \
-    __copy_to_user(_x, _y, sizeof(*_x));                             \
+    const typeof(&(ptr)->field) _s = &(ptr)->field;                  \
+    void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;   \
+    ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field ==    \
+            &(ptr)->field));                                         \
+    __copy_to_user(_d, _s, sizeof(*_s));                             \
 })
 
 #define __copy_field_from_compat(ptr, hnd, field) ({                 \
-    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
-    typeof((ptr)->field) *const _y = &(ptr)->field;                  \
-    __copy_from_user(_y, _x, sizeof(*_x));                           \
+    const typeof(&(ptr)->field) _s =                                 \
+        &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;          \
+    typeof(&(ptr)->field) _d = &(ptr)->field;                        \
+    __copy_from_user(_d, _s, sizeof(*_d));                           \
 })
 
 
@@ -169,7 +178,8 @@ int switch_compat(struct domain *);
 int switch_compat(struct domain *);
 int switch_native(struct domain *);
 
-#define BITS_PER_GUEST_LONG(d) (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)
+#define BITS_PER_GUEST_LONG(d) \
+    (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)
 
 #else
 
diff -r c20bc60f9243 -r 810885428743 xen/include/xen/xencomm.h
--- a/xen/include/xen/xencomm.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/xen/xencomm.h Wed Jun 20 12:49:27 2007 -0600
@@ -47,17 +47,17 @@ static inline unsigned long xencomm_inli
     ((hnd).p == NULL || xencomm_handle_is_null((hnd).p))
 
 /* Offset the given guest handle into the array it refers to. */
-#define guest_handle_add_offset(hnd, nr) ({         \
-    const typeof((hnd).p) _ptr;                     \
-    xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr));   \
+#define guest_handle_add_offset(hnd, nr) ({                             \
+    const typeof((hnd).p) _ptr;                                         \
+    xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr));        \
 })
 
 /* Cast a guest handle to the specified type of handle. */
 #define guest_handle_cast(hnd, type) ({         \
     type *_x = (hnd).p;                         \
-    XEN_GUEST_HANDLE(type) _y; \
-    set_xen_guest_handle(_y, _x); \
-    _y; \
+    XEN_GUEST_HANDLE(type) _y;                  \
+    set_xen_guest_handle(_y, _x);               \
+    _y;                                         \
 })
 
 /* Since we run in real mode, we can safely access all addresses. That also
@@ -87,29 +87,32 @@ static inline unsigned long xencomm_inli
     __copy_field_from_guest(ptr, hnd, field)
 
 #define __copy_to_guest_offset(hnd, idx, ptr, nr) ({                \
-    const typeof(ptr) _x = (hnd).p;                                 \
-    const typeof(ptr) _y = (ptr);                                   \
-    xencomm_copy_to_guest(_x, _y, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \
+    const typeof(*(ptr)) *_s = (ptr);                               \
+    void *_d = (hnd).p;                                             \
+    ((void)((hnd).p == (ptr)));                                     \
+    xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
 })
 
 #define __copy_field_to_guest(hnd, ptr, field) ({                   \
-    const int _off = offsetof(typeof(*ptr), field);                  \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;               \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;                 \
-    xencomm_copy_to_guest(_x, _y, sizeof(*_x), sizeof(*_x)*(_off)); \
+    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
+    const typeof(&(ptr)->field) _s = &(ptr)->field;                 \
+    void *_d = (hnd).p;                                             \
+    ((void)(&(hnd).p->field == &(ptr)->field));                     \
+    xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off);               \
 })
 
 #define __copy_from_guest_offset(ptr, hnd, idx, nr) ({              \
-    const typeof(ptr) _x = (hnd).p;                                 \
-    const typeof(ptr) _y = (ptr);                                   \
-    xencomm_copy_from_guest(_y, _x, sizeof(*_x)*(nr), sizeof(*_x)*(idx));  \
+    const typeof(*(ptr)) *_s = (hnd).p;                             \
+    typeof(*(ptr)) *_d = (ptr);                                     \
+    xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \
 })
 
 #define __copy_field_from_guest(ptr, hnd, field) ({                 \
-    const int _off = offsetof(typeof(*ptr), field);                 \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;               \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;                 \
-    xencomm_copy_to_guest(_y, _x, sizeof(*_x), sizeof(*_x)*(_off)); \
+    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
+    const void *_s = (hnd).p;                                       \
+    typeof(&(ptr)->field) _d = &(ptr)->field;                       \
+    ((void)(&(hnd).p->field == &(ptr)->field));                     \
+    xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off);             \
 })
 
 #endif /* __XENCOMM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.