
[Xen-changelog] [xen-unstable] merge with xen-unstable.hg (staging)



# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1191346315 21600
# Node ID 3165e43ce73421bf6308844a3d0d4f27dab47639
# Parent  d6c09be8c5f53e327f401a7062a1e95d2d2a1ed4
# Parent  385b9b6bb61f076d06bbffba2ef3bf667428168c
merge with xen-unstable.hg (staging)
---
 tools/examples/external-device-migrate   |   11 +-
 tools/flask/libflask/Makefile            |    2 
 tools/flask/libflask/include/flask_op.h  |    2 
 tools/flask/loadpolicy/Makefile          |   11 --
 tools/ioemu/vl.c                         |    2 
 tools/libxc/xc_hvm_build.c               |   76 +++-----------
 tools/libxc/xc_private.c                 |    6 -
 tools/python/xen/util/xsm/acm/acm.py     |    2 
 tools/python/xen/xend/XendDomain.py      |   26 ----
 tools/python/xen/xend/XendDomainInfo.py  |   10 +
 tools/xenstat/xentop/xentop.c            |   32 ++++--
 tools/xentrace/xenctx.c                  |   55 +++++++++-
 tools/xentrace/xentrace.8                |    2 
 tools/xentrace/xentrace_format           |   91 +++++++++++++----
 tools/xm-test/configure.ac               |    4 
 tools/xm-test/lib/XmTestLib/NetConfig.py |    2 
 xen/arch/x86/domain.c                    |  162 +++++++++++++++----------------
 xen/arch/x86/hvm/hvm.c                   |   16 ++-
 xen/arch/x86/hvm/irq.c                   |   84 +++++++++-------
 xen/arch/x86/hvm/svm/intr.c              |   91 +++++------------
 xen/arch/x86/hvm/svm/svm.c               |  117 +++++++---------------
 xen/arch/x86/hvm/svm/vmcb.c              |    9 -
 xen/arch/x86/hvm/vlapic.c                |   31 +++--
 xen/arch/x86/hvm/vmx/intr.c              |  109 ++++++++------------
 xen/arch/x86/hvm/vmx/vmx.c               |   34 ++++--
 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c   |   14 +-
 xen/arch/x86/hvm/vmx/vtd/io.c            |   83 +++++++++------
 xen/arch/x86/hvm/vpic.c                  |    2 
 xen/arch/x86/hvm/vpt.c                   |   15 +-
 xen/arch/x86/platform_hypercall.c        |   47 +++++++-
 xen/arch/x86/time.c                      |    2 
 xen/arch/x86/traps.c                     |    2 
 xen/arch/x86/x86_32/domain_page.c        |  107 ++++++++++----------
 xen/arch/x86/x86_32/entry.S              |    2 
 xen/arch/x86/x86_64/entry.S              |    2 
 xen/common/sysctl.c                      |   12 --
 xen/include/asm-x86/domain.h             |   36 ++++--
 xen/include/asm-x86/hvm/hvm.h            |   33 ++++--
 xen/include/asm-x86/hvm/irq.h            |   24 ++--
 xen/include/asm-x86/hvm/vlapic.h         |    4 
 xen/include/asm-x86/hvm/vpic.h           |    2 
 xen/include/asm-x86/hvm/vpt.h            |    2 
 xen/include/public/platform.h            |   14 ++
 43 files changed, 744 insertions(+), 646 deletions(-)

diff -r d6c09be8c5f5 -r 3165e43ce734 tools/examples/external-device-migrate
--- a/tools/examples/external-device-migrate    Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/examples/external-device-migrate    Tue Oct 02 11:31:55 2007 -0600
@@ -16,6 +16,7 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #
 
+set -x
 
 # This script is called by XenD for migration of external devices
 # It does not handle the migration of those devices itself, but
@@ -57,11 +58,11 @@ function evaluate_params()
        stype=""
        while [ $# -ge 1 ]; do
                case "$1" in
-               -step)          step=$2; shift 2;;
-               -host)          host=$2; shift 2;;
-               -domname)       domname=$2; shift 2;;
-               -type)          typ=$2; shift 2;;
-               -subtype)       stype=$2; shift 2;;
+               -step)          step=$2; shift; shift;;
+               -host)          host=$2; shift; shift;;
+               -domname)       domname=$2; shift; shift;;
+               -type)          typ=$2; shift; shift;;
+               -subtype)       stype=$2; shift; shift;;
                -recover)       recover=1; shift;;
                -help)          ext_dev_migrate_usage; exit 0;;
                *)              break;;
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/flask/libflask/Makefile
--- a/tools/flask/libflask/Makefile     Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/flask/libflask/Makefile     Tue Oct 02 11:31:55 2007 -0600
@@ -60,6 +60,6 @@ libflask.so.$(MAJOR): libflask.so.$(MAJO
        ln -sf $< $@
 
 libflask.so.$(MAJOR).$(MINOR): $(PIC_OBJS)
-       $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libflask.so.$(MAJOR) -shared -o $@ $^
+       $(CC) $(CFLAGS) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libflask.so.$(MAJOR) $(SHLIB_CFLAGS) -o $@ $^
 
 -include $(DEPS)
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/flask/libflask/include/flask_op.h
--- a/tools/flask/libflask/include/flask_op.h   Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/flask/libflask/include/flask_op.h   Tue Oct 02 11:31:55 2007 -0600
@@ -39,7 +39,7 @@ typedef struct flask_op {
 } flask_op_t;
 
 int flask_load(int xc_handle, char *buf, int size);
-int flask_context_to_sid(int xc_handle, char *buf, int size, u_int32_t *sid);
+int flask_context_to_sid(int xc_handle, char *buf, int size, uint32_t *sid);
 int flask_sid_to_context(int xc_handle, int sid, char *buf, int size);
 int do_flask_op(int xc_handle, flask_op_t *op);
 
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/flask/loadpolicy/Makefile
--- a/tools/flask/loadpolicy/Makefile   Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/flask/loadpolicy/Makefile   Tue Oct 02 11:31:55 2007 -0600
@@ -1,11 +1,6 @@ XEN_ROOT=../../..
 XEN_ROOT=../../..
 include $(XEN_ROOT)/tools/Rules.mk
 XEN_LIBXC          = $(XEN_ROOT)/tools/libxc
-
-INSTALL         = install
-INSTALL_DATA    = $(INSTALL) -m0644
-INSTALL_PROG    = $(INSTALL) -m0755
-INSTALL_DIR     = $(INSTALL) -d -m0755
 
 LIBXC_ROOT = $(XEN_ROOT)/tools/libxc
 LIBFLASK_ROOT = $(XEN_ROOT)/tools/flask/libflask
@@ -28,13 +23,17 @@ TESTENV  = XENSTORED_ROOTDIR=$(TESTDIR) 
 TESTENV  = XENSTORED_ROOTDIR=$(TESTDIR) XENSTORED_RUNDIR=$(TESTDIR)
 
 CLIENTS := flask-loadpolicy
+CLIENTS_SRCS := $(patsubst flask-%,%.c,$(CLIENTS))
 CLIENTS_OBJS := $(patsubst flask-%,%.o,$(CLIENTS))
 
 .PHONY: all
 all: $(CLIENTS)
 
 $(CLIENTS): flask-%: %.o
-       $(LINK.o) $< $(LOADLIBES) $(LDLIBS) -L. -lflask -lxenctrl -o $@
+       $(CC) $(CFLAGS) $(LDFLAGS) $< $(LOADLIBES) $(LDLIBS) -L. -lflask -lxenctrl -o $@
+
+$(CLIENTS_OBJS): $(CLIENTS_SRCS)
+       $(COMPILE.c) -o $@ $<
 
 .PHONY: clean
 clean: 
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/ioemu/vl.c  Tue Oct 02 11:31:55 2007 -0600
@@ -7102,6 +7102,7 @@ int main(int argc, char **argv)
     char qemu_dm_logfilename[128];
     const char *direct_pci = NULL;
 
+#ifndef __sun__
     /* Maximise rlimits. Needed where default constraints are tight (*BSD). */
     if (getrlimit(RLIMIT_STACK, &rl) != 0) {
        perror("getrlimit(RLIMIT_STACK)");
@@ -7125,6 +7126,7 @@ int main(int argc, char **argv)
     rl.rlim_max = RLIM_INFINITY;
     if (setrlimit(RLIMIT_MEMLOCK, &rl) != 0)
        perror("setrlimit(RLIMIT_MEMLOCK)");
+#endif
 
     /* Ensure that SIGUSR2 is blocked by default when a new thread is created,
        then only the threads that use the signal unblock it -- this fixes a
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/libxc/xc_hvm_build.c        Tue Oct 02 11:31:55 2007 -0600
@@ -20,14 +20,6 @@
 #include <xen/libelf.h>
 
 #define SCRATCH_PFN 0xFFFFF
-
-/* Need to provide the right flavour of vcpu context for Xen */
-typedef union
-{
-    vcpu_guest_context_x86_64_t c64;
-    vcpu_guest_context_x86_32_t c32;   
-    vcpu_guest_context_t c;
-} vcpu_guest_context_either_t;
 
 static void build_e820map(void *e820_page, unsigned long long mem_size)
 {
@@ -154,12 +146,11 @@ static int loadelfimage(
 
 static int setup_guest(int xc_handle,
                        uint32_t dom, int memsize,
-                       char *image, unsigned long image_size,
-                       vcpu_guest_context_either_t *ctxt)
+                       char *image, unsigned long image_size)
 {
     xen_pfn_t *page_array = NULL;
     unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
-    unsigned long shared_page_nr;
+    unsigned long shared_page_nr, entry_eip;
     struct xen_add_to_physmap xatp;
     struct shared_info *shared_info;
     void *e820_page;
@@ -263,20 +254,20 @@ static int setup_guest(int xc_handle,
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
 
+    /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
+    entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
+    if ( entry_eip != 0 )
+    {
+        char *page0 = xc_map_foreign_range(
+            xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, 0);
+        if ( page0 == NULL )
+            goto error_out;
+        page0[0] = 0xe9;
+        *(uint32_t *)&page0[1] = entry_eip - 5;
+        munmap(page0, PAGE_SIZE);
+    }
+
     free(page_array);
-
-    /* Set [er]ip in the way that's right for Xen */
-    if ( strstr(caps, "x86_64") )
-    {
-        ctxt->c64.user_regs.rip = elf_uval(&elf, elf.ehdr, e_entry); 
-        ctxt->c64.flags = VGCF_online;
-    }
-    else
-    {
-        ctxt->c32.user_regs.eip = elf_uval(&elf, elf.ehdr, e_entry);
-        ctxt->c32.flags = VGCF_online;
-    }
-
     return 0;
 
  error_out:
@@ -290,42 +281,13 @@ static int xc_hvm_build_internal(int xc_
                                  char *image,
                                  unsigned long image_size)
 {
-    struct xen_domctl launch_domctl;
-    vcpu_guest_context_either_t ctxt;
-    int rc;
-
     if ( (image == NULL) || (image_size == 0) )
     {
         ERROR("Image required");
-        goto error_out;
-    }
-
-    memset(&ctxt, 0, sizeof(ctxt));
-
-    if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 )
-    {
-        goto error_out;
-    }
-
-    if ( lock_pages(&ctxt, sizeof(ctxt) ) )
-    {
-        PERROR("%s: ctxt mlock failed", __func__);
-        goto error_out;
-    }
-
-    memset(&launch_domctl, 0, sizeof(launch_domctl));
-    launch_domctl.domain = (domid_t)domid;
-    launch_domctl.u.vcpucontext.vcpu = 0;
-    set_xen_guest_handle(launch_domctl.u.vcpucontext.ctxt, &ctxt.c);
-    launch_domctl.cmd = XEN_DOMCTL_setvcpucontext;
-    rc = xc_domctl(xc_handle, &launch_domctl);
-
-    unlock_pages(&ctxt, sizeof(ctxt));
-
-    return rc;
-
- error_out:
-    return -1;
+        return -1;
+    }
+
+    return setup_guest(xc_handle, domid, memsize, image, image_size);
 }
 
 static inline int is_loadable_phdr(Elf32_Phdr *phdr)
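
The "entry_eip - 5" in the xc_hvm_build.c hunk above encodes the branch
displacement: an E9 JMP rel32 instruction is five bytes long and its rel32
field is relative to the first byte after the instruction, so a jump placed
at guest address 0x0 reaches the ELF entry point with a displacement of
entry_eip - 5. A standalone sketch of the encoding (illustrative only;
write_jmp_rel32 is not a function in this patch):

    #include <stdint.h>
    #include <string.h>

    /* Form the 5-byte JMP rel32 that the patch writes at guest address 0. */
    static void write_jmp_rel32(uint8_t *page0, uint32_t entry_eip)
    {
        uint32_t rel = entry_eip - 5;   /* target - (0x0 + insn length) */
        page0[0] = 0xe9;                /* JMP rel32 opcode */
        memcpy(&page0[1], &rel, 4);     /* displacement, little-endian */
    }
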
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/libxc/xc_private.c  Tue Oct 02 11:31:55 2007 -0600
@@ -130,7 +130,8 @@ int lock_pages(void *addr, size_t len)
       int e = 0;
 #ifndef __sun__
       void *laddr = (void *)((unsigned long)addr & PAGE_MASK);
-      size_t llen = (len + PAGE_SIZE - 1) & PAGE_MASK;
+      size_t llen = (len + ((unsigned long)addr - (unsigned long)laddr) +
+                     PAGE_SIZE - 1) & PAGE_MASK;
       e = mlock(laddr, llen);
 #endif
       return e;
@@ -140,7 +141,8 @@ void unlock_pages(void *addr, size_t len
 {
 #ifndef __sun__
     void *laddr = (void *)((unsigned long)addr & PAGE_MASK);
-    size_t llen = (len + PAGE_SIZE - 1) & PAGE_MASK;
+    size_t llen = (len + ((unsigned long)addr - (unsigned long)laddr) +
+                   PAGE_SIZE - 1) & PAGE_MASK;
     safe_munlock(laddr, llen);
 #endif
 }
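
The extra (addr - laddr) term in lock_pages()/unlock_pages() matters whenever
addr is not page-aligned: rounding len alone can undercount by one page when
the buffer straddles a page boundary. A worked standalone example of the two
formulas (assumes 4 KiB pages; not libxc code):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long addr  = 0x1ff0;   /* 16 bytes before a page boundary */
        unsigned long len   = 32;       /* region spans two pages */
        unsigned long laddr = addr & PAGE_MASK;
        unsigned long old_llen = (len + PAGE_SIZE - 1) & PAGE_MASK;
        unsigned long new_llen =
            (len + (addr - laddr) + PAGE_SIZE - 1) & PAGE_MASK;

        /* Prints "old=1 new=2": the old formula misses the second page. */
        printf("old=%lu new=%lu\n",
               old_llen / PAGE_SIZE, new_llen / PAGE_SIZE);
        return 0;
    }
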
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/python/xen/util/xsm/acm/acm.py
--- a/tools/python/xen/util/xsm/acm/acm.py      Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/python/xen/util/xsm/acm/acm.py      Tue Oct 02 11:31:55 2007 -0600
@@ -1309,7 +1309,7 @@ def parse_security_label(security_label)
         return security_label
 
 def set_security_label(policy, label):
-    if label != "" and policy != "":
+    if label and policy and label != "" and policy != "":
         return "%s:%s:%s" % (xsconstants.ACM_POLICY_ID, policy, label)
     else:
         return ""
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/python/xen/xend/XendDomain.py       Tue Oct 02 11:31:55 2007 -0600
@@ -51,7 +51,6 @@ from xen.xend.xenstore.xswatch import xs
 from xen.xend.xenstore.xswatch import xswatch
 from xen.util import mkdir
 from xen.xend import uuid
-from xen.xend import sxp
 
 xc = xen.lowlevel.xc.xc()
 xoptions = XendOptions.instance() 
@@ -969,31 +968,6 @@ class XendDomain:
         try:
             try:
                 domconfig = XendConfig.XendConfig(sxp_obj = config)
-                
-                domains = self.list('all')
-                domains = map(lambda dom: dom.sxpr(), domains)
-                for dom in domains:
-                    if sxp.child_value(config, 'uuid', None):
-                        if domconfig['uuid'] == sxp.child_value(dom, 'uuid'):
-                            if domconfig['name_label'] != sxp.child_value(dom, 'name'):
-                                raise XendError("Domain UUID '%s' is already used." % \
-                                                domconfig['uuid'])
-                            else:
-                                # Update the config for that existing domain
-                                # because it is same name and same UUID.
-                                break
-                        else:
-                            if domconfig['name_label'] == sxp.child_value(dom, 'name'):
-                                raise XendError("Domain name '%s' is already used." % \
-                                                domconfig['name_label'])
-                    else:
-                        if domconfig['name_label'] == sxp.child_value(dom, 'name'):
-                            # Overwrite the auto-generated UUID by the UUID
-                            # of the existing domain. And update the config
-                            # for that existing domain.
-                            domconfig['uuid'] = sxp.child_value(dom, 'uuid')
-                            break
-                
                 dominfo = XendDomainInfo.createDormant(domconfig)
                 log.debug("Creating new managed domain: %s" %
                           dominfo.getName())
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Tue Oct 02 11:31:55 2007 -0600
@@ -74,9 +74,15 @@ def create(config):
     @return: An up and running XendDomainInfo instance
     @raise VmError: Invalid configuration or failure to start.
     """
-
+    from xen.xend import XendDomain
+    domconfig = XendConfig.XendConfig(sxp_obj = config)
+    othervm = XendDomain.instance().domain_lookup_nr(domconfig["name_label"])
+    if othervm is None or othervm.domid is None:
+        othervm = XendDomain.instance().domain_lookup_nr(domconfig["uuid"])
+    if othervm is not None and othervm.domid is not None:
+        raise VmError("Domain '%s' already exists with ID '%d'" % (domconfig["name_label"], othervm.domid))
     log.debug("XendDomainInfo.create(%s)", scrub_password(config))
-    vm = XendDomainInfo(XendConfig.XendConfig(sxp_obj = config))
+    vm = XendDomainInfo(domconfig)
     try:
         vm.start()
     except:
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/xenstat/xentop/xentop.c
--- a/tools/xenstat/xentop/xentop.c     Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/xenstat/xentop/xentop.c     Tue Oct 02 11:31:55 2007 -0600
@@ -28,6 +28,7 @@
 #include <sys/time.h>
 #include <time.h>
 #include <unistd.h>
+#include <signal.h>
 #if defined(__linux__)
 #include <linux/kdev_t.h>
 #endif
@@ -1011,6 +1012,13 @@ static void top(void)
        free(domains);
 }
 
+static int signal_exit;
+
+void signal_exit_handler(int sig)
+{
+       signal_exit = 1;
+}
+
 int main(int argc, char **argv)
 {
        int opt, optind = 0;
@@ -1102,14 +1110,22 @@ int main(int argc, char **argv)
                        ch = getch();
                } while (handle_key(ch));
        } else {
-                       do {
-                               gettimeofday(&curtime, NULL);
-                               top();
-                               oldtime = curtime;
-                               if ((!loop) && !(--iterations))
-                                       break;
-                               sleep(delay);
-                       } while (1);
+               struct sigaction sa = {
+                       .sa_handler = signal_exit_handler,
+                       .sa_flags = 0
+               };
+               sigemptyset(&sa.sa_mask);
+               sigaction(SIGINT, &sa, NULL);
+               sigaction(SIGTERM, &sa, NULL);
+
+               do {
+                       gettimeofday(&curtime, NULL);
+                       top();
+                       oldtime = curtime;
+                       if ((!loop) && !(--iterations))
+                               break;
+                       sleep(delay);
+               } while (!signal_exit);
        }
 
        /* Cleanup occurs in cleanup(), so no work to do here. */
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/xentrace/xenctx.c
--- a/tools/xentrace/xenctx.c   Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/xentrace/xenctx.c   Tue Oct 02 11:31:55 2007 -0600
@@ -21,6 +21,7 @@
 #include <argp.h>
 #include <signal.h>
 #include <string.h>
+#include <inttypes.h>
 #include <getopt.h>
 
 #include "xenctrl.h"
@@ -152,9 +153,9 @@ void print_symbol(size_t addr)
         return;
 
     if (addr==s->address)
-        printf("%s", s->name);
+        printf("%s ", s->name);
     else
-        printf("%s+%#x", s->name, (unsigned int)(addr - s->address));
+        printf("%s+%#x ", s->name, (unsigned int)(addr - s->address));
 }
 
 void read_symbol_table(const char *symtab)
@@ -207,6 +208,46 @@ void read_symbol_table(const char *symta
     fclose(f);
 }
 
+#if defined(__i386__) || defined(__x86_64__)
+char *flag_values[22][2] =
+{/*  clear,     set,       bit# */
+    { NULL,     "c"    }, // 0        Carry
+    { NULL,     NULL   }, // 1
+    { NULL,     "p"    }, // 2        Parity
+    { NULL,     NULL   }, // 3
+    { NULL,     "a"    }, // 4        Adjust
+    { NULL,     NULL   }, // 5
+    { "nz",     "z"    }, // 6        Zero
+    { NULL,     "s"    }, // 7        Sign
+    { NULL,     "tf"   }, // 8        Trap
+    { NULL,     "i"    }, // 9        Interrupt (enabled)
+    { NULL,     "d=b"  }, // 10       Direction
+    { NULL,     "o"    }, // 11       Overflow
+    { NULL,     NULL   }, // 12       12+13 == IOPL
+    { NULL,     NULL   }, // 13
+    { NULL,     "nt"   }, // 14       Nested Task
+    { NULL,     NULL   }, // 15
+    { NULL,     "rf"   }, // 16       Resume Flag
+    { NULL,     "v86"  }, // 17       Virtual 8086 mode
+    { NULL,     "ac"   }, // 18       Alignment Check (enabled)
+    { NULL,     "vif"  }, // 19       Virtual Interrupt (enabled)
+    { NULL,     "vip"  }, // 20       Virtual Interrupt Pending
+    { NULL,     "cid"  }  // 21       Cpuid Identification Flag
+};
+
+void print_flags(uint64_t flags)
+{
+    int i;
+
+    printf("flags: %08" PRIx64, flags);
+    for (i = 21; i >= 0; i--) {
+        char *s = flag_values[i][(flags >> i) & 1];
+        if (s != NULL)
+            printf(" %s", s);
+    }
+}
+#endif
+
 #ifdef __i386__
 void print_ctx(vcpu_guest_context_t *ctx1)
 {
@@ -214,6 +255,7 @@ void print_ctx(vcpu_guest_context_t *ctx
 
     printf("eip: %08x ", regs->eip);
     print_symbol(regs->eip);
+    print_flags(regs->eflags);
     printf("\n");
 
     printf("esp: %08x\n", regs->esp);
@@ -240,6 +282,7 @@ void print_ctx(vcpu_guest_context_t *ctx
 
     printf("rip: %08lx ", regs->rip);
     print_symbol(regs->rip);
+    print_flags(regs->rflags);
     printf("\n");
     printf("rsp: %08lx\n", regs->rsp);
 
@@ -262,10 +305,10 @@ void print_ctx(vcpu_guest_context_t *ctx
     printf("r14: %08lx\t", regs->r14);
     printf("r15: %08lx\n", regs->r15);
 
-    printf(" cs: %08x\t", regs->cs);
-    printf(" ds: %08x\t", regs->ds);
-    printf(" fs: %08x\t", regs->fs);
-    printf(" gs: %08x\n", regs->gs);
+    printf(" cs:     %04x\t", regs->cs);
+    printf(" ds:     %04x\t", regs->ds);
+    printf(" fs:     %04x\t", regs->fs);
+    printf(" gs:     %04x\n", regs->gs);
 
 }
 #elif defined(__ia64__)
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/xentrace/xentrace.8
--- a/tools/xentrace/xentrace.8 Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/xentrace/xentrace.8 Tue Oct 02 11:31:55 2007 -0600
@@ -131,4 +131,4 @@ Mark A. Williamson <mark.a.williamson@in
 Mark A. Williamson <mark.a.williamson@xxxxxxxxx>
 
 .SH "SEE ALSO"
-xentrace_cpuinfo(1), xentrace_format(1)
+xentrace_format(1)
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/xentrace/xentrace_format
--- a/tools/xentrace/xentrace_format    Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/xentrace/xentrace_format    Tue Oct 02 11:31:55 2007 -0600
@@ -83,11 +83,24 @@ interrupted = 0
 
 defs = read_defs(arg[0])
 
-# structure of trace record + prepended CPU id (as output by xentrace):
-# CPU(I) TSC(Q) EVENT(L) D1(L) D2(L) D3(L) D4(L) D5(L)
-# read CPU id separately to avoid structure packing problems on 64-bit arch.
-CPUREC = "I"
-TRCREC = "QLLLLLL"
+# structure of trace record (as output by xentrace):
+# HDR(I) {TSC(Q)} D1(I) D2(I) D3(I) D4(I) D5(I)
+#
+# HDR consists of EVENT:28, n_data:3, tsc_in:1
+# EVENT is the event ID
+# n_data is the number of data words (D1, D2, ...)
+# tsc_in indicates whether TSC data is present (1) or not (0).
+# If tsc_in == 0, the TSC(Q) field is absent.
+#
+# The CPU ID is carried in the data of the EVENT=0x0001f003 record.
+#
+HDRREC = "I"
+TSCREC = "Q"
+D1REC  = "I"
+D2REC  = "II"
+D3REC  = "III"
+D4REC  = "IIII"
+D5REC  = "IIIII"
 
 last_tsc = [0]
 
@@ -96,19 +109,58 @@ while not interrupted:
 while not interrupted:
     try:
        i=i+1
-        line = sys.stdin.read(struct.calcsize(CPUREC))
+        line = sys.stdin.read(struct.calcsize(HDRREC))
         if not line:
             break
-        cpu = struct.unpack(CPUREC, line)[0]
-
-        line = sys.stdin.read(struct.calcsize(TRCREC))
-        if not line:
-            break
-
-        (tsc, event, d1, d2, d3, d4, d5) = struct.unpack(TRCREC, line)
-
-        # Event field is 'uint32_t', not 'long'.
-        event &= 0xffffffff
+        event = struct.unpack(HDRREC, line)[0]
+        n_data = event >> 28 & 0x7
+        tsc_in = event >> 31
+
+        d1 = 0
+        d2 = 0
+        d3 = 0
+        d4 = 0
+        d5 = 0
+  
+        tsc = 0
+
+        if tsc_in == 1:
+            line = sys.stdin.read(struct.calcsize(TSCREC))
+            if not line:
+                break
+            tsc = struct.unpack(TSCREC, line)[0]
+
+        if n_data == 1:
+            line = sys.stdin.read(struct.calcsize(D1REC))
+            if not line:
+                break
+            (d1) = struct.unpack(D1REC, line)
+        if n_data == 2:
+            line = sys.stdin.read(struct.calcsize(D2REC))
+            if not line:
+                break
+            (d1, d2) = struct.unpack(D2REC, line)
+        if n_data == 3:
+            line = sys.stdin.read(struct.calcsize(D3REC))
+            if not line:
+                break
+            (d1, d2, d3) = struct.unpack(D3REC, line)
+        if n_data == 4:
+            line = sys.stdin.read(struct.calcsize(D4REC))
+            if not line:
+                break
+            (d1, d2, d3, d4) = struct.unpack(D4REC, line)
+        if n_data == 5:
+            line = sys.stdin.read(struct.calcsize(D5REC))
+            if not line:
+                break
+            (d1, d2, d3, d4, d5) = struct.unpack(D5REC, line)
+
+        # Event field is the low 28 bits of the header 'uint32_t', not 'long'.
+        event &= 0x0fffffff
+        if event == 0x1f003:
+            cpu = d1
+
 
        #tsc = (tscH<<32) | tscL
 
@@ -116,16 +168,17 @@ while not interrupted:
 
         if cpu >= len(last_tsc):
             last_tsc += [0] * (cpu - len(last_tsc) + 1)
-       elif tsc < last_tsc[cpu]:
+       elif tsc < last_tsc[cpu] and tsc_in == 1:
            print "TSC stepped backward cpu %d !  %d %d" % 
(cpu,tsc,last_tsc[cpu])
 
        # provide relative TSC
-       if last_tsc[cpu] > 0:
+       if last_tsc[cpu] > 0 and tsc_in == 1:
                reltsc = tsc - last_tsc[cpu]
        else:
                reltsc = 0
 
-       last_tsc[cpu] = tsc
+       if tsc_in == 1:
+           last_tsc[cpu] = tsc
 
        if mhz:
            tsc = tsc / (mhz*1000000.0)
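
The record layout documented in the comment block above (EVENT:28, n_data:3,
tsc_in:1 in the header word, then an optional 64-bit TSC and up to five
32-bit data words) can be consumed from C just as well. A minimal reader
sketch under that layout (illustrative only, not a tool shipped by this
changeset):

    #include <stdint.h>
    #include <stdio.h>

    /* Read one variable-length trace record; returns 0 on success. */
    static int read_record(FILE *f)
    {
        uint32_t hdr, d[7] = { 0 };     /* n_data is a 3-bit field */
        uint64_t tsc = 0;
        unsigned int i, event, n_data, tsc_in;

        if (fread(&hdr, sizeof(hdr), 1, f) != 1)
            return -1;
        event  = hdr & 0x0fffffff;      /* low 28 bits */
        n_data = (hdr >> 28) & 0x7;
        tsc_in = hdr >> 31;
        if (tsc_in && fread(&tsc, sizeof(tsc), 1, f) != 1)
            return -1;
        for (i = 0; i < n_data; i++)
            if (fread(&d[i], sizeof(d[i]), 1, f) != 1)
                return -1;
        printf("event %#x n_data %u tsc %llu\n",
               event, n_data, (unsigned long long)tsc);
        return 0;
    }
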
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/xm-test/configure.ac
--- a/tools/xm-test/configure.ac        Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/xm-test/configure.ac        Tue Oct 02 11:31:55 2007 -0600
@@ -85,9 +85,9 @@ AC_SUBST(NETWORK_ADDRESS)
 AC_SUBST(NETWORK_ADDRESS)
 AC_SUBST(NETMASK)
 
-DOM0_INTF="vif0.0"
+DOM0_INTF="eth0"
 AC_ARG_WITH(dom0-intf,
-        [ --with-dom0-intf=intf Set dom0 interface name [[default="vif0.0"]]],
+        [ --with-dom0-intf=intf Set dom0 interface name [[default="eth0"]]],
         [ DOM0_INTF="$withval" ])
 
 AC_SUBST(DOM0_INTF)
diff -r d6c09be8c5f5 -r 3165e43ce734 tools/xm-test/lib/XmTestLib/NetConfig.py
--- a/tools/xm-test/lib/XmTestLib/NetConfig.py  Tue Oct 02 10:07:35 2007 -0600
+++ b/tools/xm-test/lib/XmTestLib/NetConfig.py  Tue Oct 02 11:31:55 2007 -0600
@@ -71,7 +71,7 @@ def getXendNetConfig():
 
 def checkZeroconfAddresses():
     # Make sure there aren't existing zeroconf addresses.
-    rc, out = traceCommand("ip addr show |grep \"inet 169.254\" | grep -v vif")
+    rc, out = traceCommand("ip addr show |grep \"inet 169.254\" | grep eth0")
     if rc == 0:
         raise NetworkError("Zeroconf addresses already used: %s" % out)
 
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/domain.c     Tue Oct 02 11:31:55 2007 -0600
@@ -382,6 +382,10 @@ int vcpu_initialise(struct vcpu *v)
 
     v->arch.flags = TF_kernel_mode;
 
+#if defined(__i386__)
+    mapcache_vcpu_init(v);
+#endif
+
     pae_l3_cache_init(&v->arch.pae_l3_cache);
 
     paging_vcpu_init(v);
@@ -461,7 +465,7 @@ int arch_domain_create(struct domain *d)
 
 #if defined(__i386__)
 
-    mapcache_init(d);
+    mapcache_domain_init(d);
 
 #else /* __x86_64__ */
 
@@ -645,21 +649,21 @@ int arch_set_info_guest(
 
     v->arch.guest_context.user_regs.eflags |= 2;
 
+    if ( is_hvm_vcpu(v) )
+        goto out;
+
     /* Only CR0.TS is modifiable by guest or admin. */
     v->arch.guest_context.ctrlreg[0] &= X86_CR0_TS;
     v->arch.guest_context.ctrlreg[0] |= read_cr0() & ~X86_CR0_TS;
 
     init_int80_direct_trap(v);
 
-    if ( !is_hvm_vcpu(v) )
-    {
-        /* IOPL privileges are virtualised. */
-        v->arch.iopl = (v->arch.guest_context.user_regs.eflags >> 12) & 3;
-        v->arch.guest_context.user_regs.eflags &= ~EF_IOPL;
-
-        /* Ensure real hardware interrupts are enabled. */
-        v->arch.guest_context.user_regs.eflags |= EF_IE;
-    }
+    /* IOPL privileges are virtualised. */
+    v->arch.iopl = (v->arch.guest_context.user_regs.eflags >> 12) & 3;
+    v->arch.guest_context.user_regs.eflags &= ~EF_IOPL;
+
+    /* Ensure real hardware interrupts are enabled. */
+    v->arch.guest_context.user_regs.eflags |= EF_IE;
 
     if ( v->is_initialised )
         goto out;
@@ -672,29 +676,44 @@ int arch_set_info_guest(
     if ( v->vcpu_id == 0 )
         d->vm_assist = c(vm_assist);
 
-    if ( !is_hvm_vcpu(v) )
-    {
-        if ( !compat )
-            rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents);
+    if ( !compat )
+        rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents);
 #ifdef CONFIG_COMPAT
-        else
-        {
-            unsigned long gdt_frames[ARRAY_SIZE(c.cmp->gdt_frames)];
-            unsigned int i, n = (c.cmp->gdt_ents + 511) / 512;
-
-            if ( n > ARRAY_SIZE(c.cmp->gdt_frames) )
-                return -EINVAL;
-            for ( i = 0; i < n; ++i )
-                gdt_frames[i] = c.cmp->gdt_frames[i];
-            rc = (int)set_gdt(v, gdt_frames, c.cmp->gdt_ents);
-        }
-#endif
-        if ( rc != 0 )
-            return rc;
-
-        if ( !compat )
-        {
-            cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3]));
+    else
+    {
+        unsigned long gdt_frames[ARRAY_SIZE(c.cmp->gdt_frames)];
+        unsigned int i, n = (c.cmp->gdt_ents + 511) / 512;
+
+        if ( n > ARRAY_SIZE(c.cmp->gdt_frames) )
+            return -EINVAL;
+        for ( i = 0; i < n; ++i )
+            gdt_frames[i] = c.cmp->gdt_frames[i];
+        rc = (int)set_gdt(v, gdt_frames, c.cmp->gdt_ents);
+    }
+#endif
+    if ( rc != 0 )
+        return rc;
+
+    if ( !compat )
+    {
+        cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3]));
+
+        if ( !mfn_valid(cr3_pfn) ||
+             (paging_mode_refcounts(d)
+              ? !get_page(mfn_to_page(cr3_pfn), d)
+              : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+                                   PGT_base_page_table)) )
+        {
+            destroy_gdt(v);
+            return -EINVAL;
+        }
+
+        v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
+
+#ifdef __x86_64__
+        if ( c.nat->ctrlreg[1] )
+        {
+            cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[1]));
 
             if ( !mfn_valid(cr3_pfn) ||
                  (paging_mode_refcounts(d)
@@ -702,59 +721,42 @@ int arch_set_info_guest(
                   : !get_page_and_type(mfn_to_page(cr3_pfn), d,
                                        PGT_base_page_table)) )
             {
+                cr3_pfn = pagetable_get_pfn(v->arch.guest_table);
+                v->arch.guest_table = pagetable_null();
+                if ( paging_mode_refcounts(d) )
+                    put_page(mfn_to_page(cr3_pfn));
+                else
+                    put_page_and_type(mfn_to_page(cr3_pfn));
                 destroy_gdt(v);
                 return -EINVAL;
             }
 
-            v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
-
-#ifdef __x86_64__
-            if ( c.nat->ctrlreg[1] )
-            {
-                cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[1]));
-
-                if ( !mfn_valid(cr3_pfn) ||
-                     (paging_mode_refcounts(d)
-                      ? !get_page(mfn_to_page(cr3_pfn), d)
-                      : !get_page_and_type(mfn_to_page(cr3_pfn), d,
-                                           PGT_base_page_table)) )
-                {
-                    cr3_pfn = pagetable_get_pfn(v->arch.guest_table);
-                    v->arch.guest_table = pagetable_null();
-                    if ( paging_mode_refcounts(d) )
-                        put_page(mfn_to_page(cr3_pfn));
-                    else
-                        put_page_and_type(mfn_to_page(cr3_pfn));
-                    destroy_gdt(v);
-                    return -EINVAL;
-                }
-
-                v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn);
-            }
-#endif
-        }
+            v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn);
+        }
+#endif
+    }
 #ifdef CONFIG_COMPAT
-        else
-        {
-            l4_pgentry_t *l4tab;
-
-            cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3]));
-
-            if ( !mfn_valid(cr3_pfn) ||
-                 (paging_mode_refcounts(d)
-                  ? !get_page(mfn_to_page(cr3_pfn), d)
-                  : !get_page_and_type(mfn_to_page(cr3_pfn), d,
-                                       PGT_l3_page_table)) )
-            {
-                destroy_gdt(v);
-                return -EINVAL;
-            }
-
-            l4tab = __va(pagetable_get_paddr(v->arch.guest_table));
-            *l4tab = l4e_from_pfn(cr3_pfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED);
-        }
-#endif
-    }    
+    else
+    {
+        l4_pgentry_t *l4tab;
+
+        cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3]));
+
+        if ( !mfn_valid(cr3_pfn) ||
+             (paging_mode_refcounts(d)
+              ? !get_page(mfn_to_page(cr3_pfn), d)
+              : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+                                   PGT_l3_page_table)) )
+        {
+            destroy_gdt(v);
+            return -EINVAL;
+        }
+
+        l4tab = __va(pagetable_get_paddr(v->arch.guest_table));
+        *l4tab = l4e_from_pfn(
+            cr3_pfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED);
+    }
+#endif
 
     if ( v->vcpu_id == 0 )
         update_domain_wallclock_time(d);
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Tue Oct 02 11:31:55 2007 -0600
@@ -443,6 +443,8 @@ int hvm_vcpu_initialise(struct vcpu *v)
     spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
     INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
 
+    v->arch.guest_context.user_regs.eflags = 2;
+
     if ( v->vcpu_id == 0 )
     {
         /* NB. All these really belong in hvm_domain_initialise(). */
@@ -453,6 +455,10 @@ int hvm_vcpu_initialise(struct vcpu *v)
  
         /* Init guest TSC to start from zero. */
         hvm_set_guest_time(v, 0);
+
+        /* Can start up without SIPI-SIPI or setvcpucontext domctl. */
+        v->is_initialised = 1;
+        clear_bit(_VPF_down, &v->pause_flags);
     }
 
     return 0;
@@ -737,7 +743,7 @@ int hvm_set_cr4(unsigned long value)
     old_cr = v->arch.hvm_vcpu.guest_cr[4];
     v->arch.hvm_vcpu.guest_cr[4] = value;
     hvm_update_guest_cr(v, 4);
-  
+
     /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
     if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
         paging_update_paging_modes(v);
@@ -1651,7 +1657,15 @@ static int hvmop_set_pci_link_route(
 
 static int hvmop_flush_tlb_all(void)
 {
+    struct vcpu *v;
+
+    /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
+    for_each_vcpu ( current->domain, v )
+        paging_update_cr3(v);
+
+    /* Flush all dirty TLBs. */
     flush_tlb_mask(current->domain->domain_dirty_cpumask);
+
     return 0;
 }
 
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c    Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/irq.c    Tue Oct 02 11:31:55 2007 -0600
@@ -285,49 +285,63 @@ void hvm_set_callback_via(struct domain 
     }
 }
 
-enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v)
+struct hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v)
 {
     struct hvm_domain *plat = &v->domain->arch.hvm_domain;
+    int vector;
 
     if ( unlikely(v->nmi_pending) )
         return hvm_intack_nmi;
 
-    if ( vlapic_has_interrupt(v) != -1 )
-        return hvm_intack_lapic;
-
-    if ( !vlapic_accept_pic_intr(v) )
-        return hvm_intack_none;
-
-    return plat->vpic[0].int_output ? hvm_intack_pic : hvm_intack_none;
-}
-
-int hvm_vcpu_ack_pending_irq(struct vcpu *v, enum hvm_intack type, int *vector)
-{
-    switch ( type )
-    {
-    case hvm_intack_nmi:
-        return test_and_clear_bool(v->nmi_pending);
-    case hvm_intack_lapic:
-        return ((*vector = cpu_get_apic_interrupt(v)) != -1);
-    case hvm_intack_pic:
+    if ( vlapic_accept_pic_intr(v) && plat->vpic[0].int_output )
+        return hvm_intack_pic(0);
+
+    vector = vlapic_has_pending_irq(v);
+    if ( vector != -1 )
+        return hvm_intack_lapic(vector);
+
+    return hvm_intack_none;
+}
+
+struct hvm_intack hvm_vcpu_ack_pending_irq(
+    struct vcpu *v, struct hvm_intack intack)
+{
+    int vector;
+
+    switch ( intack.source )
+    {
+    case hvm_intsrc_nmi:
+        if ( !test_and_clear_bool(v->nmi_pending) )
+            intack = hvm_intack_none;
+        break;
+    case hvm_intsrc_pic:
         ASSERT(v->vcpu_id == 0);
-        return ((*vector = cpu_get_pic_interrupt(v)) != -1);
+        if ( (vector = vpic_ack_pending_irq(v)) == -1 )
+            intack = hvm_intack_none;
+        else
+            intack.vector = (uint8_t)vector;
+        break;
+    case hvm_intsrc_lapic:
+        if ( !vlapic_ack_pending_irq(v, intack.vector) )
+            intack = hvm_intack_none;
+        break;
     default:
-        break;
-    }
-
-    return 0;
-}
-
-int get_isa_irq_vector(struct vcpu *v, int isa_irq, enum hvm_intack src)
+        intack = hvm_intack_none;
+        break;
+    }
+
+    return intack;
+}
+
+int get_isa_irq_vector(struct vcpu *v, int isa_irq, enum hvm_intsrc src)
 {
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
 
-    if ( src == hvm_intack_pic )
+    if ( src == hvm_intsrc_pic )
         return (v->domain->arch.hvm_domain.vpic[isa_irq >> 3].irq_base
                 + (isa_irq & 7));
 
-    ASSERT(src == hvm_intack_lapic);
+    ASSERT(src == hvm_intsrc_lapic);
     return domain_vioapic(v->domain)->redirtbl[gsi].fields.vector;
 }
 
@@ -345,18 +359,18 @@ int is_isa_irq_masked(struct vcpu *v, in
 
 int hvm_local_events_need_delivery(struct vcpu *v)
 {
-    enum hvm_intack type;
+    struct hvm_intack intack;
 
     /* TODO: Get rid of event-channel special case. */
     if ( vcpu_info(v, evtchn_upcall_pending) )
-        type = hvm_intack_pic;
+        intack = hvm_intack_pic(0);
     else
-        type = hvm_vcpu_has_pending_irq(v);
-
-    if ( likely(type == hvm_intack_none) )
+        intack = hvm_vcpu_has_pending_irq(v);
+
+    if ( likely(intack.source == hvm_intsrc_none) )
         return 0;
 
-    return hvm_interrupts_enabled(v, type);
+    return !hvm_interrupt_blocked(v, intack);
 }
 
 #if 0 /* Keep for debugging */
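
The interface change above replaces the old enum hvm_intack with a
(source, vector) pair, so the has-pending/ack loop can carry the chosen
vector from selection through to injection. The real declarations live in
xen/include/asm-x86/hvm/irq.h and hvm.h (both touched by this changeset,
diffs not shown here); the sketch below is only what the usage above implies:

    /* Sketch consistent with the callers above; see the real headers. */
    enum hvm_intsrc {
        hvm_intsrc_none,
        hvm_intsrc_pic,
        hvm_intsrc_lapic,
        hvm_intsrc_nmi
    };

    struct hvm_intack {
        uint8_t source;   /* enum hvm_intsrc */
        uint8_t vector;
    };

    /* Constructors matching hvm_intack_none, hvm_intack_pic(0), etc. */
    #define hvm_intack_none       ((struct hvm_intack){ hvm_intsrc_none,  0 })
    #define hvm_intack_pic(vec)   ((struct hvm_intack){ hvm_intsrc_pic,   vec })
    #define hvm_intack_lapic(vec) ((struct hvm_intack){ hvm_intsrc_lapic, vec })
    #define hvm_intack_nmi        ((struct hvm_intack){ hvm_intsrc_nmi,   2 })
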
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c       Tue Oct 02 11:31:55 2007 -0600
@@ -39,19 +39,6 @@
 #include <xen/domain_page.h>
 #include <asm/hvm/trace.h>
 
-static void svm_inject_dummy_vintr(struct vcpu *v)
-{
-    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    vintr_t intr = vmcb->vintr;
-
-    intr.fields.irq = 1;
-    intr.fields.intr_masking = 1;
-    intr.fields.vector = 0;
-    intr.fields.prio = 0xF;
-    intr.fields.ign_tpr = 1;
-    vmcb->vintr = intr;
-}
-    
 static void svm_inject_nmi(struct vcpu *v)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -80,11 +67,14 @@ static void svm_inject_extint(struct vcp
     vmcb->eventinj = event;
 }
     
-static void enable_intr_window(struct vcpu *v, enum hvm_intack intr_source)
+static void enable_intr_window(struct vcpu *v, struct hvm_intack intack)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    vintr_t intr;
 
-    ASSERT(intr_source != hvm_intack_none);
+    ASSERT(intack.source != hvm_intsrc_none);
+
+    HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
 
     /*
      * Create a dummy virtual interrupt to intercept as soon as the
@@ -95,53 +85,29 @@ static void enable_intr_window(struct vc
      * track 'NMI blocking' from NMI injection until IRET. This can be done
      * quite easily in software by intercepting the unblocking IRET.
      */
+    intr = vmcb->vintr;
+    intr.fields.irq     = 1;
+    intr.fields.vector  = 0;
+    intr.fields.prio    = intack.vector >> 4;
+    intr.fields.ign_tpr = (intack.source != hvm_intsrc_lapic);
+    vmcb->vintr = intr;
     vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
-    HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
-    svm_inject_dummy_vintr(v);
-}
-
-static void update_cr8_intercept(
-    struct vcpu *v, enum hvm_intack masked_intr_source)
-{
-    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    struct vlapic *vlapic = vcpu_vlapic(v);
-    int max_irr;
-
-    vmcb->cr_intercepts &= ~CR_INTERCEPT_CR8_WRITE;
-
-    /*
-     * If ExtInts are masked then that dominates the TPR --- the 'interrupt
-     * window' has already been enabled in this case.
-     */
-    if ( (masked_intr_source == hvm_intack_lapic) ||
-         (masked_intr_source == hvm_intack_pic) )
-        return;
-
-    /* Is there an interrupt pending at the LAPIC? Nothing to do if not. */
-    if ( !vlapic_enabled(vlapic) || 
-         ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
-        return;
-
-    /* Highest-priority pending interrupt is masked by the TPR? */
-    if ( (vmcb->vintr.fields.tpr & 0xf) >= (max_irr >> 4) )
-        vmcb->cr_intercepts |= CR_INTERCEPT_CR8_WRITE;
 }
 
 asmlinkage void svm_intr_assist(void) 
 {
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    enum hvm_intack intr_source;
-    int intr_vector;
+    struct hvm_intack intack;
 
     /* Crank the handle on interrupt state. */
     pt_update_irq(v);
     hvm_set_callback_irq_level();
 
     do {
-        intr_source = hvm_vcpu_has_pending_irq(v);
-        if ( likely(intr_source == hvm_intack_none) )
-            goto out;
+        intack = hvm_vcpu_has_pending_irq(v);
+        if ( likely(intack.source == hvm_intsrc_none) )
+            return;
 
         /*
          * Pending IRQs must be delayed if:
@@ -158,31 +124,30 @@ asmlinkage void svm_intr_assist(void)
          * 2. The IRQ is masked.
          */
         if ( unlikely(vmcb->eventinj.fields.v) ||
-             !hvm_interrupts_enabled(v, intr_source) )
+             hvm_interrupt_blocked(v, intack) )
         {
-            enable_intr_window(v, intr_source);
-            goto out;
+            enable_intr_window(v, intack);
+            return;
         }
-    } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
 
-    if ( intr_source == hvm_intack_nmi )
+        intack = hvm_vcpu_ack_pending_irq(v, intack);
+    } while ( intack.source == hvm_intsrc_none );
+
+    if ( intack.source == hvm_intsrc_nmi )
     {
         svm_inject_nmi(v);
     }
     else
     {
-        HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
-        svm_inject_extint(v, intr_vector);
-        pt_intr_post(v, intr_vector, intr_source);
+        HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
+        svm_inject_extint(v, intack.vector);
+        pt_intr_post(v, intack);
     }
 
     /* Is there another IRQ to queue up behind this one? */
-    intr_source = hvm_vcpu_has_pending_irq(v);
-    if ( unlikely(intr_source != hvm_intack_none) )
-        enable_intr_window(v, intr_source);
-
- out:
-    update_cr8_intercept(v, intr_source);
+    intack = hvm_vcpu_has_pending_irq(v);
+    if ( unlikely(intack.source != hvm_intsrc_none) )
+        enable_intr_window(v, intack);
 }
 
 /*
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Tue Oct 02 11:31:55 2007 -0600
@@ -425,16 +425,28 @@ static void svm_restore_dr(struct vcpu *
         __restore_debug_registers(v);
 }
 
-static int svm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
-{
-    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-
-    if ( type == hvm_intack_nmi )
-        return !vmcb->interrupt_shadow;
-
-    ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
-    return (!irq_masked(guest_cpu_user_regs()->eflags) &&
-            !vmcb->interrupt_shadow);
+static enum hvm_intblk svm_interrupt_blocked(
+    struct vcpu *v, struct hvm_intack intack)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    if ( vmcb->interrupt_shadow )
+        return hvm_intblk_shadow;
+
+    if ( intack.source == hvm_intsrc_nmi )
+        return hvm_intblk_none;
+
+    ASSERT((intack.source == hvm_intsrc_pic) ||
+           (intack.source == hvm_intsrc_lapic));
+
+    if ( irq_masked(guest_cpu_user_regs()->eflags) )
+        return hvm_intblk_rflags_ie;
+
+    if ( (intack.source == hvm_intsrc_lapic) &&
+         ((vmcb->vintr.fields.tpr & 0xf) >= (intack.vector >> 4)) )
+        return hvm_intblk_tpr;
+
+    return hvm_intblk_none;
 }
 
 static int svm_guest_x86_mode(struct vcpu *v)
@@ -855,7 +867,7 @@ static struct hvm_function_table svm_fun
     .vcpu_destroy         = svm_vcpu_destroy,
     .save_cpu_ctxt        = svm_save_vmcb_ctxt,
     .load_cpu_ctxt        = svm_load_vmcb_ctxt,
-    .interrupts_enabled   = svm_interrupts_enabled,
+    .interrupt_blocked    = svm_interrupt_blocked,
     .guest_x86_mode       = svm_guest_x86_mode,
     .get_segment_base     = svm_get_segment_base,
     .get_segment_register = svm_get_segment_register,
@@ -1552,7 +1564,6 @@ static void mov_from_cr(int cr, int gp, 
 {
     unsigned long value = 0;
     struct vcpu *v = current;
-    struct vlapic *vlapic = vcpu_vlapic(v);
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
     switch ( cr )
@@ -1560,21 +1571,14 @@ static void mov_from_cr(int cr, int gp, 
     case 0:
         value = v->arch.hvm_vcpu.guest_cr[0];
         break;
-    case 2:
-        value = vmcb->cr2;
-        break;
     case 3:
         value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
         break;
     case 4:
         value = (unsigned long)v->arch.hvm_vcpu.guest_cr[4];
         break;
-    case 8:
-        value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
-        value = (value & 0xF0) >> 4;
-        break;
-        
     default:
+        gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
         domain_crash(v->domain);
         return;
     }
@@ -1590,7 +1594,6 @@ static int mov_to_cr(int gpreg, int cr, 
 {
     unsigned long value;
     struct vcpu *v = current;
-    struct vlapic *vlapic = vcpu_vlapic(v);
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
     value = get_reg(gpreg, regs, vmcb);
@@ -1604,18 +1607,10 @@ static int mov_to_cr(int gpreg, int cr, 
     {
     case 0: 
         return svm_set_cr0(value);
-
     case 3:
         return hvm_set_cr3(value);
-
     case 4:
         return hvm_set_cr4(value);
-
-    case 8:
-        vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
-        vmcb->vintr.fields.tpr = value & 0x0F;
-        break;
-
     default:
         gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
         domain_crash(v->domain);
@@ -1894,13 +1889,14 @@ static void svm_vmexit_do_hlt(struct vmc
 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
                               struct cpu_user_regs *regs)
 {
-    enum hvm_intack type = hvm_vcpu_has_pending_irq(current);
+    struct hvm_intack intack = hvm_vcpu_has_pending_irq(current);
 
     __update_guest_eip(regs, 1);
 
     /* Check for interrupt not handled or new interrupt. */
     if ( vmcb->eventinj.fields.v ||
-         ((type != hvm_intack_none) && svm_interrupts_enabled(current, type)) )
+         ((intack.source != hvm_intsrc_none) &&
+          !svm_interrupt_blocked(current, intack)) )
     {
         HVMTRACE_1D(HLT, current, /*int pending=*/ 1);
         return;
@@ -2080,13 +2076,11 @@ asmlinkage void svm_vmexit_handler(struc
 
     /*
      * Before doing anything else, we need to sync up the VLAPIC's TPR with
-     * SVM's vTPR if CR8 writes are currently disabled.  It's OK if the 
-     * guest doesn't touch the CR8 (e.g. 32-bit Windows) because we update
-     * the vTPR on MMIO writes to the TPR
+     * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
+     * because we update the vTPR on MMIO writes to the TPR.
      */
-    if ( !(vmcb->cr_intercepts & CR_INTERCEPT_CR8_WRITE) )
-        vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
-                       (vmcb->vintr.fields.tpr & 0x0F) << 4);
+    vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
+                   (vmcb->vintr.fields.tpr & 0x0F) << 4);
 
     exit_reason = vmcb->exitcode;
 
@@ -2164,9 +2158,9 @@ asmlinkage void svm_vmexit_handler(struc
         break;
     }
 
+    /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
     case VMEXIT_EXCEPTION_MC:
         HVMTRACE_0D(MCE, v);
-        do_machine_check(regs);
         break;
 
     case VMEXIT_VINTR:
@@ -2222,45 +2216,14 @@ asmlinkage void svm_vmexit_handler(struc
         }
         break;
 
-    case VMEXIT_CR0_READ:
-        svm_cr_access(v, 0, TYPE_MOV_FROM_CR, regs);
-        break;
-
-    case VMEXIT_CR2_READ:
-        svm_cr_access(v, 2, TYPE_MOV_FROM_CR, regs);
-        break;
-
-    case VMEXIT_CR3_READ:
-        svm_cr_access(v, 3, TYPE_MOV_FROM_CR, regs);
-        break;
-
-    case VMEXIT_CR4_READ:
-        svm_cr_access(v, 4, TYPE_MOV_FROM_CR, regs);
-        break;
-
-    case VMEXIT_CR8_READ:
-        svm_cr_access(v, 8, TYPE_MOV_FROM_CR, regs);
-        break;
-
-    case VMEXIT_CR0_WRITE:
-        svm_cr_access(v, 0, TYPE_MOV_TO_CR, regs);
-        break;
-
-    case VMEXIT_CR2_WRITE:
-        svm_cr_access(v, 2, TYPE_MOV_TO_CR, regs);
-        break;
-
-    case VMEXIT_CR3_WRITE:
-        svm_cr_access(v, 3, TYPE_MOV_TO_CR, regs);
-        local_flush_tlb();
-        break;
-
-    case VMEXIT_CR4_WRITE:
-        svm_cr_access(v, 4, TYPE_MOV_TO_CR, regs);
-        break;
-
-    case VMEXIT_CR8_WRITE:
-        svm_cr_access(v, 8, TYPE_MOV_TO_CR, regs);
+    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
+        svm_cr_access(v, exit_reason - VMEXIT_CR0_READ,
+                      TYPE_MOV_FROM_CR, regs);
+        break;
+
+    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
+        svm_cr_access(v, exit_reason - VMEXIT_CR0_WRITE,
+                      TYPE_MOV_TO_CR, regs);
         break;
 
     case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Tue Oct 02 11:31:55 2007 -0600
@@ -130,14 +130,11 @@ static int construct_vmcb(struct vcpu *v
     /* Intercept all debug-register writes. */
     vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
 
-    /*
-     * Intercept all control-register accesses except for CR2 reads/writes
-     * and CR8 reads (and actually CR8 writes, but that's a special case
-     * that's handled in svm/intr.c). 
-     */
+    /* Intercept all control-register accesses except for CR2 and CR8. */
     vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ |
                             CR_INTERCEPT_CR2_WRITE |
-                            CR_INTERCEPT_CR8_READ);
+                            CR_INTERCEPT_CR8_READ |
+                            CR_INTERCEPT_CR8_WRITE);
 
     /* I/O and MSR permission bitmaps. */
     arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/vlapic.c Tue Oct 02 11:31:55 2007 -0600
@@ -732,33 +732,34 @@ int vlapic_accept_pic_intr(struct vcpu *
              vlapic_hw_disabled(vlapic)));
 }
 
-int vlapic_has_interrupt(struct vcpu *v)
+int vlapic_has_pending_irq(struct vcpu *v)
 {
     struct vlapic *vlapic = vcpu_vlapic(v);
-    int highest_irr;
+    int irr, isr;
 
     if ( !vlapic_enabled(vlapic) )
         return -1;
 
-    highest_irr = vlapic_find_highest_irr(vlapic);
-    if ( (highest_irr == -1) ||
-         ((highest_irr & 0xF0) <= vlapic_get_ppr(vlapic)) )
+    irr = vlapic_find_highest_irr(vlapic);
+    if ( irr == -1 )
         return -1;
 
-    return highest_irr;
-}
-
-int cpu_get_apic_interrupt(struct vcpu *v)
-{
-    int vector = vlapic_has_interrupt(v);
+    isr = vlapic_find_highest_isr(vlapic);
+    isr = (isr != -1) ? isr : 0;
+    if ( (isr & 0xf0) >= (irr & 0xf0) )
+        return -1;
+
+    return irr;
+}
+
+int vlapic_ack_pending_irq(struct vcpu *v, int vector)
+{
     struct vlapic *vlapic = vcpu_vlapic(v);
 
-    if ( vector == -1 )
-        return -1;
- 
     vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
     vlapic_clear_irr(vector, vlapic);
-    return vector;
+
+    return 1;
 }
 
 /* Reset the VLAPIC back to its power-on/reset state. */
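
vlapic_has_pending_irq() above switches from comparing the highest IRR
vector against the PPR to comparing priority classes: vectors group into
16 classes by their top nibble, and a pending vector is deliverable only
while its class is strictly above that of the highest in-service vector.
Reduced to a standalone helper (illustrative name, not part of the patch):

    /* Mirrors the class test in vlapic_has_pending_irq() above. */
    static int irr_deliverable(int irr, int isr)
    {
        if (irr == -1)
            return 0;               /* nothing pending */
        if (isr == -1)
            isr = 0;                /* nothing in service */
        return (irr & 0xf0) > (isr & 0xf0);
    }
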
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/intr.c       Tue Oct 02 11:31:55 2007 -0600
@@ -71,14 +71,14 @@
  * the effect is cleared. (i.e., MOV-SS-blocking 'dominates' STI-blocking).
  */
 
-static void enable_intr_window(struct vcpu *v, enum hvm_intack intr_source)
+static void enable_intr_window(struct vcpu *v, struct hvm_intack intack)
 {
     u32 *cpu_exec_control = &v->arch.hvm_vmx.exec_control;
     u32 ctl = CPU_BASED_VIRTUAL_INTR_PENDING;
 
-    ASSERT(intr_source != hvm_intack_none);
+    ASSERT(intack.source != hvm_intsrc_none);
 
-    if ( (intr_source == hvm_intack_nmi) && cpu_has_vmx_vnmi )
+    if ( (intack.source == hvm_intsrc_nmi) && cpu_has_vmx_vnmi )
     {
         /*
          * We set MOV-SS blocking in lieu of STI blocking when delivering an
@@ -107,105 +107,84 @@ static void enable_intr_window(struct vc
     }
 }
 
-static void update_tpr_threshold(
-    struct vcpu *v, enum hvm_intack masked_intr_source)
-{
-    struct vlapic *vlapic = vcpu_vlapic(v);
-    int max_irr, tpr, threshold = 0;
-
-    if ( !cpu_has_vmx_tpr_shadow )
-        return;
-
-    /*
-     * If ExtInts are masked then that dominates the TPR --- the 'interrupt
-     * window' has already been enabled in this case.
-     */
-    if ( (masked_intr_source == hvm_intack_lapic) ||
-         (masked_intr_source == hvm_intack_pic) )
-        goto out;
-
-    /* Is there an interrupt pending at the LAPIC? Nothing to do if not. */
-    if ( !vlapic_enabled(vlapic) || 
-         ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
-        goto out;
-
-    /* Highest-priority pending interrupt is masked by the TPR? */
-    tpr = vlapic_get_reg(vlapic, APIC_TASKPRI) & 0xF0;
-    if ( (tpr >> 4) >= (max_irr >> 4) )
-        threshold = max_irr >> 4;
-
- out:
-    __vmwrite(TPR_THRESHOLD, threshold);
-}
-
-static void vmx_dirq_assist(struct domain *d)
+static void vmx_dirq_assist(struct vcpu *v)
 {
     unsigned int irq;
     uint32_t device, intx;
-    struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+    struct domain *d = v->domain;
+    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
 
-    for ( irq = find_first_bit(hvm_irq->dirq_mask, NR_IRQS);
+    if ( !vtd_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
+        return;
+
+    for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
           irq < NR_IRQS;
-          irq = find_next_bit(hvm_irq->dirq_mask, NR_IRQS, irq + 1) )
+          irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
     {
-        test_and_clear_bit(irq, &hvm_irq->dirq_mask);
-        device = hvm_irq->mirq[irq].device;
-        intx = hvm_irq->mirq[irq].intx;
+        test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask);
+        device = hvm_irq_dpci->mirq[irq].device;
+        intx = hvm_irq_dpci->mirq[irq].intx;
         hvm_pci_intx_assert(d, device, intx);
     }
 }
 
 asmlinkage void vmx_intr_assist(void)
 {
-    int intr_vector;
-    enum hvm_intack intr_source;
+    struct hvm_intack intack;
     struct vcpu *v = current;
-    unsigned int intr_info;
+    unsigned int tpr_threshold = 0;
+    enum hvm_intblk intblk;
 
     /* Crank the handle on interrupt state. */
     pt_update_irq(v);
 
-    if ( vtd_enabled && (v->vcpu_id == 0) )
-        vmx_dirq_assist(v->domain);
+    vmx_dirq_assist(v);
   
     hvm_set_callback_irq_level();
 
     do {
-        intr_source = hvm_vcpu_has_pending_irq(v);
-        if ( likely(intr_source == hvm_intack_none) )
+        intack = hvm_vcpu_has_pending_irq(v);
+        if ( likely(intack.source == hvm_intsrc_none) )
             goto out;
 
-        /*
-         * An event is already pending or the pending interrupt is masked?
-         * Then the pending interrupt must be delayed.
-         */
-        intr_info = __vmread(VM_ENTRY_INTR_INFO);
-        if ( unlikely(intr_info & INTR_INFO_VALID_MASK) ||
-             !hvm_interrupts_enabled(v, intr_source) )
+        intblk = hvm_interrupt_blocked(v, intack);
+        if ( intblk == hvm_intblk_tpr )
         {
-            enable_intr_window(v, intr_source);
+            ASSERT(vlapic_enabled(vcpu_vlapic(v)));
+            ASSERT(intack.source == hvm_intsrc_lapic);
+            tpr_threshold = intack.vector >> 4;
             goto out;
         }
-    } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
 
-    if ( intr_source == hvm_intack_nmi )
+        if ( (intblk != hvm_intblk_none) ||
+             (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) )
+        {
+            enable_intr_window(v, intack);
+            goto out;
+        }
+
+        intack = hvm_vcpu_ack_pending_irq(v, intack);
+    } while ( intack.source == hvm_intsrc_none );
+
+    if ( intack.source == hvm_intsrc_nmi )
     {
         vmx_inject_nmi(v);
     }
     else
     {
-        HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
-        vmx_inject_extint(v, intr_vector);
-        pt_intr_post(v, intr_vector, intr_source);
+        HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
+        vmx_inject_extint(v, intack.vector);
+        pt_intr_post(v, intack);
     }
 
     /* Is there another IRQ to queue up behind this one? */
-    intr_source = hvm_vcpu_has_pending_irq(v);
-    if ( unlikely(intr_source != hvm_intack_none) )
-        enable_intr_window(v, intr_source);
+    intack = hvm_vcpu_has_pending_irq(v);
+    if ( unlikely(intack.source != hvm_intsrc_none) )
+        enable_intr_window(v, intack);
 
  out:
-    update_tpr_threshold(v, intr_source);
+    if ( cpu_has_vmx_tpr_shadow )
+        __vmwrite(TPR_THRESHOLD, tpr_threshold);
 }
 
 /*
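
The rewritten vmx_intr_assist() above folds the old update_tpr_threshold()
into the main loop: when hvm_interrupt_blocked() reports hvm_intblk_tpr,
the pending vector's priority class becomes the threshold, and the single
exit path writes TPR_THRESHOLD exactly once. A standalone sketch of the
priority-class rule (a local helper for illustration, not Xen code):

    #include <stdint.h>

    /* An interrupt is masked by the TPR when its priority class
     * (vector >> 4) does not exceed the TPR class (TPR bits 7:4). */
    static int tpr_masks(uint8_t tpr, uint8_t vector)
    {
        return (tpr >> 4) >= (vector >> 4);
    }

For example, TPR 0x50 masks vector 0x41 (classes 5 >= 4); programming
TPR_THRESHOLD with 4 makes the guest exit to Xen as soon as it lowers its
TPR class below 4, at which point the vector can be injected.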
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Tue Oct 02 11:31:55 2007 -0600
@@ -975,20 +975,34 @@ static void vmx_init_hypercall_page(stru
     *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
 }
 
-static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+static enum hvm_intblk vmx_interrupt_blocked(
+    struct vcpu *v, struct hvm_intack intack)
 {
     unsigned long intr_shadow;
 
     intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
 
-    if ( type == hvm_intack_nmi )
-        return !(intr_shadow & (VMX_INTR_SHADOW_STI|
-                                VMX_INTR_SHADOW_MOV_SS|
-                                VMX_INTR_SHADOW_NMI));
-
-    ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
-    return (!irq_masked(guest_cpu_user_regs()->eflags) &&
-            !(intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS)));
+    if ( intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS) )
+        return hvm_intblk_shadow;
+
+    if ( intack.source == hvm_intsrc_nmi )
+        return ((intr_shadow & VMX_INTR_SHADOW_NMI) ?
+                hvm_intblk_nmi_iret : hvm_intblk_none);
+
+    ASSERT((intack.source == hvm_intsrc_pic) ||
+           (intack.source == hvm_intsrc_lapic));
+
+    if ( irq_masked(guest_cpu_user_regs()->eflags) )
+        return hvm_intblk_rflags_ie;
+
+    if ( intack.source == hvm_intsrc_lapic )
+    {
+        uint32_t tpr = vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xF0;
+        if ( (tpr >> 4) >= (intack.vector >> 4) )
+            return hvm_intblk_tpr;
+    }
+
+    return hvm_intblk_none;
 }
 
 static void vmx_update_host_cr3(struct vcpu *v)
@@ -1112,7 +1126,7 @@ static struct hvm_function_table vmx_fun
     .vcpu_destroy         = vmx_vcpu_destroy,
     .save_cpu_ctxt        = vmx_save_vmcs_ctxt,
     .load_cpu_ctxt        = vmx_load_vmcs_ctxt,
-    .interrupts_enabled   = vmx_interrupts_enabled,
+    .interrupt_blocked    = vmx_interrupt_blocked,
     .guest_x86_mode       = vmx_guest_x86_mode,
     .get_segment_base     = vmx_get_segment_base,
     .get_segment_register = vmx_get_segment_register,
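
vmx_interrupt_blocked() now reports why delivery is blocked instead of a
boolean, checking the reasons in a fixed order. Note that NMIs ignore
EFLAGS.IF and the TPR but still honour the STI/MOV-SS shadow. A standalone
sketch of that precedence (local types, not the Xen definitions; the TPR
case is covered by the intr.c sketch above):

    enum blk { blk_none, blk_shadow, blk_nmi_iret, blk_rflags_ie };

    /* Instruction shadow dominates; the NMI-until-IRET window applies
     * only to NMIs; ordinary interrupts also need EFLAGS.IF set. */
    static enum blk blocked(int sti_movss_shadow, int nmi_iret_window,
                            int is_nmi, int eflags_if)
    {
        if ( sti_movss_shadow )
            return blk_shadow;
        if ( is_nmi )
            return nmi_iret_window ? blk_nmi_iret : blk_none;
        return eflags_if ? blk_none : blk_rflags_ie;
    }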
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c
--- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    Tue Oct 02 11:31:55 2007 -0600
@@ -173,7 +173,7 @@ static struct page_info *addr_to_dma_pag
         if ( dma_pte_addr(*pte) == 0 )
         {
             pg = alloc_domheap_page(NULL);
-            vaddr = map_domain_page(mfn_x(page_to_mfn(pg)));
+            vaddr = map_domain_page(page_to_mfn(pg));
             if ( !vaddr )
             {
                 spin_unlock_irqrestore(&hd->mapping_lock, flags);
@@ -195,7 +195,7 @@ static struct page_info *addr_to_dma_pag
         else
         {
             pg = maddr_to_page(pte->val);
-            vaddr = map_domain_page(mfn_x(page_to_mfn(pg)));
+            vaddr = map_domain_page(page_to_mfn(pg));
             if ( !vaddr )
             {
                 spin_unlock_irqrestore(&hd->mapping_lock, flags);
@@ -250,7 +250,7 @@ static struct page_info *dma_addr_level_
         if ( level == total )
             return pg;
 
-        parent = map_domain_page(mfn_x(page_to_mfn(pg)));
+        parent = map_domain_page(page_to_mfn(pg));
         total--;
     }
 
@@ -542,7 +542,7 @@ static void dma_pte_clear_one(struct dom
     pg = dma_addr_level_page(domain, addr, 1);
     if ( !pg )
         return;
-    pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
+    pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
     pte += address_level_offset(addr, 1);
     if ( pte )
     {
@@ -612,7 +612,7 @@ void dma_pte_free_pagetable(struct domai
             pg = dma_addr_level_page(domain, tmp, level);
             if ( !pg )
                 return;
-            pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
+            pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
             pte += address_level_offset(tmp, level);
             dma_clear_pte(*pte);
             iommu_flush_cache_entry(iommu, pte);
@@ -1493,7 +1493,7 @@ int iommu_map_page(struct domain *d, pad
     pg = addr_to_dma_page(d, gfn << PAGE_SHIFT_4K);
     if ( !pg )
         return -ENOMEM;
-    pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
+    pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
     pte += mfn & LEVEL_MASK;
     dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
@@ -1554,7 +1554,7 @@ int iommu_page_mapping(struct domain *do
         pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
         if ( !pg )
             return -ENOMEM;
-        pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
+        pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
         pte += start_pfn & LEVEL_MASK;
         dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
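
These hunks only drop a redundant mfn_x() conversion: page_to_mfn() here
already yields the unsigned long that map_domain_page() takes. For
reference, the map/edit/unmap idiom the file uses around each hunk
(identifiers as in the diff; a sketch, not a complete function):

    struct dma_pte *pte;

    pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
    pte += address_level_offset(addr, level); /* index into the page */
    dma_set_pte_addr(*pte, maddr);            /* edit the entry */
    iommu_flush_cache_entry(iommu, pte);      /* keep the IOMMU coherent */
    unmap_domain_page(pte);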
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/vmx/vtd/io.c
--- a/xen/arch/x86/hvm/vmx/vtd/io.c     Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vtd/io.c     Tue Oct 02 11:31:55 2007 -0600
@@ -46,27 +46,41 @@
 #include <public/domctl.h>
 
 int pt_irq_create_bind_vtd(
-    struct domain *d,
-    xen_domctl_bind_pt_irq_t * pt_irq_bind)
+    struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
-    struct hvm_domain *hd = &d->arch.hvm_domain;
+    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
     uint32_t machine_gsi, guest_gsi;
     uint32_t device, intx;
+
+    if ( hvm_irq_dpci == NULL )
+    {
+        hvm_irq_dpci = xmalloc(struct hvm_irq_dpci);
+        if ( hvm_irq_dpci == NULL )
+            return -ENOMEM;
+
+        memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
+
+        if ( cmpxchg((unsigned long *)&d->arch.hvm_domain.irq.dpci,
+                     0, (unsigned long)hvm_irq_dpci) != 0 )
+            xfree(hvm_irq_dpci);
+
+        hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+    }
 
     machine_gsi = pt_irq_bind->machine_irq;
     device = pt_irq_bind->u.pci.device;
     intx = pt_irq_bind->u.pci.intx;
     guest_gsi = hvm_pci_intx_gsi(device, intx);
 
-    hd->irq.mirq[machine_gsi].valid = 1;
-    hd->irq.mirq[machine_gsi].device = device;
-    hd->irq.mirq[machine_gsi].intx = intx;
-    hd->irq.mirq[machine_gsi].guest_gsi = guest_gsi;
+    hvm_irq_dpci->mirq[machine_gsi].valid = 1;
+    hvm_irq_dpci->mirq[machine_gsi].device = device;
+    hvm_irq_dpci->mirq[machine_gsi].intx = intx;
+    hvm_irq_dpci->mirq[machine_gsi].guest_gsi = guest_gsi;
 
-    hd->irq.girq[guest_gsi].valid = 1;
-    hd->irq.girq[guest_gsi].device = device;
-    hd->irq.girq[guest_gsi].intx = intx;
-    hd->irq.girq[guest_gsi].machine_gsi = machine_gsi;
+    hvm_irq_dpci->girq[guest_gsi].valid = 1;
+    hvm_irq_dpci->girq[guest_gsi].device = device;
+    hvm_irq_dpci->girq[guest_gsi].intx = intx;
+    hvm_irq_dpci->girq[guest_gsi].machine_gsi = machine_gsi;
 
     /* Deal with gsi for legacy devices */
     pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE);
@@ -76,31 +90,31 @@ int pt_irq_create_bind_vtd(
 
     return 0;
 }
+
 int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
 {
     uint32_t device, intx;
     uint32_t link, isa_irq;
-    struct hvm_irq *hvm_irq;
+    struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
 
-    if ( !vtd_enabled || (d == dom0) ||
-         !d->arch.hvm_domain.irq.mirq[mirq].valid )
+    if ( !vtd_enabled || (d == dom0) || (hvm_irq->dpci == NULL) ||
+         !hvm_irq->dpci->mirq[mirq].valid )
         return 0;
 
-    device = d->arch.hvm_domain.irq.mirq[mirq].device;
-    intx = d->arch.hvm_domain.irq.mirq[mirq].intx;
+    device = hvm_irq->dpci->mirq[mirq].device;
+    intx = hvm_irq->dpci->mirq[mirq].intx;
     link = hvm_pci_intx_link(device, intx);
-    hvm_irq = &d->arch.hvm_domain.irq;
     isa_irq = hvm_irq->pci_link.route[link];
 
-    if ( !d->arch.hvm_domain.irq.girq[isa_irq].valid )
+    if ( !hvm_irq->dpci->girq[isa_irq].valid )
     {
-        d->arch.hvm_domain.irq.girq[isa_irq].valid = 1;
-        d->arch.hvm_domain.irq.girq[isa_irq].device = device;
-        d->arch.hvm_domain.irq.girq[isa_irq].intx = intx;
-        d->arch.hvm_domain.irq.girq[isa_irq].machine_gsi = mirq;
+        hvm_irq->dpci->girq[isa_irq].valid = 1;
+        hvm_irq->dpci->girq[isa_irq].device = device;
+        hvm_irq->dpci->girq[isa_irq].intx = intx;
+        hvm_irq->dpci->girq[isa_irq].machine_gsi = mirq;
     }
 
-    if ( !test_and_set_bit(mirq, d->arch.hvm_domain.irq.dirq_mask) )
+    if ( !test_and_set_bit(mirq, hvm_irq->dpci->dirq_mask) )
     {
         vcpu_kick(d->vcpu[0]);
         return 1;
@@ -113,17 +127,19 @@ void hvm_dpci_eoi(unsigned int guest_gsi
 void hvm_dpci_eoi(unsigned int guest_gsi, union vioapic_redir_entry *ent)
 {
     struct domain *d = current->domain;
+    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
     uint32_t device, intx, machine_gsi;
     irq_desc_t *desc;
 
     ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock));
 
-    if ( !vtd_enabled || !d->arch.hvm_domain.irq.girq[guest_gsi].valid )
+    if ( !vtd_enabled || (hvm_irq_dpci == NULL) ||
+         !hvm_irq_dpci->girq[guest_gsi].valid )
         return;
 
-    device = d->arch.hvm_domain.irq.girq[guest_gsi].device;
-    intx = d->arch.hvm_domain.irq.girq[guest_gsi].intx;
-    machine_gsi = d->arch.hvm_domain.irq.girq[guest_gsi].machine_gsi;
+    device = hvm_irq_dpci->girq[guest_gsi].device;
+    intx = hvm_irq_dpci->girq[guest_gsi].intx;
+    machine_gsi = hvm_irq_dpci->girq[guest_gsi].machine_gsi;
     gdprintk(XENLOG_INFO, "hvm_dpci_eoi:: device %x intx %x\n",
              device, intx);
     __hvm_pci_intx_deassert(d, device, intx);
@@ -136,15 +152,20 @@ void hvm_dpci_eoi(unsigned int guest_gsi
 
 void iommu_domain_destroy(struct domain *d)
 {
-    struct hvm_domain *hd = &d->arch.hvm_domain;
+    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
     uint32_t i;
 
     if ( !vtd_enabled )
         return;
 
-    for ( i = 0; i < NR_IRQS; i++ )
-        if ( hd->irq.mirq[i].valid )
-            pirq_guest_unbind(d, i);
+    if ( hvm_irq_dpci != NULL )
+    {
+        for ( i = 0; i < NR_IRQS; i++ )
+            if ( hvm_irq_dpci->mirq[i].valid )
+                pirq_guest_unbind(d, i);
+        d->arch.hvm_domain.irq.dpci = NULL;
+        xfree(hvm_irq_dpci);
+    }
 
     iommu_domain_teardown(d);
 }
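
The allocation added to pt_irq_create_bind_vtd() is the publish-once
pattern: both racers allocate, one cmpxchg() succeeds, and the loser frees
its copy and adopts the winner's. The same idea in portable C11, for
illustration only:

    #include <stdatomic.h>
    #include <stdlib.h>

    static _Atomic(void *) shared;

    /* Return the single shared object, allocating it on first use. */
    static void *get_shared(size_t size)
    {
        void *cur = atomic_load(&shared);
        if ( cur != NULL )
            return cur;

        void *mine = calloc(1, size);
        if ( mine == NULL )
            return NULL;

        void *expected = NULL;
        if ( !atomic_compare_exchange_strong(&shared, &expected, mine) )
        {
            free(mine);       /* lost the race; 'expected' is the winner */
            return expected;
        }
        return mine;
    }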
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/vpic.c
--- a/xen/arch/x86/hvm/vpic.c   Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/vpic.c   Tue Oct 02 11:31:55 2007 -0600
@@ -503,7 +503,7 @@ void vpic_irq_negative_edge(struct domai
         vpic_update_int_output(vpic);
 }
 
-int cpu_get_pic_interrupt(struct vcpu *v)
+int vpic_ack_pending_irq(struct vcpu *v)
 {
     int irq, vector;
     struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0];
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c    Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/hvm/vpt.c    Tue Oct 02 11:31:55 2007 -0600
@@ -165,12 +165,12 @@ void pt_update_irq(struct vcpu *v)
 }
 
 static struct periodic_time *is_pt_irq(
-    struct vcpu *v, int vector, enum hvm_intack src)
+    struct vcpu *v, struct hvm_intack intack)
 {
     struct list_head *head = &v->arch.hvm_vcpu.tm_list;
     struct periodic_time *pt;
     struct RTCState *rtc = &v->domain->arch.hvm_domain.pl_time.vrtc;
-    int vec;
+    int vector;
 
     list_for_each_entry ( pt, head, list )
     {
@@ -179,15 +179,16 @@ static struct periodic_time *is_pt_irq(
 
         if ( is_lvtt(v, pt->irq) )
         {
-            if ( pt->irq != vector )
+            if ( pt->irq != intack.vector )
                 continue;
             return pt;
         }
 
-        vec = get_isa_irq_vector(v, pt->irq, src);
+        vector = get_isa_irq_vector(v, pt->irq, intack.source);
 
         /* RTC irq need special care */
-        if ( (vector != vec) || (pt->irq == 8 && !is_rtc_periodic_irq(rtc)) )
+        if ( (intack.vector != vector) ||
+             ((pt->irq == 8) && !is_rtc_periodic_irq(rtc)) )
             continue;
 
         return pt;
@@ -196,7 +197,7 @@ static struct periodic_time *is_pt_irq(
     return NULL;
 }
 
-void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src)
+void pt_intr_post(struct vcpu *v, struct hvm_intack intack)
 {
     struct periodic_time *pt;
     time_cb *cb;
@@ -204,7 +205,7 @@ void pt_intr_post(struct vcpu *v, int ve
 
     spin_lock(&v->arch.hvm_vcpu.tm_lock);
 
-    pt = is_pt_irq(v, vector, src);
+    pt = is_pt_irq(v, intack);
     if ( pt == NULL )
     {
         spin_unlock(&v->arch.hvm_vcpu.tm_lock);
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/platform_hypercall.c Tue Oct 02 11:31:55 2007 -0600
@@ -36,6 +36,8 @@ DEFINE_SPINLOCK(xenpf_lock);
 # define copy_from_compat copy_from_guest
 # undef copy_to_compat
 # define copy_to_compat copy_to_guest
+# undef guest_from_compat_handle
+# define guest_from_compat_handle(x,y) ((x)=(y))
 #else
 extern spinlock_t xenpf_lock;
 #endif
@@ -142,21 +144,14 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
     case XENPF_microcode_update:
     {
         extern int microcode_update(XEN_GUEST_HANDLE(void), unsigned long len);
-#ifdef COMPAT
         XEN_GUEST_HANDLE(void) data;
-#endif
 
         ret = xsm_microcode();
         if ( ret )
             break;
 
-#ifndef COMPAT
-        ret = microcode_update(op->u.microcode.data,
-                               op->u.microcode.length);
-#else
         guest_from_compat_handle(data, op->u.microcode.data);
         ret = microcode_update(data, op->u.microcode.length);
-#endif
     }
     break;
 
@@ -286,6 +281,9 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
         break;
 
     case XENPF_change_freq:
+        ret = -ENOSYS;
+        if ( cpufreq_controller != FREQCTL_dom0_kernel )
+            break;
         ret = -EINVAL;
         if ( op->u.change_freq.flags != 0 )
             break;
@@ -294,11 +292,46 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
                                         &op->u.change_freq.freq);
         break;
 
+    case XENPF_getidletime:
+    {
+        uint32_t i, nr_cpus;
+        uint64_t idletime;
+        struct vcpu *v;
+        XEN_GUEST_HANDLE(uint64_t) idletimes;
+
+        ret = -ENOSYS;
+        if ( cpufreq_controller != FREQCTL_dom0_kernel )
+            break;
+
+        guest_from_compat_handle(idletimes, op->u.getidletime.idletime);
+        nr_cpus = min_t(uint32_t, op->u.getidletime.max_cpus, NR_CPUS);
+
+        for ( i = 0; i < nr_cpus; i++ )
+        {
+            /* Assume no holes in idle-vcpu map. */
+            if ( (v = idle_vcpu[i]) == NULL )
+                break;
+
+            idletime = v->runstate.time[RUNSTATE_running];
+            if ( v->is_running )
+                idletime += NOW() - v->runstate.state_entry_time;
+
+            ret = -EFAULT;
+            if ( copy_to_guest_offset(idletimes, i, &idletime, 1) )
+                goto out;
+        }
+
+        op->u.getidletime.nr_cpus = i;
+        ret = copy_to_guest(u_xenpf_op, op, 1) ? -EFAULT : 0;
+    }
+    break;
+
     default:
         ret = -ENOSYS;
         break;
     }
 
+ out:
     spin_unlock(&xenpf_lock);
 
     return ret;
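
The XENPF_getidletime handler reads each idle vCPU's accumulated
RUNSTATE_running time and, if that vCPU is on a CPU at the moment of the
query, extends it by the still-open interval. The same calculation as a
plain function (local names):

    #include <stdint.h>

    static uint64_t idle_ns(uint64_t accumulated_ns, int running_now,
                            uint64_t now_ns, uint64_t state_entry_ns)
    {
        /* Closed intervals are already accumulated; add the open one. */
        return accumulated_ns + (running_now ? now_ns - state_entry_ns : 0);
    }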
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/time.c       Tue Oct 02 11:31:55 2007 -0600
@@ -728,11 +728,11 @@ int cpu_frequency_change(u64 freq)
     u64 curr_tsc;
 
     local_irq_disable();
-    set_time_scale(&t->tsc_scale, freq);
     rdtscll(curr_tsc);
     t->local_tsc_stamp = curr_tsc;
     t->stime_local_stamp = get_s_time();
     t->stime_master_stamp = read_platform_stime();
+    set_time_scale(&t->tsc_scale, freq);
     local_irq_enable();
 
     /* A full epoch should pass before we check for deviation. */
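
The reordering in cpu_frequency_change() matters because the stamps must
be taken while the old scale is still installed: system time extrapolates
from (local_tsc_stamp, stime_local_stamp) via tsc_scale, so the new scale
may only ever be applied to TSC deltas measured from the new stamp. A
simplified sketch of that fixed-point scaling (Xen's real scale_delta()
does the wide arithmetic explicitly rather than via __int128):

    #include <stdint.h>

    struct time_scale { int shift; uint32_t mul_frac; };

    /* ns = (delta, shift-adjusted) * mul_frac / 2^32 */
    static uint64_t scale_delta(uint64_t delta, const struct time_scale *s)
    {
        if ( s->shift < 0 )
            delta >>= -s->shift;
        else
            delta <<= s->shift;
        return (uint64_t)(((unsigned __int128)delta * s->mul_frac) >> 32);
    }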
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/traps.c      Tue Oct 02 11:31:55 2007 -0600
@@ -2009,7 +2009,7 @@ void unset_nmi_callback(void)
     nmi_callback = dummy_nmi_callback;
 }
 
-asmlinkage int math_state_restore(struct cpu_user_regs *regs)
+asmlinkage int do_device_not_available(struct cpu_user_regs *regs)
 {
     BUG_ON(!guest_mode(regs));
 
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/x86_32/domain_page.c
--- a/xen/arch/x86/x86_32/domain_page.c Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/x86_32/domain_page.c Tue Oct 02 11:31:55 2007 -0600
@@ -43,9 +43,10 @@ void *map_domain_page(unsigned long mfn)
 void *map_domain_page(unsigned long mfn)
 {
     unsigned long va;
-    unsigned int idx, i, vcpu;
-    struct vcpu *v;
-    struct mapcache *cache;
+    unsigned int idx, i;
+    struct vcpu *v;
+    struct mapcache_domain *dcache;
+    struct mapcache_vcpu *vcache;
     struct vcpu_maphash_entry *hashent;
 
     ASSERT(!in_irq());
@@ -54,59 +55,59 @@ void *map_domain_page(unsigned long mfn)
 
     v = mapcache_current_vcpu();
 
-    vcpu  = v->vcpu_id;
-    cache = &v->domain->arch.mapcache;
-
-    hashent = &cache->vcpu_maphash[vcpu].hash[MAPHASH_HASHFN(mfn)];
+    dcache = &v->domain->arch.mapcache;
+    vcache = &v->arch.mapcache;
+
+    hashent = &vcache->hash[MAPHASH_HASHFN(mfn)];
     if ( hashent->mfn == mfn )
     {
         idx = hashent->idx;
         hashent->refcnt++;
         ASSERT(idx < MAPCACHE_ENTRIES);
         ASSERT(hashent->refcnt != 0);
-        ASSERT(l1e_get_pfn(cache->l1tab[idx]) == mfn);
+        ASSERT(l1e_get_pfn(dcache->l1tab[idx]) == mfn);
         goto out;
     }
 
-    spin_lock(&cache->lock);
+    spin_lock(&dcache->lock);
 
     /* Has some other CPU caused a wrap? We must flush if so. */
-    if ( unlikely(cache->epoch != cache->shadow_epoch[vcpu]) )
-    {
-        cache->shadow_epoch[vcpu] = cache->epoch;
-        if ( NEED_FLUSH(this_cpu(tlbflush_time), cache->tlbflush_timestamp) )
+    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
+    {
+        vcache->shadow_epoch = dcache->epoch;
+        if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
         {
             perfc_incr(domain_page_tlb_flush);
             local_flush_tlb();
         }
     }
 
-    idx = find_next_zero_bit(cache->inuse, MAPCACHE_ENTRIES, cache->cursor);
+    idx = find_next_zero_bit(dcache->inuse, MAPCACHE_ENTRIES, dcache->cursor);
     if ( unlikely(idx >= MAPCACHE_ENTRIES) )
     {
         /* /First/, clean the garbage map and update the inuse list. */
-        for ( i = 0; i < ARRAY_SIZE(cache->garbage); i++ )
-        {
-            unsigned long x = xchg(&cache->garbage[i], 0);
-            cache->inuse[i] &= ~x;
+        for ( i = 0; i < ARRAY_SIZE(dcache->garbage); i++ )
+        {
+            unsigned long x = xchg(&dcache->garbage[i], 0);
+            dcache->inuse[i] &= ~x;
         }
 
         /* /Second/, flush TLBs. */
         perfc_incr(domain_page_tlb_flush);
         local_flush_tlb();
-        cache->shadow_epoch[vcpu] = ++cache->epoch;
-        cache->tlbflush_timestamp = tlbflush_current_time();
-
-        idx = find_first_zero_bit(cache->inuse, MAPCACHE_ENTRIES);
+        vcache->shadow_epoch = ++dcache->epoch;
+        dcache->tlbflush_timestamp = tlbflush_current_time();
+
+        idx = find_first_zero_bit(dcache->inuse, MAPCACHE_ENTRIES);
         BUG_ON(idx >= MAPCACHE_ENTRIES);
     }
 
-    set_bit(idx, cache->inuse);
-    cache->cursor = idx + 1;
-
-    spin_unlock(&cache->lock);
-
-    l1e_write(&cache->l1tab[idx], l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+    set_bit(idx, dcache->inuse);
+    dcache->cursor = idx + 1;
+
+    spin_unlock(&dcache->lock);
+
+    l1e_write(&dcache->l1tab[idx], l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
 
  out:
     va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT);
@@ -117,7 +118,7 @@ void unmap_domain_page(void *va)
 {
     unsigned int idx;
     struct vcpu *v;
-    struct mapcache *cache;
+    struct mapcache_domain *dcache;
     unsigned long mfn;
     struct vcpu_maphash_entry *hashent;
 
@@ -128,11 +129,11 @@ void unmap_domain_page(void *va)
 
     v = mapcache_current_vcpu();
 
-    cache = &v->domain->arch.mapcache;
+    dcache = &v->domain->arch.mapcache;
 
     idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
-    mfn = l1e_get_pfn(cache->l1tab[idx]);
-    hashent = &cache->vcpu_maphash[v->vcpu_id].hash[MAPHASH_HASHFN(mfn)];
+    mfn = l1e_get_pfn(dcache->l1tab[idx]);
+    hashent = &v->arch.mapcache.hash[MAPHASH_HASHFN(mfn)];
 
     if ( hashent->idx == idx )
     {
@@ -145,10 +146,10 @@ void unmap_domain_page(void *va)
         if ( hashent->idx != MAPHASHENT_NOTINUSE )
         {
             /* /First/, zap the PTE. */
-            ASSERT(l1e_get_pfn(cache->l1tab[hashent->idx]) == hashent->mfn);
-            l1e_write(&cache->l1tab[hashent->idx], l1e_empty());
+            ASSERT(l1e_get_pfn(dcache->l1tab[hashent->idx]) == hashent->mfn);
+            l1e_write(&dcache->l1tab[hashent->idx], l1e_empty());
             /* /Second/, mark as garbage. */
-            set_bit(hashent->idx, cache->garbage);
+            set_bit(hashent->idx, dcache->garbage);
         }
 
         /* Add newly-freed mapping to the maphash. */
@@ -158,30 +159,30 @@ void unmap_domain_page(void *va)
     else
     {
         /* /First/, zap the PTE. */
-        l1e_write(&cache->l1tab[idx], l1e_empty());
+        l1e_write(&dcache->l1tab[idx], l1e_empty());
         /* /Second/, mark as garbage. */
-        set_bit(idx, cache->garbage);
-    }
-}
-
-void mapcache_init(struct domain *d)
-{
-    unsigned int i, j;
-    struct vcpu_maphash_entry *hashent;
-
+        set_bit(idx, dcache->garbage);
+    }
+}
+
+void mapcache_domain_init(struct domain *d)
+{
     d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
         (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
     spin_lock_init(&d->arch.mapcache.lock);
+}
+
+void mapcache_vcpu_init(struct vcpu *v)
+{
+    unsigned int i;
+    struct vcpu_maphash_entry *hashent;
 
     /* Mark all maphash entries as not in use. */
-    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
-    {
-        for ( j = 0; j < MAPHASH_ENTRIES; j++ )
-        {
-            hashent = &d->arch.mapcache.vcpu_maphash[i].hash[j];
-            hashent->mfn = ~0UL; /* never valid to map */
-            hashent->idx = MAPHASHENT_NOTINUSE;
-        }
+    for ( i = 0; i < MAPHASH_ENTRIES; i++ )
+    {
+        hashent = &v->arch.mapcache.hash[i];
+        hashent->mfn = ~0UL; /* never valid to map */
+        hashent->idx = MAPHASHENT_NOTINUSE;
     }
 }
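
The mapcache state is now split between a per-domain part (l1tab, the
inuse/garbage bitmaps, epoch, lock) and a per-vCPU part (shadow_epoch and
the maphash), so the common hit path takes no lock at all. Roughly, with
the new field names:

    /* Lock-free fast path: the hashed per-vCPU slot already maps
     * this MFN, so take a reference and compute the VA directly. */
    hashent = &v->arch.mapcache.hash[MAPHASH_HASHFN(mfn)];
    if ( hashent->mfn == mfn )
    {
        idx = hashent->idx;
        hashent->refcnt++;   /* private to this vCPU: no atomics needed */
        return (void *)(MAPCACHE_VIRT_START + (idx << PAGE_SHIFT));
    }
    /* Miss: fall back to the locked per-domain bitmap allocator. */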
 
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/x86_32/entry.S       Tue Oct 02 11:31:55 2007 -0600
@@ -623,7 +623,7 @@ ENTRY(exception_table)
         .long do_overflow
         .long do_bounds
         .long do_invalid_op
-        .long math_state_restore
+        .long do_device_not_available
         .long 0 # double fault
         .long do_coprocessor_segment_overrun
         .long do_invalid_TSS
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/arch/x86/x86_64/entry.S       Tue Oct 02 11:31:55 2007 -0600
@@ -559,7 +559,7 @@ ENTRY(exception_table)
         .quad do_overflow
         .quad do_bounds
         .quad do_invalid_op
-        .quad math_state_restore
+        .quad do_device_not_available
         .quad 0 # double_fault
         .quad do_coprocessor_segment_overrun
         .quad do_invalid_TSS
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/common/sysctl.c
--- a/xen/common/sysctl.c       Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/common/sysctl.c       Tue Oct 02 11:31:55 2007 -0600
@@ -177,18 +177,13 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
             if ( v->is_running )
                 cpuinfo.idletime += NOW() - v->runstate.state_entry_time;
 
+            ret = -EFAULT;
             if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
-            {
-                ret = -EFAULT;
-                break;
-            }
+                goto out;
         }
 
         op->u.getcpuinfo.nr_cpus = i;
-        ret = 0;
-
-        if ( copy_to_guest(u_sysctl, op, 1) )
-            ret = -EFAULT;
+        ret = copy_to_guest(u_sysctl, op, 1) ? -EFAULT : 0;
     }
     break;
 
@@ -209,6 +204,7 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
         break;
     }
 
+ out:
     spin_unlock(&sysctl_lock);
 
     return ret;
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/include/asm-x86/domain.h      Tue Oct 02 11:31:55 2007 -0600
@@ -28,17 +28,21 @@ struct trap_bounce {
 #define MAPHASH_ENTRIES 8
 #define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
 #define MAPHASHENT_NOTINUSE ((u16)~0U)
-struct vcpu_maphash {
+struct mapcache_vcpu {
+    /* Shadow of mapcache_domain.epoch. */
+    unsigned int shadow_epoch;
+
+    /* Lock-free per-VCPU hash of recently-used mappings. */
     struct vcpu_maphash_entry {
         unsigned long mfn;
         uint16_t      idx;
         uint16_t      refcnt;
     } hash[MAPHASH_ENTRIES];
-} __cacheline_aligned;
+};
 
 #define MAPCACHE_ORDER   10
 #define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
-struct mapcache {
+struct mapcache_domain {
     /* The PTEs that provide the mappings, and a cursor into the array. */
     l1_pgentry_t *l1tab;
     unsigned int cursor;
@@ -47,27 +51,25 @@ struct mapcache {
     spinlock_t lock;
 
     /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
-    unsigned int epoch, shadow_epoch[MAX_VIRT_CPUS];
+    unsigned int epoch;
     u32 tlbflush_timestamp;
 
     /* Which mappings are in use, and which are garbage to reap next epoch? */
     unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
     unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
-
-    /* Lock-free per-VCPU hash of recently-used mappings. */
-    struct vcpu_maphash vcpu_maphash[MAX_VIRT_CPUS];
-};
-
-extern void mapcache_init(struct domain *);
+};
+
+void mapcache_domain_init(struct domain *);
+void mapcache_vcpu_init(struct vcpu *);
 
 /* x86/64: toggle guest between kernel and user modes. */
-extern void toggle_guest_mode(struct vcpu *);
+void toggle_guest_mode(struct vcpu *);
 
 /*
  * Initialise a hypercall-transfer page. The given pointer must be mapped
  * in Xen virtual address space (accesses are not validated or checked).
  */
-extern void hypercall_page_initialise(struct domain *d, void *);
+void hypercall_page_initialise(struct domain *d, void *);
 
 /************************************************/
 /*          shadow paging extension             */
@@ -204,7 +206,7 @@ struct arch_domain
 
 #ifdef CONFIG_X86_32
     /* map_domain_page() mapping cache. */
-    struct mapcache mapcache;
+    struct mapcache_domain mapcache;
 #endif
 
 #ifdef CONFIG_COMPAT
@@ -290,7 +292,7 @@ struct arch_vcpu
     struct trap_bounce trap_bounce;
 
     /* I/O-port access bitmap. */
-    XEN_GUEST_HANDLE(uint8_t) iobmp; /* Guest kernel virtual address of the bitmap. */
+    XEN_GUEST_HANDLE(uint8_t) iobmp; /* Guest kernel vaddr of the bitmap. */
     int iobmp_limit;  /* Number of ports represented in the bitmap.  */
     int iopl;         /* Current IOPL for this VCPU. */
 
@@ -327,6 +329,12 @@ struct arch_vcpu
 
     /* Guest-specified relocation of vcpu_info. */
     unsigned long vcpu_info_mfn;
+
+#ifdef CONFIG_X86_32
+    /* map_domain_page() mapping cache. */
+    struct mapcache_vcpu mapcache;
+#endif
+
 } __cacheline_aligned;
 
 /* Shorthands to improve code legibility. */
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h     Tue Oct 02 11:31:55 2007 -0600
@@ -57,11 +57,26 @@ typedef struct segment_register {
 } __attribute__ ((packed)) segment_register_t;
 
 /* Interrupt acknowledgement sources. */
-enum hvm_intack {
-    hvm_intack_none,
-    hvm_intack_pic,
-    hvm_intack_lapic,
-    hvm_intack_nmi
+enum hvm_intsrc {
+    hvm_intsrc_none,
+    hvm_intsrc_pic,
+    hvm_intsrc_lapic,
+    hvm_intsrc_nmi
+};
+struct hvm_intack {
+    uint8_t source; /* enum hvm_intsrc */
+    uint8_t vector;
+};
+#define hvm_intack_none       ( (struct hvm_intack) { hvm_intsrc_none,  0 } )
+#define hvm_intack_pic(vec)   ( (struct hvm_intack) { hvm_intsrc_pic,   vec } )
+#define hvm_intack_lapic(vec) ( (struct hvm_intack) { hvm_intsrc_lapic, vec } )
+#define hvm_intack_nmi        ( (struct hvm_intack) { hvm_intsrc_nmi,   2 } )
+enum hvm_intblk {
+    hvm_intblk_none,      /* not blocked (deliverable) */
+    hvm_intblk_shadow,    /* MOV-SS or STI shadow */
+    hvm_intblk_rflags_ie, /* RFLAGS.IE == 0 */
+    hvm_intblk_tpr,       /* LAPIC TPR too high */
+    hvm_intblk_nmi_iret   /* NMI blocked until IRET */
 };
 
 /*
@@ -94,7 +109,7 @@ struct hvm_function_table {
      * 3) return the current guest segment descriptor base
      * 4) return the current guest segment descriptor
      */
-    int (*interrupts_enabled)(struct vcpu *v, enum hvm_intack);
+    enum hvm_intblk (*interrupt_blocked)(struct vcpu *v, struct hvm_intack);
     int (*guest_x86_mode)(struct vcpu *v);
     unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg);
     void (*get_segment_register)(struct vcpu *v, enum x86_segment seg,
@@ -177,11 +192,11 @@ u64 hvm_get_guest_time(struct vcpu *v);
 #define hvm_long_mode_enabled(v) (v,0)
 #endif
 
-static inline int
-hvm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+static inline enum hvm_intblk
+hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
 {
     ASSERT(v == current);
-    return hvm_funcs.interrupts_enabled(v, type);
+    return hvm_funcs.interrupt_blocked(v, intack);
 }
 
 static inline int
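
struct hvm_intack keeps the interrupt source and vector together through
the has_pending/blocked/ack sequence, and the hvm_intack_* macros build
values as compound literals (hvm_intack_nmi fixes the vector at 2, the
NMI vector). Illustrative use, assuming the definitions above (the tpr
parameter belongs to this example, not to Xen):

    #include <stdint.h>

    static int lapic_vector_deliverable(uint8_t tpr)
    {
        struct hvm_intack intack = hvm_intack_lapic(0x31);
        return (intack.source == hvm_intsrc_lapic) &&
               ((intack.vector >> 4) > (tpr >> 4));
    }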
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h     Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/include/asm-x86/hvm/irq.h     Tue Oct 02 11:31:55 2007 -0600
@@ -29,7 +29,7 @@
 #include <asm/hvm/vioapic.h>
 #include <public/hvm/save.h>
 
-struct hvm_irq_mapping {
+struct hvm_irq_dpci_mapping {
     uint8_t valid;
     uint8_t device;
     uint8_t intx;
@@ -37,6 +37,14 @@ struct hvm_irq_mapping {
         uint8_t guest_gsi;
         uint8_t machine_gsi;
     };
+};
+
+struct hvm_irq_dpci {
+    /* Machine IRQ to guest device/intx mapping. */
+    struct hvm_irq_dpci_mapping mirq[NR_IRQS];
+    /* Guest IRQ to guest device/intx mapping. */
+    struct hvm_irq_dpci_mapping girq[NR_IRQS];
+    DECLARE_BITMAP(dirq_mask, NR_IRQS);
 };
 
 struct hvm_irq {
@@ -99,11 +107,7 @@ struct hvm_irq {
     /* Last VCPU that was delivered a LowestPrio interrupt. */
     u8 round_robin_prev_vcpu;
 
-    /* machine irq to guest device/intx mapping */
-    struct hvm_irq_mapping mirq[NR_IRQS];
-    /* guest irq to guest device/intx mapping */
-    struct hvm_irq_mapping girq[NR_IRQS];
-    DECLARE_BITMAP(dirq_mask, NR_IRQS);
+    struct hvm_irq_dpci *dpci;
 };
 
 #define hvm_pci_intx_gsi(dev, intx)  \
@@ -135,11 +139,11 @@ void hvm_set_callback_via(struct domain 
 void hvm_set_callback_via(struct domain *d, uint64_t via);
 
 /* Check/Acknowledge next pending interrupt. */
-enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
-int hvm_vcpu_ack_pending_irq(
-    struct vcpu *v, enum hvm_intack type, int *vector);
+struct hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
+struct hvm_intack hvm_vcpu_ack_pending_irq(struct vcpu *v,
+                                           struct hvm_intack intack);
 
-int get_isa_irq_vector(struct vcpu *vcpu, int irq, enum hvm_intack src);
+int get_isa_irq_vector(struct vcpu *vcpu, int irq, enum hvm_intsrc src);
 int is_isa_irq_masked(struct vcpu *v, int isa_irq);
 
 #endif /* __ASM_X86_HVM_IRQ_H__ */
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h  Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/include/asm-x86/hvm/vlapic.h  Tue Oct 02 11:31:55 2007 -0600
@@ -75,8 +75,8 @@ int vlapic_set_irq(struct vlapic *vlapic
 
 int vlapic_find_highest_irr(struct vlapic *vlapic);
 
-int vlapic_has_interrupt(struct vcpu *v);
-int cpu_get_apic_interrupt(struct vcpu *v);
+int vlapic_has_pending_irq(struct vcpu *v);
+int vlapic_ack_pending_irq(struct vcpu *v, int vector);
 
 int  vlapic_init(struct vcpu *v);
 void vlapic_destroy(struct vcpu *v);
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/include/asm-x86/hvm/vpic.h
--- a/xen/include/asm-x86/hvm/vpic.h    Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/include/asm-x86/hvm/vpic.h    Tue Oct 02 11:31:55 2007 -0600
@@ -32,7 +32,7 @@ void vpic_irq_positive_edge(struct domai
 void vpic_irq_positive_edge(struct domain *d, int irq);
 void vpic_irq_negative_edge(struct domain *d, int irq);
 void vpic_init(struct domain *d);
-int cpu_get_pic_interrupt(struct vcpu *v);
+int vpic_ack_pending_irq(struct vcpu *v);
 int is_periodic_irq(struct vcpu *v, int irq, int type);
 
 #endif  /* __ASM_X86_HVM_VPIC_H__ */  
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h     Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/include/asm-x86/hvm/vpt.h     Tue Oct 02 11:31:55 2007 -0600
@@ -120,7 +120,7 @@ void pt_freeze_time(struct vcpu *v);
 void pt_freeze_time(struct vcpu *v);
 void pt_thaw_time(struct vcpu *v);
 void pt_update_irq(struct vcpu *v);
-void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src);
+void pt_intr_post(struct vcpu *v, struct hvm_intack intack);
 void pt_reset(struct vcpu *v);
 void pt_migrate(struct vcpu *v);
 void create_periodic_time(
diff -r d6c09be8c5f5 -r 3165e43ce734 xen/include/public/platform.h
--- a/xen/include/public/platform.h     Tue Oct 02 10:07:35 2007 -0600
+++ b/xen/include/public/platform.h     Tue Oct 02 11:31:55 2007 -0600
@@ -164,7 +164,7 @@ typedef struct xenpf_enter_acpi_sleep xe
 typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t);
 
-#define XENPF_change_freq       52
+#define XENPF_change_freq         52
 struct xenpf_change_freq {
     /* IN variables */
     uint32_t flags; /* Must be zero. */
@@ -173,6 +173,17 @@ struct xenpf_change_freq {
 };
 typedef struct xenpf_change_freq xenpf_change_freq_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t);
+
+#define XENPF_getidletime         53
+struct xenpf_getidletime {
+    /* IN variables. */
+    uint32_t max_cpus;
+    XEN_GUEST_HANDLE(uint64_t) idletime;
+    /* OUT variables. */
+    uint32_t nr_cpus;
+};
+typedef struct xenpf_getidletime xenpf_getidletime_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
 
 struct xen_platform_op {
     uint32_t cmd;
@@ -187,6 +198,7 @@ struct xen_platform_op {
         struct xenpf_firmware_info     firmware_info;
         struct xenpf_enter_acpi_sleep  enter_acpi_sleep;
         struct xenpf_change_freq       change_freq;
+        struct xenpf_getidletime       getidletime;
         uint8_t                        pad[128];
     } u;
 };
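
A dom0 kernel would drive the new op roughly as below (a sketch using the
guest-handle helpers from the public headers; error handling and per-arch
details elided, and not part of this changeset):

    struct xen_platform_op op = {
        .cmd = XENPF_getidletime,
        .interface_version = XENPF_INTERFACE_VERSION,
    };
    uint64_t idle[NR_CPUS];

    op.u.getidletime.max_cpus = NR_CPUS;
    set_xen_guest_handle(op.u.getidletime.idletime, idle);
    if ( HYPERVISOR_platform_op(&op) == 0 )
    {
        /* idle[0 .. op.u.getidletime.nr_cpus-1] now hold ns values. */
    }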

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

