[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1183400366 21600
# Node ID d146700adf714cdc13f924ab0de1dc895b6927f8
# Parent  443ce7edad0e8a3a640960890a72ce530887b38e
# Parent  182446677b6b56d58523050a6225a73d87a86ab7
merge with xen-unstable.hg
---
 buildconfigs/mk.linux-2.6-xen                |    5 
 tools/Makefile                               |    1 
 tools/console/daemon/io.c                    |   38 +++--
 tools/examples/init.d/xendomains             |    6 
 tools/examples/network-bridge                |   12 +
 tools/firmware/vmxassist/head.S              |   76 -----------
 tools/firmware/vmxassist/machine.h           |   15 --
 tools/firmware/vmxassist/setup.c             |   58 ---------
 tools/firmware/vmxassist/vm86.c              |   75 +++++++----
 tools/ioemu/target-i386-dm/exec-dm.c         |    2 
 tools/libxc/xc_domain_restore.c              |   11 -
 tools/libxc/xc_misc.c                        |   28 ++++
 tools/libxc/xenctrl.h                        |    4 
 tools/misc/xen-python-path                   |    9 +
 tools/python/xen/util/auxbin.py              |    9 +
 tools/python/xen/xend/XendCheckpoint.py      |    2 
 tools/python/xen/xend/XendConfig.py          |  105 +++++++++++++---
 tools/python/xen/xend/XendDomain.py          |    4 
 tools/python/xen/xend/server/irqif.py        |    2 
 tools/python/xen/xend/server/pciif.py        |    3 
 tools/python/xen/xm/main.py                  |   14 +-
 xen/acm/acm_core.c                           |    2 
 xen/arch/x86/Makefile                        |    1 
 xen/arch/x86/boot/edd.S                      |   24 +--
 xen/arch/x86/boot/x86_32.S                   |   21 ++-
 xen/arch/x86/boot/x86_64.S                   |   15 ++
 xen/arch/x86/clear_page.S                    |   26 ++++
 xen/arch/x86/domain.c                        |    9 -
 xen/arch/x86/hvm/hvm.c                       |   16 --
 xen/arch/x86/hvm/io.c                        |    1 
 xen/arch/x86/hvm/platform.c                  |    3 
 xen/arch/x86/hvm/svm/svm.c                   |   56 +++++++-
 xen/arch/x86/hvm/svm/vmcb.c                  |    8 -
 xen/arch/x86/hvm/vmx/vmcs.c                  |    5 
 xen/arch/x86/hvm/vmx/vmx.c                   |  154 ++++++++++++++----------
 xen/arch/x86/io_apic.c                       |    4 
 xen/arch/x86/mm.c                            |    3 
 xen/arch/x86/platform_hypercall.c            |   96 ++++++++++++++
 xen/arch/x86/setup.c                         |   34 ++++-
 xen/arch/x86/traps.c                         |   13 +-
 xen/arch/x86/x86_32/entry.S                  |  173 +++++++++++++--------------
 xen/arch/x86/x86_32/supervisor_mode_kernel.S |   27 ++--
 xen/arch/x86/x86_32/traps.c                  |   11 -
 xen/arch/x86/x86_64/Makefile                 |    2 
 xen/arch/x86/x86_64/compat/entry.S           |   10 -
 xen/arch/x86/x86_64/entry.S                  |   68 +++++-----
 xen/arch/x86/x86_64/mm.c                     |    9 -
 xen/arch/x86/x86_64/traps.c                  |   10 +
 xen/arch/x86/x86_emulate.c                   |    1 
 xen/common/sysctl.c                          |   33 +++++
 xen/include/asm-x86/edd.h                    |   18 ++
 xen/include/asm-x86/hvm/hvm.h                |   14 ++
 xen/include/asm-x86/hvm/svm/emulate.h        |    1 
 xen/include/asm-x86/hvm/svm/vmcb.h           |    8 -
 xen/include/asm-x86/hvm/trace.h              |    1 
 xen/include/asm-x86/hvm/vmx/vmcs.h           |    2 
 xen/include/asm-x86/hvm/vmx/vmx.h            |    7 -
 xen/include/asm-x86/page.h                   |   13 +-
 xen/include/asm-x86/processor.h              |    4 
 xen/include/asm-x86/x86_32/asm_defns.h       |   76 +++++++----
 xen/include/public/platform.h                |   40 ++++++
 xen/include/public/sysctl.h                  |   22 ++-
 xen/include/public/trace.h                   |    1 
 63 files changed, 961 insertions(+), 560 deletions(-)

diff -r 443ce7edad0e -r d146700adf71 buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen     Mon Jul 02 10:31:03 2007 -0600
+++ b/buildconfigs/mk.linux-2.6-xen     Mon Jul 02 12:19:26 2007 -0600
@@ -27,6 +27,11 @@ _build: build
 _build: build
 
 include buildconfigs/src.$(XEN_LINUX_SOURCE)
+
+# Default to allowing interface mismatch
+ifndef XEN_LINUX_ALLOW_INTERFACE_MISMATCH
+XEN_LINUX_ALLOW_INTERFACE_MISMATCH := y
+endif
 
 # The real action starts here!
 .PHONY: build
diff -r 443ce7edad0e -r d146700adf71 tools/Makefile
--- a/tools/Makefile    Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/Makefile    Mon Jul 02 12:19:26 2007 -0600
@@ -43,6 +43,7 @@ install: check
        $(MAKE) ioemuinstall
        $(INSTALL_DIR) $(DESTDIR)/var/xen/dump
        $(INSTALL_DIR) $(DESTDIR)/var/log/xen
+       $(INSTALL_DIR) $(DESTDIR)/var/lib/xen
 
 .PHONY: clean
 clean: check_clean
diff -r 443ce7edad0e -r d146700adf71 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/console/daemon/io.c Mon Jul 02 12:19:26 2007 -0600
@@ -764,27 +764,31 @@ void handle_io(void)
                /* XXX I wish we didn't have to busy wait for hypervisor logs
                 * but there's no obvious way to get event channel notifications
                 * for new HV log data as we can with guest */
-               ret = select(max_fd + 1, &readfds, &writefds, 0, log_hv_fd != 
-1 ? &timeout : NULL);
-
+               ret = select(max_fd + 1, &readfds, &writefds, 0,
+                            log_hv_fd != -1 ? &timeout : NULL);
+
+               if (log_reload) {
+                       handle_log_reload();
+                       log_reload = 0;
+               }
+
+               /* Abort if select failed, except for EINTR cases
+                  which indicate a possible log reload */
                if (ret == -1) {
-                       if (errno == EINTR) {
-                               if (log_reload) {
-                                       handle_log_reload();
-                                       log_reload = 0;
-                               }
+                       if (errno == EINTR)
                                continue;
-                       }
                        dolog(LOG_ERR, "Failure in select: %d (%s)",
                              errno, strerror(errno));
                        break;
                }
 
-               /* Check for timeout */
-               if (ret == 0) {
-                       if (log_hv_fd != -1)
-                               handle_hv_logs();
+               /* Always process HV logs even if not a timeout */
+               if (log_hv_fd != -1)
+                       handle_hv_logs();
+
+               /* Must not check returned FDSET if it was a timeout */
+               if (ret == 0)
                        continue;
-               }
 
                if (FD_ISSET(xs_fileno(xs), &readfds))
                        handle_xs();
@@ -806,10 +810,14 @@ void handle_io(void)
                }
        }
 
-       if (log_hv_fd != -1)
+       if (log_hv_fd != -1) {
                close(log_hv_fd);
-       if (xc_handle != -1)
+               log_hv_fd = -1;
+       }
+       if (xc_handle != -1) {
                xc_interface_close(xc_handle);
+               xc_handle = -1;
+       }
 }
 
 /*
diff -r 443ce7edad0e -r d146700adf71 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains  Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/examples/init.d/xendomains  Mon Jul 02 12:19:26 2007 -0600
@@ -221,11 +221,12 @@ start()
     if [ "$XENDOMAINS_RESTORE" = "true" ] &&
        contains_something "$XENDOMAINS_SAVE"
     then
-        mkdir -p $(dirname "$LOCKFILE")
+       XENDOMAINS_SAVED=`/bin/ls $XENDOMAINS_SAVE/* | grep -v 'lost+found'`
+       mkdir -p $(dirname "$LOCKFILE")
        touch $LOCKFILE
        echo -n "Restoring Xen domains:"
        saved_domains=`ls $XENDOMAINS_SAVE`
-       for dom in $XENDOMAINS_SAVE/*; do
+       for dom in $XENDOMAINS_SAVED; do
            echo -n " ${dom##*/}"
            xm restore $dom
            if [ $? -ne 0 ]; then
@@ -259,6 +260,7 @@ start()
            if [ $? -eq 0 ] || is_running $dom; then
                echo -n "(skip)"
            else
+               echo "(booting)"
                xm create --quiet --defconfig $dom
                if [ $? -ne 0 ]; then
                    rc_failed $?
diff -r 443ce7edad0e -r d146700adf71 tools/examples/network-bridge
--- a/tools/examples/network-bridge     Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/examples/network-bridge     Mon Jul 02 12:19:26 2007 -0600
@@ -172,9 +172,21 @@ show_status () {
     echo '============================================================'
 }
 
+is_network_root () {
+    local rootfs=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $3; }}' 
/etc/mtab)
+    local rootopts=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $4; }}' 
/etc/mtab)
+
+    [[ "$rootfs" =~ "^nfs" ]] || [[ "$rootopts" =~ "_netdev" ]] && return 0 || 
return 1
+}
+
 op_start () {
     if [ "${bridge}" = "null" ] ; then
        return
+    fi
+
+    if is_network_root ; then
+        [ -x /usr/bin/logger ] && /usr/bin/logger "network-bridge: bridging 
not supported on network root; not starting"
+        return
     fi
 
     if link_exists "$pdev"; then
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/head.S
--- a/tools/firmware/vmxassist/head.S   Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/head.S   Mon Jul 02 12:19:26 2007 -0600
@@ -25,80 +25,12 @@
  * switch happens to the environment below. The magic indicates
  * that this is a valid context.
  */
-#ifdef TEST
-       .byte 0x55, 0xaa
-       .byte 0x80
-       .code16
-       jmp     _start16
-#else
        jmp     _start
-#endif
 
        .align  8
        .long   VMXASSIST_MAGIC
        .long   newctx                  /* new context */
        .long   oldctx                  /* old context */
-
-#ifdef TEST
-/*
- * We are running in 16-bit. Get into the protected mode as soon as
- * possible. We use our own (minimal) GDT to get started.
- *
- * ROM is a misnomer as this code isn't really rommable (although it
- * only requires a few changes) but it does live in a BIOS ROM segment.
- * This code allows me to debug vmxassists under (a modified version of)
- * Bochs and load it as a "optromimage1".
- */
-       .code16
-       .globl  _start16
-_start16:
-        cli
-
-        /* load our own global descriptor table */
-        data32 addr32 lgdt %cs:(rom_gdtr - TEXTADDR)
-
-        /* go to protected mode */
-        movl    %cr0, %eax
-        orl     $(CR0_PE), %eax
-        movl    %eax, %cr0
-        data32  ljmp $0x08, $1f
-
-        .align  32
-        .globl  rom_gdt
-rom_gdt:
-        .word   0, 0            /* 0x00: reserved */
-        .byte   0, 0, 0, 0
-
-        .word   0xFFFF, 0       /* 0x08: CS 32-bit */
-        .byte   0, 0x9A, 0xCF, 0
-
-        .word   0xFFFF, 0       /* 0x10: CS 32-bit */
-        .byte   0, 0x92, 0xCF, 0
-rom_gdt_end:
-
-        .align  4
-        .globl  rom_gdtr
-rom_gdtr:
-        .word   rom_gdt_end - rom_gdt - 1
-        .long   rom_gdt
-
-        .code32
-1:
-        /* welcome to the 32-bit world */
-        movw    $0x10, %ax
-        movw    %ax, %ds
-        movw    %ax, %es
-        movw    %ax, %ss
-        movw    %ax, %fs
-        movw    %ax, %gs
-
-        /* enable Bochs debug facilities */
-        movw    $0x8A00, %dx
-        movw    $0x8A00, %ax
-        outw    %ax, (%dx)
-
-       jmp     _start
-#endif /* TEST */
 
 /*
  * This is the real start. Control was transfered to this point
@@ -111,9 +43,6 @@ _start:
        cli
 
        /* save register parameters to C land */
-#ifdef TEST
-       xorl    %edx, %edx
-#endif
 
        /* clear bss */
        cld
@@ -145,11 +74,6 @@ halt:
 halt:
        push    $halt_msg
        call    printf
-#ifdef TEST
-        movw    $0x8A00, %dx
-        movw    $0x8AE0, %ax
-        outw    %ax, (%dx)
-#endif
        cli
        jmp     .
 
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/machine.h
--- a/tools/firmware/vmxassist/machine.h        Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/machine.h        Mon Jul 02 12:19:26 2007 -0600
@@ -55,13 +55,6 @@
 #define        PGMASK          (~(PGSIZE - 1))         /* page mask */
 #define        LPGSIZE         (1 << LOG_PDSIZE)       /* large page size */
 #define        LPGMASK         (~(LPGSIZE - 1))        /* large page mask */
-
-#ifdef TEST
-#define        PTE_P           (1 << 0)        /* Present */
-#define        PTE_RW          (1 << 1)        /* Read/Write */
-#define        PTE_US          (1 << 2)        /* User/Supervisor */
-#define        PTE_PS          (1 << 7)        /* Page Size */
-#endif
 
 /* Programmable Interrupt Contoller (PIC) defines */
 #define        PIC_MASTER      0x20
@@ -195,14 +188,6 @@ set_cr4(unsigned value)
        __asm__ __volatile__("movl %0, %%cr4" : /* no outputs */ : "r"(value));
 }
 
-#ifdef TEST
-static inline void
-breakpoint(void)
-{
-       outw(0x8A00, 0x8AE0);
-}
-#endif /* TEST */
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* __MACHINE_H__ */
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/setup.c
--- a/tools/firmware/vmxassist/setup.c  Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/setup.c  Mon Jul 02 12:19:26 2007 -0600
@@ -46,19 +46,6 @@ unsigned long long idt[NR_TRAPS] __attri
 unsigned long long idt[NR_TRAPS] __attribute__ ((aligned(32)));
 
 struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt };
-
-#ifdef TEST
-unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 };
-
-struct e820entry e820map[] = {
-       { 0x0000000000000000ULL, 0x000000000009F800ULL, E820_RAM },
-       { 0x000000000009F800ULL, 0x0000000000000800ULL, E820_RESERVED },
-       { 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED },
-       { 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM },
-       { 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS },
-       { 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI },
-};
-#endif /* TEST */
 
 struct vmx_assist_context oldctx;
 struct vmx_assist_context newctx;
@@ -84,38 +71,11 @@ banner(void)
                    (((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10;
        memory_size += 0x400 << 10; /* + 1MB */
 
-#ifdef TEST
-       /* Create an SMAP for our debug environment */
-       e820map[4].size = memory_size - e820map[4].addr - PGSIZE;
-       e820map[5].addr = memory_size - PGSIZE;
-       e820map[6].addr = memory_size;
-       e820map[7].addr += memory_size;
-
-       *HVM_E820_NR = sizeof(e820map)/sizeof(e820map[0]);
-       memcpy(HVM_E820, e820map, sizeof(e820map));
-#endif
-
        printf("Memory size %ld MB\n", memory_size >> 20);
        printf("E820 map:\n");
        print_e820_map(HVM_E820, *HVM_E820_NR);
        printf("\n");
 }
-
-#ifdef TEST
-void
-setup_paging(void)
-{
-       unsigned long i;
-
-       if (((unsigned)pgd & ~PGMASK) != 0)
-               panic("PGD not page aligned");
-       set_cr4(get_cr4() | CR4_PSE);
-       for (i = 0; i < NR_PGD; i++)
-               pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P;
-       set_cr3((unsigned) pgd);
-       set_cr0(get_cr0() | (CR0_PE|CR0_PG));
-}
-#endif /* TEST */
 
 void
 setup_gdt(void)
@@ -211,11 +171,7 @@ enter_real_mode(struct regs *regs)
                regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000;
                if (booting_cpu == 0) {
                        regs->cs = 0xF000; /* ROM BIOS POST entry point */
-#ifdef TEST
-                       regs->eip = 0xFFE0;
-#else
                        regs->eip = 0xFFF0;
-#endif
                } else {
                        regs->cs = booting_vector << 8; /* AP entry point */
                        regs->eip = 0;
@@ -242,9 +198,10 @@ enter_real_mode(struct regs *regs)
        }
 
        /* go from protected to real mode */
-       regs->eflags |= EFLAGS_VM;
        set_mode(regs, VM86_PROTECTED_TO_REAL);
        emulate(regs);
+       if (mode != VM86_REAL)
+               panic("failed to emulate between clear PE and long jump.\n");
 }
 
 /*
@@ -269,13 +226,8 @@ setup_ctx(void)
         * more natural to enable CR0.PE to cause a world switch to
         * protected mode rather than disabling it.
         */
-#ifdef TEST
-       c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE;
-       c->cr3 = (unsigned long) pgd;
-#else
        c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE;
        c->cr3 = 0;
-#endif
        c->cr4 = get_cr4();
 
        c->idtr_limit = sizeof(idt)-1;
@@ -369,16 +321,10 @@ main(void)
        if (booting_cpu == 0)
                banner();
 
-#ifdef TEST
-       setup_paging();
-#endif
-
        setup_gdt();
        setup_idt();
 
-#ifndef        TEST
        set_cr4(get_cr4() | CR4_VME);
-#endif
 
        setup_ctx();
 
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c   Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/vm86.c   Mon Jul 02 12:19:26 2007 -0600
@@ -561,11 +561,7 @@ lmsw(struct regs *regs, unsigned prefix,
        unsigned cr0 = (oldctx.cr0 & 0xFFFFFFF0) | ax;
 
        TRACE((regs, regs->eip - eip, "lmsw 0x%x", ax));
-#ifndef TEST
        oldctx.cr0 = cr0 | CR0_PE | CR0_NE;
-#else
-       oldctx.cr0 = cr0 | CR0_PE | CR0_NE | CR0_PG;
-#endif
        if (cr0 & CR0_PE)
                set_mode(regs, VM86_REAL_TO_PROTECTED);
 
@@ -584,8 +580,13 @@ movr(struct regs *regs, unsigned prefix,
        unsigned addr = operand(prefix, regs, modrm);
        unsigned val, r = (modrm >> 3) & 7;
 
-       if ((modrm & 0xC0) == 0xC0) /* no registers */
-               return 0;
+       if ((modrm & 0xC0) == 0xC0) {
+               /*
+                * Emulate all guest instructions in protected to real mode.
+                */
+               if (mode != VM86_PROTECTED_TO_REAL)
+                       return 0;
+       }
 
        switch (opc) {
        case 0x88: /* addr32 mov r8, r/m8 */
@@ -656,13 +657,8 @@ movcr(struct regs *regs, unsigned prefix
                TRACE((regs, regs->eip - eip, "movl %%cr%d, %%eax", cr));
                switch (cr) {
                case 0:
-#ifndef TEST
                        setreg32(regs, modrm,
                                oldctx.cr0 & ~(CR0_PE | CR0_NE));
-#else
-                       setreg32(regs, modrm,
-                               oldctx.cr0 & ~(CR0_PE | CR0_NE | CR0_PG));
-#endif
                        break;
                case 2:
                        setreg32(regs, modrm, get_cr2());
@@ -680,9 +676,6 @@ movcr(struct regs *regs, unsigned prefix
                switch (cr) {
                case 0:
                        oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | CR0_NE);
-#ifdef TEST
-                       oldctx.cr0 |= CR0_PG;
-#endif
                        if (getreg32(regs, modrm) & CR0_PE)
                                set_mode(regs, VM86_REAL_TO_PROTECTED);
                        else
@@ -818,8 +811,13 @@ mov_to_seg(struct regs *regs, unsigned p
 {
        unsigned modrm = fetch8(regs);
 
-       /* Only need to emulate segment loads in real->protected mode. */
-       if (mode != VM86_REAL_TO_PROTECTED)
+       /*
+        * Emulate segment loads in:
+        * 1) real->protected mode.
+        * 2) protected->real mode.
+        */
+       if ((mode != VM86_REAL_TO_PROTECTED) &&
+           (mode != VM86_PROTECTED_TO_REAL))
                return 0;
 
        /* Register source only. */
@@ -829,6 +827,8 @@ mov_to_seg(struct regs *regs, unsigned p
        switch ((modrm & 0x38) >> 3) {
        case 0: /* es */
                regs->ves = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
                saved_rm_regs.ves = 0;
                oldctx.es_sel = regs->ves;
                return 1;
@@ -837,21 +837,29 @@ mov_to_seg(struct regs *regs, unsigned p
 
        case 2: /* ss */
                regs->uss = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
                saved_rm_regs.uss = 0;
                oldctx.ss_sel = regs->uss;
                return 1;
        case 3: /* ds */
                regs->vds = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
                saved_rm_regs.vds = 0;
                oldctx.ds_sel = regs->vds;
                return 1;
        case 4: /* fs */
                regs->vfs = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
                saved_rm_regs.vfs = 0;
                oldctx.fs_sel = regs->vfs;
                return 1;
        case 5: /* gs */
                regs->vgs = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
                saved_rm_regs.vgs = 0;
                oldctx.gs_sel = regs->vgs;
                return 1;
@@ -1067,7 +1075,8 @@ set_mode(struct regs *regs, enum vm86_mo
        }
 
        mode = newmode;
-       TRACE((regs, 0, states[mode]));
+       if (mode != VM86_PROTECTED)
+               TRACE((regs, 0, states[mode]));
 }
 
 static void
@@ -1086,7 +1095,7 @@ jmpl(struct regs *regs, int prefix)
 
        if (mode == VM86_REAL_TO_PROTECTED)             /* jump to protected 
mode */
                set_mode(regs, VM86_PROTECTED);
-       else if (mode == VM86_PROTECTED_TO_REAL)/* jump to real mode */
+       else if (mode == VM86_PROTECTED_TO_REAL)        /* jump to real mode */
                set_mode(regs, VM86_REAL);
        else
                panic("jmpl");
@@ -1280,6 +1289,12 @@ opcode(struct regs *regs)
        unsigned eip = regs->eip;
        unsigned opc, modrm, disp;
        unsigned prefix = 0;
+
+       if (mode == VM86_PROTECTED_TO_REAL &&
+               oldctx.cs_arbytes.fields.default_ops_size) {
+               prefix |= DATA32;
+               prefix |= ADDR32;
+       }
 
        for (;;) {
                switch ((opc = fetch8(regs))) {
@@ -1391,17 +1406,29 @@ opcode(struct regs *regs)
                        continue;
 
                case 0x66:
-                       TRACE((regs, regs->eip - eip, "data32"));
-                       prefix |= DATA32;
+                       if (mode == VM86_PROTECTED_TO_REAL &&
+                               oldctx.cs_arbytes.fields.default_ops_size) {
+                               TRACE((regs, regs->eip - eip, "data16"));
+                               prefix &= ~DATA32;
+                       } else {
+                               TRACE((regs, regs->eip - eip, "data32"));
+                               prefix |= DATA32;
+                       }
                        continue;
 
                case 0x67:
-                       TRACE((regs, regs->eip - eip, "addr32"));
-                       prefix |= ADDR32;
+                       if (mode == VM86_PROTECTED_TO_REAL &&
+                               oldctx.cs_arbytes.fields.default_ops_size) {
+                               TRACE((regs, regs->eip - eip, "addr16"));
+                               prefix &= ~ADDR32;
+                       } else {
+                               TRACE((regs, regs->eip - eip, "addr32"));
+                               prefix |= ADDR32;
+                       }
                        continue;
 
-               case 0x88: /* addr32 mov r8, r/m8 */
-               case 0x8A: /* addr32 mov r/m8, r8 */
+               case 0x88: /* mov r8, r/m8 */
+               case 0x8A: /* mov r/m8, r8 */
                        if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
                                goto invalid;
                        if ((prefix & ADDR32) == 0)
diff -r 443ce7edad0e -r d146700adf71 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c      Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/ioemu/target-i386-dm/exec-dm.c      Mon Jul 02 12:19:26 2007 -0600
@@ -448,7 +448,7 @@ static void memcpy_words(void *dst, void
 {
     asm (
         "   movl %%edx,%%ecx \n"
-#ifdef __x86_64
+#ifdef __x86_64__
         "   shrl $3,%%ecx    \n"
         "   andl $7,%%edx    \n"
         "   rep  movsq       \n"
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c   Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xc_domain_restore.c   Mon Jul 02 12:19:26 2007 -0600
@@ -465,7 +465,7 @@ int xc_domain_restore(int xc_handle, int
         if ( j == 0 )
             break;  /* our work here is done */
 
-        if ( j > MAX_BATCH_SIZE )
+        if ( (j > MAX_BATCH_SIZE) || (j < 0) )
         {
             ERROR("Max batch size exceeded. Giving up.");
             goto out;
@@ -903,13 +903,14 @@ int xc_domain_restore(int xc_handle, int
 
     /* Get the list of PFNs that are not in the psuedo-phys map */
     {
-        unsigned int count;
+        unsigned int count = 0;
         unsigned long *pfntab;
         int nr_frees, rc;
 
-        if ( !read_exact(io_fd, &count, sizeof(count)) )
-        {
-            ERROR("Error when reading pfn count");
+        if ( !read_exact(io_fd, &count, sizeof(count)) ||
+             (count > (1U << 28)) ) /* up to 1TB of address space */
+        {
+            ERROR("Error when reading pfn count (= %u)", count);
             goto out;
         }
 
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xc_misc.c     Mon Jul 02 12:19:26 2007 -0600
@@ -101,13 +101,37 @@ int xc_perfc_control(int xc_handle,
 
     rc = do_sysctl(xc_handle, &sysctl);
 
-    if (nbr_desc)
+    if ( nbr_desc )
         *nbr_desc = sysctl.u.perfc_op.nr_counters;
-    if (nbr_val)
+    if ( nbr_val )
         *nbr_val = sysctl.u.perfc_op.nr_vals;
 
     return rc;
 }
+
+int xc_getcpuinfo(int xc_handle, int max_cpus,
+                  xc_cpuinfo_t *info, int *nr_cpus)
+{
+    int rc;
+    DECLARE_SYSCTL;
+
+    sysctl.cmd = XEN_SYSCTL_getcpuinfo;
+    sysctl.u.getcpuinfo.max_cpus = max_cpus; 
+    set_xen_guest_handle(sysctl.u.getcpuinfo.info, info); 
+
+    if ( (rc = lock_pages(info, max_cpus*sizeof(*info))) != 0 )
+        return rc;
+
+    rc = do_sysctl(xc_handle, &sysctl);
+
+    unlock_pages(info, max_cpus*sizeof(*info));
+
+    if ( nr_cpus )
+        *nr_cpus = sysctl.u.getcpuinfo.nr_cpus; 
+
+    return rc;
+}
+
 
 int xc_hvm_set_pci_intx_level(
     int xc_handle, domid_t dom,
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xenctrl.h     Mon Jul 02 12:19:26 2007 -0600
@@ -491,6 +491,10 @@ int xc_sched_id(int xc_handle,
 int xc_sched_id(int xc_handle,
                 int *sched_id);
 
+typedef xen_sysctl_cpuinfo_t xc_cpuinfo_t;
+int xc_getcpuinfo(int xc_handle, int max_cpus,
+                  xc_cpuinfo_t *info, int *nr_cpus); 
+
 int xc_domain_setmaxmem(int xc_handle,
                         uint32_t domid,
                         unsigned int max_memkb);
diff -r 443ce7edad0e -r d146700adf71 tools/misc/xen-python-path
--- a/tools/misc/xen-python-path        Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/misc/xen-python-path        Mon Jul 02 12:19:26 2007 -0600
@@ -28,8 +28,13 @@ import os.path
 import os.path
 import sys
 
-for p in ['python%s' % sys.version[:3], 'python']:
-    for l in ['/usr/lib64', '/usr/lib']:
+usr   = os.path.dirname(os.path.dirname(sys.argv[0]))
+list  = [ os.path.join(usr,'lib64') ]
+list += [ os.path.join(usr,'lib') ]
+list += ['/usr/lib64', '/usr/lib']
+
+for l in list:
+    for p in ['python%s' % sys.version[:3], 'python']:
         for k in ['', 'site-packages/']:
             d = os.path.join(l, p, k)
             if os.path.exists(os.path.join(d, AUXBIN)):
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/util/auxbin.py
--- a/tools/python/xen/util/auxbin.py   Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/util/auxbin.py   Mon Jul 02 12:19:26 2007 -0600
@@ -27,6 +27,7 @@ LIB_64_ARCHS = [ 'x86_64', 's390x', 'spa
 
 import os
 import os.path
+import sys
 
 
 def execute(exe, args = None):
@@ -47,6 +48,14 @@ def path():
 
 def libpath():
     machine = os.uname()[4]
+    if sys.argv[0] != '-c':
+        prefix = os.path.dirname(os.path.dirname(sys.argv[0]))
+        path = os.path.join(prefix, os.path.basename(LIB_64))
+        if machine in LIB_64_ARCHS and os.path.exists(path):
+            return path
+        path = os.path.join(prefix, os.path.basename(LIB_32))
+        if os.path.exists(path):
+            return path
     if machine in LIB_64_ARCHS and os.path.exists(LIB_64):
         return LIB_64
     else:
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendCheckpoint.py   Mon Jul 02 12:19:26 2007 -0600
@@ -148,6 +148,8 @@ def save(fd, dominfo, network, live, dst
         except:
             log.exception("Failed to reset the migrating domain's name")
 
+        raise exn
+
 
 def restore(xd, fd, dominfo = None, paused = False):
     signature = read_exact(fd, len(SIGNATURE),
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendConfig.py       Mon Jul 02 12:19:26 2007 -0600
@@ -28,6 +28,7 @@ from xen.xend.PrettyPrint import prettyp
 from xen.xend.PrettyPrint import prettyprintstring
 from xen.xend.XendConstants import DOM_STATE_HALTED
 from xen.xend.server.netif import randomMAC
+from xen.util.blkif import blkdev_name_to_number
 
 log = logging.getLogger("xend.XendConfig")
 log.setLevel(logging.WARN)
@@ -934,6 +935,62 @@ class XendConfig(dict):
 
         return sxpr    
     
+    def _blkdev_name_to_number(self, dev):
+        if 'ioemu:' in dev:
+            _, dev = dev.split(':', 1)
+        try:
+            dev, _ = dev.split(':', 1)
+        except ValueError:
+            pass
+        
+        try:
+            devid = int(dev)
+        except ValueError:
+            # devid is not a number but a string containing either device
+            # name (e.g. xvda) or device_type/device_id (e.g. vbd/51728)
+            dev2 = type(dev) is str and dev.split('/')[-1] or None
+            if dev2 == None:
+                log.debug("Could not check the device %s", dev)
+                return None
+            try:
+                devid = int(dev2)
+            except ValueError:
+                devid = blkdev_name_to_number(dev2)
+                if devid == None:
+                    log.debug("The device %s is not device name", dev2)
+                    return None
+        return devid
+    
+    def device_duplicate_check(self, dev_type, dev_info, defined_config):
+        defined_devices_sxpr = self.all_devices_sxpr(target = defined_config)
+        
+        if dev_type == 'vbd':
+            dev_uname = dev_info.get('uname')
+            blkdev_name = dev_info.get('dev')
+            devid = self._blkdev_name_to_number(blkdev_name)
+            if devid == None:
+                return
+            
+            for o_dev_type, o_dev_info in defined_devices_sxpr:
+                if dev_type == o_dev_type:
+                    if dev_uname == sxp.child_value(o_dev_info, 'uname'):
+                        raise XendConfigError('The uname "%s" is already 
defined' %
+                                              dev_uname)
+                    o_blkdev_name = sxp.child_value(o_dev_info, 'dev')
+                    o_devid = self._blkdev_name_to_number(o_blkdev_name)
+                    if o_devid != None and devid == o_devid:
+                        raise XendConfigError('The device "%s" is already 
defined' %
+                                              blkdev_name)
+                    
+        elif dev_type == 'vif':
+            dev_mac = dev_info.get('mac')
+            
+            for o_dev_type, o_dev_info in defined_devices_sxpr:
+                if dev_type == o_dev_type:
+                    if dev_mac == sxp.child_value(o_dev_info, 'mac'):
+                        raise XendConfigError('The mac "%s" is already 
defined' %
+                                              dev_mac)
+    
     def device_add(self, dev_type, cfg_sxp = None, cfg_xenapi = None,
                    target = None):
         """Add a device configuration in SXP format or XenAPI struct format.
@@ -997,6 +1054,8 @@ class XendConfig(dict):
             if dev_type == 'vif':
                 if not dev_info.get('mac'):
                     dev_info['mac'] = randomMAC()
+
+            self.device_duplicate_check(dev_type, dev_info, target)
 
             # create uuid if it doesn't exist
             dev_uuid = dev_info.get('uuid', None)
@@ -1275,15 +1334,19 @@ class XendConfig(dict):
         return False
 
 
-    def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None):
+    def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None, target = None):
         """Get Device SXPR by either giving the device UUID or (type, config).
 
         @rtype: list of lists
         @return: device config sxpr
         """
         sxpr = []
-        if dev_uuid != None and dev_uuid in self['devices']:
-            dev_type, dev_info = self['devices'][dev_uuid]
+
+        if target == None:
+            target = self
+
+        if dev_uuid != None and dev_uuid in target['devices']:
+            dev_type, dev_info = target['devices'][dev_uuid]
 
         if dev_type == None or dev_info == None:
             raise XendConfigError("Required either UUID or device type and "
@@ -1300,8 +1363,12 @@ class XendConfig(dict):
 
         return sxpr
 
-    def ordered_device_refs(self):
+    def ordered_device_refs(self, target = None):
         result = []
+
+        if target == None:
+            target = self
+
         # vkbd devices *must* be before vfb devices, otherwise
         # there is a race condition when setting up devices
         # where the daemon spawned for the vfb may write stuff
@@ -1309,27 +1376,30 @@ class XendConfig(dict):
         # setup permissions on the vkbd backend path. This race
         # results in domain creation failing with 'device already
         # connected' messages
-        result.extend([u for u in self['devices'].keys() if self['devices'][u][0] == 'vkbd'])
-
-        result.extend(self['console_refs'] +
-                      self['vbd_refs'] +
-                      self['vif_refs'] +
-                      self['vtpm_refs'])
-
-        result.extend([u for u in self['devices'].keys() if u not in result])
+        result.extend([u for u in target['devices'].keys() if target['devices'][u][0] == 'vkbd'])
+
+        result.extend(target.get('console_refs', []) +
+                      target.get('vbd_refs', []) +
+                      target.get('vif_refs', []) +
+                      target.get('vtpm_refs', []))
+
+        result.extend([u for u in target['devices'].keys() if u not in result])
         return result
 
-    def all_devices_sxpr(self):
+    def all_devices_sxpr(self, target = None):
         """Returns the SXPR for all devices in the current configuration."""
         sxprs = []
         pci_devs = []
 
-        if 'devices' not in self:
+        if target == None:
+            target = self
+
+        if 'devices' not in target:
             return sxprs
         
-        ordered_refs = self.ordered_device_refs()
+        ordered_refs = self.ordered_device_refs(target = target)
         for dev_uuid in ordered_refs:
-            dev_type, dev_info = self['devices'][dev_uuid]
+            dev_type, dev_info = target['devices'][dev_uuid]
             if dev_type == 'pci': # special case for pci devices
                 sxpr = [['uuid', dev_info['uuid']]]
                 for pci_dev_info in dev_info['devs']:
@@ -1340,7 +1410,8 @@ class XendConfig(dict):
                 sxprs.append((dev_type, sxpr))
             else:
                 sxpr = self.device_sxpr(dev_type = dev_type,
-                                        dev_info = dev_info)
+                                        dev_info = dev_info,
+                                        target   = target)
                 sxprs.append((dev_type, sxpr))
 
         return sxprs
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendDomain.py       Mon Jul 02 12:19:26 2007 -0600
@@ -1262,8 +1262,10 @@ class XendDomain:
             try:
                 XendCheckpoint.save(fd, dominfo, False, False, dst,
                                     checkpoint=checkpoint)
-            finally:
+            except Exception, e:
                 os.close(fd)
+                raise e
+            os.close(fd)
         except OSError, ex:
             raise XendError("can't write guest state file %s: %s" %
                             (dst, ex[1]))
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/server/irqif.py
--- a/tools/python/xen/xend/server/irqif.py     Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/server/irqif.py     Mon Jul 02 12:19:26 2007 -0600
@@ -61,7 +61,7 @@ class IRQController(DevController):
        
         pirq = get_param('irq')
 
-        rc = xc.domain_irq_permission(dom          = self.getDomid(),
+        rc = xc.domain_irq_permission(domid        = self.getDomid(),
                                       pirq         = pirq,
                                       allow_access = True)
 
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/server/pciif.py     Mon Jul 02 12:19:26 2007 -0600
@@ -185,3 +185,6 @@ class PciController(DevController):
 
     def waitForBackend(self,devid):
         return (0, "ok - no hotplug")
+
+    def migrate(self, config, network, dst, step, domName):
+        raise XendError('Migration not permitted with assigned PCI device.')
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xm/main.py       Mon Jul 02 12:19:26 2007 -0600
@@ -2168,9 +2168,7 @@ def xm_network_attach(args):
         server.xend.domain.device_create(dom, vif)
 
 
-def detach(args, command, deviceClass):
-    arg_check(args, command, 2, 3)
-
+def detach(args, deviceClass):
     dom = args[0]
     dev = args[1]
     try:
@@ -2204,16 +2202,17 @@ def xm_block_detach(args):
             raise OptionError("Cannot find device '%s' in domain '%s'"
                               % (dev,dom))
     else:
+        arg_check(args, 'block-detach', 2, 3)
         try:
-            detach(args, 'block-detach', 'vbd')
+            detach(args, 'vbd')
             return
         except:
             pass
-        detach(args, 'block-detach', 'tap')
+        detach(args, 'tap')
 
 def xm_network_detach(args):
     if serverType == SERVER_XEN_API:
-        arg_check(args, "xm_block_detach", 2, 3)
+        arg_check(args, "xm_network_detach", 2, 3)
         dom = args[0]
         devid = args[1]
         vif_refs = server.xenapi.VM.get_VIFs(get_single_vm(dom))
@@ -2227,7 +2226,8 @@ def xm_network_detach(args):
         else:
             print "Cannot find device '%s' in domain '%s'" % (devid,dom)
     else:
-        detach(args, 'network-detach', 'vif')
+        arg_check(args, 'network-detach', 2, 3)
+        detach(args, 'vif')
 
 
 def xm_vnet_list(args):
diff -r 443ce7edad0e -r d146700adf71 xen/acm/acm_core.c
--- a/xen/acm/acm_core.c        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/acm/acm_core.c        Mon Jul 02 12:19:26 2007 -0600
@@ -89,7 +89,7 @@ static void __init set_dom0_ssidref(cons
         if (hi < ACM_MAX_NUM_TYPES && hi >= 1)
             dom0_ste_ssidref = hi;
         for (i = 0; i < sizeof(polname); i++) {
-            polname[i] = c[7+i];
+            polname[i] = c[5+i];
             if (polname[i] == '\0' || polname[i] == '\t' ||
                 polname[i] == '\n' || polname[i] == ' '  ||
                 polname[i] == ':') {
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/Makefile     Mon Jul 02 12:19:26 2007 -0600
@@ -10,6 +10,7 @@ subdir-$(x86_64) += x86_64
 
 obj-y += apic.o
 obj-y += bitops.o
+obj-y += clear_page.o
 obj-y += compat.o
 obj-y += delay.o
 obj-y += dmi_scan.o
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/edd.S
--- a/xen/arch/x86/boot/edd.S   Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/edd.S   Mon Jul 02 12:19:26 2007 -0600
@@ -24,7 +24,7 @@
 /* Maximum number of EDD information structures at boot_edd_info. */
 #define EDD_INFO_MAX            6
 
-/* Maximum number of MBR signatures at boot_edd_signature. */
+/* Maximum number of MBR signatures at boot_mbr_signature. */
 #define EDD_MBR_SIG_MAX         16
 
 /* Size of components of EDD information structure. */
@@ -40,10 +40,8 @@ get_edd:
 # Read the first sector of each BIOS disk device and store the 4-byte signature
 edd_mbr_sig_start:
         movb    $0x80, %dl                      # from device 80
-        movw    $bootsym(boot_edd_signature),%bx # store buffer ptr in bx
+        movw    $bootsym(boot_mbr_signature),%bx # store buffer ptr in bx
 edd_mbr_sig_read:
-        movl    $0xFFFFFFFF, %eax
-        movl    %eax, (%bx)                     # assume failure
         pushw   %bx
         movb    $0x02, %ah                      # 0x02 Read Sectors
         movb    $1, %al                         # read 1 sector
@@ -64,11 +62,12 @@ edd_mbr_sig_read:
         cmpb    $0, %ah                         # some BIOSes do not set CF
         jne     edd_mbr_sig_done                # on failure, we're done.
         movl    bootsym(boot_edd_info)+EDD_MBR_SIG_OFFSET,%eax
-        movl    %eax, (%bx)                     # store signature from MBR
-        incb    bootsym(boot_edd_signature_nr)  # note that we stored something
+        movb    %dl, (%bx)                      # store BIOS drive number
+        movl    %eax, 4(%bx)                    # store signature from MBR
+        incb    bootsym(boot_mbr_signature_nr)  # note that we stored something
         incb    %dl                             # increment to next device
-        addw    $4, %bx                         # increment sig buffer ptr
-        cmpb    $EDD_MBR_SIG_MAX,bootsym(boot_edd_signature_nr)
+        addw    $8, %bx                         # increment sig buffer ptr
+        cmpb    $EDD_MBR_SIG_MAX,bootsym(boot_mbr_signature_nr)
         jb      edd_mbr_sig_read
 edd_mbr_sig_done:
 
@@ -150,12 +149,13 @@ opt_edd:
 opt_edd:
         .byte   0                               # edd=on/off/skipmbr
 
-.globl  boot_edd_info_nr, boot_edd_signature_nr
+.globl  boot_edd_info, boot_edd_info_nr
+.globl  boot_mbr_signature, boot_mbr_signature_nr
 boot_edd_info_nr:
         .byte   0
-boot_edd_signature_nr:
+boot_mbr_signature_nr:
         .byte   0
-boot_edd_signature:
-        .fill   EDD_MBR_SIG_MAX*4,1,0
+boot_mbr_signature:
+        .fill   EDD_MBR_SIG_MAX*8,1,0
 boot_edd_info:
         .fill   512,1,0                         # big enough for a disc sector
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/x86_32.S        Mon Jul 02 12:19:26 2007 -0600
@@ -36,15 +36,29 @@ 1:      mov     %eax,(%edi)
 
 /* This is the default interrupt handler. */
 int_msg:
-        .asciz "Unknown interrupt\n"
+        .asciz "Unknown interrupt (cr2=%08x)\n"
+hex_msg:
+        .asciz "  %08x"
         ALIGN
 ignore_int:
+        pusha
         cld
         mov     $(__HYPERVISOR_DS),%eax
         mov     %eax,%ds
         mov     %eax,%es
+        mov     %cr2,%eax
+        push    %eax
         pushl   $int_msg
         call    printk
+        add     $8,%esp
+        mov     %esp,%ebp
+0:      pushl   (%ebp)
+        add     $4,%ebp
+        pushl   $hex_msg
+        call    printk
+        add     $8,%esp
+        test    $0xffc,%ebp
+        jnz     0b
 1:      jmp     1b
 
 ENTRY(stack_start)
@@ -65,11 +79,6 @@ gdt_descr:
 gdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
         .long   gdt_table - FIRST_RESERVED_GDT_BYTE
-
-        .word   0
-nopaging_gdt_descr:
-        .word   LAST_RESERVED_GDT_BYTE
-        .long   sym_phys(gdt_table) - FIRST_RESERVED_GDT_BYTE
 
         .align PAGE_SIZE, 0
 /* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/x86_64.S        Mon Jul 02 12:19:26 2007 -0600
@@ -56,12 +56,23 @@ 1:      movq    %rax,(%rdi)
 
 /* This is the default interrupt handler. */
 int_msg:
-        .asciz "Unknown interrupt\n"
+        .asciz "Unknown interrupt (cr2=%016lx)\n"
+hex_msg:
+        .asciz "    %016lx"
 ignore_int:
-        cld
+        SAVE_ALL
+        movq    %cr2,%rsi
         leaq    int_msg(%rip),%rdi
         xorl    %eax,%eax
         call    printk
+        movq    %rsp,%rbp
+0:      movq    (%rbp),%rsi
+        addq    $8,%rbp
+        leaq    hex_msg(%rip),%rdi
+        xorl    %eax,%eax
+        call    printk
+        testq   $0xff8,%rbp
+        jnz     0b
 1:      jmp     1b
 
 
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/clear_page.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/clear_page.S Mon Jul 02 12:19:26 2007 -0600
@@ -0,0 +1,26 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+#ifdef __i386__
+#define ptr_reg %edx
+#else
+#define ptr_reg %rdi
+#endif
+
+ENTRY(clear_page_sse2)
+#ifdef __i386__
+        mov     4(%esp), ptr_reg
+#endif
+        mov     $PAGE_SIZE/16, %ecx
+        xor     %eax,%eax
+
+0:      dec     %ecx
+        movnti  %eax, (ptr_reg)
+        movnti  %eax, 4(ptr_reg)
+        movnti  %eax, 8(ptr_reg)
+        movnti  %eax, 12(ptr_reg)
+        lea     16(ptr_reg), ptr_reg
+        jnz     0b
+
+        sfence
+        ret
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/domain.c     Mon Jul 02 12:19:26 2007 -0600
@@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v, 
         pg = alloc_domheap_page(NULL);
         if ( !pg )
             return -ENOMEM;
-        d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+        d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
+        clear_page(d->arch.mm_arg_xlat_l3);
     }
 
     l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
@@ -444,7 +445,8 @@ int arch_domain_create(struct domain *d)
 
     if ( (pg = alloc_domheap_page(NULL)) == NULL )
         goto fail;
-    d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg));
+    d->arch.mm_perdomain_l2 = page_to_virt(pg);
+    clear_page(d->arch.mm_perdomain_l2);
     for ( i = 0; i < (1 << pdpt_order); i++ )
         d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
             l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
@@ -452,7 +454,8 @@ int arch_domain_create(struct domain *d)
 
     if ( (pg = alloc_domheap_page(NULL)) == NULL )
         goto fail;
-    d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg));
+    d->arch.mm_perdomain_l3 = page_to_virt(pg);
+    clear_page(d->arch.mm_perdomain_l3);
     d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
         l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
                             __PAGE_HYPERVISOR);
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Mon Jul 02 12:19:26 2007 -0600
@@ -242,6 +242,11 @@ void hvm_domain_relinquish_resources(str
 {
     hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
     hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
+
+    pit_deinit(d);
+    rtc_deinit(d);
+    pmtimer_deinit(d);
+    hpet_deinit(d);
 }
 
 void hvm_domain_destroy(struct domain *d)
@@ -421,22 +426,11 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
 void hvm_vcpu_destroy(struct vcpu *v)
 {
-    struct domain *d = v->domain;
-
     vlapic_destroy(v);
     hvm_funcs.vcpu_destroy(v);
 
     /* Event channel is already freed by evtchn_destroy(). */
     /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
-
-    if ( v->vcpu_id == 0 )
-    {
-        /* NB. All these really belong in hvm_domain_destroy(). */
-        pit_deinit(d);
-        rtc_deinit(d);
-        pmtimer_deinit(d);
-        hpet_deinit(d);
-    }
 }
 
 
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/io.c     Mon Jul 02 12:19:26 2007 -0600
@@ -858,6 +858,7 @@ void hvm_io_assist(void)
     }
 
     /* Copy register changes back into current guest state. */
+    regs->eflags &= ~X86_EFLAGS_RF;
     hvm_load_cpu_guest_regs(v, regs);
     memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
 
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/platform.c       Mon Jul 02 12:19:26 2007 -0600
@@ -1065,6 +1065,7 @@ void handle_mmio(unsigned long gpa)
     }
 
     regs->eip += inst_len; /* advance %eip */
+    regs->eflags &= ~X86_EFLAGS_RF;
 
     switch ( mmio_op->instr ) {
     case INSTR_MOV:
@@ -1122,6 +1123,7 @@ void handle_mmio(unsigned long gpa)
             /* IO read --> memory write */
             if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
             regs->eip -= inst_len; /* do not advance %eip */
+            regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */
             hvm_inject_exception(TRAP_page_fault, errcode, addr);
             return;
         }
@@ -1150,6 +1152,7 @@ void handle_mmio(unsigned long gpa)
                         /* Failed on the page-spanning copy.  Inject PF into
                          * the guest for the address where we failed */
                         regs->eip -= inst_len; /* do not advance %eip */
+                        regs->eflags |= X86_EFLAGS_RF; /* RF was set by #PF */
                         /* Must set CR2 at the failing address */ 
                         addr += size - rv;
                         gdprintk(XENLOG_DEBUG, "Pagefault on non-io side of a "
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Jul 02 12:19:26 2007 -0600
@@ -391,7 +391,7 @@ int svm_vmcb_restore(struct vcpu *v, str
     }
 
  skip_cr3:
-    vmcb->cr4 = c->cr4 | SVM_CR4_HOST_MASK;
+    vmcb->cr4 = c->cr4 | HVM_CR4_HOST_MASK;
     v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4;
     
     vmcb->idtr.limit = c->idtr_limit;
@@ -448,7 +448,8 @@ int svm_vmcb_restore(struct vcpu *v, str
     /* update VMCB for nested paging restore */
     if ( paging_mode_hap(v->domain) ) {
         vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
-        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
+                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
         vmcb->cr3 = c->cr3;
         vmcb->np_enable = 1;
         vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
@@ -805,8 +806,10 @@ static void svm_ctxt_switch_from(struct 
         : : "a" (__pa(root_vmcb[cpu])) );
 
 #ifdef __x86_64__
-    /* Resume use of IST2 for NMIs now that the host TR is reinstated. */
-    idt_tables[cpu][TRAP_nmi].a |= 2UL << 32;
+    /* Resume use of ISTs now that the host TR is reinstated. */
+    idt_tables[cpu][TRAP_double_fault].a  |= 1UL << 32; /* IST1 */
+    idt_tables[cpu][TRAP_nmi].a           |= 2UL << 32; /* IST2 */
+    idt_tables[cpu][TRAP_machine_check].a |= 3UL << 32; /* IST3 */
 #endif
 }
 
@@ -826,10 +829,12 @@ static void svm_ctxt_switch_to(struct vc
     set_segment_register(ss, 0);
 
     /*
-     * Cannot use IST2 for NMIs while we are running with the guest TR. But
-     * this doesn't matter: the IST is only needed to handle SYSCALL/SYSRET.
+     * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
+     * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
      */
-    idt_tables[cpu][TRAP_nmi].a &= ~(2UL << 32);
+    idt_tables[cpu][TRAP_double_fault].a  &= ~(3UL << 32);
+    idt_tables[cpu][TRAP_nmi].a           &= ~(3UL << 32);
+    idt_tables[cpu][TRAP_machine_check].a &= ~(3UL << 32);
 #endif
 
     svm_restore_dr(v);
@@ -1823,9 +1828,19 @@ static int mov_to_cr(int gpreg, int cr, 
         break;
 
     case 4: /* CR4 */
+        if ( value & HVM_CR4_GUEST_RESERVED_BITS )
+        {
+            HVM_DBG_LOG(DBG_LEVEL_1,
+                        "Guest attempts to set reserved bit in CR4: %lx",
+                        value);
+            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+            break;
+        }
+
         if ( paging_mode_hap(v->domain) )
         {
-            vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 = value;
+            v->arch.hvm_svm.cpu_shadow_cr4 = value;
+            vmcb->cr4 = value | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
             paging_update_paging_modes(v);
             /* signal paging update to ASID handler */
             svm_asid_g_update_paging (v);
@@ -1875,7 +1890,7 @@ static int mov_to_cr(int gpreg, int cr, 
         }
 
         v->arch.hvm_svm.cpu_shadow_cr4 = value;
-        vmcb->cr4 = value | SVM_CR4_HOST_MASK;
+        vmcb->cr4 = value | HVM_CR4_HOST_MASK;
   
         /*
          * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
@@ -2071,9 +2086,11 @@ static inline void svm_do_msr_access(
         case MSR_IA32_TIME_STAMP_COUNTER:
             msr_content = hvm_get_guest_time(v);
             break;
+
         case MSR_IA32_APICBASE:
             msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
             break;
+
         case MSR_EFER:
             msr_content = v->arch.hvm_svm.cpu_shadow_efer;
             break;
@@ -2093,6 +2110,10 @@ static inline void svm_do_msr_access(
              * particularly meaningful, but at least avoids the guest crashing!
              */
             msr_content = 0;
+            break;
+
+        case MSR_K8_VM_HSAVE_PA:
+            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
             break;
 
         default:
@@ -2128,9 +2149,15 @@ static inline void svm_do_msr_access(
             hvm_set_guest_time(v, msr_content);
             pt_reset(v);
             break;
+
         case MSR_IA32_APICBASE:
             vlapic_msr_set(vcpu_vlapic(v), msr_content);
             break;
+
+        case MSR_K8_VM_HSAVE_PA:
+            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+            break;
+
         default:
             if ( !long_mode_do_msr_write(regs) )
                 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
@@ -2265,12 +2292,13 @@ static int svm_reset_to_realmode(struct 
     vmcb->cr2 = 0;
     vmcb->efer = EFER_SVME;
 
-    vmcb->cr4 = SVM_CR4_HOST_MASK;
+    vmcb->cr4 = HVM_CR4_HOST_MASK;
     v->arch.hvm_svm.cpu_shadow_cr4 = 0;
 
     if ( paging_mode_hap(v->domain) ) {
         vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
-        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
+                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
     }
 
     /* This will jump to ROMBIOS */
@@ -2411,6 +2439,12 @@ asmlinkage void svm_vmexit_handler(struc
         break;
     }
 
+    case VMEXIT_EXCEPTION_MC:
+        HVMTRACE_0D(MCE, v);
+        svm_store_cpu_guest_regs(v, regs, NULL);
+        do_machine_check(regs);
+        break;
+
     case VMEXIT_VINTR:
         vmcb->vintr.fields.irq = 0;
         vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Mon Jul 02 12:19:26 2007 -0600
@@ -224,7 +224,7 @@ static int construct_vmcb(struct vcpu *v
     /* Guest CR4. */
     arch_svm->cpu_shadow_cr4 =
         read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
-    vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK;
+    vmcb->cr4 = arch_svm->cpu_shadow_cr4 | HVM_CR4_HOST_MASK;
 
     paging_update_paging_modes(v);
     vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
@@ -235,7 +235,9 @@ static int construct_vmcb(struct vcpu *v
         vmcb->np_enable = 1; /* enable nested paging */
         vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
         vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
-        vmcb->cr4 = arch_svm->cpu_shadow_cr4 = 0;
+        vmcb->cr4 = arch_svm->cpu_shadow_cr4 =
+                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
+        vmcb->exception_intercepts = HVM_TRAP_MASK;
 
         /* No point in intercepting CR0/3/4 reads, because the hardware 
          * will return the guest versions anyway. */
@@ -249,7 +251,7 @@ static int construct_vmcb(struct vcpu *v
     }
     else
     {
-        vmcb->exception_intercepts = 1U << TRAP_page_fault;
+        vmcb->exception_intercepts = HVM_TRAP_MASK | (1U << TRAP_page_fault);
     }
 
     return 0;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Mon Jul 02 12:19:26 2007 -0600
@@ -43,6 +43,7 @@ u32 vmx_secondary_exec_control __read_mo
 u32 vmx_secondary_exec_control __read_mostly;
 u32 vmx_vmexit_control __read_mostly;
 u32 vmx_vmentry_control __read_mostly;
+bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly;
 
 static u32 vmcs_revision_id __read_mostly;
 
@@ -133,6 +134,7 @@ void vmx_init_vmcs_config(void)
         vmx_secondary_exec_control = _vmx_secondary_exec_control;
         vmx_vmexit_control         = _vmx_vmexit_control;
         vmx_vmentry_control        = _vmx_vmentry_control;
+        cpu_has_vmx_ins_outs_instr_info = !!(vmx_msr_high & (1U<<22));
     }
     else
     {
@@ -142,6 +144,7 @@ void vmx_init_vmcs_config(void)
         BUG_ON(vmx_secondary_exec_control != _vmx_secondary_exec_control);
         BUG_ON(vmx_vmexit_control != _vmx_vmexit_control);
         BUG_ON(vmx_vmentry_control != _vmx_vmentry_control);
+        BUG_ON(cpu_has_vmx_ins_outs_instr_info != !!(vmx_msr_high & (1U<<22)));
     }
 
     /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
@@ -421,7 +424,7 @@ static void construct_vmcs(struct vcpu *
     __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
 #endif
 
-    __vmwrite(EXCEPTION_BITMAP, 1U << TRAP_page_fault);
+    __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
 
     /* Guest CR0. */
     cr0 = read_cr0();
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Jul 02 12:19:26 2007 -0600
@@ -560,6 +560,9 @@ int vmx_vmcs_restore(struct vcpu *v, str
     __vmwrite(GUEST_RSP, c->rsp);
     __vmwrite(GUEST_RFLAGS, c->rflags);
 
+    v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG 
+                               | X86_CR0_NE | X86_CR0_WP | X86_CR0_ET);
+    __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
     v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
     __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
 
@@ -577,33 +580,17 @@ int vmx_vmcs_restore(struct vcpu *v, str
         goto skip_cr3;
     }
 
-    if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
-        /*
-         * This is simple TLB flush, implying the guest has
-         * removed some translation or changed page attributes.
-         * We simply invalidate the shadow.
-         */
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
-            goto bad_cr3;
-        }
-    } else {
-        /*
-         * If different, make a shadow. Check if the PDBR is valid
-         * first.
-         */
-        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
-        /* current!=vcpu as not called by arch_vmx_do_launch */
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
-            goto bad_cr3;
-        }
-        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-        v->arch.guest_table = pagetable_from_pfn(mfn);
-        if (old_base_mfn)
-             put_page(mfn_to_page(old_base_mfn));
-        v->arch.hvm_vmx.cpu_cr3 = c->cr3;
-    }
+    HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
+    /* current!=vcpu as not called by arch_vmx_do_launch */
+    mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+    if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
+        goto bad_cr3;
+    }
+    old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+    v->arch.guest_table = pagetable_from_pfn(mfn);
+    if (old_base_mfn)
+        put_page(mfn_to_page(old_base_mfn));
+    v->arch.hvm_vmx.cpu_cr3 = c->cr3;
 
  skip_cr3:
 #if defined(__x86_64__)
@@ -615,7 +602,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
     }
 #endif
 
-    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+    __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
     v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
     __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
 
@@ -1315,16 +1302,20 @@ static int __get_instruction_length(void
 
 static void inline __update_guest_eip(unsigned long inst_len)
 {
-    unsigned long current_eip, intr_shadow;
-
-    current_eip = __vmread(GUEST_RIP);
-    __vmwrite(GUEST_RIP, current_eip + inst_len);
-
-    intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
-    if ( intr_shadow & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
-    {
-        intr_shadow &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
-        __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
+    unsigned long x;
+
+    x = __vmread(GUEST_RIP);
+    __vmwrite(GUEST_RIP, x + inst_len);
+
+    x = __vmread(GUEST_RFLAGS);
+    if ( x & X86_EFLAGS_RF )
+        __vmwrite(GUEST_RFLAGS, x & ~X86_EFLAGS_RF);
+
+    x = __vmread(GUEST_INTERRUPTIBILITY_INFO);
+    if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
+    {
+        x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
+        __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x);
     }
 }
 
@@ -1475,16 +1466,34 @@ static void vmx_do_invlpg(unsigned long 
     paging_invlpg(v, va);
 }
 
-/*
- * get segment for string pio according to guest instruction
- */
-static void vmx_str_pio_get_segment(int long_mode, unsigned long eip,
-                                   int inst_len, enum x86_segment *seg)
+/* Get segment for OUTS according to guest instruction. */
+static enum x86_segment vmx_outs_get_segment(
+    int long_mode, unsigned long eip, int inst_len)
 {
     unsigned char inst[MAX_INST_LEN];
+    enum x86_segment seg = x86_seg_ds;
     int i;
     extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
 
+    if ( likely(cpu_has_vmx_ins_outs_instr_info) )
+    {
+        unsigned int instr_info = __vmread(VMX_INSTRUCTION_INFO);
+
+        /* Get segment register according to bits 17:15. */
+        switch ( (instr_info >> 15) & 7 )
+        {
+        case 0: seg = x86_seg_es; break;
+        case 1: seg = x86_seg_cs; break;
+        case 2: seg = x86_seg_ss; break;
+        case 3: seg = x86_seg_ds; break;
+        case 4: seg = x86_seg_fs; break;
+        case 5: seg = x86_seg_gs; break;
+        default: BUG();
+        }
+
+        goto out;
+    }
+
     if ( !long_mode )
         eip += __vmread(GUEST_CS_BASE);
 
@@ -1493,7 +1502,7 @@ static void vmx_str_pio_get_segment(int 
     {
         gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
         domain_crash(current->domain);
-        return;
+        goto out;
     }
 
     for ( i = 0; i < inst_len; i++ )
@@ -1510,25 +1519,28 @@ static void vmx_str_pio_get_segment(int 
 #endif
             continue;
         case 0x2e: /* CS */
-            *seg = x86_seg_cs;
+            seg = x86_seg_cs;
             continue;
         case 0x36: /* SS */
-            *seg = x86_seg_ss;
+            seg = x86_seg_ss;
             continue;
         case 0x26: /* ES */
-            *seg = x86_seg_es;
+            seg = x86_seg_es;
             continue;
         case 0x64: /* FS */
-            *seg = x86_seg_fs;
+            seg = x86_seg_fs;
             continue;
         case 0x65: /* GS */
-            *seg = x86_seg_gs;
+            seg = x86_seg_gs;
             continue;
         case 0x3e: /* DS */
-            *seg = x86_seg_ds;
+            seg = x86_seg_ds;
             continue;
         }
     }
+
+ out:
+    return seg;
 }
 
 static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip,
@@ -1541,7 +1553,7 @@ static int vmx_str_pio_check_descriptor(
     *base = 0;
     *limit = 0;
     if ( seg != x86_seg_es )
-        vmx_str_pio_get_segment(long_mode, eip, inst_len, &seg);
+        seg = vmx_outs_get_segment(long_mode, eip, inst_len);
 
     switch ( seg )
     {
@@ -1587,7 +1599,7 @@ static int vmx_str_pio_check_descriptor(
     }
     *ar_bytes = __vmread(ar_field);
 
-    return !(*ar_bytes & 0x10000);
+    return !(*ar_bytes & X86_SEG_AR_SEG_UNUSABLE);
 }
 
 
@@ -1896,7 +1908,7 @@ static void vmx_world_save(struct vcpu *
     c->eip += __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
 
     c->esp = __vmread(GUEST_RSP);
-    c->eflags = __vmread(GUEST_RFLAGS);
+    c->eflags = __vmread(GUEST_RFLAGS) & ~X86_EFLAGS_RF;
 
     c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
     c->cr3 = v->arch.hvm_vmx.cpu_cr3;
@@ -1997,7 +2009,7 @@ static int vmx_world_restore(struct vcpu
     else
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
 
-    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+    __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
     v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
     __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
 
@@ -2272,7 +2284,6 @@ static int vmx_set_cr0(unsigned long val
                     "Enabling CR0.PE at %%eip 0x%lx", eip);
         if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
         {
-            eip = __vmread(GUEST_RIP);
             HVM_DBG_LOG(DBG_LEVEL_1,
                         "Restoring to %%eip 0x%lx", eip);
             return 0; /* do not update eip! */
@@ -2397,6 +2408,15 @@ static int mov_to_cr(int gp, int cr, str
     case 4: /* CR4 */
         old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;
 
+        if ( value & HVM_CR4_GUEST_RESERVED_BITS )
+        {
+            HVM_DBG_LOG(DBG_LEVEL_1,
+                        "Guest attempts to set reserved bit in CR4: %lx",
+                        value);
+            vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+            break;
+        }
+
         if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
         {
             if ( vmx_pgbit_test(v) )
@@ -2437,7 +2457,7 @@ static int mov_to_cr(int gp, int cr, str
             }
         }
 
-        __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
+        __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
         v->arch.hvm_vmx.cpu_shadow_cr4 = value;
         __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
 
@@ -2581,7 +2601,7 @@ static inline int vmx_do_msr_read(struct
     case MSR_IA32_APICBASE:
         msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
         break;
-    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
         if ( long_mode_do_msr_read(regs) )
@@ -2707,7 +2727,7 @@ static inline int vmx_do_msr_write(struc
     case MSR_IA32_APICBASE:
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
         break;
-    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
         if ( !long_mode_do_msr_write(regs) )
@@ -2823,7 +2843,8 @@ static void vmx_reflect_exception(struct
     }
 }
 
-static void vmx_failed_vmentry(unsigned int exit_reason)
+static void vmx_failed_vmentry(unsigned int exit_reason,
+                               struct cpu_user_regs *regs)
 {
     unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
     unsigned long exit_qualification;
@@ -2840,6 +2861,9 @@ static void vmx_failed_vmentry(unsigned 
         break;
     case EXIT_REASON_MACHINE_CHECK:
         printk("caused by machine check.\n");
+        HVMTRACE_0D(MCE, current);
+        vmx_store_cpu_guest_regs(current, regs, NULL);
+        do_machine_check(regs);
         break;
     default:
         printk("reason not known yet!");
@@ -2869,7 +2893,7 @@ asmlinkage void vmx_vmexit_handler(struc
         local_irq_enable();
 
     if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
-        return vmx_failed_vmentry(exit_reason);
+        return vmx_failed_vmentry(exit_reason, regs);
 
     switch ( exit_reason )
     {
@@ -2920,11 +2944,19 @@ asmlinkage void vmx_vmexit_handler(struc
             vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
             break;
         case TRAP_nmi:
-            HVMTRACE_0D(NMI, v);
             if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
+            {
+                HVMTRACE_0D(NMI, v);
+                vmx_store_cpu_guest_regs(v, regs, NULL);
                 do_nmi(regs); /* Real NMI, vector 2: normal processing. */
+            }
             else
                 vmx_reflect_exception(v);
+            break;
+        case TRAP_machine_check:
+            HVMTRACE_0D(MCE, v);
+            vmx_store_cpu_guest_regs(v, regs, NULL);
+            do_machine_check(regs);
             break;
         default:
             goto exit_and_crash;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/io_apic.c    Mon Jul 02 12:19:26 2007 -0600
@@ -371,7 +371,7 @@ static int pin_2_irq(int idx, int apic, 
  * so mask in all cases should simply be TARGET_CPUS
  */
 #ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
+void /*__init*/ setup_ioapic_dest(void)
 {
     int pin, ioapic, irq, irq_entry;
 
@@ -849,7 +849,7 @@ static inline void UNEXPECTED_IO_APIC(vo
 {
 }
 
-void __init __print_IO_APIC(void)
+void /*__init*/ __print_IO_APIC(void)
 {
     int apic, i;
     union IO_APIC_reg_00 reg_00;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/mm.c Mon Jul 02 12:19:26 2007 -0600
@@ -3240,6 +3240,7 @@ static int ptwr_emulated_update(
     struct ptwr_emulate_ctxt *ptwr_ctxt)
 {
     unsigned long mfn;
+    unsigned long unaligned_addr = addr;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
     struct vcpu *v = current;
@@ -3294,7 +3295,7 @@ static int ptwr_emulated_update(
     if ( unlikely(!get_page_from_l1e(nl1e, d)) )
     {
         if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) &&
-             (bytes == 4) && (addr & 4) && !do_cmpxchg &&
+             (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg &&
              (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
         {
             /*
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/platform_hypercall.c Mon Jul 02 12:19:26 2007 -0600
@@ -20,12 +20,20 @@
 #include <xen/guest_access.h>
 #include <asm/current.h>
 #include <public/platform.h>
+#include <asm/edd.h>
 #include <asm/mtrr.h>
 #include "cpu/mtrr/mtrr.h"
+
+extern uint16_t boot_edid_caps;
+extern uint8_t boot_edid_info[];
 
 #ifndef COMPAT
 typedef long ret_t;
 DEFINE_SPINLOCK(xenpf_lock);
+# undef copy_from_compat
+# define copy_from_compat copy_from_guest
+# undef copy_to_compat
+# define copy_to_compat copy_to_guest
 #else
 extern spinlock_t xenpf_lock;
 #endif
@@ -150,6 +158,94 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
         }
     }
     break;
+
+    case XENPF_firmware_info:
+        switch ( op->u.firmware_info.type )
+        {
+        case XEN_FW_DISK_INFO: {
+            const struct edd_info *info;
+            u16 length;
+
+            ret = -ESRCH;
+            if ( op->u.firmware_info.index >= bootsym(boot_edd_info_nr) )
+                break;
+
+            info = bootsym(boot_edd_info) + op->u.firmware_info.index;
+
+            /* Transfer the EDD info block. */
+            ret = -EFAULT;
+            if ( copy_from_compat(&length, op->u.firmware_info.u.
+                                  disk_info.edd_params, 1) )
+                break;
+            if ( length > info->edd_device_params.length )
+                length = info->edd_device_params.length;
+            if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params,
+                                (u8 *)&info->edd_device_params,
+                                length) )
+                break;
+            if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params,
+                                &length, 1) )
+                break;
+
+            /* Transfer miscellaneous other information values. */
+#define C(x) op->u.firmware_info.u.disk_info.x = info->x
+            C(device);
+            C(version);
+            C(interface_support);
+            C(legacy_max_cylinder);
+            C(legacy_max_head);
+            C(legacy_sectors_per_track);
+#undef C
+
+            ret = (copy_field_to_guest(u_xenpf_op, op,
+                                      u.firmware_info.u.disk_info)
+                   ? -EFAULT : 0);
+            break;
+        }
+        case XEN_FW_DISK_MBR_SIGNATURE: {
+            const struct mbr_signature *sig;
+
+            ret = -ESRCH;
+            if ( op->u.firmware_info.index >= bootsym(boot_mbr_signature_nr) )
+                break;
+
+            sig = bootsym(boot_mbr_signature) + op->u.firmware_info.index;
+
+            op->u.firmware_info.u.disk_mbr_signature.device = sig->device;
+            op->u.firmware_info.u.disk_mbr_signature.mbr_signature =
+                sig->signature;
+
+            ret = (copy_field_to_guest(u_xenpf_op, op,
+                                      u.firmware_info.u.disk_mbr_signature)
+                   ? -EFAULT : 0);
+            break;
+        }
+        case XEN_FW_VBEDDC_INFO:
+            ret = -ESRCH;
+            if ( op->u.firmware_info.index != 0 )
+                break;
+            if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
+                break;
+
+            op->u.firmware_info.u.vbeddc_info.capabilities =
+                bootsym(boot_edid_caps);
+            op->u.firmware_info.u.vbeddc_info.edid_transfer_time =
+                bootsym(boot_edid_caps) >> 8;
+
+            ret = 0;
+            if ( copy_field_to_guest(u_xenpf_op, op, u.firmware_info.
+                                     u.vbeddc_info.capabilities) ||
+                 copy_field_to_guest(u_xenpf_op, op, u.firmware_info.
+                                     u.vbeddc_info.edid_transfer_time) ||
+                 copy_to_compat(op->u.firmware_info.u.vbeddc_info.edid,
+                                bootsym(boot_edid_info), 128) )
+                ret = -EFAULT;
+            break;
+        default:
+            ret = -EINVAL;
+            break;
+        }
+        break;
 
     default:
         ret = -ENOSYS;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/setup.c      Mon Jul 02 12:19:26 2007 -0600
@@ -405,7 +405,7 @@ void __init __start_xen(unsigned long mb
 void __init __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
-    char __cmdline[] = "", *cmdline = __cmdline;
+    char __cmdline[] = "", *cmdline = __cmdline, *kextra;
     unsigned long _initrd_start = 0, _initrd_len = 0;
     unsigned int initrdidx = 1;
     char *_policy_start = NULL;
@@ -426,6 +426,17 @@ void __init __start_xen(unsigned long mb
     /* Parse the command-line options. */
     if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
         cmdline = __va(mbi->cmdline);
+    if ( (kextra = strstr(cmdline, " -- ")) != NULL )
+    {
+        /*
+         * Options after ' -- ' separator belong to dom0.
+         *  1. Orphan dom0's options from Xen's command line.
+         *  2. Skip all but final leading space from dom0's options.
+         */
+        *kextra = '\0';
+        kextra += 3;
+        while ( kextra[1] == ' ' ) kextra++;
+    }
     cmdline_parse(cmdline);
 
     parse_video_info();
@@ -494,7 +505,7 @@ void __init __start_xen(unsigned long mb
 
     printk("Disc information:\n");
     printk(" Found %d MBR signatures\n",
-           bootsym(boot_edd_signature_nr));
+           bootsym(boot_mbr_signature_nr));
     printk(" Found %d EDD information structures\n",
            bootsym(boot_edd_info_nr));
 
@@ -1009,17 +1020,26 @@ void __init __start_xen(unsigned long mb
 
     /* Grab the DOM0 command line. */
     cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
-    if ( cmdline != NULL )
+    if ( (cmdline != NULL) || (kextra != NULL) )
     {
         static char dom0_cmdline[MAX_GUEST_CMDLINE];
 
-        /* Skip past the image name and copy to a local buffer. */
-        while ( *cmdline == ' ' ) cmdline++;
-        if ( (cmdline = strchr(cmdline, ' ')) != NULL )
+        dom0_cmdline[0] = '\0';
+
+        if ( cmdline != NULL )
         {
+            /* Skip past the image name and copy to a local buffer. */
             while ( *cmdline == ' ' ) cmdline++;
-            safe_strcpy(dom0_cmdline, cmdline);
+            if ( (cmdline = strchr(cmdline, ' ')) != NULL )
+            {
+                while ( *cmdline == ' ' ) cmdline++;
+                safe_strcpy(dom0_cmdline, cmdline);
+            }
         }
+
+        if ( kextra != NULL )
+            /* kextra always includes exactly one leading space. */
+            safe_strcat(dom0_cmdline, kextra);
 
         /* Append any extra parameters. */
         if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/traps.c      Mon Jul 02 12:19:26 2007 -0600
@@ -86,6 +86,7 @@ asmlinkage int do_ ## _name(struct cpu_u
 asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
 
 asmlinkage void nmi(void);
+asmlinkage void machine_check(void);
 DECLARE_TRAP_HANDLER(divide_error);
 DECLARE_TRAP_HANDLER(debug);
 DECLARE_TRAP_HANDLER(int3);
@@ -103,7 +104,6 @@ DECLARE_TRAP_HANDLER(simd_coprocessor_er
 DECLARE_TRAP_HANDLER(simd_coprocessor_error);
 DECLARE_TRAP_HANDLER(alignment_check);
 DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
-DECLARE_TRAP_HANDLER(machine_check);
 
 long do_set_debugreg(int reg, unsigned long value);
 unsigned long do_get_debugreg(int reg);
@@ -631,6 +631,7 @@ static int emulate_forced_invalid_op(str
     regs->ecx = c;
     regs->edx = d;
     regs->eip = eip;
+    regs->eflags &= ~X86_EFLAGS_RF;
 
     return EXCRET_fault_fixed;
 }
@@ -730,10 +731,11 @@ asmlinkage int do_int3(struct cpu_user_r
     return do_guest_trap(TRAP_int3, regs, 0);
 }
 
-asmlinkage int do_machine_check(struct cpu_user_regs *regs)
-{
-    fatal_trap(TRAP_machine_check, regs);
-    return 0;
+asmlinkage void do_machine_check(struct cpu_user_regs *regs)
+{
+    extern fastcall void (*machine_check_vector)(
+        struct cpu_user_regs *, long error_code);
+    machine_check_vector(regs, regs->error_code);
 }
 
 void propagate_page_fault(unsigned long addr, u16 error_code)
@@ -1787,6 +1789,7 @@ static int emulate_privileged_op(struct 
 
  done:
     regs->eip = eip;
+    regs->eflags &= ~X86_EFLAGS_RF;
     return EXCRET_fault_fixed;
 
  fail:
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/entry.S       Mon Jul 02 12:19:26 2007 -0600
@@ -72,48 +72,36 @@
         andl $~3,reg;            \
         movl (reg),reg;
 
-
         ALIGN
 restore_all_guest:
         ASSERT_INTERRUPTS_DISABLED
         testl $X86_EFLAGS_VM,UREGS_eflags(%esp)
-        jnz  restore_all_vm86
+        popl  %ebx
+        popl  %ecx
+        popl  %edx
+        popl  %esi
+        popl  %edi
+        popl  %ebp
+        popl  %eax
+        leal  4(%esp),%esp
+        jnz   .Lrestore_iret_guest
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
-        testl $2,UREGS_cs(%esp)
-        jnz   1f
+        testb $2,UREGS_cs-UREGS_eip(%esp)
+        jnz   .Lrestore_sregs_guest
         call  restore_ring0_guest
-        jmp   restore_all_vm86
-1:
+        jmp   .Lrestore_iret_guest
 #endif
-.Lft1:  mov  UREGS_ds(%esp),%ds
-.Lft2:  mov  UREGS_es(%esp),%es
-.Lft3:  mov  UREGS_fs(%esp),%fs
-.Lft4:  mov  UREGS_gs(%esp),%gs
-restore_all_vm86:
-        popl %ebx
-        popl %ecx
-        popl %edx
-        popl %esi
-        popl %edi
-        popl %ebp
-        popl %eax
-        addl $4,%esp
+.Lrestore_sregs_guest:
+.Lft1:  mov  UREGS_ds-UREGS_eip(%esp),%ds
+.Lft2:  mov  UREGS_es-UREGS_eip(%esp),%es
+.Lft3:  mov  UREGS_fs-UREGS_eip(%esp),%fs
+.Lft4:  mov  UREGS_gs-UREGS_eip(%esp),%gs
+.Lrestore_iret_guest:
 .Lft5:  iret
 .section .fixup,"ax"
-.Lfx5:  subl  $28,%esp
-        pushl 28(%esp)                 # error_code/entry_vector
-        movl  %eax,UREGS_eax+4(%esp)
-        movl  %ebp,UREGS_ebp+4(%esp)
-        movl  %edi,UREGS_edi+4(%esp)
-        movl  %esi,UREGS_esi+4(%esp)
-        movl  %edx,UREGS_edx+4(%esp)
-        movl  %ecx,UREGS_ecx+4(%esp)
-        movl  %ebx,UREGS_ebx+4(%esp)
-.Lfx1:  SET_XEN_SEGMENTS(a)
-        movl  %eax,%fs
-        movl  %eax,%gs
-        sti
-        popl  %esi
+.Lfx1:  sti
+        SAVE_ALL_GPRS
+        mov   UREGS_error_code(%esp),%esi
         pushfl                         # EFLAGS
         movl  $__HYPERVISOR_CS,%eax
         pushl %eax                     # CS
@@ -147,7 +135,7 @@ 1:      call  create_bounce_frame
         .long .Lft2,.Lfx1
         .long .Lft3,.Lfx1
         .long .Lft4,.Lfx1
-        .long .Lft5,.Lfx5
+        .long .Lft5,.Lfx1
 .previous
 .section __ex_table,"a"
         .long .Ldf1,failsafe_callback
@@ -169,8 +157,8 @@ ENTRY(hypercall)
 ENTRY(hypercall)
         subl $4,%esp
         FIXUP_RING0_GUEST_STACK
-        SAVE_ALL(b)
-        sti
+        SAVE_ALL(1f,1f)
+1:      sti
         GET_CURRENT(%ebx)
         cmpl  $NR_hypercalls,%eax
         jae   bad_hypercall
@@ -420,9 +408,14 @@ ENTRY(divide_error)
         ALIGN
 handle_exception:
         FIXUP_RING0_GUEST_STACK
-        SAVE_ALL_NOSEGREGS(a)
-        SET_XEN_SEGMENTS(a)
-        testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
+        SAVE_ALL(1f,2f)
+        .text 1
+        /* Exception within Xen: make sure we have valid %ds,%es. */
+1:      mov   %ecx,%ds
+        mov   %ecx,%es
+        jmp   2f
+        .previous
+2:      testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
         jz    exception_with_ints_disabled
         sti                             # re-enable interrupts
 1:      xorl  %eax,%eax
@@ -533,71 +526,81 @@ ENTRY(page_fault)
         movw  $TRAP_page_fault,2(%esp)
         jmp   handle_exception
 
-ENTRY(machine_check)
-        pushl $TRAP_machine_check<<16
-        jmp   handle_exception
-
 ENTRY(spurious_interrupt_bug)
         pushl $TRAP_spurious_int<<16
         jmp   handle_exception
 
 ENTRY(early_page_fault)
-        SAVE_ALL_NOSEGREGS(a)
-        movl  %esp,%edx
-        pushl %edx
+        SAVE_ALL(1f,1f)
+1:      movl  %esp,%eax
+        pushl %eax
         call  do_early_page_fault
         addl  $4,%esp
         jmp   restore_all_xen
 
-ENTRY(nmi)
+handle_nmi_mce:
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
-        # NMI entry protocol is incompatible with guest kernel in ring 0.
+        # NMI/MCE entry protocol is incompatible with guest kernel in ring 0.
+        addl  $4,%esp
         iret
 #else
         # Save state but do not trash the segment registers!
-        # We may otherwise be unable to reload them or copy them to ring 1. 
-        pushl %eax
-        SAVE_ALL_NOSEGREGS(a)
-
-        # We can only process the NMI if:
-        #  A. We are the outermost Xen activation (in which case we have
-        #     the selectors safely saved on our stack)
-        #  B. DS and ES contain sane Xen values.
-        # In all other cases we bail without touching DS-GS, as we have
-        # interrupted an enclosing Xen activation in tricky prologue or
-        # epilogue code.
-        movl  UREGS_eflags(%esp),%eax
-        movb  UREGS_cs(%esp),%al
-        testl $(3|X86_EFLAGS_VM),%eax
-        jnz   continue_nmi
-        movl  %ds,%eax
-        cmpw  $(__HYPERVISOR_DS),%ax
-        jne   defer_nmi
-        movl  %es,%eax
-        cmpw  $(__HYPERVISOR_DS),%ax
-        jne   defer_nmi
-
-continue_nmi:
-        SET_XEN_SEGMENTS(d)
+        SAVE_ALL(.Lnmi_mce_xen,.Lnmi_mce_common)
+.Lnmi_mce_common:
+        xorl  %eax,%eax
+        movw  UREGS_entry_vector(%esp),%ax
         movl  %esp,%edx
         pushl %edx
-        call  do_nmi
+        call  *exception_table(,%eax,4)
         addl  $4,%esp
+        /* 
+         * NB. We may return to Xen context with polluted %ds/%es. But in such
+         * cases we have put guest DS/ES on the guest stack frame, which will
+         * be detected by SAVE_ALL(), or we have rolled back restore_guest.
+         */
         jmp   ret_from_intr
-
-defer_nmi:
-        movl  $FIXMAP_apic_base,%eax
-        # apic_wait_icr_idle()
-1:      movl  %ss:APIC_ICR(%eax),%ebx
-        testl $APIC_ICR_BUSY,%ebx
-        jnz   1b
-        # __send_IPI_shortcut(APIC_DEST_SELF, TRAP_deferred_nmi)
-        movl  $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_PHYSICAL | \
-                TRAP_deferred_nmi),%ss:APIC_ICR(%eax)
-        jmp   restore_all_xen
+.Lnmi_mce_xen:
+        /* Check the outer (guest) context for %ds/%es state validity. */
+        GET_GUEST_REGS(%ebx)
+        testl $X86_EFLAGS_VM,%ss:UREGS_eflags(%ebx)
+        mov   %ds,%eax
+        mov   %es,%edx
+        jnz   .Lnmi_mce_vm86
+        /* We may have interrupted Xen while messing with %ds/%es... */
+        cmpw  %ax,%cx
+        mov   %ecx,%ds             /* Ensure %ds is valid */
+        cmove UREGS_ds(%ebx),%eax  /* Grab guest DS if it wasn't in %ds */
+        cmpw  %dx,%cx
+        movl  %eax,UREGS_ds(%ebx)  /* Ensure guest frame contains guest DS */
+        cmove UREGS_es(%ebx),%edx  /* Grab guest ES if it wasn't in %es */
+        mov   %ecx,%es             /* Ensure %es is valid */
+        movl  $.Lrestore_sregs_guest,%ecx
+        movl  %edx,UREGS_es(%ebx)  /* Ensure guest frame contains guest ES */
+        cmpl  %ecx,UREGS_eip(%esp)
+        jbe   .Lnmi_mce_common
+        cmpl  $.Lrestore_iret_guest,UREGS_eip(%esp)
+        ja    .Lnmi_mce_common
+        /* Roll outer context restore_guest back to restoring %ds/%es. */
+        movl  %ecx,UREGS_eip(%esp)
+        jmp   .Lnmi_mce_common
+.Lnmi_mce_vm86:
+        /* vm86 is easy: the CPU saved %ds/%es so we can safely stomp them. */
+        mov   %ecx,%ds
+        mov   %ecx,%es
+        jmp   .Lnmi_mce_common
 #endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */
 
+ENTRY(nmi)
+        pushl $TRAP_nmi<<16
+        jmp   handle_nmi_mce
+
+ENTRY(machine_check)
+        pushl $TRAP_machine_check<<16
+        jmp   handle_nmi_mce
+
 ENTRY(setup_vm86_frame)
+        mov %ecx,%ds
+        mov %ecx,%es
         # Copies the entire stack frame forwards by 16 bytes.
         .macro copy_vm86_words count=18
         .if \count
@@ -615,7 +618,7 @@ ENTRY(exception_table)
 ENTRY(exception_table)
         .long do_divide_error
         .long do_debug
-        .long 0 # nmi
+        .long do_nmi
         .long do_int3
         .long do_overflow
         .long do_bounds
diff -r 443ce7edad0e -r d146700adf71 
xen/arch/x86/x86_32/supervisor_mode_kernel.S
--- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S      Mon Jul 02 10:31:03 
2007 -0600
+++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S      Mon Jul 02 12:19:26 
2007 -0600
@@ -20,40 +20,45 @@
 #include <asm/asm_defns.h>
 #include <public/xen.h>
 
+#define guestreg(field) ((field)-UREGS_eip+36)
+
         # Upon entry the stack should be the Xen stack and contain:
-        #   %ss, %esp, EFLAGS, %cs|1, %eip, ERROR, SAVE_ALL, RETURN
+        #   %ss, %esp, EFLAGS, %cs|1, %eip, RETURN
         # On exit the stack should be %ss:%esp (i.e. the guest stack)
         # and contain:
-        #   EFLAGS, %cs, %eip, ERROR, SAVE_ALL, RETURN
+        #   EFLAGS, %cs, %eip, RETURN
         ALIGN
 ENTRY(restore_ring0_guest)
+        pusha
+
         # Point %gs:%esi to guest stack.
-RRG0:   movw UREGS_ss+4(%esp),%gs
-        movl UREGS_esp+4(%esp),%esi
+RRG0:   movw guestreg(UREGS_ss)(%esp),%gs
+        movl guestreg(UREGS_esp)(%esp),%esi
 
-        # Copy EFLAGS...EBX, RETURN from Xen stack to guest stack.
-        movl $(UREGS_kernel_sizeof>>2)+1,%ecx
+        # Copy EFLAGS, %cs, %eip, RETURN, PUSHA from Xen stack to guest stack.
+        movl $12,%ecx /* 12 32-bit values */
 
 1:      subl $4,%esi
         movl -4(%esp,%ecx,4),%eax
 RRG1:   movl %eax,%gs:(%esi)
         loop 1b
 
-RRG2:   andl $~3,%gs:UREGS_cs+4(%esi)
+RRG2:   andl $~3,%gs:guestreg(UREGS_cs)(%esi)
 
         movl %gs,%eax
 
         # We need to do this because these registers are not present
         # on the guest stack so they cannot be restored by the code in
         # restore_all_guest.
-RRG3:   mov  UREGS_ds+4(%esp),%ds
-RRG4:   mov  UREGS_es+4(%esp),%es
-RRG5:   mov  UREGS_fs+4(%esp),%fs
-RRG6:   mov  UREGS_gs+4(%esp),%gs
+RRG3:   mov  guestreg(UREGS_ds)(%esp),%ds
+RRG4:   mov  guestreg(UREGS_es)(%esp),%es
+RRG5:   mov  guestreg(UREGS_fs)(%esp),%fs
+RRG6:   mov  guestreg(UREGS_gs)(%esp),%gs
 
 RRG7:   movl %eax,%ss
         movl %esi,%esp
 
+        popa
         ret
 .section __ex_table,"a"
         .long RRG0,domain_crash_synchronous
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c       Mon Jul 02 12:19:26 2007 -0600
@@ -232,15 +232,6 @@ unsigned long do_iret(void)
     return 0;
 }
 
-#include <asm/asm_defns.h>
-BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
-fastcall void smp_deferred_nmi(struct cpu_user_regs *regs)
-{
-    asmlinkage void do_nmi(struct cpu_user_regs *);
-    ack_APIC_irq();
-    do_nmi(regs);
-}
-
 void __init percpu_traps_init(void)
 {
     struct tss_struct *tss = &doublefault_tss;
@@ -251,8 +242,6 @@ void __init percpu_traps_init(void)
 
     /* The hypercall entry vector is only accessible from ring 1. */
     _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
-
-    set_intr_gate(TRAP_deferred_nmi, &deferred_nmi);
 
     /*
      * Make a separate task for double faults. This will get us debug output if
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile      Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/Makefile      Mon Jul 02 12:19:26 2007 -0600
@@ -1,12 +1,12 @@ subdir-y += compat
 subdir-y += compat
 
 obj-y += entry.o
-obj-y += compat_kexec.o
 obj-y += gpr_switch.o
 obj-y += mm.o
 obj-y += traps.o
 
 obj-$(CONFIG_COMPAT) += compat.o
+obj-$(CONFIG_COMPAT) += compat_kexec.o
 obj-$(CONFIG_COMPAT) += domain.o
 obj-$(CONFIG_COMPAT) += physdev.o
 obj-$(CONFIG_COMPAT) += platform_hypercall.o
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/compat/entry.S
--- a/xen/arch/x86/x86_64/compat/entry.S        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/compat/entry.S        Mon Jul 02 12:19:26 2007 -0600
@@ -143,12 +143,12 @@ compat_restore_all_guest:
 .Lft0:  iretq
 
 .section .fixup,"ax"
-.Lfx0:  popq  -15*8-8(%rsp)            # error_code/entry_vector
-        SAVE_ALL                       # 15*8 bytes pushed
-        movq  -8(%rsp),%rsi            # error_code/entry_vector
-        sti                            # after stack abuse (-1024(%rsp))
+.Lfx0:  sti
+        SAVE_ALL
+        movq  UREGS_error_code(%rsp),%rsi
+        movq  %rsp,%rax
+        andq  $~0xf,%rsp
         pushq $__HYPERVISOR_DS         # SS
-        leaq  8(%rsp),%rax
         pushq %rax                     # RSP
         pushfq                         # RFLAGS
         pushq $__HYPERVISOR_CS         # CS
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/entry.S       Mon Jul 02 12:19:26 2007 -0600
@@ -57,23 +57,23 @@ 1:      sysretl
 /* No special register assumptions. */
 iret_exit_to_guest:
         addq  $8,%rsp
-.Lft1:  iretq
+.Lft0:  iretq
 
 .section .fixup,"ax"
-.Lfx1:  popq  -15*8-8(%rsp)            # error_code/entry_vector
-        SAVE_ALL                       # 15*8 bytes pushed
-        movq  -8(%rsp),%rsi            # error_code/entry_vector
-        sti                            # after stack abuse (-1024(%rsp))
+.Lfx0:  sti
+        SAVE_ALL
+        movq  UREGS_error_code(%rsp),%rsi
+        movq  %rsp,%rax
+        andq  $~0xf,%rsp
         pushq $__HYPERVISOR_DS         # SS
-        leaq  8(%rsp),%rax
         pushq %rax                     # RSP
-        pushf                          # RFLAGS
+        pushfq                         # RFLAGS
         pushq $__HYPERVISOR_CS         # CS
-        leaq  .Ldf1(%rip),%rax
+        leaq  .Ldf0(%rip),%rax
         pushq %rax                     # RIP
         pushq %rsi                     # error_code/entry_vector
         jmp   handle_exception
-.Ldf1:  GET_CURRENT(%rbx)
+.Ldf0:  GET_CURRENT(%rbx)
         jmp   test_all_events
 failsafe_callback:
         GET_CURRENT(%rbx)
@@ -88,10 +88,10 @@ 1:      call  create_bounce_frame
         jmp   test_all_events
 .previous
 .section __pre_ex_table,"a"
-        .quad .Lft1,.Lfx1
+        .quad .Lft0,.Lfx0
 .previous
 .section __ex_table,"a"
-        .quad .Ldf1,failsafe_callback
+        .quad .Ldf0,failsafe_callback
 .previous
 
         ALIGN
@@ -505,11 +505,6 @@ ENTRY(page_fault)
         movl  $TRAP_page_fault,4(%rsp)
         jmp   handle_exception
 
-ENTRY(machine_check)
-        pushq $0
-        movl  $TRAP_machine_check,4(%rsp)
-        jmp   handle_exception
-
 ENTRY(spurious_interrupt_bug)
         pushq $0
         movl  $TRAP_spurious_int,4(%rsp)
@@ -527,31 +522,38 @@ ENTRY(early_page_fault)
         call  do_early_page_fault
         jmp   restore_all_xen
 
+handle_ist_exception:
+        SAVE_ALL
+        testb $3,UREGS_cs(%rsp)
+        jz    1f
+        /* Interrupted guest context. Copy the context to stack bottom. */
+        GET_GUEST_REGS(%rdi)
+        movq  %rsp,%rsi
+        movl  $UREGS_kernel_sizeof/8,%ecx
+        movq  %rdi,%rsp
+        rep   movsq
+1:      movq  %rsp,%rdi
+        movl  UREGS_entry_vector(%rsp),%eax
+        leaq  exception_table(%rip),%rdx
+        callq *(%rdx,%rax,8)
+        jmp   ret_from_intr
+
 ENTRY(nmi)
         pushq $0
-        SAVE_ALL
-        testb $3,UREGS_cs(%rsp)
-        jz    nmi_in_hypervisor_mode
-        /* Interrupted guest context. Copy the context to stack bottom. */
-        GET_GUEST_REGS(%rbx)
-        movl  $UREGS_kernel_sizeof/8,%ecx
-1:      popq  %rax
-        movq  %rax,(%rbx)
-        addq  $8,%rbx
-        loop  1b
-        subq  $UREGS_kernel_sizeof,%rbx
-        movq  %rbx,%rsp
-nmi_in_hypervisor_mode:
-        movq  %rsp,%rdi
-        call  do_nmi
-        jmp   ret_from_intr
+        movl  $TRAP_nmi,4(%rsp)
+        jmp   handle_ist_exception
+
+ENTRY(machine_check)
+        pushq $0
+        movl  $TRAP_machine_check,4(%rsp)
+        jmp   handle_ist_exception
 
 .data
 
 ENTRY(exception_table)
         .quad do_divide_error
         .quad do_debug
-        .quad 0 # nmi
+        .quad do_nmi
         .quad do_int3
         .quad do_overflow
         .quad do_bounds
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/mm.c  Mon Jul 02 12:19:26 2007 -0600
@@ -106,7 +106,8 @@ void __init paging_init(void)
     /* Create user-accessible L2 directory to map the MPT for guests. */
     if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
         goto nomem;
-    l3_ro_mpt = clear_page(page_to_virt(l2_pg));
+    l3_ro_mpt = page_to_virt(l2_pg);
+    clear_page(l3_ro_mpt);
     l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
               l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
 
@@ -132,7 +133,8 @@ void __init paging_init(void)
             if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
                 goto nomem;
             va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
-            l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+            l2_ro_mpt = page_to_virt(l2_pg);
+            clear_page(l2_ro_mpt);
             l3e_write(&l3_ro_mpt[l3_table_offset(va)],
                       l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
             l2_ro_mpt += l2_table_offset(va);
@@ -152,7 +154,8 @@ void __init paging_init(void)
         l3_ro_mpt = 
l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
         if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
             goto nomem;
-        compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+        compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
+        clear_page(l2_ro_mpt);
         l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
                   l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
         l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c       Mon Jul 02 12:19:26 2007 -0600
@@ -294,8 +294,9 @@ void __init percpu_traps_init(void)
     {
         /* Specify dedicated interrupt stacks for NMIs and double faults. */
         set_intr_gate(TRAP_double_fault, &double_fault);
-        idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
-        idt_table[TRAP_nmi].a          |= 2UL << 32; /* IST2 */
+        idt_table[TRAP_double_fault].a  |= 1UL << 32; /* IST1 */
+        idt_table[TRAP_nmi].a           |= 2UL << 32; /* IST2 */
+        idt_table[TRAP_machine_check].a |= 3UL << 32; /* IST3 */
 
         /*
          * The 32-on-64 hypercall entry vector is only accessible from ring 1.
@@ -310,7 +311,10 @@ void __init percpu_traps_init(void)
     stack_bottom = (char *)get_stack_bottom();
     stack        = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
 
-    /* Double-fault handler has its own per-CPU 2kB stack. */
+    /* Machine Check handler has its own per-CPU 1kB stack. */
+    init_tss[cpu].ist[2] = (unsigned long)&stack[1024];
+
+    /* Double-fault handler has its own per-CPU 1kB stack. */
     init_tss[cpu].ist[0] = (unsigned long)&stack[2048];
 
     /* NMI handler has its own per-CPU 1kB stack. */
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_emulate.c        Mon Jul 02 12:19:26 2007 -0600
@@ -1630,6 +1630,7 @@ x86_emulate(
     }
 
     /* Commit shadow register state. */
+    _regs.eflags &= ~EF_RF;
     *ctxt->regs = _regs;
 
  done:
diff -r 443ce7edad0e -r d146700adf71 xen/common/sysctl.c
--- a/xen/common/sysctl.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/common/sysctl.c       Mon Jul 02 12:19:26 2007 -0600
@@ -136,6 +136,39 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
     }
     break;
 
+    case XEN_SYSCTL_getcpuinfo:
+    {
+        uint32_t i, nr_cpus;
+        struct xen_sysctl_cpuinfo cpuinfo;
+        struct vcpu *v;
+
+        nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS);
+
+        for ( i = 0; i < nr_cpus; i++ )
+        {
+            /* Assume no holes in idle-vcpu map. */
+            if ( (v = idle_vcpu[i]) == NULL )
+                break;
+
+            cpuinfo.idletime = v->runstate.time[RUNSTATE_running];
+            if ( v->is_running )
+                cpuinfo.idletime += NOW() - v->runstate.state_entry_time;
+
+            if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
+            {
+                ret = -EFAULT;
+                break;
+            }
+        }
+
+        op->u.getcpuinfo.nr_cpus = i;
+        ret = 0;
+
+        if ( copy_to_guest(u_sysctl, op, 1) )
+            ret = -EFAULT;
+    }
+    break;
+
     default:
         ret = arch_do_sysctl(op, u_sysctl);
         break;
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/edd.h
--- a/xen/include/asm-x86/edd.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/edd.h Mon Jul 02 12:19:26 2007 -0600
@@ -32,12 +32,22 @@ struct edd_info {
     u16 legacy_max_cylinder;     /* %cl[7:6]:%ch: maximum cylinder number */
     u8 legacy_max_head;          /* %dh: maximum head number */
     u8 legacy_sectors_per_track; /* %cl[5:0]: maximum sector number */
-    /* Int13, Fn41: Get Device Parameters */
-    u8 edd_device_params[74];    /* as filled into %ds:%si */
+    /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+    struct {
+        u16 length;
+        u8 data[72];
+    } edd_device_params;
 } __attribute__ ((packed));
 
-extern u32 boot_edd_signature[];
-extern u8 boot_edd_signature_nr;
+struct mbr_signature {
+    u8 device;
+    u8 pad[3];
+    u32 signature;
+} __attribute__ ((packed));
+
+/* These all reside in the boot trampoline. Access via bootsym(). */
+extern struct mbr_signature boot_mbr_signature[];
+extern u8 boot_mbr_signature_nr;
 extern struct edd_info boot_edd_info[];
 extern u8 boot_edd_info_nr;
 
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h     Mon Jul 02 12:19:26 2007 -0600
@@ -302,4 +302,18 @@ static inline int hvm_event_injection_fa
     return hvm_funcs.event_injection_faulted(v);
 }
 
+/* These bits in CR4 are owned by the host. */
+#define HVM_CR4_HOST_MASK (mmu_cr4_features & \
+    (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
+
+/* These bits in CR4 cannot be set by the guest. */
+#define HVM_CR4_GUEST_RESERVED_BITS \
+    ~(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
+      X86_CR4_DE  | X86_CR4_PSE | X86_CR4_PAE | \
+      X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
+      X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
+
+/* These exceptions must always be intercepted. */
+#define HVM_TRAP_MASK (1U << TRAP_machine_check)
+
 #endif /* __ASM_X86_HVM_HVM_H__ */
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/emulate.h     Mon Jul 02 12:19:26 2007 -0600
@@ -138,6 +138,7 @@ static void inline __update_guest_eip(
 {
     ASSERT(inst_len > 0);
     vmcb->rip += inst_len;
+    vmcb->rflags &= ~X86_EFLAGS_RF;
 }
 
 #endif /* __ASM_X86_HVM_SVM_EMULATE_H__ */
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h        Mon Jul 02 12:19:26 2007 -0600
@@ -464,14 +464,6 @@ void svm_destroy_vmcb(struct vcpu *v);
 
 void setup_vmcb_dump(void);
 
-/* These bits in the CR4 are owned by the host */
-#if CONFIG_PAGING_LEVELS >= 3
-#define SVM_CR4_HOST_MASK (X86_CR4_PAE)
-#else
-#define SVM_CR4_HOST_MASK 0
-#endif
-
-
 #endif /* ASM_X86_HVM_SVM_VMCS_H__ */
 
 /*
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/trace.h
--- a/xen/include/asm-x86/hvm/trace.h   Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/trace.h   Mon Jul 02 12:19:26 2007 -0600
@@ -21,6 +21,7 @@
 #define DO_TRC_HVM_CPUID       1
 #define DO_TRC_HVM_INTR        1
 #define DO_TRC_HVM_NMI         1
+#define DO_TRC_HVM_MCE         1
 #define DO_TRC_HVM_SMI         1
 #define DO_TRC_HVM_VMMCALL     1
 #define DO_TRC_HVM_HLT         1
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h        Mon Jul 02 12:19:26 2007 -0600
@@ -130,6 +130,8 @@ extern u32 vmx_vmentry_control;
 
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
 extern u32 vmx_secondary_exec_control;
+
+extern bool_t cpu_has_vmx_ins_outs_instr_info;
 
 #define cpu_has_vmx_virtualize_apic_accesses \
     (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 02 12:19:26 2007 -0600
@@ -143,13 +143,6 @@ void vmx_vlapic_msr_changed(struct vcpu 
 #define X86_SEG_AR_GRANULARITY  (1u << 15) /* 15, granularity */
 #define X86_SEG_AR_SEG_UNUSABLE (1u << 16) /* 16, segment unusable */
 
-/* These bits in the CR4 are owned by the host */
-#if CONFIG_PAGING_LEVELS >= 3
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
-#else
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
-#endif
-
 #define VMCALL_OPCODE   ".byte 0x0f,0x01,0xc1\n"
 #define VMCLEAR_OPCODE  ".byte 0x66,0x0f,0xc7\n"        /* reg/opcode: /6 */
 #define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/page.h        Mon Jul 02 12:19:26 2007 -0600
@@ -192,8 +192,9 @@ static inline l4_pgentry_t l4e_from_padd
 #define pgentry_ptr_to_slot(_p)    \
     (((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p)))
 
+#ifndef __ASSEMBLY__
+
 /* Page-table type. */
-#ifndef __ASSEMBLY__
 #if CONFIG_PAGING_LEVELS == 2
 /* x86_32 default */
 typedef struct { u32 pfn; } pagetable_t;
@@ -214,9 +215,11 @@ typedef struct { u64 pfn; } pagetable_t;
 #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
 #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
 #define pagetable_null()        pagetable_from_pfn(0)
-#endif
-
-#define clear_page(_p)      memset((void *)(_p), 0, PAGE_SIZE)
+
+void clear_page_sse2(void *);
+#define clear_page(_p)      (cpu_has_xmm2 ?                             \
+                             clear_page_sse2((void *)(_p)) :            \
+                             (void)memset((void *)(_p), 0, PAGE_SIZE))
 #define copy_page(_t,_f)    memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
 
 #define mfn_valid(mfn)      ((mfn) < max_page)
@@ -244,6 +247,8 @@ typedef struct { u64 pfn; } pagetable_t;
 /* Convert between frame number and address formats.  */
 #define pfn_to_paddr(pfn)   ((paddr_t)(pfn) << PAGE_SHIFT)
 #define paddr_to_pfn(pa)    ((unsigned long)((pa) >> PAGE_SHIFT))
+
+#endif /* !defined(__ASSEMBLY__) */
 
 /* High table entries are reserved by the hypervisor. */
 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/processor.h   Mon Jul 02 12:19:26 2007 -0600
@@ -104,7 +104,6 @@
 #define TRAP_alignment_check  17
 #define TRAP_machine_check    18
 #define TRAP_simd_error       19
-#define TRAP_deferred_nmi     31
 
 /* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */
 /* NB. Same as VGCF_in_syscall. No bits in common with any other TRAP_ defn. */
@@ -567,7 +566,8 @@ extern void mtrr_ap_init(void);
 extern void mtrr_ap_init(void);
 extern void mtrr_bp_init(void);
 
-extern void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_init(struct cpuinfo_x86 *c);
+asmlinkage void do_machine_check(struct cpu_user_regs *regs);
 
 int cpuid_hypervisor_leaves(
     uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h    Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/x86_32/asm_defns.h    Mon Jul 02 12:19:26 2007 -0600
@@ -26,7 +26,7 @@ 1:      addl  $4,%esp;
 #define ASSERT_INTERRUPTS_ENABLED  ASSERT_INTERRUPT_STATUS(nz)
 #define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
 
-#define __SAVE_ALL_PRE                                  \
+#define SAVE_ALL_GPRS                                   \
         cld;                                            \
         pushl %eax;                                     \
         pushl %ebp;                                     \
@@ -35,32 +35,48 @@ 1:      addl  $4,%esp;
         pushl %esi;                                     \
         pushl %edx;                                     \
         pushl %ecx;                                     \
-        pushl %ebx;                                     \
+        pushl %ebx
+
+/*
+ * Saves all register state into an exception/interrupt stack frame.
+ * Returns to the caller at <xen_lbl> if the interrupted context is within
+ * Xen; at <vm86_lbl> if the interrupted context is vm86; or falls through
+ * if the interrupted context is an ordinary guest protected-mode context.
+ * In all cases %ecx contains __HYPERVISOR_DS. %ds/%es are guaranteed to
+ * contain __HYPERVISOR_DS unless control passes to <xen_lbl>, in which case
+ * the caller is responsible for validity of %ds/%es.
+ */
+#define SAVE_ALL(xen_lbl, vm86_lbl)                     \
+        SAVE_ALL_GPRS;                                  \
         testl $(X86_EFLAGS_VM),UREGS_eflags(%esp);      \
-        jz 2f;                                          \
-        call setup_vm86_frame;                          \
-        jmp 3f;                                         \
-        2:testb $3,UREGS_cs(%esp);                      \
-        jz 1f;                                          \
-        mov %ds,UREGS_ds(%esp);                         \
-        mov %es,UREGS_es(%esp);                         \
-        mov %fs,UREGS_fs(%esp);                         \
-        mov %gs,UREGS_gs(%esp);                         \
-        3:
-
-#define SAVE_ALL_NOSEGREGS(_reg)                \
-        __SAVE_ALL_PRE                          \
-        1:
-
-#define SET_XEN_SEGMENTS(_reg)                          \
-        movl $(__HYPERVISOR_DS),%e ## _reg ## x;        \
-        mov %e ## _reg ## x,%ds;                        \
-        mov %e ## _reg ## x,%es;
-
-#define SAVE_ALL(_reg)                          \
-        __SAVE_ALL_PRE                          \
-        SET_XEN_SEGMENTS(_reg)                  \
-        1:
+        mov   %ds,%edi;                                 \
+        mov   %es,%esi;                                 \
+        mov   $(__HYPERVISOR_DS),%ecx;                  \
+        jnz   86f;                                      \
+        .text 1;                                        \
+        86:   call setup_vm86_frame;                    \
+        jmp   vm86_lbl;                                 \
+        .previous;                                      \
+        testb $3,UREGS_cs(%esp);                        \
+        jz    xen_lbl;                                  \
+        /*                                              \
+         * We are the outermost Xen context, but our    \
+         * life is complicated by NMIs and MCEs. These  \
+         * could occur in our critical section and      \
+         * pollute %ds and %es. We have to detect that  \
+         * this has occurred and avoid saving Xen DS/ES \
+         * values to the guest stack frame.             \
+         */                                             \
+        cmpw  %cx,%di;                                  \
+        mov   %ecx,%ds;                                 \
+        mov   %fs,UREGS_fs(%esp);                       \
+        cmove UREGS_ds(%esp),%edi;                      \
+        cmpw  %cx,%si;                                  \
+        mov   %edi,UREGS_ds(%esp);                      \
+        cmove UREGS_es(%esp),%esi;                      \
+        mov   %ecx,%es;                                 \
+        mov   %gs,UREGS_gs(%esp);                       \
+        mov   %esi,UREGS_es(%esp)
 
 #ifdef PERF_COUNTERS
 #define PERFC_INCR(_name,_idx,_cur)                     \
@@ -97,8 +113,8 @@ __asm__(                                
     STR(x) ":\n\t"                              \
     "pushl $"#v"<<16\n\t"                       \
     STR(FIXUP_RING0_GUEST_STACK)                \
-    STR(SAVE_ALL(a))                            \
-    "movl %esp,%eax\n\t"                        \
+    STR(SAVE_ALL(1f,1f)) "\n\t"                 \
+    "1:movl %esp,%eax\n\t"                      \
     "pushl %eax\n\t"                            \
     "call "STR(smp_##x)"\n\t"                   \
     "addl $4,%esp\n\t"                          \
@@ -109,8 +125,8 @@ __asm__(                                
     "\n" __ALIGN_STR"\n"                        \
     "common_interrupt:\n\t"                     \
     STR(FIXUP_RING0_GUEST_STACK)                \
-    STR(SAVE_ALL(a))                            \
-    "movl %esp,%eax\n\t"                        \
+    STR(SAVE_ALL(1f,1f)) "\n\t"                 \
+    "1:movl %esp,%eax\n\t"                      \
     "pushl %eax\n\t"                            \
     "call " STR(do_IRQ) "\n\t"                  \
     "addl $4,%esp\n\t"                          \
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/platform.h
--- a/xen/include/public/platform.h     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/platform.h     Mon Jul 02 12:19:26 2007 -0600
@@ -114,6 +114,45 @@ typedef struct xenpf_platform_quirk xenp
 typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);
 
+#define XENPF_firmware_info       50
+#define XEN_FW_DISK_INFO          1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO        3 /* from int 10 AX=4f15 */
+struct xenpf_firmware_info {
+    /* IN variables. */
+    uint32_t type;
+    uint32_t index;
+    /* OUT variables. */
+    union {
+        struct {
+            /* Int13, Fn48: Check Extensions Present. */
+            uint8_t device;                   /* %dl: bios device number */
+            uint8_t version;                  /* %ah: major version      */
+            uint16_t interface_support;       /* %cx: support bitmap     */
+            /* Int13, Fn08: Legacy Get Device Parameters. */
+            uint16_t legacy_max_cylinder;     /* %cl[7:6]:%ch: max cyl # */
+            uint8_t legacy_max_head;          /* %dh: max head #         */
+            uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector #  */
+            /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+            /* NB. First uint16_t of buffer must be set to buffer size.      */
+            XEN_GUEST_HANDLE(void) edd_params;
+        } disk_info; /* XEN_FW_DISK_INFO */
+        struct {
+            uint8_t device;                   /* bios device number  */
+            uint32_t mbr_signature;           /* offset 0x1b8 in mbr */
+        } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+        struct {
+            /* Int10, AX=4F15: Get EDID info. */
+            uint8_t capabilities;
+            uint8_t edid_transfer_time;
+            /* must refer to 128-byte buffer */
+            XEN_GUEST_HANDLE(uint8_t) edid;
+        } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+    } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
+
 struct xen_platform_op {
     uint32_t cmd;
     uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -124,6 +163,7 @@ struct xen_platform_op {
         struct xenpf_read_memtype      read_memtype;
         struct xenpf_microcode_update  microcode;
         struct xenpf_platform_quirk    platform_quirk;
+        struct xenpf_firmware_info     firmware_info;
         uint8_t                        pad[128];
     } u;
 };
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/sysctl.h       Mon Jul 02 12:19:26 2007 -0600
@@ -140,9 +140,7 @@ typedef struct xen_sysctl_getdomaininfol
 typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
 
-/*
- * Inject debug keys into Xen.
- */
+/* Inject debug keys into Xen. */
 #define XEN_SYSCTL_debug_keys        7
 struct xen_sysctl_debug_keys {
     /* IN variables. */
@@ -151,6 +149,23 @@ struct xen_sysctl_debug_keys {
 };
 typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
+
+/* Get physical CPU information. */
+#define XEN_SYSCTL_getcpuinfo        8
+struct xen_sysctl_cpuinfo {
+    uint64_t idletime;
+};
+typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); 
+struct xen_sysctl_getcpuinfo {
+    /* IN variables. */
+    uint32_t max_cpus;
+    XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info;
+    /* OUT variables. */
+    uint32_t nr_cpus;
+}; 
+typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); 
 
 struct xen_sysctl {
     uint32_t cmd;
@@ -163,6 +178,7 @@ struct xen_sysctl {
         struct xen_sysctl_perfc_op          perfc_op;
         struct xen_sysctl_getdomaininfolist getdomaininfolist;
         struct xen_sysctl_debug_keys        debug_keys;
+        struct xen_sysctl_getcpuinfo        getcpuinfo;
         uint8_t                             pad[128];
     } u;
 };
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/trace.h
--- a/xen/include/public/trace.h        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/trace.h        Mon Jul 02 12:19:26 2007 -0600
@@ -88,6 +88,7 @@
 #define TRC_HVM_VMMCALL         (TRC_HVM_HANDLER + 0x12)
 #define TRC_HVM_HLT             (TRC_HVM_HANDLER + 0x13)
 #define TRC_HVM_INVLPG          (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_MCE             (TRC_HVM_HANDLER + 0x15)
 
 /* This structure represents a single trace buffer record. */
 struct t_rec {

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.