
[Xen-changelog] [xen-unstable] x86 hvm: Replace old MMIO emulator with x86_emulate()-based harness.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1203518205 0
# Node ID 3f1cf03826fe642434197f898c3aac55dc81ad25
# Parent  f853c049709546b4f1fa1b4b03ddff165c163d38
x86 hvm: Replace old MMIO emulator with x86_emulate()-based harness.
Re-factor VMX real-mode emulation to use the same harness.
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
 xen/arch/x86/hvm/instrlen.c          |  419 ------------
 tools/ioemu/target-i386-dm/helper2.c |  107 ---
 xen/arch/x86/hvm/Makefile            |    2 
 xen/arch/x86/hvm/emulate.c           |  755 +++++++++++++++++++++++
 xen/arch/x86/hvm/hvm.c               |   50 -
 xen/arch/x86/hvm/intercept.c         |   62 -
 xen/arch/x86/hvm/io.c                |  723 ----------------------
 xen/arch/x86/hvm/platform.c          | 1136 -----------------------------------
 xen/arch/x86/hvm/stdvga.c            |   34 -
 xen/arch/x86/hvm/svm/svm.c           |  477 ++++++++------
 xen/arch/x86/hvm/vmx/realmode.c      |  773 ++---------------------
 xen/arch/x86/hvm/vmx/vmx.c           |   58 +
 xen/arch/x86/mm/shadow/multi.c       |    6 
 xen/include/asm-x86/hvm/emulate.h    |   55 +
 xen/include/asm-x86/hvm/hvm.h        |   12 
 xen/include/asm-x86/hvm/io.h         |   18 
 xen/include/asm-x86/hvm/support.h    |    1 
 xen/include/asm-x86/hvm/vcpu.h       |    9 
 xen/include/asm-x86/hvm/vmx/vmcs.h   |    5 
 xen/include/asm-x86/hvm/vmx/vmx.h    |    8 
 xen/include/public/hvm/ioreq.h       |    6 
 21 files changed, 1314 insertions(+), 3402 deletions(-)
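
The new harness is driven through three entry points defined in
xen/arch/x86/hvm/emulate.c below: hvm_emulate_prepare(), hvm_emulate_one()
and hvm_emulate_writeback(). A minimal sketch of the intended calling
sequence (only the hvm_emulate_* calls and X86EMUL_* codes come from this
patch; the surrounding exit-handler plumbing is hypothetical):

    struct hvm_emulate_ctxt ctxt;

    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());

    switch ( hvm_emulate_one(&ctxt) )
    {
    case X86EMUL_RETRY:
        /* An ioreq was sent to the device model; re-enter when it completes. */
        break;
    case X86EMUL_UNHANDLEABLE:
        /* Caller-specific failure path (e.g. inject a fault or crash the domain). */
        break;
    default:
        /* Flush any dirtied segment registers back to the guest. */
        hvm_emulate_writeback(&ctxt);
        break;
    }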

diff -r f853c0497095 -r 3f1cf03826fe tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Tue Feb 19 11:14:40 2008 -0700
+++ b/tools/ioemu/target-i386-dm/helper2.c      Wed Feb 20 14:36:45 2008 +0000
@@ -379,82 +379,7 @@ void cpu_ioreq_move(CPUState *env, ioreq
     }
 }
 
-void cpu_ioreq_and(CPUState *env, ioreq_t *req)
-{
-    target_ulong tmp1, tmp2;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    if (req->dir == IOREQ_WRITE) {
-        tmp2 = tmp1 & (target_ulong) req->data;
-        write_physical(req->addr, req->size, &tmp2);
-    }
-    req->data = tmp1;
-}
-
-void cpu_ioreq_add(CPUState *env, ioreq_t *req)
-{
-    target_ulong tmp1, tmp2;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    if (req->dir == IOREQ_WRITE) {
-        tmp2 = tmp1 + (target_ulong) req->data;
-        write_physical(req->addr, req->size, &tmp2);
-    }
-    req->data = tmp1;
-}
-
-void cpu_ioreq_sub(CPUState *env, ioreq_t *req)
-{
-    target_ulong tmp1, tmp2;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    if (req->dir == IOREQ_WRITE) {
-        tmp2 = tmp1 - (target_ulong) req->data;
-        write_physical(req->addr, req->size, &tmp2);
-    }
-    req->data = tmp1;
-}
-
-void cpu_ioreq_or(CPUState *env, ioreq_t *req)
-{
-    target_ulong tmp1, tmp2;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    if (req->dir == IOREQ_WRITE) {
-        tmp2 = tmp1 | (target_ulong) req->data;
-        write_physical(req->addr, req->size, &tmp2);
-    }
-    req->data = tmp1;
-}
-
-void cpu_ioreq_xor(CPUState *env, ioreq_t *req)
-{
-    target_ulong tmp1, tmp2;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    if (req->dir == IOREQ_WRITE) {
-        tmp2 = tmp1 ^ (target_ulong) req->data;
-        write_physical(req->addr, req->size, &tmp2);
-    }
-    req->data = tmp1;
-}
-
-void timeoffset_get()
+void timeoffset_get(void)
 {
     char *p;
 
@@ -481,18 +406,6 @@ void cpu_ioreq_timeoffset(CPUState *env,
     fprintf(logfile, "Time offset set %ld, added offset %ld\n", time_offset, req->data);
     sprintf(b, "%ld", time_offset);
     xenstore_vm_write(domid, "rtc/timeoffset", b);
-}
-
-void cpu_ioreq_xchg(CPUState *env, ioreq_t *req)
-{
-    unsigned long tmp1;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    write_physical(req->addr, req->size, &req->data);
-    req->data = tmp1;
 }
 
 void __handle_ioreq(CPUState *env, ioreq_t *req)
@@ -507,24 +420,6 @@ void __handle_ioreq(CPUState *env, ioreq
         break;
     case IOREQ_TYPE_COPY:
         cpu_ioreq_move(env, req);
-        break;
-    case IOREQ_TYPE_AND:
-        cpu_ioreq_and(env, req);
-        break;
-    case IOREQ_TYPE_ADD:
-        cpu_ioreq_add(env, req);
-        break;
-    case IOREQ_TYPE_SUB:
-        cpu_ioreq_sub(env, req);
-        break;
-    case IOREQ_TYPE_OR:
-        cpu_ioreq_or(env, req);
-        break;
-    case IOREQ_TYPE_XOR:
-        cpu_ioreq_xor(env, req);
-        break;
-    case IOREQ_TYPE_XCHG:
-        cpu_ioreq_xchg(env, req);
         break;
     case IOREQ_TYPE_TIMEOFFSET:
         cpu_ioreq_timeoffset(env, req);
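
With the ALU now inside Xen, the read-modify-write ioreq types
(AND/ADD/SUB/OR/XOR/XCHG) become dead: x86_emulate() decomposes such
accesses itself, so the device model only ever sees plain IOREQ_TYPE_COPY
reads and writes. Roughly, for an illustrative 'and %eax,<mmio>' (a
sketch of the flow, not code from this patch):

    ops->read(seg, off, &val, bytes, ctxt);  /* IOREQ_TYPE_COPY, IOREQ_READ  */
    val &= regs->eax;                        /* ALU step, inside x86_emulate */
    ops->write(seg, off, val, bytes, ctxt);  /* IOREQ_TYPE_COPY, IOREQ_WRITE */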
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/Makefile
--- a/xen/arch/x86/hvm/Makefile Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/Makefile Wed Feb 20 14:36:45 2008 +0000
@@ -1,9 +1,9 @@ subdir-y += svm
 subdir-y += svm
 subdir-y += vmx
 
+obj-y += emulate.o
 obj-y += hvm.o
 obj-y += i8254.o
-obj-y += instrlen.o
 obj-y += intercept.o
 obj-y += io.o
 obj-y += iommu.o
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/emulate.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/emulate.c        Wed Feb 20 14:36:45 2008 +0000
@@ -0,0 +1,755 @@
+/******************************************************************************
+ * hvm/emulate.c
+ * 
+ * HVM instruction emulation. Used for MMIO and VMX real mode.
+ * 
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ * 
+ * Authors:
+ *    Keir Fraser <keir.fraser@xxxxxxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/paging.h>
+#include <asm/event.h>
+#include <asm/hvm/emulate.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+
+/*
+ * Convert addr from linear to physical form, valid over the range
+ * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to
+ * the valid computed range. It is always >0 when X86EMUL_OKAY is returned.
+ */
+static int hvmemul_linear_to_phys(
+    unsigned long addr,
+    paddr_t *paddr,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    enum hvm_access_type access_type,
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    struct vcpu *curr = current;
+    unsigned long pfn, npfn, done, todo, i;
+    struct segment_register *sreg;
+    uint32_t pfec;
+
+    /* Clip repetitions to a sensible maximum. */
+    *reps = min_t(unsigned long, *reps, 4096);
+
+    /* With no paging it's easy: linear == physical. */
+    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
+    {
+        *paddr = addr;
+        return X86EMUL_OKAY;
+    }
+
+    *paddr = addr & ~PAGE_MASK;
+
+    /* Gather access-type information for the page walks. */
+    sreg = hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt);
+    pfec = PFEC_page_present;
+    if ( sreg->attr.fields.dpl == 3 )
+        pfec |= PFEC_user_mode;
+    if ( access_type == hvm_access_write )
+        pfec |= PFEC_write_access;
+
+    /* Get the first PFN in the range. */
+    if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
+    {
+        hvm_inject_exception(TRAP_page_fault, pfec, addr);
+        return X86EMUL_EXCEPTION;
+    }
+
+    /* If the range does not straddle a page boundary then we're done. */
+    done = PAGE_SIZE - (addr & ~PAGE_MASK);
+    todo = *reps * bytes_per_rep;
+    if ( done >= todo )
+        goto done;
+
+    addr += done;
+    for ( i = 1; done < todo; i++ )
+    {
+        /* Get the next PFN in the range. */
+        if ( (npfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
+        {
+            hvm_inject_exception(TRAP_page_fault, pfec, addr);
+            return X86EMUL_EXCEPTION;
+        }
+
+        /* Is it contiguous with the preceding PFNs? If not then we're done. */
+        if ( npfn != (pfn + i) )
+        {
+            done /= bytes_per_rep;
+            if ( done == 0 )
+                return X86EMUL_UNHANDLEABLE;
+            *reps = done;
+            break;
+        }
+
+        addr += PAGE_SIZE;
+        done += PAGE_SIZE;
+    }
+
+ done:
+    *paddr |= (paddr_t)pfn << PAGE_SHIFT;
+    return X86EMUL_OKAY;
+}
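+
+/*
+ * Worked example of the clipping contract (numbers purely illustrative):
+ * a rep-move with *reps == 10 and bytes_per_rep == 4, starting 8 bytes
+ * before a page boundary whose next page maps to a non-contiguous pfn,
+ * has done == 8 bytes == 2 whole reps when the discontinuity is found;
+ * *reps is clipped to 2 and X86EMUL_OKAY returned, so the caller
+ * emulates two iterations now and retries for the remainder.
+ */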
+    
+
+static int hvmemul_virtual_to_linear(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned int bytes,
+    enum hvm_access_type access_type,
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    unsigned long *paddr)
+{
+    struct segment_register *reg;
+    int okay;
+
+    if ( seg == x86_seg_none )
+    {
+        *paddr = offset;
+        return X86EMUL_OKAY;
+    }
+
+    reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+    okay = hvm_virtual_to_linear_addr(
+        seg, reg, offset, bytes, access_type,
+        hvmemul_ctxt->ctxt.addr_size, paddr);
+
+    if ( !okay )
+    {
+        hvmemul_ctxt->flags.exn_pending = 1;
+        hvmemul_ctxt->exn_vector = TRAP_gp_fault;
+        hvmemul_ctxt->exn_insn_len = 0;
+        return X86EMUL_EXCEPTION;
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static int __hvmemul_read(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned long *val,
+    unsigned int bytes,
+    enum hvm_access_type access_type,
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    unsigned long addr;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, access_type, hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    *val = 0;
+
+    rc = ((access_type == hvm_access_insn_fetch) ?
+          hvm_fetch_from_guest_virt(val, addr, bytes) :
+          hvm_copy_from_guest_virt(val, addr, bytes));
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        return X86EMUL_EXCEPTION;
+
+    if ( rc == HVMCOPY_bad_gfn_to_mfn )
+    {
+        struct vcpu *curr = current;
+        unsigned long reps = 1;
+        paddr_t gpa;
+
+        if ( access_type == hvm_access_insn_fetch )
+            return X86EMUL_UNHANDLEABLE;
+
+        rc = hvmemul_linear_to_phys(
+            addr, &gpa, bytes, &reps, access_type, hvmemul_ctxt);
+        if ( rc != X86EMUL_OKAY )
+            return rc;
+
+        if ( curr->arch.hvm_vcpu.io_in_progress )
+            return X86EMUL_UNHANDLEABLE;
+
+        if ( !curr->arch.hvm_vcpu.io_completed )
+        {
+            curr->arch.hvm_vcpu.io_in_progress = 1;
+            send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, bytes,
+                          0, IOREQ_READ, 0, 0);
+        }
+
+        if ( !curr->arch.hvm_vcpu.io_completed )
+            return X86EMUL_RETRY;
+
+        *val = curr->arch.hvm_vcpu.io_data;
+        curr->arch.hvm_vcpu.io_completed = 0;
+    }
+
+    return X86EMUL_OKAY;
+}
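+
+/*
+ * Note on the retry protocol above, as far as this patch shows it: the
+ * first emulation attempt finds io_in_progress == io_completed == 0,
+ * sends the ioreq, sets io_in_progress and returns X86EMUL_RETRY.  When
+ * the device model answers (completion handling is outside this file),
+ * io_completed is set and io_data holds the result; the instruction is
+ * then re-emulated from scratch and this time consumes io_data and
+ * clears io_completed.
+ */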
+
+static int hvmemul_read(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned long *val,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return __hvmemul_read(
+        seg, offset, val, bytes, hvm_access_read,
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt));
+}
+
+static int hvmemul_insn_fetch(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned long *val,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    unsigned int insn_off = offset - hvmemul_ctxt->insn_buf_eip;
+
+    /* Fall back if requested bytes are not in the prefetch cache. */
+    if ( unlikely((insn_off + bytes) > hvmemul_ctxt->insn_buf_bytes) )
+        return __hvmemul_read(
+            seg, offset, val, bytes,
+            hvm_access_insn_fetch, hvmemul_ctxt);
+
+    /* Hit the cache. Simple memcpy. */
+    *val = 0;
+    memcpy(val, &hvmemul_ctxt->insn_buf[insn_off], bytes);
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned long val,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    unsigned long addr;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, hvm_access_write, hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvm_copy_to_guest_virt(addr, &val, bytes);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        return X86EMUL_EXCEPTION;
+
+    if ( rc == HVMCOPY_bad_gfn_to_mfn )
+    {
+        struct vcpu *curr = current;
+        unsigned long reps = 1;
+        paddr_t gpa;
+
+        rc = hvmemul_linear_to_phys(
+            addr, &gpa, bytes, &reps, hvm_access_write, hvmemul_ctxt);
+        if ( rc != X86EMUL_OKAY )
+            return rc;
+
+        if ( curr->arch.hvm_vcpu.io_in_progress )
+            return X86EMUL_UNHANDLEABLE;
+
+        curr->arch.hvm_vcpu.io_in_progress = 1;
+        send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, bytes,
+                      val, IOREQ_WRITE, 0, 0);
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_cmpxchg(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned long old,
+    unsigned long new,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    /* Fix this in case the guest is really relying on r-m-w atomicity. */
+    return hvmemul_write(seg, offset, new, bytes, ctxt);
+}
+
+static int hvmemul_rep_ins(
+    uint16_t src_port,
+    enum x86_segment dst_seg,
+    unsigned long dst_offset,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct vcpu *curr = current;
+    unsigned long addr;
+    paddr_t gpa;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(
+        dst_seg, dst_offset, *reps * bytes_per_rep, hvm_access_write,
+        hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvmemul_linear_to_phys(
+        addr, &gpa, bytes_per_rep, reps, hvm_access_write, hvmemul_ctxt);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    if ( curr->arch.hvm_vcpu.io_in_progress )
+        return X86EMUL_UNHANDLEABLE;
+
+    if ( !curr->arch.hvm_vcpu.io_completed )
+    {
+        curr->arch.hvm_vcpu.io_in_progress = 1;
+        send_pio_req(src_port, *reps, bytes_per_rep,
+                     gpa, IOREQ_READ,
+                     !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
+    }
+
+    if ( !curr->arch.hvm_vcpu.io_completed )
+        return X86EMUL_RETRY;
+
+    curr->arch.hvm_vcpu.io_completed = 0;
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_outs(
+    enum x86_segment src_seg,
+    unsigned long src_offset,
+    uint16_t dst_port,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct vcpu *curr = current;
+    unsigned long addr;
+    paddr_t gpa;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(
+        src_seg, src_offset, *reps * bytes_per_rep, hvm_access_read,
+        hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvmemul_linear_to_phys(
+        addr, &gpa, bytes_per_rep, reps, hvm_access_read, hvmemul_ctxt);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    if ( curr->arch.hvm_vcpu.io_in_progress )
+        return X86EMUL_UNHANDLEABLE;
+
+    curr->arch.hvm_vcpu.io_in_progress = 1;
+    send_pio_req(dst_port, *reps, bytes_per_rep,
+                 gpa, IOREQ_WRITE,
+                 !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_movs(
+   enum x86_segment src_seg,
+   unsigned long src_offset,
+   enum x86_segment dst_seg,
+   unsigned long dst_offset,
+   unsigned int bytes_per_rep,
+   unsigned long *reps,
+   struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct vcpu *curr = current;
+    unsigned long saddr, daddr;
+    paddr_t sgpa, dgpa;
+    p2m_type_t p2mt;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(
+        src_seg, src_offset, *reps * bytes_per_rep, hvm_access_read,
+        hvmemul_ctxt, &saddr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvmemul_virtual_to_linear(
+        dst_seg, dst_offset, *reps * bytes_per_rep, hvm_access_write,
+        hvmemul_ctxt, &daddr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvmemul_linear_to_phys(
+        saddr, &sgpa, bytes_per_rep, reps, hvm_access_read, hvmemul_ctxt);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvmemul_linear_to_phys(
+        daddr, &dgpa, bytes_per_rep, reps, hvm_access_write, hvmemul_ctxt);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    if ( curr->arch.hvm_vcpu.io_in_progress )
+        return X86EMUL_UNHANDLEABLE;
+
+    (void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt);
+    if ( !p2m_is_ram(p2mt) )
+    {
+        if ( !curr->arch.hvm_vcpu.io_completed )
+        {
+            curr->arch.hvm_vcpu.io_in_progress = 1;
+            send_mmio_req(IOREQ_TYPE_COPY, sgpa, *reps, bytes_per_rep,
+                      dgpa, IOREQ_READ,
+                      !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
+        }
+
+        if ( !curr->arch.hvm_vcpu.io_completed )
+            return X86EMUL_RETRY;
+
+        curr->arch.hvm_vcpu.io_completed = 0;
+    }
+    else
+    {
+        (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt);
+        if ( p2m_is_ram(p2mt) )
+            return X86EMUL_UNHANDLEABLE;
+        curr->arch.hvm_vcpu.io_in_progress = 1;
+        send_mmio_req(IOREQ_TYPE_COPY, dgpa, *reps, bytes_per_rep,
+                      sgpa, IOREQ_WRITE,
+                      !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_read_segment(
+    enum x86_segment seg,
+    struct segment_register *reg,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+    memcpy(reg, sreg, sizeof(struct segment_register));
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_segment(
+    enum x86_segment seg,
+    struct segment_register *reg,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+
+    if ( seg == x86_seg_ss )
+        hvmemul_ctxt->flags.mov_ss = 1;
+
+    memcpy(sreg, reg, sizeof(struct segment_register));
+    __set_bit(seg, &hvmemul_ctxt->seg_reg_dirty);
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_read_io(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *curr = current;
+
+    if ( curr->arch.hvm_vcpu.io_in_progress )
+        return X86EMUL_UNHANDLEABLE;
+
+    if ( !curr->arch.hvm_vcpu.io_completed )
+    {
+        curr->arch.hvm_vcpu.io_in_progress = 1;
+        send_pio_req(port, 1, bytes, 0, IOREQ_READ, 0, 0);
+    }
+
+    if ( !curr->arch.hvm_vcpu.io_completed )
+        return X86EMUL_RETRY;
+
+    *val = curr->arch.hvm_vcpu.io_data;
+    curr->arch.hvm_vcpu.io_completed = 0;
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_io(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *curr = current;
+
+    if ( port == 0xe9 )
+    {
+        hvm_print_line(curr, val);
+        return X86EMUL_OKAY;
+    }
+
+    if ( curr->arch.hvm_vcpu.io_in_progress )
+        return X86EMUL_UNHANDLEABLE;
+
+    curr->arch.hvm_vcpu.io_in_progress = 1;
+    send_pio_req(port, 1, bytes, val, IOREQ_WRITE, 0, 0);
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_read_cr(
+    unsigned int reg,
+    unsigned long *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    switch ( reg )
+    {
+    case 0:
+    case 2:
+    case 3:
+    case 4:
+        *val = current->arch.hvm_vcpu.guest_cr[reg];
+        return X86EMUL_OKAY;
+    default:
+        break;
+    }
+
+    return X86EMUL_UNHANDLEABLE;
+}
+
+static int hvmemul_write_cr(
+    unsigned int reg,
+    unsigned long val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    switch ( reg )
+    {
+    case 0:
+        return hvm_set_cr0(val);
+    case 2:
+        current->arch.hvm_vcpu.guest_cr[2] = val;
+        return X86EMUL_OKAY;
+    case 3:
+        return hvm_set_cr3(val);
+    case 4:
+        return hvm_set_cr4(val);
+    default:
+        break;
+    }
+
+    return X86EMUL_UNHANDLEABLE;
+}
+
+static int hvmemul_read_msr(
+    unsigned long reg,
+    uint64_t *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct cpu_user_regs _regs;
+    int rc;
+
+    _regs.ecx = (uint32_t)reg;
+
+    if ( (rc = hvm_funcs.msr_read_intercept(&_regs)) != 0 )
+        return rc;
+
+    *val = ((uint64_t)(uint32_t)_regs.edx << 32) | (uint32_t)_regs.eax;
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_msr(
+    unsigned long reg,
+    uint64_t val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct cpu_user_regs _regs;
+
+    _regs.edx = (uint32_t)(val >> 32);
+    _regs.eax = (uint32_t)val;
+    _regs.ecx = (uint32_t)reg;
+
+    return hvm_funcs.msr_write_intercept(&_regs);
+}
+
+static int hvmemul_write_rflags(
+    unsigned long val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    if ( (val & X86_EFLAGS_IF) && !(ctxt->regs->eflags & X86_EFLAGS_IF) )
+        hvmemul_ctxt->flags.sti = 1;
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_wbinvd(
+    struct x86_emulate_ctxt *ctxt)
+{
+    hvm_funcs.wbinvd_intercept();
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_cpuid(
+    unsigned int *eax,
+    unsigned int *ebx,
+    unsigned int *ecx,
+    unsigned int *edx,
+    struct x86_emulate_ctxt *ctxt)
+{
+    hvm_funcs.cpuid_intercept(eax, ebx, ecx, edx);
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_hlt(
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    hvmemul_ctxt->flags.hlt = 1;
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_inject_hw_exception(
+    uint8_t vector,
+    uint16_t error_code,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+
+    if ( error_code != 0 )
+        return X86EMUL_UNHANDLEABLE;
+
+    hvmemul_ctxt->flags.exn_pending = 1;
+    hvmemul_ctxt->exn_vector = vector;
+    hvmemul_ctxt->exn_insn_len = 0;
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_inject_sw_interrupt(
+    uint8_t vector,
+    uint8_t insn_len,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+
+    hvmemul_ctxt->flags.exn_pending = 1;
+    hvmemul_ctxt->exn_vector = vector;
+    hvmemul_ctxt->exn_insn_len = insn_len;
+
+    return X86EMUL_OKAY;
+}
+
+static void hvmemul_load_fpu_ctxt(
+    struct x86_emulate_ctxt *ctxt)
+{
+    if ( !current->fpu_dirtied )
+        hvm_funcs.fpu_dirty_intercept();
+}
+
+static struct x86_emulate_ops hvm_emulate_ops = {
+    .read          = hvmemul_read,
+    .insn_fetch    = hvmemul_insn_fetch,
+    .write         = hvmemul_write,
+    .cmpxchg       = hvmemul_cmpxchg,
+    .rep_ins       = hvmemul_rep_ins,
+    .rep_outs      = hvmemul_rep_outs,
+    .rep_movs      = hvmemul_rep_movs,
+    .read_segment  = hvmemul_read_segment,
+    .write_segment = hvmemul_write_segment,
+    .read_io       = hvmemul_read_io,
+    .write_io      = hvmemul_write_io,
+    .read_cr       = hvmemul_read_cr,
+    .write_cr      = hvmemul_write_cr,
+    .read_msr      = hvmemul_read_msr,
+    .write_msr     = hvmemul_write_msr,
+    .write_rflags  = hvmemul_write_rflags,
+    .wbinvd        = hvmemul_wbinvd,
+    .cpuid         = hvmemul_cpuid,
+    .hlt           = hvmemul_hlt,
+    .inject_hw_exception = hvmemul_inject_hw_exception,
+    .inject_sw_interrupt = hvmemul_inject_sw_interrupt,
+    .load_fpu_ctxt = hvmemul_load_fpu_ctxt
+};
+
+int hvm_emulate_one(
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
+    unsigned long addr;
+
+    hvmemul_ctxt->ctxt.addr_size =
+        hvmemul_ctxt->seg_reg[x86_seg_cs].attr.fields.db ? 32 : 16;
+    hvmemul_ctxt->ctxt.sp_size =
+        hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.db ? 32 : 16;
+
+    hvmemul_ctxt->insn_buf_eip = regs->eip;
+    hvmemul_ctxt->insn_buf_bytes =
+        (hvm_virtual_to_linear_addr(
+            x86_seg_cs, &hvmemul_ctxt->seg_reg[x86_seg_cs],
+            regs->eip, sizeof(hvmemul_ctxt->insn_buf),
+            hvm_access_insn_fetch, hvmemul_ctxt->ctxt.addr_size, &addr) &&
+         !hvm_fetch_from_guest_virt_nofault(
+             hvmemul_ctxt->insn_buf, addr, sizeof(hvmemul_ctxt->insn_buf)))
+        ? sizeof(hvmemul_ctxt->insn_buf) : 0;
+
+    hvmemul_ctxt->flag_word = 0;
+
+    return x86_emulate(&hvmemul_ctxt->ctxt, &hvm_emulate_ops);
+}
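+
+/*
+ * If the prefetch above fails (e.g. the buffer spans an unmapped page),
+ * insn_buf_bytes is left at zero and hvmemul_insn_fetch() falls back to
+ * fetching every chunk through __hvmemul_read().
+ */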
+
+void hvm_emulate_prepare(
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    struct cpu_user_regs *regs)
+{
+    hvmemul_ctxt->ctxt.regs = regs;
+    hvmemul_ctxt->seg_reg_accessed = 0;
+    hvmemul_ctxt->seg_reg_dirty = 0;
+    hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt);
+    hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt);
+}
+
+void hvm_emulate_writeback(
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    enum x86_segment seg;
+
+    seg = find_first_bit(&hvmemul_ctxt->seg_reg_dirty,
+                         ARRAY_SIZE(hvmemul_ctxt->seg_reg));
+
+    while ( seg < ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
+    {
+        hvm_set_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
+        seg = find_next_bit(&hvmemul_ctxt->seg_reg_dirty,
+                            ARRAY_SIZE(hvmemul_ctxt->seg_reg),
+                            seg+1);
+    }
+}
+
+struct segment_register *hvmemul_get_seg_reg(
+    enum x86_segment seg,
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    if ( !__test_and_set_bit(seg, &hvmemul_ctxt->seg_reg_accessed) )
+        hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
+    return &hvmemul_ctxt->seg_reg[seg];
+}
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/hvm.c    Wed Feb 20 14:36:45 2008 +0000
@@ -729,7 +729,7 @@ int hvm_set_efer(uint64_t value)
         gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
                  "EFER: %"PRIx64"\n", value);
         hvm_inject_exception(TRAP_gp_fault, 0, 0);
-        return 0;
+        return X86EMUL_EXCEPTION;
     }
 
     if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) &&
@@ -738,14 +738,14 @@ int hvm_set_efer(uint64_t value)
         gdprintk(XENLOG_WARNING,
                  "Trying to change EFER.LME with paging enabled\n");
         hvm_inject_exception(TRAP_gp_fault, 0, 0);
-        return 0;
+        return X86EMUL_EXCEPTION;
     }
 
     value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA;
     v->arch.hvm_vcpu.guest_efer = value;
     hvm_update_guest_efer(v);
 
-    return 1;
+    return X86EMUL_OKAY;
 }
 
 extern void shadow_blow_tables_per_domain(struct domain *d);
@@ -787,8 +787,7 @@ int hvm_set_cr0(unsigned long value)
         HVM_DBG_LOG(DBG_LEVEL_1,
                     "Guest attempts to set upper 32 bits in CR0: %lx",
                     value);
-        hvm_inject_exception(TRAP_gp_fault, 0, 0);
-        return 0;
+        goto gpf;
     }
 
     value &= ~HVM_CR0_GUEST_RESERVED_BITS;
@@ -797,10 +796,7 @@ int hvm_set_cr0(unsigned long value)
     value |= X86_CR0_ET;
 
     if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
-    {
-        hvm_inject_exception(TRAP_gp_fault, 0, 0);
-        return 0;
-    }
+        goto gpf;
 
     if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
     {
@@ -809,8 +805,7 @@ int hvm_set_cr0(unsigned long value)
             if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
             {
                 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable");
-                hvm_inject_exception(TRAP_gp_fault, 0, 0);
-                return 0;
+                goto gpf;
             }
             HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
             v->arch.hvm_vcpu.guest_efer |= EFER_LMA;
@@ -828,7 +823,7 @@ int hvm_set_cr0(unsigned long value)
                 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
                          v->arch.hvm_vcpu.guest_cr[3], mfn);
                 domain_crash(v->domain);
-                return 0;
+                return X86EMUL_UNHANDLEABLE;
             }
 
             /* Now arch.guest_table points to machine physical. */
@@ -895,7 +890,11 @@ int hvm_set_cr0(unsigned long value)
     if ( (value ^ old_value) & X86_CR0_PG )
         paging_update_paging_modes(v);
 
-    return 1;
+    return X86EMUL_OKAY;
+
+ gpf:
+    hvm_inject_exception(TRAP_gp_fault, 0, 0);
+    return X86EMUL_EXCEPTION;
 }
 
 int hvm_set_cr3(unsigned long value)
@@ -922,12 +921,12 @@ int hvm_set_cr3(unsigned long value)
 
     v->arch.hvm_vcpu.guest_cr[3] = value;
     paging_update_cr3(v);
-    return 1;
+    return X86EMUL_OKAY;
 
  bad_cr3:
     gdprintk(XENLOG_ERR, "Invalid CR3\n");
     domain_crash(v->domain);
-    return 0;
+    return X86EMUL_UNHANDLEABLE;
 }
 
 int hvm_set_cr4(unsigned long value)
@@ -958,11 +957,11 @@ int hvm_set_cr4(unsigned long value)
     if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
         paging_update_paging_modes(v);
 
-    return 1;
+    return X86EMUL_OKAY;
 
  gpf:
     hvm_inject_exception(TRAP_gp_fault, 0, 0);
-    return 0;
+    return X86EMUL_EXCEPTION;
 }
 
 int hvm_virtual_to_linear_addr(
@@ -977,7 +976,15 @@ int hvm_virtual_to_linear_addr(
     unsigned long addr = offset;
     uint32_t last_byte;
 
-    if ( addr_size != 64 )
+    if ( !(current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
+    {
+        /*
+         * REAL MODE: Don't bother with segment access checks.
+         * Certain of them are not done in native real mode anyway.
+         */
+        addr = (uint32_t)(addr + reg->base);
+    }
+    else if ( addr_size != 64 )
     {
         /*
          * COMPATIBILITY MODE: Apply segment checks and add base.
@@ -1304,7 +1311,7 @@ void hvm_task_switch(
     if ( ptss == NULL )
         goto out;
 
-    if ( !hvm_set_cr3(ptss->cr3) )
+    if ( hvm_set_cr3(ptss->cr3) )
     {
         hvm_unmap(ptss);
         goto out;
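
Note the flipped condition above: under the new return convention
hvm_set_cr3() yields X86EMUL_OKAY (i.e. 0) on success, so the old
failure test "if ( !hvm_set_cr3(...) )" becomes "if ( hvm_set_cr3(...) )",
spelled out:

    if ( hvm_set_cr3(ptss->cr3) != X86EMUL_OKAY )
        goto out;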
@@ -1399,7 +1406,10 @@ static enum hvm_copy_result __hvm_copy(
      * VMREADs on every data access hurts emulation performance.
      * Hence we do not gather extra PFEC flags if CR0.PG == 0.
      */
-    if ( virt && (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
+    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
+        virt = 0;
+
+    if ( virt )
     {
         struct segment_register sreg;
         hvm_get_segment_register(curr, x86_seg_ss, &sreg);
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/instrlen.c
--- a/xen/arch/x86/hvm/instrlen.c       Tue Feb 19 11:14:40 2008 -0700
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,419 +0,0 @@
-/*
- * instrlen.c - calculates the instruction length for all operating modes
- * 
- * Travis Betak, travis.betak@xxxxxxx
- * Copyright (c) 2005,2006 AMD
- * Copyright (c) 2005 Keir Fraser
- *
- * Essentially a very, very stripped version of Keir Fraser's work in
- * x86_emulate.c.  Used for MMIO.
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/mm.h>
-#include <asm-x86/x86_emulate.h>
-
-/* read from guest memory */
-extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip,
-        int length);
-
-/*
- * Opcode effective-address decode tables.
- * Note that we only emulate instructions that have at least one memory
- * operand (excluding implicit stack references). We assume that stack
- * references and instruction fetches will never occur in special memory
- * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
- * not be handled.
- */
-
-/* Operand sizes: 8-bit operands or specified/overridden size. */
-#define ByteOp      (1<<0) /* 8-bit operands. */
-/* Destination operand type. */
-#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
-#define DstReg      (2<<1) /* Register operand. */
-#define DstMem      (3<<1) /* Memory operand. */
-#define DstMask     (3<<1)
-/* Source operand type. */
-#define SrcNone     (0<<3) /* No source operand. */
-#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
-#define SrcReg      (1<<3) /* Register operand. */
-#define SrcMem      (2<<3) /* Memory operand. */
-#define SrcMem16    (3<<3) /* Memory operand (16-bit). */
-#define SrcMem32    (4<<3) /* Memory operand (32-bit). */
-#define SrcImm      (5<<3) /* Immediate operand. */
-#define SrcImmByte  (6<<3) /* 8-bit sign-extended immediate operand. */
-#define SrcMask     (7<<3)
-/* Generic ModRM decode. */
-#define ModRM       (1<<6)
-/* Destination is only written; never read. */
-#define Mov         (1<<7)
-
-static uint8_t opcode_table[256] = {
-    /* 0x00 - 0x07 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x08 - 0x0F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x10 - 0x17 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x18 - 0x1F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x20 - 0x27 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x28 - 0x2F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x30 - 0x37 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x38 - 0x3F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x40 - 0x4F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x50 - 0x5F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x60 - 0x6F */
-    0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x70 - 0x7F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x80 - 0x87 */
-    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
-    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    /* 0x88 - 0x8F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, DstMem|SrcNone|ModRM|Mov,
-    /* 0x90 - 0x9F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xA0 - 0xA7 */
-    ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov,
-    ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov,
-    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
-    ByteOp|ImplicitOps, ImplicitOps,
-    /* 0xA8 - 0xAF */
-    0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
-    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
-    ByteOp|ImplicitOps, ImplicitOps,
-    /* 0xB0 - 0xBF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xC0 - 0xC7 */
-    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0,
-    0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
-    /* 0xC8 - 0xCF */
-    0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xD0 - 0xD7 */
-    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 
-    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 
-    0, 0, 0, 0,
-    /* 0xD8 - 0xDF */
-    0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xE0 - 0xEF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xF0 - 0xF7 */
-    0, 0, 0, 0,
-    0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM,
-    /* 0xF8 - 0xFF */
-    0, 0, 0, 0,
-    0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
-};
-
-static uint8_t twobyte_table[256] = {
-    /* 0x00 - 0x0F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
-    /* 0x10 - 0x1F */
-    0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x20 - 0x2F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x30 - 0x3F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x40 - 0x47 */
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    /* 0x48 - 0x4F */
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    /* 0x50 - 0x5F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x60 - 0x6F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x70 - 0x7F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x80 - 0x8F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x90 - 0x9F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xA0 - 0xA7 */
-    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, 
-    /* 0xA8 - 0xAF */
-    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
-    /* 0xB0 - 0xB7 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM,
-    0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
-    /* 0xB8 - 0xBF */
-    0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM,
-    0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
-    /* 0xC0 - 0xCF */
-    0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xD0 - 0xDF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xE0 - 0xEF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xF0 - 0xFF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/* 
- * insn_fetch - fetch the next byte from instruction stream
- */
-#define insn_fetch()                                                      \
-({ uint8_t _x;                                                            \
-   if ( length >= 15 )                                                    \
-       return -1;                                                         \
-   if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) {                         \
-       unsigned long err;                                                 \
-       struct segment_register ss;                                        \
-       gdprintk(XENLOG_WARNING,                                           \
-                "Cannot read from address %lx (eip %lx, mode %d)\n",      \
-                pc, org_pc, address_bytes);                               \
-       err = 0; /* Must be not-present: we don't enforce reserved bits */ \
-       if ( hvm_nx_enabled(current) )                                     \
-           err |= PFEC_insn_fetch;                                        \
-       hvm_get_segment_register(current, x86_seg_ss, &ss);                \
-       if ( ss.attr.fields.dpl == 3 )                                     \
-           err |= PFEC_user_mode;                                         \
-       hvm_inject_exception(TRAP_page_fault, err, pc);                    \
-       return -1;                                                         \
-   }                                                                      \
-   if ( buf )                                                             \
-       buf[length] = _x;                                                  \
-   length += 1;                                                           \
-   pc += 1;                                                               \
-   _x;                                                                    \
-})
-
-#define insn_skip(_n) do {                      \
-    int _i;                                     \
-    for ( _i = 0; _i < (_n); _i++) {            \
-        (void) insn_fetch();                    \
-    }                                           \
-} while (0)
-
-/**
- * hvm_instruction_fetch - read the current instruction and return its length
- *
- * @org_pc: guest instruction pointer
- * @address_bytes: guest address width
- * @buf: (optional) buffer to load actual instruction bytes into
- *
- * Doesn't increment the guest's instruction pointer, but may
- * issue faults to the guest.  Returns -1 on failure.
- */
-int hvm_instruction_fetch(unsigned long org_pc, int address_bytes,
-                          unsigned char *buf)
-{
-    uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0;
-    unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp;
-    int length = 0;
-    unsigned long pc = org_pc;
-
-    op_bytes = op_default = ad_bytes = ad_default = address_bytes;
-    if ( op_bytes == 8 )
-    {
-        op_bytes = op_default = 4;
-#ifndef __x86_64__
-        return -1;
-#endif
-    }
-
-    /* Legacy prefixes. */
-    for ( ; ; )
-    {
-        switch ( b = insn_fetch() )
-        {
-        case 0x66: /* operand-size override */
-            op_bytes = op_default ^ 6;      /* switch between 2/4 bytes */
-            break;
-        case 0x67: /* address-size override */
-            if ( ad_default == 8 )
-                ad_bytes = ad_default ^ 12; /* switch between 4/8 bytes */
-            else
-                ad_bytes = ad_default ^ 6;  /* switch between 2/4 bytes */
-            break;
-        case 0x2e: /* CS override */
-        case 0x3e: /* DS override */
-        case 0x26: /* ES override */
-        case 0x64: /* FS override */
-        case 0x65: /* GS override */
-        case 0x36: /* SS override */
-        case 0xf0: /* LOCK */
-        case 0xf3: /* REP/REPE/REPZ */
-        case 0xf2: /* REPNE/REPNZ */
-            break;
-#ifdef __x86_64__
-        case 0x40 ... 0x4f:
-            if ( ad_default == 8 )
-            {
-                rex_prefix = b;
-                continue;
-            }
-            /* FALLTHRU */
-#endif
-        default:
-            goto done_prefixes;
-        }
-        rex_prefix = 0;
-    }
-done_prefixes:
-
-    /* REX prefix. */
-    if ( rex_prefix & 8 )
-        op_bytes = 8;                   /* REX.W */
-    /* REX.B, REX.R, and REX.X do not need to be decoded. */
-
-    /* Opcode byte(s). */
-    d = opcode_table[b];
-    if ( d == 0 )
-    {
-        /* Two-byte opcode? */
-        if ( b == 0x0f )
-        {
-            twobyte = 1;
-            b = insn_fetch();
-            d = twobyte_table[b];
-        }
-
-        /* Unrecognised? */
-        if ( d == 0 )
-            goto cannot_emulate;
-    }
-
-    /* ModRM and SIB bytes. */
-    if ( d & ModRM )
-    {
-        uint8_t modrm = insn_fetch();
-        uint8_t modrm_mod = (modrm & 0xc0) >> 6;
-        uint8_t modrm_rm  = (modrm & 0x07);
-
-        modrm_reg = (modrm & 0x38) >> 3;
-        if ( modrm_mod == 3 )
-        {
-            gdprintk(XENLOG_WARNING, "Cannot parse ModRM.mod == 3.\n");
-            goto cannot_emulate;
-        }
-
-        if ( ad_bytes == 2 )
-        {
-            /* 16-bit ModR/M decode. */
-            switch ( modrm_mod )
-            {
-            case 0:
-                if ( modrm_rm == 6 ) 
-                    insn_skip(2); /* skip disp16 */
-                break;
-            case 1:
-                insn_skip(1); /* skip disp8 */
-                break;
-            case 2:
-                insn_skip(2); /* skip disp16 */
-                break;
-            }
-        }
-        else
-        {
-            /* 32/64-bit ModR/M decode. */
-            switch ( modrm_mod )
-            {
-            case 0:
-                if ( (modrm_rm == 4) && 
-                     ((insn_fetch() & 7) == 5) )
-                    insn_skip(4); /* skip disp32 specified by SIB.base */
-                else if ( modrm_rm == 5 )
-                    insn_skip(4); /* skip disp32 */
-                break;
-            case 1:
-                if ( modrm_rm == 4 )
-                    insn_skip(1);
-                insn_skip(1); /* skip disp8 */
-                break;
-            case 2:
-                if ( modrm_rm == 4 )
-                    insn_skip(1);
-                insn_skip(4); /* skip disp32 */
-                break;
-            }
-        }
-    }
-
-    /* Decode and fetch the destination operand: register or memory. */
-    switch ( d & DstMask )
-    {
-    case ImplicitOps:
-        /* Special instructions do their own operand decoding. */
-        goto done;
-    }
-
-    /* Decode and fetch the source operand: register, memory or immediate. */
-    switch ( d & SrcMask )
-    {
-    case SrcImm:
-        tmp = (d & ByteOp) ? 1 : op_bytes;
-        if ( tmp == 8 ) tmp = 4;
-        /* NB. Immediates are sign-extended as necessary. */
-        insn_skip(tmp);
-        break;
-    case SrcImmByte:
-        insn_skip(1);
-        break;
-    }
-
-    if ( twobyte )
-        goto done;
-
-    switch ( b )
-    {
-    case 0xa0 ... 0xa3: /* mov */
-        insn_skip(ad_bytes); /* skip src/dst displacement */
-        break;
-    case 0xf6 ... 0xf7: /* Grp3 */
-        switch ( modrm_reg )
-        {
-        case 0 ... 1: /* test */
-            /* Special case in Grp3: test has an immediate source operand. */
-            tmp = (d & ByteOp) ? 1 : op_bytes;
-            if ( tmp == 8 ) tmp = 4;
-            insn_skip(tmp);
-            break;
-        }
-        break;
-    }
-
-done:
-    return length < 16 ? length : -1;
-
-cannot_emulate:
-    gdprintk(XENLOG_WARNING,
-            "Cannot emulate %02x at address %lx (%lx, addr_bytes %d)\n",
-            b, pc - 1, org_pc, address_bytes);
-    return -1;
-}
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/intercept.c      Wed Feb 20 14:36:45 2008 +0000
@@ -31,7 +31,6 @@
 #include <xen/event.h>
 #include <asm/iommu.h>
 
-
 extern struct hvm_mmio_handler hpet_mmio_handler;
 extern struct hvm_mmio_handler vlapic_mmio_handler;
 extern struct hvm_mmio_handler vioapic_mmio_handler;
@@ -50,12 +49,11 @@ static inline void hvm_mmio_access(struc
                                    hvm_mmio_read_t read_handler,
                                    hvm_mmio_write_t write_handler)
 {
-    unsigned int tmp1, tmp2;
     unsigned long data;
 
-    switch ( p->type ) {
+    switch ( p->type )
+    {
     case IOREQ_TYPE_COPY:
-    {
         if ( !p->data_is_ptr ) {
             if ( p->dir == IOREQ_READ )
                 p->data = read_handler(v, p->addr, p->size);
@@ -86,62 +84,6 @@ static inline void hvm_mmio_access(struc
                 }
             }
         }
-        break;
-    }
-
-    case IOREQ_TYPE_AND:
-        tmp1 = read_handler(v, p->addr, p->size);
-        if ( p->dir == IOREQ_WRITE ) {
-            tmp2 = tmp1 & (unsigned long) p->data;
-            write_handler(v, p->addr, p->size, tmp2);
-        }
-        p->data = tmp1;
-        break;
-
-    case IOREQ_TYPE_ADD:
-        tmp1 = read_handler(v, p->addr, p->size);
-        if (p->dir == IOREQ_WRITE) {
-            tmp2 = tmp1 + (unsigned long) p->data;
-            write_handler(v, p->addr, p->size, tmp2);
-        }
-        p->data = tmp1;
-        break;
-
-    case IOREQ_TYPE_OR:
-        tmp1 = read_handler(v, p->addr, p->size);
-        if ( p->dir == IOREQ_WRITE ) {
-            tmp2 = tmp1 | (unsigned long) p->data;
-            write_handler(v, p->addr, p->size, tmp2);
-        }
-        p->data = tmp1;
-        break;
-
-    case IOREQ_TYPE_XOR:
-        tmp1 = read_handler(v, p->addr, p->size);
-        if ( p->dir == IOREQ_WRITE ) {
-            tmp2 = tmp1 ^ (unsigned long) p->data;
-            write_handler(v, p->addr, p->size, tmp2);
-        }
-        p->data = tmp1;
-        break;
-
-    case IOREQ_TYPE_XCHG:
-        /*
-         * Note that we don't need to be atomic here since VCPU is accessing
-         * its own local APIC.
-         */
-        tmp1 = read_handler(v, p->addr, p->size);
-        write_handler(v, p->addr, p->size, (unsigned long) p->data);
-        p->data = tmp1;
-        break;
-
-    case IOREQ_TYPE_SUB:
-        tmp1 = read_handler(v, p->addr, p->size);
-        if ( p->dir == IOREQ_WRITE ) {
-            tmp2 = tmp1 - (unsigned long) p->data;
-            write_handler(v, p->addr, p->size, tmp2);
-        }
-        p->data = tmp1;
         break;
 
     default:
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/io.c     Wed Feb 20 14:36:45 2008 +0000
@@ -46,379 +46,8 @@
 #include <xen/iocap.h>
 #include <public/hvm/ioreq.h>
 
-#if defined (__i386__)
-static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value)
-{
-    switch (size) {
-    case BYTE:
-        switch (index) {
-        case 0:
-            regs->eax &= 0xFFFFFF00;
-            regs->eax |= (value & 0xFF);
-            break;
-        case 1:
-            regs->ecx &= 0xFFFFFF00;
-            regs->ecx |= (value & 0xFF);
-            break;
-        case 2:
-            regs->edx &= 0xFFFFFF00;
-            regs->edx |= (value & 0xFF);
-            break;
-        case 3:
-            regs->ebx &= 0xFFFFFF00;
-            regs->ebx |= (value & 0xFF);
-            break;
-        case 4:
-            regs->eax &= 0xFFFF00FF;
-            regs->eax |= ((value & 0xFF) << 8);
-            break;
-        case 5:
-            regs->ecx &= 0xFFFF00FF;
-            regs->ecx |= ((value & 0xFF) << 8);
-            break;
-        case 6:
-            regs->edx &= 0xFFFF00FF;
-            regs->edx |= ((value & 0xFF) << 8);
-            break;
-        case 7:
-            regs->ebx &= 0xFFFF00FF;
-            regs->ebx |= ((value & 0xFF) << 8);
-            break;
-        default:
-            goto crash;
-        }
-        break;
-    case WORD:
-        switch (index) {
-        case 0:
-            regs->eax &= 0xFFFF0000;
-            regs->eax |= (value & 0xFFFF);
-            break;
-        case 1:
-            regs->ecx &= 0xFFFF0000;
-            regs->ecx |= (value & 0xFFFF);
-            break;
-        case 2:
-            regs->edx &= 0xFFFF0000;
-            regs->edx |= (value & 0xFFFF);
-            break;
-        case 3:
-            regs->ebx &= 0xFFFF0000;
-            regs->ebx |= (value & 0xFFFF);
-            break;
-        case 4:
-            regs->esp &= 0xFFFF0000;
-            regs->esp |= (value & 0xFFFF);
-            break;
-        case 5:
-            regs->ebp &= 0xFFFF0000;
-            regs->ebp |= (value & 0xFFFF);
-            break;
-        case 6:
-            regs->esi &= 0xFFFF0000;
-            regs->esi |= (value & 0xFFFF);
-            break;
-        case 7:
-            regs->edi &= 0xFFFF0000;
-            regs->edi |= (value & 0xFFFF);
-            break;
-        default:
-            goto crash;
-        }
-        break;
-    case LONG:
-        switch (index) {
-        case 0:
-            regs->eax = value;
-            break;
-        case 1:
-            regs->ecx = value;
-            break;
-        case 2:
-            regs->edx = value;
-            break;
-        case 3:
-            regs->ebx = value;
-            break;
-        case 4:
-            regs->esp = value;
-            break;
-        case 5:
-            regs->ebp = value;
-            break;
-        case 6:
-            regs->esi = value;
-            break;
-        case 7:
-            regs->edi = value;
-            break;
-        default:
-            goto crash;
-        }
-        break;
-    default:
-    crash:
-        gdprintk(XENLOG_ERR, "size:%x, index:%x are invalid!\n", size, index);
-        domain_crash_synchronous();
-    }
-}
-#else
-static inline void __set_reg_value(unsigned long *reg, int size, long value)
-{
-    switch (size) {
-    case BYTE_64:
-        *reg &= ~0xFF;
-        *reg |= (value & 0xFF);
-        break;
-    case WORD:
-        *reg &= ~0xFFFF;
-        *reg |= (value & 0xFFFF);
-        break;
-    case LONG:
-        *reg &= ~0xFFFFFFFF;
-        *reg |= (value & 0xFFFFFFFF);
-        break;
-    case QUAD:
-        *reg = value;
-        break;
-    default:
-        gdprintk(XENLOG_ERR, "size:%x is invalid\n", size);
-        domain_crash_synchronous();
-    }
-}
-
-static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value)
-{
-    if (size == BYTE) {
-        switch (index) {
-        case 0:
-            regs->rax &= ~0xFF;
-            regs->rax |= (value & 0xFF);
-            break;
-        case 1:
-            regs->rcx &= ~0xFF;
-            regs->rcx |= (value & 0xFF);
-            break;
-        case 2:
-            regs->rdx &= ~0xFF;
-            regs->rdx |= (value & 0xFF);
-            break;
-        case 3:
-            regs->rbx &= ~0xFF;
-            regs->rbx |= (value & 0xFF);
-            break;
-        case 4:
-            regs->rax &= 0xFFFFFFFFFFFF00FF;
-            regs->rax |= ((value & 0xFF) << 8);
-            break;
-        case 5:
-            regs->rcx &= 0xFFFFFFFFFFFF00FF;
-            regs->rcx |= ((value & 0xFF) << 8);
-            break;
-        case 6:
-            regs->rdx &= 0xFFFFFFFFFFFF00FF;
-            regs->rdx |= ((value & 0xFF) << 8);
-            break;
-        case 7:
-            regs->rbx &= 0xFFFFFFFFFFFF00FF;
-            regs->rbx |= ((value & 0xFF) << 8);
-            break;
-        default:
-            gdprintk(XENLOG_ERR, "size:%x, index:%x are invalid!\n",
-                     size, index);
-            domain_crash_synchronous();
-            break;
-        }
-        return;
-    }
-
-    switch (index) {
-    case 0:
-        __set_reg_value(&regs->rax, size, value);
-        break;
-    case 1:
-        __set_reg_value(&regs->rcx, size, value);
-        break;
-    case 2:
-        __set_reg_value(&regs->rdx, size, value);
-        break;
-    case 3:
-        __set_reg_value(&regs->rbx, size, value);
-        break;
-    case 4:
-        __set_reg_value(&regs->rsp, size, value);
-        break;
-    case 5:
-        __set_reg_value(&regs->rbp, size, value);
-        break;
-    case 6:
-        __set_reg_value(&regs->rsi, size, value);
-        break;
-    case 7:
-        __set_reg_value(&regs->rdi, size, value);
-        break;
-    case 8:
-        __set_reg_value(&regs->r8, size, value);
-        break;
-    case 9:
-        __set_reg_value(&regs->r9, size, value);
-        break;
-    case 10:
-        __set_reg_value(&regs->r10, size, value);
-        break;
-    case 11:
-        __set_reg_value(&regs->r11, size, value);
-        break;
-    case 12:
-        __set_reg_value(&regs->r12, size, value);
-        break;
-    case 13:
-        __set_reg_value(&regs->r13, size, value);
-        break;
-    case 14:
-        __set_reg_value(&regs->r14, size, value);
-        break;
-    case 15:
-        __set_reg_value(&regs->r15, size, value);
-        break;
-    default:
-        gdprintk(XENLOG_ERR, "Invalid index\n");
-        domain_crash_synchronous();
-    }
-    return;
-}
-#endif
-
-long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs);
-
-static inline void set_eflags_CF(int size,
-                                 unsigned int instr,
-                                 unsigned long result,
-                                 unsigned long src,
-                                 unsigned long dst,
-                                 struct cpu_user_regs *regs)
-{
-    unsigned long mask;
-
-    if ( size == BYTE_64 )
-        size = BYTE;
-    ASSERT((size <= sizeof(mask)) && (size > 0));
-
-    mask = ~0UL >> (8 * (sizeof(mask) - size));
-
-    if ( instr == INSTR_ADD )
-    {
-        /* CF=1 <==> result is less than the augend and addend */
-        if ( (result & mask) < (dst & mask) )
-        {
-            ASSERT((result & mask) < (src & mask));
-            regs->eflags |= X86_EFLAGS_CF;
-        }
-    }
-    else
-    {
-        ASSERT( instr == INSTR_CMP || instr == INSTR_SUB );
-        if ( (src & mask) > (dst & mask) )
-            regs->eflags |= X86_EFLAGS_CF;
-    }
-}
-
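[The unsigned-compare test above is the standard carry detection for addition: the truncated sum is smaller than either addend exactly when the add carried out of the top bit. A self-contained check, with illustrative values:

    #include <assert.h>

    int main(void)
    {
        unsigned long mask = 0xFF;                  /* BYTE-sized op */
        unsigned long dst = 0xF0, src = 0x20;
        unsigned long result = dst + src;           /* 0x110 */
        assert((result & mask) < (dst & mask));     /* 0x10 < 0xF0: CF=1 */
        return 0;
    }
]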
-static inline void set_eflags_OF(int size,
-                                 unsigned int instr,
-                                 unsigned long result,
-                                 unsigned long src,
-                                 unsigned long dst,
-                                 struct cpu_user_regs *regs)
-{
-    unsigned long mask;
-
-    if ( size == BYTE_64 )
-        size = BYTE;
-    ASSERT((size <= sizeof(mask)) && (size > 0));
-
-    mask =  1UL << ((8*size) - 1);
-
-    if ( instr == INSTR_ADD )
-    {
-        if ((src ^ result) & (dst ^ result) & mask)
-            regs->eflags |= X86_EFLAGS_OF;
-    }
-    else
-    {
-        ASSERT(instr == INSTR_CMP || instr == INSTR_SUB);
-        if ((dst ^ src) & (dst ^ result) & mask)
-            regs->eflags |= X86_EFLAGS_OF;
-    }
-}
-
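[The sign-bit expression encodes the usual overflow rule: for addition, OF is set when the operands share a sign that the result does not. A small sketch of the 8-bit case, with illustrative values:

    #include <assert.h>

    int main(void)
    {
        unsigned long mask = 1UL << 7;                   /* BYTE sign bit */
        unsigned long src = 0x01, dst = 0x7F;
        unsigned long result = src + dst;                /* 0x80: sign flips */
        assert((src ^ result) & (dst ^ result) & mask);  /* OF=1 */
        return 0;
    }
]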
-static inline void set_eflags_AF(int size,
-                                 unsigned long result,
-                                 unsigned long src,
-                                 unsigned long dst,
-                                 struct cpu_user_regs *regs)
-{
-    if ((result ^ src ^ dst) & 0x10)
-        regs->eflags |= X86_EFLAGS_AF;
-}
-
-static inline void set_eflags_ZF(int size, unsigned long result,
-                                 struct cpu_user_regs *regs)
-{
-    unsigned long mask;
-
-    if ( size == BYTE_64 )
-        size = BYTE;
-    ASSERT((size <= sizeof(mask)) && (size > 0));
-
-    mask = ~0UL >> (8 * (sizeof(mask) - size));
-
-    if ((result & mask) == 0)
-        regs->eflags |= X86_EFLAGS_ZF;
-}
-
-static inline void set_eflags_SF(int size, unsigned long result,
-                                 struct cpu_user_regs *regs)
-{
-    unsigned long mask;
-
-    if ( size == BYTE_64 )
-        size = BYTE;
-    ASSERT((size <= sizeof(mask)) && (size > 0));
-
-    mask = 1UL << ((8*size) - 1);
-
-    if (result & mask)
-        regs->eflags |= X86_EFLAGS_SF;
-}
-
-static char parity_table[256] = {
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
-};
-
-static inline void set_eflags_PF(int size, unsigned long result,
-                                 struct cpu_user_regs *regs)
-{
-    if (parity_table[result & 0xFF])
-        regs->eflags |= X86_EFLAGS_PF;
-}
-
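[parity_table[] above is just the precomputed even-parity of each byte value (PF is set when the count of 1 bits in the low result byte is even), so any entry can be cross-checked against a bit-counting loop:

    #include <assert.h>

    static int even_parity(unsigned char b)
    {
        int bits = 0;
        for ( ; b != 0; b >>= 1 )
            bits += b & 1;
        return (bits % 2) == 0;
    }

    int main(void)
    {
        assert(even_parity(0x00) == 1);   /* parity_table[0x00] == 1 */
        assert(even_parity(0x01) == 0);   /* parity_table[0x01] == 0 */
        assert(even_parity(0x03) == 1);   /* parity_table[0x03] == 1 */
        return 0;
    }
]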
-static void hvm_pio_assist(struct cpu_user_regs *regs, ioreq_t *p,
-                           struct hvm_io_op *pio_opp)
+static void hvm_pio_assist(
+    struct cpu_user_regs *regs, ioreq_t *p, struct hvm_io_op *pio_opp)
 {
     if ( p->data_is_ptr || (pio_opp->flags & OVERLAP) )
     {
@@ -472,335 +101,6 @@ static void hvm_pio_assist(struct cpu_us
     }
 }
 
-static void hvm_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p,
-                            struct hvm_io_op *mmio_opp)
-{
-    int sign = p->df ? -1 : 1;
-    int size = -1, index = -1;
-    unsigned long value = 0, result = 0;
-    unsigned long src, dst;
-
-    src = mmio_opp->operand[0];
-    dst = mmio_opp->operand[1];
-    size = operand_size(src);
-
-    HVMTRACE_1D(MMIO_ASSIST, current, p->data);
-        
-    switch (mmio_opp->instr) {
-    case INSTR_MOV:
-        if (dst & REGISTER) {
-            index = operand_index(dst);
-            set_reg_value(size, index, 0, regs, p->data);
-        }
-        break;
-
-    case INSTR_MOVZX:
-        if (dst & REGISTER) {
-            switch (size) {
-            case BYTE:
-                p->data &= 0xFFULL;
-                break;
-
-            case WORD:
-                p->data &= 0xFFFFULL;
-                break;
-
-            case LONG:
-                p->data &= 0xFFFFFFFFULL;
-                break;
-
-            default:
-                printk("Impossible source operand size of movzx instr: %d\n", 
size);
-                domain_crash_synchronous();
-            }
-            index = operand_index(dst);
-            set_reg_value(operand_size(dst), index, 0, regs, p->data);
-        }
-        break;
-
-    case INSTR_MOVSX:
-        if (dst & REGISTER) {
-            switch (size) {
-            case BYTE:
-                p->data &= 0xFFULL;
-                if ( p->data & 0x80ULL )
-                    p->data |= 0xFFFFFFFFFFFFFF00ULL;
-                break;
-
-            case WORD:
-                p->data &= 0xFFFFULL;
-                if ( p->data & 0x8000ULL )
-                    p->data |= 0xFFFFFFFFFFFF0000ULL;
-                break;
-
-            case LONG:
-                p->data &= 0xFFFFFFFFULL;
-                if ( p->data & 0x80000000ULL )
-                    p->data |= 0xFFFFFFFF00000000ULL;
-                break;
-
-            default:
-                printk("Impossible source operand size of movsx instr: %d\n", 
size);
-                domain_crash_synchronous();
-            }
-            index = operand_index(dst);
-            set_reg_value(operand_size(dst), index, 0, regs, p->data);
-        }
-        break;
-
-    case INSTR_MOVS:
-        sign = p->df ? -1 : 1;
-
-        if (mmio_opp->flags & REPZ)
-            regs->ecx -= p->count;
-
-        if ((mmio_opp->flags & OVERLAP) && p->dir == IOREQ_READ) {
-            unsigned long addr = mmio_opp->addr;
-
-            if (hvm_paging_enabled(current))
-            {
-                int rv = hvm_copy_to_guest_virt(addr, &p->data, p->size);
-                if ( rv == HVMCOPY_bad_gva_to_gfn )
-                    return; /* exception already injected */
-            }
-            else
-                (void)hvm_copy_to_guest_phys(addr, &p->data, p->size);
-        }
-
-        regs->esi += sign * p->count * p->size;
-        regs->edi += sign * p->count * p->size;
-
-        break;
-
-    case INSTR_STOS:
-        sign = p->df ? -1 : 1;
-        regs->edi += sign * p->count * p->size;
-        if (mmio_opp->flags & REPZ)
-            regs->ecx -= p->count;
-        break;
-
-    case INSTR_LODS:
-        set_reg_value(size, 0, 0, regs, p->data);
-        sign = p->df ? -1 : 1;
-        regs->esi += sign * p->count * p->size;
-        if (mmio_opp->flags & REPZ)
-            regs->ecx -= p->count;
-        break;
-
-    case INSTR_AND:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data & value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            result = (unsigned long) p->data & value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data & value;
-            set_reg_value(size, index, 0, regs, result);
-        }
-
-        /*
-         * The OF and CF flags are cleared; the SF, ZF, and PF
-         * flags are set according to the result. The state of
-         * the AF flag is undefined.
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_ADD:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data + value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            result = (unsigned long) p->data + value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data + value;
-            set_reg_value(size, index, 0, regs, result);
-        }
-
-        /*
-         * The CF, OF, SF, ZF, AF, and PF flags are set according
-         * to the result
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_CF(size, mmio_opp->instr, result, value,
-                      (unsigned long) p->data, regs);
-        set_eflags_OF(size, mmio_opp->instr, result, value,
-                      (unsigned long) p->data, regs);
-        set_eflags_AF(size, result, value, (unsigned long) p->data, regs);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_OR:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data | value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            result = (unsigned long) p->data | value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data | value;
-            set_reg_value(size, index, 0, regs, result);
-        }
-
-        /*
-         * The OF and CF flags are cleared; the SF, ZF, and PF
-         * flags are set according to the result. The state of
-         * the AF flag is undefined.
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_XOR:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data ^ value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            result = (unsigned long) p->data ^ value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data ^ value;
-            set_reg_value(size, index, 0, regs, result);
-        }
-
-        /*
-         * The OF and CF flags are cleared; the SF, ZF, and PF
-         * flags are set according to the result. The state of
-         * the AF flag is undefined.
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_CMP:
-    case INSTR_SUB:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data - value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            result = (unsigned long) p->data - value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            result = value - (unsigned long) p->data;
-            if ( mmio_opp->instr == INSTR_SUB )
-                set_reg_value(size, index, 0, regs, result);
-        }
-
-        /*
-         * The CF, OF, SF, ZF, AF, and PF flags are set according
-         * to the result
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        if ( src & (REGISTER | IMMEDIATE) )
-        {
-            set_eflags_CF(size, mmio_opp->instr, result, value,
-                          (unsigned long) p->data, regs);
-            set_eflags_OF(size, mmio_opp->instr, result, value,
-                          (unsigned long) p->data, regs);
-        }
-        else
-        {
-            set_eflags_CF(size, mmio_opp->instr, result,
-                          (unsigned long) p->data, value, regs);
-            set_eflags_OF(size, mmio_opp->instr, result,
-                          (unsigned long) p->data, value, regs);
-        }
-        set_eflags_AF(size, result, value, (unsigned long) p->data, regs);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_TEST:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-        }
-        result = (unsigned long) p->data & value;
-
-        /*
-         * Sets the SF, ZF, and PF status flags. CF and OF are set to 0
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_BT:
-        if ( src & REGISTER )
-        {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-        }
-        else if ( src & IMMEDIATE )
-            value = mmio_opp->immediate;
-        if (p->data & (1 << (value & ((1 << 5) - 1))))
-            regs->eflags |= X86_EFLAGS_CF;
-        else
-            regs->eflags &= ~X86_EFLAGS_CF;
-
-        break;
-
-    case INSTR_XCHG:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            set_reg_value(size, index, 0, regs, p->data);
-        } else {
-            index = operand_index(dst);
-            set_reg_value(size, index, 0, regs, p->data);
-        }
-        break;
-
-    case INSTR_PUSH:
-        mmio_opp->addr += hvm_get_segment_base(current, x86_seg_ss);
-        {
-            unsigned long addr = mmio_opp->addr;
-            int rv = hvm_copy_to_guest_virt(addr, &p->data, size);
-            if ( rv == HVMCOPY_bad_gva_to_gfn )
-                return; /* exception already injected */
-        }
-        break;
-    }
-}
-
 void hvm_io_assist(void)
 {
     vcpu_iodata_t *vio;
@@ -825,8 +125,18 @@ void hvm_io_assist(void)
 
     p->state = STATE_IOREQ_NONE;
 
-    if ( v->arch.hvm_vcpu.io_complete && v->arch.hvm_vcpu.io_complete() )
-        goto out;
+    if ( v->arch.hvm_vcpu.io_in_progress )
+    {
+        v->arch.hvm_vcpu.io_in_progress = 0;
+        if ( p->dir == IOREQ_READ )
+        {
+            v->arch.hvm_vcpu.io_completed = 1;
+            v->arch.hvm_vcpu.io_data = p->data;
+        }
+        if ( v->arch.hvm_vcpu.mmio_in_progress )
+            (void)handle_mmio();
+        goto out;
+    }
 
     switch ( p->type )
     {
@@ -836,8 +146,9 @@ void hvm_io_assist(void)
         hvm_pio_assist(regs, p, io_opp);
         break;
     default:
-        hvm_mmio_assist(regs, p, io_opp);
-        break;
+        gdprintk(XENLOG_ERR, "Unexpected HVM iorequest state %d.\n", p->state);
+        domain_crash(v->domain);
+        goto out;
     }
 
     /* Copy register changes back into current guest state. */
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/platform.c       Wed Feb 20 14:36:45 2008 +0000
@@ -33,801 +33,9 @@
 #include <asm/hvm/support.h>
 #include <asm/hvm/io.h>
 #include <public/hvm/ioreq.h>
-
 #include <xen/lib.h>
 #include <xen/sched.h>
-#include <asm/current.h>
-
-#define DECODE_success  1
-#define DECODE_failure  0
-
-#define mk_operand(size_reg, index, seg, flag) \
-    (((size_reg) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
-
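[mk_operand() packs a decoded operand into a single word: size in bits 24-31, register index in bits 16-23, segment in bits 8-15, and the REGISTER/MEMORY/IMMEDIATE flag in the low byte. The operand_size()/operand_index() accessors used throughout presumably unpack it along these lines; hypothetical names with an _of suffix so as not to clash with the real header definitions:

    #define operand_size_of(op)   (((op) >> 24) & 0xFF)
    #define operand_index_of(op)  (((op) >> 16) & 0xFF)
    /* e.g. operand_index_of(mk_operand(WORD, 3, 0, REGISTER)) == 3 */
]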
-#if defined (__x86_64__)
-static inline long __get_reg_value(unsigned long reg, int size)
-{
-    switch ( size ) {
-    case BYTE_64:
-        return (char)(reg & 0xFF);
-    case WORD:
-        return (short)(reg & 0xFFFF);
-    case LONG:
-        return (int)(reg & 0xFFFFFFFF);
-    case QUAD:
-        return (long)(reg);
-    default:
-        printk("Error: (__get_reg_value) Invalid reg size\n");
-        domain_crash_synchronous();
-    }
-}
-
-long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
-{
-    if ( size == BYTE ) {
-        switch ( index ) {
-        case 0: /* %al */
-            return (char)(regs->rax & 0xFF);
-        case 1: /* %cl */
-            return (char)(regs->rcx & 0xFF);
-        case 2: /* %dl */
-            return (char)(regs->rdx & 0xFF);
-        case 3: /* %bl */
-            return (char)(regs->rbx & 0xFF);
-        case 4: /* %ah */
-            return (char)((regs->rax & 0xFF00) >> 8);
-        case 5: /* %ch */
-            return (char)((regs->rcx & 0xFF00) >> 8);
-        case 6: /* %dh */
-            return (char)((regs->rdx & 0xFF00) >> 8);
-        case 7: /* %bh */
-            return (char)((regs->rbx & 0xFF00) >> 8);
-        default:
-            printk("Error: (get_reg_value) Invalid index value\n");
-            domain_crash_synchronous();
-        }
-        /* NOTREACHED */
-    }
-
-    switch ( index ) {
-    case 0: return __get_reg_value(regs->rax, size);
-    case 1: return __get_reg_value(regs->rcx, size);
-    case 2: return __get_reg_value(regs->rdx, size);
-    case 3: return __get_reg_value(regs->rbx, size);
-    case 4: return __get_reg_value(regs->rsp, size);
-    case 5: return __get_reg_value(regs->rbp, size);
-    case 6: return __get_reg_value(regs->rsi, size);
-    case 7: return __get_reg_value(regs->rdi, size);
-    case 8: return __get_reg_value(regs->r8, size);
-    case 9: return __get_reg_value(regs->r9, size);
-    case 10: return __get_reg_value(regs->r10, size);
-    case 11: return __get_reg_value(regs->r11, size);
-    case 12: return __get_reg_value(regs->r12, size);
-    case 13: return __get_reg_value(regs->r13, size);
-    case 14: return __get_reg_value(regs->r14, size);
-    case 15: return __get_reg_value(regs->r15, size);
-    default:
-        printk("Error: (get_reg_value) Invalid index value\n");
-        domain_crash_synchronous();
-    }
-}
-#elif defined (__i386__)
-static inline long __get_reg_value(unsigned long reg, int size)
-{
-    switch ( size ) {
-    case WORD:
-        return (short)(reg & 0xFFFF);
-    case LONG:
-        return (int)(reg & 0xFFFFFFFF);
-    default:
-        printk("Error: (__get_reg_value) Invalid reg size\n");
-        domain_crash_synchronous();
-    }
-}
-
-long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
-{
-    if ( size == BYTE ) {
-        switch ( index ) {
-        case 0: /* %al */
-            return (char)(regs->eax & 0xFF);
-        case 1: /* %cl */
-            return (char)(regs->ecx & 0xFF);
-        case 2: /* %dl */
-            return (char)(regs->edx & 0xFF);
-        case 3: /* %bl */
-            return (char)(regs->ebx & 0xFF);
-        case 4: /* %ah */
-            return (char)((regs->eax & 0xFF00) >> 8);
-        case 5: /* %ch */
-            return (char)((regs->ecx & 0xFF00) >> 8);
-        case 6: /* %dh */
-            return (char)((regs->edx & 0xFF00) >> 8);
-        case 7: /* %bh */
-            return (char)((regs->ebx & 0xFF00) >> 8);
-        default:
-            printk("Error: (get_reg_value) Invalid index value\n");
-            domain_crash_synchronous();
-        }
-    }
-
-    switch ( index ) {
-    case 0: return __get_reg_value(regs->eax, size);
-    case 1: return __get_reg_value(regs->ecx, size);
-    case 2: return __get_reg_value(regs->edx, size);
-    case 3: return __get_reg_value(regs->ebx, size);
-    case 4: return __get_reg_value(regs->esp, size);
-    case 5: return __get_reg_value(regs->ebp, size);
-    case 6: return __get_reg_value(regs->esi, size);
-    case 7: return __get_reg_value(regs->edi, size);
-    default:
-        printk("Error: (get_reg_value) Invalid index value\n");
-        domain_crash_synchronous();
-    }
-}
-#endif
-
-static inline unsigned char *check_prefix(unsigned char *inst,
-                                          struct hvm_io_op *mmio_op,
-                                          unsigned char *ad_size,
-                                          unsigned char *op_size,
-                                          unsigned char *seg_sel,
-                                          unsigned char *rex_p)
-{
-    while ( 1 ) {
-        switch ( *inst ) {
-            /* rex prefix for em64t instructions */
-        case 0x40 ... 0x4f:
-            *rex_p = *inst;
-            break;
-        case 0xf3: /* REPZ */
-            mmio_op->flags = REPZ;
-            break;
-        case 0xf2: /* REPNZ */
-            mmio_op->flags = REPNZ;
-            break;
-        case 0xf0: /* LOCK */
-            break;
-        case 0x2e: /* CS */
-        case 0x36: /* SS */
-        case 0x3e: /* DS */
-        case 0x26: /* ES */
-        case 0x64: /* FS */
-        case 0x65: /* GS */
-            *seg_sel = *inst;
-            break;
-        case 0x66: /* 32bit->16bit */
-            *op_size = WORD;
-            break;
-        case 0x67:
-            *ad_size = WORD;
-            break;
-        default:
-            return inst;
-        }
-        inst++;
-    }
-}
-
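[check_prefix() simply consumes legacy and REX prefix bytes until it reaches the opcode proper. Reduced to its loop shape, the idea is as follows; a hedged sketch that drops the side effects recorded into mmio_op:

    /* Return a pointer to the first non-prefix byte of an instruction. */
    static const unsigned char *skip_prefixes(const unsigned char *inst)
    {
        for ( ; ; inst++ )
        {
            switch ( *inst )
            {
            case 0x40 ... 0x4f:               /* REX */
            case 0xf0: case 0xf2: case 0xf3:  /* LOCK, REPNZ, REPZ */
            case 0x26: case 0x2e: case 0x36:  /* segment overrides */
            case 0x3e: case 0x64: case 0x65:
            case 0x66: case 0x67:             /* operand/address size */
                continue;
            default:
                return inst;
            }
        }
    }
]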
-static inline unsigned long get_immediate(int ad_size, const unsigned char *inst, int op_size)
-{
-    int mod, reg, rm;
-    unsigned long val = 0;
-    int i;
-
-    mod = (*inst >> 6) & 3;
-    reg = (*inst >> 3) & 7;
-    rm = *inst & 7;
-
-    inst++; //skip ModR/M byte
-    if ( ad_size != WORD && mod != 3 && rm == 4 ) {
-        rm = *inst & 7;
-        inst++; //skip SIB byte
-    }
-
-    switch ( mod ) {
-    case 0:
-        if ( ad_size == WORD ) {
-            if ( rm == 6 )
-                inst = inst + 2; //disp16, skip 2 bytes
-        }
-        else {
-            if ( rm == 5 )
-                inst = inst + 4; //disp32, skip 4 bytes
-        }
-        break;
-    case 1:
-        inst++; //disp8, skip 1 byte
-        break;
-    case 2:
-        if ( ad_size == WORD )
-            inst = inst + 2; //disp16, skip 2 bytes
-        else
-            inst = inst + 4; //disp32, skip 4 bytes
-        break;
-    }
-
-    if ( op_size == QUAD )
-        op_size = LONG;
-
-    for ( i = 0; i < op_size; i++ ) {
-        val |= (*inst++ & 0xff) << (8 * i);
-    }
-
-    return val;
-}
-
-static inline unsigned long get_immediate_sign_ext(
-    int ad_size, const unsigned char *inst, int op_size)
-{
-    unsigned long result = get_immediate(ad_size, inst, op_size);
-    if ( op_size == BYTE )
-        return (int8_t)result;
-    if ( op_size == WORD )
-        return (int16_t)result;
-    return (int32_t)result;
-}
-
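[The narrowing casts above perform the sign extension implicitly: converting to int8_t or int16_t and back to a wider signed type replicates the top bit. For example:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        unsigned long raw = 0xFE;        /* one-byte immediate encoding -2 */
        long extended = (int8_t)raw;     /* widened with sign replication */
        assert(extended == -2);
        return 0;
    }
]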
-static inline int get_index(const unsigned char *inst, unsigned char rex)
-{
-    int mod, reg, rm;
-    int rex_r, rex_b;
-
-    mod = (*inst >> 6) & 3;
-    reg = (*inst >> 3) & 7;
-    rm = *inst & 7;
-
-    rex_r = (rex >> 2) & 1;
-    rex_b = rex & 1;
-
-    /* Only one operand of the instruction can be a register. */
-    if ( mod == 3 ) {
-        return (rm + (rex_b << 3));
-    } else {
-        return (reg + (rex_r << 3));
-    }
-}
-
-static void init_instruction(struct hvm_io_op *mmio_op)
-{
-    mmio_op->instr = 0;
-
-    mmio_op->flags = 0;
-
-    mmio_op->operand[0] = 0;
-    mmio_op->operand[1] = 0;
-    mmio_op->immediate = 0;
-}
-
-#define GET_OP_SIZE_FOR_BYTE(size_reg)      \
-    do {                                    \
-        if ( rex )                          \
-            (size_reg) = BYTE_64;           \
-        else                                \
-            (size_reg) = BYTE;              \
-    } while( 0 )
-
-#define GET_OP_SIZE_FOR_NONEBYTE(op_size)   \
-    do {                                    \
-        if ( rex & 0x8 )                    \
-            (op_size) = QUAD;               \
-        else if ( (op_size) != WORD )       \
-            (op_size) = LONG;               \
-    } while( 0 )
-
-
-/*
- * Decode mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax)
- */
-static inline int mem_acc(unsigned char size, struct hvm_io_op *mmio)
-{
-    mmio->operand[0] = mk_operand(size, 0, 0, MEMORY);
-    mmio->operand[1] = mk_operand(size, 0, 0, REGISTER);
-    return DECODE_success;
-}
-
-/*
- * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32)
- */
-static inline int acc_mem(unsigned char size, struct hvm_io_op *mmio)
-{
-    mmio->operand[0] = mk_operand(size, 0, 0, REGISTER);
-    mmio->operand[1] = mk_operand(size, 0, 0, MEMORY);
-    return DECODE_success;
-}
-
-/*
- * Decode mem,reg operands (as in <opcode> r32/16, m32/16)
- */
-static int mem_reg(unsigned char size, unsigned char *opcode,
-                   struct hvm_io_op *mmio_op, unsigned char rex)
-{
-    int index = get_index(opcode + 1, rex);
-
-    mmio_op->operand[0] = mk_operand(size, 0, 0, MEMORY);
-    mmio_op->operand[1] = mk_operand(size, index, 0, REGISTER);
-    return DECODE_success;
-}
-
-/*
- * Decode reg,mem operands (as in <opcode> m32/16, r32/16)
- */
-static int reg_mem(unsigned char size, unsigned char *opcode,
-                   struct hvm_io_op *mmio_op, unsigned char rex)
-{
-    int index = get_index(opcode + 1, rex);
-
-    mmio_op->operand[0] = mk_operand(size, index, 0, REGISTER);
-    mmio_op->operand[1] = mk_operand(size, 0, 0, MEMORY);
-    return DECODE_success;
-}
-
-static int mmio_decode(int address_bytes, unsigned char *opcode,
-                       struct hvm_io_op *mmio_op,
-                       unsigned char *ad_size, unsigned char *op_size,
-                       unsigned char *seg_sel)
-{
-    unsigned char size_reg = 0;
-    unsigned char rex = 0;
-    int index;
-
-    *ad_size = 0;
-    *op_size = 0;
-    *seg_sel = 0;
-    init_instruction(mmio_op);
-
-    opcode = check_prefix(opcode, mmio_op, ad_size, op_size, seg_sel, &rex);
-
-    switch ( address_bytes )
-    {
-    case 2:
-        if ( *op_size == WORD )
-            *op_size = LONG;
-        else if ( *op_size == LONG )
-            *op_size = WORD;
-        else if ( *op_size == 0 )
-            *op_size = WORD;
-        if ( *ad_size == WORD )
-            *ad_size = LONG;
-        else if ( *ad_size == LONG )
-            *ad_size = WORD;
-        else if ( *ad_size == 0 )
-            *ad_size = WORD;
-        break;
-    case 4:
-        if ( *op_size == 0 )
-            *op_size = LONG;
-        if ( *ad_size == 0 )
-            *ad_size = LONG;
-        break;
-#ifdef __x86_64__
-    case 8:
-        if ( *op_size == 0 )
-            *op_size = rex & 0x8 ? QUAD : LONG;
-        if ( *ad_size == WORD )
-            *ad_size = LONG;
-        else if ( *ad_size == 0 )
-            *ad_size = QUAD;
-        break;
-#endif
-    }
-
-    /* The operand order in the comments below follows the AT&T convention. */
-
-    switch ( *opcode ) {
-
-    case 0x00: /* add r8, m8 */
-        mmio_op->instr = INSTR_ADD;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x03: /* add m32/16, r32/16 */
-        mmio_op->instr = INSTR_ADD;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0x08: /* or r8, m8 */ 
-        mmio_op->instr = INSTR_OR;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x09: /* or r32/16, m32/16 */
-        mmio_op->instr = INSTR_OR;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x0A: /* or m8, r8 */
-        mmio_op->instr = INSTR_OR;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, mmio_op, rex);
-
-    case 0x0B: /* or m32/16, r32/16 */
-        mmio_op->instr = INSTR_OR;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0x20: /* and r8, m8 */
-        mmio_op->instr = INSTR_AND;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x21: /* and r32/16, m32/16 */
-        mmio_op->instr = INSTR_AND;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x22: /* and m8, r8 */
-        mmio_op->instr = INSTR_AND;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, mmio_op, rex);
-
-    case 0x23: /* and m32/16, r32/16 */
-        mmio_op->instr = INSTR_AND;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0x2B: /* sub m32/16, r32/16 */
-        mmio_op->instr = INSTR_SUB;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0x30: /* xor r8, m8 */
-        mmio_op->instr = INSTR_XOR;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x31: /* xor r32/16, m32/16 */
-        mmio_op->instr = INSTR_XOR;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x32: /* xor m8, r8 */
-        mmio_op->instr = INSTR_XOR;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, mmio_op, rex);
-
-    case 0x38: /* cmp r8, m8 */
-        mmio_op->instr = INSTR_CMP;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x39: /* cmp r32/16, m32/16 */
-        mmio_op->instr = INSTR_CMP;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x3A: /* cmp m8, r8 */
-        mmio_op->instr = INSTR_CMP;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, mmio_op, rex);
-
-    case 0x3B: /* cmp m32/16, r32/16 */
-        mmio_op->instr = INSTR_CMP;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0x80:
-    case 0x81:
-    case 0x83:
-    {
-        unsigned char ins_subtype = (opcode[1] >> 3) & 7;
-
-        if ( opcode[0] == 0x80 ) {
-            *op_size = BYTE;
-            GET_OP_SIZE_FOR_BYTE(size_reg);
-        } else {
-            GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-            size_reg = *op_size;
-        }
-
-        /* opcode 0x83 always has a single byte operand */
-        if ( opcode[0] == 0x83 )
-            mmio_op->immediate =
-                get_immediate_sign_ext(*ad_size, opcode + 1, BYTE);
-        else
-            mmio_op->immediate =
-                get_immediate_sign_ext(*ad_size, opcode + 1, *op_size);
-
-        mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
-        mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
-
-        switch ( ins_subtype ) {
-        case 0: /* add $imm, m32/16 */
-            mmio_op->instr = INSTR_ADD;
-            return DECODE_success;
-
-        case 1: /* or $imm, m32/16 */
-            mmio_op->instr = INSTR_OR;
-            return DECODE_success;
-
-        case 4: /* and $imm, m32/16 */
-            mmio_op->instr = INSTR_AND;
-            return DECODE_success;
-
-        case 5: /* sub $imm, m32/16 */
-            mmio_op->instr = INSTR_SUB;
-            return DECODE_success;
-
-        case 6: /* xor $imm, m32/16 */
-            mmio_op->instr = INSTR_XOR;
-            return DECODE_success;
-
-        case 7: /* cmp $imm, m32/16 */
-            mmio_op->instr = INSTR_CMP;
-            return DECODE_success;
-
-        default:
-            printk("%x/%x, This opcode isn't handled yet!\n",
-                   *opcode, ins_subtype);
-            return DECODE_failure;
-        }
-    }
-
-    case 0x84:  /* test r8, m8 */
-        mmio_op->instr = INSTR_TEST;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x85: /* test r16/32, m16/32 */
-        mmio_op->instr = INSTR_TEST;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x86:  /* xchg m8, r8 */
-        mmio_op->instr = INSTR_XCHG;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x87:  /* xchg m16/32, r16/32 */
-        mmio_op->instr = INSTR_XCHG;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x88: /* mov r8, m8 */
-        mmio_op->instr = INSTR_MOV;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x89: /* mov r32/16, m32/16 */
-        mmio_op->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x8A: /* mov m8, r8 */
-        mmio_op->instr = INSTR_MOV;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, mmio_op, rex);
-
-    case 0x8B: /* mov m32/16, r32/16 */
-        mmio_op->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0xA0: /* mov <addr>, al */
-        mmio_op->instr = INSTR_MOV;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_acc(size_reg, mmio_op);
-
-    case 0xA1: /* mov <addr>, ax/eax */
-        mmio_op->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_acc(*op_size, mmio_op);
-
-    case 0xA2: /* mov al, <addr> */
-        mmio_op->instr = INSTR_MOV;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return acc_mem(size_reg, mmio_op);
-
-    case 0xA3: /* mov ax/eax, <addr> */
-        mmio_op->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return acc_mem(*op_size, mmio_op);
-
-    case 0xA4: /* movsb */
-        mmio_op->instr = INSTR_MOVS;
-        *op_size = BYTE;
-        return DECODE_success;
-
-    case 0xA5: /* movsw/movsl */
-        mmio_op->instr = INSTR_MOVS;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return DECODE_success;
-
-    case 0xAA: /* stosb */
-        mmio_op->instr = INSTR_STOS;
-        *op_size = BYTE;
-        return DECODE_success;
-
-    case 0xAB: /* stosw/stosl */
-        mmio_op->instr = INSTR_STOS;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return DECODE_success;
-
-    case 0xAC: /* lodsb */
-        mmio_op->instr = INSTR_LODS;
-        *op_size = BYTE;
-        return DECODE_success;
-
-    case 0xAD: /* lodsw/lodsl */
-        mmio_op->instr = INSTR_LODS;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return DECODE_success;
-
-    case 0xC6:
-        if ( ((opcode[1] >> 3) & 7) == 0 ) { /* mov $imm8, m8 */
-            mmio_op->instr = INSTR_MOV;
-            *op_size = BYTE;
-
-            mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE);
-            mmio_op->immediate  =
-                    get_immediate(*ad_size, opcode + 1, *op_size);
-            mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
-
-            return DECODE_success;
-        } else
-            return DECODE_failure;
-
-    case 0xC7:
-        if ( ((opcode[1] >> 3) & 7) == 0 ) { /* mov $imm16/32, m16/32 */
-            mmio_op->instr = INSTR_MOV;
-            GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-
-            mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE);
-            mmio_op->immediate =
-                    get_immediate_sign_ext(*ad_size, opcode + 1, *op_size);
-            mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
-
-            return DECODE_success;
-        } else
-            return DECODE_failure;
-
-    case 0xF6:
-    case 0xF7:
-        if ( ((opcode[1] >> 3) & 7) == 0 ) { /* test $imm8/16/32, m8/16/32 */
-            mmio_op->instr = INSTR_TEST;
-
-            if ( opcode[0] == 0xF6 ) {
-                *op_size = BYTE;
-                GET_OP_SIZE_FOR_BYTE(size_reg);
-            } else {
-                GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-                size_reg = *op_size;
-            }
-
-            mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
-            mmio_op->immediate =
-                    get_immediate_sign_ext(*ad_size, opcode + 1, *op_size);
-            mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
-
-            return DECODE_success;
-        } else
-            return DECODE_failure;
-
-    case 0xFE:
-    case 0xFF:
-    {
-        unsigned char ins_subtype = (opcode[1] >> 3) & 7;
-
-        if ( opcode[0] == 0xFE ) {
-            *op_size = BYTE;
-            GET_OP_SIZE_FOR_BYTE(size_reg);
-        } else {
-            GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-            size_reg = *op_size;
-        }
-
-        mmio_op->immediate = 1;
-        mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
-        mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
-
-        switch ( ins_subtype ) {
-        case 0: /* inc */
-            mmio_op->instr = INSTR_ADD;
-            return DECODE_success;
-
-        case 1: /* dec */
-            mmio_op->instr = INSTR_SUB;
-            return DECODE_success;
-
-        case 6: /* push */
-            mmio_op->instr = INSTR_PUSH;
-            mmio_op->operand[0] = mmio_op->operand[1];
-            return DECODE_success;
-
-        default:
-            printk("%x/%x, This opcode isn't handled yet!\n",
-                   *opcode, ins_subtype);
-            return DECODE_failure;
-        }
-    }
-
-    case 0x0F:
-        break;
-
-    default:
-        printk("%x, This opcode isn't handled yet!\n", *opcode);
-        return DECODE_failure;
-    }
-
-    switch ( *++opcode ) {
-    case 0xB6: /* movzx m8, r16/r32/r64 */
-        mmio_op->instr = INSTR_MOVZX;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        index = get_index(opcode + 1, rex);
-        mmio_op->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
-        mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xB7: /* movzx m16, r32/r64 */
-        mmio_op->instr = INSTR_MOVZX;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        index = get_index(opcode + 1, rex);
-        mmio_op->operand[0] = mk_operand(WORD, 0, 0, MEMORY);
-        mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xBE: /* movsx m8, r16/r32/r64 */
-        mmio_op->instr = INSTR_MOVSX;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        index = get_index(opcode + 1, rex);
-        mmio_op->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
-        mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xBF: /* movsx m16, r32/r64 */
-        mmio_op->instr = INSTR_MOVSX;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        index = get_index(opcode + 1, rex);
-        mmio_op->operand[0] = mk_operand(WORD, 0, 0, MEMORY);
-        mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xA3: /* bt r32, m32 */
-        mmio_op->instr = INSTR_BT;
-        index = get_index(opcode + 1, rex);
-        *op_size = LONG;
-        mmio_op->operand[0] = mk_operand(*op_size, index, 0, REGISTER);
-        mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
-        return DECODE_success;
-
-    case 0xBA:
-        if ( ((opcode[1] >> 3) & 7) == 4 ) /* BT $imm8, m16/32/64 */
-        {
-            mmio_op->instr = INSTR_BT;
-            GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-            mmio_op->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
-            mmio_op->immediate =
-                    (signed char)get_immediate(*ad_size, opcode + 1, BYTE);
-            mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
-            return DECODE_success;
-        }
-        else
-        {
-            printk("0f %x, This opcode subtype isn't handled yet\n", *opcode);
-            return DECODE_failure;
-        }
-
-    default:
-        printk("0f %x, This opcode isn't handled yet\n", *opcode);
-        return DECODE_failure;
-    }
-}
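[As a concrete trace through the decoder above: the byte sequence 83 0e 04 (orl $4,(%esi)) takes the 0x83 arm, where the ModRM reg field selects the group-1 operation and the single immediate byte is sign-extended:

    #include <assert.h>

    int main(void)
    {
        unsigned char modrm = 0x0e;        /* mod=0, reg=1, rm=6 */
        assert(((modrm >> 3) & 7) == 1);   /* subtype 1 => INSTR_OR */
        return 0;
    }
]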
+#include <asm/hvm/emulate.h>
 
 int inst_copy_from_guest(
     unsigned char *buf, unsigned long guest_eip, int inst_len)
@@ -984,323 +192,41 @@ void send_invalidate_req(void)
     hvm_send_assist_req(v);
 }
 
-static void mmio_operands(int type, unsigned long gpa,
-                          struct hvm_io_op *mmio_op,
-                          unsigned char op_size)
-{
-    unsigned long value = 0;
-    int df, index, size_reg;
-    struct cpu_user_regs *regs = &mmio_op->io_context;
-
-    df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
-
-    size_reg = operand_size(mmio_op->operand[0]);
-
-    if ( mmio_op->operand[0] & REGISTER ) {            /* dest is memory */
-        index = operand_index(mmio_op->operand[0]);
-        value = get_reg_value(size_reg, index, 0, regs);
-        send_mmio_req(type, gpa, 1, op_size, value, IOREQ_WRITE, df, 0);
-    } else if ( mmio_op->operand[0] & IMMEDIATE ) {    /* dest is memory */
-        value = mmio_op->immediate;
-        send_mmio_req(type, gpa, 1, op_size, value, IOREQ_WRITE, df, 0);
-    } else if ( mmio_op->operand[0] & MEMORY ) {       /* dest is register */
-        /* send the request and wait for the value */
-        if ( (mmio_op->instr == INSTR_MOVZX) ||
-             (mmio_op->instr == INSTR_MOVSX) )
-            send_mmio_req(type, gpa, 1, size_reg, 0, IOREQ_READ, df, 0);
-        else
-            send_mmio_req(type, gpa, 1, op_size, 0, IOREQ_READ, df, 0);
-    } else {
-        printk("%s: invalid dest mode.\n", __func__);
-        domain_crash_synchronous();
-    }
-}
-
-#define GET_REPEAT_COUNT() \
-     (mmio_op->flags & REPZ ? (ad_size == WORD ? regs->ecx & 0xFFFF : regs->ecx) : 1)
-
-
-void handle_mmio(paddr_t gpa)
-{
-    unsigned long inst_addr;
-    struct hvm_io_op *mmio_op;
-    struct cpu_user_regs *regs;
-    unsigned char inst[MAX_INST_LEN], ad_size, op_size, seg_sel;
-    int i, address_bytes, df, inst_len;
-    struct vcpu *v = current;
-
-    mmio_op = &v->arch.hvm_vcpu.io_op;
-    regs = &mmio_op->io_context;
-
-    /* Copy current guest state into io instruction state structure. */
-    memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
-
-    df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
-
-    address_bytes = hvm_guest_x86_mode(v);
-    if (address_bytes < 2)
-        /* real or vm86 modes */
-        address_bytes = 2;
-    inst_addr = hvm_get_segment_base(v, x86_seg_cs) + regs->eip;
-    memset(inst, 0, MAX_INST_LEN);
-    inst_len = hvm_instruction_fetch(inst_addr, address_bytes, inst);
-    if ( inst_len <= 0 )
-    {
-        gdprintk(XENLOG_DEBUG, "handle_mmio: failed to get instruction\n");
-        /* hvm_instruction_fetch() will have injected a #PF; get out now */
-        return;
-    }
-
-    if ( mmio_decode(address_bytes, inst, mmio_op, &ad_size,
-                     &op_size, &seg_sel) == DECODE_failure )
-    {
+int handle_mmio(void)
+{
+    struct hvm_emulate_ctxt ctxt;
+    struct vcpu *curr = current;
+    int rc;
+
+    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
+
+    rc = hvm_emulate_one(&ctxt);
+
+    switch ( rc )
+    {
+    case X86EMUL_UNHANDLEABLE:
         gdprintk(XENLOG_WARNING,
-                 "handle_mmio: failed to decode instruction\n");
-        gdprintk(XENLOG_WARNING,
-                 "mmio opcode: gpa 0x%"PRIpaddr", len %d:", gpa, inst_len);
-        for ( i = 0; i < inst_len; i++ )
-            printk(" %02x", inst[i] & 0xFF);
-        printk("\n");
-
-        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
-        return;
-    }
-
-    regs->eip += inst_len; /* advance %eip */
-
-    switch ( mmio_op->instr ) {
-    case INSTR_MOV:
-        mmio_operands(IOREQ_TYPE_COPY, gpa, mmio_op, op_size);
+                 "MMIO emulation failed @ %04x:%lx: "
+                 "%02x %02x %02x %02x %02x %02x\n",
+                 hvmemul_get_seg_reg(x86_seg_cs, &ctxt)->sel,
+                 ctxt.insn_buf_eip,
+                 ctxt.insn_buf[0], ctxt.insn_buf[1],
+                 ctxt.insn_buf[2], ctxt.insn_buf[3],
+                 ctxt.insn_buf[4], ctxt.insn_buf[5]);
+        return 0;
+    case X86EMUL_EXCEPTION:
+        if ( ctxt.flags.exn_pending )
+            hvm_inject_exception(ctxt.exn_vector, 0, 0);
         break;
-
-    case INSTR_MOVS:
-    {
-        struct segment_register sreg;
-        unsigned long count = GET_REPEAT_COUNT();
-        int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
-        unsigned long addr, gfn; 
-        paddr_t paddr;
-        int dir, size = op_size;
-        uint32_t pfec;
-
-        ASSERT(count);
-
-        /* determine non-MMIO address */
-        addr = regs->edi;
-        if ( ad_size == WORD )
-            addr &= 0xFFFF;
-        addr += hvm_get_segment_base(v, x86_seg_es);        
-        pfec = PFEC_page_present | PFEC_write_access;
-        hvm_get_segment_register(v, x86_seg_ss, &sreg);
-        if ( sreg.attr.fields.dpl == 3 )
-            pfec |= PFEC_user_mode;
-        gfn = paging_gva_to_gfn(v, addr, &pfec);
-        paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
-        if ( paddr == gpa )
-        {
-            enum x86_segment seg;
-
-            dir = IOREQ_WRITE;
-            addr = regs->esi;
-            if ( ad_size == WORD )
-                addr &= 0xFFFF;
-            switch ( seg_sel )
-            {
-            case 0x26: seg = x86_seg_es; break;
-            case 0x2e: seg = x86_seg_cs; break;
-            case 0x36: seg = x86_seg_ss; break;
-            case 0:
-            case 0x3e: seg = x86_seg_ds; break;
-            case 0x64: seg = x86_seg_fs; break;
-            case 0x65: seg = x86_seg_gs; break;
-            default: domain_crash_synchronous();
-            }
-            addr += hvm_get_segment_base(v, seg);
-            pfec &= ~PFEC_write_access;
-            gfn = paging_gva_to_gfn(v, addr, &pfec);
-            paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
-        }
-        else
-            dir = IOREQ_READ;
-
-        if ( gfn == INVALID_GFN ) 
-        {
-            /* The guest does not have the non-mmio address mapped. 
-             * Need to send in a page fault */
-            regs->eip -= inst_len; /* do not advance %eip */
-            hvm_inject_exception(TRAP_page_fault, pfec, addr);
-            return;
-        }
-
-        /*
-         * In case of a movs spanning multiple pages, we break the accesses
-         * up into multiple pages (the device model works with non-contiguous
-         * physical guest pages). To copy just one page, we adjust %ecx and
-         * do not advance %eip so that the next rep;movs copies the next page.
-         * Unaligned accesses, for example movsl starting at PGSZ-2, are
-         * turned into a single copy where we handle the overlapping memory
-         * copy ourselves. After this copy succeeds, "rep movs" is executed
-         * again.
-         */
-        if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) {
-            unsigned long value = 0;
-
-            mmio_op->flags |= OVERLAP;
-
-            if ( dir == IOREQ_WRITE ) {
-                if ( hvm_paging_enabled(v) )
-                {
-                    int rv = hvm_copy_from_guest_virt(&value, addr, size);
-                    if ( rv == HVMCOPY_bad_gva_to_gfn ) 
-                        return; /* exception already injected */
-                }
-                else
-                    (void)hvm_copy_from_guest_phys(&value, addr, size);
-            } else /* dir != IOREQ_WRITE */
-                /* Remember where to write the result, as a *VA*.
-                 * Must be a VA so we can handle the page overlap 
-                 * correctly in hvm_mmio_assist() */
-                mmio_op->addr = addr;
-
-            if ( count != 1 )
-                regs->eip -= inst_len; /* do not advance %eip */
-
-            send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, df, 0);
-        } else {
-            unsigned long last_addr = sign > 0 ? addr + count * size - 1
-                                               : addr - (count - 1) * size;
-
-            if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) )
-            {
-                regs->eip -= inst_len; /* do not advance %eip */
-
-                if ( sign > 0 )
-                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
-                else
-                    count = (addr & ~PAGE_MASK) / size + 1;
-            }
-
-            ASSERT(count);
-
-            send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, 
-                          paddr, dir, df, 1);
-        }
+    default:
         break;
     }
 
-    case INSTR_MOVZX:
-    case INSTR_MOVSX:
-        mmio_operands(IOREQ_TYPE_COPY, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_STOS:
-        /*
-         * Since the destination is always in (contiguous) mmio space we don't
-         * need to break it up into pages.
-         */
-        send_mmio_req(IOREQ_TYPE_COPY, gpa,
-                      GET_REPEAT_COUNT(), op_size, regs->eax, IOREQ_WRITE, df, 0);
-        break;
-
-    case INSTR_LODS:
-        /*
-         * Since the source is always in (contiguous) mmio space we don't
-         * need to break it up into pages.
-         */
-        mmio_op->operand[0] = mk_operand(op_size, 0, 0, REGISTER);
-        send_mmio_req(IOREQ_TYPE_COPY, gpa,
-                      GET_REPEAT_COUNT(), op_size, 0, IOREQ_READ, df, 0);
-        break;
-
-    case INSTR_OR:
-        mmio_operands(IOREQ_TYPE_OR, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_AND:
-        mmio_operands(IOREQ_TYPE_AND, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_ADD:
-        mmio_operands(IOREQ_TYPE_ADD, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_SUB:
-        mmio_operands(IOREQ_TYPE_SUB, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_XOR:
-        mmio_operands(IOREQ_TYPE_XOR, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_PUSH:
-        if ( ad_size == WORD )
-        {
-            mmio_op->addr = (uint16_t)(regs->esp - op_size);
-            regs->esp = mmio_op->addr | (regs->esp & ~0xffff);
-        }
-        else
-        {
-            regs->esp -= op_size;
-            mmio_op->addr = regs->esp;
-        }
-        /* send the request and wait for the value */
-        send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, op_size, 0, IOREQ_READ, df, 0);
-        break;
-
-    case INSTR_CMP:        /* Pass through */
-    case INSTR_TEST:
-        /* send the request and wait for the value */
-        send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, op_size, 0, IOREQ_READ, df, 0);
-        break;
-
-    case INSTR_BT:
-    {
-        unsigned long value = 0;
-        int index, size;
-
-        if ( mmio_op->operand[0] & REGISTER )
-        {
-            index = operand_index(mmio_op->operand[0]);
-            size = operand_size(mmio_op->operand[0]);
-            value = get_reg_value(size, index, 0, regs);
-        }
-        else if ( mmio_op->operand[0] & IMMEDIATE )
-        {
-            value = mmio_op->immediate;
-        }
-        send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1,
-                      op_size, 0, IOREQ_READ, df, 0);
-        break;
-    }
-
-    case INSTR_XCHG:
-        if ( mmio_op->operand[0] & REGISTER ) {
-            long value;
-            unsigned long operand = mmio_op->operand[0];
-            value = get_reg_value(operand_size(operand),
-                                  operand_index(operand), 0,
-                                  regs);
-            /* send the request and wait for the value */
-            send_mmio_req(IOREQ_TYPE_XCHG, gpa, 1,
-                          op_size, value, IOREQ_WRITE, df, 0);
-        } else {
-            /* the destination is a register */
-            long value;
-            unsigned long operand = mmio_op->operand[1];
-            value = get_reg_value(operand_size(operand),
-                                  operand_index(operand), 0,
-                                  regs);
-            /* send the request and wait for the value */
-            send_mmio_req(IOREQ_TYPE_XCHG, gpa, 1,
-                          op_size, value, IOREQ_WRITE, df, 0);
-        }
-        break;
-
-    default:
-        printk("Unhandled MMIO instruction\n");
-        domain_crash_synchronous();
-    }
+    hvm_emulate_writeback(&ctxt);
+
+    curr->arch.hvm_vcpu.mmio_in_progress = curr->arch.hvm_vcpu.io_in_progress;
+
+    return 1;
 }
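[Callers of the new harness get a success/failure return rather than a synchronous domain crash; the intercept paths later in this patch use it in the obvious way, mirroring the svm_do_nested_pgfault hunk below:

    if ( !handle_mmio() )
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
]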
 
 DEFINE_PER_CPU(int, guest_handles_in_xen_space);
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/stdvga.c
--- a/xen/arch/x86/hvm/stdvga.c Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/stdvga.c Wed Feb 20 14:36:45 2008 +0000
@@ -458,33 +458,6 @@ static int mmio_move(struct hvm_hw_stdvg
     return 1;
 }
 
-static uint32_t op_and(uint32_t a, uint32_t b) { return a & b; }
-static uint32_t op_or (uint32_t a, uint32_t b) { return a | b; }
-static uint32_t op_xor(uint32_t a, uint32_t b) { return a ^ b; }
-static uint32_t op_add(uint32_t a, uint32_t b) { return a + b; }
-static uint32_t op_sub(uint32_t a, uint32_t b) { return a - b; }
-static uint32_t (*op_array[])(uint32_t, uint32_t) = {
-    [IOREQ_TYPE_AND] = op_and,
-    [IOREQ_TYPE_OR ] = op_or,
-    [IOREQ_TYPE_XOR] = op_xor,
-    [IOREQ_TYPE_ADD] = op_add,
-    [IOREQ_TYPE_SUB] = op_sub
-};
-
-static int mmio_op(struct hvm_hw_stdvga *s, ioreq_t *p)
-{
-    uint32_t orig, mod = 0;
-    orig = stdvga_mem_read(p->addr, p->size);
-
-    if ( p->dir == IOREQ_WRITE )
-    {
-        mod = (op_array[p->type])(orig, p->data);
-        stdvga_mem_write(p->addr, mod, p->size);
-    }
-
-    return 0; /* Don't try to buffer these operations */
-}
-
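[The removed read-modify-write path dispatched through a designated-initializer function table keyed by ioreq type; the same pattern in miniature, with an illustrative enum and names not taken from the patch:

    #include <assert.h>

    static unsigned int op_add(unsigned int a, unsigned int b) { return a + b; }
    static unsigned int op_and(unsigned int a, unsigned int b) { return a & b; }

    enum { OP_ADD, OP_AND, OP_NR };
    static unsigned int (*ops[OP_NR])(unsigned int, unsigned int) = {
        [OP_ADD] = op_add,
        [OP_AND] = op_and,
    };

    int main(void)
    {
        assert(ops[OP_ADD](2, 3) == 5);
        assert(ops[OP_AND](6, 3) == 2);
        return 0;
    }
]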
 int stdvga_intercept_mmio(ioreq_t *p)
 {
     struct domain *d = current->domain;
@@ -505,13 +478,6 @@ int stdvga_intercept_mmio(ioreq_t *p)
         {
         case IOREQ_TYPE_COPY:
             buf = mmio_move(s, p);
-            break;
-        case IOREQ_TYPE_AND:
-        case IOREQ_TYPE_OR:
-        case IOREQ_TYPE_XOR:
-        case IOREQ_TYPE_ADD:
-        case IOREQ_TYPE_SUB:
-            buf = mmio_op(s, p);
             break;
         default:
             gdprintk(XENLOG_WARNING, "unsupported mmio request type:%d "
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/svm/svm.c        Wed Feb 20 14:36:45 2008 +0000
@@ -66,6 +66,13 @@ static void svm_update_guest_efer(struct
 static void svm_update_guest_efer(struct vcpu *v);
 static void svm_inject_exception(
     unsigned int trapnr, int errcode, unsigned long cr2);
+static void svm_cpuid_intercept(
+    unsigned int *eax, unsigned int *ebx,
+    unsigned int *ecx, unsigned int *edx);
+static void svm_wbinvd_intercept(void);
+static void svm_fpu_dirty_intercept(void);
+static int svm_msr_read_intercept(struct cpu_user_regs *regs);
+static int svm_msr_write_intercept(struct cpu_user_regs *regs);
 
 /* va of hardware host save area     */
 static void *hsa[NR_CPUS] __read_mostly;
@@ -112,7 +119,7 @@ static enum handler_return long_mode_do_
     switch ( ecx )
     {
     case MSR_EFER:
-        if ( !hvm_set_efer(msr_content) )
+        if ( hvm_set_efer(msr_content) )
             return HNDL_exception_raised;
         break;
 
@@ -808,7 +815,12 @@ static struct hvm_function_table svm_fun
     .inject_exception     = svm_inject_exception,
     .init_hypercall_page  = svm_init_hypercall_page,
     .event_pending        = svm_event_pending,
-    .do_pmu_interrupt     = svm_do_pmu_interrupt
+    .do_pmu_interrupt     = svm_do_pmu_interrupt,
+    .cpuid_intercept      = svm_cpuid_intercept,
+    .wbinvd_intercept     = svm_wbinvd_intercept,
+    .fpu_dirty_intercept  = svm_fpu_dirty_intercept,
+    .msr_read_intercept   = svm_msr_read_intercept,
+    .msr_write_intercept  = svm_msr_write_intercept
 };
 
 int start_svm(struct cpuinfo_x86 *c)
@@ -873,7 +885,8 @@ static void svm_do_nested_pgfault(paddr_
     mfn = gfn_to_mfn_current(gfn, &p2mt);
     if ( p2mt == p2m_mmio_dm )
     {
-        handle_mmio(gpa);
+        if ( !handle_mmio() )
+            hvm_inject_exception(TRAP_gp_fault, 0, 0);
         return;
     }
 
@@ -882,9 +895,10 @@ static void svm_do_nested_pgfault(paddr_
     p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
 }
 
-static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
+static void svm_fpu_dirty_intercept(void)
 {
     struct vcpu *curr = current;
+    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
 
     svm_fpu_enter(curr);
 
@@ -893,72 +907,83 @@ static void svm_do_no_device_fault(struc
 }
 
 #define bitmaskof(idx)  (1U << ((idx) & 31))
-static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb,
-                                struct cpu_user_regs *regs)
-{
-    unsigned long input = regs->eax;
-    unsigned int eax, ebx, ecx, edx;
+static void svm_cpuid_intercept(
+    unsigned int *eax, unsigned int *ebx,
+    unsigned int *ecx, unsigned int *edx)
+{
+    unsigned int input = *eax;
     struct vcpu *v = current;
-    int inst_len;
-
-    hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
+
+    hvm_cpuid(input, eax, ebx, ecx, edx);
 
     switch ( input )
     {
     case 0x00000001:
         /* Mask Intel-only features. */
-        ecx &= ~(bitmaskof(X86_FEATURE_SSSE3) |
-                 bitmaskof(X86_FEATURE_SSE4_1) |
-                 bitmaskof(X86_FEATURE_SSE4_2));
+        *ecx &= ~(bitmaskof(X86_FEATURE_SSSE3) |
+                  bitmaskof(X86_FEATURE_SSE4_1) |
+                  bitmaskof(X86_FEATURE_SSE4_2));
         break;
 
     case 0x80000001:
         /* Filter features which are shared with 0x00000001:EDX. */
         if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
-            __clear_bit(X86_FEATURE_APIC & 31, &edx);
+            __clear_bit(X86_FEATURE_APIC & 31, edx);
 #if CONFIG_PAGING_LEVELS >= 3
         if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
 #endif
-            __clear_bit(X86_FEATURE_PAE & 31, &edx);
-        __clear_bit(X86_FEATURE_PSE36 & 31, &edx);
+            __clear_bit(X86_FEATURE_PAE & 31, edx);
+        __clear_bit(X86_FEATURE_PSE36 & 31, edx);
 
         /* Filter all other features according to a whitelist. */
-        ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) |
-                bitmaskof(X86_FEATURE_ALTMOVCR) |
-                bitmaskof(X86_FEATURE_ABM) |
-                bitmaskof(X86_FEATURE_SSE4A) |
-                bitmaskof(X86_FEATURE_MISALIGNSSE) |
-                bitmaskof(X86_FEATURE_3DNOWPF));
-        edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
-                bitmaskof(X86_FEATURE_NX) |
-                bitmaskof(X86_FEATURE_LM) |
-                bitmaskof(X86_FEATURE_SYSCALL) |
-                bitmaskof(X86_FEATURE_MP) |
-                bitmaskof(X86_FEATURE_MMXEXT) |
-                bitmaskof(X86_FEATURE_FFXSR));
+        *ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) |
+                 bitmaskof(X86_FEATURE_ALTMOVCR) |
+                 bitmaskof(X86_FEATURE_ABM) |
+                 bitmaskof(X86_FEATURE_SSE4A) |
+                 bitmaskof(X86_FEATURE_MISALIGNSSE) |
+                 bitmaskof(X86_FEATURE_3DNOWPF));
+        *edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
+                 bitmaskof(X86_FEATURE_NX) |
+                 bitmaskof(X86_FEATURE_LM) |
+                 bitmaskof(X86_FEATURE_SYSCALL) |
+                 bitmaskof(X86_FEATURE_MP) |
+                 bitmaskof(X86_FEATURE_MMXEXT) |
+                 bitmaskof(X86_FEATURE_FFXSR));
         break;
 
     case 0x80000007:
     case 0x8000000A:
         /* Mask out features of power management and SVM extension. */
-        eax = ebx = ecx = edx = 0;
+        *eax = *ebx = *ecx = *edx = 0;
         break;
 
     case 0x80000008:
         /* Make sure the number of CPU cores is 1 when HTT=0 */
-        ecx &= 0xFFFFFF00;
-        break;
-    }
+        *ecx &= 0xFFFFFF00;
+        break;
+    }
+
+    HVMTRACE_3D(CPUID, v, input,
+                ((uint64_t)*eax << 32) | *ebx, ((uint64_t)*ecx << 32) | *edx);
+}
+
+static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
+{
+    unsigned int eax, ebx, ecx, edx, inst_len;
+
+    eax = regs->eax;
+    ebx = regs->ebx;
+    ecx = regs->ecx;
+    edx = regs->edx;
+
+    svm_cpuid_intercept(&eax, &ebx, &ecx, &edx);
 
     regs->eax = eax;
     regs->ebx = ebx;
     regs->ecx = ecx;
     regs->edx = edx;
 
-    HVMTRACE_3D(CPUID, v, input,
-                ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
-
-    inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
+    inst_len = __get_instruction_length(current, INSTR_CPUID, NULL);
     __update_guest_eip(regs, inst_len);
 }
 
@@ -1484,11 +1509,11 @@ static int mov_to_cr(int gpreg, int cr, 
     switch ( cr )
     {
     case 0: 
-        return hvm_set_cr0(value);
+        return !hvm_set_cr0(value);
     case 3:
-        return hvm_set_cr3(value);
+        return !hvm_set_cr3(value);
     case 4:
-        return hvm_set_cr4(value);
+        return !hvm_set_cr4(value);
     default:
         gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
         domain_crash(v->domain);
@@ -1564,7 +1589,7 @@ static void svm_cr_access(
         gpreg = decode_src_reg(prefix, buffer[index+2]);
         value = get_reg(gpreg, regs, vmcb) & 0xF;
         value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value;
-        result = hvm_set_cr0(value);
+        result = !hvm_set_cr0(value);
         HVMTRACE_1D(LMSW, current, value);
         break;
 
@@ -1635,176 +1660,197 @@ static void svm_cr_access(
         __update_guest_eip(regs, inst_len);
 }
 
-static void svm_do_msr_access(
-    struct vcpu *v, struct cpu_user_regs *regs)
-{
-    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    int  inst_len;
-    u64 msr_content=0;
+static int svm_msr_read_intercept(struct cpu_user_regs *regs)
+{
+    u64 msr_content = 0;
     u32 ecx = regs->ecx, eax, edx;
-
-    HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x, exitinfo = %lx",
-                ecx, (u32)regs->eax, (u32)regs->edx,
-                (unsigned long)vmcb->exitinfo1);
-
-    /* is it a read? */
-    if (vmcb->exitinfo1 == 0)
-    {
-        switch (ecx) {
-        case MSR_IA32_TSC:
-            msr_content = hvm_get_guest_time(v);
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    switch ( ecx )
+    {
+    case MSR_IA32_TSC:
+        msr_content = hvm_get_guest_time(v);
+        break;
+
+    case MSR_IA32_APICBASE:
+        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+        break;
+
+    case MSR_EFER:
+        msr_content = v->arch.hvm_vcpu.guest_efer;
+        break;
+
+    case MSR_IA32_MC4_MISC: /* Threshold register */
+    case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
+        /*
+         * MCA/MCE: We report that the threshold register is unavailable
+         * for OS use (locked by the BIOS).
+         */
+        msr_content = 1ULL << 61; /* MC4_MISC.Locked */
+        break;
+
+    case MSR_IA32_EBC_FREQUENCY_ID:
+        /*
+         * This Intel-only register may be accessed if this HVM guest
+         * has been migrated from an Intel host. The value zero is not
+         * particularly meaningful, but at least avoids the guest crashing!
+         */
+        msr_content = 0;
+        break;
+
+    case MSR_K8_VM_HSAVE_PA:
+        goto gpf;
+
+    case MSR_IA32_MCG_CAP:
+    case MSR_IA32_MCG_STATUS:
+    case MSR_IA32_MC0_STATUS:
+    case MSR_IA32_MC1_STATUS:
+    case MSR_IA32_MC2_STATUS:
+    case MSR_IA32_MC3_STATUS:
+    case MSR_IA32_MC4_STATUS:
+    case MSR_IA32_MC5_STATUS:
+        /* No point in letting the guest see real MCEs */
+        msr_content = 0;
+        break;
+
+    case MSR_IA32_DEBUGCTLMSR:
+        msr_content = vmcb->debugctlmsr;
+        break;
+
+    case MSR_IA32_LASTBRANCHFROMIP:
+        msr_content = vmcb->lastbranchfromip;
+        break;
+
+    case MSR_IA32_LASTBRANCHTOIP:
+        msr_content = vmcb->lastbranchtoip;
+        break;
+
+    case MSR_IA32_LASTINTFROMIP:
+        msr_content = vmcb->lastintfromip;
+        break;
+
+    case MSR_IA32_LASTINTTOIP:
+        msr_content = vmcb->lastinttoip;
+        break;
+
+    default:
+        if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
+             rdmsr_safe(ecx, eax, edx) == 0 )
+        {
+            regs->eax = eax;
+            regs->edx = edx;
+            goto done;
+        }
+        goto gpf;
+    }
+    regs->eax = msr_content & 0xFFFFFFFF;
+    regs->edx = msr_content >> 32;
+
+ done:
+    hvmtrace_msr_read(v, ecx, msr_content);
+    HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
+                ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
+    return X86EMUL_OKAY;
+
+ gpf:
+    svm_inject_exception(TRAP_gp_fault, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+static int svm_msr_write_intercept(struct cpu_user_regs *regs)
+{
+    u64 msr_content = 0;
+    u32 ecx = regs->ecx;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
+
+    hvmtrace_msr_write(v, ecx, msr_content);
+
+    switch ( ecx )
+    {
+    case MSR_IA32_TSC:
+        hvm_set_guest_time(v, msr_content);
+        pt_reset(v);
+        break;
+
+    case MSR_IA32_APICBASE:
+        vlapic_msr_set(vcpu_vlapic(v), msr_content);
+        break;
+
+    case MSR_K8_VM_HSAVE_PA:
+        goto gpf;
+
+    case MSR_IA32_DEBUGCTLMSR:
+        vmcb->debugctlmsr = msr_content;
+        if ( !msr_content || !cpu_has_svm_lbrv )
             break;
-
-        case MSR_IA32_APICBASE:
-            msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+        vmcb->lbr_control.fields.enable = 1;
+        svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
+        svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
+        svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
+        svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
+        svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
+        break;
+
+    case MSR_IA32_LASTBRANCHFROMIP:
+        vmcb->lastbranchfromip = msr_content;
+        break;
+
+    case MSR_IA32_LASTBRANCHTOIP:
+        vmcb->lastbranchtoip = msr_content;
+        break;
+
+    case MSR_IA32_LASTINTFROMIP:
+        vmcb->lastintfromip = msr_content;
+        break;
+
+    case MSR_IA32_LASTINTTOIP:
+        vmcb->lastinttoip = msr_content;
+        break;
+
+    default:
+        switch ( long_mode_do_msr_write(regs) )
+        {
+        case HNDL_unhandled:
+            wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
             break;
-
-        case MSR_EFER:
-            msr_content = v->arch.hvm_vcpu.guest_efer;
+        case HNDL_exception_raised:
+            return X86EMUL_EXCEPTION;
+        case HNDL_done:
             break;
-
-        case MSR_IA32_MC4_MISC: /* Threshold register */
-        case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
-            /*
-             * MCA/MCE: We report that the threshold register is unavailable
-             * for OS use (locked by the BIOS).
-             */
-            msr_content = 1ULL << 61; /* MC4_MISC.Locked */
-            break;
-
-        case MSR_IA32_EBC_FREQUENCY_ID:
-            /*
-             * This Intel-only register may be accessed if this HVM guest
-             * has been migrated from an Intel host. The value zero is not
-             * particularly meaningful, but at least avoids the guest crashing!
-             */
-            msr_content = 0;
-            break;
-
-        case MSR_K8_VM_HSAVE_PA:
-            svm_inject_exception(TRAP_gp_fault, 0, 0);
-            break;
-
-        case MSR_IA32_MCG_CAP:
-        case MSR_IA32_MCG_STATUS:
-        case MSR_IA32_MC0_STATUS:
-        case MSR_IA32_MC1_STATUS:
-        case MSR_IA32_MC2_STATUS:
-        case MSR_IA32_MC3_STATUS:
-        case MSR_IA32_MC4_STATUS:
-        case MSR_IA32_MC5_STATUS:
-            /* No point in letting the guest see real MCEs */
-            msr_content = 0;
-            break;
-
-        case MSR_IA32_DEBUGCTLMSR:
-            msr_content = vmcb->debugctlmsr;
-            break;
-
-        case MSR_IA32_LASTBRANCHFROMIP:
-            msr_content = vmcb->lastbranchfromip;
-            break;
-
-        case MSR_IA32_LASTBRANCHTOIP:
-            msr_content = vmcb->lastbranchtoip;
-            break;
-
-        case MSR_IA32_LASTINTFROMIP:
-            msr_content = vmcb->lastintfromip;
-            break;
-
-        case MSR_IA32_LASTINTTOIP:
-            msr_content = vmcb->lastinttoip;
-            break;
-
-        default:
-            if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
-                 rdmsr_safe(ecx, eax, edx) == 0 )
-            {
-                regs->eax = eax;
-                regs->edx = edx;
-                goto done;
-            }
-            svm_inject_exception(TRAP_gp_fault, 0, 0);
-            return;
-        }
-        regs->eax = msr_content & 0xFFFFFFFF;
-        regs->edx = msr_content >> 32;
-
- done:
-        hvmtrace_msr_read(v, ecx, msr_content);
-        HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
-                    ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
-
+        }
+        break;
+    }
+
+    return X86EMUL_OKAY;
+
+ gpf:
+    svm_inject_exception(TRAP_gp_fault, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+static void svm_do_msr_access(struct cpu_user_regs *regs)
+{
+    int rc, inst_len;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    if ( vmcb->exitinfo1 == 0 )
+    {
+        rc = svm_msr_read_intercept(regs);
         inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
     }
     else
     {
-        msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
-
-        hvmtrace_msr_write(v, ecx, msr_content);
-
-        switch (ecx)
-        {
-        case MSR_IA32_TSC:
-            hvm_set_guest_time(v, msr_content);
-            pt_reset(v);
-            break;
-
-        case MSR_IA32_APICBASE:
-            vlapic_msr_set(vcpu_vlapic(v), msr_content);
-            break;
-
-        case MSR_K8_VM_HSAVE_PA:
-            svm_inject_exception(TRAP_gp_fault, 0, 0);
-            break;
-
-        case MSR_IA32_DEBUGCTLMSR:
-            vmcb->debugctlmsr = msr_content;
-            if ( !msr_content || !cpu_has_svm_lbrv )
-                break;
-            vmcb->lbr_control.fields.enable = 1;
-            svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
-            svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
-            svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
-            svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
-            svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
-            break;
-
-        case MSR_IA32_LASTBRANCHFROMIP:
-            vmcb->lastbranchfromip = msr_content;
-            break;
-
-        case MSR_IA32_LASTBRANCHTOIP:
-            vmcb->lastbranchtoip = msr_content;
-            break;
-
-        case MSR_IA32_LASTINTFROMIP:
-            vmcb->lastintfromip = msr_content;
-            break;
-
-        case MSR_IA32_LASTINTTOIP:
-            vmcb->lastinttoip = msr_content;
-            break;
-
-        default:
-            switch ( long_mode_do_msr_write(regs) )
-            {
-            case HNDL_unhandled:
-                wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
-                break;
-            case HNDL_exception_raised:
-                return;
-            case HNDL_done:
-                break;
-            }
-            break;
-        }
-
+        rc = svm_msr_write_intercept(regs);
         inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
     }
 
-    __update_guest_eip(regs, inst_len);
+    if ( rc == X86EMUL_OKAY )
+        __update_guest_eip(regs, inst_len);
 }
 
 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
@@ -1830,21 +1876,26 @@ static void svm_vmexit_do_hlt(struct vmc
     hvm_hlt(regs->eflags);
 }
 
+static void wbinvd_ipi(void *info)
+{
+    wbinvd();
+}
+
+static void svm_wbinvd_intercept(void)
+{
+    if ( !list_empty(&(domain_hvm_iommu(current->domain)->pdev_list)) )
+        on_each_cpu(wbinvd_ipi, NULL, 1, 1);
+}
+
 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs)
 {
     enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD };
-    struct vcpu *curr = current;
-    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
     int inst_len;
 
-    if ( !list_empty(&(domain_hvm_iommu(curr->domain)->pdev_list)) )
-    {
-        vmcb->general2_intercepts &= ~GENERAL2_INTERCEPT_WBINVD;
-        wbinvd();
-    }
+    svm_wbinvd_intercept();
 
     inst_len = __get_instruction_length_from_list(
-        curr, list, ARRAY_SIZE(list), NULL, NULL);
+        current, list, ARRAY_SIZE(list), NULL, NULL);
     __update_guest_eip(regs, inst_len);
 }
 
@@ -1982,7 +2033,7 @@ asmlinkage void svm_vmexit_handler(struc
         break;
 
     case VMEXIT_EXCEPTION_NM:
-        svm_do_no_device_fault(vmcb);
+        svm_fpu_dirty_intercept();
         break;  
 
     case VMEXIT_EXCEPTION_PF: {
@@ -2036,7 +2087,7 @@ asmlinkage void svm_vmexit_handler(struc
     }
 
     case VMEXIT_CPUID:
-        svm_vmexit_do_cpuid(vmcb, regs);
+        svm_vmexit_do_cpuid(regs);
         break;
 
     case VMEXIT_HLT:
@@ -2083,7 +2134,7 @@ asmlinkage void svm_vmexit_handler(struc
         break;
 
     case VMEXIT_MSR:
-        svm_do_msr_access(v, regs);
+        svm_do_msr_access(regs);
         break;
 
     case VMEXIT_SHUTDOWN:
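
Factoring the CPUID/WBINVD/FPU-dirty/MSR paths into hvm_function_table
hooks (with X86EMUL return codes on the non-void ones) is what lets the
new harness drive them identically on SVM and VMX. A sketch of how the
harness might wrap the MSR-read hook -- the hvmemul_read_msr name and
body are assumptions, since the new emulate.c is not quoted in this mail:

    static int hvmemul_read_msr(unsigned long reg, uint64_t *val,
                                struct x86_emulate_ctxt *ctxt)
    {
        struct cpu_user_regs _regs = *ctxt->regs;
        int rc;

        _regs.ecx = (uint32_t)reg;

        rc = hvm_funcs.msr_read_intercept(&_regs);
        if ( rc != X86EMUL_OKAY )
            return rc;

        *val = ((uint64_t)(uint32_t)_regs.edx << 32) | (uint32_t)_regs.eax;
        return X86EMUL_OKAY;
    }

Note the intercept only touches eax/edx; advancing the guest RIP stays
with the vmexit handlers, which retire the instruction only on
X86EMUL_OKAY, as svm_do_msr_access() above shows.
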
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/vmx/realmode.c
--- a/xen/arch/x86/hvm/vmx/realmode.c   Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/vmx/realmode.c   Wed Feb 20 14:36:45 2008 +0000
@@ -3,7 +3,7 @@
  * 
  * Real-mode emulation for VMX.
  * 
- * Copyright (c) 2007 Citrix Systems, Inc.
+ * Copyright (c) 2007-2008 Citrix Systems, Inc.
  * 
  * Authors:
  *    Keir Fraser <keir.fraser@xxxxxxxxxx>
@@ -15,33 +15,14 @@
 #include <xen/sched.h>
 #include <xen/paging.h>
 #include <asm/event.h>
+#include <asm/hvm/emulate.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/vmx/vmcs.h>
-#include <asm/x86_emulate.h>
 
 struct realmode_emulate_ctxt {
-    struct x86_emulate_ctxt ctxt;
-
-    /* Cache of 16 bytes of instruction. */
-    uint8_t insn_buf[16];
-    unsigned long insn_buf_eip;
-
-    struct segment_register seg_reg[10];
-
-    union {
-        struct {
-            unsigned int hlt:1;
-            unsigned int mov_ss:1;
-            unsigned int sti:1;
-        } flags;
-        unsigned int flag_word;
-    };
-
-    uint8_t exn_vector;
-    uint8_t exn_insn_len;
-
+    struct hvm_emulate_ctxt hvm;
     uint32_t intr_shadow;
 };
 
@@ -50,12 +31,15 @@ static void realmode_deliver_exception(
     unsigned int insn_len,
     struct realmode_emulate_ctxt *rm_ctxt)
 {
-    struct segment_register *idtr = &rm_ctxt->seg_reg[x86_seg_idtr];
-    struct segment_register *csr = &rm_ctxt->seg_reg[x86_seg_cs];
-    struct cpu_user_regs *regs = rm_ctxt->ctxt.regs;
+    struct segment_register *idtr, *csr;
+    struct cpu_user_regs *regs = rm_ctxt->hvm.ctxt.regs;
     uint32_t cs_eip, pstk;
     uint16_t frame[3];
     unsigned int last_byte;
+
+    idtr = hvmemul_get_seg_reg(x86_seg_idtr, &rm_ctxt->hvm);
+    csr  = hvmemul_get_seg_reg(x86_seg_cs,   &rm_ctxt->hvm);
+    __set_bit(x86_seg_cs, &rm_ctxt->hvm.seg_reg_dirty);
 
  again:
     last_byte = (vector * 4) + 3;
@@ -90,7 +74,7 @@ static void realmode_deliver_exception(
     frame[1] = csr->sel;
     frame[2] = regs->eflags & ~X86_EFLAGS_RF;
 
-    if ( rm_ctxt->ctxt.addr_size == 32 )
+    if ( rm_ctxt->hvm.ctxt.addr_size == 32 )
     {
         regs->esp -= 6;
         pstk = regs->esp;
@@ -102,7 +86,7 @@ static void realmode_deliver_exception(
         regs->esp |= pstk;
     }
 
-    pstk += rm_ctxt->seg_reg[x86_seg_ss].base;
+    pstk += hvmemul_get_seg_reg(x86_seg_ss, &rm_ctxt->hvm)->base;
     (void)hvm_copy_to_guest_phys(pstk, frame, sizeof(frame));
 
     csr->sel  = cs_eip >> 16;
@@ -118,597 +102,34 @@ static void realmode_deliver_exception(
     }
 }
 
-static uint32_t virtual_to_linear(
-    enum x86_segment seg,
-    uint32_t offset,
-    struct realmode_emulate_ctxt *rm_ctxt)
-{
-    uint32_t addr = offset;
-    if ( seg == x86_seg_none )
-        return addr;
-    ASSERT(is_x86_user_segment(seg));
-    return addr + rm_ctxt->seg_reg[seg].base;
-}
-
-static int
-realmode_read(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long *val,
-    unsigned int bytes,
-    enum hvm_access_type access_type,
-    struct realmode_emulate_ctxt *rm_ctxt)
-{
-    uint32_t addr = virtual_to_linear(seg, offset, rm_ctxt);
-
-    *val = 0;
-
-    if ( hvm_copy_from_guest_virt_nofault(val, addr, bytes) )
-    {
-        struct vcpu *curr = current;
-
-        if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-            return X86EMUL_UNHANDLEABLE;
-
-        if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-            return X86EMUL_UNHANDLEABLE;
-
-        if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-        {
-            curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-            send_mmio_req(IOREQ_TYPE_COPY, addr, 1, bytes,
-                          0, IOREQ_READ, 0, 0);
-        }
-
-        if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-            return X86EMUL_RETRY;
-
-        *val = curr->arch.hvm_vmx.real_mode_io_data;
-        curr->arch.hvm_vmx.real_mode_io_completed = 0;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_emulate_read(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long *val,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    return realmode_read(
-        seg, offset, val, bytes, hvm_access_read,
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt));
-}
-
-static int
-realmode_emulate_insn_fetch(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long *val,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    unsigned int insn_off = offset - rm_ctxt->insn_buf_eip;
-
-    /* Fall back if requested bytes are not in the prefetch cache. */
-    if ( unlikely((insn_off + bytes) > sizeof(rm_ctxt->insn_buf)) )
-        return realmode_read(
-            seg, offset, val, bytes,
-            hvm_access_insn_fetch, rm_ctxt);
-
-    /* Hit the cache. Simple memcpy. */
-    *val = 0;
-    memcpy(val, &rm_ctxt->insn_buf[insn_off], bytes);
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_emulate_write(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long val,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    uint32_t addr = virtual_to_linear(seg, offset, rm_ctxt);
-
-    if ( hvm_copy_to_guest_virt_nofault(addr, &val, bytes) )
-    {
-        struct vcpu *curr = current;
-
-        if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-            return X86EMUL_UNHANDLEABLE;
-
-        if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-            return X86EMUL_UNHANDLEABLE;
-
-        curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-        send_mmio_req(IOREQ_TYPE_COPY, addr, 1, bytes,
-                      val, IOREQ_WRITE, 0, 0);
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int 
-realmode_emulate_cmpxchg(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long old,
-    unsigned long new,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    /* Fix this in case the guest is really relying on r-m-w atomicity. */
-    return realmode_emulate_write(seg, offset, new, bytes, ctxt);
-}
-
-static int 
-realmode_rep_ins(
-    uint16_t src_port,
-    enum x86_segment dst_seg,
-    unsigned long dst_offset,
-    unsigned int bytes_per_rep,
-    unsigned long *reps,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
+static void realmode_emulate_one(struct realmode_emulate_ctxt *rm_ctxt)
+{
+    struct cpu_user_regs *regs = rm_ctxt->hvm.ctxt.regs;
     struct vcpu *curr = current;
-    uint32_t paddr = virtual_to_linear(dst_seg, dst_offset, rm_ctxt);
-
-    if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-        return X86EMUL_UNHANDLEABLE;
-
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return X86EMUL_UNHANDLEABLE;
-
-    if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-    {
-        curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-        send_pio_req(src_port, *reps, bytes_per_rep,
-                     paddr, IOREQ_READ,
-                     !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
-    }
-
-    if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-        return X86EMUL_RETRY;
-
-    curr->arch.hvm_vmx.real_mode_io_completed = 0;
-
-    return X86EMUL_OKAY;
-}
-
-static int 
-realmode_rep_outs(
-    enum x86_segment src_seg,
-    unsigned long src_offset,
-    uint16_t dst_port,
-    unsigned int bytes_per_rep,
-    unsigned long *reps,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    struct vcpu *curr = current;
-    uint32_t paddr = virtual_to_linear(src_seg, src_offset, rm_ctxt);
-
-    if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-        return X86EMUL_UNHANDLEABLE;
-
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return X86EMUL_UNHANDLEABLE;
-
-    curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-    send_pio_req(dst_port, *reps, bytes_per_rep,
-                 paddr, IOREQ_WRITE,
-                 !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
-
-    return X86EMUL_OKAY;
-}
-
-static int 
-realmode_rep_movs(
-   enum x86_segment src_seg,
-   unsigned long src_offset,
-   enum x86_segment dst_seg,
-   unsigned long dst_offset,
-   unsigned int bytes_per_rep,
-   unsigned long *reps,
-   struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    struct vcpu *curr = current;
-    uint32_t saddr = virtual_to_linear(src_seg, src_offset, rm_ctxt);
-    uint32_t daddr = virtual_to_linear(dst_seg, dst_offset, rm_ctxt);
-    p2m_type_t p2mt;
-
-    if ( (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
-         curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return X86EMUL_UNHANDLEABLE;
-
-    mfn_x(gfn_to_mfn_current(saddr >> PAGE_SHIFT, &p2mt));
-    if ( !p2m_is_ram(p2mt) )
-    {
-        if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-        {
-            curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-            send_mmio_req(IOREQ_TYPE_COPY, saddr, *reps, bytes_per_rep,
-                      daddr, IOREQ_READ,
-                      !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
-        }
-
-        if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-            return X86EMUL_RETRY;
-
-        curr->arch.hvm_vmx.real_mode_io_completed = 0;
-    }
-    else
-    {
-        mfn_x(gfn_to_mfn_current(daddr >> PAGE_SHIFT, &p2mt));
-        if ( p2m_is_ram(p2mt) )
-            return X86EMUL_UNHANDLEABLE;
-        curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-        send_mmio_req(IOREQ_TYPE_COPY, daddr, *reps, bytes_per_rep,
-                      saddr, IOREQ_WRITE,
-                      !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_read_segment(
-    enum x86_segment seg,
-    struct segment_register *reg,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    memcpy(reg, &rm_ctxt->seg_reg[seg], sizeof(struct segment_register));
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_write_segment(
-    enum x86_segment seg,
-    struct segment_register *reg,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    struct vcpu *curr = current;
-
-    if ( seg == x86_seg_cs )
-    {
-        if ( reg->attr.fields.dpl != 0 )
-            return X86EMUL_UNHANDLEABLE;
+    unsigned long seg_reg_dirty;
+    uint32_t new_intr_shadow, intr_info;
+    int rc;
+
+    seg_reg_dirty = rm_ctxt->hvm.seg_reg_dirty;
+    rm_ctxt->hvm.seg_reg_dirty = 0;
+
+    rc = hvm_emulate_one(&rm_ctxt->hvm);
+
+    if ( test_bit(x86_seg_cs, &rm_ctxt->hvm.seg_reg_dirty) )
+    {
         curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_CS;
-        if ( reg->sel & 3 )
+        if ( hvmemul_get_seg_reg(x86_seg_cs, &rm_ctxt->hvm)->sel & 3 )
             curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_CS;
     }
 
-    if ( seg == x86_seg_ss )
-    {
-        if ( reg->attr.fields.dpl != 0 )
-            return X86EMUL_UNHANDLEABLE;
+    if ( test_bit(x86_seg_ss, &rm_ctxt->hvm.seg_reg_dirty) )
+    {
         curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_SS;
-        if ( reg->sel & 3 )
+        if ( hvmemul_get_seg_reg(x86_seg_ss, &rm_ctxt->hvm)->sel & 3 )
             curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_SS;
-        rm_ctxt->flags.mov_ss = 1;
-    }
-
-    memcpy(&rm_ctxt->seg_reg[seg], reg, sizeof(struct segment_register));
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_read_io(
-    unsigned int port,
-    unsigned int bytes,
-    unsigned long *val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct vcpu *curr = current;
-
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return X86EMUL_UNHANDLEABLE;
-
-    if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-    {
-        curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-        send_pio_req(port, 1, bytes, 0, IOREQ_READ, 0, 0);
-    }
-
-    if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-        return X86EMUL_RETRY;
-
-    *val = curr->arch.hvm_vmx.real_mode_io_data;
-    curr->arch.hvm_vmx.real_mode_io_completed = 0;
-
-    return X86EMUL_OKAY;
-}
-
-static int realmode_write_io(
-    unsigned int port,
-    unsigned int bytes,
-    unsigned long val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct vcpu *curr = current;
-
-    if ( port == 0xe9 )
-    {
-        hvm_print_line(curr, val);
-        return X86EMUL_OKAY;
-    }
-
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return X86EMUL_UNHANDLEABLE;
-
-    curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-    send_pio_req(port, 1, bytes, val, IOREQ_WRITE, 0, 0);
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_read_cr(
-    unsigned int reg,
-    unsigned long *val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    switch ( reg )
-    {
-    case 0:
-    case 2:
-    case 3:
-    case 4:
-        *val = current->arch.hvm_vcpu.guest_cr[reg];
-        break;
-    default:
-        return X86EMUL_UNHANDLEABLE;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_write_cr(
-    unsigned int reg,
-    unsigned long val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    switch ( reg )
-    {
-    case 0:
-        if ( !hvm_set_cr0(val) )
-            return X86EMUL_UNHANDLEABLE;
-        break;
-    case 2:
-        current->arch.hvm_vcpu.guest_cr[2] = val;
-        break;
-    case 3:
-        if ( !hvm_set_cr3(val) )
-            return X86EMUL_UNHANDLEABLE;
-        break;
-    case 4:
-        if ( !hvm_set_cr4(val) )
-            return X86EMUL_UNHANDLEABLE;
-        break;
-    default:
-        return X86EMUL_UNHANDLEABLE;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_read_msr(
-    unsigned long reg,
-    uint64_t *val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct cpu_user_regs _regs;
-
-    _regs.ecx = (uint32_t)reg;
-
-    if ( !vmx_msr_read_intercept(&_regs) )
-    {
-        struct realmode_emulate_ctxt *rm_ctxt =
-            container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-        rm_ctxt->exn_vector = (uint8_t)__vmread(VM_ENTRY_INTR_INFO);
-        rm_ctxt->exn_insn_len = 0;
-        __vmwrite(VM_ENTRY_INTR_INFO, 0);
-        return X86EMUL_EXCEPTION;
-    }
-
-    *val = ((uint64_t)(uint32_t)_regs.edx << 32) || (uint32_t)_regs.eax;
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_write_msr(
-    unsigned long reg,
-    uint64_t val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct cpu_user_regs _regs;
-
-    _regs.edx = (uint32_t)(val >> 32);
-    _regs.eax = (uint32_t)val;
-    _regs.ecx = (uint32_t)reg;
-
-    if ( !vmx_msr_write_intercept(&_regs) )
-    {
-        struct realmode_emulate_ctxt *rm_ctxt =
-            container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-        rm_ctxt->exn_vector = (uint8_t)__vmread(VM_ENTRY_INTR_INFO);
-        rm_ctxt->exn_insn_len = 0;
-        __vmwrite(VM_ENTRY_INTR_INFO, 0);
-        return X86EMUL_EXCEPTION;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int realmode_write_rflags(
-    unsigned long val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    if ( (val & X86_EFLAGS_IF) && !(ctxt->regs->eflags & X86_EFLAGS_IF) )
-        rm_ctxt->flags.sti = 1;
-    return X86EMUL_OKAY;
-}
-
-static int realmode_wbinvd(
-    struct x86_emulate_ctxt *ctxt)
-{
-    vmx_wbinvd_intercept();
-    return X86EMUL_OKAY;
-}
-
-static int realmode_cpuid(
-    unsigned int *eax,
-    unsigned int *ebx,
-    unsigned int *ecx,
-    unsigned int *edx,
-    struct x86_emulate_ctxt *ctxt)
-{
-    vmx_cpuid_intercept(eax, ebx, ecx, edx);
-    return X86EMUL_OKAY;
-}
-
-static int realmode_hlt(
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    rm_ctxt->flags.hlt = 1;
-    return X86EMUL_OKAY;
-}
-
-static int realmode_inject_hw_exception(
-    uint8_t vector,
-    uint16_t error_code,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-
-    /* We don't emulate protected-mode exception delivery. */
-    if ( current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-        return X86EMUL_UNHANDLEABLE;
-
-    if ( error_code != 0 )
-        return X86EMUL_UNHANDLEABLE;
-
-    rm_ctxt->exn_vector = vector;
-    rm_ctxt->exn_insn_len = 0;
-
-    return X86EMUL_OKAY;
-}
-
-static int realmode_inject_sw_interrupt(
-    uint8_t vector,
-    uint8_t insn_len,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-
-    /* We don't emulate protected-mode exception delivery. */
-    if ( current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-        return X86EMUL_UNHANDLEABLE;
-
-    rm_ctxt->exn_vector = vector;
-    rm_ctxt->exn_insn_len = insn_len;
-
-    return X86EMUL_OKAY;
-}
-
-static void realmode_load_fpu_ctxt(
-    struct x86_emulate_ctxt *ctxt)
-{
-    if ( !current->fpu_dirtied )
-        vmx_do_no_device_fault();
-}
-
-static struct x86_emulate_ops realmode_emulator_ops = {
-    .read          = realmode_emulate_read,
-    .insn_fetch    = realmode_emulate_insn_fetch,
-    .write         = realmode_emulate_write,
-    .cmpxchg       = realmode_emulate_cmpxchg,
-    .rep_ins       = realmode_rep_ins,
-    .rep_outs      = realmode_rep_outs,
-    .rep_movs      = realmode_rep_movs,
-    .read_segment  = realmode_read_segment,
-    .write_segment = realmode_write_segment,
-    .read_io       = realmode_read_io,
-    .write_io      = realmode_write_io,
-    .read_cr       = realmode_read_cr,
-    .write_cr      = realmode_write_cr,
-    .read_msr      = realmode_read_msr,
-    .write_msr     = realmode_write_msr,
-    .write_rflags  = realmode_write_rflags,
-    .wbinvd        = realmode_wbinvd,
-    .cpuid         = realmode_cpuid,
-    .hlt           = realmode_hlt,
-    .inject_hw_exception = realmode_inject_hw_exception,
-    .inject_sw_interrupt = realmode_inject_sw_interrupt,
-    .load_fpu_ctxt = realmode_load_fpu_ctxt
-};
-
-static void realmode_emulate_one(struct realmode_emulate_ctxt *rm_ctxt)
-{
-    struct cpu_user_regs *regs = rm_ctxt->ctxt.regs;
-    struct vcpu *curr = current;
-    u32 new_intr_shadow;
-    int rc, io_completed;
-    unsigned long addr;
-
-    rm_ctxt->ctxt.addr_size =
-        rm_ctxt->seg_reg[x86_seg_cs].attr.fields.db ? 32 : 16;
-    rm_ctxt->ctxt.sp_size =
-        rm_ctxt->seg_reg[x86_seg_ss].attr.fields.db ? 32 : 16;
-
-    rm_ctxt->insn_buf_eip = (uint32_t)regs->eip;
-    addr = virtual_to_linear(x86_seg_cs, regs->eip, rm_ctxt);
-    if ( hvm_fetch_from_guest_virt_nofault(rm_ctxt->insn_buf, addr,
-                                           sizeof(rm_ctxt->insn_buf))
-         != HVMCOPY_okay )
-    {
-        gdprintk(XENLOG_ERR, "Failed to pre-fetch instruction bytes.\n");
-        goto fail;
-    }
-
-    rm_ctxt->flag_word = 0;
-
-    io_completed = curr->arch.hvm_vmx.real_mode_io_completed;
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-    {
-        gdprintk(XENLOG_ERR, "I/O in progress before insn is emulated.\n");
-        goto fail;
-    }
-
-    rc = x86_emulate(&rm_ctxt->ctxt, &realmode_emulator_ops);
-
-    if ( curr->arch.hvm_vmx.real_mode_io_completed )
-    {
-        gdprintk(XENLOG_ERR, "I/O completion after insn is emulated.\n");
-        goto fail;
-    }
+    }
+
+    rm_ctxt->hvm.seg_reg_dirty |= seg_reg_dirty;
 
     if ( rc == X86EMUL_UNHANDLEABLE )
     {
@@ -717,31 +138,18 @@ static void realmode_emulate_one(struct 
     }
 
     if ( rc == X86EMUL_RETRY )
-    {
-        BUG_ON(!curr->arch.hvm_vmx.real_mode_io_in_progress);
-        if ( !io_completed )
-            return;
-        gdprintk(XENLOG_ERR, "Multiple I/O reads in a single insn.\n");
-        goto fail;
-    }
-
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress &&
-         (get_ioreq(curr)->vp_ioreq.dir == IOREQ_READ) )
-    {
-        gdprintk(XENLOG_ERR, "I/O read in progress but insn is retired.\n");
-        goto fail;
-    }
+        return;
 
     new_intr_shadow = rm_ctxt->intr_shadow;
 
     /* MOV-SS instruction toggles MOV-SS shadow, else we just clear it. */
-    if ( rm_ctxt->flags.mov_ss )
+    if ( rm_ctxt->hvm.flags.mov_ss )
         new_intr_shadow ^= VMX_INTR_SHADOW_MOV_SS;
     else
         new_intr_shadow &= ~VMX_INTR_SHADOW_MOV_SS;
 
     /* STI instruction toggles STI shadow, else we just clear it. */
-    if ( rm_ctxt->flags.sti )
+    if ( rm_ctxt->hvm.flags.sti )
         new_intr_shadow ^= VMX_INTR_SHADOW_STI;
     else
         new_intr_shadow &= ~VMX_INTR_SHADOW_STI;
@@ -755,10 +163,30 @@ static void realmode_emulate_one(struct 
 
     if ( rc == X86EMUL_EXCEPTION )
     {
+        if ( !rm_ctxt->hvm.flags.exn_pending )
+        {
+            intr_info = __vmread(VM_ENTRY_INTR_INFO);
+            __vmwrite(VM_ENTRY_INTR_INFO, 0);
+            if ( !(intr_info & INTR_INFO_VALID_MASK) )
+            {
+                gdprintk(XENLOG_ERR, "Exception pending but no info.\n");
+                goto fail;
+            }
+            rm_ctxt->hvm.exn_vector = (uint8_t)intr_info;
+            rm_ctxt->hvm.exn_insn_len = 0;
+        }
+
+        if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
+        {
+            gdprintk(XENLOG_ERR, "Exception %02x in protected mode.\n",
+                     rm_ctxt->hvm.exn_vector);
+            goto fail;
+        }
+
         realmode_deliver_exception(
-            rm_ctxt->exn_vector, rm_ctxt->exn_insn_len, rm_ctxt);
-    }
-    else if ( rm_ctxt->flags.hlt && !hvm_local_events_need_delivery(curr) )
+            rm_ctxt->hvm.exn_vector, rm_ctxt->hvm.exn_insn_len, rm_ctxt);
+    }
+    else if ( rm_ctxt->hvm.flags.hlt && !hvm_local_events_need_delivery(curr) )
     {
         hvm_hlt(regs->eflags);
     }
@@ -769,10 +197,11 @@ static void realmode_emulate_one(struct 
     gdprintk(XENLOG_ERR,
              "Real-mode emulation failed @ %04x:%08lx: "
              "%02x %02x %02x %02x %02x %02x\n",
-             rm_ctxt->seg_reg[x86_seg_cs].sel, rm_ctxt->insn_buf_eip,
-             rm_ctxt->insn_buf[0], rm_ctxt->insn_buf[1],
-             rm_ctxt->insn_buf[2], rm_ctxt->insn_buf[3],
-             rm_ctxt->insn_buf[4], rm_ctxt->insn_buf[5]);
+             hvmemul_get_seg_reg(x86_seg_cs, &rm_ctxt->hvm)->sel,
+             rm_ctxt->hvm.insn_buf_eip,
+             rm_ctxt->hvm.insn_buf[0], rm_ctxt->hvm.insn_buf[1],
+             rm_ctxt->hvm.insn_buf[2], rm_ctxt->hvm.insn_buf[3],
+             rm_ctxt->hvm.insn_buf[4], rm_ctxt->hvm.insn_buf[5]);
     domain_crash_synchronous();
 }
 
@@ -780,18 +209,20 @@ void vmx_realmode(struct cpu_user_regs *
 {
     struct vcpu *curr = current;
     struct realmode_emulate_ctxt rm_ctxt;
-    unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
-    unsigned int i, emulations = 0;
-
-    rm_ctxt.ctxt.regs = regs;
-
-    for ( i = 0; i < 10; i++ )
-        hvm_get_segment_register(curr, i, &rm_ctxt.seg_reg[i]);
-
+    struct segment_register *sreg;
+    unsigned long intr_info;
+    unsigned int emulations = 0;
+
+    /* Get-and-clear VM_ENTRY_INTR_INFO. */
+    intr_info = __vmread(VM_ENTRY_INTR_INFO);
+    if ( intr_info & INTR_INFO_VALID_MASK )
+        __vmwrite(VM_ENTRY_INTR_INFO, 0);
+
+    hvm_emulate_prepare(&rm_ctxt.hvm, regs);
     rm_ctxt.intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
 
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress ||
-         curr->arch.hvm_vmx.real_mode_io_completed )
+    if ( curr->arch.hvm_vcpu.io_in_progress ||
+         curr->arch.hvm_vcpu.io_completed )
         realmode_emulate_one(&rm_ctxt);
 
     /* Only deliver interrupts into emulated real mode. */
@@ -799,12 +230,12 @@ void vmx_realmode(struct cpu_user_regs *
          (intr_info & INTR_INFO_VALID_MASK) )
     {
         realmode_deliver_exception((uint8_t)intr_info, 0, &rm_ctxt);
-        __vmwrite(VM_ENTRY_INTR_INFO, 0);
+        intr_info = 0;
     }
 
     while ( curr->arch.hvm_vmx.vmxemul &&
             !softirq_pending(smp_processor_id()) &&
-            !curr->arch.hvm_vmx.real_mode_io_in_progress )
+            !curr->arch.hvm_vcpu.io_in_progress )
     {
         /*
          * Check for pending interrupts only every 16 instructions, because
@@ -825,34 +256,22 @@ void vmx_realmode(struct cpu_user_regs *
          * At this point CS.RPL == SS.RPL == CS.DPL == SS.DPL == 0. For
          * DS, ES, FS and GS the least invasive trick is to set DPL == RPL.
          */
-        rm_ctxt.seg_reg[x86_seg_ds].attr.fields.dpl =
-            rm_ctxt.seg_reg[x86_seg_ds].sel & 3;
-        rm_ctxt.seg_reg[x86_seg_es].attr.fields.dpl =
-            rm_ctxt.seg_reg[x86_seg_es].sel & 3;
-        rm_ctxt.seg_reg[x86_seg_fs].attr.fields.dpl =
-            rm_ctxt.seg_reg[x86_seg_fs].sel & 3;
-        rm_ctxt.seg_reg[x86_seg_gs].attr.fields.dpl =
-            rm_ctxt.seg_reg[x86_seg_gs].sel & 3;
-    }
-
-    for ( i = 0; i < 10; i++ )
-        hvm_set_segment_register(curr, i, &rm_ctxt.seg_reg[i]);
-}
-
-int vmx_realmode_io_complete(void)
-{
-    struct vcpu *curr = current;
-    ioreq_t *p = &get_ioreq(curr)->vp_ioreq;
-
-    if ( !curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return 0;
-
-    curr->arch.hvm_vmx.real_mode_io_in_progress = 0;
-    if ( p->dir == IOREQ_READ )
-    {
-        curr->arch.hvm_vmx.real_mode_io_completed = 1;
-        curr->arch.hvm_vmx.real_mode_io_data = p->data;
-    }
-
-    return 1;
-}
+        sreg = hvmemul_get_seg_reg(x86_seg_ds, &rm_ctxt.hvm);
+        sreg->attr.fields.dpl = sreg->sel & 3;
+        sreg = hvmemul_get_seg_reg(x86_seg_es, &rm_ctxt.hvm);
+        sreg->attr.fields.dpl = sreg->sel & 3;
+        sreg = hvmemul_get_seg_reg(x86_seg_fs, &rm_ctxt.hvm);
+        sreg->attr.fields.dpl = sreg->sel & 3;
+        sreg = hvmemul_get_seg_reg(x86_seg_gs, &rm_ctxt.hvm);
+        sreg->attr.fields.dpl = sreg->sel & 3;
+        rm_ctxt.hvm.seg_reg_dirty |=
+            (1ul << x86_seg_ds) | (1ul << x86_seg_es) |
+            (1ul << x86_seg_fs) | (1ul << x86_seg_gs);
+    }
+
+    hvm_emulate_writeback(&rm_ctxt.hvm);
+
+    /* Re-instate VM_ENTRY_INTR_INFO if we did not discharge it. */
+    if ( intr_info & INTR_INFO_VALID_MASK )
+        __vmwrite(VM_ENTRY_INTR_INFO, intr_info);
+}
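
Worth flagging in the deleted code: realmode_read_msr() combined the
result halves with the logical operator '||', so every successful
real-mode RDMSR stored 0 or 1 in *val rather than the MSR contents.
The rewrite retires the bug along with the function; the correct
combination, for reference, is:

    *val = ((uint64_t)(uint32_t)_regs.edx << 32) | (uint32_t)_regs.eax;

MSR access from emulated real mode now flows through
hvm_funcs.msr_read_intercept()/msr_write_intercept() like every other
emulated path.
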
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Feb 20 14:36:45 2008 +0000
@@ -60,6 +60,13 @@ static void vmx_install_vlapic_mapping(s
 static void vmx_install_vlapic_mapping(struct vcpu *v);
 static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
 static void vmx_update_guest_efer(struct vcpu *v);
+static void vmx_cpuid_intercept(
+    unsigned int *eax, unsigned int *ebx,
+    unsigned int *ecx, unsigned int *edx);
+static void vmx_wbinvd_intercept(void);
+static void vmx_fpu_dirty_intercept(void);
+static int vmx_msr_read_intercept(struct cpu_user_regs *regs);
+static int vmx_msr_write_intercept(struct cpu_user_regs *regs);
 
 static int vmx_domain_initialise(struct domain *d)
 {
@@ -96,7 +103,6 @@ static int vmx_vcpu_initialise(struct vc
     /* %eax == 1 signals full real-mode support to the guest loader. */
     if ( v->vcpu_id == 0 )
         v->arch.guest_context.user_regs.eax = 1;
-    v->arch.hvm_vcpu.io_complete = vmx_realmode_io_complete;
 
     return 0;
 }
@@ -204,7 +210,7 @@ static enum handler_return long_mode_do_
     switch ( ecx )
     {
     case MSR_EFER:
-        if ( !hvm_set_efer(msr_content) )
+        if ( hvm_set_efer(msr_content) )
             goto exception_raised;
         break;
 
@@ -375,7 +381,7 @@ static enum handler_return long_mode_do_
     switch ( regs->ecx )
     {
     case MSR_EFER:
-        if ( !hvm_set_efer(msr_content) )
+        if ( hvm_set_efer(msr_content) )
             return HNDL_exception_raised;
         break;
 
@@ -1076,6 +1082,11 @@ static struct hvm_function_table vmx_fun
     .do_pmu_interrupt     = vmx_do_pmu_interrupt,
     .cpu_up               = vmx_cpu_up,
     .cpu_down             = vmx_cpu_down,
+    .cpuid_intercept      = vmx_cpuid_intercept,
+    .wbinvd_intercept     = vmx_wbinvd_intercept,
+    .fpu_dirty_intercept  = vmx_fpu_dirty_intercept,
+    .msr_read_intercept   = vmx_msr_read_intercept,
+    .msr_write_intercept  = vmx_msr_write_intercept
 };
 
 void start_vmx(void)
@@ -1147,7 +1158,7 @@ static void __update_guest_eip(unsigned 
         vmx_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
 }
 
-void vmx_do_no_device_fault(void)
+static void vmx_fpu_dirty_intercept(void)
 {
     struct vcpu *curr = current;
 
@@ -1162,7 +1173,7 @@ void vmx_do_no_device_fault(void)
 }
 
 #define bitmaskof(idx)  (1U << ((idx) & 31))
-void vmx_cpuid_intercept(
+static void vmx_cpuid_intercept(
     unsigned int *eax, unsigned int *ebx,
     unsigned int *ecx, unsigned int *edx)
 {
@@ -1751,13 +1762,13 @@ static int mov_to_cr(int gp, int cr, str
     switch ( cr )
     {
     case 0:
-        return hvm_set_cr0(value);
+        return !hvm_set_cr0(value);
 
     case 3:
-        return hvm_set_cr3(value);
+        return !hvm_set_cr3(value);
 
     case 4:
-        return hvm_set_cr4(value);
+        return !hvm_set_cr4(value);
 
     case 8:
         vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
@@ -1848,7 +1859,7 @@ static int vmx_cr_access(unsigned long e
         value = (value & ~0xF) |
             (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
         HVMTRACE_1D(LMSW, current, value);
-        return hvm_set_cr0(value);
+        return !hvm_set_cr0(value);
     default:
         BUG();
     }
@@ -1932,7 +1943,7 @@ static int is_last_branch_msr(u32 ecx)
     return 0;
 }
 
-int vmx_msr_read_intercept(struct cpu_user_regs *regs)
+static int vmx_msr_read_intercept(struct cpu_user_regs *regs)
 {
     u64 msr_content = 0;
     u32 ecx = regs->ecx, eax, edx;
@@ -2017,7 +2028,7 @@ int vmx_msr_read_intercept(struct cpu_us
             case HNDL_unhandled:
                 break;
             case HNDL_exception_raised:
-                return 0;
+                return X86EMUL_EXCEPTION;
             case HNDL_done:
                 goto done;
         }
@@ -2050,11 +2061,11 @@ done:
     HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
                 ecx, (unsigned long)regs->eax,
                 (unsigned long)regs->edx);
-    return 1;
+    return X86EMUL_OKAY;
 
 gp_fault:
     vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
-    return 0;
+    return X86EMUL_EXCEPTION;
 }
 
 static int vmx_alloc_vlapic_mapping(struct domain *d)
@@ -2124,7 +2135,7 @@ extern bool_t mtrr_def_type_msr_set(stru
 extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
 extern bool_t pat_msr_set(u64 *pat, u64 msr);
 
-int vmx_msr_write_intercept(struct cpu_user_regs *regs)
+static int vmx_msr_write_intercept(struct cpu_user_regs *regs)
 {
     u32 ecx = regs->ecx;
     u64 msr_content;
@@ -2219,7 +2230,7 @@ int vmx_msr_write_intercept(struct cpu_u
         goto gp_fault;
     default:
         if ( vpmu_do_wrmsr(regs) )
-            return 1;
+            return X86EMUL_OKAY;
         switch ( long_mode_do_msr_write(regs) )
         {
             case HNDL_unhandled:
@@ -2228,18 +2239,18 @@ int vmx_msr_write_intercept(struct cpu_u
                     wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
                 break;
             case HNDL_exception_raised:
-                return 0;
+                return X86EMUL_EXCEPTION;
             case HNDL_done:
                 break;
         }
         break;
     }
 
-    return 1;
+    return X86EMUL_OKAY;
 
 gp_fault:
     vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
-    return 0;
+    return X86EMUL_EXCEPTION;
 }
 
 static void vmx_do_hlt(struct cpu_user_regs *regs)
@@ -2320,7 +2331,7 @@ static void wbinvd_ipi(void *info)
     wbinvd();
 }
 
-void vmx_wbinvd_intercept(void)
+static void vmx_wbinvd_intercept(void)
 {
     if ( list_empty(&(domain_hvm_iommu(current->domain)->pdev_list)) )
         return;
@@ -2447,7 +2458,7 @@ asmlinkage void vmx_vmexit_handler(struc
             domain_pause_for_debugger();
             break;
         case TRAP_no_device:
-            vmx_do_no_device_fault();
+            vmx_fpu_dirty_intercept();
             break;
         case TRAP_page_fault:
             exit_qualification = __vmread(EXIT_QUALIFICATION);
@@ -2566,12 +2577,12 @@ asmlinkage void vmx_vmexit_handler(struc
         break;
     case EXIT_REASON_MSR_READ:
         inst_len = __get_instruction_length(); /* Safe: RDMSR */
-        if ( vmx_msr_read_intercept(regs) )
+        if ( vmx_msr_read_intercept(regs) == X86EMUL_OKAY )
             __update_guest_eip(inst_len);
         break;
     case EXIT_REASON_MSR_WRITE:
         inst_len = __get_instruction_length(); /* Safe: WRMSR */
-        if ( vmx_msr_write_intercept(regs) )
+        if ( vmx_msr_write_intercept(regs) == X86EMUL_OKAY )
             __update_guest_eip(inst_len);
         break;
 
@@ -2597,7 +2608,8 @@ asmlinkage void vmx_vmexit_handler(struc
         unsigned long offset;
         exit_qualification = __vmread(EXIT_QUALIFICATION);
         offset = exit_qualification & 0x0fffUL;
-        handle_mmio(APIC_DEFAULT_PHYS_BASE | offset);
+        if ( !handle_mmio() )
+            hvm_inject_exception(TRAP_gp_fault, 0, 0);
         break;
     }
 
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/mm/shadow/multi.c    Wed Feb 20 14:36:45 2008 +0000
@@ -2816,8 +2816,7 @@ static int sh_page_fault(struct vcpu *v,
             perfc_incr(shadow_fault_fast_mmio);
             SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
             reset_early_unshadow(v);
-            handle_mmio(gpa);
-            return EXCRET_fault_fixed;
+            return handle_mmio() ? EXCRET_fault_fixed : 0;
         }
         else
         {
@@ -3117,8 +3116,7 @@ static int sh_page_fault(struct vcpu *v,
     shadow_audit_tables(v);
     reset_early_unshadow(v);
     shadow_unlock(d);
-    handle_mmio(gpa);
-    return EXCRET_fault_fixed;
+    return handle_mmio() ? EXCRET_fault_fixed : 0;
 
  not_a_shadow_fault:
     sh_audit_gw(v, &gw);
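
The shadow code adopts the same contract as the SVM/VMX exit handlers:
handle_mmio() may now refuse, and the caller picks the failure policy
instead of the old behaviour of unconditionally claiming the fault fixed.
The shared caller idiom, as used in both vmexit handlers above:

    if ( !handle_mmio() )
        hvm_inject_exception(TRAP_gp_fault, 0, 0);  /* unemulatable access */
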
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/emulate.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/emulate.h Wed Feb 20 14:36:45 2008 +0000
@@ -0,0 +1,55 @@
+/******************************************************************************
+ * hvm/emulate.h
+ * 
+ * HVM instruction emulation. Used for MMIO and VMX real mode.
+ * 
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ * 
+ * Authors:
+ *    Keir Fraser <keir.fraser@xxxxxxxxxx>
+ */
+
+#ifndef __ASM_X86_HVM_EMULATE_H__
+#define __ASM_X86_HVM_EMULATE_H__
+
+#include <xen/config.h>
+#include <asm/x86_emulate.h>
+
+struct hvm_emulate_ctxt {
+    struct x86_emulate_ctxt ctxt;
+
+    /* Cache of 16 bytes of instruction. */
+    uint8_t insn_buf[16];
+    unsigned long insn_buf_eip;
+    unsigned int insn_buf_bytes;
+
+    struct segment_register seg_reg[10];
+    unsigned long seg_reg_accessed;
+    unsigned long seg_reg_dirty;
+
+    union {
+        struct {
+            unsigned int hlt:1;
+            unsigned int mov_ss:1;
+            unsigned int sti:1;
+            unsigned int exn_pending:1;
+        } flags;
+        unsigned int flag_word;
+    };
+
+    uint8_t exn_vector;
+    uint8_t exn_insn_len;
+};
+
+int hvm_emulate_one(
+    struct hvm_emulate_ctxt *hvmemul_ctxt);
+void hvm_emulate_prepare(
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    struct cpu_user_regs *regs);
+void hvm_emulate_writeback(
+    struct hvm_emulate_ctxt *hvmemul_ctxt);
+struct segment_register *hvmemul_get_seg_reg(
+    enum x86_segment seg,
+    struct hvm_emulate_ctxt *hvmemul_ctxt);
+
+#endif /* __ASM_X86_HVM_EMULATE_H__ */
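
A note on the segment cache: hvmemul_get_seg_reg() loads a register
lazily into seg_reg[] and records the access in seg_reg_accessed, but it
cannot distinguish a read from a write. A caller that modifies the cached
copy must set the matching bit in seg_reg_dirty itself, or
hvm_emulate_writeback() will not push the change back -- which is exactly
why realmode_deliver_exception() sets the CS bit by hand above. In
miniature (new_sel is a hypothetical selector value):

    struct segment_register *cs = hvmemul_get_seg_reg(x86_seg_cs, &ctxt);

    cs->sel  = new_sel;
    cs->base = (uint32_t)new_sel << 4;           /* real-mode base rule  */
    __set_bit(x86_seg_cs, &ctxt.seg_reg_dirty);  /* else writeback skips */
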
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/hvm.h     Wed Feb 20 14:36:45 2008 +0000
@@ -117,6 +117,15 @@ struct hvm_function_table {
 
     int  (*cpu_up)(void);
     void (*cpu_down)(void);
+
+    /* Instruction intercepts: non-void return values are X86EMUL codes. */
+    void (*cpuid_intercept)(
+        unsigned int *eax, unsigned int *ebx,
+        unsigned int *ecx, unsigned int *edx);
+    void (*wbinvd_intercept)(void);
+    void (*fpu_dirty_intercept)(void);
+    int (*msr_read_intercept)(struct cpu_user_regs *regs);
+    int (*msr_write_intercept)(struct cpu_user_regs *regs);
 };
 
 extern struct hvm_function_table hvm_funcs;
@@ -162,9 +171,6 @@ hvm_guest_x86_mode(struct vcpu *v)
     ASSERT(v == current);
     return hvm_funcs.guest_x86_mode(v);
 }
-
-int hvm_instruction_fetch(unsigned long pc, int address_bytes,
-                          unsigned char *buf);
 
 static inline void
 hvm_update_host_cr3(struct vcpu *v)
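
With the intercepts reachable through hvm_funcs, common code needs no
vendor externs to invoke them. The CPUID hook, for instance, can be
driven generically, mirroring svm_vmexit_do_cpuid() above (the eax..edx
temporaries are in/out parameters):

    unsigned int eax = regs->eax, ebx = regs->ebx;
    unsigned int ecx = regs->ecx, edx = regs->edx;

    hvm_funcs.cpuid_intercept(&eax, &ebx, &ecx, &edx);

    regs->eax = eax; regs->ebx = ebx;
    regs->ecx = ecx; regs->edx = edx;
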
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/io.h
--- a/xen/include/asm-x86/hvm/io.h      Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/io.h      Wed Feb 20 14:36:45 2008 +0000
@@ -120,8 +120,8 @@ struct hvm_mmio_handler {
 };
 
 /* global io interception point in HV */
-extern int hvm_io_intercept(ioreq_t *p, int type);
-extern int register_io_handler(
+int hvm_io_intercept(ioreq_t *p, int type);
+int register_io_handler(
     struct domain *d, unsigned long addr, unsigned long size,
     void *action, int type);
 
@@ -135,8 +135,8 @@ static inline int hvm_buffered_io_interc
     return hvm_io_intercept(p, HVM_BUFFERED_IO);
 }
 
-extern int hvm_mmio_intercept(ioreq_t *p);
-extern int hvm_buffered_io_send(ioreq_t *p);
+int hvm_mmio_intercept(ioreq_t *p);
+int hvm_buffered_io_send(ioreq_t *p);
 
 static inline int register_portio_handler(
     struct domain *d, unsigned long addr,
@@ -159,11 +159,11 @@ void send_pio_req(unsigned long port, un
                   paddr_t value, int dir, int df, int value_is_ptr);
 void send_timeoffset_req(unsigned long timeoff);
 void send_invalidate_req(void);
-extern void handle_mmio(paddr_t gpa);
-extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
-extern void hvm_io_assist(void);
-extern void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
-                         union vioapic_redir_entry *ent);
+int handle_mmio(void);
+void hvm_interrupt_post(struct vcpu *v, int vector, int type);
+void hvm_io_assist(void);
+void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
+                  union vioapic_redir_entry *ent);
 
 struct hvm_hw_stdvga {
     uint8_t sr_index;
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/support.h Wed Feb 20 14:36:45 2008 +0000
@@ -138,6 +138,7 @@ void hvm_hlt(unsigned long rflags);
 void hvm_hlt(unsigned long rflags);
 void hvm_triple_fault(void);
 
+/* These functions all return X86EMUL return codes. */
 int hvm_set_efer(uint64_t value);
 int hvm_set_cr0(unsigned long value);
 int hvm_set_cr3(unsigned long value);
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/vcpu.h    Wed Feb 20 14:36:45 2008 +0000
@@ -59,9 +59,6 @@ struct hvm_vcpu {
     bool_t              flag_dr_dirty;
     bool_t              debug_state_latch;
 
-    /* Callback function for I/O completion. */
-    int                 (*io_complete)(void);
-
     union {
         struct arch_vmx_struct vmx;
         struct arch_svm_struct svm;
@@ -72,6 +69,12 @@ struct hvm_vcpu {
 
     /* Which cache mode is this VCPU in (CR0:CD/NW)? */
     u8                  cache_mode;
+
+    /* I/O request in flight to device model. */
+    bool_t              mmio_in_progress;
+    bool_t              io_in_progress;
+    bool_t              io_completed;
+    unsigned long       io_data;
 };
 
 #define ARCH_HVM_IO_WAIT         1   /* Waiting for I/O completion */
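
These fields generalise the VMX-real-mode-only I/O state removed from
arch_vmx_struct below, so any emulated instruction can wait on the device
model. A hedged sketch of the completion handshake; only the field names
come from this patch, the surrounding function is illustrative:

    /* Illustrative completion path: latch the result and flag the
     * emulator so its retry of the instruction can consume io_data. */
    static void example_io_done(struct vcpu *v, const ioreq_t *p)
    {
        if ( p->dir == IOREQ_READ )
            v->arch.hvm_vcpu.io_data = p->data;
        v->arch.hvm_vcpu.io_in_progress = 0;
        v->arch.hvm_vcpu.io_completed = 1;
    }
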
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h        Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h        Wed Feb 20 14:36:45 2008 +0000
@@ -94,11 +94,6 @@ struct arch_vmx_struct {
 #define VMXEMUL_BAD_CS   2  /* Yes, because CS.RPL != CPL */
 #define VMXEMUL_BAD_SS   4  /* Yes, because SS.RPL != CPL */
     uint8_t              vmxemul;
-
-    /* I/O request in flight to device model. */
-    bool_t               real_mode_io_in_progress;
-    bool_t               real_mode_io_completed;
-    unsigned long        real_mode_io_data;
 };
 
 int vmx_create_vmcs(struct vcpu *v);
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Wed Feb 20 14:36:45 2008 +0000
@@ -33,15 +33,7 @@ void vmx_do_resume(struct vcpu *);
 void vmx_do_resume(struct vcpu *);
 void set_guest_time(struct vcpu *v, u64 gtime);
 void vmx_vlapic_msr_changed(struct vcpu *v);
-void vmx_do_no_device_fault(void);
-void vmx_cpuid_intercept(
-    unsigned int *eax, unsigned int *ebx,
-    unsigned int *ecx, unsigned int *edx);
-int vmx_msr_read_intercept(struct cpu_user_regs *regs);
-int vmx_msr_write_intercept(struct cpu_user_regs *regs);
-void vmx_wbinvd_intercept(void);
 void vmx_realmode(struct cpu_user_regs *regs);
-int vmx_realmode_io_complete(void);
 
 /*
  * Exit Reasons
diff -r f853c0497095 -r 3f1cf03826fe xen/include/public/hvm/ioreq.h
--- a/xen/include/public/hvm/ioreq.h    Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/public/hvm/ioreq.h    Wed Feb 20 14:36:45 2008 +0000
@@ -34,14 +34,8 @@
 
 #define IOREQ_TYPE_PIO          0 /* pio */
 #define IOREQ_TYPE_COPY         1 /* mmio ops */
-#define IOREQ_TYPE_AND          2
-#define IOREQ_TYPE_OR           3
-#define IOREQ_TYPE_XOR          4
-#define IOREQ_TYPE_XCHG         5
-#define IOREQ_TYPE_ADD          6
 #define IOREQ_TYPE_TIMEOFFSET   7
 #define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
-#define IOREQ_TYPE_SUB          9
 
 /*
  * VMExit dispatcher should cooperate with instruction decoder to

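The arithmetic/logical ioreq types disappear because x86_emulate() now
performs read-modify-write MMIO instructions inside Xen: the device model
only ever sees a plain read followed by a plain write. A hedged
illustration using the ioreq_t fields defined in this header; gpa and
anded_value are invented placeholders:

    /* Sketch: 'and %eax,(mmio)' becomes two IOREQ_TYPE_COPY requests,
     * with the AND done by x86_emulate() in between. */
    ioreq_t rd = { .type = IOREQ_TYPE_COPY, .dir = IOREQ_READ,
                   .addr = gpa, .size = 4 };
    ioreq_t wr = { .type = IOREQ_TYPE_COPY, .dir = IOREQ_WRITE,
                   .addr = gpa, .size = 4, .data = anded_value };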
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog