[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] vmx-mmio-ioemu.patch



Attached are the patches for the new ioemu communication mechanism. The new
mechanism provides richer I/O operation semantics, such as AND, OR, and XOR
operations on MMIO space. This is necessary for operating systems such
as Windows XP and Windows 2003.

This is the second part of a two-part patch. This patch applies to xen.

(Patches are against the current xen-vt-testing tree.)

Signed-Off-By: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>


diff -r 04ca47c298b5 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Thu Sep  1 21:30:51 2005
+++ b/xen/arch/x86/vmx.c        Fri Sep  2 11:49:40 2005
@@ -602,15 +602,66 @@
     return 0;
 }
 
+void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
+       unsigned long count, int size, long value, int dir, int pvalid)
+{
+    struct vcpu *v = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+
+    vio = get_vio(v->domain, v->vcpu_id);
+    if (vio == NULL) {
+        printk("bad shared page: %lx\n", (unsigned long) vio);
+        domain_crash_synchronous();
+    }
+
+    if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
+       printf("VMX I/O has not yet completed\n");
+       domain_crash_synchronous();
+    }
+    set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+
+    p = &vio->vp_ioreq;
+    p->dir = dir;
+    p->pdata_valid = pvalid;
+
+    p->type = IOREQ_TYPE_PIO;
+    p->size = size;
+    p->addr = port;
+    p->count = count;
+    p->df = regs->eflags & EF_DF ? 1 : 0;
+
+    if (pvalid) {
+        if (vmx_paging_enabled(current))
+            p->u.pdata = (void *) gva_to_gpa(value);
+        else
+            p->u.pdata = (void *) value; /* guest VA == guest PA */
+    } else
+        p->u.data = value;
+
+    p->state = STATE_IOREQ_READY;
+
+    if (vmx_portio_intercept(p)) {
+        /* no blocking & no evtchn notification */
+        clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+        return;
+    }
+
+    evtchn_send(iopacket_port(v->domain));
+    vmx_wait_io();
+}
+
 static void vmx_io_instruction(struct cpu_user_regs *regs, 
                    unsigned long exit_qualification, unsigned long inst_len) 
 {
-    struct vcpu *d = current;
-    vcpu_iodata_t *vio;
-    ioreq_t *p;
-    unsigned long addr;
+    struct mi_per_cpu_info *mpcip;
     unsigned long eip, cs, eflags;
+    unsigned long port, size, dir;
     int vm86;
+
+    mpcip = &current->domain->arch.vmx_platform.mpci;
+    mpcip->instr = INSTR_PIO;
+    mpcip->flags = 0;
 
     __vmread(GUEST_RIP, &eip);
     __vmread(GUEST_CS_SELECTOR, &cs);
@@ -623,80 +674,57 @@
                 vm86, cs, eip, exit_qualification);
 
     if (test_bit(6, &exit_qualification))
-        addr = (exit_qualification >> 16) & (0xffff);
+        port = (exit_qualification >> 16) & 0xFFFF;
     else
-        addr = regs->edx & 0xffff;
-    TRACE_VMEXIT (2,addr);
-
-    vio = get_vio(d->domain, d->vcpu_id);
-    if (vio == 0) {
-        printk("bad shared page: %lx", (unsigned long) vio);
-        domain_crash_synchronous(); 
-    }
-    p = &vio->vp_ioreq;
-    p->dir = test_bit(3, &exit_qualification); /* direction */
-
-    p->pdata_valid = 0;
-    p->count = 1;
-    p->size = (exit_qualification & 7) + 1;
+        port = regs->edx & 0xffff;
+    TRACE_VMEXIT(2, port);
+    size = (exit_qualification & 7) + 1;
+    dir = test_bit(3, &exit_qualification); /* direction */
 
     if (test_bit(4, &exit_qualification)) { /* string instruction */
-       unsigned long laddr;
-
-       __vmread(GUEST_LINEAR_ADDRESS, &laddr);
+       unsigned long addr, count = 1;
+       int sign = regs->eflags & EF_DF ? -1 : 1;
+
+       __vmread(GUEST_LINEAR_ADDRESS, &addr);
+
         /*
          * In protected mode, guest linear address is invalid if the
          * selector is null.
          */
-        if (!vm86 && check_for_null_selector(eip)) {
-            laddr = (p->dir == IOREQ_WRITE) ? regs->esi : regs->edi;
-        }
-        p->pdata_valid = 1;
-
-        p->u.data = laddr;
-        if (vmx_paging_enabled(d))
-                p->u.pdata = (void *) gva_to_gpa(p->u.data);
-        p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
-
-        if (test_bit(5, &exit_qualification)) /* "rep" prefix */
-            p->count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
-
-        /*
-         * Split up string I/O operations that cross page boundaries. Don't
-         * advance %eip so that "rep insb" will restart at the next page.
-         */
-        if ((p->u.data & PAGE_MASK) != 
-               ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
-           VMX_DBG_LOG(DBG_LEVEL_2,
-               "String I/O crosses page boundary (cs:eip=0x%lx:0x%lx)\n",
-               cs, eip);
-            if (p->u.data & (p->size - 1)) {
-               printf("Unaligned string I/O operation (cs:eip=0x%lx:0x%lx)\n",
-                       cs, eip);
-                domain_crash_synchronous();     
-            }
-            p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
-        } else {
-            __update_guest_eip(inst_len);
-        }
-    } else if (p->dir == IOREQ_WRITE) {
-        p->u.data = regs->eax;
+        if (!vm86 && check_for_null_selector(eip))
+            addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
+
+        if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
+           mpcip->flags |= REPZ;
+           count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
+       }
+
+       /*
+        * Handle string pio instructions that cross pages or that
+        * are unaligned. See the comments in vmx_platform.c/handle_mmio()
+        */
+       if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
+           unsigned long value = 0;
+
+           mpcip->flags |= OVERLAP;
+           if (dir == IOREQ_WRITE)
+               vmx_copy(&value, addr, size, VMX_COPY_IN);
+           send_pio_req(regs, port, 1, size, value, dir, 0);
+       } else {
+           if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
+                if (sign > 0)
+                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
+                else
+                    count = (addr & ~PAGE_MASK) / size;
+           } else
+               __update_guest_eip(inst_len);
+
+           send_pio_req(regs, port, count, size, addr, dir, 1);
+       }
+    } else {
         __update_guest_eip(inst_len);
-    } else
-        __update_guest_eip(inst_len);
-
-    p->addr = addr;
-    p->port_mm = 0;
-
-    /* Check if the packet needs to be intercepted */
-    if (vmx_portio_intercept(p))
-       /* no blocking & no evtchn notification */
-        return;
-
-    set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
-    p->state = STATE_IOREQ_READY;
-    evtchn_send(iopacket_port(d->domain));
-    vmx_wait_io();
+       send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
+    }
 }
 
 enum { COPY_IN = 0, COPY_OUT };
diff -r 04ca47c298b5 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c      Thu Sep  1 21:30:51 2005
+++ b/xen/arch/x86/vmx_intercept.c      Fri Sep  2 11:49:40 2005
@@ -172,7 +172,7 @@
 
     if (p->size != 1 ||
         p->pdata_valid ||
-        p->port_mm)
+       p->type != IOREQ_TYPE_PIO)
         return 0;
     
     if (p->addr == PIT_MODE &&
@@ -284,7 +284,5 @@
         if (!reinit)
            register_portio_handler(0x40, 4, intercept_pit_io); 
     }
-
-}
-
+}
 #endif /* CONFIG_VMX */
diff -r 04ca47c298b5 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     Thu Sep  1 21:30:51 2005
+++ b/xen/arch/x86/vmx_io.c     Fri Sep  2 11:49:40 2005
@@ -33,6 +33,7 @@
 #include <asm/vmx_platform.h>
 #include <asm/vmx_virpit.h>
 #include <asm/apic.h>
+#include <asm/shadow.h>
 
 #include <public/io/ioreq.h>
 #include <public/io/vmx_vlapic.h>
@@ -123,7 +124,6 @@
             regs->esp &= 0xFFFF0000;
             regs->esp |= (value & 0xFFFF);
             break;
-
         case 5:
             regs->ebp &= 0xFFFF0000;
             regs->ebp |= (value & 0xFFFF);
@@ -207,7 +207,6 @@
             *reg &= ~0xFFFF;
             *reg |= (value & 0xFFFF);
             break;
-
         case LONG:
             *reg &= ~0xFFFFFFFF;
             *reg |= (value & 0xFFFFFFFF);
@@ -322,13 +321,319 @@
 }
 #endif
 
+extern long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs);
+
+static inline void set_eflags_CF(int size, unsigned long v1,
+       unsigned long v2, struct cpu_user_regs *regs)
+{
+    unsigned long mask = (1 << (8 * size)) - 1;
+
+    if ((v1 & mask) > (v2 & mask))
+       regs->eflags |= X86_EFLAGS_CF;
+    else
+       regs->eflags &= ~X86_EFLAGS_CF;
+}
+
+static inline void set_eflags_OF(int size, unsigned long v1,
+       unsigned long v2, unsigned long v3, struct cpu_user_regs *regs)
+{
+    if ((v3 ^ v2) & (v3 ^ v1) & (1 << ((8 * size) - 1)))
+       regs->eflags |= X86_EFLAGS_OF;
+}
+
+static inline void set_eflags_AF(int size, unsigned long v1,
+       unsigned long v2, unsigned long v3, struct cpu_user_regs *regs)
+{
+    if ((v1 ^ v2 ^ v3) & 0x10)
+       regs->eflags |= X86_EFLAGS_AF;
+}
+
+static inline void set_eflags_ZF(int size, unsigned long v1,
+       struct cpu_user_regs *regs)
+{
+    unsigned long mask = (1 << (8 * size)) - 1;
+
+    if ((v1 & mask) == 0)
+       regs->eflags |= X86_EFLAGS_ZF;
+}
+
+static inline void set_eflags_SF(int size, unsigned long v1,
+       struct cpu_user_regs *regs)
+{
+    if (v1 & (1 << ((8 * size) - 1)))
+       regs->eflags |= X86_EFLAGS_SF;
+}
+
+static char parity_table[256] = {
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
+};
+
+static inline void set_eflags_PF(int size, unsigned long v1,
+       struct cpu_user_regs *regs)
+{
+    if (parity_table[v1 & 0xFF])
+       regs->eflags |= X86_EFLAGS_PF;
+}
+
+static void vmx_pio_assist(struct cpu_user_regs *regs, ioreq_t *p,
+                                       struct mi_per_cpu_info *mpcip)
+{
+    unsigned long old_eax;
+    int sign = p->df ? -1 : 1;
+
+    if (p->dir == IOREQ_WRITE) {
+        if (p->pdata_valid) {
+            regs->esi += sign * p->count * p->size;
+           if (mpcip->flags & REPZ)
+               regs->ecx -= p->count;
+        }
+    } else {
+       if (mpcip->flags & OVERLAP) {
+           unsigned long addr;
+
+            regs->edi += sign * p->count * p->size;
+           if (mpcip->flags & REPZ)
+               regs->ecx -= p->count;
+
+           addr = regs->edi;
+           if (sign > 0)
+               addr -= p->size;
+           vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT);
+       } else if (p->pdata_valid) {
+            regs->edi += sign * p->count * p->size;
+           if (mpcip->flags & REPZ)
+               regs->ecx -= p->count;
+        } else {
+           old_eax = regs->eax;
+           switch (p->size) {
+            case 1:
+                regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
+                break;
+            case 2:
+                regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
+                break;
+            case 4:
+                regs->eax = (p->u.data & 0xffffffff);
+                break;
+            default:
+               printk("Error: %s unknown port size\n", __FUNCTION__);
+               domain_crash_synchronous();
+           }
+       }
+    }
+}
+
+static void vmx_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p,
+                                       struct mi_per_cpu_info *mpcip)
+{
+    int sign = p->df ? -1 : 1;
+    int size = -1, index = -1;
+    unsigned long value = 0, diff = 0;
+    unsigned long src, dst;
+
+    src = mpcip->operand[0];
+    dst = mpcip->operand[1];
+    size = operand_size(src);
+
+    switch (mpcip->instr) {
+    case INSTR_MOV:
+       if (dst & REGISTER) {
+           index = operand_index(dst);
+           set_reg_value(size, index, 0, regs, p->u.data);
+       }
+       break;
+
+    case INSTR_MOVZ:
+       if (dst & REGISTER) {
+           index = operand_index(dst);
+           switch (size) {
+           case BYTE: p->u.data = p->u.data & 0xFFULL; break;
+           case WORD: p->u.data = p->u.data & 0xFFFFULL; break;
+           case LONG: p->u.data = p->u.data & 0xFFFFFFFFULL; break;
+           }
+           set_reg_value(operand_size(dst), index, 0, regs, p->u.data);
+       }
+       break;
+
+    case INSTR_MOVS:
+       sign = p->df ? -1 : 1;
+       regs->esi += sign * p->count * p->size;
+       regs->edi += sign * p->count * p->size;
+
+       if ((mpcip->flags & OVERLAP) && p->dir == IOREQ_READ) {
+           unsigned long addr = regs->edi;
+
+           if (sign > 0)
+               addr -= p->size;
+           vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT);
+       }
+
+       if (mpcip->flags & REPZ)
+           regs->ecx -= p->count;
+       break;
+
+    case INSTR_STOS:
+       sign = p->df ? -1 : 1;
+       regs->edi += sign * p->count * p->size;
+       if (mpcip->flags & REPZ)
+           regs->ecx -= p->count;
+       break;
+
+    case INSTR_AND:
+       if (src & REGISTER) {
+           index = operand_index(src);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data & value;
+       } else if (src & IMMEDIATE) {
+           value = mpcip->immediate;
+           diff = (unsigned long) p->u.data & value;
+       } else if (src & MEMORY) {
+           index = operand_index(dst);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data & value;
+           set_reg_value(size, index, 0, regs, diff);
+       }
+
+       /*
+        * The OF and CF flags are cleared; the SF, ZF, and PF
+        * flags are set according to the result. The state of
+        * the AF flag is undefined.
+        */
+       regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                         X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+       set_eflags_ZF(size, diff, regs);
+       set_eflags_SF(size, diff, regs);
+       set_eflags_PF(size, diff, regs);
+       break;
+
+    case INSTR_OR:
+       if (src & REGISTER) {
+           index = operand_index(src);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data | value;
+       } else if (src & IMMEDIATE) {
+           value = mpcip->immediate;
+           diff = (unsigned long) p->u.data | value;
+       } else if (src & MEMORY) {
+           index = operand_index(dst);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data | value;
+           set_reg_value(size, index, 0, regs, diff);
+       }
+
+       /*
+        * The OF and CF flags are cleared; the SF, ZF, and PF
+        * flags are set according to the result. The state of
+        * the AF flag is undefined.
+        */
+       regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                         X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+       set_eflags_ZF(size, diff, regs);
+       set_eflags_SF(size, diff, regs);
+       set_eflags_PF(size, diff, regs);
+       break;
+
+    case INSTR_XOR:
+       if (src & REGISTER) {
+           index = operand_index(src);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data ^ value;
+       } else if (src & IMMEDIATE) {
+           value = mpcip->immediate;
+           diff = (unsigned long) p->u.data ^ value;
+       } else if (src & MEMORY) {
+           index = operand_index(dst);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data ^ value;
+           set_reg_value(size, index, 0, regs, diff);
+       }
+
+       /*
+        * The OF and CF flags are cleared; the SF, ZF, and PF
+        * flags are set according to the result. The state of
+        * the AF flag is undefined.
+        */
+       regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                         X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+       set_eflags_ZF(size, diff, regs);
+       set_eflags_SF(size, diff, regs);
+       set_eflags_PF(size, diff, regs);
+       break;
+
+    case INSTR_CMP:
+       if (src & REGISTER) {
+           index = operand_index(src);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data - value;
+       } else if (src & IMMEDIATE) {
+           value = mpcip->immediate;
+           diff = (unsigned long) p->u.data - value;
+       } else if (src & MEMORY) {
+           index = operand_index(dst);
+           value = get_reg_value(size, index, 0, regs);
+           diff = value - (unsigned long) p->u.data;
+       }
+
+       /*
+        * The CF, OF, SF, ZF, AF, and PF flags are set according
+        * to the result
+        */
+       regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
+                         X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+       set_eflags_CF(size, value, (unsigned long) p->u.data, regs);
+       set_eflags_OF(size, diff, value, (unsigned long) p->u.data, regs);
+       set_eflags_AF(size, diff, value, (unsigned long) p->u.data, regs);
+       set_eflags_ZF(size, diff, regs);
+       set_eflags_SF(size, diff, regs);
+       set_eflags_PF(size, diff, regs);
+       break;
+
+    case INSTR_TEST:
+       if (src & REGISTER) {
+           index = operand_index(src);
+           value = get_reg_value(size, index, 0, regs);
+       } else if (src & IMMEDIATE) {
+           value = mpcip->immediate;
+       } else if (src & MEMORY) {
+           index = operand_index(dst);
+           value = get_reg_value(size, index, 0, regs);
+       }
+       diff = (unsigned long) p->u.data & value;
+
+       /*
+        * Sets the SF, ZF, and PF status flags. CF and OF are set to 0
+        */
+       regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                         X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+       set_eflags_ZF(size, diff, regs);
+       set_eflags_SF(size, diff, regs);
+       set_eflags_PF(size, diff, regs);
+       break;
+    }
+
+    load_cpu_user_regs(regs);
+}
+
 void vmx_io_assist(struct vcpu *v) 
 {
     vcpu_iodata_t *vio;
     ioreq_t *p;
     struct cpu_user_regs *regs = guest_cpu_user_regs();
-    unsigned long old_eax;
-    int sign;
     struct mi_per_cpu_info *mpci_p;
     struct cpu_user_regs *inst_decoder_regs;
 
@@ -340,80 +645,26 @@
     if (vio == 0) {
         VMX_DBG_LOG(DBG_LEVEL_1, 
                     "bad shared page: %lx", (unsigned long) vio);
+       printf("bad shared page: %lx\n", (unsigned long) vio);
         domain_crash_synchronous();
     }
+
     p = &vio->vp_ioreq;
-
-    if (p->state == STATE_IORESP_HOOK){
+    if (p->state == STATE_IORESP_HOOK)
         vmx_hooks_assist(v);
-    }
 
     /* clear IO wait VMX flag */
     if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
-        if (p->state != STATE_IORESP_READY) {
-                /* An interrupt send event raced us */
-                return;
-        } else {
-            p->state = STATE_INVALID;
-        }
-        clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
-    } else {
-        return;
-    }
-
-    sign = (p->df) ? -1 : 1;
-    if (p->port_mm) {
-        if (p->pdata_valid) {
-            regs->esi += sign * p->count * p->size;
-            regs->edi += sign * p->count * p->size;
-        } else {
-            if (p->dir == IOREQ_WRITE) {
-                return;
-            }
-            int size = -1, index = -1;
-
-            size = operand_size(v->domain->arch.vmx_platform.mpci.mmio_target);
-            index = operand_index(v->domain->arch.vmx_platform.mpci.mmio_target);
-
-            if (v->domain->arch.vmx_platform.mpci.mmio_target & WZEROEXTEND) {
-                p->u.data = p->u.data & 0xffff;
-            }        
-            set_reg_value(size, index, 0, regs, p->u.data);
-
-        }
-        load_cpu_user_regs(regs);
-        return;
-    }
-
-    if (p->dir == IOREQ_WRITE) {
-        if (p->pdata_valid) {
-            regs->esi += sign * p->count * p->size;
-            regs->ecx -= p->count;
-        }
-        return;
-    } else {
-        if (p->pdata_valid) {
-            regs->edi += sign * p->count * p->size;
-            regs->ecx -= p->count;
-            return;
-        }
-    }
-
-    old_eax = regs->eax;
-
-    switch(p->size) {
-    case 1:
-        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
-        break;
-    case 2:
-        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
-        break;
-    case 4:
-        regs->eax = (p->u.data & 0xffffffff);
-        break;
-    default:
-        printk("Error: %s unknwon port size\n", __FUNCTION__);
-        domain_crash_synchronous();
+        if (p->state == STATE_IORESP_READY) {
+           p->state = STATE_INVALID;
+            clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+
+           if (p->type == IOREQ_TYPE_PIO)
+               vmx_pio_assist(regs, p, mpci_p);
+           else
+               vmx_mmio_assist(regs, p, mpci_p);
+       }
+       /* else an interrupt send event raced us */
     }
 }
 
@@ -456,8 +707,9 @@
     int port = iopacket_port(current->domain);
 
     do {
-        if(!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
+        if (!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
             do_block();
+
         vmx_check_events(current);
         if (!test_bit(ARCH_VMX_IO_WAIT, &current->arch.arch_vmx.flags))
             break;
diff -r 04ca47c298b5 xen/arch/x86/vmx_platform.c
--- a/xen/arch/x86/vmx_platform.c       Thu Sep  1 21:30:51 2005
+++ b/xen/arch/x86/vmx_platform.c       Fri Sep  2 11:49:40 2005
@@ -64,37 +64,37 @@
         case QUAD:
             return (long)(reg);
         default:
-            printk("Error: <__get_reg_value>Invalid reg size\n");
+       printf("Error: (__get_reg_value) Invalid reg size\n");
             domain_crash_synchronous();
     }
 }
 
-static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) 
+long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) 
 {
     if (size == BYTE) {
         switch (index) { 
-            case 0: //%al
+       case 0: /* %al */
                 return (char)(regs->rax & 0xFF);
-            case 1: //%cl  
+       case 1: /* %cl */
                 return (char)(regs->rcx & 0xFF);
-            case 2: //%dl
+       case 2: /* %dl */
                 return (char)(regs->rdx & 0xFF); 
-            case 3: //%bl
+       case 3: /* %bl */
                 return (char)(regs->rbx & 0xFF);
-            case 4: //%ah
+       case 4: /* %ah */
                 return (char)((regs->rax & 0xFF00) >> 8);
-            case 5: //%ch 
+       case 5: /* %ch */
                 return (char)((regs->rcx & 0xFF00) >> 8);
-            case 6: //%dh
+       case 6: /* %dh */
                 return (char)((regs->rdx & 0xFF00) >> 8);
-            case 7: //%bh
+       case 7: /* %bh */
                 return (char)((regs->rbx & 0xFF00) >> 8);
             default:
-                printk("Error: (get_reg_value)Invalid index value\n"); 
+           printf("Error: (get_reg_value) Invalid index value\n"); 
                 domain_crash_synchronous();
         }
-
-    }
+    }
+
     switch (index) {
         case 0: return __get_reg_value(regs->rax, size);
         case 1: return __get_reg_value(regs->rcx, size);
@@ -113,7 +113,7 @@
         case 14: return __get_reg_value(regs->r14, size);
         case 15: return __get_reg_value(regs->r15, size);
         default:
-            printk("Error: (get_reg_value)Invalid index value\n"); 
+       printf("Error: (get_reg_value) Invalid index value\n"); 
             domain_crash_synchronous();
     }
 }
@@ -129,117 +129,91 @@
     __vmread(GUEST_RIP, &regs->eip);
 }
 
-static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
+static inline long __get_reg_value(unsigned long reg, int size)
 {                    
-    /*               
-     * Reference the db_reg[] table
-     */              
-    switch (size) {  
-    case BYTE: 
+    switch(size) {
+    case WORD:
+       return (short)(reg & 0xFFFF);
+    case LONG:
+       return (int)(reg & 0xFFFFFFFF);
+    default:
+       printf("Error: (__get_reg_value) Invalid reg size\n");
+       domain_crash_synchronous();
+    }
+}
+
+long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
+{                    
+    if (size == BYTE) {
         switch (index) { 
-        case 0: //%al
+       case 0: /* %al */
             return (char)(regs->eax & 0xFF);
-        case 1: //%cl  
+       case 1: /* %cl */
             return (char)(regs->ecx & 0xFF);
-        case 2: //%dl
+       case 2: /* %dl */
             return (char)(regs->edx & 0xFF); 
-        case 3: //%bl
+       case 3: /* %bl */
             return (char)(regs->ebx & 0xFF);
-        case 4: //%ah
+       case 4: /* %ah */
             return (char)((regs->eax & 0xFF00) >> 8);
-        case 5: //%ch 
+       case 5: /* %ch */
             return (char)((regs->ecx & 0xFF00) >> 8);
-        case 6: //%dh
+       case 6: /* %dh */
             return (char)((regs->edx & 0xFF00) >> 8);
-        case 7: //%bh
+       case 7: /* %bh */
             return (char)((regs->ebx & 0xFF00) >> 8);
         default:
-            printk("Error: (get_reg_value)size case 0 error\n"); 
+           printf("Error: (get_reg_value) Invalid index value\n"); 
             domain_crash_synchronous();
         }
-    case WORD:
+        }
+
         switch (index) {
-        case 0: //%ax
-            return (short)(regs->eax & 0xFFFF);
-        case 1: //%cx
-            return (short)(regs->ecx & 0xFFFF);
-        case 2: //%dx
-            return (short)(regs->edx & 0xFFFF);
-        case 3: //%bx
-            return (short)(regs->ebx & 0xFFFF);
-        case 4: //%sp
-            return (short)(regs->esp & 0xFFFF);
-            break;
-        case 5: //%bp
-            return (short)(regs->ebp & 0xFFFF);
-        case 6: //%si
-            return (short)(regs->esi & 0xFFFF);
-        case 7: //%di
-            return (short)(regs->edi & 0xFFFF);
-        default:
-            printk("Error: (get_reg_value)size case 1 error\n");
-            domain_crash_synchronous();
-        }
-    case LONG:
-        switch (index) {
-        case 0: //%eax
-            return regs->eax;
-        case 1: //%ecx
-            return regs->ecx;
-        case 2: //%edx
-            return regs->edx;
-
-        case 3: //%ebx
-            return regs->ebx;
-        case 4: //%esp
-            return regs->esp;
-        case 5: //%ebp
-            return regs->ebp;
-        case 6: //%esi
-            return regs->esi;
-        case 7: //%edi
-            return regs->edi;
-        default:
-            printk("Error: (get_reg_value)size case 2 error\n");
-            domain_crash_synchronous();
-        }
+    case 0: return __get_reg_value(regs->eax, size);
+    case 1: return __get_reg_value(regs->ecx, size);
+    case 2: return __get_reg_value(regs->edx, size);
+    case 3: return __get_reg_value(regs->ebx, size);
+    case 4: return __get_reg_value(regs->esp, size);
+    case 5: return __get_reg_value(regs->ebp, size);
+    case 6: return __get_reg_value(regs->esi, size);
+    case 7: return __get_reg_value(regs->edi, size);
     default:
-        printk("Error: (get_reg_value)size case error\n");
+       printf("Error: (get_reg_value) Invalid index value\n"); 
         domain_crash_synchronous();
     }
 }
 #endif
 
-static inline const unsigned char *check_prefix(const unsigned char *inst, struct instruction *thread_inst, unsigned char *rex_p)
+static inline unsigned char *check_prefix(unsigned char *inst,
+               struct instruction *thread_inst, unsigned char *rex_p)
 {
     while (1) {
         switch (*inst) {
-            /* rex prefix for em64t instructions*/
+        /* rex prefix for em64t instructions */
             case 0x40 ... 0x4e:
                 *rex_p = *inst;
                 break;
-
-            case 0xf3: //REPZ
+        case 0xf3: /* REPZ */
                thread_inst->flags = REPZ;
-                       break;
-            case 0xf2: //REPNZ
+               break;
+        case 0xf2: /* REPNZ */
                thread_inst->flags = REPNZ;
-                       break;
-            case 0xf0: //LOCK
+               break;
+        case 0xf0: /* LOCK */
                break;
-            case 0x2e: //CS
-            case 0x36: //SS
-            case 0x3e: //DS
-            case 0x26: //ES
-            case 0x64: //FS
-            case 0x65: //GS
-                       thread_inst->seg_sel = *inst;
+        case 0x2e: /* CS */
+        case 0x36: /* SS */
+        case 0x3e: /* DS */
+        case 0x26: /* ES */
+        case 0x64: /* FS */
+        case 0x65: /* GS */
+               thread_inst->seg_sel = *inst;
                 break;
-            case 0x66: //32bit->16bit
+        case 0x66: /* 32bit->16bit */
                 thread_inst->op_size = WORD;
                 break;
             case 0x67:
-                       printf("Error: Not handling 0x67 (yet)\n");
+               printf("Error: Not handling 0x67 (yet)\n");
                 domain_crash_synchronous();
                 break;
             default:
@@ -249,7 +223,7 @@
     }
 }
 
-static inline unsigned long get_immediate(int op16, const unsigned char *inst, int op_size)
+static inline unsigned long get_immediate(int op16,const unsigned char *inst, int op_size)
 {
     int mod, reg, rm;
     unsigned long val = 0;
@@ -317,197 +291,299 @@
 
 static void init_instruction(struct instruction *mmio_inst)
 {
-    memset(mmio_inst->i_name, '0', I_NAME_LEN);
-    mmio_inst->op_size =  0;
-    mmio_inst->offset = 0;
+    mmio_inst->instr = 0;
+    mmio_inst->op_size = 0;
     mmio_inst->immediate = 0;
     mmio_inst->seg_sel = 0;
-    mmio_inst->op_num = 0;
 
     mmio_inst->operand[0] = 0;
     mmio_inst->operand[1] = 0;
-    mmio_inst->operand[2] = 0;
         
     mmio_inst->flags = 0;
 }
 
 #define GET_OP_SIZE_FOR_BYTE(op_size)   \
-    do {if (rex) op_size = BYTE_64;else op_size = BYTE;} while(0)
+    do {                               \
+       if (rex)                        \
+           op_size = BYTE_64;          \
+       else                            \
+           op_size = BYTE;             \
+    } while(0)
 
 #define GET_OP_SIZE_FOR_NONEBYTE(op_size)   \
-    do {if (rex & 0x8) op_size = QUAD; else if (op_size != WORD) op_size = LONG;} while(0)
-
-static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst)
+    do {                               \
+       if (rex & 0x8)                  \
+           op_size = QUAD;             \
+       else if (op_size != WORD)       \
+           op_size = LONG;             \
+    } while(0)
+
+
+/*
+ * Decode mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax)
+ */
+static int mem_acc(unsigned char size, struct instruction *instr)
+{
+    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
+    instr->operand[1] = mk_operand(size, 0, 0, REGISTER);
+    return DECODE_success;
+}
+
+/*
+ * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32)
+ */
+static int acc_mem(unsigned char size, struct instruction *instr)
+{
+    instr->operand[0] = mk_operand(size, 0, 0, REGISTER);
+    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
+    return DECODE_success;
+}
+
+/*
+ * Decode mem,reg operands (as in <opcode> m32/16, r32/16)
+ */
+static int mem_reg(unsigned char size, unsigned char *opcode,
+                       struct instruction *instr, unsigned char rex)
+{
+    int index = get_index(opcode + 1, rex);
+
+    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
+    instr->operand[1] = mk_operand(size, index, 0, REGISTER);
+    return DECODE_success;
+}
+
+/*
+ * Decode reg,mem operands (as in <opcode> r32/16, m32/16)
+ */
+static int reg_mem(unsigned char size, unsigned char *opcode,
+                       struct instruction *instr, unsigned char rex)
+{
+    int index = get_index(opcode + 1, rex);
+
+    instr->operand[0] = mk_operand(size, index, 0, REGISTER);
+    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
+    return DECODE_success;
+}
+
+static int vmx_decode(unsigned char *opcode, struct instruction *instr)
 {
     unsigned long eflags;
     int index, vm86 = 0;
     unsigned char rex = 0;
     unsigned char tmp_size = 0;
 
-
-    init_instruction(thread_inst);
-
-    inst = check_prefix(inst, thread_inst, &rex);
+    init_instruction(instr);
+
+    opcode = check_prefix(opcode, instr, &rex);
 
     __vmread(GUEST_RFLAGS, &eflags);
     if (eflags & X86_EFLAGS_VM)
         vm86 = 1;
 
     if (vm86) { /* meaning is reversed */
-       if (thread_inst->op_size == WORD)
-           thread_inst->op_size = LONG;
-       else if (thread_inst->op_size == LONG)
-           thread_inst->op_size = WORD;
-       else if (thread_inst->op_size == 0)
-           thread_inst->op_size = WORD;
-    }
-
-    switch(*inst) {
-        case 0x81:
-            /* This is only a workaround for cmpl instruction*/
-            strcpy((char *)thread_inst->i_name, "cmp");
+       if (instr->op_size == WORD)
+           instr->op_size = LONG;
+       else if (instr->op_size == LONG)
+           instr->op_size = WORD;
+       else if (instr->op_size == 0)
+           instr->op_size = WORD;
+    }
+
+    switch (*opcode) {
+    case 0x0B: /* or m32/16, r32/16 */
+       instr->instr = INSTR_OR;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0x20: /* and r8, m8 */
+       instr->instr = INSTR_AND;
+       GET_OP_SIZE_FOR_BYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x21: /* and r32/16, m32/16 */
+       instr->instr = INSTR_AND;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x23: /* and m32/16, r32/16 */
+       instr->instr = INSTR_AND;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0x30: /* xor r8, m8 */
+       instr->instr = INSTR_XOR;
+       GET_OP_SIZE_FOR_BYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x31: /* xor r32/16, m32/16 */
+       instr->instr = INSTR_XOR;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x39: /* cmp r32/16, m32/16 */
+       instr->instr = INSTR_CMP;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x81:
+       if (((opcode[1] >> 3) & 7) == 7) { /* cmp $imm, m32/16 */
+           instr->instr = INSTR_CMP;
+           GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+
+           instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
+           instr->immediate = get_immediate(vm86, opcode+1, BYTE);
+           instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
+
             return DECODE_success;
-
-        case 0x88:
-            /* mov r8 to m8 */
-            thread_inst->op_size = BYTE;
-            index = get_index((inst + 1), rex);
-            GET_OP_SIZE_FOR_BYTE(tmp_size);
-            thread_inst->operand[0] = mk_operand(tmp_size, index, 0, REGISTER);
-
-            break;
-        case 0x89:
-            /* mov r32/16 to m32/16 */
-            index = get_index((inst + 1), rex);
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-            thread_inst->operand[0] = mk_operand(thread_inst->op_size, index, 0, REGISTER);
-
-            break;
-        case 0x8a:
-            /* mov m8 to r8 */
-            thread_inst->op_size = BYTE;
-            index = get_index((inst + 1), rex);
-            GET_OP_SIZE_FOR_BYTE(tmp_size);
-            thread_inst->operand[1] = mk_operand(tmp_size, index, 0, REGISTER);
-            break;
-        case 0x8b:
-            /* mov r32/16 to m32/16 */
-            index = get_index((inst + 1), rex);
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-            thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 0, REGISTER);
-            break;
-        case 0x8c:
-        case 0x8e:
-            printk("%x, This opcode hasn't been handled yet!", *inst);
-            return DECODE_failure;
-            /* Not handle it yet. */
-        case 0xa0:
-            /* mov byte to al */
-            thread_inst->op_size = BYTE;
-            GET_OP_SIZE_FOR_BYTE(tmp_size);
-            thread_inst->operand[1] = mk_operand(tmp_size, 0, 0, REGISTER);
-            break;
-        case 0xa1:
-            /* mov word/doubleword to ax/eax */
-           GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-           thread_inst->operand[1] = mk_operand(thread_inst->op_size, 0, 0, REGISTER);
-
-            break;
-        case 0xa2:
-            /* mov al to (seg:offset) */
-            thread_inst->op_size = BYTE;
-            GET_OP_SIZE_FOR_BYTE(tmp_size);
-            thread_inst->operand[0] = mk_operand(tmp_size, 0, 0, REGISTER);
-            break;
-        case 0xa3:
-            /* mov ax/eax to (seg:offset) */
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-            thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, REGISTER);
-            break;
-        case 0xa4:
-            /* movsb */
-            thread_inst->op_size = BYTE;
-            strcpy((char *)thread_inst->i_name, "movs");
-            return DECODE_success;
-        case 0xa5:
-            /* movsw/movsl */
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-           strcpy((char *)thread_inst->i_name, "movs");
-            return DECODE_success;
-        case 0xaa:
-            /* stosb */
-            thread_inst->op_size = BYTE;
-            strcpy((char *)thread_inst->i_name, "stosb");
-            return DECODE_success;
-       case 0xab:
-            /* stosw/stosl */
-            if (thread_inst->op_size == WORD) {
-                strcpy((char *)thread_inst->i_name, "stosw");
-            } else {
-                thread_inst->op_size = LONG;
-                strcpy((char *)thread_inst->i_name, "stosl");
-            }
-            return DECODE_success;
-        case 0xc6:
-            /* mov imm8 to m8 */
-            thread_inst->op_size = BYTE;
-            thread_inst->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
-            thread_inst->immediate = get_immediate(vm86,
-                                       (inst+1), thread_inst->op_size);
-            break;
-        case 0xc7:
-            /* mov imm16/32 to m16/32 */
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-            thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, IMMEDIATE);
-            thread_inst->immediate = get_immediate(vm86, (inst+1), thread_inst->op_size);
+       } else
+           return DECODE_failure;
+
+    case 0x84:  /* test m8, r8 */
+       instr->instr = INSTR_TEST;
+       instr->op_size = BYTE;
+       GET_OP_SIZE_FOR_BYTE(tmp_size);
+       return mem_reg(tmp_size, opcode, instr, rex);
+
+    case 0x88: /* mov r8, m8 */
+       instr->instr = INSTR_MOV;
+       instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(tmp_size);
+       return reg_mem(tmp_size, opcode, instr, rex);
+
+    case 0x89: /* mov r32/16, m32/16 */
+       instr->instr = INSTR_MOV;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x8A: /* mov m8, r8 */
+       instr->instr = INSTR_MOV;
+       instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(tmp_size);
+       return mem_reg(tmp_size, opcode, instr, rex);
+
+    case 0x8B: /* mov m32/16, r32/16 */
+       instr->instr = INSTR_MOV;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0xA0: /* mov <addr>, al */
+       instr->instr = INSTR_MOV;
+       instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(tmp_size);
+       return mem_acc(tmp_size, instr);
+
+    case 0xA1: /* mov <addr>, ax/eax */
+       instr->instr = INSTR_MOV;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return mem_acc(instr->op_size, instr);
+
+    case 0xA2: /* mov al, <addr> */
+       instr->instr = INSTR_MOV;
+       instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(tmp_size);
+       return acc_mem(tmp_size, instr);
+
+    case 0xA3: /* mov ax/eax, <addr> */
+       instr->instr = INSTR_MOV;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return acc_mem(instr->op_size, instr);
+
+    case 0xA4: /* movsb */
+       instr->instr = INSTR_MOVS;
+       instr->op_size = BYTE;
+        return DECODE_success;
             
-            break;
-        case 0x0f:
-            break;
-        default:
-            printk("%x, This opcode hasn't been handled yet!", *inst);
-            return DECODE_failure;
-    }
+    case 0xA5: /* movsw/movsl */
+       instr->instr = INSTR_MOVS;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return DECODE_success;
     
-    strcpy((char *)thread_inst->i_name, "mov");
-    if (*inst != 0x0f) {
+    case 0xAA: /* stosb */
+       instr->instr = INSTR_STOS;
+       instr->op_size = BYTE;
         return DECODE_success;
-    }
-
-    inst++;
-    switch (*inst) {
+
+    case 0xAB: /* stosw/stosl */
+       instr->instr = INSTR_STOS;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return DECODE_success;
                     
-        /* movz */
-        case 0xb6:
-            index = get_index((inst + 1), rex);
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-            thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 0, REGISTER);
-            thread_inst->op_size = BYTE;
-            strcpy((char *)thread_inst->i_name, "movzb");
+    case 0xC6:
+       if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm8, m8 */
+           instr->instr = INSTR_MOV;
+           instr->op_size = BYTE;
+
+           instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
+           instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
+           instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
             
             return DECODE_success;
-        case 0xb7:
-           index = get_index((inst + 1), rex);
-           if (rex & 0x8) {
-                   thread_inst->op_size = LONG;
-                   thread_inst->operand[1] = mk_operand(QUAD, index, 0, REGISTER);
-           } else {
-                   thread_inst->op_size = WORD;
-                   thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER);
-           }
+       } else
+           return DECODE_failure;
             
-            strcpy((char *)thread_inst->i_name, "movzw");
+    case 0xC7:
+       if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm16/32, m16/32 */
+           instr->instr = INSTR_MOV;
+           GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+
+           instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
+           instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
+           instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
             
             return DECODE_success;
-        default:
-            printk("0f %x, This opcode hasn't been handled yet!", *inst);
-            return DECODE_failure;
-    }
-
-    /* will never reach here */
-    return DECODE_failure;
-}
-
+       } else
+           return DECODE_failure;
+
+    case 0xF6:
+       if (((opcode[1] >> 3) & 7) == 0) { /* testb $imm8, m8 */
+           instr->instr = INSTR_TEST;
+           instr->op_size = BYTE;
+
+           instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
+           instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
+           instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
+
+           return DECODE_success;
+       } else
+           return DECODE_failure;
+
+    case 0x0F:
+       break;
+
+    default:
+       printf("%x, This opcode isn't handled yet!\n", *opcode);
+        return DECODE_failure;
+    }
+
+    switch (*++opcode) {
+    case 0xB6: /* movz m8, r16/r32 */
+       instr->instr = INSTR_MOVZ;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       index = get_index(opcode + 1, rex);
+       instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
+       instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
+       return DECODE_success;
+
+    case 0xB7: /* movz m16, r32 */
+       instr->instr = INSTR_MOVZ;
+       index = get_index(opcode + 1, rex);
+       if (rex & 0x8) {
+          instr->op_size = LONG;
+          instr->operand[1] = mk_operand(QUAD, index, 0, REGISTER);
+       } else {
+          instr->op_size = WORD;
+          instr->operand[1] = mk_operand(LONG, index, 0, REGISTER);
+       }
+       instr->operand[0] = mk_operand(instr->op_size, 0, 0, MEMORY);
+       return DECODE_success;
+
+    default:
+       printf("0f %x, This opcode isn't handled yet\n", *opcode);
+       return DECODE_failure;
+    }
+}
+
+/* XXX use vmx_copy instead */
 int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len)
 {
     unsigned long gpa;
@@ -552,40 +628,27 @@
     return inst_len+remaining;
 }
 
-static int read_from_mmio(struct instruction *inst_p)
-{
-    // Only for mov instruction now!!!
-    if (inst_p->operand[1] & REGISTER)
-        return 1;
-
-    return 0;
-}
-
-// dir:  1 read from mmio
-//       0 write to mmio
-static void send_mmio_req(unsigned long gpa, 
-                   struct instruction *inst_p, long value, int dir, int pvalid)
+void send_mmio_req(unsigned char type, unsigned long gpa, 
+          unsigned long count, int size, long value, int dir, int pvalid)
 {
     struct vcpu *d = current;
     vcpu_iodata_t *vio;
     ioreq_t *p;
     int vm86;
-    struct mi_per_cpu_info *mpci_p;
-    struct cpu_user_regs *inst_decoder_regs;
+    struct cpu_user_regs *regs;
     extern long evtchn_send(int lport);
 
-    mpci_p = &current->domain->arch.vmx_platform.mpci;
-    inst_decoder_regs = mpci_p->inst_decoder_regs;
+    regs = current->domain->arch.vmx_platform.mpci.inst_decoder_regs;
 
     vio = get_vio(d->domain, d->vcpu_id);
-
     if (vio == NULL) {
-        printk("bad shared page\n");
+        printf("bad shared page\n");
         domain_crash_synchronous(); 
     }
+
     p = &vio->vp_ioreq;
 
-    vm86 = inst_decoder_regs->eflags & X86_EFLAGS_VM;
+    vm86 = regs->eflags & X86_EFLAGS_VM;
 
     if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) {
         printf("VMX I/O has not yet completed\n");
@@ -596,24 +659,21 @@
     p->dir = dir;
     p->pdata_valid = pvalid;
 
-    p->port_mm = 1;
-    p->size = inst_p->op_size;
+    p->type = type;
+    p->size = size;
     p->addr = gpa;
-    p->u.data = value;
+    p->count = count;
+    p->df = regs->eflags & EF_DF ? 1 : 0;
+
+    if (pvalid) {
+       if (vmx_paging_enabled(current))
+           p->u.pdata = (void *) gva_to_gpa(value);
+        else
+           p->u.pdata = (void *) value; /* guest VA == guest PA */
+    } else
+       p->u.data = value;
 
     p->state = STATE_IOREQ_READY;
-
-    if (inst_p->flags & REPZ) {
-        if (vm86)
-            p->count = inst_decoder_regs->ecx & 0xFFFF;
-        else
-            p->count = inst_decoder_regs->ecx;
-        p->df = (inst_decoder_regs->eflags & EF_DF) ? 1 : 0;
-    } else
-        p->count = 1;
-
-    if ((pvalid) && vmx_paging_enabled(current))
-        p->u.pdata = (void *) gva_to_gpa(p->u.data);
 
     if (vmx_mmio_intercept(p)){
         p->state = STATE_IORESP_READY;
@@ -625,18 +685,50 @@
     vmx_wait_io();
 }
 
+static void mmio_operands(int type, unsigned long gpa, struct instruction *inst,
+               struct mi_per_cpu_info *mpcip, struct cpu_user_regs *regs)
+{
+    unsigned long value = 0;
+    int index, size;
+    
+    size = operand_size(inst->operand[0]);
+
+    mpcip->flags = inst->flags;
+    mpcip->instr = inst->instr;
+    mpcip->operand[0] = inst->operand[0]; /* source */
+    mpcip->operand[1] = inst->operand[1]; /* destination */
+
+    if (inst->operand[0] & REGISTER) { /* dest is memory */
+       index = operand_index(inst->operand[0]);
+       value = get_reg_value(size, index, 0, regs);
+       send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0);
+    } else if (inst->operand[0] & IMMEDIATE) { /* dest is memory */
+       value = inst->immediate;
+       send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0);
+    } else if (inst->operand[0] & MEMORY) { /* dest is register */
+       /* send the request and wait for the value */
+       send_mmio_req(type, gpa, 1, size, 0, IOREQ_READ, 0);
+    } else {
+       printf("mmio_operands: invalid operand\n");
+       domain_crash_synchronous();
+    }
+}
+
+#define GET_REPEAT_COUNT() \
+     (mmio_inst.flags & REPZ ? (vm86 ? regs->ecx & 0xFFFF : regs->ecx) : 1)
+       
 void handle_mmio(unsigned long va, unsigned long gpa)
 {
     unsigned long eip, eflags, cs;
     unsigned long inst_len, inst_addr;
-    struct mi_per_cpu_info *mpci_p;
-    struct cpu_user_regs *inst_decoder_regs;
+    struct mi_per_cpu_info *mpcip;
+    struct cpu_user_regs *regs;
     struct instruction mmio_inst;
     unsigned char inst[MAX_INST_LEN];
-    int vm86, ret;
+    int i, vm86, ret;
      
-    mpci_p = &current->domain->arch.vmx_platform.mpci;
-    inst_decoder_regs = mpci_p->inst_decoder_regs;
+    mpcip = &current->domain->arch.vmx_platform.mpci;
+    regs = mpcip->inst_decoder_regs;
 
     __vmread(GUEST_RIP, &eip);
     __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
@@ -647,108 +739,142 @@
         __vmread(GUEST_CS_SELECTOR, &cs);
         inst_addr = (cs << 4) + eip;
     } else
-        inst_addr = eip; /* XXX should really look at GDT[cs].base too */
-
-    memset(inst, '0', MAX_INST_LEN);
+        inst_addr = eip;
+
+    memset(inst, 0, MAX_INST_LEN);
     ret = inst_copy_from_guest(inst, inst_addr, inst_len);
     if (ret != inst_len) {
-        printk("handle_mmio - EXIT: get guest instruction fault\n");
+        printf("handle_mmio - EXIT: get guest instruction fault\n");
         domain_crash_synchronous();
     }
-
 
     init_instruction(&mmio_inst);
     
     if (vmx_decode(inst, &mmio_inst) == DECODE_failure) {
-        printk("vmx decode failure: eip=%lx, va=%lx\n %x %x %x %x\n", eip, va, 
-               inst[0], inst[1], inst[2], inst[3]);
+       printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %ld:",
+               va, gpa, inst_len);
+       for (i = 0; i < inst_len; i++)
+           printf(" %02x", inst[i] & 0xFF);
+       printf("\n");
         domain_crash_synchronous();
     }
 
-    __vmwrite(GUEST_RIP, eip + inst_len);
-    store_cpu_user_regs(inst_decoder_regs);
-
-    // Only handle "mov" and "movs" instructions!
-    if (!strncmp((char *)mmio_inst.i_name, "movz", 4)) {
-        if (read_from_mmio(&mmio_inst)) {
-            // Send the request and waiting for return value.
-            mpci_p->mmio_target = mmio_inst.operand[1] | WZEROEXTEND;
-            send_mmio_req(gpa, &mmio_inst, 0, IOREQ_READ, 0);
-            return ;
-        } else {
-            printk("handle_mmio - EXIT: movz error!\n");
-            domain_crash_synchronous();
-        }
-    }
-
-    if (!strncmp((char *)mmio_inst.i_name, "movs", 4)) {
+    store_cpu_user_regs(regs);
+    regs->eip += inst_len; /* advance %eip */
+
+    switch (mmio_inst.instr) {
+    case INSTR_MOV:
+       mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_MOVS:
+    {
+       unsigned long count = GET_REPEAT_COUNT();
+       unsigned long size = mmio_inst.op_size;
+       int sign = regs->eflags & EF_DF ? -1 : 1;
        unsigned long addr = 0;
        int dir;
 
+       /* determine non-MMIO address */
        if (vm86) {
            unsigned long seg;
 
            __vmread(GUEST_ES_SELECTOR, &seg);
-           if (((seg << 4) + (inst_decoder_regs->edi & 0xFFFF)) == va) {
+           if (((seg << 4) + (regs->edi & 0xFFFF)) == va) {
                dir = IOREQ_WRITE;
                __vmread(GUEST_DS_SELECTOR, &seg);
-               addr = (seg << 4) + (inst_decoder_regs->esi & 0xFFFF);
+               addr = (seg << 4) + (regs->esi & 0xFFFF);
            } else {
                dir = IOREQ_READ;
-               addr = (seg << 4) + (inst_decoder_regs->edi & 0xFFFF);
+               addr = (seg << 4) + (regs->edi & 0xFFFF);
            }
-       } else { /* XXX should really look at GDT[ds/es].base too */
-           if (va == inst_decoder_regs->edi) {
+       } else {
+           if (va == regs->edi) {
                dir = IOREQ_WRITE;
-               addr = inst_decoder_regs->esi;
+               addr = regs->esi;
            } else {
                dir = IOREQ_READ;
-               addr = inst_decoder_regs->edi;
+               addr = regs->edi;
            }
        }
 
-       send_mmio_req(gpa, &mmio_inst, addr, dir, 1);
-        return;
-    }
-
-    if (!strncmp((char *)mmio_inst.i_name, "mov", 3)) {
-        long value = 0;
-        int size, index;
-
-        if (read_from_mmio(&mmio_inst)) {
-            // Send the request and waiting for return value.
-            mpci_p->mmio_target = mmio_inst.operand[1];
-            send_mmio_req(gpa, &mmio_inst, value, IOREQ_READ, 0);
-            return;
-        } else {
-            // Write to MMIO
-            if (mmio_inst.operand[0] & IMMEDIATE) {
-                value = mmio_inst.immediate;
-            } else if (mmio_inst.operand[0] & REGISTER) {
-                size = operand_size(mmio_inst.operand[0]);
-                index = operand_index(mmio_inst.operand[0]);
-                value = get_reg_value(size, index, 0, inst_decoder_regs);
-            } else {
-                domain_crash_synchronous();
-            }
-            send_mmio_req(gpa, &mmio_inst, value, IOREQ_WRITE, 0);
-            return;
-        }
-    }
-
-    if (!strncmp((char *)mmio_inst.i_name, "stos", 4)) {
-        send_mmio_req(gpa, &mmio_inst,
-            inst_decoder_regs->eax, IOREQ_WRITE, 0);
-        return;
-    }
-    /* Workaround for cmp instruction */
-    if (!strncmp((char *)mmio_inst.i_name, "cmp", 3)) {
-        inst_decoder_regs->eflags &= ~X86_EFLAGS_ZF;
-        __vmwrite(GUEST_RFLAGS, inst_decoder_regs->eflags);
-        return;
-    }
-
-    domain_crash_synchronous();
+       mpcip->flags = mmio_inst.flags;
+       mpcip->instr = mmio_inst.instr;
+
+       /*
+        * In case of a movs spanning multiple pages, we break the accesses
+        * up into multiple pages (the device model works with non-contiguous
+        * physical guest pages). To copy just one page, we adjust %ecx and
+        * do not advance %eip so that the next "rep movs" copies the next page.
+        * Unaligned accesses, for example movsl starting at PGSZ-2, are
+        * turned into a single copy where we handle the overlapping memory
+        * copy ourselves. After this copy succeeds, "rep movs" is executed
+        * again.
+        */
+       if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
+           unsigned long value = 0;
+
+           mpcip->flags |= OVERLAP;
+
+           regs->eip -= inst_len; /* do not advance %eip */
+
+           if (dir == IOREQ_WRITE)
+               vmx_copy(&value, addr, size, VMX_COPY_IN);
+           send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, 0);
+       } else {
+           if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
+               regs->eip -= inst_len; /* do not advance %eip */
+
+               if (sign > 0)
+                   count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
+               else
+                   count = (addr & ~PAGE_MASK) / size;
+           }
+
+           send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, addr, dir, 1);
+       }
+        break;
+    }
+
+    case INSTR_MOVZ:
+       mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_STOS:
+       /*
+        * Since the destination is always in (contiguous) mmio space we don't
+        * need to break it up into pages.
+        */
+       mpcip->flags = mmio_inst.flags;
+       mpcip->instr = mmio_inst.instr;
+        send_mmio_req(IOREQ_TYPE_COPY, gpa,
+           GET_REPEAT_COUNT(), mmio_inst.op_size, regs->eax, IOREQ_WRITE, 0);
+       break;
+
+    case INSTR_OR:
+       mmio_operands(IOREQ_TYPE_OR, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_AND:
+       mmio_operands(IOREQ_TYPE_AND, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_XOR:
+       mmio_operands(IOREQ_TYPE_XOR, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_CMP:
+       mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_TEST:
+       mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    default:
+       printf("Unhandled MMIO instruction\n");
+       domain_crash_synchronous();
+    }
 }
 
 #endif /* CONFIG_VMX */
diff -r 04ca47c298b5 xen/include/asm-x86/vmx_platform.h
--- a/xen/include/asm-x86/vmx_platform.h        Thu Sep  1 21:30:51 2005
+++ b/xen/include/asm-x86/vmx_platform.h        Fri Sep  2 11:49:40 2005
@@ -24,8 +24,7 @@
 #include <asm/vmx_virpit.h>
 #include <asm/vmx_intercept.h>
 
-#define MAX_OPERAND_NUM 3
-#define I_NAME_LEN  16
+#define MAX_OPERAND_NUM 2
 
 #define mk_operand(size, index, seg, flag) \
     (((size) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
@@ -35,54 +34,60 @@
 
 #define operand_index(operand)  \
       ((operand >> 16) & 0xFF)
-      //For instruction.operand[].size
+
+/* for instruction.operand[].size */
 #define BYTE    1
 #define WORD    2
 #define LONG    4
 #define QUAD    8
 #define BYTE_64 16
 
-      //For instruction.operand[].flag
+/* for instruction.operand[].flag */
 #define REGISTER    0x1
 #define MEMORY      0x2
 #define IMMEDIATE   0x4
-#define WZEROEXTEND 0x8
 
-      //For instruction.flags
+/* for instruction.flags */
 #define REPZ    0x1
 #define REPNZ   0x2
+#define OVERLAP 0x4
+
+#define        INSTR_PIO       1
+#define INSTR_OR       2
+#define INSTR_AND      3
+#define INSTR_XOR      4
+#define INSTR_CMP      5
+#define INSTR_MOV      6
+#define INSTR_MOVS     7
+#define INSTR_MOVZ     8
+#define INSTR_STOS     9
+#define INSTR_TEST     10
 
 struct instruction {
-    __s8    i_name[I_NAME_LEN];  //Instruction's name
-    __s16   op_size;    //The operand's bit size, e.g. 16-bit or 32-bit.
-
-    __u64   offset;     //The effective address
-          //offset = Base + (Index * Scale) + Displacement
-
+    __s8    instr;     /* instruction type */
+    __s16   op_size;    /* the operand's bit size, e.g. 16-bit or 32-bit */
     __u64   immediate;
-
-    __u16   seg_sel;    //Segmentation selector
-
-    __u32   operand[MAX_OPERAND_NUM];   //The order of operand is from AT&T Assembly
-    __s16   op_num; //The operand numbers
-
-    __u32   flags; //
+    __u16   seg_sel;    /* segmentation selector */
+    __u32   operand[MAX_OPERAND_NUM];   /* order is AT&T assembly */
+    __u32   flags;
 };
 
 #define MAX_INST_LEN      32
 
-struct mi_per_cpu_info
-{
-    unsigned long          mmio_target;
-    struct cpu_user_regs        *inst_decoder_regs;
+struct mi_per_cpu_info {
+    int                    flags;
+    int                           instr;               /* instruction */
+    unsigned long          operand[2];         /* operands */
+    unsigned long          immediate;          /* immediate portion */
+    struct cpu_user_regs   *inst_decoder_regs; /* current context */
 };
 
 struct virtual_platform_def {
-    unsigned long          *real_mode_data; /* E820, etc. */
+    unsigned long          *real_mode_data;    /* E820, etc. */
     unsigned long          shared_page_va;
     struct vmx_virpit_t    vmx_pit;
     struct vmx_handler_t   vmx_handler;
-    struct mi_per_cpu_info mpci;            /* MMIO */
+    struct mi_per_cpu_info mpci;               /* MMIO */
 };
 
 extern void handle_mmio(unsigned long, unsigned long);
diff -r 04ca47c298b5 xen/include/public/io/ioreq.h
--- a/xen/include/public/io/ioreq.h     Thu Sep  1 21:30:51 2005
+++ b/xen/include/public/io/ioreq.h     Fri Sep  2 11:49:40 2005
@@ -29,9 +29,17 @@
 #define STATE_IORESP_READY      3
 #define STATE_IORESP_HOOK       4
 
-/* VMExit dispatcher should cooperate with instruction decoder to
-   prepare this structure and notify service OS and DM by sending
-   virq */
+#define IOREQ_TYPE_PIO         0       /* pio */
+#define IOREQ_TYPE_COPY                1       /* mmio ops */
+#define IOREQ_TYPE_AND         2
+#define IOREQ_TYPE_OR          3
+#define IOREQ_TYPE_XOR         4
+
+/*
+ * VMExit dispatcher should cooperate with instruction decoder to
+ * prepare this structure and notify service OS and DM by sending
+ * virq 
+ */
 typedef struct {
     u64     addr;               /*  physical address            */
     u64     size;               /*  size in bytes               */
@@ -43,8 +51,8 @@
     u8      state:4;
     u8      pdata_valid:1;     /* if 1, use pdata above        */
     u8      dir:1;             /*  1=read, 0=write             */
-    u8      port_mm:1;         /*  0=portio, 1=mmio            */
     u8      df:1;
+    u8      type;              /* I/O type                     */
 } ioreq_t;
 
 #define MAX_VECTOR    256

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.