[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] VMX support for MMIO/PIO in VM8086 mode



Memory-mapped and port I/O are currently broken under VMX when the
partition is running in VM8086 mode. The reason is that the instruction
decoding support uses 32-bit opcode/address decodes rather than 16-bit
decodes. This patch fixes that. In addition, the patch adds support for
decoding the "stos" instruction because this is a frequently used way
to clear MMIO areas such as the screen.

As an aside, vmx_platform.c should really reuse x86_emulate.c as much
as possible.

Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>

===== tools/ioemu/iodev/cpu.cc 1.7 vs edited =====
--- 1.7/tools/ioemu/iodev/cpu.cc        2005-03-28 11:56:48 -05:00
+++ edited/tools/ioemu/iodev/cpu.cc     2005-03-31 13:55:11 -05:00
@@ -51,7 +51,7 @@
        if (req->state == STATE_IOREQ_READY) {
                req->state = STATE_IOREQ_INPROCESS;
        } else {
-               BX_INFO(("False I/O requrest ... in-service already: %lx, 
pvalid: %lx,port: %lx, data: %lx, count: %lx, size: %lx\n", req->state, 
req->pdata_valid, req->addr, req->u.data, req->count, req->size));
+               BX_INFO(("False I/O request ... in-service already: %lx, 
pvalid: %lx,port: %lx, data: %lx, count: %lx, size: %lx\n", req->state, 
req->pdata_valid, req->addr, req->u.data, req->count, req->size));
                req = NULL;
        }
 
@@ -95,6 +95,8 @@
        }
        if (req->port_mm == 0){//port io
                if(req->dir == IOREQ_READ){//read
+                       //BX_INFO(("pio: <READ>addr:%llx, value:%llx, size: 
%llx, count: %llx\n", req->addr, req->u.data, req->size, req->count));
+
                        if (!req->pdata_valid)
                                req->u.data = BX_INP(req->addr, req->size);
                        else {
@@ -107,6 +109,8 @@
                                }
                        }
                } else if(req->dir == IOREQ_WRITE) {
+                       //BX_INFO(("pio: <WRITE>addr:%llx, value:%llx, size: 
%llx, count: %llx\n", req->addr, req->u.data, req->size, req->count));
+
                        if (!req->pdata_valid) {
                                BX_OUTP(req->addr, (dma_addr_t) req->u.data, 
req->size);
                        } else {
@@ -123,20 +127,29 @@
        } else if (req->port_mm == 1){//memory map io
                if (!req->pdata_valid) {
                        if(req->dir == IOREQ_READ){//read
-                               BX_MEM_READ_PHYSICAL(req->addr, req->size, 
&req->u.data);
-                       } else if(req->dir == IOREQ_WRITE)//write
-                               BX_MEM_WRITE_PHYSICAL(req->addr, req->size, 
&req->u.data);
+                               //BX_INFO(("mmio[value]: <READ> addr:%llx, 
value:%llx, size: %llx, count: %llx\n", req->addr, req->u.data, req->size, 
req->count));
+
+                               for (i = 0; i < req->count; i++) {
+                                       BX_MEM_READ_PHYSICAL(req->addr, 
req->size, &req->u.data);
+                               }
+                       } else if(req->dir == IOREQ_WRITE) {//write
+                               //BX_INFO(("mmio[value]: <WRITE> addr:%llx, 
value:%llx, size: %llx, count: %llx\n", req->addr, req->u.data, req->size, 
req->count));
+
+                               for (i = 0; i < req->count; i++) {
+                                       BX_MEM_WRITE_PHYSICAL(req->addr, 
req->size, &req->u.data);
+                               }
+                       }
                } else {
                        //handle movs
                        unsigned long tmp;
                        if (req->dir == IOREQ_READ) {
-                               //BX_INFO(("<READ>addr:%llx, pdata:%llx, size: 
%x, count: %x\n", req->addr, req->u.pdata, req->size, req->count));
+                               //BX_INFO(("mmio[pdata]: <READ>addr:%llx, 
pdata:%llx, size: %x, count: %x\n", req->addr, req->u.pdata, req->size, 
req->count));
                                for (i = 0; i < req->count; i++) {
                                        BX_MEM_READ_PHYSICAL(req->addr + (sign 
* i * req->size), req->size, &tmp);
                                        BX_MEM_WRITE_PHYSICAL((dma_addr_t) 
req->u.pdata + (sign * i * req->size), req->size, &tmp);
                                }
                        } else if (req->dir == IOREQ_WRITE) {
-                               //BX_INFO(("<WRITE>addr:%llx, pdata:%llx, size: 
%x, count: %x\n", req->addr, req->u.pdata, req->size, req->count));
+                               //BX_INFO(("mmio[pdata]: <WRITE>addr:%llx, 
pdata:%llx, size: %x, count: %x\n", req->addr, req->u.pdata, req->size, 
req->count));
                                for (i = 0; i < req->count; i++) {
                                        
BX_MEM_READ_PHYSICAL((dma_addr_t)req->u.pdata + (sign * i * req->size), 
req->size, &tmp);
                                        BX_MEM_WRITE_PHYSICAL(req->addr + (sign 
* i * req->size), req->size, &tmp);
===== xen/arch/x86/vmx.c 1.35 vs edited =====
--- 1.35/xen/arch/x86/vmx.c     2005-03-25 08:46:18 -05:00
+++ edited/xen/arch/x86/vmx.c   2005-03-31 13:41:48 -05:00
@@ -294,13 +294,17 @@
     vcpu_iodata_t *vio;
     ioreq_t *p;
     unsigned long addr;
-    unsigned long eip;
+    unsigned long eip, cs, eflags;
+    int vm86;
 
     __vmread(GUEST_EIP, &eip);
+    __vmread(GUEST_CS_SELECTOR, &cs);
+    __vmread(GUEST_EFLAGS, &eflags);
+    vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
 
     VMX_DBG_LOG(DBG_LEVEL_1, 
-            "vmx_io_instruction: eip=%p, exit_qualification = %lx",
-            eip, exit_qualification);
+            "vmx_io_instruction: vm86 %d, eip=%p:%p, exit_qualification = %lx",
+            vm86, cs, eip, exit_qualification);
 
     if (test_bit(6, &exit_qualification))
         addr = (exit_qualification >> 16) & (0xffff);
@@ -325,17 +329,29 @@
     p->size = (exit_qualification & 7) + 1;
 
     if (test_bit(4, &exit_qualification)) {
-        unsigned long eflags;
-
-        __vmread(GUEST_EFLAGS, &eflags);
         p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
         p->pdata_valid = 1;
-        p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
-            regs->esi
-            : regs->edi);
+
+        if (vm86) {
+            unsigned long seg;
+            if (p->dir == IOREQ_WRITE) {
+                __vmread(GUEST_DS_SELECTOR, &seg);
+                p->u.pdata = (void *)
+                        ((seg << 4) | (regs->esi & 0xFFFF));
+            } else {
+                __vmread(GUEST_ES_SELECTOR, &seg);
+                p->u.pdata = (void *)
+                        ((seg << 4) | (regs->edi & 0xFFFF));
+            }
+        } else {
+               p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
+                   regs->esi : regs->edi);
+        }
         p->u.pdata = (void *) gva_to_gpa(p->u.data);
+
+
         if (test_bit(5, &exit_qualification))
-            p->count = regs->ecx;
+           p->count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
         if ((p->u.data & PAGE_MASK) != 
             ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
             printk("stringio crosses page boundary!\n");
@@ -368,13 +384,20 @@
     do_block();
 }
 
+static int
+vm86assist(struct exec_domain *d)
+{
+    /* stay tuned ... */
+    return 0;
+}
+
 #define CASE_GET_REG(REG, reg)  \
     case REG_ ## REG: value = regs->reg; break
 
 /*
  * Write to control registers
  */
-static void mov_to_cr(int gp, int cr, struct xen_regs *regs)
+static int mov_to_cr(int gp, int cr, struct xen_regs *regs)
 {
     unsigned long value;
     unsigned long old_cr;
@@ -454,8 +477,21 @@
                     d->arch.arch_vmx.cpu_cr3, mfn);
             /* undo the get_page done in the para virt case */
             put_page_and_type(&frame_table[old_base_mfn]);
+        } else {
+            if ((value & X86_CR0_PE) == 0) {
+               unsigned long eip;
 
-        }
+               __vmread(GUEST_EIP, &eip);
+                VMX_DBG_LOG(DBG_LEVEL_1,
+                       "Disabling CR0.PE at %%eip 0x%lx", eip);
+               if (vm86assist(d)) {
+                   __vmread(GUEST_EIP, &eip);
+                   VMX_DBG_LOG(DBG_LEVEL_1,
+                       "Transfering control to vm86assist %%eip 0x%lx", eip);
+                   return 0; /* do not update eip! */
+               }
+           }
+       }
         break;
     }
     case 3: 
@@ -534,7 +570,9 @@
         printk("invalid cr: %d\n", gp);
         __vmx_bug(regs);
     }
-}   
+
+    return 1;
+}
 
 #define CASE_SET_REG(REG, reg)      \
     case REG_ ## REG:       \
@@ -575,7 +613,7 @@
     VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
 }
 
-static void vmx_cr_access (unsigned long exit_qualification, struct xen_regs 
*regs)
+static int vmx_cr_access(unsigned long exit_qualification, struct xen_regs 
*regs)
 {
     unsigned int gp, cr;
     unsigned long value;
@@ -584,8 +622,7 @@
     case TYPE_MOV_TO_CR:
         gp = exit_qualification & CONTROL_REG_ACCESS_REG;
         cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
-        mov_to_cr(gp, cr, regs);
-        break;
+        return mov_to_cr(gp, cr, regs);
     case TYPE_MOV_FROM_CR:
         gp = exit_qualification & CONTROL_REG_ACCESS_REG;
         cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
@@ -604,6 +641,7 @@
         __vmx_bug(regs);
         break;
     }
+    return 1;
 }
 
 static inline void vmx_do_msr_read(struct xen_regs *regs)
@@ -619,7 +657,7 @@
 }
 
 /*
- * Need to use this exit to rescheule
+ * Need to use this exit to reschedule
  */
 static inline void vmx_vmexit_do_hlt(void)
 {
@@ -891,8 +929,8 @@
 
         VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification 
= %lx", 
                 eip, inst_len, exit_qualification);
-        vmx_cr_access(exit_qualification, &regs);
-        __update_guest_eip(inst_len);
+        if (vmx_cr_access(exit_qualification, &regs))
+           __update_guest_eip(inst_len);
         break;
     }
     case EXIT_REASON_DR_ACCESS:
===== xen/arch/x86/vmx_platform.c 1.11 vs edited =====
--- 1.11/xen/arch/x86/vmx_platform.c    2005-03-25 08:46:18 -05:00
+++ edited/xen/arch/x86/vmx_platform.c  2005-03-31 13:39:35 -05:00
@@ -55,6 +55,8 @@
     __vmread(GUEST_ESP, &regs->esp);
     __vmread(GUEST_EFLAGS, &regs->eflags);
     __vmread(GUEST_CS_SELECTOR, &regs->cs);
+    __vmread(GUEST_DS_SELECTOR, &regs->ds);
+    __vmread(GUEST_ES_SELECTOR, &regs->es);
     __vmread(GUEST_EIP, &regs->eip);
 }
 
@@ -144,19 +146,27 @@
     while (1) {
         switch (*inst) {
             case 0xf3: //REPZ
+               thread_inst->flags = REPZ;
+               break;
             case 0xf2: //REPNZ
+               thread_inst->flags = REPNZ;
+               break;
             case 0xf0: //LOCK
+               break;
             case 0x2e: //CS
             case 0x36: //SS
             case 0x3e: //DS
             case 0x26: //ES
             case 0x64: //FS
             case 0x65: //GS
+               thread_inst->seg_sel = *inst;
                 break;
             case 0x66: //32bit->16bit
                 thread_inst->op_size = WORD;
                 break;
             case 0x67:
+               printf("Not handling 0x67 (yet)\n");
+               domain_crash_synchronous(); 
                 break;
             default:
                 return inst;
@@ -165,7 +175,7 @@
     }
 }
 
-static inline unsigned long get_immediate(const unsigned char *inst, int 
op_size)
+static inline unsigned long get_immediate(int op16, const unsigned char *inst, 
int op_size)
 {
     int mod, reg, rm;
     unsigned long val = 0;
@@ -183,14 +193,21 @@
     switch(mod) {
         case 0:
             if (rm == 5) {
-                inst = inst + 4; //disp32, skip 4 bytes
+               if (op16)
+                    inst = inst + 2; //disp16, skip 2 bytes
+               else
+                    inst = inst + 4; //disp32, skip 4 bytes
             }
             break;
         case 1:
             inst++; //disp8, skip 1 byte
             break;
         case 2:
-            inst = inst + 4; //disp32, skip 4 bytes
+           if (op16)
+                inst = inst + 2; //disp16, skip 2 bytes
+           else
+                inst = inst + 4; //disp32, skip 4 bytes
+            break;
     }
     for (i = 0; i < op_size; i++) {
         val |= (*inst++ & 0xff) << (8 * i);
@@ -218,7 +235,21 @@
 
 static int vmx_decode(const unsigned char *inst, struct instruction 
*thread_inst)
 {
-    int index;
+    unsigned long eflags;
+    int index, vm86 = 0;
+
+    __vmread(GUEST_EFLAGS, &eflags);
+    if (eflags & X86_EFLAGS_VM)
+       vm86 = 1;
+
+    if (vm86) { /* meaning is reversed */
+       if (thread_inst->op_size == WORD)
+           thread_inst->op_size = LONG;
+       else if (thread_inst->op_size == LONG)
+           thread_inst->op_size = WORD;
+       else if (thread_inst->op_size == 0)
+           thread_inst->op_size = WORD;
+    }
 
     switch(*inst) {
         case 0x88:
@@ -258,7 +289,6 @@
             printk("%x, This opcode hasn't been handled yet!", *inst);
             return DECODE_failure;
             /* Not handle it yet. */
-
         case 0xa0:
             /* mov byte to al */
             thread_inst->op_size = BYTE;
@@ -291,7 +321,6 @@
             /* movsb */
             thread_inst->op_size = BYTE;
             strcpy((char *)thread_inst->i_name, "movs");
-            
             return DECODE_success;
         case 0xa5:
             /* movsw/movsl */
@@ -299,16 +328,28 @@
             } else {
                 thread_inst->op_size = LONG;
             }
-            
             strcpy((char *)thread_inst->i_name, "movs");
-            
             return DECODE_success;
-
+        case 0xaa:
+            /* stosb */
+            thread_inst->op_size = BYTE;
+            strcpy((char *)thread_inst->i_name, "stosb");
+            return DECODE_success;
+       case 0xab:
+            /* stosw/stosl */
+            if (thread_inst->op_size == WORD) {
+                strcpy((char *)thread_inst->i_name, "stosw");
+            } else {
+                thread_inst->op_size = LONG;
+                strcpy((char *)thread_inst->i_name, "stosl");
+            }
+            return DECODE_success;
         case 0xc6:
             /* mov imm8 to m8 */
             thread_inst->op_size = BYTE;
             thread_inst->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
-            thread_inst->immediate = get_immediate((inst+1), 
thread_inst->op_size);
+            thread_inst->immediate = get_immediate(vm86,
+                                       (inst+1), thread_inst->op_size);
             break;
         case 0xc7:
             /* mov imm16/32 to m16/32 */
@@ -318,9 +359,9 @@
                 thread_inst->op_size = LONG;
                 thread_inst->operand[0] = mk_operand(LONG, 0, 0, IMMEDIATE);
             }
-            thread_inst->immediate = get_immediate((inst+1), 
thread_inst->op_size);
+            thread_inst->immediate = get_immediate(vm86,
+                                       (inst+1), thread_inst->op_size);
             break;
-
         case 0x0f:
             break;
         default:
@@ -425,6 +466,7 @@
     struct exec_domain *d = current;
     vcpu_iodata_t *vio;
     ioreq_t *p;
+    int vm86;
     struct mi_per_cpu_info *mpci_p;
     struct xen_regs *inst_decoder_regs;
     extern long evtchn_send(int lport);
@@ -432,53 +474,59 @@
 
     mpci_p = &current->arch.arch_vmx.vmx_platform.mpci;
     inst_decoder_regs = mpci_p->inst_decoder_regs;
+
     vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
-        
     if (vio == NULL) {
         printk("bad shared page\n");
         domain_crash_synchronous(); 
     }
     p = &vio->vp_ioreq;
-        
+
+    vm86 = inst_decoder_regs->eflags & X86_EFLAGS_VM;
+
     set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
     p->dir = dir;
     p->pdata_valid = pvalid;
-    p->count = 1;
 
     p->port_mm = 1;
     p->size = inst_p->op_size;
     p->addr = gpa;
     p->u.data = value;
 
-    // p->state = STATE_UPSTREAM_SENDING;
     p->state = STATE_IOREQ_READY;
 
-    // Try to use ins/outs' framework
-    if (pvalid) {
-        // Handle "movs"
-        p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
-                               inst_decoder_regs->esi
-                               : inst_decoder_regs->edi); 
-        p->u.pdata = (void *) gva_to_gpa(p->u.data);
-        p->count = inst_decoder_regs->ecx;
-        inst_decoder_regs->ecx = 0;
+    if (inst_p->flags & REPZ) {
+        if (vm86)
+            p->count = inst_decoder_regs->ecx & 0xFFFF;
+        else
+            p->count = inst_decoder_regs->ecx;
         p->df = (inst_decoder_regs->eflags & EF_DF) ? 1 : 0;
-    }
+    } else
+        p->count = 1;
+
+    if (pvalid)
+        p->u.pdata = (void *) gva_to_gpa(p->u.data);
+
+#if 0
+    printf("send_mmio_req: eip 0x%lx:0x%lx, dir %d, pdata_valid %d, ",
+       inst_decoder_regs->cs, inst_decoder_regs->eip, p->dir, p->pdata_valid);
+    printf("port_mm %d, size %lld, addr 0x%llx, value 0x%lx, count %lld\n",
+       p->port_mm, p->size, p->addr, value, p->count);
+#endif
 
     evtchn_send(IOPACKET_PORT);
     do_block(); 
-
 }
 
 void handle_mmio(unsigned long va, unsigned long gpa)
 {
-    unsigned long eip;
-    unsigned long inst_len;
+    unsigned long eip, eflags, cs;
+    unsigned long inst_len, inst_addr;
     struct mi_per_cpu_info *mpci_p;
     struct xen_regs *inst_decoder_regs;
     struct instruction mmio_inst;
     unsigned char inst[MAX_INST_LEN];
-    int ret;
+    int vm86, ret;
      
     mpci_p = &current->arch.arch_vmx.vmx_platform.mpci;
     inst_decoder_regs = mpci_p->inst_decoder_regs;
@@ -486,13 +534,30 @@
     __vmread(GUEST_EIP, &eip);
     __vmread(INSTRUCTION_LEN, &inst_len);
 
+    __vmread(GUEST_EFLAGS, &eflags);
+    vm86 = eflags & X86_EFLAGS_VM;
+
+    if (vm86) {
+        __vmread(GUEST_CS_SELECTOR, &cs);
+        inst_addr = (cs << 4) | eip;
+    } else
+        inst_addr = eip; /* XXX should really look at GDT[cs].base too */
+
     memset(inst, '0', MAX_INST_LEN);
-    ret = inst_copy_from_guest(inst, eip, inst_len);
+    ret = inst_copy_from_guest(inst, inst_addr, inst_len);
     if (ret != inst_len) {
         printk("handle_mmio - EXIT: get guest instruction fault\n");
         domain_crash_synchronous();
     }
 
+#if 0
+    printk("handle_mmio: cs:eip 0x%lx:0x%lx(0x%lx): opcode",
+        cs, eip, inst_addr, inst_len);
+    for (ret = 0; ret < inst_len; ret++)
+        printk(" %02x", inst[ret]);
+    printk("\n");
+#endif
+
     init_instruction(&mmio_inst);
     
     if (vmx_decode(check_prefix(inst, &mmio_inst), &mmio_inst) == 
DECODE_failure)
@@ -506,7 +571,7 @@
         if (read_from_mmio(&mmio_inst)) {
             // Send the request and waiting for return value.
             mpci_p->mmio_target = mmio_inst.operand[1] | WZEROEXTEND;
-            send_mmio_req(gpa, &mmio_inst, 0, 1, 0);
+            send_mmio_req(gpa, &mmio_inst, 0, IOREQ_READ, 0);
             return ;
         } else {
             printk("handle_mmio - EXIT: movz error!\n");
@@ -515,10 +580,32 @@
     }
 
     if (!strncmp((char *)mmio_inst.i_name, "movs", 4)) {
-        int tmp_dir;
+       unsigned long addr = 0;
+       int dir;
 
-        tmp_dir = ((va == inst_decoder_regs->edi) ? IOREQ_WRITE : IOREQ_READ);
-        send_mmio_req(gpa, &mmio_inst, 0, tmp_dir, 1);
+       if (vm86) {
+           unsigned long seg;
+
+           __vmread(GUEST_ES_SELECTOR, &seg);
+           if (((seg << 4) | (inst_decoder_regs->edi & 0xFFFF)) == va) {
+               dir = IOREQ_WRITE;
+               __vmread(GUEST_DS_SELECTOR, &seg);
+               addr = (seg << 4) | (inst_decoder_regs->esi & 0xFFFF);
+           } else {
+               dir = IOREQ_READ;
+               addr = (seg << 4) | (inst_decoder_regs->edi & 0xFFFF);
+           }
+       } else { /* XXX should really look at GDT[ds/es].base too */
+           if (va == inst_decoder_regs->edi) {
+               dir = IOREQ_WRITE;
+               addr = inst_decoder_regs->esi;
+           } else {
+               dir = IOREQ_READ;
+               addr = inst_decoder_regs->edi;
+           }
+       }
+
+       send_mmio_req(gpa, &mmio_inst, addr, dir, 1);
         return;
     }
 
@@ -529,7 +616,7 @@
         if (read_from_mmio(&mmio_inst)) {
             // Send the request and waiting for return value.
             mpci_p->mmio_target = mmio_inst.operand[1];
-            send_mmio_req(gpa, &mmio_inst, value, 1, 0);
+            send_mmio_req(gpa, &mmio_inst, value, IOREQ_READ, 0);
         } else {
             // Write to MMIO
             if (mmio_inst.operand[0] & IMMEDIATE) {
@@ -541,9 +628,14 @@
             } else {
                 domain_crash_synchronous();
             }
-            send_mmio_req(gpa, &mmio_inst, value, 0, 0);
+            send_mmio_req(gpa, &mmio_inst, value, IOREQ_WRITE, 0);
             return;
         }
+    }
+
+    if (!strncmp((char *)mmio_inst.i_name, "stos", 4)) {
+       send_mmio_req(gpa, &mmio_inst,
+               inst_decoder_regs->eax, IOREQ_WRITE, 0);
     }
 
     domain_crash_synchronous();

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.