[Xen-changelog] [xen-unstable] x86 hvm: Replace old MMIO emulator with x86_emulate()-based harness.
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1203518205 0 # Node ID 3f1cf03826fe642434197f898c3aac55dc81ad25 # Parent f853c049709546b4f1fa1b4b03ddff165c163d38 x86 hvm: Replace old MMIO emulator with x86_emulate()-based harness. Re-factor VMX real-mode emulation to use the same harness. Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx> --- xen/arch/x86/hvm/instrlen.c | 419 ------------ tools/ioemu/target-i386-dm/helper2.c | 107 --- xen/arch/x86/hvm/Makefile | 2 xen/arch/x86/hvm/emulate.c | 755 +++++++++++++++++++++++ xen/arch/x86/hvm/hvm.c | 50 - xen/arch/x86/hvm/intercept.c | 62 - xen/arch/x86/hvm/io.c | 723 ---------------------- xen/arch/x86/hvm/platform.c | 1136 ----------------------------------- xen/arch/x86/hvm/stdvga.c | 34 - xen/arch/x86/hvm/svm/svm.c | 477 ++++++++------ xen/arch/x86/hvm/vmx/realmode.c | 773 ++--------------------- xen/arch/x86/hvm/vmx/vmx.c | 58 + xen/arch/x86/mm/shadow/multi.c | 6 xen/include/asm-x86/hvm/emulate.h | 55 + xen/include/asm-x86/hvm/hvm.h | 12 xen/include/asm-x86/hvm/io.h | 18 xen/include/asm-x86/hvm/support.h | 1 xen/include/asm-x86/hvm/vcpu.h | 9 xen/include/asm-x86/hvm/vmx/vmcs.h | 5 xen/include/asm-x86/hvm/vmx/vmx.h | 8 xen/include/public/hvm/ioreq.h | 6 21 files changed, 1314 insertions(+), 3402 deletions(-) diff -r f853c0497095 -r 3f1cf03826fe tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Tue Feb 19 11:14:40 2008 -0700 +++ b/tools/ioemu/target-i386-dm/helper2.c Wed Feb 20 14:36:45 2008 +0000 @@ -379,82 +379,7 @@ void cpu_ioreq_move(CPUState *env, ioreq } } -void cpu_ioreq_and(CPUState *env, ioreq_t *req) -{ - target_ulong tmp1, tmp2; - - if (req->data_is_ptr != 0) - hw_error("expected scalar value"); - - read_physical(req->addr, req->size, &tmp1); - if (req->dir == IOREQ_WRITE) { - tmp2 = tmp1 & (target_ulong) req->data; - write_physical(req->addr, req->size, &tmp2); - } - req->data = tmp1; -} - -void cpu_ioreq_add(CPUState *env, ioreq_t *req) -{ - target_ulong tmp1, tmp2; - - if (req->data_is_ptr != 0) - hw_error("expected scalar value"); - - read_physical(req->addr, req->size, &tmp1); - if (req->dir == IOREQ_WRITE) { - tmp2 = tmp1 + (target_ulong) req->data; - write_physical(req->addr, req->size, &tmp2); - } - req->data = tmp1; -} - -void cpu_ioreq_sub(CPUState *env, ioreq_t *req) -{ - target_ulong tmp1, tmp2; - - if (req->data_is_ptr != 0) - hw_error("expected scalar value"); - - read_physical(req->addr, req->size, &tmp1); - if (req->dir == IOREQ_WRITE) { - tmp2 = tmp1 - (target_ulong) req->data; - write_physical(req->addr, req->size, &tmp2); - } - req->data = tmp1; -} - -void cpu_ioreq_or(CPUState *env, ioreq_t *req) -{ - target_ulong tmp1, tmp2; - - if (req->data_is_ptr != 0) - hw_error("expected scalar value"); - - read_physical(req->addr, req->size, &tmp1); - if (req->dir == IOREQ_WRITE) { - tmp2 = tmp1 | (target_ulong) req->data; - write_physical(req->addr, req->size, &tmp2); - } - req->data = tmp1; -} - -void cpu_ioreq_xor(CPUState *env, ioreq_t *req) -{ - target_ulong tmp1, tmp2; - - if (req->data_is_ptr != 0) - hw_error("expected scalar value"); - - read_physical(req->addr, req->size, &tmp1); - if (req->dir == IOREQ_WRITE) { - tmp2 = tmp1 ^ (target_ulong) req->data; - write_physical(req->addr, req->size, &tmp2); - } - req->data = tmp1; -} - -void timeoffset_get() +void timeoffset_get(void) { char *p; @@ -481,18 +406,6 @@ void cpu_ioreq_timeoffset(CPUState *env, fprintf(logfile, "Time offset set %ld, added offset %ld\n", time_offset, req->data); sprintf(b, "%ld", 
time_offset); xenstore_vm_write(domid, "rtc/timeoffset", b); -} - -void cpu_ioreq_xchg(CPUState *env, ioreq_t *req) -{ - unsigned long tmp1; - - if (req->data_is_ptr != 0) - hw_error("expected scalar value"); - - read_physical(req->addr, req->size, &tmp1); - write_physical(req->addr, req->size, &req->data); - req->data = tmp1; } void __handle_ioreq(CPUState *env, ioreq_t *req) @@ -507,24 +420,6 @@ void __handle_ioreq(CPUState *env, ioreq break; case IOREQ_TYPE_COPY: cpu_ioreq_move(env, req); - break; - case IOREQ_TYPE_AND: - cpu_ioreq_and(env, req); - break; - case IOREQ_TYPE_ADD: - cpu_ioreq_add(env, req); - break; - case IOREQ_TYPE_SUB: - cpu_ioreq_sub(env, req); - break; - case IOREQ_TYPE_OR: - cpu_ioreq_or(env, req); - break; - case IOREQ_TYPE_XOR: - cpu_ioreq_xor(env, req); - break; - case IOREQ_TYPE_XCHG: - cpu_ioreq_xchg(env, req); break; case IOREQ_TYPE_TIMEOFFSET: cpu_ioreq_timeoffset(env, req); diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/Makefile --- a/xen/arch/x86/hvm/Makefile Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/arch/x86/hvm/Makefile Wed Feb 20 14:36:45 2008 +0000 @@ -1,9 +1,9 @@ subdir-y += svm subdir-y += svm subdir-y += vmx +obj-y += emulate.o obj-y += hvm.o obj-y += i8254.o -obj-y += instrlen.o obj-y += intercept.o obj-y += io.o obj-y += iommu.o diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/emulate.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/emulate.c Wed Feb 20 14:36:45 2008 +0000 @@ -0,0 +1,755 @@ +/****************************************************************************** + * hvm/emulate.c + * + * HVM instruction emulation. Used for MMIO and VMX real mode. + * + * Copyright (c) 2008 Citrix Systems, Inc. + * + * Authors: + * Keir Fraser <keir.fraser@xxxxxxxxxx> + */ + +#include <xen/config.h> +#include <xen/init.h> +#include <xen/lib.h> +#include <xen/sched.h> +#include <xen/paging.h> +#include <asm/event.h> +#include <asm/hvm/emulate.h> +#include <asm/hvm/hvm.h> +#include <asm/hvm/support.h> + +/* + * Convert addr from linear to physical form, valid over the range + * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to + * the valid computed range. It is always >0 when X86EMUL_OKAY is returned. + */ +static int hvmemul_linear_to_phys( + unsigned long addr, + paddr_t *paddr, + unsigned int bytes_per_rep, + unsigned long *reps, + enum hvm_access_type access_type, + struct hvm_emulate_ctxt *hvmemul_ctxt) +{ + struct vcpu *curr = current; + unsigned long pfn, npfn, done, todo, i; + struct segment_register *sreg; + uint32_t pfec; + + /* Clip repetitions to a sensible maximum. */ + *reps = min_t(unsigned long, *reps, 4096); + + /* With no paging it's easy: linear == physical. */ + if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) ) + { + *paddr = addr; + return X86EMUL_OKAY; + } + + *paddr = addr & ~PAGE_MASK; + + /* Gather access-type information for the page walks. */ + sreg = hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt); + pfec = PFEC_page_present; + if ( sreg->attr.fields.dpl == 3 ) + pfec |= PFEC_user_mode; + if ( access_type == hvm_access_write ) + pfec |= PFEC_write_access; + + /* Get the first PFN in the range. */ + if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN ) + { + hvm_inject_exception(TRAP_page_fault, pfec, addr); + return X86EMUL_EXCEPTION; + } + + /* If the range does not straddle a page boundary then we're done. 
*/ + done = PAGE_SIZE - (addr & ~PAGE_MASK); + todo = *reps * bytes_per_rep; + if ( done >= todo ) + goto done; + + addr += done; + for ( i = 1; done < todo; i++ ) + { + /* Get the next PFN in the range. */ + if ( (npfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN ) + { + hvm_inject_exception(TRAP_page_fault, pfec, addr); + return X86EMUL_EXCEPTION; + } + + /* Is it contiguous with the preceding PFNs? If not then we're done. */ + if ( npfn != (pfn + i) ) + { + done /= bytes_per_rep; + if ( done == 0 ) + return X86EMUL_UNHANDLEABLE; + *reps = done; + break; + } + + addr += PAGE_SIZE; + done += PAGE_SIZE; + } + + done: + *paddr |= (paddr_t)pfn << PAGE_SHIFT; + return X86EMUL_OKAY; +} + + +static int hvmemul_virtual_to_linear( + enum x86_segment seg, + unsigned long offset, + unsigned int bytes, + enum hvm_access_type access_type, + struct hvm_emulate_ctxt *hvmemul_ctxt, + unsigned long *paddr) +{ + struct segment_register *reg; + int okay; + + if ( seg == x86_seg_none ) + { + *paddr = offset; + return X86EMUL_OKAY; + } + + reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); + okay = hvm_virtual_to_linear_addr( + seg, reg, offset, bytes, access_type, + hvmemul_ctxt->ctxt.addr_size, paddr); + + if ( !okay ) + { + hvmemul_ctxt->flags.exn_pending = 1; + hvmemul_ctxt->exn_vector = TRAP_gp_fault; + hvmemul_ctxt->exn_insn_len = 0; + return X86EMUL_EXCEPTION; + } + + return X86EMUL_OKAY; +} + +static int __hvmemul_read( + enum x86_segment seg, + unsigned long offset, + unsigned long *val, + unsigned int bytes, + enum hvm_access_type access_type, + struct hvm_emulate_ctxt *hvmemul_ctxt) +{ + unsigned long addr; + int rc; + + rc = hvmemul_virtual_to_linear( + seg, offset, bytes, access_type, hvmemul_ctxt, &addr); + if ( rc != X86EMUL_OKAY ) + return rc; + + *val = 0; + + rc = ((access_type == hvm_access_insn_fetch) ? + hvm_fetch_from_guest_virt(val, addr, bytes) : + hvm_copy_from_guest_virt(val, addr, bytes)); + if ( rc == HVMCOPY_bad_gva_to_gfn ) + return X86EMUL_EXCEPTION; + + if ( rc == HVMCOPY_bad_gfn_to_mfn ) + { + struct vcpu *curr = current; + unsigned long reps = 1; + paddr_t gpa; + + if ( access_type == hvm_access_insn_fetch ) + return X86EMUL_UNHANDLEABLE; + + rc = hvmemul_linear_to_phys( + addr, &gpa, bytes, &reps, access_type, hvmemul_ctxt); + if ( rc != X86EMUL_OKAY ) + return rc; + + if ( curr->arch.hvm_vcpu.io_in_progress ) + return X86EMUL_UNHANDLEABLE; + + if ( !curr->arch.hvm_vcpu.io_completed ) + { + curr->arch.hvm_vcpu.io_in_progress = 1; + send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, bytes, + 0, IOREQ_READ, 0, 0); + } + + if ( !curr->arch.hvm_vcpu.io_completed ) + return X86EMUL_RETRY; + + *val = curr->arch.hvm_vcpu.io_data; + curr->arch.hvm_vcpu.io_completed = 0; + } + + return X86EMUL_OKAY; +} + +static int hvmemul_read( + enum x86_segment seg, + unsigned long offset, + unsigned long *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + return __hvmemul_read( + seg, offset, val, bytes, hvm_access_read, + container_of(ctxt, struct hvm_emulate_ctxt, ctxt)); +} + +static int hvmemul_insn_fetch( + enum x86_segment seg, + unsigned long offset, + unsigned long *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + unsigned int insn_off = offset - hvmemul_ctxt->insn_buf_eip; + + /* Fall back if requested bytes are not in the prefetch cache. 
*/ + if ( unlikely((insn_off + bytes) > hvmemul_ctxt->insn_buf_bytes) ) + return __hvmemul_read( + seg, offset, val, bytes, + hvm_access_insn_fetch, hvmemul_ctxt); + + /* Hit the cache. Simple memcpy. */ + *val = 0; + memcpy(val, &hvmemul_ctxt->insn_buf[insn_off], bytes); + return X86EMUL_OKAY; +} + +static int hvmemul_write( + enum x86_segment seg, + unsigned long offset, + unsigned long val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + unsigned long addr; + int rc; + + rc = hvmemul_virtual_to_linear( + seg, offset, bytes, hvm_access_write, hvmemul_ctxt, &addr); + if ( rc != X86EMUL_OKAY ) + return rc; + + rc = hvm_copy_to_guest_virt(addr, &val, bytes); + if ( rc == HVMCOPY_bad_gva_to_gfn ) + return X86EMUL_EXCEPTION; + + if ( rc == HVMCOPY_bad_gfn_to_mfn ) + { + struct vcpu *curr = current; + unsigned long reps = 1; + paddr_t gpa; + + rc = hvmemul_linear_to_phys( + addr, &gpa, bytes, &reps, hvm_access_write, hvmemul_ctxt); + if ( rc != X86EMUL_OKAY ) + return rc; + + if ( curr->arch.hvm_vcpu.io_in_progress ) + return X86EMUL_UNHANDLEABLE; + + curr->arch.hvm_vcpu.io_in_progress = 1; + send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, bytes, + val, IOREQ_WRITE, 0, 0); + } + + return X86EMUL_OKAY; +} + +static int hvmemul_cmpxchg( + enum x86_segment seg, + unsigned long offset, + unsigned long old, + unsigned long new, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + /* Fix this in case the guest is really relying on r-m-w atomicity. */ + return hvmemul_write(seg, offset, new, bytes, ctxt); +} + +static int hvmemul_rep_ins( + uint16_t src_port, + enum x86_segment dst_seg, + unsigned long dst_offset, + unsigned int bytes_per_rep, + unsigned long *reps, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + struct vcpu *curr = current; + unsigned long addr; + paddr_t gpa; + int rc; + + rc = hvmemul_virtual_to_linear( + dst_seg, dst_offset, *reps * bytes_per_rep, hvm_access_write, + hvmemul_ctxt, &addr); + if ( rc != X86EMUL_OKAY ) + return rc; + + rc = hvmemul_linear_to_phys( + addr, &gpa, bytes_per_rep, reps, hvm_access_write, hvmemul_ctxt); + if ( rc != X86EMUL_OKAY ) + return rc; + + if ( curr->arch.hvm_vcpu.io_in_progress ) + return X86EMUL_UNHANDLEABLE; + + if ( !curr->arch.hvm_vcpu.io_completed ) + { + curr->arch.hvm_vcpu.io_in_progress = 1; + send_pio_req(src_port, *reps, bytes_per_rep, + gpa, IOREQ_READ, + !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); + } + + if ( !curr->arch.hvm_vcpu.io_completed ) + return X86EMUL_RETRY; + + curr->arch.hvm_vcpu.io_completed = 0; + + return X86EMUL_OKAY; +} + +static int hvmemul_rep_outs( + enum x86_segment src_seg, + unsigned long src_offset, + uint16_t dst_port, + unsigned int bytes_per_rep, + unsigned long *reps, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + struct vcpu *curr = current; + unsigned long addr; + paddr_t gpa; + int rc; + + rc = hvmemul_virtual_to_linear( + src_seg, src_offset, *reps * bytes_per_rep, hvm_access_read, + hvmemul_ctxt, &addr); + if ( rc != X86EMUL_OKAY ) + return rc; + + rc = hvmemul_linear_to_phys( + addr, &gpa, bytes_per_rep, reps, hvm_access_read, hvmemul_ctxt); + if ( rc != X86EMUL_OKAY ) + return rc; + + if ( curr->arch.hvm_vcpu.io_in_progress ) + return X86EMUL_UNHANDLEABLE; + + curr->arch.hvm_vcpu.io_in_progress = 1; + send_pio_req(dst_port, 
*reps, bytes_per_rep, + gpa, IOREQ_WRITE, + !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); + + return X86EMUL_OKAY; +} + +static int hvmemul_rep_movs( + enum x86_segment src_seg, + unsigned long src_offset, + enum x86_segment dst_seg, + unsigned long dst_offset, + unsigned int bytes_per_rep, + unsigned long *reps, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + struct vcpu *curr = current; + unsigned long saddr, daddr; + paddr_t sgpa, dgpa; + p2m_type_t p2mt; + int rc; + + rc = hvmemul_virtual_to_linear( + src_seg, src_offset, *reps * bytes_per_rep, hvm_access_read, + hvmemul_ctxt, &saddr); + if ( rc != X86EMUL_OKAY ) + return rc; + + rc = hvmemul_virtual_to_linear( + dst_seg, dst_offset, *reps * bytes_per_rep, hvm_access_write, + hvmemul_ctxt, &daddr); + if ( rc != X86EMUL_OKAY ) + return rc; + + rc = hvmemul_linear_to_phys( + saddr, &sgpa, bytes_per_rep, reps, hvm_access_read, hvmemul_ctxt); + if ( rc != X86EMUL_OKAY ) + return rc; + + rc = hvmemul_linear_to_phys( + daddr, &dgpa, bytes_per_rep, reps, hvm_access_write, hvmemul_ctxt); + if ( rc != X86EMUL_OKAY ) + return rc; + + if ( curr->arch.hvm_vcpu.io_in_progress ) + return X86EMUL_UNHANDLEABLE; + + (void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt); + if ( !p2m_is_ram(p2mt) ) + { + if ( !curr->arch.hvm_vcpu.io_completed ) + { + curr->arch.hvm_vcpu.io_in_progress = 1; + send_mmio_req(IOREQ_TYPE_COPY, sgpa, *reps, bytes_per_rep, + dgpa, IOREQ_READ, + !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); + } + + if ( !curr->arch.hvm_vcpu.io_completed ) + return X86EMUL_RETRY; + + curr->arch.hvm_vcpu.io_completed = 0; + } + else + { + (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt); + if ( p2m_is_ram(p2mt) ) + return X86EMUL_UNHANDLEABLE; + curr->arch.hvm_vcpu.io_in_progress = 1; + send_mmio_req(IOREQ_TYPE_COPY, dgpa, *reps, bytes_per_rep, + sgpa, IOREQ_WRITE, + !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); + } + + return X86EMUL_OKAY; +} + +static int hvmemul_read_segment( + enum x86_segment seg, + struct segment_register *reg, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); + memcpy(reg, sreg, sizeof(struct segment_register)); + return X86EMUL_OKAY; +} + +static int hvmemul_write_segment( + enum x86_segment seg, + struct segment_register *reg, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); + + if ( seg == x86_seg_ss ) + hvmemul_ctxt->flags.mov_ss = 1; + + memcpy(sreg, reg, sizeof(struct segment_register)); + __set_bit(seg, &hvmemul_ctxt->seg_reg_dirty); + + return X86EMUL_OKAY; +} + +static int hvmemul_read_io( + unsigned int port, + unsigned int bytes, + unsigned long *val, + struct x86_emulate_ctxt *ctxt) +{ + struct vcpu *curr = current; + + if ( curr->arch.hvm_vcpu.io_in_progress ) + return X86EMUL_UNHANDLEABLE; + + if ( !curr->arch.hvm_vcpu.io_completed ) + { + curr->arch.hvm_vcpu.io_in_progress = 1; + send_pio_req(port, 1, bytes, 0, IOREQ_READ, 0, 0); + } + + if ( !curr->arch.hvm_vcpu.io_completed ) + return X86EMUL_RETRY; + + *val = curr->arch.hvm_vcpu.io_data; + curr->arch.hvm_vcpu.io_completed = 0; + + return X86EMUL_OKAY; +} + +static int hvmemul_write_io( + unsigned int port, + unsigned int bytes, + unsigned long val, + struct 
x86_emulate_ctxt *ctxt) +{ + struct vcpu *curr = current; + + if ( port == 0xe9 ) + { + hvm_print_line(curr, val); + return X86EMUL_OKAY; + } + + if ( curr->arch.hvm_vcpu.io_in_progress ) + return X86EMUL_UNHANDLEABLE; + + curr->arch.hvm_vcpu.io_in_progress = 1; + send_pio_req(port, 1, bytes, val, IOREQ_WRITE, 0, 0); + + return X86EMUL_OKAY; +} + +static int hvmemul_read_cr( + unsigned int reg, + unsigned long *val, + struct x86_emulate_ctxt *ctxt) +{ + switch ( reg ) + { + case 0: + case 2: + case 3: + case 4: + *val = current->arch.hvm_vcpu.guest_cr[reg]; + return X86EMUL_OKAY; + default: + break; + } + + return X86EMUL_UNHANDLEABLE; +} + +static int hvmemul_write_cr( + unsigned int reg, + unsigned long val, + struct x86_emulate_ctxt *ctxt) +{ + switch ( reg ) + { + case 0: + return hvm_set_cr0(val); + case 2: + current->arch.hvm_vcpu.guest_cr[2] = val; + return X86EMUL_OKAY; + case 3: + return hvm_set_cr3(val); + case 4: + return hvm_set_cr4(val); + default: + break; + } + + return X86EMUL_UNHANDLEABLE; +} + +static int hvmemul_read_msr( + unsigned long reg, + uint64_t *val, + struct x86_emulate_ctxt *ctxt) +{ + struct cpu_user_regs _regs; + int rc; + + _regs.ecx = (uint32_t)reg; + + if ( (rc = hvm_funcs.msr_read_intercept(&_regs)) != 0 ) + return rc; + + *val = ((uint64_t)(uint32_t)_regs.edx << 32) || (uint32_t)_regs.eax; + return X86EMUL_OKAY; +} + +static int hvmemul_write_msr( + unsigned long reg, + uint64_t val, + struct x86_emulate_ctxt *ctxt) +{ + struct cpu_user_regs _regs; + + _regs.edx = (uint32_t)(val >> 32); + _regs.eax = (uint32_t)val; + _regs.ecx = (uint32_t)reg; + + return hvm_funcs.msr_write_intercept(&_regs); +} + +static int hvmemul_write_rflags( + unsigned long val, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + if ( (val & X86_EFLAGS_IF) && !(ctxt->regs->eflags & X86_EFLAGS_IF) ) + hvmemul_ctxt->flags.sti = 1; + return X86EMUL_OKAY; +} + +static int hvmemul_wbinvd( + struct x86_emulate_ctxt *ctxt) +{ + hvm_funcs.wbinvd_intercept(); + return X86EMUL_OKAY; +} + +static int hvmemul_cpuid( + unsigned int *eax, + unsigned int *ebx, + unsigned int *ecx, + unsigned int *edx, + struct x86_emulate_ctxt *ctxt) +{ + hvm_funcs.cpuid_intercept(eax, ebx, ecx, edx); + return X86EMUL_OKAY; +} + +static int hvmemul_hlt( + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + hvmemul_ctxt->flags.hlt = 1; + return X86EMUL_OKAY; +} + +static int hvmemul_inject_hw_exception( + uint8_t vector, + uint16_t error_code, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + + if ( error_code != 0 ) + return X86EMUL_UNHANDLEABLE; + + hvmemul_ctxt->flags.exn_pending = 1; + hvmemul_ctxt->exn_vector = vector; + hvmemul_ctxt->exn_insn_len = 0; + + return X86EMUL_OKAY; +} + +static int hvmemul_inject_sw_interrupt( + uint8_t vector, + uint8_t insn_len, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + + hvmemul_ctxt->flags.exn_pending = 1; + hvmemul_ctxt->exn_vector = vector; + hvmemul_ctxt->exn_insn_len = insn_len; + + return X86EMUL_OKAY; +} + +static void hvmemul_load_fpu_ctxt( + struct x86_emulate_ctxt *ctxt) +{ + if ( !current->fpu_dirtied ) + hvm_funcs.fpu_dirty_intercept(); +} + +static struct x86_emulate_ops hvm_emulate_ops = { + .read = hvmemul_read, + .insn_fetch = 
hvmemul_insn_fetch, + .write = hvmemul_write, + .cmpxchg = hvmemul_cmpxchg, + .rep_ins = hvmemul_rep_ins, + .rep_outs = hvmemul_rep_outs, + .rep_movs = hvmemul_rep_movs, + .read_segment = hvmemul_read_segment, + .write_segment = hvmemul_write_segment, + .read_io = hvmemul_read_io, + .write_io = hvmemul_write_io, + .read_cr = hvmemul_read_cr, + .write_cr = hvmemul_write_cr, + .read_msr = hvmemul_read_msr, + .write_msr = hvmemul_write_msr, + .write_rflags = hvmemul_write_rflags, + .wbinvd = hvmemul_wbinvd, + .cpuid = hvmemul_cpuid, + .hlt = hvmemul_hlt, + .inject_hw_exception = hvmemul_inject_hw_exception, + .inject_sw_interrupt = hvmemul_inject_sw_interrupt, + .load_fpu_ctxt = hvmemul_load_fpu_ctxt +}; + +int hvm_emulate_one( + struct hvm_emulate_ctxt *hvmemul_ctxt) +{ + struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs; + unsigned long addr; + + hvmemul_ctxt->ctxt.addr_size = + hvmemul_ctxt->seg_reg[x86_seg_cs].attr.fields.db ? 32 : 16; + hvmemul_ctxt->ctxt.sp_size = + hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.db ? 32 : 16; + + hvmemul_ctxt->insn_buf_eip = regs->eip; + hvmemul_ctxt->insn_buf_bytes = + (hvm_virtual_to_linear_addr( + x86_seg_cs, &hvmemul_ctxt->seg_reg[x86_seg_cs], + regs->eip, sizeof(hvmemul_ctxt->insn_buf), + hvm_access_insn_fetch, hvmemul_ctxt->ctxt.addr_size, &addr) && + !hvm_fetch_from_guest_virt_nofault( + hvmemul_ctxt->insn_buf, addr, sizeof(hvmemul_ctxt->insn_buf))) + ? sizeof(hvmemul_ctxt->insn_buf) : 0; + + hvmemul_ctxt->flag_word = 0; + + return x86_emulate(&hvmemul_ctxt->ctxt, &hvm_emulate_ops); +} + +void hvm_emulate_prepare( + struct hvm_emulate_ctxt *hvmemul_ctxt, + struct cpu_user_regs *regs) +{ + hvmemul_ctxt->ctxt.regs = regs; + hvmemul_ctxt->seg_reg_accessed = 0; + hvmemul_ctxt->seg_reg_dirty = 0; + hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt); + hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt); +} + +void hvm_emulate_writeback( + struct hvm_emulate_ctxt *hvmemul_ctxt) +{ + enum x86_segment seg; + + seg = find_first_bit(&hvmemul_ctxt->seg_reg_dirty, + ARRAY_SIZE(hvmemul_ctxt->seg_reg)); + + while ( seg < ARRAY_SIZE(hvmemul_ctxt->seg_reg) ) + { + hvm_set_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]); + seg = find_next_bit(&hvmemul_ctxt->seg_reg_dirty, + ARRAY_SIZE(hvmemul_ctxt->seg_reg), + seg+1); + } +} + +struct segment_register *hvmemul_get_seg_reg( + enum x86_segment seg, + struct hvm_emulate_ctxt *hvmemul_ctxt) +{ + if ( !__test_and_set_bit(seg, &hvmemul_ctxt->seg_reg_accessed) ) + hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]); + return &hvmemul_ctxt->seg_reg[seg]; +} diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/arch/x86/hvm/hvm.c Wed Feb 20 14:36:45 2008 +0000 @@ -729,7 +729,7 @@ int hvm_set_efer(uint64_t value) gdprintk(XENLOG_WARNING, "Trying to set reserved bit in " "EFER: %"PRIx64"\n", value); hvm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; + return X86EMUL_EXCEPTION; } if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) && @@ -738,14 +738,14 @@ int hvm_set_efer(uint64_t value) gdprintk(XENLOG_WARNING, "Trying to change EFER.LME with paging enabled\n"); hvm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; + return X86EMUL_EXCEPTION; } value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA; v->arch.hvm_vcpu.guest_efer = value; hvm_update_guest_efer(v); - return 1; + return X86EMUL_OKAY; } extern void shadow_blow_tables_per_domain(struct domain *d); @@ -787,8 +787,7 @@ int hvm_set_cr0(unsigned long value) 
HVM_DBG_LOG(DBG_LEVEL_1, "Guest attempts to set upper 32 bits in CR0: %lx", value); - hvm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; + goto gpf; } value &= ~HVM_CR0_GUEST_RESERVED_BITS; @@ -797,10 +796,7 @@ int hvm_set_cr0(unsigned long value) value |= X86_CR0_ET; if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG ) - { - hvm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; - } + goto gpf; if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) ) { @@ -809,8 +805,7 @@ int hvm_set_cr0(unsigned long value) if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) ) { HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable"); - hvm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; + goto gpf; } HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode"); v->arch.hvm_vcpu.guest_efer |= EFER_LMA; @@ -828,7 +823,7 @@ int hvm_set_cr0(unsigned long value) gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", v->arch.hvm_vcpu.guest_cr[3], mfn); domain_crash(v->domain); - return 0; + return X86EMUL_UNHANDLEABLE; } /* Now arch.guest_table points to machine physical. */ @@ -895,7 +890,11 @@ int hvm_set_cr0(unsigned long value) if ( (value ^ old_value) & X86_CR0_PG ) paging_update_paging_modes(v); - return 1; + return X86EMUL_OKAY; + + gpf: + hvm_inject_exception(TRAP_gp_fault, 0, 0); + return X86EMUL_EXCEPTION; } int hvm_set_cr3(unsigned long value) @@ -922,12 +921,12 @@ int hvm_set_cr3(unsigned long value) v->arch.hvm_vcpu.guest_cr[3] = value; paging_update_cr3(v); - return 1; + return X86EMUL_OKAY; bad_cr3: gdprintk(XENLOG_ERR, "Invalid CR3\n"); domain_crash(v->domain); - return 0; + return X86EMUL_UNHANDLEABLE; } int hvm_set_cr4(unsigned long value) @@ -958,11 +957,11 @@ int hvm_set_cr4(unsigned long value) if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) paging_update_paging_modes(v); - return 1; + return X86EMUL_OKAY; gpf: hvm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; + return X86EMUL_EXCEPTION; } int hvm_virtual_to_linear_addr( @@ -977,7 +976,15 @@ int hvm_virtual_to_linear_addr( unsigned long addr = offset; uint32_t last_byte; - if ( addr_size != 64 ) + if ( !(current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ) + { + /* + * REAL MODE: Don't bother with segment access checks. + * Certain of them are not done in native real mode anyway. + */ + addr = (uint32_t)(addr + reg->base); + } + else if ( addr_size != 64 ) { /* * COMPATIBILITY MODE: Apply segment checks and add base. @@ -1304,7 +1311,7 @@ void hvm_task_switch( if ( ptss == NULL ) goto out; - if ( !hvm_set_cr3(ptss->cr3) ) + if ( hvm_set_cr3(ptss->cr3) ) { hvm_unmap(ptss); goto out; @@ -1399,7 +1406,10 @@ static enum hvm_copy_result __hvm_copy( * VMREADs on every data access hurts emulation performance. * Hence we do not gather extra PFEC flags if CR0.PG == 0. */ - if ( virt && (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) ) + if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) ) + virt = 0; + + if ( virt ) { struct segment_register sreg; hvm_get_segment_register(curr, x86_seg_ss, &sreg); diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/instrlen.c --- a/xen/arch/x86/hvm/instrlen.c Tue Feb 19 11:14:40 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,419 +0,0 @@ -/* - * instrlen.c - calculates the instruction length for all operating modes - * - * Travis Betak, travis.betak@xxxxxxx - * Copyright (c) 2005,2006 AMD - * Copyright (c) 2005 Keir Fraser - * - * Essentially a very, very stripped version of Keir Fraser's work in - * x86_emulate.c. Used for MMIO. 
- */ - -#include <xen/config.h> -#include <xen/sched.h> -#include <xen/mm.h> -#include <asm-x86/x86_emulate.h> - -/* read from guest memory */ -extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip, - int length); - -/* - * Opcode effective-address decode tables. - * Note that we only emulate instructions that have at least one memory - * operand (excluding implicit stack references). We assume that stack - * references and instruction fetches will never occur in special memory - * areas that require emulation. So, for example, 'mov <imm>,<reg>' need - * not be handled. - */ - -/* Operand sizes: 8-bit operands or specified/overridden size. */ -#define ByteOp (1<<0) /* 8-bit operands. */ -/* Destination operand type. */ -#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ -#define DstReg (2<<1) /* Register operand. */ -#define DstMem (3<<1) /* Memory operand. */ -#define DstMask (3<<1) -/* Source operand type. */ -#define SrcNone (0<<3) /* No source operand. */ -#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */ -#define SrcReg (1<<3) /* Register operand. */ -#define SrcMem (2<<3) /* Memory operand. */ -#define SrcMem16 (3<<3) /* Memory operand (16-bit). */ -#define SrcMem32 (4<<3) /* Memory operand (32-bit). */ -#define SrcImm (5<<3) /* Immediate operand. */ -#define SrcImmByte (6<<3) /* 8-bit sign-extended immediate operand. */ -#define SrcMask (7<<3) -/* Generic ModRM decode. */ -#define ModRM (1<<6) -/* Destination is only written; never read. */ -#define Mov (1<<7) - -static uint8_t opcode_table[256] = { - /* 0x00 - 0x07 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x08 - 0x0F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x10 - 0x17 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x18 - 0x1F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x20 - 0x27 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x28 - 0x2F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x30 - 0x37 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x38 - 0x3F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x40 - 0x4F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x50 - 0x5F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x60 - 0x6F */ - 0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x70 - 0x7F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x80 - 0x87 */ - ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, - ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - /* 0x88 - 0x8F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, DstMem|SrcNone|ModRM|Mov, - /* 0x90 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xA0 - 0xA7 */ - ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov, - 
ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov, - ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps, ImplicitOps, - /* 0xA8 - 0xAF */ - 0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps, ImplicitOps, - /* 0xB0 - 0xBF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xC0 - 0xC7 */ - ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0, - 0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, - /* 0xC8 - 0xCF */ - 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xD0 - 0xD7 */ - ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, - ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, - 0, 0, 0, 0, - /* 0xD8 - 0xDF */ - 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xE0 - 0xEF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xF0 - 0xF7 */ - 0, 0, 0, 0, - 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM, - /* 0xF8 - 0xFF */ - 0, 0, 0, 0, - 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM -}; - -static uint8_t twobyte_table[256] = { - /* 0x00 - 0x0F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, - /* 0x10 - 0x1F */ - 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, - /* 0x20 - 0x2F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x30 - 0x3F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x40 - 0x47 */ - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - /* 0x48 - 0x4F */ - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - /* 0x50 - 0x5F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x60 - 0x6F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x70 - 0x7F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x80 - 0x8F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x90 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xA0 - 0xA7 */ - 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, - /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, - /* 0xB0 - 0xB7 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM, - 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, - /* 0xB8 - 0xBF */ - 0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM, - 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, - /* 0xC0 - 0xCF */ - 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xD0 - 0xDF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xE0 - 0xEF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xF0 - 0xFF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* - * insn_fetch - fetch the next byte from instruction stream - */ -#define insn_fetch() \ -({ uint8_t _x; \ - if ( length >= 15 ) \ - return -1; \ - if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \ - unsigned long err; \ - struct segment_register ss; \ - gdprintk(XENLOG_WARNING, \ - "Cannot read from address %lx (eip %lx, mode %d)\n", \ - pc, org_pc, address_bytes); \ - err = 0; /* Must be not-present: we don't enforce reserved bits */ \ - if ( hvm_nx_enabled(current) ) \ - err |= PFEC_insn_fetch; \ - hvm_get_segment_register(current, x86_seg_ss, &ss); \ - if ( ss.attr.fields.dpl == 3 ) \ - err |= PFEC_user_mode; \ - hvm_inject_exception(TRAP_page_fault, err, pc); \ 
- return -1; \ - } \ - if ( buf ) \ - buf[length] = _x; \ - length += 1; \ - pc += 1; \ - _x; \ -}) - -#define insn_skip(_n) do { \ - int _i; \ - for ( _i = 0; _i < (_n); _i++) { \ - (void) insn_fetch(); \ - } \ -} while (0) - -/** - * hvm_instruction_fetch - read the current instruction and return its length - * - * @org_pc: guest instruction pointer - * @address_bytes: guest address width - * @buf: (optional) buffer to load actual instruction bytes into - * - * Doesn't increment the guest's instruction pointer, but may - * issue faults to the guest. Returns -1 on failure. - */ -int hvm_instruction_fetch(unsigned long org_pc, int address_bytes, - unsigned char *buf) -{ - uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0; - unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp; - int length = 0; - unsigned long pc = org_pc; - - op_bytes = op_default = ad_bytes = ad_default = address_bytes; - if ( op_bytes == 8 ) - { - op_bytes = op_default = 4; -#ifndef __x86_64__ - return -1; -#endif - } - - /* Legacy prefixes. */ - for ( ; ; ) - { - switch ( b = insn_fetch() ) - { - case 0x66: /* operand-size override */ - op_bytes = op_default ^ 6; /* switch between 2/4 bytes */ - break; - case 0x67: /* address-size override */ - if ( ad_default == 8 ) - ad_bytes = ad_default ^ 12; /* switch between 4/8 bytes */ - else - ad_bytes = ad_default ^ 6; /* switch between 2/4 bytes */ - break; - case 0x2e: /* CS override */ - case 0x3e: /* DS override */ - case 0x26: /* ES override */ - case 0x64: /* FS override */ - case 0x65: /* GS override */ - case 0x36: /* SS override */ - case 0xf0: /* LOCK */ - case 0xf3: /* REP/REPE/REPZ */ - case 0xf2: /* REPNE/REPNZ */ - break; -#ifdef __x86_64__ - case 0x40 ... 0x4f: - if ( ad_default == 8 ) - { - rex_prefix = b; - continue; - } - /* FALLTHRU */ -#endif - default: - goto done_prefixes; - } - rex_prefix = 0; - } -done_prefixes: - - /* REX prefix. */ - if ( rex_prefix & 8 ) - op_bytes = 8; /* REX.W */ - /* REX.B, REX.R, and REX.X do not need to be decoded. */ - - /* Opcode byte(s). */ - d = opcode_table[b]; - if ( d == 0 ) - { - /* Two-byte opcode? */ - if ( b == 0x0f ) - { - twobyte = 1; - b = insn_fetch(); - d = twobyte_table[b]; - } - - /* Unrecognised? */ - if ( d == 0 ) - goto cannot_emulate; - } - - /* ModRM and SIB bytes. */ - if ( d & ModRM ) - { - uint8_t modrm = insn_fetch(); - uint8_t modrm_mod = (modrm & 0xc0) >> 6; - uint8_t modrm_rm = (modrm & 0x07); - - modrm_reg = (modrm & 0x38) >> 3; - if ( modrm_mod == 3 ) - { - gdprintk(XENLOG_WARNING, "Cannot parse ModRM.mod == 3.\n"); - goto cannot_emulate; - } - - if ( ad_bytes == 2 ) - { - /* 16-bit ModR/M decode. */ - switch ( modrm_mod ) - { - case 0: - if ( modrm_rm == 6 ) - insn_skip(2); /* skip disp16 */ - break; - case 1: - insn_skip(1); /* skip disp8 */ - break; - case 2: - insn_skip(2); /* skip disp16 */ - break; - } - } - else - { - /* 32/64-bit ModR/M decode. */ - switch ( modrm_mod ) - { - case 0: - if ( (modrm_rm == 4) && - ((insn_fetch() & 7) == 5) ) - insn_skip(4); /* skip disp32 specified by SIB.base */ - else if ( modrm_rm == 5 ) - insn_skip(4); /* skip disp32 */ - break; - case 1: - if ( modrm_rm == 4 ) - insn_skip(1); - insn_skip(1); /* skip disp8 */ - break; - case 2: - if ( modrm_rm == 4 ) - insn_skip(1); - insn_skip(4); /* skip disp32 */ - break; - } - } - } - - /* Decode and fetch the destination operand: register or memory. */ - switch ( d & DstMask ) - { - case ImplicitOps: - /* Special instructions do their own operand decoding. 
*/ - goto done; - } - - /* Decode and fetch the source operand: register, memory or immediate. */ - switch ( d & SrcMask ) - { - case SrcImm: - tmp = (d & ByteOp) ? 1 : op_bytes; - if ( tmp == 8 ) tmp = 4; - /* NB. Immediates are sign-extended as necessary. */ - insn_skip(tmp); - break; - case SrcImmByte: - insn_skip(1); - break; - } - - if ( twobyte ) - goto done; - - switch ( b ) - { - case 0xa0 ... 0xa3: /* mov */ - insn_skip(ad_bytes); /* skip src/dst displacement */ - break; - case 0xf6 ... 0xf7: /* Grp3 */ - switch ( modrm_reg ) - { - case 0 ... 1: /* test */ - /* Special case in Grp3: test has an immediate source operand. */ - tmp = (d & ByteOp) ? 1 : op_bytes; - if ( tmp == 8 ) tmp = 4; - insn_skip(tmp); - break; - } - break; - } - -done: - return length < 16 ? length : -1; - -cannot_emulate: - gdprintk(XENLOG_WARNING, - "Cannot emulate %02x at address %lx (%lx, addr_bytes %d)\n", - b, pc - 1, org_pc, address_bytes); - return -1; -} diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/arch/x86/hvm/intercept.c Wed Feb 20 14:36:45 2008 +0000 @@ -31,7 +31,6 @@ #include <xen/event.h> #include <asm/iommu.h> - extern struct hvm_mmio_handler hpet_mmio_handler; extern struct hvm_mmio_handler vlapic_mmio_handler; extern struct hvm_mmio_handler vioapic_mmio_handler; @@ -50,12 +49,11 @@ static inline void hvm_mmio_access(struc hvm_mmio_read_t read_handler, hvm_mmio_write_t write_handler) { - unsigned int tmp1, tmp2; unsigned long data; - switch ( p->type ) { + switch ( p->type ) + { case IOREQ_TYPE_COPY: - { if ( !p->data_is_ptr ) { if ( p->dir == IOREQ_READ ) p->data = read_handler(v, p->addr, p->size); @@ -86,62 +84,6 @@ static inline void hvm_mmio_access(struc } } } - break; - } - - case IOREQ_TYPE_AND: - tmp1 = read_handler(v, p->addr, p->size); - if ( p->dir == IOREQ_WRITE ) { - tmp2 = tmp1 & (unsigned long) p->data; - write_handler(v, p->addr, p->size, tmp2); - } - p->data = tmp1; - break; - - case IOREQ_TYPE_ADD: - tmp1 = read_handler(v, p->addr, p->size); - if (p->dir == IOREQ_WRITE) { - tmp2 = tmp1 + (unsigned long) p->data; - write_handler(v, p->addr, p->size, tmp2); - } - p->data = tmp1; - break; - - case IOREQ_TYPE_OR: - tmp1 = read_handler(v, p->addr, p->size); - if ( p->dir == IOREQ_WRITE ) { - tmp2 = tmp1 | (unsigned long) p->data; - write_handler(v, p->addr, p->size, tmp2); - } - p->data = tmp1; - break; - - case IOREQ_TYPE_XOR: - tmp1 = read_handler(v, p->addr, p->size); - if ( p->dir == IOREQ_WRITE ) { - tmp2 = tmp1 ^ (unsigned long) p->data; - write_handler(v, p->addr, p->size, tmp2); - } - p->data = tmp1; - break; - - case IOREQ_TYPE_XCHG: - /* - * Note that we don't need to be atomic here since VCPU is accessing - * its own local APIC. 
- */ - tmp1 = read_handler(v, p->addr, p->size); - write_handler(v, p->addr, p->size, (unsigned long) p->data); - p->data = tmp1; - break; - - case IOREQ_TYPE_SUB: - tmp1 = read_handler(v, p->addr, p->size); - if ( p->dir == IOREQ_WRITE ) { - tmp2 = tmp1 - (unsigned long) p->data; - write_handler(v, p->addr, p->size, tmp2); - } - p->data = tmp1; break; default: diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/arch/x86/hvm/io.c Wed Feb 20 14:36:45 2008 +0000 @@ -46,379 +46,8 @@ #include <xen/iocap.h> #include <public/hvm/ioreq.h> -#if defined (__i386__) -static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value) -{ - switch (size) { - case BYTE: - switch (index) { - case 0: - regs->eax &= 0xFFFFFF00; - regs->eax |= (value & 0xFF); - break; - case 1: - regs->ecx &= 0xFFFFFF00; - regs->ecx |= (value & 0xFF); - break; - case 2: - regs->edx &= 0xFFFFFF00; - regs->edx |= (value & 0xFF); - break; - case 3: - regs->ebx &= 0xFFFFFF00; - regs->ebx |= (value & 0xFF); - break; - case 4: - regs->eax &= 0xFFFF00FF; - regs->eax |= ((value & 0xFF) << 8); - break; - case 5: - regs->ecx &= 0xFFFF00FF; - regs->ecx |= ((value & 0xFF) << 8); - break; - case 6: - regs->edx &= 0xFFFF00FF; - regs->edx |= ((value & 0xFF) << 8); - break; - case 7: - regs->ebx &= 0xFFFF00FF; - regs->ebx |= ((value & 0xFF) << 8); - break; - default: - goto crash; - } - break; - case WORD: - switch (index) { - case 0: - regs->eax &= 0xFFFF0000; - regs->eax |= (value & 0xFFFF); - break; - case 1: - regs->ecx &= 0xFFFF0000; - regs->ecx |= (value & 0xFFFF); - break; - case 2: - regs->edx &= 0xFFFF0000; - regs->edx |= (value & 0xFFFF); - break; - case 3: - regs->ebx &= 0xFFFF0000; - regs->ebx |= (value & 0xFFFF); - break; - case 4: - regs->esp &= 0xFFFF0000; - regs->esp |= (value & 0xFFFF); - break; - case 5: - regs->ebp &= 0xFFFF0000; - regs->ebp |= (value & 0xFFFF); - break; - case 6: - regs->esi &= 0xFFFF0000; - regs->esi |= (value & 0xFFFF); - break; - case 7: - regs->edi &= 0xFFFF0000; - regs->edi |= (value & 0xFFFF); - break; - default: - goto crash; - } - break; - case LONG: - switch (index) { - case 0: - regs->eax = value; - break; - case 1: - regs->ecx = value; - break; - case 2: - regs->edx = value; - break; - case 3: - regs->ebx = value; - break; - case 4: - regs->esp = value; - break; - case 5: - regs->ebp = value; - break; - case 6: - regs->esi = value; - break; - case 7: - regs->edi = value; - break; - default: - goto crash; - } - break; - default: - crash: - gdprintk(XENLOG_ERR, "size:%x, index:%x are invalid!\n", size, index); - domain_crash_synchronous(); - } -} -#else -static inline void __set_reg_value(unsigned long *reg, int size, long value) -{ - switch (size) { - case BYTE_64: - *reg &= ~0xFF; - *reg |= (value & 0xFF); - break; - case WORD: - *reg &= ~0xFFFF; - *reg |= (value & 0xFFFF); - break; - case LONG: - *reg &= ~0xFFFFFFFF; - *reg |= (value & 0xFFFFFFFF); - break; - case QUAD: - *reg = value; - break; - default: - gdprintk(XENLOG_ERR, "size:%x is invalid\n", size); - domain_crash_synchronous(); - } -} - -static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value) -{ - if (size == BYTE) { - switch (index) { - case 0: - regs->rax &= ~0xFF; - regs->rax |= (value & 0xFF); - break; - case 1: - regs->rcx &= ~0xFF; - regs->rcx |= (value & 0xFF); - break; - case 2: - regs->rdx &= ~0xFF; - regs->rdx |= (value & 0xFF); - break; - case 3: - regs->rbx &= ~0xFF; - 
regs->rbx |= (value & 0xFF); - break; - case 4: - regs->rax &= 0xFFFFFFFFFFFF00FF; - regs->rax |= ((value & 0xFF) << 8); - break; - case 5: - regs->rcx &= 0xFFFFFFFFFFFF00FF; - regs->rcx |= ((value & 0xFF) << 8); - break; - case 6: - regs->rdx &= 0xFFFFFFFFFFFF00FF; - regs->rdx |= ((value & 0xFF) << 8); - break; - case 7: - regs->rbx &= 0xFFFFFFFFFFFF00FF; - regs->rbx |= ((value & 0xFF) << 8); - break; - default: - gdprintk(XENLOG_ERR, "size:%x, index:%x are invalid!\n", - size, index); - domain_crash_synchronous(); - break; - } - return; - } - - switch (index) { - case 0: - __set_reg_value(®s->rax, size, value); - break; - case 1: - __set_reg_value(®s->rcx, size, value); - break; - case 2: - __set_reg_value(®s->rdx, size, value); - break; - case 3: - __set_reg_value(®s->rbx, size, value); - break; - case 4: - __set_reg_value(®s->rsp, size, value); - break; - case 5: - __set_reg_value(®s->rbp, size, value); - break; - case 6: - __set_reg_value(®s->rsi, size, value); - break; - case 7: - __set_reg_value(®s->rdi, size, value); - break; - case 8: - __set_reg_value(®s->r8, size, value); - break; - case 9: - __set_reg_value(®s->r9, size, value); - break; - case 10: - __set_reg_value(®s->r10, size, value); - break; - case 11: - __set_reg_value(®s->r11, size, value); - break; - case 12: - __set_reg_value(®s->r12, size, value); - break; - case 13: - __set_reg_value(®s->r13, size, value); - break; - case 14: - __set_reg_value(®s->r14, size, value); - break; - case 15: - __set_reg_value(®s->r15, size, value); - break; - default: - gdprintk(XENLOG_ERR, "Invalid index\n"); - domain_crash_synchronous(); - } - return; -} -#endif - -long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs); - -static inline void set_eflags_CF(int size, - unsigned int instr, - unsigned long result, - unsigned long src, - unsigned long dst, - struct cpu_user_regs *regs) -{ - unsigned long mask; - - if ( size == BYTE_64 ) - size = BYTE; - ASSERT((size <= sizeof(mask)) && (size > 0)); - - mask = ~0UL >> (8 * (sizeof(mask) - size)); - - if ( instr == INSTR_ADD ) - { - /* CF=1 <==> result is less than the augend and addend) */ - if ( (result & mask) < (dst & mask) ) - { - ASSERT((result & mask) < (src & mask)); - regs->eflags |= X86_EFLAGS_CF; - } - } - else - { - ASSERT( instr == INSTR_CMP || instr == INSTR_SUB ); - if ( (src & mask) > (dst & mask) ) - regs->eflags |= X86_EFLAGS_CF; - } -} - -static inline void set_eflags_OF(int size, - unsigned int instr, - unsigned long result, - unsigned long src, - unsigned long dst, - struct cpu_user_regs *regs) -{ - unsigned long mask; - - if ( size == BYTE_64 ) - size = BYTE; - ASSERT((size <= sizeof(mask)) && (size > 0)); - - mask = 1UL << ((8*size) - 1); - - if ( instr == INSTR_ADD ) - { - if ((src ^ result) & (dst ^ result) & mask); - regs->eflags |= X86_EFLAGS_OF; - } - else - { - ASSERT(instr == INSTR_CMP || instr == INSTR_SUB); - if ((dst ^ src) & (dst ^ result) & mask) - regs->eflags |= X86_EFLAGS_OF; - } -} - -static inline void set_eflags_AF(int size, - unsigned long result, - unsigned long src, - unsigned long dst, - struct cpu_user_regs *regs) -{ - if ((result ^ src ^ dst) & 0x10) - regs->eflags |= X86_EFLAGS_AF; -} - -static inline void set_eflags_ZF(int size, unsigned long result, - struct cpu_user_regs *regs) -{ - unsigned long mask; - - if ( size == BYTE_64 ) - size = BYTE; - ASSERT((size <= sizeof(mask)) && (size > 0)); - - mask = ~0UL >> (8 * (sizeof(mask) - size)); - - if ((result & mask) == 0) - regs->eflags |= X86_EFLAGS_ZF; -} - -static inline 
void set_eflags_SF(int size, unsigned long result, - struct cpu_user_regs *regs) -{ - unsigned long mask; - - if ( size == BYTE_64 ) - size = BYTE; - ASSERT((size <= sizeof(mask)) && (size > 0)); - - mask = 1UL << ((8*size) - 1); - - if (result & mask) - regs->eflags |= X86_EFLAGS_SF; -} - -static char parity_table[256] = { - 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, - 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, - 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, - 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, - 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, - 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, - 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, - 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, - 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, - 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, - 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, - 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, - 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, - 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, - 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, - 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1 -}; - -static inline void set_eflags_PF(int size, unsigned long result, - struct cpu_user_regs *regs) -{ - if (parity_table[result & 0xFF]) - regs->eflags |= X86_EFLAGS_PF; -} - -static void hvm_pio_assist(struct cpu_user_regs *regs, ioreq_t *p, - struct hvm_io_op *pio_opp) +static void hvm_pio_assist( + struct cpu_user_regs *regs, ioreq_t *p, struct hvm_io_op *pio_opp) { if ( p->data_is_ptr || (pio_opp->flags & OVERLAP) ) { @@ -472,335 +101,6 @@ static void hvm_pio_assist(struct cpu_us } } -static void hvm_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p, - struct hvm_io_op *mmio_opp) -{ - int sign = p->df ? -1 : 1; - int size = -1, index = -1; - unsigned long value = 0, result = 0; - unsigned long src, dst; - - src = mmio_opp->operand[0]; - dst = mmio_opp->operand[1]; - size = operand_size(src); - - HVMTRACE_1D(MMIO_ASSIST, current, p->data); - - switch (mmio_opp->instr) { - case INSTR_MOV: - if (dst & REGISTER) { - index = operand_index(dst); - set_reg_value(size, index, 0, regs, p->data); - } - break; - - case INSTR_MOVZX: - if (dst & REGISTER) { - switch (size) { - case BYTE: - p->data &= 0xFFULL; - break; - - case WORD: - p->data &= 0xFFFFULL; - break; - - case LONG: - p->data &= 0xFFFFFFFFULL; - break; - - default: - printk("Impossible source operand size of movzx instr: %d\n", size); - domain_crash_synchronous(); - } - index = operand_index(dst); - set_reg_value(operand_size(dst), index, 0, regs, p->data); - } - break; - - case INSTR_MOVSX: - if (dst & REGISTER) { - switch (size) { - case BYTE: - p->data &= 0xFFULL; - if ( p->data & 0x80ULL ) - p->data |= 0xFFFFFFFFFFFFFF00ULL; - break; - - case WORD: - p->data &= 0xFFFFULL; - if ( p->data & 0x8000ULL ) - p->data |= 0xFFFFFFFFFFFF0000ULL; - break; - - case LONG: - p->data &= 0xFFFFFFFFULL; - if ( p->data & 0x80000000ULL ) - p->data |= 0xFFFFFFFF00000000ULL; - break; - - default: - printk("Impossible source operand size of movsx instr: %d\n", size); - domain_crash_synchronous(); - } - index = operand_index(dst); - set_reg_value(operand_size(dst), index, 0, regs, p->data); - } - break; - - case INSTR_MOVS: - sign = p->df ? 
-1 : 1; - - if (mmio_opp->flags & REPZ) - regs->ecx -= p->count; - - if ((mmio_opp->flags & OVERLAP) && p->dir == IOREQ_READ) { - unsigned long addr = mmio_opp->addr; - - if (hvm_paging_enabled(current)) - { - int rv = hvm_copy_to_guest_virt(addr, &p->data, p->size); - if ( rv == HVMCOPY_bad_gva_to_gfn ) - return; /* exception already injected */ - } - else - (void)hvm_copy_to_guest_phys(addr, &p->data, p->size); - } - - regs->esi += sign * p->count * p->size; - regs->edi += sign * p->count * p->size; - - break; - - case INSTR_STOS: - sign = p->df ? -1 : 1; - regs->edi += sign * p->count * p->size; - if (mmio_opp->flags & REPZ) - regs->ecx -= p->count; - break; - - case INSTR_LODS: - set_reg_value(size, 0, 0, regs, p->data); - sign = p->df ? -1 : 1; - regs->esi += sign * p->count * p->size; - if (mmio_opp->flags & REPZ) - regs->ecx -= p->count; - break; - - case INSTR_AND: - if (src & REGISTER) { - index = operand_index(src); - value = get_reg_value(size, index, 0, regs); - result = (unsigned long) p->data & value; - } else if (src & IMMEDIATE) { - value = mmio_opp->immediate; - result = (unsigned long) p->data & value; - } else if (src & MEMORY) { - index = operand_index(dst); - value = get_reg_value(size, index, 0, regs); - result = (unsigned long) p->data & value; - set_reg_value(size, index, 0, regs, result); - } - - /* - * The OF and CF flags are cleared; the SF, ZF, and PF - * flags are set according to the result. The state of - * the AF flag is undefined. - */ - regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| - X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - set_eflags_ZF(size, result, regs); - set_eflags_SF(size, result, regs); - set_eflags_PF(size, result, regs); - break; - - case INSTR_ADD: - if (src & REGISTER) { - index = operand_index(src); - value = get_reg_value(size, index, 0, regs); - result = (unsigned long) p->data + value; - } else if (src & IMMEDIATE) { - value = mmio_opp->immediate; - result = (unsigned long) p->data + value; - } else if (src & MEMORY) { - index = operand_index(dst); - value = get_reg_value(size, index, 0, regs); - result = (unsigned long) p->data + value; - set_reg_value(size, index, 0, regs, result); - } - - /* - * The CF, OF, SF, ZF, AF, and PF flags are set according - * to the result - */ - regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF| - X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - set_eflags_CF(size, mmio_opp->instr, result, value, - (unsigned long) p->data, regs); - set_eflags_OF(size, mmio_opp->instr, result, value, - (unsigned long) p->data, regs); - set_eflags_AF(size, result, value, (unsigned long) p->data, regs); - set_eflags_ZF(size, result, regs); - set_eflags_SF(size, result, regs); - set_eflags_PF(size, result, regs); - break; - - case INSTR_OR: - if (src & REGISTER) { - index = operand_index(src); - value = get_reg_value(size, index, 0, regs); - result = (unsigned long) p->data | value; - } else if (src & IMMEDIATE) { - value = mmio_opp->immediate; - result = (unsigned long) p->data | value; - } else if (src & MEMORY) { - index = operand_index(dst); - value = get_reg_value(size, index, 0, regs); - result = (unsigned long) p->data | value; - set_reg_value(size, index, 0, regs, result); - } - - /* - * The OF and CF flags are cleared; the SF, ZF, and PF - * flags are set according to the result. The state of - * the AF flag is undefined. 
- */ - regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| - X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - set_eflags_ZF(size, result, regs); - set_eflags_SF(size, result, regs); - set_eflags_PF(size, result, regs); - break; - - case INSTR_XOR: - if (src & REGISTER) { - index = operand_index(src); - value = get_reg_value(size, index, 0, regs); - result = (unsigned long) p->data ^ value; - } else if (src & IMMEDIATE) { - value = mmio_opp->immediate; - result = (unsigned long) p->data ^ value; - } else if (src & MEMORY) { - index = operand_index(dst); - value = get_reg_value(size, index, 0, regs); - result = (unsigned long) p->data ^ value; - set_reg_value(size, index, 0, regs, result); - } - - /* - * The OF and CF flags are cleared; the SF, ZF, and PF - * flags are set according to the result. The state of - * the AF flag is undefined. - */ - regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| - X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - set_eflags_ZF(size, result, regs); - set_eflags_SF(size, result, regs); - set_eflags_PF(size, result, regs); - break; - - case INSTR_CMP: - case INSTR_SUB: - if (src & REGISTER) { - index = operand_index(src); - value = get_reg_value(size, index, 0, regs); - result = (unsigned long) p->data - value; - } else if (src & IMMEDIATE) { - value = mmio_opp->immediate; - result = (unsigned long) p->data - value; - } else if (src & MEMORY) { - index = operand_index(dst); - value = get_reg_value(size, index, 0, regs); - result = value - (unsigned long) p->data; - if ( mmio_opp->instr == INSTR_SUB ) - set_reg_value(size, index, 0, regs, result); - } - - /* - * The CF, OF, SF, ZF, AF, and PF flags are set according - * to the result - */ - regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF| - X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - if ( src & (REGISTER | IMMEDIATE) ) - { - set_eflags_CF(size, mmio_opp->instr, result, value, - (unsigned long) p->data, regs); - set_eflags_OF(size, mmio_opp->instr, result, value, - (unsigned long) p->data, regs); - } - else - { - set_eflags_CF(size, mmio_opp->instr, result, - (unsigned long) p->data, value, regs); - set_eflags_OF(size, mmio_opp->instr, result, - (unsigned long) p->data, value, regs); - } - set_eflags_AF(size, result, value, (unsigned long) p->data, regs); - set_eflags_ZF(size, result, regs); - set_eflags_SF(size, result, regs); - set_eflags_PF(size, result, regs); - break; - - case INSTR_TEST: - if (src & REGISTER) { - index = operand_index(src); - value = get_reg_value(size, index, 0, regs); - } else if (src & IMMEDIATE) { - value = mmio_opp->immediate; - } else if (src & MEMORY) { - index = operand_index(dst); - value = get_reg_value(size, index, 0, regs); - } - result = (unsigned long) p->data & value; - - /* - * Sets the SF, ZF, and PF status flags. 
CF and OF are set to 0 - */ - regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| - X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - set_eflags_ZF(size, result, regs); - set_eflags_SF(size, result, regs); - set_eflags_PF(size, result, regs); - break; - - case INSTR_BT: - if ( src & REGISTER ) - { - index = operand_index(src); - value = get_reg_value(size, index, 0, regs); - } - else if ( src & IMMEDIATE ) - value = mmio_opp->immediate; - if (p->data & (1 << (value & ((1 << 5) - 1)))) - regs->eflags |= X86_EFLAGS_CF; - else - regs->eflags &= ~X86_EFLAGS_CF; - - break; - - case INSTR_XCHG: - if (src & REGISTER) { - index = operand_index(src); - set_reg_value(size, index, 0, regs, p->data); - } else { - index = operand_index(dst); - set_reg_value(size, index, 0, regs, p->data); - } - break; - - case INSTR_PUSH: - mmio_opp->addr += hvm_get_segment_base(current, x86_seg_ss); - { - unsigned long addr = mmio_opp->addr; - int rv = hvm_copy_to_guest_virt(addr, &p->data, size); - if ( rv == HVMCOPY_bad_gva_to_gfn ) - return; /* exception already injected */ - } - break; - } -} - void hvm_io_assist(void) { vcpu_iodata_t *vio; @@ -825,8 +125,18 @@ void hvm_io_assist(void) p->state = STATE_IOREQ_NONE; - if ( v->arch.hvm_vcpu.io_complete && v->arch.hvm_vcpu.io_complete() ) - goto out; + if ( v->arch.hvm_vcpu.io_in_progress ) + { + v->arch.hvm_vcpu.io_in_progress = 0; + if ( p->dir == IOREQ_READ ) + { + v->arch.hvm_vcpu.io_completed = 1; + v->arch.hvm_vcpu.io_data = p->data; + } + if ( v->arch.hvm_vcpu.mmio_in_progress ) + (void)handle_mmio(); + goto out; + } switch ( p->type ) { @@ -836,8 +146,9 @@ void hvm_io_assist(void) hvm_pio_assist(regs, p, io_opp); break; default: - hvm_mmio_assist(regs, p, io_opp); - break; + gdprintk(XENLOG_ERR, "Unexpected HVM iorequest state %d.\n", p->state); + domain_crash(v->domain); + goto out; } /* Copy register changes back into current guest state. 
*/ diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/arch/x86/hvm/platform.c Wed Feb 20 14:36:45 2008 +0000 @@ -33,801 +33,9 @@ #include <asm/hvm/support.h> #include <asm/hvm/io.h> #include <public/hvm/ioreq.h> - #include <xen/lib.h> #include <xen/sched.h> -#include <asm/current.h> - -#define DECODE_success 1 -#define DECODE_failure 0 - -#define mk_operand(size_reg, index, seg, flag) \ - (((size_reg) << 24) | ((index) << 16) | ((seg) << 8) | (flag)) - -#if defined (__x86_64__) -static inline long __get_reg_value(unsigned long reg, int size) -{ - switch ( size ) { - case BYTE_64: - return (char)(reg & 0xFF); - case WORD: - return (short)(reg & 0xFFFF); - case LONG: - return (int)(reg & 0xFFFFFFFF); - case QUAD: - return (long)(reg); - default: - printk("Error: (__get_reg_value) Invalid reg size\n"); - domain_crash_synchronous(); - } -} - -long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) -{ - if ( size == BYTE ) { - switch ( index ) { - case 0: /* %al */ - return (char)(regs->rax & 0xFF); - case 1: /* %cl */ - return (char)(regs->rcx & 0xFF); - case 2: /* %dl */ - return (char)(regs->rdx & 0xFF); - case 3: /* %bl */ - return (char)(regs->rbx & 0xFF); - case 4: /* %ah */ - return (char)((regs->rax & 0xFF00) >> 8); - case 5: /* %ch */ - return (char)((regs->rcx & 0xFF00) >> 8); - case 6: /* %dh */ - return (char)((regs->rdx & 0xFF00) >> 8); - case 7: /* %bh */ - return (char)((regs->rbx & 0xFF00) >> 8); - default: - printk("Error: (get_reg_value) Invalid index value\n"); - domain_crash_synchronous(); - } - /* NOTREACHED */ - } - - switch ( index ) { - case 0: return __get_reg_value(regs->rax, size); - case 1: return __get_reg_value(regs->rcx, size); - case 2: return __get_reg_value(regs->rdx, size); - case 3: return __get_reg_value(regs->rbx, size); - case 4: return __get_reg_value(regs->rsp, size); - case 5: return __get_reg_value(regs->rbp, size); - case 6: return __get_reg_value(regs->rsi, size); - case 7: return __get_reg_value(regs->rdi, size); - case 8: return __get_reg_value(regs->r8, size); - case 9: return __get_reg_value(regs->r9, size); - case 10: return __get_reg_value(regs->r10, size); - case 11: return __get_reg_value(regs->r11, size); - case 12: return __get_reg_value(regs->r12, size); - case 13: return __get_reg_value(regs->r13, size); - case 14: return __get_reg_value(regs->r14, size); - case 15: return __get_reg_value(regs->r15, size); - default: - printk("Error: (get_reg_value) Invalid index value\n"); - domain_crash_synchronous(); - } -} -#elif defined (__i386__) -static inline long __get_reg_value(unsigned long reg, int size) -{ - switch ( size ) { - case WORD: - return (short)(reg & 0xFFFF); - case LONG: - return (int)(reg & 0xFFFFFFFF); - default: - printk("Error: (__get_reg_value) Invalid reg size\n"); - domain_crash_synchronous(); - } -} - -long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) -{ - if ( size == BYTE ) { - switch ( index ) { - case 0: /* %al */ - return (char)(regs->eax & 0xFF); - case 1: /* %cl */ - return (char)(regs->ecx & 0xFF); - case 2: /* %dl */ - return (char)(regs->edx & 0xFF); - case 3: /* %bl */ - return (char)(regs->ebx & 0xFF); - case 4: /* %ah */ - return (char)((regs->eax & 0xFF00) >> 8); - case 5: /* %ch */ - return (char)((regs->ecx & 0xFF00) >> 8); - case 6: /* %dh */ - return (char)((regs->edx & 0xFF00) >> 8); - case 7: /* %bh */ - return (char)((regs->ebx & 0xFF00) >> 8); - default: - 
printk("Error: (get_reg_value) Invalid index value\n"); - domain_crash_synchronous(); - } - } - - switch ( index ) { - case 0: return __get_reg_value(regs->eax, size); - case 1: return __get_reg_value(regs->ecx, size); - case 2: return __get_reg_value(regs->edx, size); - case 3: return __get_reg_value(regs->ebx, size); - case 4: return __get_reg_value(regs->esp, size); - case 5: return __get_reg_value(regs->ebp, size); - case 6: return __get_reg_value(regs->esi, size); - case 7: return __get_reg_value(regs->edi, size); - default: - printk("Error: (get_reg_value) Invalid index value\n"); - domain_crash_synchronous(); - } -} -#endif - -static inline unsigned char *check_prefix(unsigned char *inst, - struct hvm_io_op *mmio_op, - unsigned char *ad_size, - unsigned char *op_size, - unsigned char *seg_sel, - unsigned char *rex_p) -{ - while ( 1 ) { - switch ( *inst ) { - /* rex prefix for em64t instructions */ - case 0x40 ... 0x4f: - *rex_p = *inst; - break; - case 0xf3: /* REPZ */ - mmio_op->flags = REPZ; - break; - case 0xf2: /* REPNZ */ - mmio_op->flags = REPNZ; - break; - case 0xf0: /* LOCK */ - break; - case 0x2e: /* CS */ - case 0x36: /* SS */ - case 0x3e: /* DS */ - case 0x26: /* ES */ - case 0x64: /* FS */ - case 0x65: /* GS */ - *seg_sel = *inst; - break; - case 0x66: /* 32bit->16bit */ - *op_size = WORD; - break; - case 0x67: - *ad_size = WORD; - break; - default: - return inst; - } - inst++; - } -} - -static inline unsigned long get_immediate(int ad_size, const unsigned char *inst, int op_size) -{ - int mod, reg, rm; - unsigned long val = 0; - int i; - - mod = (*inst >> 6) & 3; - reg = (*inst >> 3) & 7; - rm = *inst & 7; - - inst++; //skip ModR/M byte - if ( ad_size != WORD && mod != 3 && rm == 4 ) { - rm = *inst & 7; - inst++; //skip SIB byte - } - - switch ( mod ) { - case 0: - if ( ad_size == WORD ) { - if ( rm == 6 ) - inst = inst + 2; //disp16, skip 2 bytes - } - else { - if ( rm == 5 ) - inst = inst + 4; //disp32, skip 4 bytes - } - break; - case 1: - inst++; //disp8, skip 1 byte - break; - case 2: - if ( ad_size == WORD ) - inst = inst + 2; //disp16, skip 2 bytes - else - inst = inst + 4; //disp32, skip 4 bytes - break; - } - - if ( op_size == QUAD ) - op_size = LONG; - - for ( i = 0; i < op_size; i++ ) { - val |= (*inst++ & 0xff) << (8 * i); - } - - return val; -} - -static inline unsigned long get_immediate_sign_ext( - int ad_size, const unsigned char *inst, int op_size) -{ - unsigned long result = get_immediate(ad_size, inst, op_size); - if ( op_size == BYTE ) - return (int8_t)result; - if ( op_size == WORD ) - return (int16_t)result; - return (int32_t)result; -} - -static inline int get_index(const unsigned char *inst, unsigned char rex) -{ - int mod, reg, rm; - int rex_r, rex_b; - - mod = (*inst >> 6) & 3; - reg = (*inst >> 3) & 7; - rm = *inst & 7; - - rex_r = (rex >> 2) & 1; - rex_b = rex & 1; - - //Only one operand in the instruction is register - if ( mod == 3 ) { - return (rm + (rex_b << 3)); - } else { - return (reg + (rex_r << 3)); - } - return 0; -} - -static void init_instruction(struct hvm_io_op *mmio_op) -{ - mmio_op->instr = 0; - - mmio_op->flags = 0; - - mmio_op->operand[0] = 0; - mmio_op->operand[1] = 0; - mmio_op->immediate = 0; -} - -#define GET_OP_SIZE_FOR_BYTE(size_reg) \ - do { \ - if ( rex ) \ - (size_reg) = BYTE_64; \ - else \ - (size_reg) = BYTE; \ - } while( 0 ) - -#define GET_OP_SIZE_FOR_NONEBYTE(op_size) \ - do { \ - if ( rex & 0x8 ) \ - (op_size) = QUAD; \ - else if ( (op_size) != WORD ) \ - (op_size) = LONG; \ - } while( 0 ) - - -/* - * Decode 
mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax) - */ -static inline int mem_acc(unsigned char size, struct hvm_io_op *mmio) -{ - mmio->operand[0] = mk_operand(size, 0, 0, MEMORY); - mmio->operand[1] = mk_operand(size, 0, 0, REGISTER); - return DECODE_success; -} - -/* - * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32) - */ -static inline int acc_mem(unsigned char size, struct hvm_io_op *mmio) -{ - mmio->operand[0] = mk_operand(size, 0, 0, REGISTER); - mmio->operand[1] = mk_operand(size, 0, 0, MEMORY); - return DECODE_success; -} - -/* - * Decode mem,reg operands (as in <opcode> r32/16, m32/16) - */ -static int mem_reg(unsigned char size, unsigned char *opcode, - struct hvm_io_op *mmio_op, unsigned char rex) -{ - int index = get_index(opcode + 1, rex); - - mmio_op->operand[0] = mk_operand(size, 0, 0, MEMORY); - mmio_op->operand[1] = mk_operand(size, index, 0, REGISTER); - return DECODE_success; -} - -/* - * Decode reg,mem operands (as in <opcode> m32/16, r32/16) - */ -static int reg_mem(unsigned char size, unsigned char *opcode, - struct hvm_io_op *mmio_op, unsigned char rex) -{ - int index = get_index(opcode + 1, rex); - - mmio_op->operand[0] = mk_operand(size, index, 0, REGISTER); - mmio_op->operand[1] = mk_operand(size, 0, 0, MEMORY); - return DECODE_success; -} - -static int mmio_decode(int address_bytes, unsigned char *opcode, - struct hvm_io_op *mmio_op, - unsigned char *ad_size, unsigned char *op_size, - unsigned char *seg_sel) -{ - unsigned char size_reg = 0; - unsigned char rex = 0; - int index; - - *ad_size = 0; - *op_size = 0; - *seg_sel = 0; - init_instruction(mmio_op); - - opcode = check_prefix(opcode, mmio_op, ad_size, op_size, seg_sel, &rex); - - switch ( address_bytes ) - { - case 2: - if ( *op_size == WORD ) - *op_size = LONG; - else if ( *op_size == LONG ) - *op_size = WORD; - else if ( *op_size == 0 ) - *op_size = WORD; - if ( *ad_size == WORD ) - *ad_size = LONG; - else if ( *ad_size == LONG ) - *ad_size = WORD; - else if ( *ad_size == 0 ) - *ad_size = WORD; - break; - case 4: - if ( *op_size == 0 ) - *op_size = LONG; - if ( *ad_size == 0 ) - *ad_size = LONG; - break; -#ifdef __x86_64__ - case 8: - if ( *op_size == 0 ) - *op_size = rex & 0x8 ? 
QUAD : LONG; - if ( *ad_size == WORD ) - *ad_size = LONG; - else if ( *ad_size == 0 ) - *ad_size = QUAD; - break; -#endif - } - - /* the operands order in comments conforms to AT&T convention */ - - switch ( *opcode ) { - - case 0x00: /* add r8, m8 */ - mmio_op->instr = INSTR_ADD; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, mmio_op, rex); - - case 0x03: /* add m32/16, r32/16 */ - mmio_op->instr = INSTR_ADD; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return mem_reg(*op_size, opcode, mmio_op, rex); - - case 0x08: /* or r8, m8 */ - mmio_op->instr = INSTR_OR; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, mmio_op, rex); - - case 0x09: /* or r32/16, m32/16 */ - mmio_op->instr = INSTR_OR; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return reg_mem(*op_size, opcode, mmio_op, rex); - - case 0x0A: /* or m8, r8 */ - mmio_op->instr = INSTR_OR; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_reg(size_reg, opcode, mmio_op, rex); - - case 0x0B: /* or m32/16, r32/16 */ - mmio_op->instr = INSTR_OR; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return mem_reg(*op_size, opcode, mmio_op, rex); - - case 0x20: /* and r8, m8 */ - mmio_op->instr = INSTR_AND; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, mmio_op, rex); - - case 0x21: /* and r32/16, m32/16 */ - mmio_op->instr = INSTR_AND; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return reg_mem(*op_size, opcode, mmio_op, rex); - - case 0x22: /* and m8, r8 */ - mmio_op->instr = INSTR_AND; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_reg(size_reg, opcode, mmio_op, rex); - - case 0x23: /* and m32/16, r32/16 */ - mmio_op->instr = INSTR_AND; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return mem_reg(*op_size, opcode, mmio_op, rex); - - case 0x2B: /* sub m32/16, r32/16 */ - mmio_op->instr = INSTR_SUB; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return mem_reg(*op_size, opcode, mmio_op, rex); - - case 0x30: /* xor r8, m8 */ - mmio_op->instr = INSTR_XOR; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, mmio_op, rex); - - case 0x31: /* xor r32/16, m32/16 */ - mmio_op->instr = INSTR_XOR; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return reg_mem(*op_size, opcode, mmio_op, rex); - - case 0x32: /* xor m8, r8 */ - mmio_op->instr = INSTR_XOR; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_reg(size_reg, opcode, mmio_op, rex); - - case 0x38: /* cmp r8, m8 */ - mmio_op->instr = INSTR_CMP; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, mmio_op, rex); - - case 0x39: /* cmp r32/16, m32/16 */ - mmio_op->instr = INSTR_CMP; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return reg_mem(*op_size, opcode, mmio_op, rex); - - case 0x3A: /* cmp m8, r8 */ - mmio_op->instr = INSTR_CMP; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_reg(size_reg, opcode, mmio_op, rex); - - case 0x3B: /* cmp m32/16, r32/16 */ - mmio_op->instr = INSTR_CMP; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return mem_reg(*op_size, opcode, mmio_op, rex); - - case 0x80: - case 0x81: - case 0x83: - { - unsigned char ins_subtype = (opcode[1] >> 3) & 7; - - if ( opcode[0] == 0x80 ) { - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - } else { - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - size_reg = *op_size; - } - - /* opcode 0x83 always has a single byte operand */ - if ( opcode[0] == 0x83 ) - mmio_op->immediate = - get_immediate_sign_ext(*ad_size, opcode + 1, BYTE); - else - 
mmio_op->immediate = - get_immediate_sign_ext(*ad_size, opcode + 1, *op_size); - - mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE); - mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY); - - switch ( ins_subtype ) { - case 0: /* add $imm, m32/16 */ - mmio_op->instr = INSTR_ADD; - return DECODE_success; - - case 1: /* or $imm, m32/16 */ - mmio_op->instr = INSTR_OR; - return DECODE_success; - - case 4: /* and $imm, m32/16 */ - mmio_op->instr = INSTR_AND; - return DECODE_success; - - case 5: /* sub $imm, m32/16 */ - mmio_op->instr = INSTR_SUB; - return DECODE_success; - - case 6: /* xor $imm, m32/16 */ - mmio_op->instr = INSTR_XOR; - return DECODE_success; - - case 7: /* cmp $imm, m32/16 */ - mmio_op->instr = INSTR_CMP; - return DECODE_success; - - default: - printk("%x/%x, This opcode isn't handled yet!\n", - *opcode, ins_subtype); - return DECODE_failure; - } - } - - case 0x84: /* test r8, m8 */ - mmio_op->instr = INSTR_TEST; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, mmio_op, rex); - - case 0x85: /* test r16/32, m16/32 */ - mmio_op->instr = INSTR_TEST; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return reg_mem(*op_size, opcode, mmio_op, rex); - - case 0x86: /* xchg m8, r8 */ - mmio_op->instr = INSTR_XCHG; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, mmio_op, rex); - - case 0x87: /* xchg m16/32, r16/32 */ - mmio_op->instr = INSTR_XCHG; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return reg_mem(*op_size, opcode, mmio_op, rex); - - case 0x88: /* mov r8, m8 */ - mmio_op->instr = INSTR_MOV; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, mmio_op, rex); - - case 0x89: /* mov r32/16, m32/16 */ - mmio_op->instr = INSTR_MOV; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return reg_mem(*op_size, opcode, mmio_op, rex); - - case 0x8A: /* mov m8, r8 */ - mmio_op->instr = INSTR_MOV; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_reg(size_reg, opcode, mmio_op, rex); - - case 0x8B: /* mov m32/16, r32/16 */ - mmio_op->instr = INSTR_MOV; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return mem_reg(*op_size, opcode, mmio_op, rex); - - case 0xA0: /* mov <addr>, al */ - mmio_op->instr = INSTR_MOV; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_acc(size_reg, mmio_op); - - case 0xA1: /* mov <addr>, ax/eax */ - mmio_op->instr = INSTR_MOV; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return mem_acc(*op_size, mmio_op); - - case 0xA2: /* mov al, <addr> */ - mmio_op->instr = INSTR_MOV; - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return acc_mem(size_reg, mmio_op); - - case 0xA3: /* mov ax/eax, <addr> */ - mmio_op->instr = INSTR_MOV; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return acc_mem(*op_size, mmio_op); - - case 0xA4: /* movsb */ - mmio_op->instr = INSTR_MOVS; - *op_size = BYTE; - return DECODE_success; - - case 0xA5: /* movsw/movsl */ - mmio_op->instr = INSTR_MOVS; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return DECODE_success; - - case 0xAA: /* stosb */ - mmio_op->instr = INSTR_STOS; - *op_size = BYTE; - return DECODE_success; - - case 0xAB: /* stosw/stosl */ - mmio_op->instr = INSTR_STOS; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return DECODE_success; - - case 0xAC: /* lodsb */ - mmio_op->instr = INSTR_LODS; - *op_size = BYTE; - return DECODE_success; - - case 0xAD: /* lodsw/lodsl */ - mmio_op->instr = INSTR_LODS; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - return DECODE_success; - - case 0xC6: - if ( ((opcode[1] >> 3) & 7) == 0 ) { /* mov $imm8, m8 */ 
- mmio_op->instr = INSTR_MOV; - *op_size = BYTE; - - mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE); - mmio_op->immediate = - get_immediate(*ad_size, opcode + 1, *op_size); - mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY); - - return DECODE_success; - } else - return DECODE_failure; - - case 0xC7: - if ( ((opcode[1] >> 3) & 7) == 0 ) { /* mov $imm16/32, m16/32 */ - mmio_op->instr = INSTR_MOV; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - - mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE); - mmio_op->immediate = - get_immediate_sign_ext(*ad_size, opcode + 1, *op_size); - mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY); - - return DECODE_success; - } else - return DECODE_failure; - - case 0xF6: - case 0xF7: - if ( ((opcode[1] >> 3) & 7) == 0 ) { /* test $imm8/16/32, m8/16/32 */ - mmio_op->instr = INSTR_TEST; - - if ( opcode[0] == 0xF6 ) { - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - } else { - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - size_reg = *op_size; - } - - mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE); - mmio_op->immediate = - get_immediate_sign_ext(*ad_size, opcode + 1, *op_size); - mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY); - - return DECODE_success; - } else - return DECODE_failure; - - case 0xFE: - case 0xFF: - { - unsigned char ins_subtype = (opcode[1] >> 3) & 7; - - if ( opcode[0] == 0xFE ) { - *op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - } else { - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - size_reg = *op_size; - } - - mmio_op->immediate = 1; - mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE); - mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY); - - switch ( ins_subtype ) { - case 0: /* inc */ - mmio_op->instr = INSTR_ADD; - return DECODE_success; - - case 1: /* dec */ - mmio_op->instr = INSTR_SUB; - return DECODE_success; - - case 6: /* push */ - mmio_op->instr = INSTR_PUSH; - mmio_op->operand[0] = mmio_op->operand[1]; - return DECODE_success; - - default: - printk("%x/%x, This opcode isn't handled yet!\n", - *opcode, ins_subtype); - return DECODE_failure; - } - } - - case 0x0F: - break; - - default: - printk("%x, This opcode isn't handled yet!\n", *opcode); - return DECODE_failure; - } - - switch ( *++opcode ) { - case 0xB6: /* movzx m8, r16/r32/r64 */ - mmio_op->instr = INSTR_MOVZX; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - index = get_index(opcode + 1, rex); - mmio_op->operand[0] = mk_operand(BYTE, 0, 0, MEMORY); - mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER); - return DECODE_success; - - case 0xB7: /* movzx m16, r32/r64 */ - mmio_op->instr = INSTR_MOVZX; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - index = get_index(opcode + 1, rex); - mmio_op->operand[0] = mk_operand(WORD, 0, 0, MEMORY); - mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER); - return DECODE_success; - - case 0xBE: /* movsx m8, r16/r32/r64 */ - mmio_op->instr = INSTR_MOVSX; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - index = get_index(opcode + 1, rex); - mmio_op->operand[0] = mk_operand(BYTE, 0, 0, MEMORY); - mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER); - return DECODE_success; - - case 0xBF: /* movsx m16, r32/r64 */ - mmio_op->instr = INSTR_MOVSX; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - index = get_index(opcode + 1, rex); - mmio_op->operand[0] = mk_operand(WORD, 0, 0, MEMORY); - mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER); - return DECODE_success; - - case 0xA3: /* bt r32, m32 */ - mmio_op->instr = INSTR_BT; - index = get_index(opcode + 1, rex); - *op_size = 
LONG; - mmio_op->operand[0] = mk_operand(*op_size, index, 0, REGISTER); - mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY); - return DECODE_success; - - case 0xBA: - if ( ((opcode[1] >> 3) & 7) == 4 ) /* BT $imm8, m16/32/64 */ - { - mmio_op->instr = INSTR_BT; - GET_OP_SIZE_FOR_NONEBYTE(*op_size); - mmio_op->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE); - mmio_op->immediate = - (signed char)get_immediate(*ad_size, opcode + 1, BYTE); - mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY); - return DECODE_success; - } - else - { - printk("0f %x, This opcode subtype isn't handled yet\n", *opcode); - return DECODE_failure; - } - - default: - printk("0f %x, This opcode isn't handled yet\n", *opcode); - return DECODE_failure; - } -} +#include <asm/hvm/emulate.h> int inst_copy_from_guest( unsigned char *buf, unsigned long guest_eip, int inst_len) @@ -984,323 +192,41 @@ void send_invalidate_req(void) hvm_send_assist_req(v); } -static void mmio_operands(int type, unsigned long gpa, - struct hvm_io_op *mmio_op, - unsigned char op_size) -{ - unsigned long value = 0; - int df, index, size_reg; - struct cpu_user_regs *regs = &mmio_op->io_context; - - df = regs->eflags & X86_EFLAGS_DF ? 1 : 0; - - size_reg = operand_size(mmio_op->operand[0]); - - if ( mmio_op->operand[0] & REGISTER ) { /* dest is memory */ - index = operand_index(mmio_op->operand[0]); - value = get_reg_value(size_reg, index, 0, regs); - send_mmio_req(type, gpa, 1, op_size, value, IOREQ_WRITE, df, 0); - } else if ( mmio_op->operand[0] & IMMEDIATE ) { /* dest is memory */ - value = mmio_op->immediate; - send_mmio_req(type, gpa, 1, op_size, value, IOREQ_WRITE, df, 0); - } else if ( mmio_op->operand[0] & MEMORY ) { /* dest is register */ - /* send the request and wait for the value */ - if ( (mmio_op->instr == INSTR_MOVZX) || - (mmio_op->instr == INSTR_MOVSX) ) - send_mmio_req(type, gpa, 1, size_reg, 0, IOREQ_READ, df, 0); - else - send_mmio_req(type, gpa, 1, op_size, 0, IOREQ_READ, df, 0); - } else { - printk("%s: invalid dest mode.\n", __func__); - domain_crash_synchronous(); - } -} - -#define GET_REPEAT_COUNT() \ - (mmio_op->flags & REPZ ? (ad_size == WORD ? regs->ecx & 0xFFFF : regs->ecx) : 1) - - -void handle_mmio(paddr_t gpa) -{ - unsigned long inst_addr; - struct hvm_io_op *mmio_op; - struct cpu_user_regs *regs; - unsigned char inst[MAX_INST_LEN], ad_size, op_size, seg_sel; - int i, address_bytes, df, inst_len; - struct vcpu *v = current; - - mmio_op = &v->arch.hvm_vcpu.io_op; - regs = &mmio_op->io_context; - - /* Copy current guest state into io instruction state structure. */ - memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES); - - df = regs->eflags & X86_EFLAGS_DF ? 
1 : 0; - - address_bytes = hvm_guest_x86_mode(v); - if (address_bytes < 2) - /* real or vm86 modes */ - address_bytes = 2; - inst_addr = hvm_get_segment_base(v, x86_seg_cs) + regs->eip; - memset(inst, 0, MAX_INST_LEN); - inst_len = hvm_instruction_fetch(inst_addr, address_bytes, inst); - if ( inst_len <= 0 ) - { - gdprintk(XENLOG_DEBUG, "handle_mmio: failed to get instruction\n"); - /* hvm_instruction_fetch() will have injected a #PF; get out now */ - return; - } - - if ( mmio_decode(address_bytes, inst, mmio_op, &ad_size, - &op_size, &seg_sel) == DECODE_failure ) - { +int handle_mmio(void) +{ + struct hvm_emulate_ctxt ctxt; + struct vcpu *curr = current; + int rc; + + hvm_emulate_prepare(&ctxt, guest_cpu_user_regs()); + + rc = hvm_emulate_one(&ctxt); + + switch ( rc ) + { + case X86EMUL_UNHANDLEABLE: gdprintk(XENLOG_WARNING, - "handle_mmio: failed to decode instruction\n"); - gdprintk(XENLOG_WARNING, - "mmio opcode: gpa 0x%"PRIpaddr", len %d:", gpa, inst_len); - for ( i = 0; i < inst_len; i++ ) - printk(" %02x", inst[i] & 0xFF); - printk("\n"); - - hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0); - return; - } - - regs->eip += inst_len; /* advance %eip */ - - switch ( mmio_op->instr ) { - case INSTR_MOV: - mmio_operands(IOREQ_TYPE_COPY, gpa, mmio_op, op_size); + "MMIO emulation failed @ %04x:%lx: " + "%02x %02x %02x %02x %02x %02x\n", + hvmemul_get_seg_reg(x86_seg_cs, &ctxt)->sel, + ctxt.insn_buf_eip, + ctxt.insn_buf[0], ctxt.insn_buf[1], + ctxt.insn_buf[2], ctxt.insn_buf[3], + ctxt.insn_buf[4], ctxt.insn_buf[5]); + return 0; + case X86EMUL_EXCEPTION: + if ( ctxt.flags.exn_pending ) + hvm_inject_exception(ctxt.exn_vector, 0, 0); break; - - case INSTR_MOVS: - { - struct segment_register sreg; - unsigned long count = GET_REPEAT_COUNT(); - int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1; - unsigned long addr, gfn; - paddr_t paddr; - int dir, size = op_size; - uint32_t pfec; - - ASSERT(count); - - /* determine non-MMIO address */ - addr = regs->edi; - if ( ad_size == WORD ) - addr &= 0xFFFF; - addr += hvm_get_segment_base(v, x86_seg_es); - pfec = PFEC_page_present | PFEC_write_access; - hvm_get_segment_register(v, x86_seg_ss, &sreg); - if ( sreg.attr.fields.dpl == 3 ) - pfec |= PFEC_user_mode; - gfn = paging_gva_to_gfn(v, addr, &pfec); - paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK); - if ( paddr == gpa ) - { - enum x86_segment seg; - - dir = IOREQ_WRITE; - addr = regs->esi; - if ( ad_size == WORD ) - addr &= 0xFFFF; - switch ( seg_sel ) - { - case 0x26: seg = x86_seg_es; break; - case 0x2e: seg = x86_seg_cs; break; - case 0x36: seg = x86_seg_ss; break; - case 0: - case 0x3e: seg = x86_seg_ds; break; - case 0x64: seg = x86_seg_fs; break; - case 0x65: seg = x86_seg_gs; break; - default: domain_crash_synchronous(); - } - addr += hvm_get_segment_base(v, seg); - pfec &= ~PFEC_write_access; - gfn = paging_gva_to_gfn(v, addr, &pfec); - paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK); - } - else - dir = IOREQ_READ; - - if ( gfn == INVALID_GFN ) - { - /* The guest does not have the non-mmio address mapped. - * Need to send in a page fault */ - regs->eip -= inst_len; /* do not advance %eip */ - hvm_inject_exception(TRAP_page_fault, pfec, addr); - return; - } - - /* - * In case of a movs spanning multiple pages, we break the accesses - * up into multiple pages (the device model works with non-continguous - * physical guest pages). To copy just one page, we adjust %ecx and - * do not advance %eip so that the next rep;movs copies the next page. 
- * Unaligned accesses, for example movsl starting at PGSZ-2, are - * turned into a single copy where we handle the overlapping memory - * copy ourself. After this copy succeeds, "rep movs" is executed - * again. - */ - if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) { - unsigned long value = 0; - - mmio_op->flags |= OVERLAP; - - if ( dir == IOREQ_WRITE ) { - if ( hvm_paging_enabled(v) ) - { - int rv = hvm_copy_from_guest_virt(&value, addr, size); - if ( rv == HVMCOPY_bad_gva_to_gfn ) - return; /* exception already injected */ - } - else - (void)hvm_copy_from_guest_phys(&value, addr, size); - } else /* dir != IOREQ_WRITE */ - /* Remember where to write the result, as a *VA*. - * Must be a VA so we can handle the page overlap - * correctly in hvm_mmio_assist() */ - mmio_op->addr = addr; - - if ( count != 1 ) - regs->eip -= inst_len; /* do not advance %eip */ - - send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, df, 0); - } else { - unsigned long last_addr = sign > 0 ? addr + count * size - 1 - : addr - (count - 1) * size; - - if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) ) - { - regs->eip -= inst_len; /* do not advance %eip */ - - if ( sign > 0 ) - count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size; - else - count = (addr & ~PAGE_MASK) / size + 1; - } - - ASSERT(count); - - send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, - paddr, dir, df, 1); - } + default: break; } - case INSTR_MOVZX: - case INSTR_MOVSX: - mmio_operands(IOREQ_TYPE_COPY, gpa, mmio_op, op_size); - break; - - case INSTR_STOS: - /* - * Since the destination is always in (contiguous) mmio space we don't - * need to break it up into pages. - */ - send_mmio_req(IOREQ_TYPE_COPY, gpa, - GET_REPEAT_COUNT(), op_size, regs->eax, IOREQ_WRITE, df, 0); - break; - - case INSTR_LODS: - /* - * Since the source is always in (contiguous) mmio space we don't - * need to break it up into pages. 
- */ - mmio_op->operand[0] = mk_operand(op_size, 0, 0, REGISTER); - send_mmio_req(IOREQ_TYPE_COPY, gpa, - GET_REPEAT_COUNT(), op_size, 0, IOREQ_READ, df, 0); - break; - - case INSTR_OR: - mmio_operands(IOREQ_TYPE_OR, gpa, mmio_op, op_size); - break; - - case INSTR_AND: - mmio_operands(IOREQ_TYPE_AND, gpa, mmio_op, op_size); - break; - - case INSTR_ADD: - mmio_operands(IOREQ_TYPE_ADD, gpa, mmio_op, op_size); - break; - - case INSTR_SUB: - mmio_operands(IOREQ_TYPE_SUB, gpa, mmio_op, op_size); - break; - - case INSTR_XOR: - mmio_operands(IOREQ_TYPE_XOR, gpa, mmio_op, op_size); - break; - - case INSTR_PUSH: - if ( ad_size == WORD ) - { - mmio_op->addr = (uint16_t)(regs->esp - op_size); - regs->esp = mmio_op->addr | (regs->esp & ~0xffff); - } - else - { - regs->esp -= op_size; - mmio_op->addr = regs->esp; - } - /* send the request and wait for the value */ - send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, op_size, 0, IOREQ_READ, df, 0); - break; - - case INSTR_CMP: /* Pass through */ - case INSTR_TEST: - /* send the request and wait for the value */ - send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, op_size, 0, IOREQ_READ, df, 0); - break; - - case INSTR_BT: - { - unsigned long value = 0; - int index, size; - - if ( mmio_op->operand[0] & REGISTER ) - { - index = operand_index(mmio_op->operand[0]); - size = operand_size(mmio_op->operand[0]); - value = get_reg_value(size, index, 0, regs); - } - else if ( mmio_op->operand[0] & IMMEDIATE ) - { - mmio_op->immediate = mmio_op->immediate; - value = mmio_op->immediate; - } - send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1, - op_size, 0, IOREQ_READ, df, 0); - break; - } - - case INSTR_XCHG: - if ( mmio_op->operand[0] & REGISTER ) { - long value; - unsigned long operand = mmio_op->operand[0]; - value = get_reg_value(operand_size(operand), - operand_index(operand), 0, - regs); - /* send the request and wait for the value */ - send_mmio_req(IOREQ_TYPE_XCHG, gpa, 1, - op_size, value, IOREQ_WRITE, df, 0); - } else { - /* the destination is a register */ - long value; - unsigned long operand = mmio_op->operand[1]; - value = get_reg_value(operand_size(operand), - operand_index(operand), 0, - regs); - /* send the request and wait for the value */ - send_mmio_req(IOREQ_TYPE_XCHG, gpa, 1, - op_size, value, IOREQ_WRITE, df, 0); - } - break; - - default: - printk("Unhandled MMIO instruction\n"); - domain_crash_synchronous(); - } + hvm_emulate_writeback(&ctxt); + + curr->arch.hvm_vcpu.mmio_in_progress = curr->arch.hvm_vcpu.io_in_progress; + + return 1; } DEFINE_PER_CPU(int, guest_handles_in_xen_space); diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/stdvga.c --- a/xen/arch/x86/hvm/stdvga.c Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/arch/x86/hvm/stdvga.c Wed Feb 20 14:36:45 2008 +0000 @@ -458,33 +458,6 @@ static int mmio_move(struct hvm_hw_stdvg return 1; } -static uint32_t op_and(uint32_t a, uint32_t b) { return a & b; } -static uint32_t op_or (uint32_t a, uint32_t b) { return a | b; } -static uint32_t op_xor(uint32_t a, uint32_t b) { return a ^ b; } -static uint32_t op_add(uint32_t a, uint32_t b) { return a + b; } -static uint32_t op_sub(uint32_t a, uint32_t b) { return a - b; } -static uint32_t (*op_array[])(uint32_t, uint32_t) = { - [IOREQ_TYPE_AND] = op_and, - [IOREQ_TYPE_OR ] = op_or, - [IOREQ_TYPE_XOR] = op_xor, - [IOREQ_TYPE_ADD] = op_add, - [IOREQ_TYPE_SUB] = op_sub -}; - -static int mmio_op(struct hvm_hw_stdvga *s, ioreq_t *p) -{ - uint32_t orig, mod = 0; - orig = stdvga_mem_read(p->addr, p->size); - - if ( p->dir == IOREQ_WRITE ) - { - mod = 
(op_array[p->type])(orig, p->data); - stdvga_mem_write(p->addr, mod, p->size); - } - - return 0; /* Don't try to buffer these operations */ -} - int stdvga_intercept_mmio(ioreq_t *p) { struct domain *d = current->domain; @@ -505,13 +478,6 @@ int stdvga_intercept_mmio(ioreq_t *p) { case IOREQ_TYPE_COPY: buf = mmio_move(s, p); - break; - case IOREQ_TYPE_AND: - case IOREQ_TYPE_OR: - case IOREQ_TYPE_XOR: - case IOREQ_TYPE_ADD: - case IOREQ_TYPE_SUB: - buf = mmio_op(s, p); break; default: gdprintk(XENLOG_WARNING, "unsupported mmio request type:%d " diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/arch/x86/hvm/svm/svm.c Wed Feb 20 14:36:45 2008 +0000 @@ -66,6 +66,13 @@ static void svm_update_guest_efer(struct static void svm_update_guest_efer(struct vcpu *v); static void svm_inject_exception( unsigned int trapnr, int errcode, unsigned long cr2); +static void svm_cpuid_intercept( + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); +static void svm_wbinvd_intercept(void); +static void svm_fpu_dirty_intercept(void); +static int svm_msr_read_intercept(struct cpu_user_regs *regs); +static int svm_msr_write_intercept(struct cpu_user_regs *regs); /* va of hardware host save area */ static void *hsa[NR_CPUS] __read_mostly; @@ -112,7 +119,7 @@ static enum handler_return long_mode_do_ switch ( ecx ) { case MSR_EFER: - if ( !hvm_set_efer(msr_content) ) + if ( hvm_set_efer(msr_content) ) return HNDL_exception_raised; break; @@ -808,7 +815,12 @@ static struct hvm_function_table svm_fun .inject_exception = svm_inject_exception, .init_hypercall_page = svm_init_hypercall_page, .event_pending = svm_event_pending, - .do_pmu_interrupt = svm_do_pmu_interrupt + .do_pmu_interrupt = svm_do_pmu_interrupt, + .cpuid_intercept = svm_cpuid_intercept, + .wbinvd_intercept = svm_wbinvd_intercept, + .fpu_dirty_intercept = svm_fpu_dirty_intercept, + .msr_read_intercept = svm_msr_read_intercept, + .msr_write_intercept = svm_msr_write_intercept }; int start_svm(struct cpuinfo_x86 *c) @@ -873,7 +885,8 @@ static void svm_do_nested_pgfault(paddr_ mfn = gfn_to_mfn_current(gfn, &p2mt); if ( p2mt == p2m_mmio_dm ) { - handle_mmio(gpa); + if ( !handle_mmio() ) + hvm_inject_exception(TRAP_gp_fault, 0, 0); return; } @@ -882,9 +895,10 @@ static void svm_do_nested_pgfault(paddr_ p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw); } -static void svm_do_no_device_fault(struct vmcb_struct *vmcb) +static void svm_fpu_dirty_intercept(void) { struct vcpu *curr = current; + struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb; svm_fpu_enter(curr); @@ -893,72 +907,83 @@ static void svm_do_no_device_fault(struc } #define bitmaskof(idx) (1U << ((idx) & 31)) -static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, - struct cpu_user_regs *regs) -{ - unsigned long input = regs->eax; - unsigned int eax, ebx, ecx, edx; +static void svm_cpuid_intercept( + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + unsigned int input = *eax; struct vcpu *v = current; - int inst_len; - - hvm_cpuid(input, &eax, &ebx, &ecx, &edx); + + hvm_cpuid(input, eax, ebx, ecx, edx); switch ( input ) { case 0x00000001: /* Mask Intel-only features. 
*/ - ecx &= ~(bitmaskof(X86_FEATURE_SSSE3) | - bitmaskof(X86_FEATURE_SSE4_1) | - bitmaskof(X86_FEATURE_SSE4_2)); + *ecx &= ~(bitmaskof(X86_FEATURE_SSSE3) | + bitmaskof(X86_FEATURE_SSE4_1) | + bitmaskof(X86_FEATURE_SSE4_2)); break; case 0x80000001: /* Filter features which are shared with 0x00000001:EDX. */ if ( vlapic_hw_disabled(vcpu_vlapic(v)) ) - __clear_bit(X86_FEATURE_APIC & 31, &edx); + __clear_bit(X86_FEATURE_APIC & 31, edx); #if CONFIG_PAGING_LEVELS >= 3 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) #endif - __clear_bit(X86_FEATURE_PAE & 31, &edx); - __clear_bit(X86_FEATURE_PSE36 & 31, &edx); + __clear_bit(X86_FEATURE_PAE & 31, edx); + __clear_bit(X86_FEATURE_PSE36 & 31, edx); /* Filter all other features according to a whitelist. */ - ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) | - bitmaskof(X86_FEATURE_ALTMOVCR) | - bitmaskof(X86_FEATURE_ABM) | - bitmaskof(X86_FEATURE_SSE4A) | - bitmaskof(X86_FEATURE_MISALIGNSSE) | - bitmaskof(X86_FEATURE_3DNOWPF)); - edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */ - bitmaskof(X86_FEATURE_NX) | - bitmaskof(X86_FEATURE_LM) | - bitmaskof(X86_FEATURE_SYSCALL) | - bitmaskof(X86_FEATURE_MP) | - bitmaskof(X86_FEATURE_MMXEXT) | - bitmaskof(X86_FEATURE_FFXSR)); + *ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) | + bitmaskof(X86_FEATURE_ALTMOVCR) | + bitmaskof(X86_FEATURE_ABM) | + bitmaskof(X86_FEATURE_SSE4A) | + bitmaskof(X86_FEATURE_MISALIGNSSE) | + bitmaskof(X86_FEATURE_3DNOWPF)); + *edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */ + bitmaskof(X86_FEATURE_NX) | + bitmaskof(X86_FEATURE_LM) | + bitmaskof(X86_FEATURE_SYSCALL) | + bitmaskof(X86_FEATURE_MP) | + bitmaskof(X86_FEATURE_MMXEXT) | + bitmaskof(X86_FEATURE_FFXSR)); break; case 0x80000007: case 0x8000000A: /* Mask out features of power management and SVM extension. 
*/ - eax = ebx = ecx = edx = 0; + *eax = *ebx = *ecx = *edx = 0; break; case 0x80000008: /* Make sure Number of CPU core is 1 when HTT=0 */ - ecx &= 0xFFFFFF00; - break; - } + *ecx &= 0xFFFFFF00; + break; + } + + HVMTRACE_3D(CPUID, v, input, + ((uint64_t)*eax << 32) | *ebx, ((uint64_t)*ecx << 32) | *edx); +} + +static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs) +{ + unsigned int eax, ebx, ecx, edx, inst_len; + + eax = regs->eax; + ebx = regs->ebx; + ecx = regs->ecx; + edx = regs->edx; + + svm_cpuid_intercept(&eax, &ebx, &ecx, &edx); regs->eax = eax; regs->ebx = ebx; regs->ecx = ecx; regs->edx = edx; - HVMTRACE_3D(CPUID, v, input, - ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx); - - inst_len = __get_instruction_length(v, INSTR_CPUID, NULL); + inst_len = __get_instruction_length(current, INSTR_CPUID, NULL); __update_guest_eip(regs, inst_len); } @@ -1484,11 +1509,11 @@ static int mov_to_cr(int gpreg, int cr, switch ( cr ) { case 0: - return hvm_set_cr0(value); + return !hvm_set_cr0(value); case 3: - return hvm_set_cr3(value); + return !hvm_set_cr3(value); case 4: - return hvm_set_cr4(value); + return !hvm_set_cr4(value); default: gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr); domain_crash(v->domain); @@ -1564,7 +1589,7 @@ static void svm_cr_access( gpreg = decode_src_reg(prefix, buffer[index+2]); value = get_reg(gpreg, regs, vmcb) & 0xF; value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value; - result = hvm_set_cr0(value); + result = !hvm_set_cr0(value); HVMTRACE_1D(LMSW, current, value); break; @@ -1635,176 +1660,197 @@ static void svm_cr_access( __update_guest_eip(regs, inst_len); } -static void svm_do_msr_access( - struct vcpu *v, struct cpu_user_regs *regs) -{ - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - int inst_len; - u64 msr_content=0; +static int svm_msr_read_intercept(struct cpu_user_regs *regs) +{ + u64 msr_content = 0; u32 ecx = regs->ecx, eax, edx; - - HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x, exitinfo = %lx", - ecx, (u32)regs->eax, (u32)regs->edx, - (unsigned long)vmcb->exitinfo1); - - /* is it a read? */ - if (vmcb->exitinfo1 == 0) - { - switch (ecx) { - case MSR_IA32_TSC: - msr_content = hvm_get_guest_time(v); + struct vcpu *v = current; + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + + switch ( ecx ) + { + case MSR_IA32_TSC: + msr_content = hvm_get_guest_time(v); + break; + + case MSR_IA32_APICBASE: + msr_content = vcpu_vlapic(v)->hw.apic_base_msr; + break; + + case MSR_EFER: + msr_content = v->arch.hvm_vcpu.guest_efer; + break; + + case MSR_IA32_MC4_MISC: /* Threshold register */ + case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3: + /* + * MCA/MCE: We report that the threshold register is unavailable + * for OS use (locked by the BIOS). + */ + msr_content = 1ULL << 61; /* MC4_MISC.Locked */ + break; + + case MSR_IA32_EBC_FREQUENCY_ID: + /* + * This Intel-only register may be accessed if this HVM guest + * has been migrated from an Intel host. The value zero is not + * particularly meaningful, but at least avoids the guest crashing! 
+ */ + msr_content = 0; + break; + + case MSR_K8_VM_HSAVE_PA: + goto gpf; + + case MSR_IA32_MCG_CAP: + case MSR_IA32_MCG_STATUS: + case MSR_IA32_MC0_STATUS: + case MSR_IA32_MC1_STATUS: + case MSR_IA32_MC2_STATUS: + case MSR_IA32_MC3_STATUS: + case MSR_IA32_MC4_STATUS: + case MSR_IA32_MC5_STATUS: + /* No point in letting the guest see real MCEs */ + msr_content = 0; + break; + + case MSR_IA32_DEBUGCTLMSR: + msr_content = vmcb->debugctlmsr; + break; + + case MSR_IA32_LASTBRANCHFROMIP: + msr_content = vmcb->lastbranchfromip; + break; + + case MSR_IA32_LASTBRANCHTOIP: + msr_content = vmcb->lastbranchtoip; + break; + + case MSR_IA32_LASTINTFROMIP: + msr_content = vmcb->lastintfromip; + break; + + case MSR_IA32_LASTINTTOIP: + msr_content = vmcb->lastinttoip; + break; + + default: + if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) || + rdmsr_safe(ecx, eax, edx) == 0 ) + { + regs->eax = eax; + regs->edx = edx; + goto done; + } + goto gpf; + } + regs->eax = msr_content & 0xFFFFFFFF; + regs->edx = msr_content >> 32; + + done: + hvmtrace_msr_read(v, ecx, msr_content); + HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx", + ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); + return X86EMUL_OKAY; + + gpf: + svm_inject_exception(TRAP_gp_fault, 0, 0); + return X86EMUL_EXCEPTION; +} + +static int svm_msr_write_intercept(struct cpu_user_regs *regs) +{ + u64 msr_content = 0; + u32 ecx = regs->ecx; + struct vcpu *v = current; + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + + msr_content = (u32)regs->eax | ((u64)regs->edx << 32); + + hvmtrace_msr_write(v, ecx, msr_content); + + switch ( ecx ) + { + case MSR_IA32_TSC: + hvm_set_guest_time(v, msr_content); + pt_reset(v); + break; + + case MSR_IA32_APICBASE: + vlapic_msr_set(vcpu_vlapic(v), msr_content); + break; + + case MSR_K8_VM_HSAVE_PA: + goto gpf; + + case MSR_IA32_DEBUGCTLMSR: + vmcb->debugctlmsr = msr_content; + if ( !msr_content || !cpu_has_svm_lbrv ) break; - - case MSR_IA32_APICBASE: - msr_content = vcpu_vlapic(v)->hw.apic_base_msr; + vmcb->lbr_control.fields.enable = 1; + svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR); + svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP); + svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP); + svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP); + svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP); + break; + + case MSR_IA32_LASTBRANCHFROMIP: + vmcb->lastbranchfromip = msr_content; + break; + + case MSR_IA32_LASTBRANCHTOIP: + vmcb->lastbranchtoip = msr_content; + break; + + case MSR_IA32_LASTINTFROMIP: + vmcb->lastintfromip = msr_content; + break; + + case MSR_IA32_LASTINTTOIP: + vmcb->lastinttoip = msr_content; + break; + + default: + switch ( long_mode_do_msr_write(regs) ) + { + case HNDL_unhandled: + wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx); break; - - case MSR_EFER: - msr_content = v->arch.hvm_vcpu.guest_efer; + case HNDL_exception_raised: + return X86EMUL_EXCEPTION; + case HNDL_done: break; - - case MSR_IA32_MC4_MISC: /* Threshold register */ - case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3: - /* - * MCA/MCE: We report that the threshold register is unavailable - * for OS use (locked by the BIOS). - */ - msr_content = 1ULL << 61; /* MC4_MISC.Locked */ - break; - - case MSR_IA32_EBC_FREQUENCY_ID: - /* - * This Intel-only register may be accessed if this HVM guest - * has been migrated from an Intel host. The value zero is not - * particularly meaningful, but at least avoids the guest crashing! 
- */ - msr_content = 0; - break; - - case MSR_K8_VM_HSAVE_PA: - svm_inject_exception(TRAP_gp_fault, 0, 0); - break; - - case MSR_IA32_MCG_CAP: - case MSR_IA32_MCG_STATUS: - case MSR_IA32_MC0_STATUS: - case MSR_IA32_MC1_STATUS: - case MSR_IA32_MC2_STATUS: - case MSR_IA32_MC3_STATUS: - case MSR_IA32_MC4_STATUS: - case MSR_IA32_MC5_STATUS: - /* No point in letting the guest see real MCEs */ - msr_content = 0; - break; - - case MSR_IA32_DEBUGCTLMSR: - msr_content = vmcb->debugctlmsr; - break; - - case MSR_IA32_LASTBRANCHFROMIP: - msr_content = vmcb->lastbranchfromip; - break; - - case MSR_IA32_LASTBRANCHTOIP: - msr_content = vmcb->lastbranchtoip; - break; - - case MSR_IA32_LASTINTFROMIP: - msr_content = vmcb->lastintfromip; - break; - - case MSR_IA32_LASTINTTOIP: - msr_content = vmcb->lastinttoip; - break; - - default: - if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) || - rdmsr_safe(ecx, eax, edx) == 0 ) - { - regs->eax = eax; - regs->edx = edx; - goto done; - } - svm_inject_exception(TRAP_gp_fault, 0, 0); - return; - } - regs->eax = msr_content & 0xFFFFFFFF; - regs->edx = msr_content >> 32; - - done: - hvmtrace_msr_read(v, ecx, msr_content); - HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx", - ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); - + } + break; + } + + return X86EMUL_OKAY; + + gpf: + svm_inject_exception(TRAP_gp_fault, 0, 0); + return X86EMUL_EXCEPTION; +} + +static void svm_do_msr_access(struct cpu_user_regs *regs) +{ + int rc, inst_len; + struct vcpu *v = current; + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + + if ( vmcb->exitinfo1 == 0 ) + { + rc = svm_msr_read_intercept(regs); inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL); } else { - msr_content = (u32)regs->eax | ((u64)regs->edx << 32); - - hvmtrace_msr_write(v, ecx, msr_content); - - switch (ecx) - { - case MSR_IA32_TSC: - hvm_set_guest_time(v, msr_content); - pt_reset(v); - break; - - case MSR_IA32_APICBASE: - vlapic_msr_set(vcpu_vlapic(v), msr_content); - break; - - case MSR_K8_VM_HSAVE_PA: - svm_inject_exception(TRAP_gp_fault, 0, 0); - break; - - case MSR_IA32_DEBUGCTLMSR: - vmcb->debugctlmsr = msr_content; - if ( !msr_content || !cpu_has_svm_lbrv ) - break; - vmcb->lbr_control.fields.enable = 1; - svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR); - svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP); - svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP); - svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP); - svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP); - break; - - case MSR_IA32_LASTBRANCHFROMIP: - vmcb->lastbranchfromip = msr_content; - break; - - case MSR_IA32_LASTBRANCHTOIP: - vmcb->lastbranchtoip = msr_content; - break; - - case MSR_IA32_LASTINTFROMIP: - vmcb->lastintfromip = msr_content; - break; - - case MSR_IA32_LASTINTTOIP: - vmcb->lastinttoip = msr_content; - break; - - default: - switch ( long_mode_do_msr_write(regs) ) - { - case HNDL_unhandled: - wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx); - break; - case HNDL_exception_raised: - return; - case HNDL_done: - break; - } - break; - } - + rc = svm_msr_write_intercept(regs); inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL); } - __update_guest_eip(regs, inst_len); + if ( rc == X86EMUL_OKAY ) + __update_guest_eip(regs, inst_len); } static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb, @@ -1830,21 +1876,26 @@ static void svm_vmexit_do_hlt(struct vmc hvm_hlt(regs->eflags); } +static void wbinvd_ipi(void *info) +{ + wbinvd(); +} + +static void svm_wbinvd_intercept(void) +{ + if 
( !list_empty(&(domain_hvm_iommu(current->domain)->pdev_list)) ) + on_each_cpu(wbinvd_ipi, NULL, 1, 1); +} + static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs) { enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD }; - struct vcpu *curr = current; - struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb; int inst_len; - if ( !list_empty(&(domain_hvm_iommu(curr->domain)->pdev_list)) ) - { - vmcb->general2_intercepts &= ~GENERAL2_INTERCEPT_WBINVD; - wbinvd(); - } + svm_wbinvd_intercept(); inst_len = __get_instruction_length_from_list( - curr, list, ARRAY_SIZE(list), NULL, NULL); + current, list, ARRAY_SIZE(list), NULL, NULL); __update_guest_eip(regs, inst_len); } @@ -1982,7 +2033,7 @@ asmlinkage void svm_vmexit_handler(struc break; case VMEXIT_EXCEPTION_NM: - svm_do_no_device_fault(vmcb); + svm_fpu_dirty_intercept(); break; case VMEXIT_EXCEPTION_PF: { @@ -2036,7 +2087,7 @@ asmlinkage void svm_vmexit_handler(struc } case VMEXIT_CPUID: - svm_vmexit_do_cpuid(vmcb, regs); + svm_vmexit_do_cpuid(regs); break; case VMEXIT_HLT: @@ -2083,7 +2134,7 @@ asmlinkage void svm_vmexit_handler(struc break; case VMEXIT_MSR: - svm_do_msr_access(v, regs); + svm_do_msr_access(regs); break; case VMEXIT_SHUTDOWN: diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/vmx/realmode.c --- a/xen/arch/x86/hvm/vmx/realmode.c Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/arch/x86/hvm/vmx/realmode.c Wed Feb 20 14:36:45 2008 +0000 @@ -3,7 +3,7 @@ * * Real-mode emulation for VMX. * - * Copyright (c) 2007 Citrix Systems, Inc. + * Copyright (c) 2007-2008 Citrix Systems, Inc. * * Authors: * Keir Fraser <keir.fraser@xxxxxxxxxx> @@ -15,33 +15,14 @@ #include <xen/sched.h> #include <xen/paging.h> #include <asm/event.h> +#include <asm/hvm/emulate.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/hvm/vmx/vmx.h> #include <asm/hvm/vmx/vmcs.h> -#include <asm/x86_emulate.h> struct realmode_emulate_ctxt { - struct x86_emulate_ctxt ctxt; - - /* Cache of 16 bytes of instruction. 
*/ - uint8_t insn_buf[16]; - unsigned long insn_buf_eip; - - struct segment_register seg_reg[10]; - - union { - struct { - unsigned int hlt:1; - unsigned int mov_ss:1; - unsigned int sti:1; - } flags; - unsigned int flag_word; - }; - - uint8_t exn_vector; - uint8_t exn_insn_len; - + struct hvm_emulate_ctxt hvm; uint32_t intr_shadow; }; @@ -50,12 +31,15 @@ static void realmode_deliver_exception( unsigned int insn_len, struct realmode_emulate_ctxt *rm_ctxt) { - struct segment_register *idtr = &rm_ctxt->seg_reg[x86_seg_idtr]; - struct segment_register *csr = &rm_ctxt->seg_reg[x86_seg_cs]; - struct cpu_user_regs *regs = rm_ctxt->ctxt.regs; + struct segment_register *idtr, *csr; + struct cpu_user_regs *regs = rm_ctxt->hvm.ctxt.regs; uint32_t cs_eip, pstk; uint16_t frame[3]; unsigned int last_byte; + + idtr = hvmemul_get_seg_reg(x86_seg_idtr, &rm_ctxt->hvm); + csr = hvmemul_get_seg_reg(x86_seg_cs, &rm_ctxt->hvm); + __set_bit(x86_seg_cs, &rm_ctxt->hvm.seg_reg_dirty); again: last_byte = (vector * 4) + 3; @@ -90,7 +74,7 @@ static void realmode_deliver_exception( frame[1] = csr->sel; frame[2] = regs->eflags & ~X86_EFLAGS_RF; - if ( rm_ctxt->ctxt.addr_size == 32 ) + if ( rm_ctxt->hvm.ctxt.addr_size == 32 ) { regs->esp -= 6; pstk = regs->esp; @@ -102,7 +86,7 @@ static void realmode_deliver_exception( regs->esp |= pstk; } - pstk += rm_ctxt->seg_reg[x86_seg_ss].base; + pstk += hvmemul_get_seg_reg(x86_seg_ss, &rm_ctxt->hvm)->base; (void)hvm_copy_to_guest_phys(pstk, frame, sizeof(frame)); csr->sel = cs_eip >> 16; @@ -118,597 +102,34 @@ static void realmode_deliver_exception( } } -static uint32_t virtual_to_linear( - enum x86_segment seg, - uint32_t offset, - struct realmode_emulate_ctxt *rm_ctxt) -{ - uint32_t addr = offset; - if ( seg == x86_seg_none ) - return addr; - ASSERT(is_x86_user_segment(seg)); - return addr + rm_ctxt->seg_reg[seg].base; -} - -static int -realmode_read( - enum x86_segment seg, - unsigned long offset, - unsigned long *val, - unsigned int bytes, - enum hvm_access_type access_type, - struct realmode_emulate_ctxt *rm_ctxt) -{ - uint32_t addr = virtual_to_linear(seg, offset, rm_ctxt); - - *val = 0; - - if ( hvm_copy_from_guest_virt_nofault(val, addr, bytes) ) - { - struct vcpu *curr = current; - - if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE ) - return X86EMUL_UNHANDLEABLE; - - if ( curr->arch.hvm_vmx.real_mode_io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - if ( !curr->arch.hvm_vmx.real_mode_io_completed ) - { - curr->arch.hvm_vmx.real_mode_io_in_progress = 1; - send_mmio_req(IOREQ_TYPE_COPY, addr, 1, bytes, - 0, IOREQ_READ, 0, 0); - } - - if ( !curr->arch.hvm_vmx.real_mode_io_completed ) - return X86EMUL_RETRY; - - *val = curr->arch.hvm_vmx.real_mode_io_data; - curr->arch.hvm_vmx.real_mode_io_completed = 0; - } - - return X86EMUL_OKAY; -} - -static int -realmode_emulate_read( - enum x86_segment seg, - unsigned long offset, - unsigned long *val, - unsigned int bytes, - struct x86_emulate_ctxt *ctxt) -{ - return realmode_read( - seg, offset, val, bytes, hvm_access_read, - container_of(ctxt, struct realmode_emulate_ctxt, ctxt)); -} - -static int -realmode_emulate_insn_fetch( - enum x86_segment seg, - unsigned long offset, - unsigned long *val, - unsigned int bytes, - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - unsigned int insn_off = offset - rm_ctxt->insn_buf_eip; - - /* Fall back if requested bytes are not in the prefetch cache. 
*/ - if ( unlikely((insn_off + bytes) > sizeof(rm_ctxt->insn_buf)) ) - return realmode_read( - seg, offset, val, bytes, - hvm_access_insn_fetch, rm_ctxt); - - /* Hit the cache. Simple memcpy. */ - *val = 0; - memcpy(val, &rm_ctxt->insn_buf[insn_off], bytes); - return X86EMUL_OKAY; -} - -static int -realmode_emulate_write( - enum x86_segment seg, - unsigned long offset, - unsigned long val, - unsigned int bytes, - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - uint32_t addr = virtual_to_linear(seg, offset, rm_ctxt); - - if ( hvm_copy_to_guest_virt_nofault(addr, &val, bytes) ) - { - struct vcpu *curr = current; - - if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE ) - return X86EMUL_UNHANDLEABLE; - - if ( curr->arch.hvm_vmx.real_mode_io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - curr->arch.hvm_vmx.real_mode_io_in_progress = 1; - send_mmio_req(IOREQ_TYPE_COPY, addr, 1, bytes, - val, IOREQ_WRITE, 0, 0); - } - - return X86EMUL_OKAY; -} - -static int -realmode_emulate_cmpxchg( - enum x86_segment seg, - unsigned long offset, - unsigned long old, - unsigned long new, - unsigned int bytes, - struct x86_emulate_ctxt *ctxt) -{ - /* Fix this in case the guest is really relying on r-m-w atomicity. */ - return realmode_emulate_write(seg, offset, new, bytes, ctxt); -} - -static int -realmode_rep_ins( - uint16_t src_port, - enum x86_segment dst_seg, - unsigned long dst_offset, - unsigned int bytes_per_rep, - unsigned long *reps, - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); +static void realmode_emulate_one(struct realmode_emulate_ctxt *rm_ctxt) +{ + struct cpu_user_regs *regs = rm_ctxt->hvm.ctxt.regs; struct vcpu *curr = current; - uint32_t paddr = virtual_to_linear(dst_seg, dst_offset, rm_ctxt); - - if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE ) - return X86EMUL_UNHANDLEABLE; - - if ( curr->arch.hvm_vmx.real_mode_io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - if ( !curr->arch.hvm_vmx.real_mode_io_completed ) - { - curr->arch.hvm_vmx.real_mode_io_in_progress = 1; - send_pio_req(src_port, *reps, bytes_per_rep, - paddr, IOREQ_READ, - !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); - } - - if ( !curr->arch.hvm_vmx.real_mode_io_completed ) - return X86EMUL_RETRY; - - curr->arch.hvm_vmx.real_mode_io_completed = 0; - - return X86EMUL_OKAY; -} - -static int -realmode_rep_outs( - enum x86_segment src_seg, - unsigned long src_offset, - uint16_t dst_port, - unsigned int bytes_per_rep, - unsigned long *reps, - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - struct vcpu *curr = current; - uint32_t paddr = virtual_to_linear(src_seg, src_offset, rm_ctxt); - - if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE ) - return X86EMUL_UNHANDLEABLE; - - if ( curr->arch.hvm_vmx.real_mode_io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - curr->arch.hvm_vmx.real_mode_io_in_progress = 1; - send_pio_req(dst_port, *reps, bytes_per_rep, - paddr, IOREQ_WRITE, - !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); - - return X86EMUL_OKAY; -} - -static int -realmode_rep_movs( - enum x86_segment src_seg, - unsigned long src_offset, - enum x86_segment dst_seg, - unsigned long dst_offset, - unsigned int bytes_per_rep, - unsigned long *reps, - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, 
ctxt); - struct vcpu *curr = current; - uint32_t saddr = virtual_to_linear(src_seg, src_offset, rm_ctxt); - uint32_t daddr = virtual_to_linear(dst_seg, dst_offset, rm_ctxt); - p2m_type_t p2mt; - - if ( (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) || - curr->arch.hvm_vmx.real_mode_io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - mfn_x(gfn_to_mfn_current(saddr >> PAGE_SHIFT, &p2mt)); - if ( !p2m_is_ram(p2mt) ) - { - if ( !curr->arch.hvm_vmx.real_mode_io_completed ) - { - curr->arch.hvm_vmx.real_mode_io_in_progress = 1; - send_mmio_req(IOREQ_TYPE_COPY, saddr, *reps, bytes_per_rep, - daddr, IOREQ_READ, - !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); - } - - if ( !curr->arch.hvm_vmx.real_mode_io_completed ) - return X86EMUL_RETRY; - - curr->arch.hvm_vmx.real_mode_io_completed = 0; - } - else - { - mfn_x(gfn_to_mfn_current(daddr >> PAGE_SHIFT, &p2mt)); - if ( p2m_is_ram(p2mt) ) - return X86EMUL_UNHANDLEABLE; - curr->arch.hvm_vmx.real_mode_io_in_progress = 1; - send_mmio_req(IOREQ_TYPE_COPY, daddr, *reps, bytes_per_rep, - saddr, IOREQ_WRITE, - !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); - } - - return X86EMUL_OKAY; -} - -static int -realmode_read_segment( - enum x86_segment seg, - struct segment_register *reg, - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - memcpy(reg, &rm_ctxt->seg_reg[seg], sizeof(struct segment_register)); - return X86EMUL_OKAY; -} - -static int -realmode_write_segment( - enum x86_segment seg, - struct segment_register *reg, - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - struct vcpu *curr = current; - - if ( seg == x86_seg_cs ) - { - if ( reg->attr.fields.dpl != 0 ) - return X86EMUL_UNHANDLEABLE; + unsigned long seg_reg_dirty; + uint32_t new_intr_shadow, intr_info; + int rc; + + seg_reg_dirty = rm_ctxt->hvm.seg_reg_dirty; + rm_ctxt->hvm.seg_reg_dirty = 0; + + rc = hvm_emulate_one(&rm_ctxt->hvm); + + if ( test_bit(x86_seg_cs, &rm_ctxt->hvm.seg_reg_dirty) ) + { curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_CS; - if ( reg->sel & 3 ) + if ( hvmemul_get_seg_reg(x86_seg_cs, &rm_ctxt->hvm)->sel & 3 ) curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_CS; } - if ( seg == x86_seg_ss ) - { - if ( reg->attr.fields.dpl != 0 ) - return X86EMUL_UNHANDLEABLE; + if ( test_bit(x86_seg_ss, &rm_ctxt->hvm.seg_reg_dirty) ) + { curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_SS; - if ( reg->sel & 3 ) + if ( hvmemul_get_seg_reg(x86_seg_ss, &rm_ctxt->hvm)->sel & 3 ) curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_SS; - rm_ctxt->flags.mov_ss = 1; - } - - memcpy(&rm_ctxt->seg_reg[seg], reg, sizeof(struct segment_register)); - - return X86EMUL_OKAY; -} - -static int -realmode_read_io( - unsigned int port, - unsigned int bytes, - unsigned long *val, - struct x86_emulate_ctxt *ctxt) -{ - struct vcpu *curr = current; - - if ( curr->arch.hvm_vmx.real_mode_io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - if ( !curr->arch.hvm_vmx.real_mode_io_completed ) - { - curr->arch.hvm_vmx.real_mode_io_in_progress = 1; - send_pio_req(port, 1, bytes, 0, IOREQ_READ, 0, 0); - } - - if ( !curr->arch.hvm_vmx.real_mode_io_completed ) - return X86EMUL_RETRY; - - *val = curr->arch.hvm_vmx.real_mode_io_data; - curr->arch.hvm_vmx.real_mode_io_completed = 0; - - return X86EMUL_OKAY; -} - -static int realmode_write_io( - unsigned int port, - unsigned int bytes, - unsigned long val, - struct x86_emulate_ctxt *ctxt) -{ - struct vcpu *curr = current; - - if ( port == 0xe9 ) 
- { - hvm_print_line(curr, val); - return X86EMUL_OKAY; - } - - if ( curr->arch.hvm_vmx.real_mode_io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - curr->arch.hvm_vmx.real_mode_io_in_progress = 1; - send_pio_req(port, 1, bytes, val, IOREQ_WRITE, 0, 0); - - return X86EMUL_OKAY; -} - -static int -realmode_read_cr( - unsigned int reg, - unsigned long *val, - struct x86_emulate_ctxt *ctxt) -{ - switch ( reg ) - { - case 0: - case 2: - case 3: - case 4: - *val = current->arch.hvm_vcpu.guest_cr[reg]; - break; - default: - return X86EMUL_UNHANDLEABLE; - } - - return X86EMUL_OKAY; -} - -static int -realmode_write_cr( - unsigned int reg, - unsigned long val, - struct x86_emulate_ctxt *ctxt) -{ - switch ( reg ) - { - case 0: - if ( !hvm_set_cr0(val) ) - return X86EMUL_UNHANDLEABLE; - break; - case 2: - current->arch.hvm_vcpu.guest_cr[2] = val; - break; - case 3: - if ( !hvm_set_cr3(val) ) - return X86EMUL_UNHANDLEABLE; - break; - case 4: - if ( !hvm_set_cr4(val) ) - return X86EMUL_UNHANDLEABLE; - break; - default: - return X86EMUL_UNHANDLEABLE; - } - - return X86EMUL_OKAY; -} - -static int -realmode_read_msr( - unsigned long reg, - uint64_t *val, - struct x86_emulate_ctxt *ctxt) -{ - struct cpu_user_regs _regs; - - _regs.ecx = (uint32_t)reg; - - if ( !vmx_msr_read_intercept(&_regs) ) - { - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - rm_ctxt->exn_vector = (uint8_t)__vmread(VM_ENTRY_INTR_INFO); - rm_ctxt->exn_insn_len = 0; - __vmwrite(VM_ENTRY_INTR_INFO, 0); - return X86EMUL_EXCEPTION; - } - - *val = ((uint64_t)(uint32_t)_regs.edx << 32) || (uint32_t)_regs.eax; - return X86EMUL_OKAY; -} - -static int -realmode_write_msr( - unsigned long reg, - uint64_t val, - struct x86_emulate_ctxt *ctxt) -{ - struct cpu_user_regs _regs; - - _regs.edx = (uint32_t)(val >> 32); - _regs.eax = (uint32_t)val; - _regs.ecx = (uint32_t)reg; - - if ( !vmx_msr_write_intercept(&_regs) ) - { - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - rm_ctxt->exn_vector = (uint8_t)__vmread(VM_ENTRY_INTR_INFO); - rm_ctxt->exn_insn_len = 0; - __vmwrite(VM_ENTRY_INTR_INFO, 0); - return X86EMUL_EXCEPTION; - } - - return X86EMUL_OKAY; -} - -static int realmode_write_rflags( - unsigned long val, - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - if ( (val & X86_EFLAGS_IF) && !(ctxt->regs->eflags & X86_EFLAGS_IF) ) - rm_ctxt->flags.sti = 1; - return X86EMUL_OKAY; -} - -static int realmode_wbinvd( - struct x86_emulate_ctxt *ctxt) -{ - vmx_wbinvd_intercept(); - return X86EMUL_OKAY; -} - -static int realmode_cpuid( - unsigned int *eax, - unsigned int *ebx, - unsigned int *ecx, - unsigned int *edx, - struct x86_emulate_ctxt *ctxt) -{ - vmx_cpuid_intercept(eax, ebx, ecx, edx); - return X86EMUL_OKAY; -} - -static int realmode_hlt( - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - rm_ctxt->flags.hlt = 1; - return X86EMUL_OKAY; -} - -static int realmode_inject_hw_exception( - uint8_t vector, - uint16_t error_code, - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - - /* We don't emulate protected-mode exception delivery. 
*/ - if ( current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE ) - return X86EMUL_UNHANDLEABLE; - - if ( error_code != 0 ) - return X86EMUL_UNHANDLEABLE; - - rm_ctxt->exn_vector = vector; - rm_ctxt->exn_insn_len = 0; - - return X86EMUL_OKAY; -} - -static int realmode_inject_sw_interrupt( - uint8_t vector, - uint8_t insn_len, - struct x86_emulate_ctxt *ctxt) -{ - struct realmode_emulate_ctxt *rm_ctxt = - container_of(ctxt, struct realmode_emulate_ctxt, ctxt); - - /* We don't emulate protected-mode exception delivery. */ - if ( current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE ) - return X86EMUL_UNHANDLEABLE; - - rm_ctxt->exn_vector = vector; - rm_ctxt->exn_insn_len = insn_len; - - return X86EMUL_OKAY; -} - -static void realmode_load_fpu_ctxt( - struct x86_emulate_ctxt *ctxt) -{ - if ( !current->fpu_dirtied ) - vmx_do_no_device_fault(); -} - -static struct x86_emulate_ops realmode_emulator_ops = { - .read = realmode_emulate_read, - .insn_fetch = realmode_emulate_insn_fetch, - .write = realmode_emulate_write, - .cmpxchg = realmode_emulate_cmpxchg, - .rep_ins = realmode_rep_ins, - .rep_outs = realmode_rep_outs, - .rep_movs = realmode_rep_movs, - .read_segment = realmode_read_segment, - .write_segment = realmode_write_segment, - .read_io = realmode_read_io, - .write_io = realmode_write_io, - .read_cr = realmode_read_cr, - .write_cr = realmode_write_cr, - .read_msr = realmode_read_msr, - .write_msr = realmode_write_msr, - .write_rflags = realmode_write_rflags, - .wbinvd = realmode_wbinvd, - .cpuid = realmode_cpuid, - .hlt = realmode_hlt, - .inject_hw_exception = realmode_inject_hw_exception, - .inject_sw_interrupt = realmode_inject_sw_interrupt, - .load_fpu_ctxt = realmode_load_fpu_ctxt -}; - -static void realmode_emulate_one(struct realmode_emulate_ctxt *rm_ctxt) -{ - struct cpu_user_regs *regs = rm_ctxt->ctxt.regs; - struct vcpu *curr = current; - u32 new_intr_shadow; - int rc, io_completed; - unsigned long addr; - - rm_ctxt->ctxt.addr_size = - rm_ctxt->seg_reg[x86_seg_cs].attr.fields.db ? 32 : 16; - rm_ctxt->ctxt.sp_size = - rm_ctxt->seg_reg[x86_seg_ss].attr.fields.db ? 32 : 16; - - rm_ctxt->insn_buf_eip = (uint32_t)regs->eip; - addr = virtual_to_linear(x86_seg_cs, regs->eip, rm_ctxt); - if ( hvm_fetch_from_guest_virt_nofault(rm_ctxt->insn_buf, addr, - sizeof(rm_ctxt->insn_buf)) - != HVMCOPY_okay ) - { - gdprintk(XENLOG_ERR, "Failed to pre-fetch instruction bytes.\n"); - goto fail; - } - - rm_ctxt->flag_word = 0; - - io_completed = curr->arch.hvm_vmx.real_mode_io_completed; - if ( curr->arch.hvm_vmx.real_mode_io_in_progress ) - { - gdprintk(XENLOG_ERR, "I/O in progress before insn is emulated.\n"); - goto fail; - } - - rc = x86_emulate(&rm_ctxt->ctxt, &realmode_emulator_ops); - - if ( curr->arch.hvm_vmx.real_mode_io_completed ) - { - gdprintk(XENLOG_ERR, "I/O completion after insn is emulated.\n"); - goto fail; - } + } + + rm_ctxt->hvm.seg_reg_dirty |= seg_reg_dirty; if ( rc == X86EMUL_UNHANDLEABLE ) { @@ -717,31 +138,18 @@ static void realmode_emulate_one(struct } if ( rc == X86EMUL_RETRY ) - { - BUG_ON(!curr->arch.hvm_vmx.real_mode_io_in_progress); - if ( !io_completed ) - return; - gdprintk(XENLOG_ERR, "Multiple I/O reads in a single insn.\n"); - goto fail; - } - - if ( curr->arch.hvm_vmx.real_mode_io_in_progress && - (get_ioreq(curr)->vp_ioreq.dir == IOREQ_READ) ) - { - gdprintk(XENLOG_ERR, "I/O read in progress but insn is retired.\n"); - goto fail; - } + return; new_intr_shadow = rm_ctxt->intr_shadow; /* MOV-SS instruction toggles MOV-SS shadow, else we just clear it. 
*/ - if ( rm_ctxt->flags.mov_ss ) + if ( rm_ctxt->hvm.flags.mov_ss ) new_intr_shadow ^= VMX_INTR_SHADOW_MOV_SS; else new_intr_shadow &= ~VMX_INTR_SHADOW_MOV_SS; /* STI instruction toggles STI shadow, else we just clear it. */ - if ( rm_ctxt->flags.sti ) + if ( rm_ctxt->hvm.flags.sti ) new_intr_shadow ^= VMX_INTR_SHADOW_STI; else new_intr_shadow &= ~VMX_INTR_SHADOW_STI; @@ -755,10 +163,30 @@ static void realmode_emulate_one(struct if ( rc == X86EMUL_EXCEPTION ) { + if ( !rm_ctxt->hvm.flags.exn_pending ) + { + intr_info = __vmread(VM_ENTRY_INTR_INFO); + __vmwrite(VM_ENTRY_INTR_INFO, 0); + if ( !(intr_info & INTR_INFO_VALID_MASK) ) + { + gdprintk(XENLOG_ERR, "Exception pending but no info.\n"); + goto fail; + } + rm_ctxt->hvm.exn_vector = (uint8_t)intr_info; + rm_ctxt->hvm.exn_insn_len = 0; + } + + if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE ) + { + gdprintk(XENLOG_ERR, "Exception %02x in protected mode.\n", + rm_ctxt->hvm.exn_vector); + goto fail; + } + realmode_deliver_exception( - rm_ctxt->exn_vector, rm_ctxt->exn_insn_len, rm_ctxt); - } - else if ( rm_ctxt->flags.hlt && !hvm_local_events_need_delivery(curr) ) + rm_ctxt->hvm.exn_vector, rm_ctxt->hvm.exn_insn_len, rm_ctxt); + } + else if ( rm_ctxt->hvm.flags.hlt && !hvm_local_events_need_delivery(curr) ) { hvm_hlt(regs->eflags); } @@ -769,10 +197,11 @@ static void realmode_emulate_one(struct gdprintk(XENLOG_ERR, "Real-mode emulation failed @ %04x:%08lx: " "%02x %02x %02x %02x %02x %02x\n", - rm_ctxt->seg_reg[x86_seg_cs].sel, rm_ctxt->insn_buf_eip, - rm_ctxt->insn_buf[0], rm_ctxt->insn_buf[1], - rm_ctxt->insn_buf[2], rm_ctxt->insn_buf[3], - rm_ctxt->insn_buf[4], rm_ctxt->insn_buf[5]); + hvmemul_get_seg_reg(x86_seg_cs, &rm_ctxt->hvm)->sel, + rm_ctxt->hvm.insn_buf_eip, + rm_ctxt->hvm.insn_buf[0], rm_ctxt->hvm.insn_buf[1], + rm_ctxt->hvm.insn_buf[2], rm_ctxt->hvm.insn_buf[3], + rm_ctxt->hvm.insn_buf[4], rm_ctxt->hvm.insn_buf[5]); domain_crash_synchronous(); } @@ -780,18 +209,20 @@ void vmx_realmode(struct cpu_user_regs * { struct vcpu *curr = current; struct realmode_emulate_ctxt rm_ctxt; - unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO); - unsigned int i, emulations = 0; - - rm_ctxt.ctxt.regs = regs; - - for ( i = 0; i < 10; i++ ) - hvm_get_segment_register(curr, i, &rm_ctxt.seg_reg[i]); - + struct segment_register *sreg; + unsigned long intr_info; + unsigned int emulations = 0; + + /* Get-and-clear VM_ENTRY_INTR_INFO. */ + intr_info = __vmread(VM_ENTRY_INTR_INFO); + if ( intr_info & INTR_INFO_VALID_MASK ) + __vmwrite(VM_ENTRY_INTR_INFO, 0); + + hvm_emulate_prepare(&rm_ctxt.hvm, regs); rm_ctxt.intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO); - if ( curr->arch.hvm_vmx.real_mode_io_in_progress || - curr->arch.hvm_vmx.real_mode_io_completed ) + if ( curr->arch.hvm_vcpu.io_in_progress || + curr->arch.hvm_vcpu.io_completed ) realmode_emulate_one(&rm_ctxt); /* Only deliver interrupts into emulated real mode. */ @@ -799,12 +230,12 @@ void vmx_realmode(struct cpu_user_regs * (intr_info & INTR_INFO_VALID_MASK) ) { realmode_deliver_exception((uint8_t)intr_info, 0, &rm_ctxt); - __vmwrite(VM_ENTRY_INTR_INFO, 0); + intr_info = 0; } while ( curr->arch.hvm_vmx.vmxemul && !softirq_pending(smp_processor_id()) && - !curr->arch.hvm_vmx.real_mode_io_in_progress ) + !curr->arch.hvm_vcpu.io_in_progress ) { /* * Check for pending interrupts only every 16 instructions, because @@ -825,34 +256,22 @@ void vmx_realmode(struct cpu_user_regs * * At this point CS.RPL == SS.RPL == CS.DPL == SS.DPL == 0. 
For * DS, ES, FS and GS the most uninvasive trick is to set DPL == RPL. */ - rm_ctxt.seg_reg[x86_seg_ds].attr.fields.dpl = - rm_ctxt.seg_reg[x86_seg_ds].sel & 3; - rm_ctxt.seg_reg[x86_seg_es].attr.fields.dpl = - rm_ctxt.seg_reg[x86_seg_es].sel & 3; - rm_ctxt.seg_reg[x86_seg_fs].attr.fields.dpl = - rm_ctxt.seg_reg[x86_seg_fs].sel & 3; - rm_ctxt.seg_reg[x86_seg_gs].attr.fields.dpl = - rm_ctxt.seg_reg[x86_seg_gs].sel & 3; - } - - for ( i = 0; i < 10; i++ ) - hvm_set_segment_register(curr, i, &rm_ctxt.seg_reg[i]); -} - -int vmx_realmode_io_complete(void) -{ - struct vcpu *curr = current; - ioreq_t *p = &get_ioreq(curr)->vp_ioreq; - - if ( !curr->arch.hvm_vmx.real_mode_io_in_progress ) - return 0; - - curr->arch.hvm_vmx.real_mode_io_in_progress = 0; - if ( p->dir == IOREQ_READ ) - { - curr->arch.hvm_vmx.real_mode_io_completed = 1; - curr->arch.hvm_vmx.real_mode_io_data = p->data; - } - - return 1; -} + sreg = hvmemul_get_seg_reg(x86_seg_ds, &rm_ctxt.hvm); + sreg->attr.fields.dpl = sreg->sel & 3; + sreg = hvmemul_get_seg_reg(x86_seg_es, &rm_ctxt.hvm); + sreg->attr.fields.dpl = sreg->sel & 3; + sreg = hvmemul_get_seg_reg(x86_seg_fs, &rm_ctxt.hvm); + sreg->attr.fields.dpl = sreg->sel & 3; + sreg = hvmemul_get_seg_reg(x86_seg_gs, &rm_ctxt.hvm); + sreg->attr.fields.dpl = sreg->sel & 3; + rm_ctxt.hvm.seg_reg_dirty |= + (1ul << x86_seg_ds) | (1ul << x86_seg_es) | + (1ul << x86_seg_fs) | (1ul << x86_seg_gs); + } + + hvm_emulate_writeback(&rm_ctxt.hvm); + + /* Re-instate VM_ENTRY_INTR_INFO if we did not discharge it. */ + if ( intr_info & INTR_INFO_VALID_MASK ) + __vmwrite(VM_ENTRY_INTR_INFO, intr_info); +} diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Feb 20 14:36:45 2008 +0000 @@ -60,6 +60,13 @@ static void vmx_install_vlapic_mapping(s static void vmx_install_vlapic_mapping(struct vcpu *v); static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr); static void vmx_update_guest_efer(struct vcpu *v); +static void vmx_cpuid_intercept( + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); +static void vmx_wbinvd_intercept(void); +static void vmx_fpu_dirty_intercept(void); +static int vmx_msr_read_intercept(struct cpu_user_regs *regs); +static int vmx_msr_write_intercept(struct cpu_user_regs *regs); static int vmx_domain_initialise(struct domain *d) { @@ -96,7 +103,6 @@ static int vmx_vcpu_initialise(struct vc /* %eax == 1 signals full real-mode support to the guest loader. 
*/ if ( v->vcpu_id == 0 ) v->arch.guest_context.user_regs.eax = 1; - v->arch.hvm_vcpu.io_complete = vmx_realmode_io_complete; return 0; } @@ -204,7 +210,7 @@ static enum handler_return long_mode_do_ switch ( ecx ) { case MSR_EFER: - if ( !hvm_set_efer(msr_content) ) + if ( hvm_set_efer(msr_content) ) goto exception_raised; break; @@ -375,7 +381,7 @@ static enum handler_return long_mode_do_ switch ( regs->ecx ) { case MSR_EFER: - if ( !hvm_set_efer(msr_content) ) + if ( hvm_set_efer(msr_content) ) return HNDL_exception_raised; break; @@ -1076,6 +1082,11 @@ static struct hvm_function_table vmx_fun .do_pmu_interrupt = vmx_do_pmu_interrupt, .cpu_up = vmx_cpu_up, .cpu_down = vmx_cpu_down, + .cpuid_intercept = vmx_cpuid_intercept, + .wbinvd_intercept = vmx_wbinvd_intercept, + .fpu_dirty_intercept = vmx_fpu_dirty_intercept, + .msr_read_intercept = vmx_msr_read_intercept, + .msr_write_intercept = vmx_msr_write_intercept }; void start_vmx(void) @@ -1147,7 +1158,7 @@ static void __update_guest_eip(unsigned vmx_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0); } -void vmx_do_no_device_fault(void) +static void vmx_fpu_dirty_intercept(void) { struct vcpu *curr = current; @@ -1162,7 +1173,7 @@ void vmx_do_no_device_fault(void) } #define bitmaskof(idx) (1U << ((idx) & 31)) -void vmx_cpuid_intercept( +static void vmx_cpuid_intercept( unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { @@ -1751,13 +1762,13 @@ static int mov_to_cr(int gp, int cr, str switch ( cr ) { case 0: - return hvm_set_cr0(value); + return !hvm_set_cr0(value); case 3: - return hvm_set_cr3(value); + return !hvm_set_cr3(value); case 4: - return hvm_set_cr4(value); + return !hvm_set_cr4(value); case 8: vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4)); @@ -1848,7 +1859,7 @@ static int vmx_cr_access(unsigned long e value = (value & ~0xF) | (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF); HVMTRACE_1D(LMSW, current, value); - return hvm_set_cr0(value); + return !hvm_set_cr0(value); default: BUG(); } @@ -1932,7 +1943,7 @@ static int is_last_branch_msr(u32 ecx) return 0; } -int vmx_msr_read_intercept(struct cpu_user_regs *regs) +static int vmx_msr_read_intercept(struct cpu_user_regs *regs) { u64 msr_content = 0; u32 ecx = regs->ecx, eax, edx; @@ -2017,7 +2028,7 @@ int vmx_msr_read_intercept(struct cpu_us case HNDL_unhandled: break; case HNDL_exception_raised: - return 0; + return X86EMUL_EXCEPTION; case HNDL_done: goto done; } @@ -2050,11 +2061,11 @@ done: HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx", ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); - return 1; + return X86EMUL_OKAY; gp_fault: vmx_inject_hw_exception(v, TRAP_gp_fault, 0); - return 0; + return X86EMUL_EXCEPTION; } static int vmx_alloc_vlapic_mapping(struct domain *d) @@ -2124,7 +2135,7 @@ extern bool_t mtrr_def_type_msr_set(stru extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content); extern bool_t pat_msr_set(u64 *pat, u64 msr); -int vmx_msr_write_intercept(struct cpu_user_regs *regs) +static int vmx_msr_write_intercept(struct cpu_user_regs *regs) { u32 ecx = regs->ecx; u64 msr_content; @@ -2219,7 +2230,7 @@ int vmx_msr_write_intercept(struct cpu_u goto gp_fault; default: if ( vpmu_do_wrmsr(regs) ) - return 1; + return X86EMUL_OKAY; switch ( long_mode_do_msr_write(regs) ) { case HNDL_unhandled: @@ -2228,18 +2239,18 @@ int vmx_msr_write_intercept(struct cpu_u wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx); break; case HNDL_exception_raised: - return 0; + return X86EMUL_EXCEPTION; 
case HNDL_done: break; } break; } - return 1; + return X86EMUL_OKAY; gp_fault: vmx_inject_hw_exception(v, TRAP_gp_fault, 0); - return 0; + return X86EMUL_EXCEPTION; } static void vmx_do_hlt(struct cpu_user_regs *regs) @@ -2320,7 +2331,7 @@ static void wbinvd_ipi(void *info) wbinvd(); } -void vmx_wbinvd_intercept(void) +static void vmx_wbinvd_intercept(void) { if ( list_empty(&(domain_hvm_iommu(current->domain)->pdev_list)) ) return; @@ -2447,7 +2458,7 @@ asmlinkage void vmx_vmexit_handler(struc domain_pause_for_debugger(); break; case TRAP_no_device: - vmx_do_no_device_fault(); + vmx_fpu_dirty_intercept(); break; case TRAP_page_fault: exit_qualification = __vmread(EXIT_QUALIFICATION); @@ -2566,12 +2577,12 @@ asmlinkage void vmx_vmexit_handler(struc break; case EXIT_REASON_MSR_READ: inst_len = __get_instruction_length(); /* Safe: RDMSR */ - if ( vmx_msr_read_intercept(regs) ) + if ( vmx_msr_read_intercept(regs) == X86EMUL_OKAY ) __update_guest_eip(inst_len); break; case EXIT_REASON_MSR_WRITE: inst_len = __get_instruction_length(); /* Safe: WRMSR */ - if ( vmx_msr_write_intercept(regs) ) + if ( vmx_msr_write_intercept(regs) == X86EMUL_OKAY ) __update_guest_eip(inst_len); break; @@ -2597,7 +2608,8 @@ asmlinkage void vmx_vmexit_handler(struc unsigned long offset; exit_qualification = __vmread(EXIT_QUALIFICATION); offset = exit_qualification & 0x0fffUL; - handle_mmio(APIC_DEFAULT_PHYS_BASE | offset); + if ( !handle_mmio() ) + hvm_inject_exception(TRAP_gp_fault, 0, 0); break; } diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/arch/x86/mm/shadow/multi.c Wed Feb 20 14:36:45 2008 +0000 @@ -2816,8 +2816,7 @@ static int sh_page_fault(struct vcpu *v, perfc_incr(shadow_fault_fast_mmio); SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa); reset_early_unshadow(v); - handle_mmio(gpa); - return EXCRET_fault_fixed; + return handle_mmio() ? EXCRET_fault_fixed : 0; } else { @@ -3117,8 +3116,7 @@ static int sh_page_fault(struct vcpu *v, shadow_audit_tables(v); reset_early_unshadow(v); shadow_unlock(d); - handle_mmio(gpa); - return EXCRET_fault_fixed; + return handle_mmio() ? EXCRET_fault_fixed : 0; not_a_shadow_fault: sh_audit_gw(v, &gw); diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/emulate.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-x86/hvm/emulate.h Wed Feb 20 14:36:45 2008 +0000 @@ -0,0 +1,55 @@ +/****************************************************************************** + * hvm/emulate.h + * + * HVM instruction emulation. Used for MMIO and VMX real mode. + * + * Copyright (c) 2008 Citrix Systems, Inc. + * + * Authors: + * Keir Fraser <keir.fraser@xxxxxxxxxx> + */ + +#ifndef __ASM_X86_HVM_EMULATE_H__ +#define __ASM_X86_HVM_EMULATE_H__ + +#include <xen/config.h> +#include <asm/x86_emulate.h> + +struct hvm_emulate_ctxt { + struct x86_emulate_ctxt ctxt; + + /* Cache of 16 bytes of instruction. 
*/ + uint8_t insn_buf[16]; + unsigned long insn_buf_eip; + unsigned int insn_buf_bytes; + + struct segment_register seg_reg[10]; + unsigned long seg_reg_accessed; + unsigned long seg_reg_dirty; + + union { + struct { + unsigned int hlt:1; + unsigned int mov_ss:1; + unsigned int sti:1; + unsigned int exn_pending:1; + } flags; + unsigned int flag_word; + }; + + uint8_t exn_vector; + uint8_t exn_insn_len; +}; + +int hvm_emulate_one( + struct hvm_emulate_ctxt *hvmemul_ctxt); +void hvm_emulate_prepare( + struct hvm_emulate_ctxt *hvmemul_ctxt, + struct cpu_user_regs *regs); +void hvm_emulate_writeback( + struct hvm_emulate_ctxt *hvmemul_ctxt); +struct segment_register *hvmemul_get_seg_reg( + enum x86_segment seg, + struct hvm_emulate_ctxt *hvmemul_ctxt); + +#endif /* __ASM_X86_HVM_EMULATE_H__ */ diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/include/asm-x86/hvm/hvm.h Wed Feb 20 14:36:45 2008 +0000 @@ -117,6 +117,15 @@ struct hvm_function_table { int (*cpu_up)(void); void (*cpu_down)(void); + + /* Instruction intercepts: non-void return values are X86EMUL codes. */ + void (*cpuid_intercept)( + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + void (*wbinvd_intercept)(void); + void (*fpu_dirty_intercept)(void); + int (*msr_read_intercept)(struct cpu_user_regs *regs); + int (*msr_write_intercept)(struct cpu_user_regs *regs); }; extern struct hvm_function_table hvm_funcs; @@ -162,9 +171,6 @@ hvm_guest_x86_mode(struct vcpu *v) ASSERT(v == current); return hvm_funcs.guest_x86_mode(v); } - -int hvm_instruction_fetch(unsigned long pc, int address_bytes, - unsigned char *buf); static inline void hvm_update_host_cr3(struct vcpu *v) diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/io.h --- a/xen/include/asm-x86/hvm/io.h Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/include/asm-x86/hvm/io.h Wed Feb 20 14:36:45 2008 +0000 @@ -120,8 +120,8 @@ struct hvm_mmio_handler { }; /* global io interception point in HV */ -extern int hvm_io_intercept(ioreq_t *p, int type); -extern int register_io_handler( +int hvm_io_intercept(ioreq_t *p, int type); +int register_io_handler( struct domain *d, unsigned long addr, unsigned long size, void *action, int type); @@ -135,8 +135,8 @@ static inline int hvm_buffered_io_interc return hvm_io_intercept(p, HVM_BUFFERED_IO); } -extern int hvm_mmio_intercept(ioreq_t *p); -extern int hvm_buffered_io_send(ioreq_t *p); +int hvm_mmio_intercept(ioreq_t *p); +int hvm_buffered_io_send(ioreq_t *p); static inline int register_portio_handler( struct domain *d, unsigned long addr, @@ -159,11 +159,11 @@ void send_pio_req(unsigned long port, un paddr_t value, int dir, int df, int value_is_ptr); void send_timeoffset_req(unsigned long timeoff); void send_invalidate_req(void); -extern void handle_mmio(paddr_t gpa); -extern void hvm_interrupt_post(struct vcpu *v, int vector, int type); -extern void hvm_io_assist(void); -extern void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq, - union vioapic_redir_entry *ent); +int handle_mmio(void); +void hvm_interrupt_post(struct vcpu *v, int vector, int type); +void hvm_io_assist(void); +void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq, + union vioapic_redir_entry *ent); struct hvm_hw_stdvga { uint8_t sr_index; diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/include/asm-x86/hvm/support.h Wed Feb 20 14:36:45 2008 
+0000 @@ -138,6 +138,7 @@ void hvm_hlt(unsigned long rflags); void hvm_hlt(unsigned long rflags); void hvm_triple_fault(void); +/* These functions all return X86EMUL return codes. */ int hvm_set_efer(uint64_t value); int hvm_set_cr0(unsigned long value); int hvm_set_cr3(unsigned long value); diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/vcpu.h --- a/xen/include/asm-x86/hvm/vcpu.h Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/include/asm-x86/hvm/vcpu.h Wed Feb 20 14:36:45 2008 +0000 @@ -59,9 +59,6 @@ struct hvm_vcpu { bool_t flag_dr_dirty; bool_t debug_state_latch; - /* Callback function for I/O completion. */ - int (*io_complete)(void); - union { struct arch_vmx_struct vmx; struct arch_svm_struct svm; @@ -72,6 +69,12 @@ struct hvm_vcpu { /* Which cache mode is this VCPU in (CR0:CD/NW)? */ u8 cache_mode; + + /* I/O request in flight to device model. */ + bool_t mmio_in_progress; + bool_t io_in_progress; + bool_t io_completed; + unsigned long io_data; }; #define ARCH_HVM_IO_WAIT 1 /* Waiting for I/O completion */ diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Wed Feb 20 14:36:45 2008 +0000 @@ -94,11 +94,6 @@ struct arch_vmx_struct { #define VMXEMUL_BAD_CS 2 /* Yes, because CS.RPL != CPL */ #define VMXEMUL_BAD_SS 4 /* Yes, because SS.RPL != CPL */ uint8_t vmxemul; - - /* I/O request in flight to device model. */ - bool_t real_mode_io_in_progress; - bool_t real_mode_io_completed; - unsigned long real_mode_io_data; }; int vmx_create_vmcs(struct vcpu *v); diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Wed Feb 20 14:36:45 2008 +0000 @@ -33,15 +33,7 @@ void vmx_do_resume(struct vcpu *); void vmx_do_resume(struct vcpu *); void set_guest_time(struct vcpu *v, u64 gtime); void vmx_vlapic_msr_changed(struct vcpu *v); -void vmx_do_no_device_fault(void); -void vmx_cpuid_intercept( - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx); -int vmx_msr_read_intercept(struct cpu_user_regs *regs); -int vmx_msr_write_intercept(struct cpu_user_regs *regs); -void vmx_wbinvd_intercept(void); void vmx_realmode(struct cpu_user_regs *regs); -int vmx_realmode_io_complete(void); /* * Exit Reasons diff -r f853c0497095 -r 3f1cf03826fe xen/include/public/hvm/ioreq.h --- a/xen/include/public/hvm/ioreq.h Tue Feb 19 11:14:40 2008 -0700 +++ b/xen/include/public/hvm/ioreq.h Wed Feb 20 14:36:45 2008 +0000 @@ -34,14 +34,8 @@ #define IOREQ_TYPE_PIO 0 /* pio */ #define IOREQ_TYPE_COPY 1 /* mmio ops */ -#define IOREQ_TYPE_AND 2 -#define IOREQ_TYPE_OR 3 -#define IOREQ_TYPE_XOR 4 -#define IOREQ_TYPE_XCHG 5 -#define IOREQ_TYPE_ADD 6 #define IOREQ_TYPE_TIMEOFFSET 7 #define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ -#define IOREQ_TYPE_SUB 9 /* * VMExit dispatcher should cooperate with instruction decoder to _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog