[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [XEN] Add 'insn_fetch' memory operation to the x86_emulator.
# HG changeset patch # User kaf24@xxxxxxxxxxxxxxxxxxxxx # Node ID 12ee3d6e61ef2637baa1f2509c68150bc7ea8fcd # Parent de8abd5ce652353c6ebd4f9e9047e10f7e28059f [XEN] Add 'insn_fetch' memory operation to the x86_emulator. This can be used to perform correct access checks, provide appropriate error codes when injecting faults, and to implement an instruction-stream prefetch cache (which is included here for HVM PTE update emulations). Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx> --- tools/tests/test_x86_emulator.c | 6 + xen/arch/x86/mm.c | 9 + xen/arch/x86/mm/shadow/common.c | 185 +++++++++++++++++++++++++++----------- xen/arch/x86/mm/shadow/multi.c | 12 -- xen/arch/x86/mm/shadow/private.h | 6 + xen/arch/x86/x86_emulate.c | 76 ++++++++------- xen/include/asm-x86/x86_emulate.h | 14 ++ 7 files changed, 208 insertions(+), 100 deletions(-) diff -r de8abd5ce652 -r 12ee3d6e61ef tools/tests/test_x86_emulator.c --- a/tools/tests/test_x86_emulator.c Sun Dec 03 13:30:23 2006 +0000 +++ b/tools/tests/test_x86_emulator.c Sun Dec 03 17:15:48 2006 +0000 @@ -88,7 +88,11 @@ static int cmpxchg8b( } static struct x86_emulate_ops emulops = { - read, write, cmpxchg, cmpxchg8b + .read = read, + .insn_fetch = read, + .write = write, + .cmpxchg = cmpxchg, + .cmpxchg8b = cmpxchg8b }; int main(int argc, char **argv) diff -r de8abd5ce652 -r 12ee3d6e61ef xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Sun Dec 03 13:30:23 2006 +0000 +++ b/xen/arch/x86/mm.c Sun Dec 03 17:15:48 2006 +0000 @@ -3224,10 +3224,11 @@ static int ptwr_emulated_cmpxchg8b( } static struct x86_emulate_ops ptwr_emulate_ops = { - .read = ptwr_emulated_read, - .write = ptwr_emulated_write, - .cmpxchg = ptwr_emulated_cmpxchg, - .cmpxchg8b = ptwr_emulated_cmpxchg8b + .read = ptwr_emulated_read, + .insn_fetch = ptwr_emulated_read, + .write = ptwr_emulated_write, + .cmpxchg = ptwr_emulated_cmpxchg, + .cmpxchg8b = ptwr_emulated_cmpxchg8b }; /* Write page fault handler: check if guest is trying to modify a PTE. */ diff -r de8abd5ce652 -r 12ee3d6e61ef xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Sun Dec 03 13:30:23 2006 +0000 +++ b/xen/arch/x86/mm/shadow/common.c Sun Dec 03 17:15:48 2006 +0000 @@ -78,43 +78,54 @@ struct segment_register *hvm_get_seg_reg return seg_reg; } +enum hvm_access_type { + hvm_access_insn_fetch, hvm_access_read, hvm_access_write +}; + static int hvm_translate_linear_addr( enum x86_segment seg, unsigned long offset, unsigned int bytes, - unsigned int is_write, + enum hvm_access_type access_type, struct sh_emulate_ctxt *sh_ctxt, unsigned long *paddr) { - struct segment_register *creg, *dreg; + struct segment_register *reg = hvm_get_seg_reg(seg, sh_ctxt); unsigned long limit, addr = offset; uint32_t last_byte; - creg = hvm_get_seg_reg(x86_seg_cs, sh_ctxt); - dreg = hvm_get_seg_reg(seg, sh_ctxt); - - if ( !creg->attr.fields.l || !hvm_long_mode_enabled(current) ) + if ( sh_ctxt->ctxt.mode != X86EMUL_MODE_PROT64 ) { /* * COMPATIBILITY MODE: Apply segment checks and add base. */ - /* If this is a store, is the segment a writable data segment? */ - if ( is_write && ((dreg->attr.fields.type & 0xa) != 0x2) ) - goto gpf; + switch ( access_type ) + { + case hvm_access_read: + if ( (reg->attr.fields.type & 0xa) == 0x8 ) + goto gpf; /* execute-only code segment */ + break; + case hvm_access_write: + if ( (reg->attr.fields.type & 0xa) != 0x2 ) + goto gpf; /* not a writable data segment */ + break; + default: + break; + } /* Calculate the segment limit, including granularity flag. */ - limit = dreg->limit; - if ( dreg->attr.fields.g ) + limit = reg->limit; + if ( reg->attr.fields.g ) limit = (limit << 12) | 0xfff; last_byte = offset + bytes - 1; /* Is this a grows-down data segment? Special limit check if so. */ - if ( (dreg->attr.fields.type & 0xc) == 0x4 ) + if ( (reg->attr.fields.type & 0xc) == 0x4 ) { /* Is upper limit 0xFFFF or 0xFFFFFFFF? */ - if ( !dreg->attr.fields.db ) + if ( !reg->attr.fields.db ) last_byte = (uint16_t)last_byte; /* Check first byte and last byte against respective bounds. */ @@ -128,7 +139,7 @@ static int hvm_translate_linear_addr( * Hardware truncates to 32 bits in compatibility mode. * It does not truncate to 16 bits in 16-bit address-size mode. */ - addr = (uint32_t)(addr + dreg->base); + addr = (uint32_t)(addr + reg->base); } else { @@ -137,7 +148,7 @@ static int hvm_translate_linear_addr( */ if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) ) - addr += dreg->base; + addr += reg->base; if ( !is_canonical_address(addr) ) goto gpf; @@ -150,6 +161,82 @@ static int hvm_translate_linear_addr( /* Inject #GP(0). */ hvm_inject_exception(TRAP_gp_fault, 0, 0); return X86EMUL_PROPAGATE_FAULT; +} + +static int +hvm_read(enum x86_segment seg, + unsigned long offset, + unsigned long *val, + unsigned int bytes, + enum hvm_access_type access_type, + struct sh_emulate_ctxt *sh_ctxt) +{ + unsigned long addr; + int rc, errcode; + + rc = hvm_translate_linear_addr( + seg, offset, bytes, access_type, sh_ctxt, &addr); + if ( rc ) + return rc; + + *val = 0; + // XXX -- this is WRONG. + // It entirely ignores the permissions in the page tables. + // In this case, that is only a user vs supervisor access check. + // + if ( (rc = hvm_copy_from_guest_virt(val, addr, bytes)) == 0 ) + { +#if 0 + struct vcpu *v = current; + SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n", + v->domain->domain_id, v->vcpu_id, + addr, *val, bytes); +#endif + return X86EMUL_CONTINUE; + } + + /* If we got here, there was nothing mapped here, or a bad GFN + * was mapped here. This should never happen: we're here because + * of a write fault at the end of the instruction we're emulating. */ + SHADOW_PRINTK("read failed to va %#lx\n", addr); + errcode = ring_3(sh_ctxt->ctxt.regs) ? PFEC_user_mode : 0; + if ( access_type == hvm_access_insn_fetch ) + errcode |= PFEC_insn_fetch; + hvm_inject_exception(TRAP_page_fault, errcode, addr + bytes - rc); + return X86EMUL_PROPAGATE_FAULT; +} + +void shadow_init_emulation(struct sh_emulate_ctxt *sh_ctxt, + struct cpu_user_regs *regs) +{ + struct segment_register *creg; + struct vcpu *v = current; + unsigned long addr; + + sh_ctxt->ctxt.regs = regs; + + /* Segment cache initialisation. Primed with CS. */ + sh_ctxt->valid_seg_regs = 0; + creg = hvm_get_seg_reg(x86_seg_cs, sh_ctxt); + + /* Work out the emulation mode. */ + if ( hvm_long_mode_enabled(v) ) + sh_ctxt->ctxt.mode = creg->attr.fields.l ? + X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32; + else if ( regs->eflags & X86_EFLAGS_VM ) + sh_ctxt->ctxt.mode = X86EMUL_MODE_REAL; + else + sh_ctxt->ctxt.mode = creg->attr.fields.db ? + X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + + /* Attempt to prefetch whole instruction. */ + sh_ctxt->insn_buf_bytes = + (!hvm_translate_linear_addr( + x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf), + hvm_access_insn_fetch, sh_ctxt, &addr) && + !hvm_copy_from_guest_virt( + sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf))) + ? sizeof(sh_ctxt->insn_buf) : 0; } static int @@ -159,38 +246,30 @@ sh_x86_emulate_read(enum x86_segment seg unsigned int bytes, struct x86_emulate_ctxt *ctxt) { + return hvm_read(seg, offset, val, bytes, hvm_access_read, + container_of(ctxt, struct sh_emulate_ctxt, ctxt)); +} + +static int +sh_x86_emulate_insn_fetch(enum x86_segment seg, + unsigned long offset, + unsigned long *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ struct sh_emulate_ctxt *sh_ctxt = container_of(ctxt, struct sh_emulate_ctxt, ctxt); - unsigned long addr; - int rc, errcode; - - rc = hvm_translate_linear_addr(seg, offset, bytes, 0, sh_ctxt, &addr); - if ( rc ) - return rc; - + unsigned int insn_off = offset - ctxt->regs->eip; + + /* Fall back if requested bytes are not in the prefetch cache. */ + if ( unlikely((insn_off + bytes) > sh_ctxt->insn_buf_bytes) ) + return hvm_read(seg, offset, val, bytes, + hvm_access_insn_fetch, sh_ctxt); + + /* Hit the cache. Simple memcpy. */ *val = 0; - // XXX -- this is WRONG. - // It entirely ignores the permissions in the page tables. - // In this case, that is only a user vs supervisor access check. - // - if ( (rc = hvm_copy_from_guest_virt(val, addr, bytes)) == 0 ) - { -#if 0 - struct vcpu *v = current; - SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n", - v->domain->domain_id, v->vcpu_id, - addr, *val, bytes); -#endif - return X86EMUL_CONTINUE; - } - - /* If we got here, there was nothing mapped here, or a bad GFN - * was mapped here. This should never happen: we're here because - * of a write fault at the end of the instruction we're emulating. */ - SHADOW_PRINTK("read failed to va %#lx\n", addr); - errcode = ring_3(sh_ctxt->ctxt.regs) ? PFEC_user_mode : 0; - hvm_inject_exception(TRAP_page_fault, errcode, addr + bytes - rc); - return X86EMUL_PROPAGATE_FAULT; + memcpy(val, &sh_ctxt->insn_buf[insn_off], bytes); + return X86EMUL_CONTINUE; } static int @@ -206,7 +285,8 @@ sh_x86_emulate_write(enum x86_segment se unsigned long addr; int rc; - rc = hvm_translate_linear_addr(seg, offset, bytes, 1, sh_ctxt, &addr); + rc = hvm_translate_linear_addr( + seg, offset, bytes, hvm_access_write, sh_ctxt, &addr); if ( rc ) return rc; @@ -232,7 +312,8 @@ sh_x86_emulate_cmpxchg(enum x86_segment unsigned long addr; int rc; - rc = hvm_translate_linear_addr(seg, offset, bytes, 1, sh_ctxt, &addr); + rc = hvm_translate_linear_addr( + seg, offset, bytes, hvm_access_write, sh_ctxt, &addr); if ( rc ) return rc; @@ -259,7 +340,8 @@ sh_x86_emulate_cmpxchg8b(enum x86_segmen unsigned long addr; int rc; - rc = hvm_translate_linear_addr(seg, offset, 8, 1, sh_ctxt, &addr); + rc = hvm_translate_linear_addr( + seg, offset, 8, hvm_access_write, sh_ctxt, &addr); if ( rc ) return rc; @@ -274,10 +356,11 @@ sh_x86_emulate_cmpxchg8b(enum x86_segmen struct x86_emulate_ops shadow_emulator_ops = { - .read = sh_x86_emulate_read, - .write = sh_x86_emulate_write, - .cmpxchg = sh_x86_emulate_cmpxchg, - .cmpxchg8b = sh_x86_emulate_cmpxchg8b, + .read = sh_x86_emulate_read, + .insn_fetch = sh_x86_emulate_insn_fetch, + .write = sh_x86_emulate_write, + .cmpxchg = sh_x86_emulate_cmpxchg, + .cmpxchg8b = sh_x86_emulate_cmpxchg8b, }; /**************************************************************************/ diff -r de8abd5ce652 -r 12ee3d6e61ef xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Sun Dec 03 13:30:23 2006 +0000 +++ b/xen/arch/x86/mm/shadow/multi.c Sun Dec 03 17:15:48 2006 +0000 @@ -2808,24 +2808,20 @@ static int sh_page_fault(struct vcpu *v, return EXCRET_fault_fixed; emulate: - if ( !is_hvm_domain(d) ) + if ( !is_hvm_domain(d) || !guest_mode(regs) ) goto not_a_shadow_fault; hvm_store_cpu_guest_regs(v, regs, NULL); - emul_ctxt.ctxt.regs = regs; - emul_ctxt.ctxt.mode = (is_hvm_domain(d) ? - hvm_guest_x86_mode(v) : X86EMUL_MODE_HOST); - emul_ctxt.valid_seg_regs = 0; - SHADOW_PRINTK("emulate: eip=%#lx\n", regs->eip); + + shadow_init_emulation(&emul_ctxt, regs); /* * We do not emulate user writes. Instead we use them as a hint that the * page is no longer a page table. This behaviour differs from native, but * it seems very unlikely that any OS grants user access to page tables. - * We also disallow guest PTE updates from within Xen. */ - if ( (regs->error_code & PFEC_user_mode) || !guest_mode(regs) || + if ( (regs->error_code & PFEC_user_mode) || x86_emulate_memop(&emul_ctxt.ctxt, &shadow_emulator_ops) ) { SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n", diff -r de8abd5ce652 -r 12ee3d6e61ef xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Sun Dec 03 13:30:23 2006 +0000 +++ b/xen/arch/x86/mm/shadow/private.h Sun Dec 03 17:15:48 2006 +0000 @@ -513,11 +513,17 @@ struct sh_emulate_ctxt { struct sh_emulate_ctxt { struct x86_emulate_ctxt ctxt; + /* Cache of up to 15 bytes of instruction. */ + uint8_t insn_buf[15]; + uint8_t insn_buf_bytes; + /* Cache of segment registers already gathered for this emulation. */ unsigned int valid_seg_regs; struct segment_register seg_reg[6]; }; +void shadow_init_emulation(struct sh_emulate_ctxt *sh_ctxt, + struct cpu_user_regs *regs); #endif /* _XEN_SHADOW_PRIVATE_H */ diff -r de8abd5ce652 -r 12ee3d6e61ef xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Sun Dec 03 13:30:23 2006 +0000 +++ b/xen/arch/x86/x86_emulate.c Sun Dec 03 17:15:48 2006 +0000 @@ -18,11 +18,6 @@ #undef cmpxchg #endif #include <asm-x86/x86_emulate.h> - -#ifndef PFEC_write_access -#define PFEC_write_access (1U<<1) -#define PFEC_insn_fetch (1U<<4) -#endif /* * Opcode effective-address decode tables. @@ -374,15 +369,15 @@ do{ __asm__ __volatile__ ( #endif /* __i386__ */ /* Fetch next part of the instruction being emulated. */ -#define _insn_fetch(_size) \ -({ unsigned long _x; \ - rc = ops->read(x86_seg_cs, _regs.eip, &_x, (_size), ctxt); \ - if ( rc != 0 ) \ - goto done; \ - _regs.eip += (_size); \ - _x; \ +#define insn_fetch_bytes(_size) \ +({ unsigned long _x; \ + rc = ops->insn_fetch(x86_seg_cs, _regs.eip, &_x, (_size), ctxt); \ + if ( rc != 0 ) \ + goto done; \ + _regs.eip += (_size); \ + _x; \ }) -#define insn_fetch(_type) ((_type)_insn_fetch(sizeof(_type))) +#define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type))) #define truncate_ea(ea) \ ({ unsigned long __ea = (ea); \ @@ -481,7 +476,7 @@ x86_emulate_memop( /* Legacy prefixes. */ for ( i = 0; i < 8; i++ ) { - switch ( b = insn_fetch(uint8_t) ) + switch ( b = insn_fetch_type(uint8_t) ) { case 0x66: /* operand-size override */ op_bytes ^= 6; /* switch between 2/4 bytes */ @@ -530,7 +525,7 @@ x86_emulate_memop( rex_prefix = b; if ( b & 8 ) /* REX.W */ op_bytes = 8; - b = insn_fetch(uint8_t); + b = insn_fetch_type(uint8_t); } /* Opcode byte(s). */ @@ -541,7 +536,7 @@ x86_emulate_memop( if ( b == 0x0f ) { twobyte = 1; - b = insn_fetch(uint8_t); + b = insn_fetch_type(uint8_t); d = twobyte_table[b]; } @@ -553,7 +548,7 @@ x86_emulate_memop( /* ModRM and SIB bytes. */ if ( d & ModRM ) { - modrm = insn_fetch(uint8_t); + modrm = insn_fetch_type(uint8_t); modrm_mod = (modrm & 0xc0) >> 6; modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3); modrm_rm = modrm & 0x07; @@ -577,9 +572,16 @@ x86_emulate_memop( } switch ( modrm_mod ) { - case 0: if ( modrm_rm == 6 ) ea_off = insn_fetch(int16_t); break; - case 1: ea_off += insn_fetch(int8_t); break; - case 2: ea_off += insn_fetch(int16_t); break; + case 0: + if ( modrm_rm == 6 ) + ea_off = insn_fetch_type(int16_t); + break; + case 1: + ea_off += insn_fetch_type(int8_t); + break; + case 2: + ea_off += insn_fetch_type(int16_t); + break; } } else @@ -587,14 +589,14 @@ x86_emulate_memop( /* 32/64-bit ModR/M decode. */ if ( modrm_rm == 4 ) { - sib = insn_fetch(uint8_t); + sib = insn_fetch_type(uint8_t); sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8); sib_base = (sib & 7) | ((rex_prefix << 3) & 8); if ( sib_index != 4 ) ea_off = *(long *)decode_register(sib_index, &_regs, 0); ea_off <<= (sib >> 6) & 3; if ( (modrm_mod == 0) && ((sib_base & 7) == 5) ) - ea_off += insn_fetch(int32_t); + ea_off += insn_fetch_type(int32_t); else ea_off += *(long *)decode_register(sib_base, &_regs, 0); } @@ -608,7 +610,7 @@ x86_emulate_memop( case 0: if ( (modrm_rm & 7) != 5 ) break; - ea_off = insn_fetch(int32_t); + ea_off = insn_fetch_type(int32_t); if ( mode != X86EMUL_MODE_PROT64 ) break; /* Relative to RIP of next instruction. Argh! */ @@ -624,8 +626,12 @@ x86_emulate_memop( ea_off += (d & ByteOp) ? 1 : ((op_bytes == 8) ? 4 : op_bytes); break; - case 1: ea_off += insn_fetch(int8_t); break; - case 2: ea_off += insn_fetch(int32_t); break; + case 1: + ea_off += insn_fetch_type(int8_t); + break; + case 2: + ea_off += insn_fetch_type(int32_t); + break; } } @@ -684,15 +690,15 @@ x86_emulate_memop( /* NB. Immediates are sign-extended as necessary. */ switch ( src.bytes ) { - case 1: src.val = insn_fetch(int8_t); break; - case 2: src.val = insn_fetch(int16_t); break; - case 4: src.val = insn_fetch(int32_t); break; + case 1: src.val = insn_fetch_type(int8_t); break; + case 2: src.val = insn_fetch_type(int16_t); break; + case 4: src.val = insn_fetch_type(int32_t); break; } break; case SrcImmByte: src.type = OP_IMM; src.bytes = 1; - src.val = insn_fetch(int8_t); + src.val = insn_fetch_type(int8_t); break; } @@ -885,9 +891,9 @@ x86_emulate_memop( if ( src.bytes == 8 ) src.bytes = 4; switch ( src.bytes ) { - case 1: src.val = insn_fetch(int8_t); break; - case 2: src.val = insn_fetch(int16_t); break; - case 4: src.val = insn_fetch(int32_t); break; + case 1: src.val = insn_fetch_type(int8_t); break; + case 2: src.val = insn_fetch_type(int16_t); break; + case 4: src.val = insn_fetch_type(int32_t); break; } goto test; case 2: /* not */ @@ -986,7 +992,7 @@ x86_emulate_memop( dst.type = OP_REG; dst.reg = (unsigned long *)&_regs.eax; dst.bytes = (d & ByteOp) ? 1 : op_bytes; - if ( (rc = ops->read(ea_seg, _insn_fetch(ad_bytes), + if ( (rc = ops->read(ea_seg, insn_fetch_bytes(ad_bytes), &dst.val, dst.bytes, ctxt)) != 0 ) goto done; break; @@ -994,7 +1000,7 @@ x86_emulate_memop( /* Destination EA is not encoded via ModRM. */ dst.type = OP_MEM; dst.mem_seg = ea_seg; - dst.mem_off = _insn_fetch(ad_bytes); + dst.mem_off = insn_fetch_bytes(ad_bytes); dst.bytes = (d & ByteOp) ? 1 : op_bytes; dst.val = (unsigned long)_regs.eax; break; @@ -1198,7 +1204,7 @@ x86_emulate_memop( for ( ea_off = ctxt->regs->eip; ea_off < _regs.eip; ea_off++ ) { unsigned long x; - ops->read(x86_seg_cs, ea_off, &x, 1, ctxt); + ops->insn_fetch(x86_seg_cs, ea_off, &x, 1, ctxt); printk(" %02x", (uint8_t)x); } printk("\n"); diff -r de8abd5ce652 -r 12ee3d6e61ef xen/include/asm-x86/x86_emulate.h --- a/xen/include/asm-x86/x86_emulate.h Sun Dec 03 13:30:23 2006 +0000 +++ b/xen/include/asm-x86/x86_emulate.h Sun Dec 03 17:15:48 2006 +0000 @@ -56,7 +56,8 @@ struct x86_emulate_ops /* * All functions: * @seg: [IN ] Segment being dereferenced (specified as x86_seg_??). - * @offset [IN ] Offset within segment. + * @offset:[IN ] Offset within segment. + * @ctxt: [IN ] Emulation context info as passed to the emulator. */ /* @@ -65,6 +66,17 @@ struct x86_emulate_ops * @bytes: [IN ] Number of bytes to read from memory. */ int (*read)( + enum x86_segment seg, + unsigned long offset, + unsigned long *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); + + /* + * insn_fetch: Emulate fetch from instruction byte stream. + * Parameters are same as for 'read'. @seg is always x86_seg_cs. + */ + int (*insn_fetch)( enum x86_segment seg, unsigned long offset, unsigned long *val, _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |