[Xen-changelog] [xen-unstable] hvm: Remove lots of custom trap-and-emulate code and defer to handle_mmio()->hvm_emulate_one()->x86_emulate()
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1203616962 0 # Node ID 09b53f27a18b68f3474347291a04eb66b489fb1f # Parent 591cfd37bd5409d534034c64a3356a9b492b23bb hvm: Remove lots of custom trap-and-emulate code and defer to handle_mmio()->hvm_emulate_one()->x86_emulate(). Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx> --- xen/arch/x86/hvm/emulate.c | 22 xen/arch/x86/hvm/svm/emulate.c | 413 +---------------- xen/arch/x86/hvm/svm/svm.c | 790 ---------------------------------- xen/arch/x86/hvm/vmx/vmx.c | 502 --------------------- xen/arch/x86/x86_emulate.c | 18 xen/include/asm-x86/hvm/hvm.h | 13 xen/include/asm-x86/hvm/svm/emulate.h | 93 ---- xen/include/asm-x86/x86_emulate.h | 6 8 files changed, 120 insertions(+), 1737 deletions(-) diff -r 591cfd37bd54 -r 09b53f27a18b xen/arch/x86/hvm/emulate.c --- a/xen/arch/x86/hvm/emulate.c Thu Feb 21 15:06:37 2008 +0000 +++ b/xen/arch/x86/hvm/emulate.c Thu Feb 21 18:02:42 2008 +0000 @@ -664,6 +664,25 @@ static void hvmemul_load_fpu_ctxt( { if ( !current->fpu_dirtied ) hvm_funcs.fpu_dirty_intercept(); +} + +static int hvmemul_invlpg( + enum x86_segment seg, + unsigned long offset, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + unsigned long addr; + int rc; + + rc = hvmemul_virtual_to_linear( + seg, offset, 1, hvm_access_none, hvmemul_ctxt, &addr); + + if ( rc == X86EMUL_OKAY ) + hvm_funcs.invlpg_intercept(addr); + + return rc; } static struct x86_emulate_ops hvm_emulate_ops = { @@ -688,7 +707,8 @@ static struct x86_emulate_ops hvm_emulat .hlt = hvmemul_hlt, .inject_hw_exception = hvmemul_inject_hw_exception, .inject_sw_interrupt = hvmemul_inject_sw_interrupt, - .load_fpu_ctxt = hvmemul_load_fpu_ctxt + .load_fpu_ctxt = hvmemul_load_fpu_ctxt, + .invlpg = hvmemul_invlpg }; int hvm_emulate_one( diff -r 591cfd37bd54 -r 09b53f27a18b xen/arch/x86/hvm/svm/emulate.c --- a/xen/arch/x86/hvm/svm/emulate.c Thu Feb 21 15:06:37 2008 +0000 +++ b/xen/arch/x86/hvm/svm/emulate.c Thu Feb 21 18:02:42 2008 +0000 @@ -14,7 +14,6 @@ * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 59 Temple * Place - Suite 330, Boston, MA 02111-1307 USA. - * */ #include <xen/config.h> @@ -28,314 +27,40 @@ #include <asm/hvm/svm/vmcb.h> #include <asm/hvm/svm/emulate.h> +int inst_copy_from_guest( + unsigned char *buf, unsigned long guest_eip, int inst_len); -extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, - int inst_len); - -#define REX_PREFIX_BASE 0x40 -#define REX_X 0x02 -#define REX_W 0x08 -#define REX_R 0x04 -#define REX_B 0x01 - -#define IS_REX_PREFIX(prefix) ((prefix & 0xf0) == REX_PREFIX_BASE) - -#define DECODE_MODRM_MOD(modrm) ((modrm & 0xC0) >> 6) - -#define DECODE_MODRM_REG(prefix, modrm) \ - ((prefix & REX_R) && IS_REX_PREFIX(prefix)) \ - ? (0x08 | ((modrm >> 3) & 0x07)) : ((modrm >> 3) & 0x07) - -#define DECODE_MODRM_RM(prefix, modrm) \ - ((prefix & REX_B) && IS_REX_PREFIX(prefix)) \ - ? (0x08 | (modrm & 0x07)) : (modrm & 0x07) - -#define DECODE_SIB_SCALE(sib) DECODE_MODRM_MOD(sib) - -#define DECODE_SIB_INDEX(prefix, sib) \ - ((prefix & REX_X) && IS_REX_PREFIX(prefix)) \ - ? 
(0x08 | ((sib >> 3) & 0x07)) : ((sib >> 3) & 0x07) - -#define DECODE_SIB_BASE(prefix, sib) DECODE_MODRM_RM(prefix, sib) - - -static inline unsigned long DECODE_GPR_VALUE( - struct cpu_user_regs *regs, u8 gpr_rm) +static unsigned int is_prefix(u8 opc) { - unsigned long value; - switch (gpr_rm) - { - case 0x0: - value = regs->eax; - break; - case 0x1: - value = regs->ecx; - break; - case 0x2: - value = regs->edx; - break; - case 0x3: - value = regs->ebx; - break; - case 0x4: - value = regs->esp; - case 0x5: - value = regs->ebp; - break; - case 0x6: - value = regs->esi; - break; - case 0x7: - value = regs->edi; - break; + switch ( opc ) + { + case 0x66: + case 0x67: + case 0x2E: + case 0x3E: + case 0x26: + case 0x64: + case 0x65: + case 0x36: + case 0xF0: + case 0xF3: + case 0xF2: #if __x86_64__ - case 0x8: - value = regs->r8; - break; - case 0x9: - value = regs->r9; - break; - case 0xA: - value = regs->r10; - break; - case 0xB: - value = regs->r11; - break; - case 0xC: - value = regs->r12; - break; - case 0xD: - value = regs->r13; - break; - case 0xE: - value = regs->r14; - break; - case 0xF: - value = regs->r15; - break; -#endif - default: - printk("Invlaid gpr_rm = %d\n", gpr_rm); - ASSERT(0); - value = (unsigned long)-1; /* error retrun */ + case 0x40 ... 0x4f: +#endif /* __x86_64__ */ + return 1; } - return value; + return 0; } - -#define CHECK_LENGTH64(num) \ - if (num > length) \ - { \ - *size = 0; \ - return (unsigned long) -1; \ - } - -#define modrm operand [0] - -#define sib operand [1] - - -unsigned long get_effective_addr_modrm64(struct cpu_user_regs *regs, - const u8 prefix, int inst_len, - const u8 *operand, u8 *size) +static unsigned long svm_rip2pointer(struct vcpu *v) { - unsigned long effective_addr = (unsigned long) -1; - u8 length, modrm_mod, modrm_rm; - u32 disp = 0; - struct vcpu *v = current; - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - - HVM_DBG_LOG(DBG_LEVEL_1, "prefix = %x, length = %d, operand[0,1] = %x %x", - prefix, *size, operand[0], operand[1]); - - if ((NULL == size) || (NULL == operand) || (1 > *size)) - { - *size = 0; - return effective_addr; - } - - modrm_mod = DECODE_MODRM_MOD(modrm); - modrm_rm = DECODE_MODRM_RM(prefix, modrm); - - length = *size; - *size = 1; - switch (modrm_rm) - { - case 0x4: -#if __x86_64__ - case 0xC: -#endif - if (modrm_mod < 3) - { - *size = length; - effective_addr = get_effective_addr_sib(vmcb, regs, prefix, operand, size); - } - else - { - effective_addr = DECODE_GPR_VALUE(regs, modrm_rm); - } - break; - - case 0x5: - if (0 < modrm_mod) - { - effective_addr = regs->ebp; - *size = 1; - break; - } -#if __x86_64__ - /* FALLTHRU */ - case 0xD: - if (0 < modrm_mod) - { - *size = 1; - effective_addr = regs->r13; - break; - } -#endif - - CHECK_LENGTH64(*size + (u8)sizeof(u32)); - - memcpy (&disp, operand + 1, sizeof (u32)); - *size += sizeof (u32); - -#if __x86_64__ - /* 64-bit mode */ - if (vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v)) - return regs->eip + inst_len + *size + disp; -#endif - return disp; - - default: - effective_addr = DECODE_GPR_VALUE(regs, modrm_rm); - - } - - if (3 > modrm_mod) - { - if (1 == modrm_mod ) - { - CHECK_LENGTH64(*size + (u8)sizeof(u8)); - disp = sib; - *size += sizeof (u8); - } - else if (2 == modrm_mod ) - { - CHECK_LENGTH64(*size + sizeof (u32)); - memcpy (&disp, operand + 1, sizeof (u32)); - *size += sizeof (u32); - } - - effective_addr += disp; - } - - return effective_addr; -} - - -unsigned long get_effective_addr_sib(struct vmcb_struct *vmcb, - struct cpu_user_regs *regs, const u8 
prefix, const u8 *operand, - u8 *size) -{ - unsigned long base, effective_addr = (unsigned long)-1; - u8 sib_scale, sib_idx, sib_base, length; - u32 disp = 0; - - if (NULL == size || NULL == operand || 2 > *size) - { - *size = 0; - return effective_addr; - } - - sib_scale = DECODE_SIB_SCALE(sib); - sib_idx = DECODE_SIB_INDEX(prefix, sib); - sib_base = DECODE_SIB_BASE(prefix, sib); - - base = DECODE_GPR_VALUE(regs, sib_base); - - if ((unsigned long)-1 == base) - { - /* - * Surely this is wrong. base should be allowed to be -1, even if - * it's not the usual case... - */ - *size = 0; - return base; - } - - length = *size; - *size = 2; - if (0x5 == (sib_base & 0x5)) - { - switch (DECODE_MODRM_MOD(modrm)) - { - case 0: - CHECK_LENGTH64(*size + (u8)sizeof(u32)); - memcpy (&disp, operand + 2, sizeof(u32)); - *size += sizeof(u32); - base = disp; - break; - - case 1: - CHECK_LENGTH64(*size + (u8)sizeof (u8)); - *size += sizeof(u8); - base += operand [2]; - break; - - case 2: - CHECK_LENGTH64(*size + (u8)sizeof (u32)); - memcpy(&disp, operand + 2, sizeof(u32)); - *size += sizeof(u32); - base += disp; - } - } - - if (4 == sib_idx) - return base; - - effective_addr = DECODE_GPR_VALUE(regs, sib_idx); - - effective_addr <<= sib_scale; - - return (effective_addr + base); -} - - -/* Get the register/mode number of src register in ModRM register. */ -unsigned int decode_dest_reg(u8 prefix, u8 m) -{ - return DECODE_MODRM_REG(prefix, m); -} - -unsigned int decode_src_reg(u8 prefix, u8 m) -{ - return DECODE_MODRM_RM(prefix, m); -} - - -unsigned long svm_rip2pointer(struct vcpu *v) -{ - /* - * The following is subtle. Intuitively this code would be something like: - * - * if (16bit) addr = (cs << 4) + rip; else addr = rip; - * - * However, this code doesn't work for code executing after CR0.PE=0, - * but before the %cs has been updated. We don't get signalled when - * %cs is update, but fortunately, base contain the valid base address - * no matter what kind of addressing is used. - */ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; unsigned long p = vmcb->cs.base + guest_cpu_user_regs()->eip; - ASSERT(v == current); - if (!(vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v))) + if ( !(vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v)) ) return (u32)p; /* mask to 32 bits */ - /* NB. Should mask to 16 bits if in real mode or 16-bit protected mode. */ return p; } - - -#define MAKE_INSTR(nm, ...) static const u8 OPCODE_##nm[] = { __VA_ARGS__ } /* * Here's how it works: @@ -343,35 +68,14 @@ unsigned long svm_rip2pointer(struct vcp * Following bytes: Opcode bytes. * Special case: Last byte, if zero, doesn't need to match. */ +#define MAKE_INSTR(nm, ...) 
static const u8 OPCODE_##nm[] = { __VA_ARGS__ } MAKE_INSTR(INVD, 2, 0x0f, 0x08); MAKE_INSTR(WBINVD, 2, 0x0f, 0x09); MAKE_INSTR(CPUID, 2, 0x0f, 0xa2); MAKE_INSTR(RDMSR, 2, 0x0f, 0x32); MAKE_INSTR(WRMSR, 2, 0x0f, 0x30); -MAKE_INSTR(CLI, 1, 0xfa); -MAKE_INSTR(STI, 1, 0xfb); -MAKE_INSTR(RDPMC, 2, 0x0f, 0x33); -MAKE_INSTR(CLGI, 3, 0x0f, 0x01, 0xdd); -MAKE_INSTR(STGI, 3, 0x0f, 0x01, 0xdc); -MAKE_INSTR(VMRUN, 3, 0x0f, 0x01, 0xd8); -MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda); -MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb); MAKE_INSTR(VMCALL, 3, 0x0f, 0x01, 0xd9); -MAKE_INSTR(PAUSE, 2, 0xf3, 0x90); -MAKE_INSTR(SKINIT, 3, 0x0f, 0x01, 0xde); -MAKE_INSTR(MOV2CR, 3, 0x0f, 0x22, 0x00); -MAKE_INSTR(MOVCR2, 3, 0x0f, 0x20, 0x00); -MAKE_INSTR(MOV2DR, 3, 0x0f, 0x23, 0x00); -MAKE_INSTR(MOVDR2, 3, 0x0f, 0x21, 0x00); -MAKE_INSTR(PUSHF, 1, 0x9c); -MAKE_INSTR(POPF, 1, 0x9d); -MAKE_INSTR(RSM, 2, 0x0f, 0xaa); -MAKE_INSTR(INVLPG, 3, 0x0f, 0x01, 0x00); -MAKE_INSTR(INVLPGA,3, 0x0f, 0x01, 0xdf); MAKE_INSTR(HLT, 1, 0xf4); -MAKE_INSTR(CLTS, 2, 0x0f, 0x06); -MAKE_INSTR(LMSW, 3, 0x0f, 0x01, 0x00); -MAKE_INSTR(SMSW, 3, 0x0f, 0x01, 0x00); MAKE_INSTR(INT3, 1, 0xcc); static const u8 *opc_bytes[INSTR_MAX_COUNT] = @@ -381,55 +85,24 @@ static const u8 *opc_bytes[INSTR_MAX_COU [INSTR_CPUID] = OPCODE_CPUID, [INSTR_RDMSR] = OPCODE_RDMSR, [INSTR_WRMSR] = OPCODE_WRMSR, - [INSTR_CLI] = OPCODE_CLI, - [INSTR_STI] = OPCODE_STI, - [INSTR_RDPMC] = OPCODE_RDPMC, - [INSTR_CLGI] = OPCODE_CLGI, - [INSTR_STGI] = OPCODE_STGI, - [INSTR_VMRUN] = OPCODE_VMRUN, - [INSTR_VMLOAD] = OPCODE_VMLOAD, - [INSTR_VMSAVE] = OPCODE_VMSAVE, [INSTR_VMCALL] = OPCODE_VMCALL, - [INSTR_PAUSE] = OPCODE_PAUSE, - [INSTR_SKINIT] = OPCODE_SKINIT, - [INSTR_MOV2CR] = OPCODE_MOV2CR, - [INSTR_MOVCR2] = OPCODE_MOVCR2, - [INSTR_MOV2DR] = OPCODE_MOV2DR, - [INSTR_MOVDR2] = OPCODE_MOVDR2, - [INSTR_PUSHF] = OPCODE_PUSHF, - [INSTR_POPF] = OPCODE_POPF, - [INSTR_RSM] = OPCODE_RSM, - [INSTR_INVLPG] = OPCODE_INVLPG, - [INSTR_INVLPGA]= OPCODE_INVLPGA, - [INSTR_CLTS] = OPCODE_CLTS, [INSTR_HLT] = OPCODE_HLT, - [INSTR_LMSW] = OPCODE_LMSW, - [INSTR_SMSW] = OPCODE_SMSW, [INSTR_INT3] = OPCODE_INT3 }; -/* - * Intel has a vmcs entry to give the instruction length. AMD doesn't. So we - * have to do a little bit of work to find out... - * - * The caller can either pass a NULL pointer to the guest_eip_buf, or a pointer - * to enough bytes to satisfy the instruction including prefix bytes. - */ int __get_instruction_length_from_list(struct vcpu *v, enum instruction_index *list, unsigned int list_count, u8 *guest_eip_buf, enum instruction_index *match) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - unsigned int inst_len = 0; - unsigned int i; - unsigned int j; + unsigned int i, j, inst_len = 0; int found = 0; enum instruction_index instr = 0; u8 buffer[MAX_INST_LEN]; u8 *buf; const u8 *opcode = NULL; - if (guest_eip_buf) + if ( guest_eip_buf ) { buf = guest_eip_buf; } @@ -439,53 +112,47 @@ int __get_instruction_length_from_list(s buf = buffer; } - for (j = 0; j < list_count; j++) + for ( j = 0; j < list_count; j++ ) { instr = list[j]; opcode = opc_bytes[instr]; ASSERT(opcode); - while (inst_len < MAX_INST_LEN && + while ( (inst_len < MAX_INST_LEN) && is_prefix(buf[inst_len]) && - !is_prefix(opcode[1])) + !is_prefix(opcode[1]) ) inst_len++; ASSERT(opcode[0] <= 15); /* Make sure the table is correct. 
*/ found = 1; - for (i = 0; i < opcode[0]; i++) + for ( i = 0; i < opcode[0]; i++ ) { /* If the last byte is zero, we just accept it without checking */ - if (i == opcode[0]-1 && opcode[i+1] == 0) + if ( (i == (opcode[0]-1)) && (opcode[i+1] == 0) ) break; - if (buf[inst_len+i] != opcode[i+1]) + if ( buf[inst_len+i] != opcode[i+1] ) { found = 0; break; } } - if (found) - break; - } - - /* It's a match */ - if (found) - { - inst_len += opcode[0]; - - ASSERT(inst_len <= MAX_INST_LEN); - - if (match) - *match = instr; - - return inst_len; + if ( found ) + goto done; } printk("%s: Mismatch between expected and actual instruction bytes: " "eip = %lx\n", __func__, (unsigned long)vmcb->rip); return 0; + + done: + inst_len += opcode[0]; + ASSERT(inst_len <= MAX_INST_LEN); + if ( match ) + *match = instr; + return inst_len; } /* diff -r 591cfd37bd54 -r 09b53f27a18b xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu Feb 21 15:06:37 2008 +0000 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Feb 21 18:02:42 2008 +0000 @@ -73,6 +73,7 @@ static void svm_fpu_dirty_intercept(void static void svm_fpu_dirty_intercept(void); static int svm_msr_read_intercept(struct cpu_user_regs *regs); static int svm_msr_write_intercept(struct cpu_user_regs *regs); +static void svm_invlpg_intercept(unsigned long vaddr); /* va of hardware host save area */ static void *hsa[NR_CPUS] __read_mostly; @@ -472,28 +473,6 @@ static void svm_sync_vmcb(struct vcpu *v arch_svm->vmcb_in_sync = 1; svm_vmsave(arch_svm->vmcb); -} - -static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg) -{ - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - int long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v); - - switch ( seg ) - { - case x86_seg_cs: return long_mode ? 0 : vmcb->cs.base; - case x86_seg_ds: return long_mode ? 0 : vmcb->ds.base; - case x86_seg_es: return long_mode ? 0 : vmcb->es.base; - case x86_seg_fs: svm_sync_vmcb(v); return vmcb->fs.base; - case x86_seg_gs: svm_sync_vmcb(v); return vmcb->gs.base; - case x86_seg_ss: return long_mode ? 
0 : vmcb->ss.base; - case x86_seg_tr: svm_sync_vmcb(v); return vmcb->tr.base; - case x86_seg_gdtr: return vmcb->gdtr.base; - case x86_seg_idtr: return vmcb->idtr.base; - case x86_seg_ldtr: svm_sync_vmcb(v); return vmcb->ldtr.base; - default: BUG(); - } - return 0; } static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg, @@ -804,7 +783,6 @@ static struct hvm_function_table svm_fun .load_cpu_ctxt = svm_load_vmcb_ctxt, .interrupt_blocked = svm_interrupt_blocked, .guest_x86_mode = svm_guest_x86_mode, - .get_segment_base = svm_get_segment_base, .get_segment_register = svm_get_segment_register, .set_segment_register = svm_set_segment_register, .update_host_cr3 = svm_update_host_cr3, @@ -820,7 +798,8 @@ static struct hvm_function_table svm_fun .wbinvd_intercept = svm_wbinvd_intercept, .fpu_dirty_intercept = svm_fpu_dirty_intercept, .msr_read_intercept = svm_msr_read_intercept, - .msr_write_intercept = svm_msr_write_intercept + .msr_write_intercept = svm_msr_write_intercept, + .invlpg_intercept = svm_invlpg_intercept }; int start_svm(struct cpuinfo_x86 *c) @@ -987,677 +966,10 @@ static void svm_vmexit_do_cpuid(struct c __update_guest_eip(regs, inst_len); } -static unsigned long *get_reg_p( - unsigned int gpreg, - struct cpu_user_regs *regs, struct vmcb_struct *vmcb) -{ - unsigned long *reg_p = NULL; - switch (gpreg) - { - case SVM_REG_EAX: - reg_p = (unsigned long *)®s->eax; - break; - case SVM_REG_EBX: - reg_p = (unsigned long *)®s->ebx; - break; - case SVM_REG_ECX: - reg_p = (unsigned long *)®s->ecx; - break; - case SVM_REG_EDX: - reg_p = (unsigned long *)®s->edx; - break; - case SVM_REG_EDI: - reg_p = (unsigned long *)®s->edi; - break; - case SVM_REG_ESI: - reg_p = (unsigned long *)®s->esi; - break; - case SVM_REG_EBP: - reg_p = (unsigned long *)®s->ebp; - break; - case SVM_REG_ESP: - reg_p = (unsigned long *)®s->esp; - break; -#ifdef __x86_64__ - case SVM_REG_R8: - reg_p = (unsigned long *)®s->r8; - break; - case SVM_REG_R9: - reg_p = (unsigned long *)®s->r9; - break; - case SVM_REG_R10: - reg_p = (unsigned long *)®s->r10; - break; - case SVM_REG_R11: - reg_p = (unsigned long *)®s->r11; - break; - case SVM_REG_R12: - reg_p = (unsigned long *)®s->r12; - break; - case SVM_REG_R13: - reg_p = (unsigned long *)®s->r13; - break; - case SVM_REG_R14: - reg_p = (unsigned long *)®s->r14; - break; - case SVM_REG_R15: - reg_p = (unsigned long *)®s->r15; - break; -#endif - default: - BUG(); - } - - return reg_p; -} - - -static unsigned long get_reg( - unsigned int gpreg, struct cpu_user_regs *regs, struct vmcb_struct *vmcb) -{ - unsigned long *gp; - gp = get_reg_p(gpreg, regs, vmcb); - return *gp; -} - - -static void set_reg( - unsigned int gpreg, unsigned long value, - struct cpu_user_regs *regs, struct vmcb_struct *vmcb) -{ - unsigned long *gp; - gp = get_reg_p(gpreg, regs, vmcb); - *gp = value; -} - - static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs) { HVMTRACE_0D(DR_WRITE, v); __restore_debug_registers(v); -} - - -static void svm_get_prefix_info(struct vcpu *v, unsigned int dir, - svm_segment_register_t **seg, - unsigned int *asize) -{ - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - unsigned char inst[MAX_INST_LEN]; - int i; - - memset(inst, 0, MAX_INST_LEN); - if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst)) - != MAX_INST_LEN) - { - gdprintk(XENLOG_ERR, "get guest instruction failed\n"); - domain_crash(current->domain); - return; - } - - for (i = 0; i < MAX_INST_LEN; i++) - { - switch (inst[i]) - { - case 0xf3: /* REPZ */ - case 0xf2: /* 
REPNZ */ - case 0xf0: /* LOCK */ - case 0x66: /* data32 */ -#ifdef __x86_64__ - /* REX prefixes */ - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - - case 0x48: - case 0x49: - case 0x4a: - case 0x4b: - case 0x4c: - case 0x4d: - case 0x4e: - case 0x4f: -#endif - continue; - case 0x67: /* addr32 */ - *asize ^= 48; /* Switch 16/32 bits */ - continue; - case 0x2e: /* CS */ - *seg = &vmcb->cs; - continue; - case 0x36: /* SS */ - *seg = &vmcb->ss; - continue; - case 0x26: /* ES */ - *seg = &vmcb->es; - continue; - case 0x64: /* FS */ - svm_sync_vmcb(v); - *seg = &vmcb->fs; - continue; - case 0x65: /* GS */ - svm_sync_vmcb(v); - *seg = &vmcb->gs; - continue; - case 0x3e: /* DS */ - *seg = &vmcb->ds; - continue; - default: - break; - } - return; - } -} - - -/* Get the address of INS/OUTS instruction */ -static int svm_get_io_address( - struct vcpu *v, struct cpu_user_regs *regs, - unsigned int size, ioio_info_t info, - unsigned long *count, unsigned long *addr) -{ - unsigned long reg; - unsigned int asize, isize; - int long_mode = 0; - svm_segment_register_t *seg = NULL; - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - - /* If we're in long mode, don't check the segment presence & limit */ - long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v); - - /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit. - * l field combined with EFER_LMA says whether it's 16 or 64 bit. - */ - asize = (long_mode)?64:((vmcb->cs.attr.fields.db)?32:16); - - - /* The ins/outs instructions are single byte, so if we have got more - * than one byte (+ maybe rep-prefix), we have some prefix so we need - * to figure out what it is... - */ - isize = vmcb->exitinfo2 - regs->eip; - - if (info.fields.rep) - isize --; - - if (isize > 1) - svm_get_prefix_info(v, info.fields.type, &seg, &asize); - - if (info.fields.type == IOREQ_WRITE) - { - reg = regs->esi; - if (!seg) /* If no prefix, used DS. */ - seg = &vmcb->ds; - if (!long_mode && (seg->attr.fields.type & 0xa) == 0x8) { - svm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; - } - } - else - { - reg = regs->edi; - seg = &vmcb->es; /* Note: This is ALWAYS ES. */ - if (!long_mode && (seg->attr.fields.type & 0xa) != 0x2) { - svm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; - } - } - - /* If the segment isn't present, give GP fault! */ - if (!long_mode && !seg->attr.fields.p) - { - svm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; - } - - if (asize == 16) - { - *addr = (reg & 0xFFFF); - *count = regs->ecx & 0xffff; - } - else - { - *addr = reg; - *count = regs->ecx; - } - if (!info.fields.rep) - *count = 1; - - if (!long_mode) - { - ASSERT(*addr == (u32)*addr); - if ((u32)(*addr + size - 1) < (u32)*addr || - (seg->attr.fields.type & 0xc) != 0x4 ? - *addr + size - 1 > seg->limit : - *addr <= seg->limit) - { - svm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; - } - - /* Check the limit for repeated instructions, as above we checked only - the first instance. Truncate the count if a limit violation would - occur. Note that the checking is not necessary for page granular - segments as transfers crossing page boundaries will be broken up - anyway. 
*/ - if (!seg->attr.fields.g && *count > 1) - { - if ((seg->attr.fields.type & 0xc) != 0x4) - { - /* expand-up */ - if (!(regs->eflags & EF_DF)) - { - if (*addr + *count * size - 1 < *addr || - *addr + *count * size - 1 > seg->limit) - *count = (seg->limit + 1UL - *addr) / size; - } - else - { - if (*count - 1 > *addr / size) - *count = *addr / size + 1; - } - } - else - { - /* expand-down */ - if (!(regs->eflags & EF_DF)) - { - if (*count - 1 > -(s32)*addr / size) - *count = -(s32)*addr / size + 1UL; - } - else - { - if (*addr < (*count - 1) * size || - *addr - (*count - 1) * size <= seg->limit) - *count = (*addr - seg->limit - 1) / size + 1; - } - } - ASSERT(*count); - } - - *addr += seg->base; - } -#ifdef __x86_64__ - else - { - if (seg == &vmcb->fs || seg == &vmcb->gs) - *addr += seg->base; - - if (!is_canonical_address(*addr) || - !is_canonical_address(*addr + size - 1)) - { - svm_inject_exception(TRAP_gp_fault, 0, 0); - return 0; - } - if (*count > (1UL << 48) / size) - *count = (1UL << 48) / size; - if (!(regs->eflags & EF_DF)) - { - if (*addr + *count * size - 1 < *addr || - !is_canonical_address(*addr + *count * size - 1)) - *count = (*addr & ~((1UL << 48) - 1)) / size; - } - else - { - if ((*count - 1) * size > *addr || - !is_canonical_address(*addr + (*count - 1) * size)) - *count = (*addr & ~((1UL << 48) - 1)) / size + 1; - } - ASSERT(*count); - } -#endif - - return 1; -} - - -static void svm_io_instruction(struct vcpu *v) -{ - struct cpu_user_regs *regs; - struct hvm_io_op *pio_opp; - unsigned int port; - unsigned int size, dir, df; - ioio_info_t info; - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - - pio_opp = ¤t->arch.hvm_vcpu.io_op; - pio_opp->instr = INSTR_PIO; - pio_opp->flags = 0; - - regs = &pio_opp->io_context; - - /* Copy current guest state into io instruction state structure. */ - memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES); - - info.bytes = vmcb->exitinfo1; - - port = info.fields.port; /* port used to be addr */ - dir = info.fields.type; /* direction */ - df = regs->eflags & X86_EFLAGS_DF ? 1 : 0; - - if (info.fields.sz32) - size = 4; - else if (info.fields.sz16) - size = 2; - else - size = 1; - - if (dir==IOREQ_READ) - HVMTRACE_2D(IO_READ, v, port, size); - else - HVMTRACE_3D(IO_WRITE, v, port, size, regs->eax); - - HVM_DBG_LOG(DBG_LEVEL_IO, - "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", " - "exit_qualification = %"PRIx64, - port, vmcb->cs.sel, (uint64_t)regs->eip, info.bytes); - - /* string instruction */ - if (info.fields.str) - { - unsigned long addr, count; - paddr_t paddr; - unsigned long gfn; - uint32_t pfec; - int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1; - - if (!svm_get_io_address(v, regs, size, info, &count, &addr)) - { - /* We failed to get a valid address, so don't do the IO operation - - * it would just get worse if we do! Hopefully the guest is handing - * gp-faults... - */ - return; - } - - /* "rep" prefix */ - if (info.fields.rep) - { - pio_opp->flags |= REPZ; - } - - /* Translate the address to a physical address */ - pfec = PFEC_page_present; - if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */ - pfec |= PFEC_write_access; - if ( vmcb->cpl == 3 ) - pfec |= PFEC_user_mode; - gfn = paging_gva_to_gfn(v, addr, &pfec); - if ( gfn == INVALID_GFN ) - { - /* The guest does not have the RAM address mapped. 
- * Need to send in a page fault */ - svm_inject_exception(TRAP_page_fault, pfec, addr); - return; - } - paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK); - - /* - * Handle string pio instructions that cross pages or that - * are unaligned. See the comments in hvm_platform.c/handle_mmio() - */ - if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) - { - unsigned long value = 0; - - pio_opp->flags |= OVERLAP; - pio_opp->addr = addr; - - if (dir == IOREQ_WRITE) /* OUTS */ - { - if ( hvm_paging_enabled(current) ) - { - int rv = hvm_copy_from_guest_virt(&value, addr, size); - if ( rv == HVMCOPY_bad_gva_to_gfn ) - return; /* exception already injected */ - } - else - (void)hvm_copy_from_guest_phys(&value, addr, size); - } - else /* dir != IOREQ_WRITE */ - /* Remember where to write the result, as a *VA*. - * Must be a VA so we can handle the page overlap - * correctly in hvm_pio_assist() */ - pio_opp->addr = addr; - - if (count == 1) - regs->eip = vmcb->exitinfo2; - - send_pio_req(port, 1, size, value, dir, df, 0); - } - else - { - unsigned long last_addr = sign > 0 ? addr + count * size - 1 - : addr - (count - 1) * size; - - if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK)) - { - if (sign > 0) - count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size; - else - count = (addr & ~PAGE_MASK) / size + 1; - } - else - regs->eip = vmcb->exitinfo2; - - send_pio_req(port, count, size, paddr, dir, df, 1); - } - } - else - { - /* - * On SVM, the RIP of the intruction following the IN/OUT is saved in - * ExitInfo2 - */ - regs->eip = vmcb->exitinfo2; - - if (port == 0xe9 && dir == IOREQ_WRITE && size == 1) - hvm_print_line(v, regs->eax); /* guest debug output */ - - send_pio_req(port, 1, size, regs->eax, dir, df, 0); - } -} - -static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs) -{ - unsigned long value = 0; - struct vcpu *v = current; - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - - switch ( cr ) - { - case 0: - value = v->arch.hvm_vcpu.guest_cr[0]; - break; - case 3: - value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3]; - break; - case 4: - value = (unsigned long)v->arch.hvm_vcpu.guest_cr[4]; - break; - default: - gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr); - domain_crash(v->domain); - return; - } - - HVMTRACE_2D(CR_READ, v, cr, value); - - set_reg(gp, value, regs, vmcb); - - HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx", cr, value); -} - -static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs) -{ - unsigned long value; - struct vcpu *v = current; - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - - value = get_reg(gpreg, regs, vmcb); - - HVMTRACE_2D(CR_WRITE, v, cr, value); - - HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, current = %p", - cr, value, v); - - switch ( cr ) - { - case 0: - return !hvm_set_cr0(value); - case 3: - return !hvm_set_cr3(value); - case 4: - return !hvm_set_cr4(value); - default: - gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr); - domain_crash(v->domain); - return 0; - } - - return 1; -} - -static void svm_cr_access( - struct vcpu *v, unsigned int cr, unsigned int type, - struct cpu_user_regs *regs) -{ - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - int inst_len = 0; - int index,addr_size,i; - unsigned int gpreg,offset; - unsigned long value,addr; - u8 buffer[MAX_INST_LEN]; - u8 prefix = 0; - u8 modrm; - enum x86_segment seg; - int result = 1; - enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW}; - enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW}; - enum instruction_index 
match; - - inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer)); - - /* get index to first actual instruction byte - as we will need to know - where the prefix lives later on */ - index = skip_prefix_bytes(buffer, sizeof(buffer)); - - if ( type == TYPE_MOV_TO_CR ) - { - inst_len = __get_instruction_length_from_list( - v, list_a, ARRAY_SIZE(list_a), &buffer[index], &match); - } - else /* type == TYPE_MOV_FROM_CR */ - { - inst_len = __get_instruction_length_from_list( - v, list_b, ARRAY_SIZE(list_b), &buffer[index], &match); - } - - inst_len += index; - - /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */ - if (index > 0 && (buffer[index-1] & 0xF0) == 0x40) - prefix = buffer[index-1]; - - HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long)regs->eip); - - switch ( match ) - - { - case INSTR_MOV2CR: - gpreg = decode_src_reg(prefix, buffer[index+2]); - result = mov_to_cr(gpreg, cr, regs); - break; - - case INSTR_MOVCR2: - gpreg = decode_src_reg(prefix, buffer[index+2]); - mov_from_cr(cr, gpreg, regs); - break; - - case INSTR_CLTS: - v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; - svm_update_guest_cr(v, 0); - HVMTRACE_0D(CLTS, current); - break; - - case INSTR_LMSW: - gpreg = decode_src_reg(prefix, buffer[index+2]); - value = get_reg(gpreg, regs, vmcb) & 0xF; - value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value; - result = !hvm_set_cr0(value); - HVMTRACE_1D(LMSW, current, value); - break; - - case INSTR_SMSW: - value = v->arch.hvm_vcpu.guest_cr[0] & 0xFFFF; - modrm = buffer[index+2]; - addr_size = svm_guest_x86_mode(v); - if ( addr_size < 2 ) - addr_size = 2; - if ( likely((modrm & 0xC0) >> 6 == 3) ) - { - gpreg = decode_src_reg(prefix, modrm); - set_reg(gpreg, value, regs, vmcb); - } - /* - * For now, only implement decode of the offset mode, since that's the - * only mode observed in a real-world OS. This code is also making the - * assumption that we'll never hit this code in long mode. - */ - else if ( (modrm == 0x26) || (modrm == 0x25) ) - { - seg = x86_seg_ds; - i = index; - /* Segment or address size overrides? */ - while ( i-- ) - { - switch ( buffer[i] ) - { - case 0x26: seg = x86_seg_es; break; - case 0x2e: seg = x86_seg_cs; break; - case 0x36: seg = x86_seg_ss; break; - case 0x64: seg = x86_seg_fs; break; - case 0x65: seg = x86_seg_gs; break; - case 0x67: addr_size ^= 6; break; - } - } - /* Bail unless this really is a seg_base + offset case */ - if ( ((modrm == 0x26) && (addr_size == 4)) || - ((modrm == 0x25) && (addr_size == 2)) ) - { - gdprintk(XENLOG_ERR, "SMSW emulation at guest address: " - "%lx failed due to unhandled addressing mode." - "ModRM byte was: %x \n", svm_rip2pointer(v), modrm); - domain_crash(v->domain); - } - inst_len += addr_size; - offset = *(( unsigned int *) ( void *) &buffer[index + 3]); - offset = ( addr_size == 4 ) ? offset : ( offset & 0xFFFF ); - addr = hvm_get_segment_base(v, seg); - addr += offset; - result = (hvm_copy_to_guest_virt(addr, &value, 2) - != HVMCOPY_bad_gva_to_gfn); - } - else - { - gdprintk(XENLOG_ERR, "SMSW emulation at guest address: %lx " - "failed due to unhandled addressing mode!" 
- "ModRM byte was: %x \n", svm_rip2pointer(v), modrm); - domain_crash(v->domain); - } - break; - - default: - BUG(); - } - - if ( result ) - __update_guest_eip(regs, inst_len); } static int svm_msr_read_intercept(struct cpu_user_regs *regs) @@ -1899,68 +1211,12 @@ static void svm_vmexit_do_invalidate_cac __update_guest_eip(regs, inst_len); } -void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs) -{ - struct vcpu *v = current; - u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN; - unsigned long g_vaddr; - int inst_len; - - /* - * Unknown how many bytes the invlpg instruction will take. Use the - * maximum instruction length here - */ - if ( inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length ) - { - gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length); - goto crash; - } - - if ( invlpga ) - { - inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode); - __update_guest_eip(regs, inst_len); - - /* - * The address is implicit on this instruction. At the moment, we don't - * use ecx (ASID) to identify individual guests pages - */ - g_vaddr = regs->eax; - } - else - { - /* What about multiple prefix codes? */ - prefix = (is_prefix(opcode[0]) ? opcode[0] : 0); - inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode); - if ( inst_len <= 0 ) - { - gdprintk(XENLOG_ERR, "Error getting invlpg instr len\n"); - goto crash; - } - - inst_len--; - length -= inst_len; - - /* - * Decode memory operand of the instruction including ModRM, SIB, and - * displacement to get effective address and length in bytes. Assume - * the system in either 32- or 64-bit mode. - */ - g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len, - &opcode[inst_len], &length); - - inst_len += length; - __update_guest_eip(regs, inst_len); - } - - HVMTRACE_3D(INVLPG, v, !!invlpga, g_vaddr, (invlpga ? regs->ecx : 0)); - - paging_invlpg(v, g_vaddr); - svm_asid_g_invlpg(v, g_vaddr); - return; - - crash: - domain_crash(v->domain); +static void svm_invlpg_intercept(unsigned long vaddr) +{ + struct vcpu *curr = current; + HVMTRACE_2D(INVLPG, curr, 0, vaddr); + paging_invlpg(curr, vaddr); + svm_asid_g_invlpg(curr, vaddr); } asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs) @@ -2094,12 +1350,13 @@ asmlinkage void svm_vmexit_handler(struc svm_vmexit_do_hlt(vmcb, regs); break; + case VMEXIT_CR0_READ ... VMEXIT_CR15_READ: + case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE: case VMEXIT_INVLPG: - svm_handle_invlpg(0, regs); - break; - case VMEXIT_INVLPGA: - svm_handle_invlpg(1, regs); + case VMEXIT_IOIO: + if ( !handle_mmio() ) + hvm_inject_exception(TRAP_gp_fault, 0, 0); break; case VMEXIT_VMMCALL: @@ -2114,23 +1371,9 @@ asmlinkage void svm_vmexit_handler(struc } break; - case VMEXIT_CR0_READ ... VMEXIT_CR15_READ: - svm_cr_access(v, exit_reason - VMEXIT_CR0_READ, - TYPE_MOV_FROM_CR, regs); - break; - - case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE: - svm_cr_access(v, exit_reason - VMEXIT_CR0_WRITE, - TYPE_MOV_TO_CR, regs); - break; - case VMEXIT_DR0_READ ... VMEXIT_DR7_READ: case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE: svm_dr_access(v, regs); - break; - - case VMEXIT_IOIO: - svm_io_instruction(v); break; case VMEXIT_MSR: @@ -2176,10 +1419,7 @@ asmlinkage void svm_vmexit_handler(struc asmlinkage void svm_trace_vmentry(void) { - struct vcpu *v = current; - - /* This is the last C code before the VMRUN instruction. 
*/ - hvmtrace_vmentry(v); + hvmtrace_vmentry(current); } /* diff -r 591cfd37bd54 -r 09b53f27a18b xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Feb 21 15:06:37 2008 +0000 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Feb 21 18:02:42 2008 +0000 @@ -67,6 +67,7 @@ static void vmx_fpu_dirty_intercept(void static void vmx_fpu_dirty_intercept(void); static int vmx_msr_read_intercept(struct cpu_user_regs *regs); static int vmx_msr_write_intercept(struct cpu_user_regs *regs); +static void vmx_invlpg_intercept(unsigned long vaddr); static int vmx_domain_initialise(struct domain *d) { @@ -699,35 +700,6 @@ static void vmx_ctxt_switch_to(struct vc vmx_restore_guest_msrs(v); vmx_restore_dr(v); vpmu_load(v); -} - -static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg) -{ - unsigned long base = 0; - int long_mode = 0; - - ASSERT(v == current); - - if ( hvm_long_mode_enabled(v) && - (__vmread(GUEST_CS_AR_BYTES) & X86_SEG_AR_CS_LM_ACTIVE) ) - long_mode = 1; - - switch ( seg ) - { - case x86_seg_cs: if ( !long_mode ) base = __vmread(GUEST_CS_BASE); break; - case x86_seg_ds: if ( !long_mode ) base = __vmread(GUEST_DS_BASE); break; - case x86_seg_es: if ( !long_mode ) base = __vmread(GUEST_ES_BASE); break; - case x86_seg_fs: base = __vmread(GUEST_FS_BASE); break; - case x86_seg_gs: base = __vmread(GUEST_GS_BASE); break; - case x86_seg_ss: if ( !long_mode ) base = __vmread(GUEST_SS_BASE); break; - case x86_seg_tr: base = __vmread(GUEST_TR_BASE); break; - case x86_seg_gdtr: base = __vmread(GUEST_GDTR_BASE); break; - case x86_seg_idtr: base = __vmread(GUEST_IDTR_BASE); break; - case x86_seg_ldtr: base = __vmread(GUEST_LDTR_BASE); break; - default: BUG(); break; - } - - return base; } static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg, @@ -1068,7 +1040,6 @@ static struct hvm_function_table vmx_fun .load_cpu_ctxt = vmx_load_vmcs_ctxt, .interrupt_blocked = vmx_interrupt_blocked, .guest_x86_mode = vmx_guest_x86_mode, - .get_segment_base = vmx_get_segment_base, .get_segment_register = vmx_get_segment_register, .set_segment_register = vmx_set_segment_register, .update_host_cr3 = vmx_update_host_cr3, @@ -1086,7 +1057,8 @@ static struct hvm_function_table vmx_fun .wbinvd_intercept = vmx_wbinvd_intercept, .fpu_dirty_intercept = vmx_fpu_dirty_intercept, .msr_read_intercept = vmx_msr_read_intercept, - .msr_write_intercept = vmx_msr_write_intercept + .msr_write_intercept = vmx_msr_write_intercept, + .invlpg_intercept = vmx_invlpg_intercept }; void start_vmx(void) @@ -1261,452 +1233,11 @@ static void vmx_dr_access(unsigned long __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control); } -/* - * Invalidate the TLB for va. Invalidate the shadow page corresponding - * the address va. - */ -static void vmx_do_invlpg(unsigned long va) -{ - struct vcpu *v = current; - - HVMTRACE_2D(INVLPG, v, /*invlpga=*/ 0, va); - - /* - * We do the safest things first, then try to update the shadow - * copying from guest - */ - paging_invlpg(v, va); -} - -/* Get segment for OUTS according to guest instruction. */ -static enum x86_segment vmx_outs_get_segment( - int long_mode, unsigned long eip, int inst_len) -{ - unsigned char inst[MAX_INST_LEN]; - enum x86_segment seg = x86_seg_ds; - int i; - extern int inst_copy_from_guest(unsigned char *, unsigned long, int); - - if ( likely(cpu_has_vmx_ins_outs_instr_info) ) - { - unsigned int instr_info = __vmread(VMX_INSTRUCTION_INFO); - - /* Get segment register according to bits 17:15. 
*/ - switch ( (instr_info >> 15) & 7 ) - { - case 0: seg = x86_seg_es; break; - case 1: seg = x86_seg_cs; break; - case 2: seg = x86_seg_ss; break; - case 3: seg = x86_seg_ds; break; - case 4: seg = x86_seg_fs; break; - case 5: seg = x86_seg_gs; break; - default: BUG(); - } - - goto out; - } - - if ( !long_mode ) - eip += __vmread(GUEST_CS_BASE); - - memset(inst, 0, MAX_INST_LEN); - if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len ) - { - gdprintk(XENLOG_ERR, "Get guest instruction failed\n"); - domain_crash(current->domain); - goto out; - } - - for ( i = 0; i < inst_len; i++ ) - { - switch ( inst[i] ) - { - case 0xf3: /* REPZ */ - case 0xf2: /* REPNZ */ - case 0xf0: /* LOCK */ - case 0x66: /* data32 */ - case 0x67: /* addr32 */ -#ifdef __x86_64__ - case 0x40 ... 0x4f: /* REX */ -#endif - continue; - case 0x2e: /* CS */ - seg = x86_seg_cs; - continue; - case 0x36: /* SS */ - seg = x86_seg_ss; - continue; - case 0x26: /* ES */ - seg = x86_seg_es; - continue; - case 0x64: /* FS */ - seg = x86_seg_fs; - continue; - case 0x65: /* GS */ - seg = x86_seg_gs; - continue; - case 0x3e: /* DS */ - seg = x86_seg_ds; - continue; - } - } - - out: - return seg; -} - -static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip, - int inst_len, enum x86_segment seg, - unsigned long *base, u32 *limit, - u32 *ar_bytes) -{ - enum vmcs_field ar_field, base_field, limit_field; - - *base = 0; - *limit = 0; - if ( seg != x86_seg_es ) - seg = vmx_outs_get_segment(long_mode, eip, inst_len); - - switch ( seg ) - { - case x86_seg_cs: - ar_field = GUEST_CS_AR_BYTES; - base_field = GUEST_CS_BASE; - limit_field = GUEST_CS_LIMIT; - break; - case x86_seg_ds: - ar_field = GUEST_DS_AR_BYTES; - base_field = GUEST_DS_BASE; - limit_field = GUEST_DS_LIMIT; - break; - case x86_seg_es: - ar_field = GUEST_ES_AR_BYTES; - base_field = GUEST_ES_BASE; - limit_field = GUEST_ES_LIMIT; - break; - case x86_seg_fs: - ar_field = GUEST_FS_AR_BYTES; - base_field = GUEST_FS_BASE; - limit_field = GUEST_FS_LIMIT; - break; - case x86_seg_gs: - ar_field = GUEST_GS_AR_BYTES; - base_field = GUEST_GS_BASE; - limit_field = GUEST_GS_LIMIT; - break; - case x86_seg_ss: - ar_field = GUEST_SS_AR_BYTES; - base_field = GUEST_SS_BASE; - limit_field = GUEST_SS_LIMIT; - break; - default: - BUG(); - return 0; - } - - if ( !long_mode || seg == x86_seg_fs || seg == x86_seg_gs ) - { - *base = __vmread(base_field); - *limit = __vmread(limit_field); - } - *ar_bytes = __vmread(ar_field); - - return !(*ar_bytes & X86_SEG_AR_SEG_UNUSABLE); -} - - -static int vmx_str_pio_check_limit(u32 limit, unsigned int size, - u32 ar_bytes, unsigned long addr, - unsigned long base, int df, - unsigned long *count) -{ - unsigned long ea = addr - base; - - /* Offset must be within limits. */ - ASSERT(ea == (u32)ea); - if ( (u32)(ea + size - 1) < (u32)ea || - (ar_bytes & 0xc) != 0x4 ? ea + size - 1 > limit - : ea <= limit ) - return 0; - - /* Check the limit for repeated instructions, as above we checked - only the first instance. Truncate the count if a limit violation - would occur. Note that the checking is not necessary for page - granular segments as transfers crossing page boundaries will be - broken up anyway. 
*/ - if ( !(ar_bytes & X86_SEG_AR_GRANULARITY) && *count > 1 ) - { - if ( (ar_bytes & 0xc) != 0x4 ) - { - /* expand-up */ - if ( !df ) - { - if ( ea + *count * size - 1 < ea || - ea + *count * size - 1 > limit ) - *count = (limit + 1UL - ea) / size; - } - else - { - if ( *count - 1 > ea / size ) - *count = ea / size + 1; - } - } - else - { - /* expand-down */ - if ( !df ) - { - if ( *count - 1 > -(s32)ea / size ) - *count = -(s32)ea / size + 1UL; - } - else - { - if ( ea < (*count - 1) * size || - ea - (*count - 1) * size <= limit ) - *count = (ea - limit - 1) / size + 1; - } - } - ASSERT(*count); - } - - return 1; -} - -#ifdef __x86_64__ -static int vmx_str_pio_lm_check_limit(struct cpu_user_regs *regs, - unsigned int size, - unsigned long addr, - unsigned long *count) -{ - if ( !is_canonical_address(addr) || - !is_canonical_address(addr + size - 1) ) - return 0; - - if ( *count > (1UL << 48) / size ) - *count = (1UL << 48) / size; - - if ( !(regs->eflags & EF_DF) ) - { - if ( addr + *count * size - 1 < addr || - !is_canonical_address(addr + *count * size - 1) ) - *count = (addr & ~((1UL << 48) - 1)) / size; - } - else - { - if ( (*count - 1) * size > addr || - !is_canonical_address(addr + (*count - 1) * size) ) - *count = (addr & ~((1UL << 48) - 1)) / size + 1; - } - - ASSERT(*count); - - return 1; -} -#endif - -static void vmx_send_str_pio(struct cpu_user_regs *regs, - struct hvm_io_op *pio_opp, - unsigned long inst_len, unsigned int port, - int sign, unsigned int size, int dir, - int df, unsigned long addr, - paddr_t paddr, unsigned long count) -{ - /* - * Handle string pio instructions that cross pages or that - * are unaligned. See the comments in hvm_domain.c/handle_mmio() - */ - if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) { - unsigned long value = 0; - - pio_opp->flags |= OVERLAP; - - if ( dir == IOREQ_WRITE ) /* OUTS */ - { - if ( hvm_paging_enabled(current) ) - { - int rv = hvm_copy_from_guest_virt(&value, addr, size); - if ( rv == HVMCOPY_bad_gva_to_gfn ) - return; /* exception already injected */ - } - else - (void)hvm_copy_from_guest_phys(&value, addr, size); - } - else /* dir != IOREQ_WRITE */ - /* Remember where to write the result, as a *VA*. - * Must be a VA so we can handle the page overlap - * correctly in hvm_pio_assist() */ - pio_opp->addr = addr; - - if ( count == 1 ) - regs->eip += inst_len; - - send_pio_req(port, 1, size, value, dir, df, 0); - } else { - unsigned long last_addr = sign > 0 ? addr + count * size - 1 - : addr - (count - 1) * size; - - if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) ) - { - if ( sign > 0 ) - count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size; - else - count = (addr & ~PAGE_MASK) / size + 1; - } else - regs->eip += inst_len; - - send_pio_req(port, count, size, paddr, dir, df, 1); - } -} - -static void vmx_do_str_pio(unsigned long exit_qualification, - unsigned long inst_len, - struct cpu_user_regs *regs, - struct hvm_io_op *pio_opp) -{ - unsigned int port, size; - int dir, df, vm86; - unsigned long addr, count = 1, base; - paddr_t paddr; - unsigned long gfn; - u32 ar_bytes, limit, pfec; - int sign; - int long_mode = 0; - - vm86 = regs->eflags & X86_EFLAGS_VM ? 1 : 0; - df = regs->eflags & X86_EFLAGS_DF ? 
1 : 0; - - if ( test_bit(6, &exit_qualification) ) - port = (exit_qualification >> 16) & 0xFFFF; - else - port = regs->edx & 0xffff; - - size = (exit_qualification & 7) + 1; - dir = test_bit(3, &exit_qualification); /* direction */ - - if ( dir == IOREQ_READ ) - HVMTRACE_2D(IO_READ, current, port, size); - else - HVMTRACE_2D(IO_WRITE, current, port, size); - - sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1; - ar_bytes = __vmread(GUEST_CS_AR_BYTES); - if ( hvm_long_mode_enabled(current) && - (ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) ) - long_mode = 1; - addr = __vmread(GUEST_LINEAR_ADDRESS); - - if ( test_bit(5, &exit_qualification) ) { /* "rep" prefix */ - pio_opp->flags |= REPZ; - count = regs->ecx; - if ( !long_mode && - (vm86 || !(ar_bytes & X86_SEG_AR_DEF_OP_SIZE)) ) - count &= 0xFFFF; - } - - /* - * In protected mode, guest linear address is invalid if the - * selector is null. - */ - if ( !vmx_str_pio_check_descriptor(long_mode, regs->eip, inst_len, - dir==IOREQ_WRITE ? x86_seg_ds : - x86_seg_es, &base, &limit, - &ar_bytes) ) { - if ( !long_mode ) { - vmx_inject_hw_exception(current, TRAP_gp_fault, 0); - return; - } - addr = dir == IOREQ_WRITE ? base + regs->esi : regs->edi; - } - - if ( !long_mode ) - { - /* Segment must be readable for outs and writeable for ins. */ - if ( ((dir == IOREQ_WRITE) - ? ((ar_bytes & 0xa) == 0x8) - : ((ar_bytes & 0xa) != 0x2)) || - !vmx_str_pio_check_limit(limit, size, ar_bytes, - addr, base, df, &count) ) - { - vmx_inject_hw_exception(current, TRAP_gp_fault, 0); - return; - } - } -#ifdef __x86_64__ - else if ( !vmx_str_pio_lm_check_limit(regs, size, addr, &count) ) - { - vmx_inject_hw_exception(current, TRAP_gp_fault, 0); - return; - } -#endif - - /* Translate the address to a physical address */ - pfec = PFEC_page_present; - if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */ - pfec |= PFEC_write_access; - if ( ((__vmread(GUEST_SS_AR_BYTES) >> 5) & 3) == 3 ) - pfec |= PFEC_user_mode; - gfn = paging_gva_to_gfn(current, addr, &pfec); - if ( gfn == INVALID_GFN ) - { - /* The guest does not have the RAM address mapped. - * Need to send in a page fault */ - vmx_inject_exception(TRAP_page_fault, pfec, addr); - return; - } - paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK); - - vmx_send_str_pio(regs, pio_opp, inst_len, port, sign, - size, dir, df, addr, paddr, count); -} - -static void vmx_io_instruction(unsigned long exit_qualification, - unsigned long inst_len) -{ - struct cpu_user_regs *regs; - struct hvm_io_op *pio_opp; - - pio_opp = ¤t->arch.hvm_vcpu.io_op; - pio_opp->instr = INSTR_PIO; - pio_opp->flags = 0; - - regs = &pio_opp->io_context; - - /* Copy current guest state into io instruction state structure. */ - memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES); - - HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, " - "exit_qualification = %lx", - regs->eflags & X86_EFLAGS_VM ? 1 : 0, - regs->cs, (unsigned long)regs->eip, exit_qualification); - - if ( test_bit(4, &exit_qualification) ) /* string instrucation */ - vmx_do_str_pio(exit_qualification, inst_len, regs, pio_opp); - else - { - unsigned int port, size; - int dir, df; - - df = regs->eflags & X86_EFLAGS_DF ? 
1 : 0; - - if ( test_bit(6, &exit_qualification) ) - port = (exit_qualification >> 16) & 0xFFFF; - else - port = regs->edx & 0xffff; - - size = (exit_qualification & 7) + 1; - dir = test_bit(3, &exit_qualification); /* direction */ - - if ( dir == IOREQ_READ ) - HVMTRACE_2D(IO_READ, current, port, size); - else - HVMTRACE_3D(IO_WRITE, current, port, size, regs->eax); - - if ( port == 0xe9 && dir == IOREQ_WRITE && size == 1 ) - hvm_print_line(current, regs->eax); /* guest debug output */ - - regs->eip += inst_len; - send_pio_req(port, 1, size, regs->eax, dir, df, 0); - } +static void vmx_invlpg_intercept(unsigned long vaddr) +{ + struct vcpu *curr = current; + HVMTRACE_2D(INVLPG, curr, /*invlpga=*/ 0, vaddr); + paging_invlpg(curr, vaddr); } #define CASE_SET_REG(REG, reg) \ @@ -2541,7 +2072,7 @@ asmlinkage void vmx_vmexit_handler(struc inst_len = __get_instruction_length(); /* Safe: INVLPG */ __update_guest_eip(inst_len); exit_qualification = __vmread(EXIT_QUALIFICATION); - vmx_do_invlpg(exit_qualification); + vmx_invlpg_intercept(exit_qualification); break; } case EXIT_REASON_VMCALL: @@ -2569,11 +2100,6 @@ asmlinkage void vmx_vmexit_handler(struc case EXIT_REASON_DR_ACCESS: exit_qualification = __vmread(EXIT_QUALIFICATION); vmx_dr_access(exit_qualification, regs); - break; - case EXIT_REASON_IO_INSTRUCTION: - exit_qualification = __vmread(EXIT_QUALIFICATION); - inst_len = __get_instruction_length(); /* Safe: IN, INS, OUT, OUTS */ - vmx_io_instruction(exit_qualification, inst_len); break; case EXIT_REASON_MSR_READ: inst_len = __get_instruction_length(); /* Safe: RDMSR */ @@ -2603,15 +2129,11 @@ asmlinkage void vmx_vmexit_handler(struc case EXIT_REASON_TPR_BELOW_THRESHOLD: break; + case EXIT_REASON_IO_INSTRUCTION: case EXIT_REASON_APIC_ACCESS: - { - unsigned long offset; - exit_qualification = __vmread(EXIT_QUALIFICATION); - offset = exit_qualification & 0x0fffUL; if ( !handle_mmio() ) hvm_inject_exception(TRAP_gp_fault, 0, 0); break; - } case EXIT_REASON_INVD: case EXIT_REASON_WBINVD: @@ -2632,9 +2154,7 @@ asmlinkage void vmx_vmexit_handler(struc asmlinkage void vmx_trace_vmentry(void) { - struct vcpu *v = current; - - hvmtrace_vmentry(v); + hvmtrace_vmentry(current); } /* diff -r 591cfd37bd54 -r 09b53f27a18b xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Thu Feb 21 15:06:37 2008 +0000 +++ b/xen/arch/x86/x86_emulate.c Thu Feb 21 18:02:42 2008 +0000 @@ -3036,6 +3036,17 @@ x86_emulate( struct segment_register reg; unsigned long base, limit, cr0, cr0w; + if ( modrm == 0xdf ) /* invlpga */ + { + generate_exception_if(in_realmode(ctxt, ops), EXC_UD); + generate_exception_if(!mode_ring0(), EXC_GP); + fail_if(ops->invlpg == NULL); + if ( (rc = ops->invlpg(x86_seg_none, truncate_ea(_regs.eax), + ctxt)) ) + goto done; + break; + } + switch ( modrm_reg & 7 ) { case 0: /* sgdt */ @@ -3096,6 +3107,13 @@ x86_emulate( if ( (rc = ops->write_cr(0, cr0, ctxt)) ) goto done; break; + case 7: /* invlpg */ + generate_exception_if(!mode_ring0(), EXC_GP); + generate_exception_if(ea.type != OP_MEM, EXC_UD); + fail_if(ops->invlpg == NULL); + if ( (rc = ops->invlpg(ea.mem.seg, ea.mem.off, ctxt)) ) + goto done; + break; default: goto cannot_emulate; } diff -r 591cfd37bd54 -r 09b53f27a18b xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Thu Feb 21 15:06:37 2008 +0000 +++ b/xen/include/asm-x86/hvm/hvm.h Thu Feb 21 18:02:42 2008 +0000 @@ -81,7 +81,6 @@ struct hvm_function_table { */ enum hvm_intblk (*interrupt_blocked)(struct vcpu *v, struct hvm_intack); int (*guest_x86_mode)(struct vcpu 
*v); - unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg); void (*get_segment_register)(struct vcpu *v, enum x86_segment seg, struct segment_register *reg); void (*set_segment_register)(struct vcpu *v, enum x86_segment seg, @@ -126,6 +125,7 @@ struct hvm_function_table { void (*fpu_dirty_intercept)(void); int (*msr_read_intercept)(struct cpu_user_regs *regs); int (*msr_write_intercept)(struct cpu_user_regs *regs); + void (*invlpg_intercept)(unsigned long vaddr); }; extern struct hvm_function_table hvm_funcs; @@ -197,12 +197,6 @@ hvm_flush_guest_tlbs(void) void hvm_hypercall_page_initialise(struct domain *d, void *hypercall_page); - -static inline unsigned long -hvm_get_segment_base(struct vcpu *v, enum x86_segment seg) -{ - return hvm_funcs.get_segment_base(v, seg); -} static inline void hvm_get_segment_register(struct vcpu *v, enum x86_segment seg, @@ -321,7 +315,10 @@ void hvm_task_switch( int32_t errcode); enum hvm_access_type { - hvm_access_insn_fetch, hvm_access_read, hvm_access_write + hvm_access_insn_fetch, + hvm_access_none, + hvm_access_read, + hvm_access_write }; int hvm_virtual_to_linear_addr( enum x86_segment seg, diff -r 591cfd37bd54 -r 09b53f27a18b xen/include/asm-x86/hvm/svm/emulate.h --- a/xen/include/asm-x86/hvm/svm/emulate.h Thu Feb 21 15:06:37 2008 +0000 +++ b/xen/include/asm-x86/hvm/svm/emulate.h Thu Feb 21 18:02:42 2008 +0000 @@ -15,30 +15,10 @@ * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 59 Temple * Place - Suite 330, Boston, MA 02111-1307 USA. - * */ #ifndef __ASM_X86_HVM_SVM_EMULATE_H__ #define __ASM_X86_HVM_SVM_EMULATE_H__ - -typedef enum OPERATING_MODE_ { - INVALID_OPERATING_MODE = -1, - LEGACY_MODE, - LEGACY_16BIT, - LONG_MODE, - COMP_MODE, - COMP_16BIT, - OPMODE_16BIT, - - LEGACY_32BIT, - COMP_32BIT, - OPMODE_32BIT, - - LONG_64BIT, - UNKNOWN_OP_MODE, - NUM_OPERATING_MODES -} OPERATING_MODE; - /* Enumerate some standard instructions that we support */ enum instruction_index { @@ -47,87 +27,22 @@ enum instruction_index { INSTR_CPUID, INSTR_RDMSR, INSTR_WRMSR, - INSTR_CLI, - INSTR_STI, - INSTR_RDPMC, - INSTR_CLGI, - INSTR_STGI, - INSTR_VMRUN, - INSTR_VMLOAD, - INSTR_VMSAVE, INSTR_VMCALL, - INSTR_PAUSE, - INSTR_SKINIT, - INSTR_MOV2CR, /* Mov register to CR */ - INSTR_MOVCR2, /* Not MOV CR2, but MOV CRn to register */ - INSTR_MOV2DR, - INSTR_MOVDR2, - INSTR_PUSHF, - INSTR_POPF, - INSTR_RSM, - INSTR_INVLPG, - INSTR_INVLPGA, INSTR_HLT, - INSTR_CLTS, - INSTR_LMSW, - INSTR_SMSW, INSTR_INT3, INSTR_MAX_COUNT /* Must be last - Number of instructions supported */ }; - -extern unsigned long get_effective_addr_modrm64( - struct cpu_user_regs *regs, const u8 prefix, int inst_len, - const u8 *operand, u8 *size); -extern unsigned long get_effective_addr_sib(struct vmcb_struct *vmcb, - struct cpu_user_regs *regs, const u8 prefix, const u8 *operand, - u8 *size); -extern OPERATING_MODE get_operating_mode (struct vmcb_struct *vmcb); -extern unsigned int decode_dest_reg(u8 prefix, u8 modrm); -extern unsigned int decode_src_reg(u8 prefix, u8 modrm); -extern unsigned long svm_rip2pointer(struct vcpu *v); -extern int __get_instruction_length_from_list(struct vcpu *v, - enum instruction_index *list, unsigned int list_count, - u8 *guest_eip_buf, enum instruction_index *match); - +int __get_instruction_length_from_list( + struct vcpu *v, + enum instruction_index *list, unsigned int list_count, + u8 *guest_eip_buf, enum instruction_index *match); static inline int 
__get_instruction_length(struct vcpu *v, enum instruction_index instr, u8 *guest_eip_buf) { return __get_instruction_length_from_list( v, &instr, 1, guest_eip_buf, NULL); -} - - -static inline unsigned int is_prefix(u8 opc) -{ - switch ( opc ) { - case 0x66: - case 0x67: - case 0x2E: - case 0x3E: - case 0x26: - case 0x64: - case 0x65: - case 0x36: - case 0xF0: - case 0xF3: - case 0xF2: -#if __x86_64__ - case 0x40 ... 0x4f: -#endif /* __x86_64__ */ - return 1; - } - return 0; -} - - -static inline int skip_prefix_bytes(u8 *buf, size_t size) -{ - int index; - for ( index = 0; index < size && is_prefix(buf[index]); index++ ) - continue; - return index; } #endif /* __ASM_X86_HVM_SVM_EMULATE_H__ */ diff -r 591cfd37bd54 -r 09b53f27a18b xen/include/asm-x86/x86_emulate.h --- a/xen/include/asm-x86/x86_emulate.h Thu Feb 21 15:06:37 2008 +0000 +++ b/xen/include/asm-x86/x86_emulate.h Thu Feb 21 18:02:42 2008 +0000 @@ -354,6 +354,12 @@ struct x86_emulate_ops /* load_fpu_ctxt: Load emulated environment's FPU state onto processor. */ void (*load_fpu_ctxt)( struct x86_emulate_ctxt *ctxt); + + /* invlpg: Invalidate paging structures which map addressed byte. */ + int (*invlpg)( + enum x86_segment seg, + unsigned long offset, + struct x86_emulate_ctxt *ctxt); }; struct cpu_user_regs;
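After this patch, neither SVM nor VMX decodes INVLPG by hand any more: the VM-exit handlers defer to handle_mmio() -> hvm_emulate_one() -> x86_emulate(), which decodes the memory operand itself and invokes the new ops->invlpg hook; that hook (hvmemul_invlpg() above) resolves seg:offset to a linear address and hands it to the vendor's invlpg_intercept (svm_invlpg_intercept()/vmx_invlpg_intercept()). Below is a minimal standalone sketch of that plumbing; the stub types and helpers here (virtual_to_linear(), invlpg_intercept(), the simplified structs and enums) are illustrative stand-ins, not the real Xen definitions.

/*
 * Standalone sketch of the invlpg plumbing introduced above.  Compiles
 * with any C compiler; everything marked "stub" stands in for the real
 * Xen definitions.
 */
#include <stdio.h>

enum x86_segment { x86_seg_ds, x86_seg_none };
enum { X86EMUL_OKAY, X86EMUL_EXCEPTION };

struct x86_emulate_ctxt { int unused; /* stub: real one carries regs, mode, ... */ };

/* The hook this patch adds to x86_emulate_ops. */
struct x86_emulate_ops {
    int (*invlpg)(enum x86_segment seg, unsigned long offset,
                  struct x86_emulate_ctxt *ctxt);
};

/* Stub for hvm_funcs.invlpg_intercept (svm_/vmx_invlpg_intercept). */
static void invlpg_intercept(unsigned long vaddr)
{
    printf("flush TLB entry for %#lx\n", vaddr); /* paging_invlpg() in Xen */
}

/* Stub segmentation: flat base 0.  The real helper is
 * hvmemul_virtual_to_linear(), which also applies segment checks. */
static int virtual_to_linear(enum x86_segment seg, unsigned long offset,
                             unsigned long *addr)
{
    (void)seg;
    *addr = offset;
    return X86EMUL_OKAY;
}

/* Mirrors hvmemul_invlpg(): resolve the operand, then flush. */
static int emul_invlpg(enum x86_segment seg, unsigned long offset,
                       struct x86_emulate_ctxt *ctxt)
{
    unsigned long addr;
    int rc = virtual_to_linear(seg, offset, &addr);

    (void)ctxt;
    if ( rc == X86EMUL_OKAY )
        invlpg_intercept(addr);
    return rc;
}

static struct x86_emulate_ops emul_ops = { .invlpg = emul_invlpg };

int main(void)
{
    struct x86_emulate_ctxt ctxt = { 0 };
    /* x86_emulate() does the equivalent of this once it has decoded
     * "invlpg [mem]" (0f 01 /7) or invlpga (0f 01 df): */
    return emul_ops.invlpg(x86_seg_ds, 0x7f001000UL, &ctxt) != X86EMUL_OKAY;
}

Note the division of labour this buys: operand decode (prefixes, ModRM/SIB, segment bases) now lives once in x86_emulate(), which is why get_effective_addr_modrm64(), get_effective_addr_sib() and the hand-rolled CR-access, string-PIO and SMSW/LMSW decoders above could simply be deleted, leaving the vendor code as a one-line TLB flush.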