RE: [Xen-devel] [PATCH 06/16] vmx: nest: handling VMX instruction exits
Tim Deegan wrote:
> At 16:22 +0100 on 08 Sep (1283962934), Qing He wrote:
>> diff -r f1c1d3077337 xen/arch/x86/hvm/vmx/nest.c
>> +/*
>> + * VMX instructions support functions
>> + */
>> +
>> +enum vmx_regs_enc {
>> +    VMX_REG_RAX,
>> +    VMX_REG_RCX,
>> +    VMX_REG_RDX,
>> +    VMX_REG_RBX,
>> +    VMX_REG_RSP,
>> +    VMX_REG_RBP,
>> +    VMX_REG_RSI,
>> +    VMX_REG_RDI,
>> +#ifdef CONFIG_X86_64
>> +    VMX_REG_R8,
>> +    VMX_REG_R9,
>> +    VMX_REG_R10,
>> +    VMX_REG_R11,
>> +    VMX_REG_R12,
>> +    VMX_REG_R13,
>> +    VMX_REG_R14,
>> +    VMX_REG_R15,
>> +#endif
>> +};
>> +
>> +enum vmx_sregs_enc {
>> +    VMX_SREG_ES,
>> +    VMX_SREG_CS,
>> +    VMX_SREG_SS,
>> +    VMX_SREG_DS,
>> +    VMX_SREG_FS,
>> +    VMX_SREG_GS,
>> +};
>> +
>> +enum x86_segment sreg_to_index[] = {
>> +    [VMX_SREG_ES] = x86_seg_es,
>> +    [VMX_SREG_CS] = x86_seg_cs,
>> +    [VMX_SREG_SS] = x86_seg_ss,
>> +    [VMX_SREG_DS] = x86_seg_ds,
>> +    [VMX_SREG_FS] = x86_seg_fs,
>> +    [VMX_SREG_GS] = x86_seg_gs,
>> +};
>
> Since you dislike adding new namespaces and translations, I'm sure you
> can get rid of these. :)  It might even simplify some of the macros
> below.

True, there is some duplication here. Regarding the following definition in
x86_emulate.c, we can reuse it:

    static enum x86_segment
    decode_segment(uint8_t modrm_reg)
    {
        switch ( modrm_reg )
        {
        case 0: return x86_seg_es;
        case 1: return x86_seg_cs;
        case 2: return x86_seg_ss;
        case 3: return x86_seg_ds;
        case 4: return x86_seg_fs;
        case 5: return x86_seg_gs;
        default: break;
        }
        return decode_segment_failed;
    }

BTW, should we use macros rather than numeric literals here? And can we
modify the x86_segment enumeration to use the same name space?

>
>> +union vmx_inst_info {
>> +    struct {
>> +        unsigned int scaling           :2; /* bit 0-1 */
>> +        unsigned int __rsvd0           :1; /* bit 2 */
>> +        unsigned int reg1              :4; /* bit 3-6 */
>> +        unsigned int addr_size         :3; /* bit 7-9 */
>> +        unsigned int memreg            :1; /* bit 10 */
>> +        unsigned int __rsvd1           :4; /* bit 11-14 */
>> +        unsigned int segment           :3; /* bit 15-17 */
>> +        unsigned int index_reg         :4; /* bit 18-21 */
>> +        unsigned int index_reg_invalid :1; /* bit 22 */
>> +        unsigned int base_reg          :4; /* bit 23-26 */
>> +        unsigned int base_reg_invalid  :1; /* bit 27 */
>> +        unsigned int reg2              :4; /* bit 28-31 */
>> +    } fields;
>> +    u32 word;
>> +};
>> +
>> +struct vmx_inst_decoded {
>> +#define VMX_INST_MEMREG_TYPE_MEMORY 0
>> +#define VMX_INST_MEMREG_TYPE_REG    1
>> +    int type;
>> +    union {
>> +        struct {
>> +            unsigned long mem;
>> +            unsigned int  len;
>> +        };
>> +        enum vmx_regs_enc reg1;
>> +    };
>> +
>> +    enum vmx_regs_enc reg2;
>> +};
>> +
>> +enum vmx_ops_result {
>> +    VMSUCCEED,
>> +    VMFAIL_VALID,
>> +    VMFAIL_INVALID,
>> +};
>> +
>> +#define CASE_SET_REG(REG, reg)      \
>> +    case VMX_REG_ ## REG: regs->reg = value; break
>> +#define CASE_GET_REG(REG, reg)      \
>> +    case VMX_REG_ ## REG: value = regs->reg; break
>> +
>> +#define CASE_EXTEND_SET_REG         \
>> +    CASE_EXTEND_REG(S)
>> +#define CASE_EXTEND_GET_REG         \
>> +    CASE_EXTEND_REG(G)
>> +
>> +#ifdef __i386__
>> +#define CASE_EXTEND_REG(T)
>> +#else
>> +#define CASE_EXTEND_REG(T)          \
>> +    CASE_ ## T ## ET_REG(R8, r8);   \
>> +    CASE_ ## T ## ET_REG(R9, r9);   \
>> +    CASE_ ## T ## ET_REG(R10, r10); \
>> +    CASE_ ## T ## ET_REG(R11, r11); \
>> +    CASE_ ## T ## ET_REG(R12, r12); \
>> +    CASE_ ## T ## ET_REG(R13, r13); \
>> +    CASE_ ## T ## ET_REG(R14, r14); \
>> +    CASE_ ## T ## ET_REG(R15, r15)
>> +#endif
>> +
>> +static unsigned long reg_read(struct cpu_user_regs *regs,
>> +                              enum vmx_regs_enc index)
>> +{
>> +    unsigned long value = 0;
>> +
>> +    switch ( index ) {
>> +    CASE_GET_REG(RAX, eax);
>> +    CASE_GET_REG(RCX, ecx);
>> +    CASE_GET_REG(RDX, edx);
>> +    CASE_GET_REG(RBX, ebx);
>> +    CASE_GET_REG(RBP, ebp);
>> +    CASE_GET_REG(RSI, esi);
>> +    CASE_GET_REG(RDI, edi);
>> +    CASE_GET_REG(RSP, esp);
>> +    CASE_EXTEND_GET_REG;
>> +    default:
>> +        break;
>> +    }
>> +
>> +    return value;
>> +}
>> +
>> +static void reg_write(struct cpu_user_regs *regs,
>> +                      enum vmx_regs_enc index,
>> +                      unsigned long value)
>> +{
>> +    switch ( index ) {
>> +    CASE_SET_REG(RAX, eax);
>> +    CASE_SET_REG(RCX, ecx);
>> +    CASE_SET_REG(RDX, edx);
>> +    CASE_SET_REG(RBX, ebx);
>> +    CASE_SET_REG(RBP, ebp);
>> +    CASE_SET_REG(RSI, esi);
>> +    CASE_SET_REG(RDI, edi);
>> +    CASE_SET_REG(RSP, esp);
>> +    CASE_EXTEND_SET_REG;
>> +    default:
>> +        break;
>> +    }
>> +}
>> +
>> +static int decode_vmx_inst(struct cpu_user_regs *regs,
>> +                           struct vmx_inst_decoded *decode)
>> +{
>> +    struct vcpu *v = current;
>> +    union vmx_inst_info info;
>> +    struct segment_register seg;
>> +    unsigned long base, index, seg_base, disp, offset;
>> +    int scale;
>> +
>> +    info.word = __vmread(VMX_INSTRUCTION_INFO);
>> +
>> +    if ( info.fields.memreg ) {
>> +        decode->type = VMX_INST_MEMREG_TYPE_REG;
>> +        decode->reg1 = info.fields.reg1;
>> +    }
>> +    else
>> +    {
>> +        decode->type = VMX_INST_MEMREG_TYPE_MEMORY;
>> +        hvm_get_segment_register(v, sreg_to_index[info.fields.segment], &seg);
>> +        /* TODO: segment type check */
>
> Indeed. :)
>
>> +        seg_base = seg.base;
>> +
>> +        base = info.fields.base_reg_invalid ? 0 :
>> +            reg_read(regs, info.fields.base_reg);
>> +
>> +        index = info.fields.index_reg_invalid ? 0 :
>> +            reg_read(regs, info.fields.index_reg);
>> +
>> +        scale = 1 << info.fields.scaling;
>> +
>> +        disp = __vmread(EXIT_QUALIFICATION);
>> +
>> +        offset = base + index * scale + disp;
>> +        if ( offset > seg.limit )
>
> DYM if ( offset + len > limit )?
>
> Would it be worth trying to fold this into the existing x86_emulate
> code, which already has careful memory access checks?

Can you give more details? Do you mean reworking this around
hvm_virtual_to_linear_addr()?
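For concreteness, the operand check in decode_vmx_inst() could be built on
hvm_virtual_to_linear_addr() roughly like this. A sketch only: it assumes
hvm_access_read is an acceptable access type for these operands and that the
address-size argument is expressed in bits; a real patch would need to verify
both.

    /*
     * Sketch: let hvm_virtual_to_linear_addr() perform the limit and
     * canonical checks, including the last byte of the access, instead
     * of the open-coded "offset > seg.limit" test above.
     */
    unsigned long offset = base + index * scale + disp;
    unsigned int bytes = 1 << (info.fields.addr_size + 1);

    if ( !hvm_virtual_to_linear_addr(sreg_to_index[info.fields.segment],
                                     &seg, offset, bytes, hvm_access_read,
                                     16 << info.fields.addr_size, /* bits */
                                     &decode->mem) )
        goto gp_fault;

    decode->len = bytes;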
>
>> +            goto gp_fault;
>> +
>> +        decode->mem = seg_base + base + index * scale + disp;
>> +        decode->len = 1 << (info.fields.addr_size + 1);
>> +    }
>> +
>> +    decode->reg2 = info.fields.reg2;
>> +
>> +    return X86EMUL_OKAY;
>> +
>> +gp_fault:
>> +    hvm_inject_exception(TRAP_gp_fault, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +static int vmx_inst_check_privilege(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct segment_register cs;
>> +
>> +    hvm_get_segment_register(v, x86_seg_cs, &cs);
>> +
>> +    if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
>> +         !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) ||
>> +         (regs->eflags & X86_EFLAGS_VM) ||
>> +         (hvm_long_mode_enabled(v) && cs.attr.fields.l == 0) )
>> +        goto invalid_op;
>> +
>> +    if ( (cs.sel & 3) > 0 )
>> +        goto gp_fault;
>> +
>> +    return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +
>> +gp_fault:
>> +    hvm_inject_exception(TRAP_gp_fault, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +static void vmreturn(struct cpu_user_regs *regs, enum vmx_ops_result res)
>> +{
>> +    unsigned long eflags = regs->eflags;
>> +    unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
>> +                         X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF;
>> +
>> +    eflags &= ~mask;
>> +
>> +    switch ( res ) {
>> +    case VMSUCCEED:
>> +        break;
>> +    case VMFAIL_VALID:
>> +        /* TODO: error number, useful for guest VMM debugging */
>> +        eflags |= X86_EFLAGS_ZF;
>> +        break;
>> +    case VMFAIL_INVALID:
>> +    default:
>> +        eflags |= X86_EFLAGS_CF;
>> +        break;
>> +    }
>> +
>> +    regs->eflags = eflags;
>> +}
>> +
>> +static int __clear_current_vvmcs(struct vmx_nest_struct *nest)
>> +{
>> +    int rc;
>> +
>> +    if ( nest->svmcs )
>> +        __vmpclear(virt_to_maddr(nest->svmcs));
>> +
>> +#if !CONFIG_VVMCS_MAPPING
>> +    rc = hvm_copy_to_guest_phys(nest->gvmcs_pa, nest->vvmcs, PAGE_SIZE);
>> +    if ( rc != HVMCOPY_okay )
>> +        return X86EMUL_EXCEPTION;
>> +#endif
>> +
>> +    nest->vmcs_valid = 0;
>> +
>> +    return X86EMUL_OKAY;
>> +}
>> +
>> +/*
>> + * VMX instructions handling
>> + */
>> +
>> +int vmx_nest_handle_vmxon(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> +    struct vmx_inst_decoded decode;
>> +    unsigned long gpa = 0;
>> +    int rc;
>> +
>> +    if ( !is_nested_avail(v->domain) )
>> +        goto invalid_op;
>> +
>> +    rc = vmx_inst_check_privilege(regs);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>
> I think you could fold these checks and the goto target into
> decode_vmx_inst(), just to avoid repeating them in every
> vmx_nest_handle_* function.
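For illustration, folding the privilege check into the decoder might look
roughly like this (a sketch reusing the names already in the patch; how the
per-instruction availability checks and their invalid_op targets would be
folded in is left open):

    static int decode_vmx_inst(struct cpu_user_regs *regs,
                               struct vmx_inst_decoded *decode)
    {
        int rc = vmx_inst_check_privilege(regs);

        /* Privilege check failed: the exception is already injected. */
        if ( rc != X86EMUL_OKAY )
            return rc;

        /* ... operand decoding exactly as in the patch above ... */

        return X86EMUL_OKAY;
    }

    int vmx_nest_handle_vmclear(struct cpu_user_regs *regs)
    {
        struct vmx_inst_decoded decode;
        int rc;

        /* One call replaces the repeated check/decode pair per handler. */
        rc = decode_vmx_inst(regs, &decode);
        if ( rc != X86EMUL_OKAY )
            return rc;

        /* ... */
        return X86EMUL_OKAY;
    }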
>
>> +    rc = decode_vmx_inst(regs, &decode);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> +    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>> +    if ( rc != HVMCOPY_okay )
>> +        return X86EMUL_EXCEPTION;
>> +
>> +    nest->guest_vmxon_pa = gpa;
>> +    nest->gvmcs_pa = 0;
>> +    nest->vmcs_valid = 0;
>> +#if !CONFIG_VVMCS_MAPPING
>> +    nest->vvmcs = alloc_xenheap_page();
>> +    if ( !nest->vvmcs )
>> +    {
>> +        gdprintk(XENLOG_ERR, "nest: allocation for virtual vmcs failed\n");
>> +        vmreturn(regs, VMFAIL_INVALID);
>> +        goto out;
>> +    }
>> +#endif
>> +    nest->svmcs = alloc_xenheap_page();
>> +    if ( !nest->svmcs )
>> +    {
>> +        gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
>> +        free_xenheap_page(nest->vvmcs);
>> +        vmreturn(regs, VMFAIL_INVALID);
>> +        goto out;
>> +    }
>> +
>> +    /*
>> +     * `fork' the host vmcs to shadow_vmcs
>> +     * vmcs_lock is not needed since we are on current
>> +     */
>> +    nest->hvmcs = v->arch.hvm_vmx.vmcs;
>> +    __vmpclear(virt_to_maddr(nest->hvmcs));
>> +    memcpy(nest->svmcs, nest->hvmcs, PAGE_SIZE);
>> +    __vmptrld(virt_to_maddr(nest->hvmcs));
>> +    v->arch.hvm_vmx.launched = 0;
>> +
>> +    vmreturn(regs, VMSUCCEED);
>> +
>> +out:
>> +    return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> +    int rc;
>> +
>> +    if ( unlikely(!nest->guest_vmxon_pa) )
>> +        goto invalid_op;
>> +
>> +    rc = vmx_inst_check_privilege(regs);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    nest->guest_vmxon_pa = 0;
>> +    __vmpclear(virt_to_maddr(nest->svmcs));
>> +
>> +#if !CONFIG_VVMCS_MAPPING
>> +    free_xenheap_page(nest->vvmcs);
>> +#endif
>> +    free_xenheap_page(nest->svmcs);
>
> These also need to be freed on domain teardown.
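A hook along these lines, called from the vCPU destruction path, would cover
that (a sketch; the vmx_nest_destroy() name and its call site are
assumptions, not part of this patch):

    void vmx_nest_destroy(struct vcpu *v)
    {
        struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;

        /* Nothing to free if the guest never executed VMXON. */
        if ( !nest->guest_vmxon_pa )
            return;

    #if !CONFIG_VVMCS_MAPPING
        free_xenheap_page(nest->vvmcs);
    #else
        if ( nest->vvmcs )
            unmap_domain_page_global(nest->vvmcs);
    #endif
        free_xenheap_page(nest->svmcs);

        nest->guest_vmxon_pa = 0;
        nest->vvmcs = NULL;
        nest->svmcs = NULL;
    }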
>
>> +    vmreturn(regs, VMSUCCEED);
>> +    return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmptrld(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct vmx_inst_decoded decode;
>> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> +    unsigned long gpa = 0;
>> +    int rc;
>> +
>> +    if ( unlikely(!nest->guest_vmxon_pa) )
>> +        goto invalid_op;
>> +
>> +    rc = vmx_inst_check_privilege(regs);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    rc = decode_vmx_inst(regs, &decode);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> +    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>> +    if ( rc != HVMCOPY_okay )
>> +        return X86EMUL_EXCEPTION;
>> +
>> +    if ( gpa == nest->guest_vmxon_pa || gpa & 0xfff )
>> +    {
>> +        vmreturn(regs, VMFAIL_INVALID);
>> +        goto out;
>> +    }
>> +
>> +    if ( nest->gvmcs_pa != gpa )
>> +    {
>> +        if ( nest->vmcs_valid )
>> +        {
>> +            rc = __clear_current_vvmcs(nest);
>> +            if ( rc != X86EMUL_OKAY )
>> +                return rc;
>> +        }
>> +        nest->gvmcs_pa = gpa;
>> +        ASSERT(nest->vmcs_valid == 0);
>> +    }
>> +
>> +
>> +    if ( !nest->vmcs_valid )
>> +    {
>> +#if CONFIG_VVMCS_MAPPING
>> +        unsigned long mfn;
>> +        p2m_type_t p2mt;
>> +
>> +        mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain),
>> +                               nest->gvmcs_pa >> PAGE_SHIFT, &p2mt));
>> +        nest->vvmcs = map_domain_page_global(mfn);
>> +#else
>> +        rc = hvm_copy_from_guest_phys(nest->vvmcs, nest->gvmcs_pa, PAGE_SIZE);
>> +        if ( rc != HVMCOPY_okay )
>> +            return X86EMUL_EXCEPTION;
>> +#endif
>> +        nest->vmcs_valid = 1;
>> +    }
>> +
>> +    vmreturn(regs, VMSUCCEED);
>> +
>> +out:
>> +    return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmptrst(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct vmx_inst_decoded decode;
>> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> +    unsigned long gpa = 0;
>> +    int rc;
>> +
>> +    if ( unlikely(!nest->guest_vmxon_pa) )
>> +        goto invalid_op;
>> +
>> +    rc = vmx_inst_check_privilege(regs);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    rc = decode_vmx_inst(regs, &decode);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> +
>> +    gpa = nest->gvmcs_pa;
>> +
>> +    rc = hvm_copy_to_guest_virt(decode.mem, &gpa, decode.len, 0);
>> +    if ( rc != HVMCOPY_okay )
>> +        return X86EMUL_EXCEPTION;
>> +
>> +    vmreturn(regs, VMSUCCEED);
>> +    return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmclear(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct vmx_inst_decoded decode;
>> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> +    unsigned long gpa = 0;
>> +    int rc;
>> +
>> +    if ( unlikely(!nest->guest_vmxon_pa) )
>> +        goto invalid_op;
>> +
>> +    rc = vmx_inst_check_privilege(regs);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    rc = decode_vmx_inst(regs, &decode);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> +    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>
> Is it guaranteed that decode.len is always <= sizeof gpa here, even
> with a malicious guest?
Reusing hvm_virtual_to_linear_addr() with consideration of the last byte of
the access may be the best choice :)
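In the meantime, a guard in the handlers would also address the decode.len
concern directly. A minimal sketch (clamping rather than faulting is only one
possible policy):

    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);

    /*
     * Sketch: never let a guest-controlled length overrun the on-stack
     * destination; the memory operand of VMXON/VMPTRLD/VMCLEAR is a
     * 64-bit physical address, so larger lengths are bogus anyway.
     */
    if ( decode.len > sizeof(gpa) )
        decode.len = sizeof(gpa);

    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);

Thx, Eddie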