[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86 hvm: Allow cross-vendor migration
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1243346496 -3600 # Node ID f0e2df69a8eb2f560af5626807b81ca4f41afd39 # Parent 1c627434605e7747689047e1761c193ceb4f9ef0 x86 hvm: Allow cross-vendor migration Intercept #UD and emulate SYSCALL/SYSENTER/SYSEXIT as necessary. Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx> Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx> --- xen/arch/x86/hvm/svm/svm.c | 100 ++++++++++++++- xen/arch/x86/hvm/svm/vmcb.c | 8 - xen/arch/x86/hvm/vmx/vmcs.c | 3 xen/arch/x86/hvm/vmx/vmx.c | 36 +++++ xen/arch/x86/x86_emulate/x86_emulate.c | 212 ++++++++++++++++++++++++++++++++- xen/include/asm-x86/hvm/svm/vmcb.h | 9 + xen/include/public/arch-x86/hvm/save.h | 4 7 files changed, 354 insertions(+), 18 deletions(-) diff -r 1c627434605e -r f0e2df69a8eb xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Tue May 26 11:52:31 2009 +0100 +++ b/xen/arch/x86/hvm/svm/svm.c Tue May 26 15:01:36 2009 +0100 @@ -37,6 +37,7 @@ #include <asm/debugreg.h> #include <asm/msr.h> #include <asm/spinlock.h> +#include <asm/hvm/emulate.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/hvm/io.h> @@ -199,9 +200,9 @@ static int svm_vmcb_save(struct vcpu *v, c->cr3 = v->arch.hvm_vcpu.guest_cr[3]; c->cr4 = v->arch.hvm_vcpu.guest_cr[4]; - c->sysenter_cs = vmcb->sysenter_cs; - c->sysenter_esp = vmcb->sysenter_esp; - c->sysenter_eip = vmcb->sysenter_eip; + c->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs; + c->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp; + c->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip; c->pending_event = 0; c->error_code = 0; @@ -258,9 +259,9 @@ static int svm_vmcb_restore(struct vcpu svm_update_guest_cr(v, 2); svm_update_guest_cr(v, 4); - vmcb->sysenter_cs = c->sysenter_cs; - vmcb->sysenter_esp = c->sysenter_esp; - vmcb->sysenter_eip = c->sysenter_eip; + v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs; + v->arch.hvm_svm.guest_sysenter_esp = c->sysenter_esp; + 
v->arch.hvm_svm.guest_sysenter_eip = c->sysenter_eip; if ( paging_mode_hap(v->domain) ) { @@ -286,7 +287,7 @@ static int svm_vmcb_restore(struct vcpu return 0; } - + static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; @@ -985,6 +986,16 @@ static int svm_msr_read_intercept(struct msr_content = v->arch.hvm_vcpu.guest_efer; break; + case MSR_IA32_SYSENTER_CS: + msr_content = v->arch.hvm_svm.guest_sysenter_cs; + break; + case MSR_IA32_SYSENTER_ESP: + msr_content = v->arch.hvm_svm.guest_sysenter_esp; + break; + case MSR_IA32_SYSENTER_EIP: + msr_content = v->arch.hvm_svm.guest_sysenter_eip; + break; + case MSR_IA32_MC4_MISC: /* Threshold register */ case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3: /* @@ -1067,6 +1078,16 @@ static int svm_msr_write_intercept(struc case MSR_K8_VM_HSAVE_PA: goto gpf; + case MSR_IA32_SYSENTER_CS: + v->arch.hvm_svm.guest_sysenter_cs = msr_content; + break; + case MSR_IA32_SYSENTER_ESP: + v->arch.hvm_svm.guest_sysenter_esp = msr_content; + break; + case MSR_IA32_SYSENTER_EIP: + v->arch.hvm_svm.guest_sysenter_eip = msr_content; + break; + case MSR_IA32_DEBUGCTLMSR: vmcb->debugctlmsr = msr_content; if ( !msr_content || !cpu_has_svm_lbrv ) @@ -1165,6 +1186,66 @@ static void svm_vmexit_do_rdtsc(struct c hvm_rdtsc_intercept(regs); } +static void svm_dump_regs(const char *from, struct cpu_user_regs *regs) +{ + printk("Dumping guest's current registers at %s...\n", from); + printk("Size of regs = 0x%lx, address = %p\n", + sizeof(struct cpu_user_regs), regs); + + printk("r15 = 0x%016"PRIx64", r14 = 0x%016"PRIx64"\n", + regs->r15, regs->r14); + printk("r13 = 0x%016"PRIx64", r12 = 0x%016"PRIx64"\n", + regs->r13, regs->r12); + printk("rbp = 0x%016"PRIx64", rbx = 0x%016"PRIx64"\n", + regs->rbp, regs->rbx); + printk("r11 = 0x%016"PRIx64", r10 = 0x%016"PRIx64"\n", + regs->r11, regs->r10); + printk("r9 = 0x%016"PRIx64", r8 = 0x%016"PRIx64"\n", + regs->r9, regs->r8); + printk("rax = 
0x%016"PRIx64", rcx = 0x%016"PRIx64"\n", + regs->rax, regs->rcx); + printk("rdx = 0x%016"PRIx64", rsi = 0x%016"PRIx64"\n", + regs->rdx, regs->rsi); + printk("rdi = 0x%016"PRIx64", rsp = 0x%016"PRIx64"\n", + regs->rdi, regs->rsp); + printk("error code = 0x%08"PRIx32", entry_vector = 0x%08"PRIx32"\n", + regs->error_code, regs->entry_vector); + printk("rip = 0x%016"PRIx64", rflags = 0x%016"PRIx64"\n", + regs->rip, regs->rflags); +} + +static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs) +{ + struct hvm_emulate_ctxt ctxt; + int rc; + + hvm_emulate_prepare(&ctxt, regs); + + rc = hvm_emulate_one(&ctxt); + + switch ( rc ) + { + case X86EMUL_UNHANDLEABLE: + gdprintk(XENLOG_WARNING, + "instruction emulation failed @ %04x:%lx: " + "%02x %02x %02x %02x %02x %02x\n", + hvmemul_get_seg_reg(x86_seg_cs, &ctxt)->sel, + ctxt.insn_buf_eip, + ctxt.insn_buf[0], ctxt.insn_buf[1], + ctxt.insn_buf[2], ctxt.insn_buf[3], + ctxt.insn_buf[4], ctxt.insn_buf[5]); + return; + case X86EMUL_EXCEPTION: + if ( ctxt.exn_pending ) + hvm_inject_exception(ctxt.exn_vector, ctxt.exn_error_code, 0); + break; + default: + break; + } + + hvm_emulate_writeback(&ctxt); +} + static void wbinvd_ipi(void *info) { wbinvd(); @@ -1229,6 +1310,7 @@ asmlinkage void svm_vmexit_handler(struc if ( unlikely(exit_reason == VMEXIT_INVALID) ) { svm_dump_vmcb(__func__, vmcb); + svm_dump_regs(__func__, regs); goto exit_and_crash; } @@ -1304,6 +1386,10 @@ asmlinkage void svm_vmexit_handler(struc svm_inject_exception(TRAP_page_fault, regs->error_code, va); break; } + + case VMEXIT_EXCEPTION_UD: + svm_vmexit_ud_intercept(regs); + break; /* Asynchronous event, handled when we STGI'd after the VMEXIT. 
*/ case VMEXIT_EXCEPTION_MC: diff -r 1c627434605e -r f0e2df69a8eb xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Tue May 26 11:52:31 2009 +0100 +++ b/xen/arch/x86/hvm/svm/vmcb.c Tue May 26 15:01:36 2009 +0100 @@ -150,9 +150,6 @@ static int construct_vmcb(struct vcpu *v svm_disable_intercept_for_msr(v, MSR_LSTAR); svm_disable_intercept_for_msr(v, MSR_STAR); svm_disable_intercept_for_msr(v, MSR_SYSCALL_MASK); - svm_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS); - svm_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP); - svm_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP); vmcb->msrpm_base_pa = (u64)virt_to_maddr(arch_svm->msrpm); vmcb->iopm_base_pa = (u64)virt_to_maddr(hvm_io_bitmap); @@ -222,7 +219,10 @@ static int construct_vmcb(struct vcpu *v paging_update_paging_modes(v); - vmcb->exception_intercepts = HVM_TRAP_MASK | (1U << TRAP_no_device); + vmcb->exception_intercepts = + HVM_TRAP_MASK + | (1U << TRAP_no_device) + | (1U << TRAP_invalid_op); if ( paging_mode_hap(v->domain) ) { diff -r 1c627434605e -r f0e2df69a8eb xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Tue May 26 11:52:31 2009 +0100 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Tue May 26 15:01:36 2009 +0100 @@ -668,7 +668,8 @@ static int construct_vmcs(struct vcpu *v __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (paging_mode_hap(d) ? 
0 : (1U << TRAP_page_fault)) - | (1U << TRAP_no_device)); + | (1U << TRAP_no_device) + | (1U << TRAP_invalid_op)); v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET; hvm_update_guest_cr(v, 0); diff -r 1c627434605e -r f0e2df69a8eb xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue May 26 11:52:31 2009 +0100 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue May 26 15:01:36 2009 +0100 @@ -37,6 +37,7 @@ #include <asm/spinlock.h> #include <asm/paging.h> #include <asm/p2m.h> +#include <asm/hvm/emulate.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/hvm/vmx/vmx.h> @@ -2248,6 +2249,38 @@ asmlinkage void vmx_enter_realmode(struc regs->eflags |= (X86_EFLAGS_VM | X86_EFLAGS_IOPL); } +static void vmx_vmexit_ud_intercept(struct cpu_user_regs *regs) +{ + struct hvm_emulate_ctxt ctxt; + int rc; + + hvm_emulate_prepare(&ctxt, regs); + + rc = hvm_emulate_one(&ctxt); + + switch ( rc ) + { + case X86EMUL_UNHANDLEABLE: + gdprintk(XENLOG_WARNING, + "instruction emulation failed @ %04x:%lx: " + "%02x %02x %02x %02x %02x %02x\n", + hvmemul_get_seg_reg(x86_seg_cs, &ctxt)->sel, + ctxt.insn_buf_eip, + ctxt.insn_buf[0], ctxt.insn_buf[1], + ctxt.insn_buf[2], ctxt.insn_buf[3], + ctxt.insn_buf[4], ctxt.insn_buf[5]); + return; + case X86EMUL_EXCEPTION: + if ( ctxt.exn_pending ) + hvm_inject_exception(ctxt.exn_vector, ctxt.exn_error_code, 0); + break; + default: + break; + } + + hvm_emulate_writeback(&ctxt); +} + asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs) { unsigned int exit_reason, idtv_info; @@ -2434,6 +2467,9 @@ asmlinkage void vmx_vmexit_handler(struc HVMTRACE_0D(MCE); do_machine_check(regs); break; + case TRAP_invalid_op: + vmx_vmexit_ud_intercept(regs); + break; default: goto exit_and_crash; } diff -r 1c627434605e -r f0e2df69a8eb xen/arch/x86/x86_emulate/x86_emulate.c --- a/xen/arch/x86/x86_emulate/x86_emulate.c Tue May 26 11:52:31 2009 +0100 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c Tue May 26 15:01:36 2009 +0100 @@ -172,7 +172,7 @@ static 
uint8_t opcode_table[256] = { static uint8_t twobyte_table[256] = { /* 0x00 - 0x07 */ - SrcMem16|ModRM, ImplicitOps|ModRM, 0, 0, 0, 0, ImplicitOps, 0, + SrcMem16|ModRM, ImplicitOps|ModRM, 0, 0, 0, ImplicitOps, ImplicitOps, 0, /* 0x08 - 0x0F */ ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps|ModRM, 0, 0, /* 0x10 - 0x17 */ @@ -186,7 +186,8 @@ static uint8_t twobyte_table[256] = { /* 0x28 - 0x2F */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30 - 0x37 */ - ImplicitOps, ImplicitOps, ImplicitOps, 0, 0, 0, 0, 0, + ImplicitOps, ImplicitOps, ImplicitOps, 0, + ImplicitOps, ImplicitOps, 0, 0, /* 0x38 - 0x3F */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x47 */ @@ -280,7 +281,17 @@ struct operand { }; /* MSRs. */ -#define MSR_TSC 0x10 +#define MSR_TSC 0x00000010 +#define MSR_SYSENTER_CS 0x00000174 +#define MSR_SYSENTER_ESP 0x00000175 +#define MSR_SYSENTER_EIP 0x00000176 +#define MSR_EFER 0xc0000080 +#define EFER_SCE (1u<<0) +#define EFER_LMA (1u<<10) +#define MSR_STAR 0xc0000081 +#define MSR_LSTAR 0xc0000082 +#define MSR_CSTAR 0xc0000083 +#define MSR_FMASK 0xc0000084 /* Control register flags. */ #define CR0_PE (1<<0) @@ -942,6 +953,20 @@ in_protmode( } static int +in_longmode( + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + uint64_t efer; + + if (ops->read_msr == NULL) + return -1; + + ops->read_msr(MSR_EFER, &efer, ctxt); + return !!(efer & EFER_LMA); +} + +static int realmode_load_seg( enum x86_segment seg, uint16_t sel, @@ -3544,6 +3569,71 @@ x86_emulate( break; } + case 0x05: /* syscall */ { + uint64_t msr_content; + struct segment_register cs = { 0 }, ss = { 0 }; + int rc; + + fail_if(ops->read_msr == NULL); + fail_if(ops->read_segment == NULL); + fail_if(ops->write_segment == NULL); + + generate_exception_if(in_realmode(ctxt, ops), EXC_UD, 0); + generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, 0); + generate_exception_if(lock_prefix, EXC_UD, 0); + + /* Inject #UD if syscall/sysret are disabled. 
*/ + rc = ops->read_msr(MSR_EFER, &msr_content, ctxt); + fail_if(rc != 0); + generate_exception_if((msr_content & EFER_SCE) == 0, EXC_UD, 0); + + rc = ops->read_msr(MSR_STAR, &msr_content, ctxt); + fail_if(rc != 0); + + msr_content >>= 32; + cs.sel = (uint16_t)(msr_content & 0xfffc); + ss.sel = (uint16_t)(msr_content + 8); + + cs.base = ss.base = 0; /* flat segment */ + cs.limit = ss.limit = ~0u; /* 4GB limit */ + cs.attr.bytes = 0xc9b; /* G+DB+P+S+Code */ + ss.attr.bytes = 0xc93; /* G+DB+P+S+Data */ + + if ( in_longmode(ctxt, ops) ) + { + cs.attr.fields.db = 0; + cs.attr.fields.l = 1; + + _regs.rcx = _regs.rip; + _regs.r11 = _regs.eflags & ~EFLG_RF; + + rc = ops->read_msr(mode_64bit() ? MSR_LSTAR : MSR_CSTAR, + &msr_content, ctxt); + fail_if(rc != 0); + + _regs.rip = msr_content; + + rc = ops->read_msr(MSR_FMASK, &msr_content, ctxt); + fail_if(rc != 0); + _regs.eflags &= ~(msr_content | EFLG_RF); + } + else + { + rc = ops->read_msr(MSR_STAR, &msr_content, ctxt); + fail_if(rc != 0); + + _regs.rcx = _regs.rip; + _regs.eip = (uint32_t)msr_content; + _regs.eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); + } + + if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) || + (rc = ops->write_segment(x86_seg_ss, &ss, ctxt)) ) + goto done; + + break; + } + case 0x06: /* clts */ generate_exception_if(!mode_ring0(), EXC_GP, 0); fail_if((ops->read_cr == NULL) || (ops->write_cr == NULL)); @@ -3644,6 +3734,122 @@ x86_emulate( if ( !test_cc(b, _regs.eflags) ) dst.type = OP_NONE; break; + + case 0x34: /* sysenter */ { + uint64_t msr_content; + struct segment_register cs, ss; + int rc; + + fail_if(ops->read_msr == NULL); + fail_if(ops->read_segment == NULL); + fail_if(ops->write_segment == NULL); + + generate_exception_if(mode_ring0(), EXC_GP, 0); + generate_exception_if(in_realmode(ctxt, ops), EXC_GP, 0); + generate_exception_if(!in_protmode(ctxt, ops), EXC_GP, 0); + generate_exception_if(lock_prefix, EXC_UD, 0); + + rc = ops->read_msr(MSR_SYSENTER_CS, &msr_content, ctxt); + fail_if(rc != 
0); + + if ( mode_64bit() ) + generate_exception_if(msr_content == 0, EXC_GP, 0); + else + generate_exception_if((msr_content & 0xfffc) == 0, EXC_GP, 0); + + _regs.eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); + + ops->read_segment(x86_seg_cs, &cs, ctxt); + cs.sel = (uint16_t)msr_content & ~3; /* SELECTOR_RPL_MASK */ + cs.base = 0; /* flat segment */ + cs.limit = ~0u; /* 4GB limit */ + cs.attr.bytes = 0xc9b; /* G+DB+P+S+Code */ + + ss.sel = cs.sel + 8; + ss.base = 0; /* flat segment */ + ss.limit = ~0u; /* 4GB limit */ + ss.attr.bytes = 0xc93; /* G+DB+P+S+Data */ + + if ( in_longmode(ctxt, ops) ) + { + cs.attr.fields.db = 0; + cs.attr.fields.l = 1; + } + + rc = ops->write_segment(x86_seg_cs, &cs, ctxt); + fail_if(rc != 0); + rc = ops->write_segment(x86_seg_ss, &ss, ctxt); + fail_if(rc != 0); + + rc = ops->read_msr(MSR_SYSENTER_EIP, &msr_content, ctxt); + fail_if(rc != 0); + _regs.rip = msr_content; + + rc = ops->read_msr(MSR_SYSENTER_ESP, &msr_content, ctxt); + fail_if(rc != 0); + _regs.rsp = msr_content; + + break; + } + + case 0x35: /* sysexit */ { + uint64_t msr_content; + struct segment_register cs, ss; + int user64 = !!(rex_prefix & 8); /* REX.W */ + int rc; + + fail_if(ops->read_msr == NULL); + fail_if(ops->read_segment == NULL); + fail_if(ops->write_segment == NULL); + + generate_exception_if(!mode_ring0(), EXC_GP, 0); + generate_exception_if(in_realmode(ctxt, ops), EXC_GP, 0); + generate_exception_if(!in_protmode(ctxt, ops), EXC_GP, 0); + generate_exception_if(lock_prefix, EXC_UD, 0); + + rc = ops->read_msr(MSR_SYSENTER_CS, &msr_content, ctxt); + fail_if(rc != 0); + rc = ops->read_segment(x86_seg_cs, &cs, ctxt); + fail_if(rc != 0); + + if ( user64 ) + { + cs.sel = (uint16_t)(msr_content + 32); + ss.sel = (cs.sel + 8); + generate_exception_if(msr_content == 0, EXC_GP, 0); + } + else + { + cs.sel = (uint16_t)(msr_content + 16); + ss.sel = (uint16_t)(msr_content + 24); + generate_exception_if((msr_content & 0xfffc) == 0, EXC_GP, 0); + } + + cs.sel |= 0x3; /* 
SELECTOR_RPL_MASK */ + cs.base = 0; /* flat segment */ + cs.limit = ~0u; /* 4GB limit */ + cs.attr.bytes = 0xcfb; /* G+DB+P+DPL3+S+Code */ + + ss.sel |= 0x3; /* SELECTOR_RPL_MASK */ + ss.base = 0; /* flat segment */ + ss.limit = ~0u; /* 4GB limit */ + ss.attr.bytes = 0xcf3; /* G+DB+P+DPL3+S+Data */ + + if ( user64 ) + { + cs.attr.fields.db = 0; + cs.attr.fields.l = 1; + } + + rc = ops->write_segment(x86_seg_cs, &cs, ctxt); + fail_if(rc != 0); + rc = ops->write_segment(x86_seg_ss, &ss, ctxt); + fail_if(rc != 0); + + _regs.rip = _regs.rdx; + _regs.rsp = _regs.rcx; + break; + } case 0x6f: /* movq mm/m64,mm */ { uint8_t stub[] = { 0x0f, 0x6f, modrm, 0xc3 }; diff -r 1c627434605e -r f0e2df69a8eb xen/include/asm-x86/hvm/svm/vmcb.h --- a/xen/include/asm-x86/hvm/svm/vmcb.h Tue May 26 11:52:31 2009 +0100 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h Tue May 26 15:01:36 2009 +0100 @@ -459,6 +459,15 @@ struct arch_svm_struct { unsigned long *msrpm; int launch_core; bool_t vmcb_in_sync; /* VMCB sync'ed with VMSAVE? */ + + /* Upper four bytes are undefined in the VMCB, therefore we can't + * use the fields in the VMCB. Write a 64bit value and then read a 64bit + * value is fine unless there's a VMRUN/VMEXIT in between which clears + * the upper four bytes. + */ + uint64_t guest_sysenter_cs; + uint64_t guest_sysenter_esp; + uint64_t guest_sysenter_eip; }; struct vmcb_struct *alloc_vmcb(void); diff -r 1c627434605e -r f0e2df69a8eb xen/include/public/arch-x86/hvm/save.h --- a/xen/include/public/arch-x86/hvm/save.h Tue May 26 11:52:31 2009 +0100 +++ b/xen/include/public/arch-x86/hvm/save.h Tue May 26 15:01:36 2009 +0100 @@ -123,9 +123,7 @@ struct hvm_hw_cpu { uint32_t tr_arbytes; uint32_t ldtr_arbytes; - uint32_t sysenter_cs; - uint32_t padding0; - + uint64_t sysenter_cs; uint64_t sysenter_esp; uint64_t sysenter_eip; _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
Lists.xenproject.org is hosted with RackSpace.