# HG changeset patch # User cegger # Date 1271330301 -7200 Nested Virtualization core implementation diff -r 6fca66c4d6f6 -r c7f560bcc31f xen/arch/x86/hvm/Makefile --- a/xen/arch/x86/hvm/Makefile +++ b/xen/arch/x86/hvm/Makefile @@ -10,6 +10,7 @@ obj-y += intercept.o obj-y += io.o obj-y += irq.o obj-y += mtrr.o +obj-y += nestedhvm.o obj-y += pmtimer.o obj-y += quirks.o obj-y += rtc.o diff -r 6fca66c4d6f6 -r c7f560bcc31f xen/arch/x86/hvm/nestedhvm.c --- /dev/null +++ b/xen/arch/x86/hvm/nestedhvm.c @@ -0,0 +1,901 @@ +/* + * Nested SVM + * Copyright (c) 2010, Advanced Micro Devices, Inc. + * Author: Christoph Egger + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include +#include /* for HVM_DELIVER_NO_ERROR_CODE */ +#include +#include +#include /* for VIRIDIAN_MSR_* */ +#include /* for local_event_delivery_(en|dis)able */ +#include /* for paging_mode_hap() */ + +#include /* for svm_dump_vmcb */ + +/* Nested SVM on/off per domain */ +bool_t +nestedhvm_enabled(struct domain *d) +{ + bool_t enabled; + + enabled = !!(d->arch.hvm_domain.nestedhvm_enabled); + /* sanity check */ + BUG_ON(enabled && !is_hvm_domain(d)); + + if (!is_hvm_domain(d)) + return 0; + + return enabled; +} + +/* Nested VMCB */ +static bool_t +nestedhvm_vmcb_addr_isvalid(uint64_t addr) +{ + if (addr > 0xfd00000000) + return 0; + if ((addr & ~PAGE_MASK) != 0) + return 0; + return 1; +} + +/* Get VMCB from the guest for the nested guest. */ +enum hvm_copy_result +nestedhvm_vmcb_fromguest(struct vmcb_struct *vmcb, uint64_t vmcbaddr) +{ + return hvm_copy_from_guest_phys(vmcb, vmcbaddr, + sizeof(struct vmcb_struct)); +} + +/* Put VMCB into the guest for the nested guest. 
*/ +enum hvm_copy_result +nestedhvm_vmcb_toguest(struct vmcb_struct *vmcb, uint64_t vmcbaddr) +{ + return hvm_copy_to_guest_phys(vmcbaddr, vmcb, + sizeof(struct vmcb_struct)); +} + +struct vmcb_struct * +nestedhvm_vmcb_create(struct vcpu *v) +{ + return alloc_vmcb(); +} + +int +nestedhvm_vmcb_destroy(struct vcpu *v, struct vmcb_struct *nestedvmcb) +{ + free_vmcb(nestedvmcb); + return 0; +} + +/* Nested VCPU */ +static int +nestedhvm_vcpu_state_validate(struct vcpu *v, uint64_t vmcbaddr) +{ + struct segment_register reg; + + if ( !nestedhvm_enabled(v->domain) ) + return TRAP_invalid_op; + + if ( !hvm_svm_enabled(v) || hvm_guest_x86_mode(v) < 2 ) + return TRAP_invalid_op; + + /* if CPL != 0 inject #GP */ + hvm_get_segment_register(v, x86_seg_ss, ®); + if (reg.attr.fields.dpl != 0) + return TRAP_gp_fault; + + if (!nestedhvm_vmcb_addr_isvalid(vmcbaddr)) + return TRAP_gp_fault; + + return 0; +} + +bool_t +nestedhvm_vcpu_in_guestmode(struct vcpu *v) +{ + return VCPU_NESTEDHVM(v).nh_guestmode; +} + +int +nestedhvm_vcpu_initialise(struct vcpu *v) +{ + int rc; + struct vmcb_struct *vmcb; + + if (!nestedhvm_enabled(v->domain)) + return 0; + + memset(&VCPU_NESTEDHVM(v), 0, sizeof(VCPU_NESTEDHVM(v))); + VCPU_NESTEDHVM(v).nh_vmcb = vmcb = nestedhvm_vmcb_create(v); + if (vmcb == NULL) + return -ENOMEM; + + VCPU_NESTEDHVM(v).nh_vmcb->np_enable = 0; + VCPU_NESTEDHVM(v).nh_vmcb->g_pat = MSR_IA32_CR_PAT_RESET; + VCPU_NESTEDHVM(v).nh_hap_enabled = 0; + VCPU_NESTEDHVM(v).nh_vmcb_cr3 = 0; + VCPU_NESTEDHVM(v).nh_vmcb_hcr3 = 0; + VCPU_NESTEDHVM(v).nh_guest_asid = 0; + VCPU_NESTEDHVM(v).nh_old_guest_asid = 0; + VCPU_NESTEDHVM(v).nh_tlb_control = 0; + VCPU_NESTEDHVM(v).nh_p2m = NULL; + + /* Enable interrupts */ + nestedhvm_vcpu_stgi(v); + /* vcpu is in host mode */ + nestedhvm_vcpu_exit_guestmode(v); + + /* initialise hostsave, for example */ + rc = hvm_nestedhvm_vcpu_initialise(v); + if (rc) { + nestedhvm_vmcb_destroy(v, vmcb); + return rc; + } + + VCPU_NESTEDHVM(v).nh_guestmode = 0; + return 0; +} + +int +nestedhvm_vcpu_destroy(struct vcpu *v) +{ + int ret = 0; + + if (!nestedhvm_enabled(v->domain)) + return 0; + + if (nestedhvm_vcpu_in_guestmode(v)) { + nestedhvm_vcpu_exit_guestmode(v); + ret = -EBUSY; + } + + /* Enable interrupts or the guest won't see any interrupts + * after nested guest exited. + */ + nestedhvm_vcpu_stgi(v); + nestedhvm_vmcb_destroy(v, VCPU_NESTEDHVM(v).nh_vmcb); + + hvm_nestedhvm_vcpu_destroy(v); + return ret; +} + +static bool_t +nestedhvm_vmcb_sanitycheck(const char *from, struct vmcb_struct *vmcb, + bool_t verbose) +{ + bool_t ret = 0; /* ok */ + +#define PRINTF(...) \ + if (verbose) { ret = 1; printk("%s: ", from); printk(__VA_ARGS__); \ + } else return 1; + + if ((vmcb->efer & EFER_SVME) == 0) { + PRINTF("EFER: SVME bit not set (0x%"PRIx64")\n", vmcb->efer); + } + + if ((vmcb->cr0 & X86_CR0_CD) == 0 && (vmcb->cr0 & X86_CR0_NW) != 0) { + PRINTF("CR0: CD bit is zero and NW bit set (0x%"PRIx64")\n", + vmcb->cr0); + } + + if ((vmcb->cr0 >> 32U) != 0) { + PRINTF("CR0: bits [63:32] are not zero (0x%"PRIx64")\n", + vmcb->cr0); + } + +#if 0 + if ((vmcb->cr3 & ... 
)) { + PRINTF("CR3: MBZ bits are set (0x%"PRIx64")\n", vmcb->cr3); + } +#endif + + if ((vmcb->cr4 >> 11U) != 0) { + PRINTF("CR4: bits [63:11] are not zero (0x%"PRIx64")\n", + vmcb->cr4); + } + + if ((vmcb->dr6 >> 32U) != 0) { + PRINTF("DR6: bits [63:32] are not zero (0x%"PRIx64")\n", + vmcb->dr6); + } + + if ((vmcb->dr7 >> 32U) != 0) { + PRINTF("DR7: bits [63:32] are not zero (0x%"PRIx64")\n", + vmcb->dr7); + } + + if ((vmcb->efer >> 15U) != 0) { + PRINTF("EFER: bits [63:15] are not zero (0x%"PRIx64")\n", + vmcb->efer); + } + +#if 0 + if (!cpu_has_longmode && (vmcb->efer & (EFER_LMA | EFER_LME)) != 0) { + PRINTF("EFER: cpu does not support longmode, but longmode enabled\n", + vmcb->efer); + } +#endif + + if ((vmcb->efer & EFER_LME) != 0 && ((vmcb->cr0 & X86_CR0_PG) != 0)) { + if ((vmcb->cr4 & X86_CR4_PAE) == 0) { + PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero.\n"); + } + if ((vmcb->cr0 & X86_CR0_PE) == 0) { + PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero.\n"); + } + + } + + if ((vmcb->efer & EFER_LME) != 0 + && (vmcb->cr0 & X86_CR0_PG) != 0 + && (vmcb->cr4 & X86_CR4_PAE) != 0 + && (vmcb->cs.attr.fields.l != 0) + && (vmcb->cs.attr.fields.db != 0)) + { + PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero.\n"); + } + + if ((vmcb->general2_intercepts & GENERAL2_INTERCEPT_VMRUN) == 0) { + PRINTF("GENERAL2_INTERCEPT: VMRUN intercept bit is clear (0x%"PRIx32")\n", + vmcb->general2_intercepts); + } + +#undef PRINTF + return ret; +} + +static bool_t +nestedhvm_isintercepted_by_guest(struct vcpu *v, uint64_t exit_reason) +{ + uint64_t exit_bits; + + switch (exit_reason) { + case VMEXIT_CR0_READ ... VMEXIT_CR15_READ: + case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE: + exit_bits = 1ULL << (exit_reason - VMEXIT_CR0_READ); + if (VCPU_NESTEDHVM(v).nh_cr_intercepts & exit_bits) + break; + return 0; + + case VMEXIT_DR0_READ ... VMEXIT_DR7_READ: + case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE: + exit_bits = 1ULL << (exit_reason - VMEXIT_DR0_READ); + if (VCPU_NESTEDHVM(v).nh_dr_intercepts & exit_bits) + break; + return 0; + + case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF: + exit_bits = 1ULL << (exit_reason - VMEXIT_EXCEPTION_DE); + if (VCPU_NESTEDHVM(v).nh_exception_intercepts & exit_bits) + break; + return 0; + + case VMEXIT_INTR ... VMEXIT_SHUTDOWN: + exit_bits = 1ULL << (exit_reason - VMEXIT_INTR); + if (VCPU_NESTEDHVM(v).nh_general1_intercepts & exit_bits) + break; + return 0; + + /* case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL: */ + default: + exit_bits = 1ULL << (exit_reason - VMEXIT_VMRUN); + if (VCPU_NESTEDHVM(v).nh_general2_intercepts & exit_bits) + break; + return 0; + } + + return 1; +} + +int +nestedhvm_vcpu_vmrun(struct vcpu *v, struct cpu_user_regs *regs, + uint64_t vmcbaddr, unsigned int inst_len) +{ + int ret; + enum nestedhvm_vmexits vmret; + enum hvm_copy_result rc; + struct vmcb_struct *ns_vmcb; + uint64_t exitreason; + uint64_t forceexitreason; + bool_t alter_rip = 1; + struct hvm_intack intack; + + ret = nestedhvm_vcpu_state_validate(v, vmcbaddr); + if (ret) { + gdprintk(XENLOG_ERR, + "nestedhvm_vcpu_state_validate failed, injecting 0x%x\n", + ret); + hvm_inject_exception(ret, HVM_DELIVER_NO_ERROR_CODE, 0); + return ret; + } + + /* Save vmcbaddr. 
Needed for VMEXIT */ + VCPU_NESTEDHVM(v).nh_vmcbaddr = vmcbaddr; + + ns_vmcb = VCPU_NESTEDHVM(v).nh_vmcb; + + /* get nested vmcb */ + rc = nestedhvm_vmcb_fromguest(ns_vmcb, vmcbaddr); + if (rc) { + vmret = NESTEDHVM_VMEXIT_FATALERROR; + gdprintk(XENLOG_ERR, + "nestedhvm_vmcb_fromguest failed, injecting #GP\n"); + hvm_inject_exception(TRAP_gp_fault, + HVM_DELIVER_NO_ERROR_CODE, 0); + return TRAP_gp_fault; + } + + /* Save cr3/hcr3 values. Needed for Nested-on-Nested and + * Shadow-on-Shadow paging. + */ + VCPU_NESTEDHVM(v).nh_vmcb_cr3 = ns_vmcb->cr3; + VCPU_NESTEDHVM(v).nh_vmcb_hcr3 = ns_vmcb->h_cr3; + VCPU_NESTEDHVM(v).nh_old_guest_asid = VCPU_NESTEDHVM(v).nh_guest_asid; + VCPU_NESTEDHVM(v).nh_guest_asid = ns_vmcb->guest_asid; + VCPU_NESTEDHVM(v).nh_tlb_control = ns_vmcb->tlb_control; + + intack = hvm_vcpu_has_pending_irq(v); + forceexitreason = VMEXIT_INVALID; + switch (intack.source) { + case hvm_intsrc_none: + break; + case hvm_intsrc_pic: /* too much traffic blocks nested guest */ + case hvm_intsrc_lapic: /* too much traffic blocks nested guest */ + /* forceexitreason = VMEXIT_INTR */; + break; + case hvm_intsrc_nmi: + forceexitreason = VMEXIT_NMI; + break; + case hvm_intsrc_mce: + forceexitreason = VMEXIT_EXCEPTION_MC; + break; + } + + if ( forceexitreason != VMEXIT_INVALID && + nestedhvm_isintercepted_by_guest(v, forceexitreason) ) + { + VCPU_NESTEDHVM(v).nh_forcevmexit_exitcode = forceexitreason; + VCPU_NESTEDHVM(v).nh_hostflags.fields.forcevmexit = 1; + vmret = nestedhvm_vcpu_vmexit(v, regs, forceexitreason); + VCPU_NESTEDHVM(v).nh_hostflags.fields.forcevmexit = 0; + + switch (vmret) { + case NESTEDHVM_VMEXIT_DONE: + case NESTEDHVM_VMEXIT_CONTINUE: + break; + case NESTEDHVM_VMEXIT_HOST: + case NESTEDHVM_VMEXIT_ERROR: + case NESTEDHVM_VMEXIT_FATALERROR: + default: + return TRAP_gp_fault; + } + + return 0; + } + + /* nested paging for the guest */ + if (ns_vmcb->np_enable) + VCPU_NESTEDHVM(v).nh_hap_enabled = 1; + else + VCPU_NESTEDHVM(v).nh_hap_enabled = 0; + + /* save host state */ + ret = hvm_nestedhvm_vcpu_hostsave(v, inst_len); + if (ret) { + gdprintk(XENLOG_ERR, + "hvm_nestedhvm_vcpu_hostsave failed, injecting #UD\n"); + hvm_inject_exception(TRAP_invalid_op, + HVM_DELIVER_NO_ERROR_CODE, 0); + return ret; + } + + /* Switch vcpu to guest mode. nestedhvm_vcpu_vmexit() clears + * guest mode if an error occurs. + */ + nestedhvm_vcpu_enter_guestmode(v); + + /* Remember the V_INTR_MASK in hostflags */ + if (ns_vmcb->vintr.fields.intr_masking) + VCPU_NESTEDHVM(v).nh_hostflags.fields.vintrmask = 1; + else + VCPU_NESTEDHVM(v).nh_hostflags.fields.vintrmask = 0; + + /* prepare vmcb for the nested guest */ + ret = hvm_nestedhvm_vmcb_prepare4vmrun(v); + if (ret) { + exitreason = VMEXIT_INVALID; + goto err0; + } + + if (nestedhvm_vmcb_sanitycheck(__func__, ns_vmcb, 1) != 0) { + gdprintk(XENLOG_WARNING, "nested vmcb invalid\n"); + exitreason = VMEXIT_INVALID; + goto err1; + } + + /* VMRUN, do some SVM/VMX specific tweaks to make it work. 
*/ + ret = hvm_nestedhvm_vcpu_vmrun(v, regs, NESTEDHVM_VMRUN_INTERCEPT); + if (ret) { + exitreason = VMEXIT_INVALID; + goto err1; + } + + ret = nestedhvm_vmcb_toguest(ns_vmcb, vmcbaddr); + if (ret) { + exitreason = VMEXIT_SHUTDOWN; + goto err1; + } + + nestedhvm_vcpu_stgi(v); + + return 0; + +err1: + rc = hvm_nestedhvm_vcpu_hostrestore(v, regs); + if (rc) + exitreason = VMEXIT_SHUTDOWN; + alter_rip = 0; +err0: + vmret = nestedhvm_vcpu_vmexit(v, regs, exitreason); + switch (vmret) { + case NESTEDHVM_VMEXIT_DONE: + case NESTEDHVM_VMEXIT_CONTINUE: + case NESTEDHVM_VMEXIT_HOST: + /* Move RIP right after the VMRUN instruction */ + if (alter_rip) + regs->rip += inst_len; + break; + default: + hvm_inject_exception(TRAP_gp_fault, + HVM_DELIVER_NO_ERROR_CODE, 0); + return TRAP_gp_fault; + } + return 0; +} + +static enum nestedhvm_vmexits +nestedhvm_vmexit_ioio(unsigned long *ioio_bitmap, uint64_t exitinfo1) +{ + /* not yet implemented */ + return NESTEDHVM_VMEXIT_CONTINUE; +} + +static enum nestedhvm_vmexits +nestedhvm_vmexit_msr(unsigned long *msr_bitmap, uint32_t msr, + uint64_t exitinfo1) +{ + bool_t enabled; + unsigned long *msr_bit = NULL; + + /* + * See AMD64 Programmers Manual, Vol 2, Section 15.10 + * (MSR-Bitmap Address). + */ + if ( msr <= 0x1fff ) + msr_bit = msr_bitmap + 0x0000 / BYTES_PER_LONG; + else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) ) + msr_bit = msr_bitmap + 0x0800 / BYTES_PER_LONG; + else if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) ) + msr_bit = msr_bitmap + 0x1000 / BYTES_PER_LONG; + + if (msr_bit == NULL) { + /* Test for Viridian MSRs */ + switch (msr) { + case VIRIDIAN_MSR_GUEST_OS_ID: + case VIRIDIAN_MSR_HYPERCALL: + case VIRIDIAN_MSR_VP_INDEX: + case VIRIDIAN_MSR_EOI: + case VIRIDIAN_MSR_ICR: + case VIRIDIAN_MSR_TPR: + case VIRIDIAN_MSR_APIC_ASSIST: + return NESTEDHVM_VMEXIT_INJECT; + default: + return NESTEDHVM_VMEXIT_HOST; + } + } + BUG_ON(msr_bit == NULL); + + msr &= 0x1fff; + + if (exitinfo1 == 0) + /* read access */ + enabled = test_bit(msr * 2, msr_bit); + else + /* write access */ + enabled = test_bit(msr * 2 + 1, msr_bit); + + if (!enabled) + return NESTEDHVM_VMEXIT_HOST; + + return NESTEDHVM_VMEXIT_CONTINUE; +} + +int +nestedhvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack) +{ + uint64_t exitcode = VMEXIT_INTR; + ASSERT(nestedhvm_vcpu_in_guestmode(v)); + + if ( !VCPU_NESTEDHVM(v).nh_hostflags.fields.vintrmask ) + return NESTEDHVM_INTR_VINTR_MASKED; + + if ( !VCPU_NESTEDHVM(v).nh_hostflags.fields.rflagsif ) + return NESTEDHVM_INTR_MASKED; + + switch (intack.source) { + case hvm_intsrc_pic: + case hvm_intsrc_lapic: + exitcode = VMEXIT_INTR; + break; + case hvm_intsrc_nmi: + exitcode = VMEXIT_NMI; + break; + case hvm_intsrc_mce: + case hvm_intsrc_none: + return NESTEDHVM_INTR_NOTHANDLED; + default: + BUG(); + } + + if ( nestedhvm_isintercepted_by_guest(v, exitcode) ) { + VCPU_NESTEDHVM(v).nh_forcevmexit_exitcode = exitcode; + VCPU_NESTEDHVM(v).nh_hostflags.fields.forcevmexit = 1; + return NESTEDHVM_INTR_FORCEVMEXIT; + } + + return NESTEDHVM_INTR_NOTINTERCEPTED; +} + +static enum nestedhvm_vmexits +nestedhvm_vmexit_intercepts(struct vcpu *v, struct cpu_user_regs *regs, + uint64_t exit_reason) +{ + bool_t is_intercepted; + + is_intercepted = nestedhvm_isintercepted_by_guest(v, exit_reason); + + if (VCPU_NESTEDHVM(v).nh_hostflags.fields.forcevmexit) + { + if (is_intercepted) + return NESTEDHVM_VMEXIT_INJECT; + gdprintk(XENLOG_ERR, + "forced VMEXIT can't happen as guest can't " + "handle the intercept\n"); + return 
NESTEDHVM_VMEXIT_FATALERROR; + } + + switch (exit_reason) { + case VMEXIT_INVALID: + if (is_intercepted) + return NESTEDHVM_VMEXIT_INJECT; + return NESTEDHVM_VMEXIT_HOST; + + case VMEXIT_INTR: + return NESTEDHVM_VMEXIT_HOST; + case VMEXIT_NMI: + return NESTEDHVM_VMEXIT_HOST; + + case VMEXIT_NPF: + if (nestedhvm_paging_mode_hap(v)) { + if (!is_intercepted) + return NESTEDHVM_VMEXIT_FATALERROR; + /* host nested paging + guest nested paging */ + return NESTEDHVM_VMEXIT_HOST; + } + if (paging_mode_hap(v->domain)) { + if (is_intercepted) + return NESTEDHVM_VMEXIT_FATALERROR; + /* host nested paging + guest shadow paging */ + return NESTEDHVM_VMEXIT_HOST; + } + /* host shadow paging + guest shadow paging */ + /* Can this happen? */ + BUG(); + return NESTEDHVM_VMEXIT_FATALERROR; + case VMEXIT_EXCEPTION_PF: + if (nestedhvm_paging_mode_hap(v)) { + /* host nested paging + guest nested paging */ + if (!is_intercepted) + /* guest intercepts #PF unnecessarily */ + return NESTEDHVM_VMEXIT_HOST; + /* nested guest intercepts #PF unnecessarily */ + return NESTEDHVM_VMEXIT_INJECT; + } + if (!paging_mode_hap(v->domain)) { + /* host shadow paging + guest shadow paging */ + return NESTEDHVM_VMEXIT_HOST; + } + /* host nested paging + guest shadow paging */ + return NESTEDHVM_VMEXIT_INJECT; + case VMEXIT_MSR: + if (regs == NULL) + return NESTEDHVM_VMEXIT_HOST; + return nestedhvm_vmexit_msr(VCPU_NESTEDHVM(v).nh_msrpm, + regs->rcx, + VCPU_NESTEDHVM(v).nh_vmcb->exitinfo1); + case VMEXIT_IOIO: + return nestedhvm_vmexit_ioio(VCPU_NESTEDHVM(v).nh_iopm, + VCPU_NESTEDHVM(v).nh_vmcb->exitinfo1); + default: + break; + } + + if (is_intercepted) + return NESTEDHVM_VMEXIT_CONTINUE; + return NESTEDHVM_VMEXIT_HOST; +} + +static enum nestedhvm_vmexits +nestedhvm_vmexit(struct vcpu *v, struct cpu_user_regs *regs, + uint64_t exit_reason) +{ + int rc; + enum nestedhvm_vmexits ret; + struct vmcb_struct *ns_vmcb; + + /* nested VMCB */ + ns_vmcb = VCPU_NESTEDHVM(v).nh_vmcb; + + ASSERT(nestedhvm_vcpu_in_guestmode(v)); + + rc = hvm_nestedhvm_vmcb_prepare4vmexit(v); + if (rc) { + ret = NESTEDHVM_VMEXIT_ERROR; + goto err1; + } + + ret = nestedhvm_vmexit_intercepts(v, regs, exit_reason); + switch (ret) { + case NESTEDHVM_VMEXIT_CONTINUE: + case NESTEDHVM_VMEXIT_INJECT: + break; + case NESTEDHVM_VMEXIT_ERROR: + case NESTEDHVM_VMEXIT_FATALERROR: + goto err1; + case NESTEDHVM_VMEXIT_HOST: + return ret; + default: + break; + } + + rc = hvm_nestedhvm_vcpu_hostrestore(v, regs); + if (rc) { + ret = NESTEDHVM_VMEXIT_FATALERROR; + goto err0; + } + + nestedhvm_vcpu_exit_guestmode(v); + + return ret; + +err1: + rc = hvm_nestedhvm_vcpu_hostrestore(v, regs); + if (rc) + ret = NESTEDHVM_VMEXIT_FATALERROR; +err0: + nestedhvm_vcpu_exit_guestmode(v); + return ret; +} + +static enum nestedhvm_vmexits +nestedhvm_vmexit_inject(struct vcpu *v, uint64_t vmcbaddr, uint64_t exitcode) +{ + enum nestedhvm_vmexits ret; + enum hvm_copy_result rc; + struct vmcb_struct *ns_vmcb; + + ns_vmcb = VCPU_NESTEDHVM(v).nh_vmcb; + ret = NESTEDHVM_VMEXIT_CONTINUE; + + /* Nothing to do for asynchronous events such as + * VMEXIT_INTR, VMEXIT_NMI, VMEXIT_SMI, etc. + * They get injected on VMEXIT_STGI. 
+ */ + ns_vmcb->exitcode = exitcode; + ns_vmcb->eventinj.bytes = 0; + + rc = nestedhvm_vmcb_toguest(ns_vmcb, vmcbaddr); + switch (rc) { + case HVMCOPY_okay: + break; + default: + return NESTEDHVM_VMEXIT_FATALERROR; + } + + return ret; +} + +enum nestedhvm_vmexits +nestedhvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs, + uint64_t exit_reason) +{ + int rc; + uint64_t vmcbaddr; + enum nestedhvm_vmexits ret; + + if (nestedhvm_vcpu_in_guestmode(v)) { + ret = nestedhvm_vmexit(v, regs, exit_reason); + switch (ret) { + case NESTEDHVM_VMEXIT_FATALERROR: + case NESTEDHVM_VMEXIT_HOST: + return ret; + case NESTEDHVM_VMEXIT_ERROR: + exit_reason = VMEXIT_INVALID; + break; + default: + ASSERT(!nestedhvm_vcpu_in_guestmode(v)); + break; + } + + /* host state has been restored */ + } + + nestedhvm_vcpu_clgi(v); + vmcbaddr = VCPU_NESTEDHVM(v).nh_vmcbaddr; + + /* Prepare for running the guest. Do some final SVM/VMX + * specific tweaks if necessary to make it work. + */ + rc = hvm_nestedhvm_vcpu_vmrun(v, regs, NESTEDHVM_VMRUN_VMEXIT); + if (rc) { + return NESTEDHVM_VMEXIT_FATALERROR; + } + + ASSERT(!nestedhvm_vcpu_in_guestmode(v)); + /* Inject VMEXIT into guest. */ + ret = nestedhvm_vmexit_inject(v, vmcbaddr, exit_reason); + switch (ret) { + case NESTEDHVM_VMEXIT_FATALERROR: + return ret; + case NESTEDHVM_VMEXIT_CONTINUE: + ret = NESTEDHVM_VMEXIT_DONE; + default: + break; + } + + return ret; +} + +int +nestedhvm_vcpu_vmload(struct vcpu *v, uint64_t vmcbaddr) +{ + int ret; + + ret = nestedhvm_vcpu_state_validate(v, vmcbaddr); + if (ret) { + gdprintk(XENLOG_ERR, + "nestedhvm_vcpu_state_validate failed, injecting 0x%x\n", + ret); + hvm_inject_exception(ret, HVM_DELIVER_NO_ERROR_CODE, 0); + return ret; + } + + ret = hvm_nestedhvm_vcpu_vmload(v, vmcbaddr); + if (ret) { + gdprintk(XENLOG_ERR, + "hvm_nestedsm_vcpu_vmload failed, injecting 0x%x\n", + ret); + hvm_inject_exception(ret, HVM_DELIVER_NO_ERROR_CODE, 0); + return ret; + } + + return 0; +} + +int nestedhvm_vcpu_vmsave(struct vcpu *v, uint64_t vmcbaddr) +{ + int ret; + + ret = nestedhvm_vcpu_state_validate(v, vmcbaddr); + if (ret) { + gdprintk(XENLOG_ERR, + "nestedhvm_vcpu_state_validate failed, injecting 0x%x\n", + ret); + hvm_inject_exception(ret, HVM_DELIVER_NO_ERROR_CODE, 0); + return ret; + } + + ret = hvm_nestedhvm_vcpu_vmsave(v, vmcbaddr); + if (ret) { + gdprintk(XENLOG_ERR, + "hvm_nestedsm_vcpu_vmsave failed, injecting 0x%x\n", + ret); + hvm_inject_exception(ret, HVM_DELIVER_NO_ERROR_CODE, 0); + return ret; + } + + return 0; +} + +/* Virtual GIF */ +int +nestedhvm_vcpu_clgi(struct vcpu *v) +{ + if (!nestedhvm_enabled(v->domain)) { + hvm_inject_exception(TRAP_invalid_op, 0, 0); + return -1; + } + + /* clear gif flag */ + VCPU_NESTEDHVM(v).nh_gif = 0; + local_event_delivery_disable(v); /* mask events for PV drivers */ + return 0; +} + +int +nestedhvm_vcpu_stgi(struct vcpu *v) +{ + if (!nestedhvm_enabled(v->domain)) { + hvm_inject_exception(TRAP_invalid_op, 0, 0); + return -1; + } + + /* set gif flag */ + VCPU_NESTEDHVM(v).nh_gif = 1; + local_event_delivery_enable(v); /* unmask events for PV drivers */ + return 0; +} + +/* Emulate MSRs for Nested SVM */ +int +nestedhvm_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) +{ + int ret = 1; + + *val = 0; + + switch (msr) { + case MSR_K8_VM_CR: + break; + case MSR_K8_VM_HSAVE_PA: + *val = VCPU_NESTEDHVM(v).nh_msr_hsavepa; + break; + default: + ret = 0; + break; + } + + return ret; +} + +int +nestedhvm_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) +{ + int ret = 1; + + switch (msr) { + case 
MSR_K8_VM_CR:
+        /* ignore write. handle all bits as read-only. */
+        break;
+    case MSR_K8_VM_HSAVE_PA:
+        if (!nestedhvm_vmcb_addr_isvalid(val)) {
+            gdprintk(XENLOG_ERR,
+                "MSR_K8_VM_HSAVE_PA value invalid\n");
+            ret = -1; /* inject #GP */
+            break;
+        }
+        VCPU_NESTEDHVM(v).nh_msr_hsavepa = val;
+        break;
+    default:
+        ret = 0;
+        break;
+    }
+
+    return ret;
+}
diff -r 6fca66c4d6f6 -r c7f560bcc31f xen/include/asm-x86/hvm/nestedhvm.h
--- /dev/null
+++ b/xen/include/asm-x86/hvm/nestedhvm.h
@@ -0,0 +1,86 @@
+/*
+ * Nested HVM
+ * Copyright (c) 2010, Advanced Micro Devices, Inc.
+ * Author: Christoph Egger
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef _HVM_NESTEDHVM_H
+#define _HVM_NESTEDHVM_H
+
+#include /* for uintNN_t */
+#include /* for struct vcpu, struct domain */
+#include /* for struct vmcb_struct */
+#include /* for VCPU_NESTEDHVM */
+
+enum nestedhvm_vmexits {
+    NESTEDHVM_VMEXIT_ERROR = 0,      /* inject VMEXIT w/ invalid VMCB */
+    NESTEDHVM_VMEXIT_FATALERROR = 1, /* crash first level guest */
+    NESTEDHVM_VMEXIT_HOST = 2,       /* exit handled on host level */
+    NESTEDHVM_VMEXIT_CONTINUE = 3,   /* further handling */
+    NESTEDHVM_VMEXIT_INJECT = 4,     /* inject VMEXIT */
+    NESTEDHVM_VMEXIT_DONE = 5,       /* VMEXIT handled */
+};
+
+/* Nested SVM on/off per domain */
+bool_t nestedhvm_enabled(struct domain *d);
+int nestedhvm_initialise(struct domain *d);
+#define nestedhvm_paging_mode_hap(v) (!!VCPU_NESTEDHVM((v)).nh_hap_enabled)
+
+/* Nested VMCB */
+struct vmcb_struct *nestedhvm_vmcb_create(struct vcpu *v);
+int nestedhvm_vmcb_destroy(struct vcpu *v, struct vmcb_struct *nestedvmcb);
+int nestedhvm_vmcb_prepare4vmrun(struct vcpu *v, struct vmcb_struct *nestedvmcb);
+int nestedhvm_vmcb_prepare4vmexit(struct vcpu *v, struct vmcb_struct *nestedvmcb);
+enum hvm_copy_result
+nestedhvm_vmcb_fromguest(struct vmcb_struct *vmcb, uint64_t vmcbaddr);
+enum hvm_copy_result
+nestedhvm_vmcb_toguest(struct vmcb_struct *vmcb, uint64_t vmcbaddr);
+
+/* Nested VCPU */
+int nestedhvm_vcpu_initialise(struct vcpu *v);
+int nestedhvm_vcpu_destroy(struct vcpu *v);
+bool_t nestedhvm_vcpu_in_guestmode(struct vcpu *v);
+#define nestedhvm_vcpu_enter_guestmode(v) VCPU_NESTEDHVM((v)).nh_guestmode = 1
+#define nestedhvm_vcpu_exit_guestmode(v) VCPU_NESTEDHVM((v)).nh_guestmode = 0
+int nestedhvm_vcpu_vmrun(struct vcpu *v, struct cpu_user_regs *regs,
+    uint64_t vmcbaddr, unsigned int inst_len);
+enum nestedhvm_vmexits nestedhvm_vcpu_vmexit(struct vcpu *v,
+    struct cpu_user_regs *regs, uint64_t exit_reason);
+int nestedhvm_vcpu_vmload(struct vcpu *v, uint64_t vmcbaddr);
+int nestedhvm_vcpu_vmsave(struct vcpu *v, uint64_t vmcbaddr);
+int nestedhvm_vcpu_clgi(struct vcpu *v);
+int nestedhvm_vcpu_stgi(struct vcpu *v);
+
+/* Interrupts */
+#define nestedhvm_gif_isset(v) (!!VCPU_NESTEDHVM((v)).nh_gif)
+#define NESTEDHVM_INTR_NOTHANDLED 4
+#define NESTEDHVM_INTR_NOTINTERCEPTED 3
+#define NESTEDHVM_INTR_FORCEVMEXIT 2
+#define NESTEDHVM_INTR_VINTR_MASKED 1
+#define NESTEDHVM_INTR_MASKED 0
+int nestedhvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack);
+
+/* Emulate MSRs for Nested SVM */
+int nestedhvm_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val);
+int nestedhvm_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val);
+
+/* Nested paging */
+#define NESTEDHVM_PAGEFAULT_DONE 0
+#define NESTEDHVM_PAGEFAULT_INJECT 1
+#define NESTEDHVM_PAGEFAULT_ERROR 2
+int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t L2_gpa);
+
+#endif /* _HVM_NESTEDHVM_H */
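
The VMCB/VM_HSAVE_PA address check in the patch (nestedhvm_vmcb_addr_isvalid) only accepts a page-aligned guest-physical address below 0xfd00000000 and otherwise fails the instruction with #GP. A minimal standalone sketch of that check, with a small test harness that is not part of the patch (PAGE_SIZE/PAGE_MASK are spelled out here so the example is self-contained):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Same checks as nestedhvm_vmcb_addr_isvalid(): below 0xfd00000000
 * and 4K-aligned, otherwise the caller injects #GP. */
static int vmcb_addr_isvalid(uint64_t addr)
{
    if (addr > 0xfd00000000ULL)
        return 0;                      /* beyond the supported range */
    if ((addr & ~PAGE_MASK) != 0)
        return 0;                      /* not page aligned */
    return 1;
}

int main(void)
{
    printf("%d\n", vmcb_addr_isvalid(0x12345000ULL));  /* 1: aligned, in range */
    printf("%d\n", vmcb_addr_isvalid(0x12345800ULL));  /* 0: not page aligned */
    printf("%d\n", vmcb_addr_isvalid(0xfe00000000ULL));/* 0: out of range */
    return 0;
}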
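
Before emulating VMRUN, nestedhvm_vmcb_sanitycheck() rejects VMCBs whose control state the hardware would refuse with VMEXIT_INVALID. Below is a condensed, standalone sketch of a few of those checks; the bit masks are the architectural EFER/CR0/CR4 bit positions, written out locally because the Xen headers are not pulled in:

#include <stdint.h>

#define EFER_SVME   (1ULL << 12)
#define EFER_LME    (1ULL << 8)
#define X86_CR0_PE  (1ULL << 0)
#define X86_CR0_PG  (1ULL << 31)
#define X86_CR0_CD  (1ULL << 30)
#define X86_CR0_NW  (1ULL << 29)
#define X86_CR4_PAE (1ULL << 5)

/* Subset of the consistency checks applied to the L1-provided VMCB before
 * VMRUN; any hit makes the emulated VMRUN fail with VMEXIT_INVALID.
 * Returns 0 when this subset of checks passes. */
static int vmcb_state_consistent(uint64_t efer, uint64_t cr0, uint64_t cr4)
{
    if (!(efer & EFER_SVME))
        return -1;                      /* SVME must be set */
    if (!(cr0 & X86_CR0_CD) && (cr0 & X86_CR0_NW))
        return -1;                      /* CD=0 with NW=1 is illegal */
    if (cr0 >> 32)
        return -1;                      /* CR0[63:32] must be zero */
    if ((efer & EFER_LME) && (cr0 & X86_CR0_PG)) {
        if (!(cr4 & X86_CR4_PAE) || !(cr0 & X86_CR0_PE))
            return -1;                  /* long mode needs PAE and PE */
    }
    return 0;
}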
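
nestedhvm_isintercepted_by_guest() maps an exit code onto the matching bit of the L1 guest's cached intercept vectors. The sketch below mirrors that range-based lookup; the VMEXIT_* constants and the struct holding the cached vectors are illustrative stand-ins, not the definitions from Xen's vmcb.h:

#include <stdint.h>

/* Illustrative stand-ins for the relevant SVM exit-code ranges. */
enum {
    VMEXIT_CR0_READ     = 0x00, VMEXIT_CR15_WRITE   = 0x1f,
    VMEXIT_DR0_READ     = 0x20, VMEXIT_DR7_WRITE    = 0x37,
    VMEXIT_EXCEPTION_DE = 0x40, VMEXIT_EXCEPTION_XF = 0x5f,
    VMEXIT_INTR         = 0x60, VMEXIT_SHUTDOWN     = 0x7f,
    VMEXIT_VMRUN        = 0x80,
};

/* Cached copy of the L1 guest's intercept vectors (filled from its VMCB). */
struct nested_intercepts {
    uint32_t cr;          /* CR read/write intercept bits */
    uint32_t dr;          /* DR read/write intercept bits */
    uint32_t exceptions;  /* exception intercept bits */
    uint32_t general1;    /* INTR ... SHUTDOWN */
    uint32_t general2;    /* VMRUN ... */
};

/* Same idea as nestedhvm_isintercepted_by_guest(): each exit-code range
 * indexes one bit in the matching intercept vector. */
static int guest_intercepts_exit(const struct nested_intercepts *i,
                                 uint64_t exit_reason)
{
    if (exit_reason <= VMEXIT_CR15_WRITE)
        return !!(i->cr & (1u << (exit_reason - VMEXIT_CR0_READ)));
    if (exit_reason >= VMEXIT_DR0_READ && exit_reason <= VMEXIT_DR7_WRITE)
        return !!(i->dr & (1u << (exit_reason - VMEXIT_DR0_READ)));
    if (exit_reason >= VMEXIT_EXCEPTION_DE && exit_reason <= VMEXIT_EXCEPTION_XF)
        return !!(i->exceptions & (1u << (exit_reason - VMEXIT_EXCEPTION_DE)));
    if (exit_reason >= VMEXIT_INTR && exit_reason <= VMEXIT_SHUTDOWN)
        return !!(i->general1 & (1u << (exit_reason - VMEXIT_INTR)));
    if (exit_reason >= VMEXIT_VMRUN && exit_reason < VMEXIT_VMRUN + 32)
        return !!(i->general2 & (1u << (exit_reason - VMEXIT_VMRUN)));
    return 0;   /* exit codes outside the bit-mapped ranges */
}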
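
nestedhvm_vmexit_msr() consults the L1 guest's MSR permission bitmap: three 2 KB blocks cover the MSR ranges 0x0-0x1fff, 0xc0000000-0xc0001fff and 0xc0010000-0xc0011fff, and within a block each MSR owns a read bit and a write bit. A standalone, byte-addressed sketch of that lookup (test_bit_u8 is a local helper written for this example, not Xen's test_bit):

#include <stdint.h>
#include <stddef.h>

/* Test one bit in a byte-addressed bitmap. */
static int test_bit_u8(const uint8_t *map, unsigned long bit)
{
    return (map[bit / 8] >> (bit % 8)) & 1;
}

/* MSR permission map lookup in the style of nestedhvm_vmexit_msr().
 * Returns 1 if the L1 guest intercepts the access, 0 if not, and -1 if the
 * MSR lies outside the bitmap-covered ranges. */
static int msrpm_intercepted(const uint8_t *msrpm, uint32_t msr, int is_write)
{
    size_t block;

    if (msr <= 0x1fff)
        block = 0x0000;                      /* MSRs 0x0 - 0x1fff */
    else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
        block = 0x0800;                      /* MSRs 0xc0000000 - 0xc0001fff */
    else if (msr >= 0xc0010000 && msr <= 0xc0011fff)
        block = 0x1000;                      /* MSRs 0xc0010000 - 0xc0011fff */
    else
        return -1;                           /* not covered by the bitmap */

    msr &= 0x1fff;                           /* offset within the block */
    return test_bit_u8(msrpm + block, 2 * msr + (is_write ? 1 : 0));
}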
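
nestedhvm_vcpu_interrupt() decides what to do with a pending event while the vCPU is running the nested guest. The sketch below restates that decision with the relevant vCPU state passed in explicitly; the enum mirrors the NESTEDHVM_INTR_* values from nestedhvm.h and the interception test is abstracted behind a callback:

/* Interrupt sources, modelled on Xen's hvm_intsrc_* (illustrative copy). */
enum intsrc { intsrc_none, intsrc_pic, intsrc_lapic, intsrc_nmi, intsrc_mce };

/* Mirrors the NESTEDHVM_INTR_* values. */
enum intr_verdict {
    INTR_MASKED = 0,          /* blocked by the saved host RFLAGS.IF */
    INTR_VINTR_MASKED = 1,    /* V_INTR_MASKING not in effect */
    INTR_FORCEVMEXIT = 2,     /* L1 intercepts it: emulate a VMEXIT */
    INTR_NOTINTERCEPTED = 3,  /* deliver directly to the nested guest */
    INTR_NOTHANDLED = 4,      /* nothing for the nested code to do */
};

/* Decision logic of nestedhvm_vcpu_interrupt(); the real code reads
 * vintr_masking and host_rflags_if from nh_hostflags. */
static enum intr_verdict nested_intr_decide(int vintr_masking,
                                            int host_rflags_if,
                                            enum intsrc src,
                                            int (*intercepted)(enum intsrc))
{
    if (!vintr_masking)
        return INTR_VINTR_MASKED;
    if (!host_rflags_if)
        return INTR_MASKED;
    if (src == intsrc_none || src == intsrc_mce)
        return INTR_NOTHANDLED;
    return intercepted(src) ? INTR_FORCEVMEXIT : INTR_NOTINTERCEPTED;
}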
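
The VMEXIT_NPF and VMEXIT_EXCEPTION_PF cases of nestedhvm_vmexit_intercepts() route faults according to whether the host and the L1 guest use nested paging. A condensed decision table as standalone functions; route_npf assumes the host runs HAP, since the patch treats the shadow-on-shadow NPF case as unreachable:

/* Who handles a fault taken while the nested (L2) guest was running?
 * "HOST" means Xen handles it itself, "INJECT" means the VMEXIT is
 * reflected into the L1 guest. */
enum fault_owner { HANDLE_HOST, HANDLE_INJECT, HANDLE_FATAL };

static enum fault_owner route_npf(int guest_uses_hap, int l1_intercepts_npf)
{
    if (guest_uses_hap)                     /* HAP on HAP */
        return l1_intercepts_npf ? HANDLE_HOST : HANDLE_FATAL;
    /* host HAP + guest shadow paging: L1 must not see NPF exits */
    return l1_intercepts_npf ? HANDLE_FATAL : HANDLE_HOST;
}

static enum fault_owner route_pagefault(int host_uses_hap, int guest_uses_hap,
                                        int l1_intercepts_pf)
{
    if (guest_uses_hap)                     /* HAP on HAP */
        return l1_intercepts_pf ? HANDLE_INJECT : HANDLE_HOST;
    if (!host_uses_hap)                     /* shadow on shadow */
        return HANDLE_HOST;
    return HANDLE_INJECT;                   /* host HAP + guest shadow */
}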
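
The VM_CR / VM_HSAVE_PA MSR emulation at the end of nestedhvm.c keeps a per-vCPU shadow value and validates writes with the same address check used for VMCB pointers. A standalone sketch of that behaviour; the MSR index constants are the usual AMD numbers and are assumptions of this example, not values taken from the patch:

#include <stdint.h>

#define MSR_K8_VM_CR        0xc0010114u   /* assumed AMD VM_CR index */
#define MSR_K8_VM_HSAVE_PA  0xc0010117u   /* assumed AMD VM_HSAVE_PA index */

/* Per-vCPU shadow of the SVM MSRs the guest is allowed to see. */
struct nested_msrs {
    uint64_t hsave_pa;
};

/* Mirrors nestedhvm_rdmsr(): 1 = handled, 0 = not an SVM MSR. */
static int nested_rdmsr(const struct nested_msrs *n, uint32_t msr, uint64_t *val)
{
    switch (msr) {
    case MSR_K8_VM_CR:       *val = 0;            return 1;
    case MSR_K8_VM_HSAVE_PA: *val = n->hsave_pa;   return 1;
    default:                                       return 0;
    }
}

/* Mirrors nestedhvm_wrmsr(): VM_CR is treated as read-only, VM_HSAVE_PA is
 * stored only if it passes the page-aligned/in-range check; -1 asks the
 * caller to inject #GP. */
static int nested_wrmsr(struct nested_msrs *n, uint32_t msr, uint64_t val)
{
    switch (msr) {
    case MSR_K8_VM_CR:
        return 1;                               /* ignore the write */
    case MSR_K8_VM_HSAVE_PA:
        if ((val & 0xfffULL) || val > 0xfd00000000ULL)
            return -1;                          /* invalid: inject #GP */
        n->hsave_pa = val;
        return 1;
    default:
        return 0;
    }
}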