[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 09/13] Nested Virtualization: svm specific implementation
On Tuesday 16 November 2010 15:54:11 Tim Deegan wrote: > Hi, > > At 18:43 +0000 on 12 Nov (1289587425), Christoph Egger wrote: > > +static int nsvm_vmrun_permissionmap(struct vcpu *v) > > +{ > > + struct arch_svm_struct *arch_svm = &v->arch.hvm_svm; > > + struct nestedsvm *svm = &vcpu_nestedsvm(v); > > + struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx; > > + struct vmcb_struct *host_vmcb = arch_svm->vmcb; > > + unsigned long *ns_msrpm_ptr; > > + unsigned int i; > > + enum hvm_copy_result ret; > > + > > + ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm; > > + > > + ret = hvm_copy_from_guest_phys(svm->ns_cached_msrpm, > > + ns_vmcb->msrpm_base_pa, MSRPM_SIZE); > > + if (ret != HVMCOPY_okay) { > > + gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n", > > ret); + return 1; > > + } > > + > > + /* Skip io bitmap merge since hvm_io_bitmap has all bits set but > > + * 0x80 and 0xed. > > + */ > > What if the L1 hypervisor wants to intercept port 0x80 or port 0xed? Thanks for the pointer. This item is work in progress. > > > + /* v->arch.hvm_svm.msrpm has type unsigned long, thus > > + * BYTES_PER_LONG. > > + */ > > + for (i = 0; i < MSRPM_SIZE / BYTES_PER_LONG; i++) > > + svm->ns_merged_msrpm[i] = arch_svm->msrpm[i] | ns_msrpm_ptr[i]; > > + > > + host_vmcb->iopm_base_pa = > > + (uint64_t)virt_to_maddr(hvm_io_bitmap); > > + host_vmcb->msrpm_base_pa = > > + (uint64_t)virt_to_maddr(svm->ns_merged_msrpm); > > + > > + return 0; > > +} > > + > > +static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs > > *regs) +{ > > + struct nestedsvm *svm = &vcpu_nestedsvm(v); > > + struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx; > > + struct vmcb_struct *host_vmcb = v->arch.hvm_svm.vmcb; > > + int rc; > > + > > + /* Enable nested guest intercepts */ > > + svm->ns_cr_intercepts = ns_vmcb->cr_intercepts; > > + svm->ns_dr_intercepts = ns_vmcb->dr_intercepts; > > + svm->ns_exception_intercepts = ns_vmcb->exception_intercepts; > > + svm->ns_general1_intercepts = ns_vmcb->general1_intercepts; > > + svm->ns_general2_intercepts = ns_vmcb->general2_intercepts; > > + > > + host_vmcb->cr_intercepts |= ns_vmcb->cr_intercepts; > > + host_vmcb->dr_intercepts |= ns_vmcb->dr_intercepts; > > + host_vmcb->exception_intercepts |= ns_vmcb->exception_intercepts; > > + host_vmcb->general1_intercepts |= ns_vmcb->general1_intercepts; > > + host_vmcb->general2_intercepts |= ns_vmcb->general2_intercepts; > > + > > + /* Nested Pause Filter */ > > + if (ns_vmcb->general1_intercepts & GENERAL1_INTERCEPT_PAUSE) > > + host_vmcb->pause_filter_count = > > + min(ns_vmcb->pause_filter_count, > > host_vmcb->pause_filter_count); + else > > + host_vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT; > > Why ignore the L0 count if the L1 doesn't intercept PAUSE? SVM_PAUSEFILTER_INIT is what is also used in construct_vmcb(). > > > + > > + /* Nested IO permission bitmaps */ > > + rc = nsvm_vmrun_permissionmap(v); > > + if (rc) > > + return rc; > > + > > + /* TSC offset */ > > + hvm_set_guest_tsc(v, host_vmcb->tsc_offset + ns_vmcb->tsc_offset); > > hvm_set_guest_tsc takes an absolute value, not an offset. ITYM to call > the hvm_funcs pointer directly. Thanks. Fixed. > > > + > > + /* ASID */ > > + hvm_asid_flush_vcpu(v); > > + /* host_vmcb->guest_asid = ns_vmcb->guest_asid; */ > > + > > + /* TLB control */ > > + host_vmcb->tlb_control |= ns_vmcb->tlb_control; > > + > > + /* Virtual Interrupts */ > > + host_vmcb->vintr = ns_vmcb->vintr; > > + host_vmcb->vintr.fields.intr_masking = 1; > > + > > + /* Shadow Mode */ > > + host_vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow; > > + > > + /* Exit codes */ > > + host_vmcb->exitcode = ns_vmcb->exitcode; > > + host_vmcb->exitinfo1 = ns_vmcb->exitinfo1; > > + host_vmcb->exitinfo2 = ns_vmcb->exitinfo2; > > + host_vmcb->exitintinfo = ns_vmcb->exitintinfo; > > + > > + /* Pending Interrupts */ > > + host_vmcb->eventinj = ns_vmcb->eventinj; > > + > > + /* LBR virtualization */ > > + svm->ns_lbr_control = ns_vmcb->lbr_control; > > + host_vmcb->lbr_control.bytes |= ns_vmcb->lbr_control.bytes; > > + > > + /* NextRIP */ > > + host_vmcb->nextrip = ns_vmcb->nextrip; > > + > > + /* > > + * VMCB Save State Area > > + */ > > + > > + /* Segments */ > > + host_vmcb->es = ns_vmcb->es; > > + host_vmcb->cs = ns_vmcb->cs; > > + host_vmcb->ss = ns_vmcb->ss; > > + host_vmcb->ds = ns_vmcb->ds; > > + host_vmcb->gdtr = ns_vmcb->gdtr; > > + host_vmcb->idtr = ns_vmcb->idtr; > > + > > + /* CPL */ > > + host_vmcb->cpl = ns_vmcb->cpl; > > + > > + /* EFER */ > > + v->arch.hvm_vcpu.guest_efer = ns_vmcb->efer; > > + rc = hvm_set_efer(ns_vmcb->efer); > > + if (rc != X86EMUL_OKAY) > > + gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc); > > + > > + /* CR4 */ > > + v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->cr4; > > + rc = hvm_set_cr4(ns_vmcb->cr4); > > + if (rc != X86EMUL_OKAY) > > + gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc); > > + > > + /* CR0 */ > > + v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->cr0; > > + rc = hvm_set_cr0(ns_vmcb->cr0); > > + if (rc != X86EMUL_OKAY) > > + gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc); > > + > > + /* CR2 */ > > + v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->cr2; > > + hvm_update_guest_cr(v, 2); > > + > > + /* Nested paging mode */ > > + if (nestedhvm_paging_mode_hap(v)) { > > + /* host nested paging + guest nested paging. */ > > + > > + /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. > > */ + rc = hvm_set_cr3(ns_vmcb->cr3); > > + if (rc != X86EMUL_OKAY) > > + gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc); > > + } else if (paging_mode_hap(v->domain)) { > > + /* host nested paging + guest shadow paging. */ > > + host_vmcb->np_enable = 1; > > + /* Keep h_cr3 as it is. */ > > + /* Guest shadow paging: Must intercept pagefaults. */ > > + host_vmcb->exception_intercepts |= (1U << TRAP_page_fault); > > No - it's the L1 hypervisor's job to intercep #PF if it wants to use > shadow paging. We shouldn't special-case it here. Fixed. > > > + /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. > > */ + rc = hvm_set_cr3(ns_vmcb->cr3); > > + if (rc != X86EMUL_OKAY) > > + gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc); > > + } else { > > + /* host shadow paging + guest shadow paging. */ > > + host_vmcb->np_enable = 0; > > + host_vmcb->h_cr3 = 0x0; > > + > > + /* TODO: Once shadow-shadow paging is in place come back to here > > + * and set host_vmcb->cr3 to the shadowed shadow table. > > + */ > > + } > > + > > + /* DRn */ > > + host_vmcb->dr7 = ns_vmcb->dr7; > > + host_vmcb->dr6 = ns_vmcb->dr6; > > + > > + /* RFLAGS */ > > + host_vmcb->rflags = ns_vmcb->rflags; > > + > > + /* RIP */ > > + host_vmcb->rip = ns_vmcb->rip; > > + > > + /* RSP */ > > + host_vmcb->rsp = ns_vmcb->rsp; > > + > > + /* RAX */ > > + host_vmcb->rax = ns_vmcb->rax; > > + > > + /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase, > > + * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp, > > + * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation. > > + */ > > + > > + /* Page tables */ > > + host_vmcb->pdpe0 = ns_vmcb->pdpe0; > > + host_vmcb->pdpe1 = ns_vmcb->pdpe1; > > + host_vmcb->pdpe2 = ns_vmcb->pdpe2; > > + host_vmcb->pdpe3 = ns_vmcb->pdpe3; > > + > > + /* PAT */ > > + host_vmcb->g_pat = ns_vmcb->g_pat; > > + > > + /* Debug Control MSR */ > > + host_vmcb->debugctlmsr = ns_vmcb->debugctlmsr; > > + > > + /* LBR MSRs */ > > + host_vmcb->lastbranchfromip = ns_vmcb->lastbranchfromip; > > + host_vmcb->lastbranchtoip = ns_vmcb->lastbranchtoip; > > + host_vmcb->lastintfromip = ns_vmcb->lastintfromip; > > + host_vmcb->lastinttoip = ns_vmcb->lastinttoip; > > + > > + rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1); > > + if (rc) { > > + gdprintk(XENLOG_ERR, "nested vmcb invalid\n"); > > + return rc; > > + } > > + > > + rc = svm_vmcb_isvalid(__func__, host_vmcb, 1); > > + if (rc) { > > + gdprintk(XENLOG_ERR, "host vmcb invalid\n"); > > + return rc; > > + } > > + > > + /* Switch guest registers to nested guest */ > > + regs->eax = ns_vmcb->rax; > > + regs->eip = ns_vmcb->rip; > > + regs->esp = ns_vmcb->rsp; > > + regs->eflags = ns_vmcb->rflags; > > + > > + return 0; > > +} > > + > > > > +int > > +nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v, > > + struct cpu_user_regs *regs, uint64_t exitcode) > > +{ > > + uint64_t exit_bits; > > + struct nestedvcpu *nv = &vcpu_nestedhvm(v); > > + struct nestedsvm *svm = &vcpu_nestedsvm(v); > > + struct vmcb_struct *ns_vmcb = nv->nv_vmcx; > > + enum nestedhvm_vmexits vmexits; > > + > > + switch (exitcode) { > > + case VMEXIT_CR0_READ ... VMEXIT_CR15_READ: > > + case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE: > > + exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ); > > + if (svm->ns_cr_intercepts & exit_bits) > > + break; > > + return 0; > > + > > + case VMEXIT_DR0_READ ... VMEXIT_DR7_READ: > > + case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE: > > + exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ); > > + if (svm->ns_dr_intercepts & exit_bits) > > + break; > > + return 0; > > + > > + case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF: > > + exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE); > > + if (svm->ns_exception_intercepts & exit_bits) > > + break; > > + return 0; > > + > > + case VMEXIT_INTR ... VMEXIT_SHUTDOWN: > > + exit_bits = 1ULL << (exitcode - VMEXIT_INTR); > > + if (svm->ns_general1_intercepts & exit_bits) > > + break; > > + return 0; > > + > > + case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL: > > + exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN); > > + if (svm->ns_general2_intercepts & exit_bits) > > + break; > > + return 0; > > + > > + case VMEXIT_NPF: > > + case VMEXIT_INVALID: > > + /* Always intercepted */ > > + break; > > + > > + default: > > + gdprintk(XENLOG_ERR, "Illegal exitcode 0x%"PRIx64"\n", > > exitcode); + BUG(); > > + break; > > + } > > + > > + /* Special cases: Do more detailed checks */ > > + switch (exitcode) { > > + case VMEXIT_MSR: > > + ASSERT(regs != NULL); > > + nestedsvm_vmcb_map(v, nv->nv_vmcxaddr); > > + ASSERT(nv->nv_vmcx != NULL); > > + ns_vmcb = nv->nv_vmcx; > > + vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm, > > + regs->ecx, ns_vmcb->exitinfo1 != 0); > > + if (vmexits == NESTEDHVM_VMEXIT_HOST) > > + return 0; > > + break; > > + > > + case VMEXIT_IOIO: > > + /* always intercepted */ > > + break; > > What if the guest doesn't want to intercept it? Won't that make it > crash or misbehave? Yes, right. Fixed. Christoph -- ---to satisfy European Law for business letters: Advanced Micro Devices GmbH Einsteinring 24, 85609 Dornach b. Muenchen Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen Registergericht Muenchen, HRB Nr. 43632 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |