[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [V10 PATCH 19/23] PVH xen: vmcs related changes
I did a quick edit to see what a unified function would look like (hasn't even been compile tested). I think it looks just fine. -George PVH xen: vmcs related changes This patch contains vmcs changes related for PVH, mainly creating a VMCS for PVH guest. This version modified to unify the PVH and HVM vmcs construction functions. Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx> Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c index 36f167f..0dc1ce5 100644 --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -634,7 +634,7 @@ void vmx_vmcs_exit(struct vcpu *v) { /* Don't confuse vmx_do_resume (for @v or @current!) */ vmx_clear_vmcs(v); - if ( is_hvm_vcpu(current) ) + if ( !is_pv_vcpu(current) ) vmx_load_vmcs(current); spin_unlock(&v->arch.hvm_vmx.vmcs_lock); @@ -856,6 +856,54 @@ static void vmx_set_common_host_vmcs_fields(struct vcpu *v) __vmwrite(HOST_SYSENTER_EIP, sysenter_eip); } +static int pvh_check_requirements(struct vcpu *v) +{ + u64 required, tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features); + + if ( !paging_mode_hap(v->domain) ) + { + printk(XENLOG_G_INFO "HAP is required for PVH guest.\n"); + return -EINVAL; + } + if ( !cpu_has_vmx_pat ) + { + printk(XENLOG_G_INFO "PVH: CPU does not have PAT support\n"); + return -ENOSYS; + } + if ( !cpu_has_vmx_msr_bitmap ) + { + printk(XENLOG_G_INFO "PVH: CPU does not have msr bitmap\n"); + return -ENOSYS; + } + if ( !cpu_has_vmx_vpid ) + { + printk(XENLOG_G_INFO "PVH: CPU doesn't have VPID support\n"); + return -ENOSYS; + } + if ( !cpu_has_vmx_secondary_exec_control ) + { + printk(XENLOG_G_INFO "CPU Secondary exec is required to run PVH\n"); + return -ENOSYS; + } + + if ( v->domain->arch.vtsc ) + { + printk(XENLOG_G_INFO + "At present PVH only supports the default timer mode\n"); + return -ENOSYS; + } + + required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR; + if ( (tmpval & required) != required ) + { + 
printk(XENLOG_G_INFO "PVH: required CR4 features not available:%lx\n", + required); + return -ENOSYS; + } + + return 0; +} + static int construct_vmcs(struct vcpu *v) { struct domain *d = v->domain; @@ -864,6 +912,13 @@ static int construct_vmcs(struct vcpu *v) vmx_vmcs_enter(v); + if ( is_pvh_vcpu(v) ) + { + int rc = pvh_check_requirements(v); + if ( rc ) + return rc; + } + /* VMCS controls. */ __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control); @@ -871,6 +926,20 @@ static int construct_vmcs(struct vcpu *v) if ( d->arch.vtsc ) v->arch.hvm_vmx.exec_control |= CPU_BASED_RDTSC_EXITING; + if ( is_pvh_vcpu(v) ) + { + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW; + + /* ? */ + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING; + + ASSERT(v->arch.hvm_vmx.exec_conrol & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) + ASSERT(v->arch.hvm_vmx.exec_conrol & CPU_BASED_RDTSC_EXITING) + ASSERT(v->arch.hvm_vmx.exec_conrol & CPU_BASED_ACTIVATE_MSR_BITMAP) + } + + v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control; /* Disable VPID for now: we decide when to enable it on VMENTER. */ @@ -900,7 +969,30 @@ static int construct_vmcs(struct vcpu *v) /* Do not enable Monitor Trap Flag unless start single step debug */ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG; + if ( is_pvh_vcpu(v) ) + { + /* FIXME: Just disable the things we need to disable */ + v->arch.hvm_vmx.secondary_exec_control = SECONDARY_EXEC_ENABLE_EPT; + v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID; + v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_PAUSE_LOOP_EXITING; + } + vmx_update_cpu_exec_control(v); + + if ( is_pvh_vcpu(v) ) + { + /* + * Note: we run with default VM_ENTRY_LOAD_DEBUG_CTLS of 1, which means + * upon vmentry, the cpu reads/loads VMCS.DR7 and VMCS.DEBUGCTLS, and not + * use the host values. 0 would cause it to not use the VMCS values. 
+ */ + vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_EFER; + vmentry_ctl &= ~VM_ENTRY_SMM; + vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR; + /* PVH 32bitfixme. */ + vmentry_ctl |= VM_ENTRY_IA32E_MODE; /* GUEST_EFER.LME/LMA ignored */ + } + __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl); __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl); @@ -933,6 +1025,8 @@ static int construct_vmcs(struct vcpu *v) vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W); if ( cpu_has_vmx_pat && paging_mode_hap(d) ) vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W); + if ( is_pvh_domain(v) ) + vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE, msr_type); } /* I/O access bitmap. */ @@ -980,6 +1074,17 @@ static int construct_vmcs(struct vcpu *v) __vmwrite(GUEST_ACTIVITY_STATE, 0); + if ( is_pvh_domain(v) ) + { + /* These are sorta irrelevant as we load the discriptors directly. */ + __vmwrite(GUEST_CS_SELECTOR, 0); + __vmwrite(GUEST_DS_SELECTOR, 0); + __vmwrite(GUEST_SS_SELECTOR, 0); + __vmwrite(GUEST_ES_SELECTOR, 0); + __vmwrite(GUEST_FS_SELECTOR, 0); + __vmwrite(GUEST_GS_SELECTOR, 0); + } + /* Guest segment bases. */ __vmwrite(GUEST_ES_BASE, 0); __vmwrite(GUEST_SS_BASE, 0); @@ -1002,7 +1107,11 @@ static int construct_vmcs(struct vcpu *v) __vmwrite(GUEST_DS_AR_BYTES, 0xc093); __vmwrite(GUEST_FS_AR_BYTES, 0xc093); __vmwrite(GUEST_GS_AR_BYTES, 0xc093); - __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */ + if ( is_pvh_domain(v) ) + /* CS.L == 1, exec, read/write, accessed. PVH 32bitfixme. */ + __vmwrite(GUEST_CS_AR_BYTES, 0xa09b); + else + __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */ /* Guest IDT. */ __vmwrite(GUEST_IDTR_BASE, 0); @@ -1028,16 +1137,24 @@ static int construct_vmcs(struct vcpu *v) __vmwrite(VMCS_LINK_POINTER, ~0UL); v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK - | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault)) - | (1U << TRAP_no_device); + | (paging_mode_hap(d) ? 
0 : (1U << TRAP_page_fault)) + | (is_pvh_domain(d) ? (1U << TRAP_debug) | (1U << TRAP_int3) : 0 + | (1U << TRAP_no_device); vmx_update_exception_bitmap(v); - v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET; + v->arch.hvm_vcpu.guest_cr[0] = is_pvh_domain(v) ? + ( X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP ) + : ( X86_CR0_PE | X86_CR0_ET ); hvm_update_guest_cr(v, 0); - v->arch.hvm_vcpu.guest_cr[4] = 0; + v->arch.hvm_vcpu.guest_cr[4] = is_pvh_domain(v) ? + real_cr4_to_pv_guest_cr4(mmu_cr4_features) + : 0; hvm_update_guest_cr(v, 4); + if ( is_pvh_domain(v) ) + v->arch.hvm_vmx.vmx_realmode = 0; + if ( cpu_has_vmx_tpr_shadow ) { __vmwrite(VIRTUAL_APIC_PAGE_ADDR, @@ -1294,6 +1411,9 @@ void vmx_do_resume(struct vcpu *v) hvm_asid_flush_vcpu(v); } + if ( is_pvh_vcpu(v) ) + reset_stack_and_jump(vmx_asm_do_vmentry); + debug_state = v->domain->debugger_attached || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_INT3] || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP]; @@ -1477,7 +1597,7 @@ static void vmcs_dump(unsigned char ch) for_each_domain ( d ) { - if ( !is_hvm_domain(d) ) + if ( is_pv_domain(d) ) continue; printk("\n>>> Domain %d <<<\n", d->domain_id); for_each_vcpu ( d, v ) On Mon, Aug 12, 2013 at 11:17 AM, George Dunlap <George.Dunlap@xxxxxxxxxxxxx> wrote: > On Mon, Aug 12, 2013 at 11:15 AM, George Dunlap > <George.Dunlap@xxxxxxxxxxxxx> wrote: >> On Sat, Aug 10, 2013 at 1:23 AM, Mukesh Rathor <mukesh.rathor@xxxxxxxxxx> >> wrote: >>> On Fri, 9 Aug 2013 11:25:36 +0100 >>> George Dunlap <dunlapg@xxxxxxxxx> wrote: >>> >>>> On Wed, Jul 24, 2013 at 2:59 AM, Mukesh Rathor >>>> <mukesh.rathor@xxxxxxxxxx> wrote: >>>> > This patch contains vmcs changes related for PVH, mainly creating a >>>> > VMCS for PVH guest. >>> ..... 
>>>> > + v->arch.hvm_vmx.vmx_realmode = 0; >>>> > + >>>> > + ept->asr = pagetable_get_pfn(p2m_get_pagetable(p2m)); >>>> > + __vmwrite(EPT_POINTER, ept_get_eptp(ept)); >>>> > + >>>> > + rdmsrl(MSR_IA32_CR_PAT, host_pat); >>>> > + __vmwrite(HOST_PAT, host_pat); >>>> > + __vmwrite(GUEST_PAT, MSR_IA32_CR_PAT_RESET); >>>> > + >>>> > + /* The paging mode is updated for PVH by >>>> > arch_set_info_guest(). */ + >>>> > + return 0; >>>> > +} >>>> >>>> The majority of this function seems to be duplicating code in >>>> construct_vmcs(), but in a different order so that it's very difficult >>>> to tell which is which. Wouldn't it be better to just sprinkle >>>> if(is_pvh_domain()) around consrtuct_vmcs? >>> >>> >>> Nah, just makes the function extremely messy! Other maintainers I >>> consulted with were OK with making it a separate function. The function >>> is mostly orderded by vmx sections in the intel SDM. >> >> Does it? Messier than the domain building functions where we also do >> a lot of if(is_pvh_domain())'s? >> >> From my analysis, most of the differences are because the HVM code >> allows two ways of doing things (e.g., either HAP or shadow) while you >> only allow one. These include: >> - disabling TSC exiting (if in the correct mode, the HVM code would >> also do this) >> - Disabling invlpg and cr3 load/store exiting (same for HVM in HAP mode) >> - Unconditionally enabling secondary controls (HVM will enable if present) >> - Enabling the MSR bitmap (HVM will enable if present) >> - Updating cpu_based_exec_control directly (HVM has a function that >> switches between this and something required for nested virt) >> - Unconditionally enabling VPID (HVM will enable it somewhere else if >> appropriate) >> - &c &c > > I should have also said, since you would be calling > pvh_check_requirements() at the beginning of the shared function > anyway, all of these are guaranteed to set things up as required by > PVH. > > -George Attachment:
pvh-vmcs-unified.diff _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support®.