|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v5 rebased 4/4] nested vmx: enable VMCS shadowing feature
On Wed, Jan 23, 2013 at 6:32 AM, Dongxiao Xu <dongxiao.xu@xxxxxxxxx> wrote:
>
> Currently, non-root VMREAD/VMWRITE instructions are handled by taking a
> VM exit and emulating them, which incurs some overhead.
>
> Newer Intel platforms introduce a feature called VMCS shadowing, with
> which non-root VMREAD/VMWRITE do not trigger a VM exit; instead, the
> hardware reads/writes the virtual VMCS directly.
> This feature has been shown to improve performance.
>
> Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
> ---
> xen/arch/x86/hvm/vmx/vmcs.c | 9 ++++
> xen/arch/x86/hvm/vmx/vvmx.c | 82
> ++++++++++++++++++++++++++++++++++++
> xen/include/asm-x86/hvm/vmx/vmcs.h | 18 +++++++-
> 3 files changed, 108 insertions(+), 1 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index f89ea93..69b7dae 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -92,6 +92,7 @@ static void __init vmx_display_features(void)
> P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
> P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
> P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery");
> + P(cpu_has_vmx_vmcs_shadowing, "VMCS shadowing");
> #undef P
>
> if ( !printed )
> @@ -133,6 +134,7 @@ static int vmx_init_vmcs_config(void)
> u32 _vmx_cpu_based_exec_control;
> u32 _vmx_secondary_exec_control = 0;
> u64 _vmx_ept_vpid_cap = 0;
> + u64 _vmx_misc_cap = 0;
> u32 _vmx_vmexit_control;
> u32 _vmx_vmentry_control;
> bool_t mismatch = 0;
> @@ -180,6 +182,9 @@ static int vmx_init_vmcs_config(void)
> SECONDARY_EXEC_ENABLE_RDTSCP |
> SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> SECONDARY_EXEC_ENABLE_INVPCID);
> + rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
> + if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
> + opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
> if ( opt_vpid_enabled )
> opt |= SECONDARY_EXEC_ENABLE_VPID;
> if ( opt_unrestricted_guest_enabled )
> @@ -383,6 +388,8 @@ static void __vmx_clear_vmcs(void *info)
> if ( arch_vmx->active_cpu == smp_processor_id() )
> {
> __vmpclear(virt_to_maddr(arch_vmx->vmcs));
> + if ( arch_vmx->vmcs_shadow_maddr )
> + __vmpclear(arch_vmx->vmcs_shadow_maddr);
>
> arch_vmx->active_cpu = -1;
> arch_vmx->launched = 0;
> @@ -720,6 +727,8 @@ void vmx_vmcs_switch(struct vmcs_struct *from, struct
> vmcs_struct *to)
> spin_lock(&vmx->vmcs_lock);
>
> __vmpclear(virt_to_maddr(from));
> + if ( vmx->vmcs_shadow_maddr )
> + __vmpclear(vmx->vmcs_shadow_maddr);
> __vmptrld(virt_to_maddr(to));
>
> vmx->vmcs = to;
> diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
> index 1e1ad56..3c152c5 100644
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -64,6 +64,48 @@ int nvmx_vcpu_initialise(struct vcpu *v)
> gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs
> failed\n");
> goto out;
> }
> +
> + /* non-root VMREAD/VMWRITE bitmap. */
> + if ( cpu_has_vmx_vmcs_shadowing )
> + {
> + struct page_info *vmread_bitmap, *vmwrite_bitmap;
> + unsigned long *vr, *vw;
> +
> + vmread_bitmap = alloc_domheap_page(NULL, 0);
> + if ( !vmread_bitmap )
> + {
> + gdprintk(XENLOG_ERR, "nest: allocation for vmread bitmap
> failed\n");
> + goto out1;
> + }
> + v->arch.hvm_vmx.vmread_bitmap = vmread_bitmap;
> +
> + vmwrite_bitmap = alloc_domheap_page(NULL, 0);
> + if ( !vmwrite_bitmap )
> + {
> + gdprintk(XENLOG_ERR, "nest: allocation for vmwrite bitmap
> failed\n");
> + goto out2;
> + }
> + v->arch.hvm_vmx.vmwrite_bitmap = vmwrite_bitmap;
> +
> + vr = __map_domain_page(vmread_bitmap);
> + vw = __map_domain_page(vmwrite_bitmap);
> +
> + clear_page(vr);
> + clear_page(vw);
> +
> + /*
> + * For the following 4 encodings, we need to handle them in VMM.
> + * Let them vmexit as usual.
> + */
> + set_bit(IO_BITMAP_A, vw);
> + set_bit(IO_BITMAP_A_HIGH, vw);
> + set_bit(IO_BITMAP_B, vw);
> + set_bit(IO_BITMAP_B_HIGH, vw);
> +
> + unmap_domain_page(vr);
> + unmap_domain_page(vw);
> + }
> +
> nvmx->ept.enabled = 0;
> nvmx->guest_vpid = 0;
> nvmx->vmxon_region_pa = 0;
> @@ -76,6 +118,10 @@ int nvmx_vcpu_initialise(struct vcpu *v)
> nvmx->msrbitmap = NULL;
> INIT_LIST_HEAD(&nvmx->launched_list);
> return 0;
> +out2:
> + free_domheap_page(v->arch.hvm_vmx.vmread_bitmap);
> +out1:
> + free_xenheap_page(nvcpu->nv_n2vmcx);
> out:
> return -ENOMEM;
> }
> @@ -106,6 +152,11 @@ void nvmx_vcpu_destroy(struct vcpu *v)
> list_del(&item->node);
> xfree(item);
> }
> +
> + if ( v->arch.hvm_vmx.vmread_bitmap )
> + free_domheap_page(v->arch.hvm_vmx.vmread_bitmap);
> + if ( v->arch.hvm_vmx.vmwrite_bitmap )
> + free_domheap_page(v->arch.hvm_vmx.vmwrite_bitmap);
> }
>
> void nvmx_domain_relinquish_resources(struct domain *d)
> @@ -1035,6 +1086,32 @@ static bool_t nvmx_vpid_enabled(struct nestedvcpu
> *nvcpu)
> return 0;
> }
>
> +static void nvmx_set_vmcs_pointer(struct vcpu *v, struct vmcs_struct
> *vvmcs)
> +{
> + unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
> + paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
> +
> + __vmpclear(vvmcs_maddr);
> + vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
> + v->arch.hvm_vmx.vmcs_shadow_maddr = vvmcs_maddr;
> + __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr);
> + __vmwrite(VMREAD_BITMAP,
> page_to_maddr(v->arch.hvm_vmx.vmread_bitmap));
> + __vmwrite(VMWRITE_BITMAP,
> page_to_maddr(v->arch.hvm_vmx.vmwrite_bitmap));
> +}
> +
> +static void nvmx_clear_vmcs_pointer(struct vcpu *v, struct vmcs_struct
> *vvmcs)
> +{
> + unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
> + paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
> +
> + __vmpclear(vvmcs_maddr);
> + vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
> + v->arch.hvm_vmx.vmcs_shadow_maddr = 0;
> + __vmwrite(VMCS_LINK_POINTER, ~0ul);
> + __vmwrite(VMREAD_BITMAP, 0);
> + __vmwrite(VMWRITE_BITMAP, 0);
> +}
> +
> static void virtual_vmentry(struct cpu_user_regs *regs)
> {
> struct vcpu *v = current;
> @@ -1476,6 +1553,9 @@ int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
> __map_msr_bitmap(v);
> }
>
> + if ( cpu_has_vmx_vmcs_shadowing )
> + nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx);
> +
> vmreturn(regs, VMSUCCEED);
>
> out:
> @@ -1526,6 +1606,8 @@ int nvmx_handle_vmclear(struct cpu_user_regs *regs)
>
> if ( gpa == nvcpu->nv_vvmcxaddr )
> {
> + if ( cpu_has_vmx_vmcs_shadowing )
> + nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
> clear_vvmcs_launched(&nvmx->launched_list,
> domain_page_map_to_mfn(nvcpu->nv_vvmcx));
> nvmx_purge_vvmcs(v);
> diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h
> b/xen/include/asm-x86/hvm/vmx/vmcs.h
> index 652dc21..ba02221 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -81,6 +81,8 @@ struct vmx_domain {
> struct arch_vmx_struct {
> /* Virtual address of VMCS. */
> struct vmcs_struct *vmcs;
> + /* VMCS shadow machine address. */
> + paddr_t vmcs_shadow_maddr;
>
> /* Protects remote usage of VMCS (VMPTRLD/VMCLEAR). */
> spinlock_t vmcs_lock;
> @@ -125,6 +127,10 @@ struct arch_vmx_struct {
> /* Remember EFLAGS while in virtual 8086 mode */
> uint32_t vm86_saved_eflags;
> int hostenv_migrated;
> +
> + /* Bitmap to control vmexit policy for Non-root VMREAD/VMWRITE */
> + struct page_info *vmread_bitmap;
> + struct page_info *vmwrite_bitmap;
> };
>
> int vmx_create_vmcs(struct vcpu *v);
> @@ -191,6 +197,7 @@ extern u32 vmx_vmentry_control;
> #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
> #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
> #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
> +#define SECONDARY_EXEC_ENABLE_VMCS_SHADOWING 0x00004000
> extern u32 vmx_secondary_exec_control;
>
> extern bool_t cpu_has_vmx_ins_outs_instr_info;
> @@ -205,6 +212,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
> #define VMX_EPT_INVEPT_SINGLE_CONTEXT 0x02000000
> #define VMX_EPT_INVEPT_ALL_CONTEXT 0x04000000
>
> +#define VMX_MISC_VMWRITE_ALL 0x20000000
> +
> #define VMX_VPID_INVVPID_INSTRUCTION
> 0x100000000ULL
> #define VMX_VPID_INVVPID_INDIVIDUAL_ADDR
> 0x10000000000ULL
> #define VMX_VPID_INVVPID_SINGLE_CONTEXT
> 0x20000000000ULL
> @@ -244,7 +253,10 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
> (vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT)
> #define cpu_has_vmx_virtual_intr_delivery \
> (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
> -#define cpu_has_vmx_vmcs_shadowing 0
> +#define cpu_has_vmx_vmcs_shadowing \
> + (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VMCS_SHADOWING)
> +
> +#define VMCS_RID_TYPE_MASK 0x80000000
>
> /* GUEST_INTERRUPTIBILITY_INFO flags. */
> #define VMX_INTR_SHADOW_STI 0x00000001
> @@ -305,6 +317,10 @@ enum vmcs_field {
> EOI_EXIT_BITMAP2_HIGH = 0x00002021,
> EOI_EXIT_BITMAP3 = 0x00002022,
> EOI_EXIT_BITMAP3_HIGH = 0x00002023,
> + VMREAD_BITMAP = 0x00002026,
> + VMREAD_BITMAP_HIGH = 0x00002027,
> + VMWRITE_BITMAP = 0x00002028,
> + VMWRITE_BITMAP_HIGH = 0x00002029,
> GUEST_PHYSICAL_ADDRESS = 0x00002400,
> GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
> VMCS_LINK_POINTER = 0x00002800,
> --
> 1.7.1
>
Acked-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
--
Jun
Intel Open Source Technology Center
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |