[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v2 04/10] EPT: Make ept data structure or operations neutral
From: Zhang Xiantao <xiantao.zhang@xxxxxxxxx> Share the current EPT logic with nested EPT case, so make the related data structure or operations neutral to common EPT and nested EPT. Signed-off-by: Zhang Xiantao <xiantao.zhang@xxxxxxxxx> --- xen/arch/x86/hvm/vmx/vmcs.c | 9 +++- xen/arch/x86/hvm/vmx/vmx.c | 53 ++----------------- xen/arch/x86/mm/p2m-ept.c | 104 ++++++++++++++++++++++++++++-------- xen/arch/x86/mm/p2m.c | 23 ++++++--- xen/include/asm-x86/hvm/vmx/vmcs.h | 23 ++++---- xen/include/asm-x86/hvm/vmx/vmx.h | 10 +++- xen/include/asm-x86/p2m.h | 4 ++ 7 files changed, 133 insertions(+), 93 deletions(-) diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c index 9adc7a4..379b75c 100644 --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -941,8 +941,13 @@ static int construct_vmcs(struct vcpu *v) __vmwrite(TPR_THRESHOLD, 0); } - if ( paging_mode_hap(d) ) - __vmwrite(EPT_POINTER, d->arch.hvm_domain.vmx.ept_control.eptp); + if ( paging_mode_hap(d) ) { + struct p2m_domain *p2m = p2m_get_hostp2m(d); + struct ept_data *ept = &p2m->ept; + + ept->asr = pagetable_get_pfn(p2m_get_pagetable(p2m)); + __vmwrite(EPT_POINTER, ept_get_eptp(ept)); + } if ( cpu_has_vmx_pat && paging_mode_hap(d) ) { diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 4abfa90..d74aae0 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -74,38 +74,19 @@ static void vmx_fpu_dirty_intercept(void); static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content); static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content); static void vmx_invlpg_intercept(unsigned long vaddr); -static void __ept_sync_domain(void *info); static int vmx_domain_initialise(struct domain *d) { int rc; - /* Set the memory type used when accessing EPT paging structures. */ - d->arch.hvm_domain.vmx.ept_control.ept_mt = EPT_DEFAULT_MT; - - /* set EPT page-walk length, now it's actual walk length - 1, i.e. 
3 */ - d->arch.hvm_domain.vmx.ept_control.ept_wl = 3; - - d->arch.hvm_domain.vmx.ept_control.asr = - pagetable_get_pfn(p2m_get_pagetable(p2m_get_hostp2m(d))); - - if ( !zalloc_cpumask_var(&d->arch.hvm_domain.vmx.ept_synced) ) - return -ENOMEM; - if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 ) - { - free_cpumask_var(d->arch.hvm_domain.vmx.ept_synced); return rc; - } return 0; } static void vmx_domain_destroy(struct domain *d) { - if ( paging_mode_hap(d) ) - on_each_cpu(__ept_sync_domain, d, 1); - free_cpumask_var(d->arch.hvm_domain.vmx.ept_synced); vmx_free_vlapic_mapping(d); } @@ -641,6 +622,7 @@ static void vmx_ctxt_switch_to(struct vcpu *v) { struct domain *d = v->domain; unsigned long old_cr4 = read_cr4(), new_cr4 = mmu_cr4_features; + struct ept_data *ept_data = &p2m_get_hostp2m(d)->ept; /* HOST_CR4 in VMCS is always mmu_cr4_features. Sync CR4 now. */ if ( old_cr4 != new_cr4 ) @@ -650,10 +632,10 @@ static void vmx_ctxt_switch_to(struct vcpu *v) { unsigned int cpu = smp_processor_id(); /* Test-and-test-and-set this CPU in the EPT-is-synced mask. */ - if ( !cpumask_test_cpu(cpu, d->arch.hvm_domain.vmx.ept_synced) && + if ( !cpumask_test_cpu(cpu, ept_get_synced_mask(ept_data)) && !cpumask_test_and_set_cpu(cpu, - d->arch.hvm_domain.vmx.ept_synced) ) - __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(d), 0); + ept_get_synced_mask(ept_data)) ) + __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0); } vmx_restore_guest_msrs(v); @@ -1216,33 +1198,6 @@ static void vmx_update_guest_efer(struct vcpu *v) (v->arch.hvm_vcpu.guest_efer & EFER_SCE)); } -static void __ept_sync_domain(void *info) -{ - struct domain *d = info; - __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(d), 0); -} - -void ept_sync_domain(struct domain *d) -{ - /* Only if using EPT and this domain has some VCPUs to dirty. */ - if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] ) - return; - - ASSERT(local_irq_is_enabled()); - - /* - * Flush active cpus synchronously. 
Flush others the next time this domain - * is scheduled onto them. We accept the race of other CPUs adding to - * the ept_synced mask before on_selected_cpus() reads it, resulting in - * unnecessary extra flushes, to avoid allocating a cpumask_t on the stack. - */ - cpumask_and(d->arch.hvm_domain.vmx.ept_synced, - d->domain_dirty_cpumask, &cpu_online_map); - - on_selected_cpus(d->arch.hvm_domain.vmx.ept_synced, - __ept_sync_domain, d, 1); -} - void nvmx_enqueue_n2_exceptions(struct vcpu *v, unsigned long intr_fields, int error_code) { diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c index c964f54..e33f415 100644 --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -291,9 +291,11 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, int need_modify_vtd_table = 1; int vtd_pte_present = 0; int needs_sync = 1; - struct domain *d = p2m->domain; ept_entry_t old_entry = { .epte = 0 }; + struct ept_data *ept = &p2m->ept; + struct domain *d = p2m->domain; + ASSERT(ept); /* * the caller must make sure: * 1. passing valid gfn and mfn at order boundary. @@ -301,17 +303,17 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, * 3. passing a valid order. 
*/ if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) || - ((u64)gfn >> ((ept_get_wl(d) + 1) * EPT_TABLE_ORDER)) || + ((u64)gfn >> ((ept_get_wl(ept) + 1) * EPT_TABLE_ORDER)) || (order % EPT_TABLE_ORDER) ) return 0; - ASSERT((target == 2 && hvm_hap_has_1gb(d)) || - (target == 1 && hvm_hap_has_2mb(d)) || + ASSERT((target == 2 && hvm_hap_has_1gb()) || + (target == 1 && hvm_hap_has_2mb()) || (target == 0)); - table = map_domain_page(ept_get_asr(d)); + table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m))); - for ( i = ept_get_wl(d); i > target; i-- ) + for ( i = ept_get_wl(ept); i > target; i-- ) { ret = ept_next_level(p2m, 0, &table, &gfn_remainder, i); if ( !ret ) @@ -439,9 +441,11 @@ out: unmap_domain_page(table); if ( needs_sync ) - ept_sync_domain(p2m->domain); + ept_sync_domain(p2m); - if ( rv && iommu_enabled && need_iommu(p2m->domain) && need_modify_vtd_table ) + /* For non-nested p2m, may need to change VT-d page table.*/ + if ( rv && !p2m_is_nestedp2m(p2m) && iommu_enabled && need_iommu(p2m->domain) && + need_modify_vtd_table ) { if ( iommu_hap_pt_share ) iommu_pte_flush(d, gfn, (u64*)ept_entry, order, vtd_pte_present); @@ -488,14 +492,14 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t* a, p2m_query_t q, unsigned int *page_order) { - struct domain *d = p2m->domain; - ept_entry_t *table = map_domain_page(ept_get_asr(d)); + ept_entry_t *table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m))); unsigned long gfn_remainder = gfn; ept_entry_t *ept_entry; u32 index; int i; int ret = 0; mfn_t mfn = _mfn(INVALID_MFN); + struct ept_data *ept = &p2m->ept; *t = p2m_mmio_dm; *a = p2m_access_n; @@ -506,7 +510,7 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m, /* Should check if gfn obeys GAW here. 
*/ - for ( i = ept_get_wl(d); i > 0; i-- ) + for ( i = ept_get_wl(ept); i > 0; i-- ) { retry: ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i); @@ -588,19 +592,20 @@ out: static ept_entry_t ept_get_entry_content(struct p2m_domain *p2m, unsigned long gfn, int *level) { - ept_entry_t *table = map_domain_page(ept_get_asr(p2m->domain)); + ept_entry_t *table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m))); unsigned long gfn_remainder = gfn; ept_entry_t *ept_entry; ept_entry_t content = { .epte = 0 }; u32 index; int i; int ret=0; + struct ept_data *ept = &p2m->ept; /* This pfn is higher than the highest the p2m map currently holds */ if ( gfn > p2m->max_mapped_pfn ) goto out; - for ( i = ept_get_wl(p2m->domain); i > 0; i-- ) + for ( i = ept_get_wl(ept); i > 0; i-- ) { ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i); if ( !ret || ret == GUEST_TABLE_POD_PAGE ) @@ -622,7 +627,8 @@ static ept_entry_t ept_get_entry_content(struct p2m_domain *p2m, void ept_walk_table(struct domain *d, unsigned long gfn) { struct p2m_domain *p2m = p2m_get_hostp2m(d); - ept_entry_t *table = map_domain_page(ept_get_asr(d)); + struct ept_data *ept = &p2m->ept; + ept_entry_t *table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m))); unsigned long gfn_remainder = gfn; int i; @@ -638,7 +644,7 @@ void ept_walk_table(struct domain *d, unsigned long gfn) goto out; } - for ( i = ept_get_wl(d); i >= 0; i-- ) + for ( i = ept_get_wl(ept); i >= 0; i-- ) { ept_entry_t *ept_entry, *next; u32 index; @@ -778,24 +784,76 @@ static void ept_change_entry_type_page(mfn_t ept_page_mfn, int ept_page_level, static void ept_change_entry_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt) { - struct domain *d = p2m->domain; - if ( ept_get_asr(d) == 0 ) + struct ept_data *ept = &p2m->ept; + if ( ept_get_asr(ept) == 0 ) return; BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt)); BUG_ON(ot != nt && (ot == p2m_mmio_direct || nt == p2m_mmio_direct)); - 
ept_change_entry_type_page(_mfn(ept_get_asr(d)), ept_get_wl(d), ot, nt); + ept_change_entry_type_page(_mfn(ept_get_asr(ept)), + ept_get_wl(ept), ot, nt); + + ept_sync_domain(p2m); +} + +static void __ept_sync_domain(void *info) +{ + struct ept_data *ept = &((struct p2m_domain *)info)->ept; - ept_sync_domain(d); + __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept), 0); } -void ept_p2m_init(struct p2m_domain *p2m) +void ept_sync_domain(struct p2m_domain *p2m) { + struct domain *d = p2m->domain; + struct ept_data *ept = &p2m->ept; + /* Only if using EPT and this domain has some VCPUs to dirty. */ + if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] ) + return; + + ASSERT(local_irq_is_enabled()); + + /* + * Flush active cpus synchronously. Flush others the next time this domain + * is scheduled onto them. We accept the race of other CPUs adding to + * the ept_synced mask before on_selected_cpus() reads it, resulting in + * unnecessary extra flushes, to avoid allocating a cpumask_t on the stack. + */ + cpumask_and(ept_get_synced_mask(ept), + d->domain_dirty_cpumask, &cpu_online_map); + + on_selected_cpus(ept_get_synced_mask(ept), + __ept_sync_domain, p2m, 1); +} + +int ept_p2m_init(struct p2m_domain *p2m) +{ + struct ept_data *ept = &p2m->ept; + p2m->set_entry = ept_set_entry; p2m->get_entry = ept_get_entry; p2m->change_entry_type_global = ept_change_entry_type_global; p2m->audit_p2m = NULL; + + /* Set the memory type used when accessing EPT paging structures. */ + ept->ept_mt = EPT_DEFAULT_MT; + + /* set EPT page-walk length, now it's actual walk length - 1, i.e. 
3 */ + ept->ept_wl = 3; + + if ( !zalloc_cpumask_var(&ept->synced_mask) ) + return -ENOMEM; + + on_each_cpu(__ept_sync_domain, p2m, 1); + + return 0; +} + +void ept_p2m_uninit(struct p2m_domain *p2m) +{ + struct ept_data *ept = &p2m->ept; + free_cpumask_var(ept->synced_mask); } static void ept_dump_p2m_table(unsigned char key) @@ -811,6 +869,7 @@ static void ept_dump_p2m_table(unsigned char key) unsigned long gfn, gfn_remainder; unsigned long record_counter = 0; struct p2m_domain *p2m; + struct ept_data *ept; for_each_domain(d) { @@ -818,15 +877,16 @@ static void ept_dump_p2m_table(unsigned char key) continue; p2m = p2m_get_hostp2m(d); + ept = &p2m->ept; printk("\ndomain%d EPT p2m table: \n", d->domain_id); for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += (1 << order) ) { gfn_remainder = gfn; mfn = _mfn(INVALID_MFN); - table = map_domain_page(ept_get_asr(d)); + table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m))); - for ( i = ept_get_wl(d); i > 0; i-- ) + for ( i = ept_get_wl(ept); i > 0; i-- ) { ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i); if ( ret != GUEST_TABLE_NORMAL_PAGE ) diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index 6a4bdd9..1f59410 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -57,8 +57,10 @@ boolean_param("hap_2mb", opt_hap_2mb); /* Init the datastructures for later use by the p2m code */ -static void p2m_initialise(struct domain *d, struct p2m_domain *p2m) +static int p2m_initialise(struct domain *d, struct p2m_domain *p2m) { + int ret = 0; + mm_rwlock_init(&p2m->lock); mm_lock_init(&p2m->pod.lock); INIT_LIST_HEAD(&p2m->np2m_list); @@ -72,11 +74,11 @@ static void p2m_initialise(struct domain *d, struct p2m_domain *p2m) p2m->np2m_base = P2M_BASE_EADDR; if ( hap_enabled(d) && cpu_has_vmx ) - ept_p2m_init(p2m); + ret = ept_p2m_init(p2m); else p2m_pt_init(p2m); - return; + return ret; } static int @@ -119,7 +121,7 @@ int p2m_init(struct domain *d) * since nestedhvm_enabled(d) returns false 
here. * (p2m_init runs too early for HVM_PARAM_* options) */ rc = p2m_init_nestedp2m(d); - if ( rc ) + if ( rc ) p2m_final_teardown(d); return rc; } @@ -424,12 +426,16 @@ void p2m_teardown(struct p2m_domain *p2m) static void p2m_teardown_nestedp2m(struct domain *d) { uint8_t i; + struct p2m_domain *p2m; for (i = 0; i < MAX_NESTEDP2M; i++) { if ( !d->arch.nested_p2m[i] ) continue; - free_cpumask_var(d->arch.nested_p2m[i]->dirty_cpumask); - xfree(d->arch.nested_p2m[i]); + p2m = d->arch.nested_p2m[i]; + free_cpumask_var(p2m->dirty_cpumask); + if ( hap_enabled(d) && cpu_has_vmx ) + ept_p2m_uninit(p2m); + xfree(p2m); d->arch.nested_p2m[i] = NULL; } } @@ -437,9 +443,12 @@ static void p2m_teardown_nestedp2m(struct domain *d) void p2m_final_teardown(struct domain *d) { /* Iterate over all p2m tables per domain */ - if ( d->arch.p2m ) + struct p2m_domain *p2m = p2m_get_hostp2m(d); + if ( p2m ) { free_cpumask_var(d->arch.p2m->dirty_cpumask); + if ( hap_enabled(d) && cpu_has_vmx ) + ept_p2m_uninit(p2m); xfree(d->arch.p2m); d->arch.p2m = NULL; } diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h index 9a728b6..2d38b43 100644 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -56,26 +56,27 @@ struct vmx_msr_state { #define EPT_DEFAULT_MT MTRR_TYPE_WRBACK -struct vmx_domain { - unsigned long apic_access_mfn; +struct ept_data{ union { - struct { + struct { u64 ept_mt :3, ept_wl :3, rsvd :6, asr :52; }; u64 eptp; - } ept_control; - cpumask_var_t ept_synced; + }; + cpumask_var_t synced_mask; +}; + +struct vmx_domain { + unsigned long apic_access_mfn; }; -#define ept_get_wl(d) \ - ((d)->arch.hvm_domain.vmx.ept_control.ept_wl) -#define ept_get_asr(d) \ - ((d)->arch.hvm_domain.vmx.ept_control.asr) -#define ept_get_eptp(d) \ - ((d)->arch.hvm_domain.vmx.ept_control.eptp) +#define ept_get_wl(ept) ((ept)->ept_wl) +#define ept_get_asr(ept) ((ept)->asr) +#define ept_get_eptp(ept) ((ept)->eptp) +#define 
ept_get_synced_mask(ept) ((ept)->synced_mask) struct arch_vmx_struct { /* Virtual address of VMCS. */ diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h index feaaa80..2600694 100644 --- a/xen/include/asm-x86/hvm/vmx/vmx.h +++ b/xen/include/asm-x86/hvm/vmx/vmx.h @@ -360,7 +360,7 @@ static inline void ept_sync_all(void) __invept(INVEPT_ALL_CONTEXT, 0, 0); } -void ept_sync_domain(struct domain *d); +void ept_sync_domain(struct p2m_domain *p2m); static inline void vpid_sync_vcpu_gva(struct vcpu *v, unsigned long gva) { @@ -422,12 +422,18 @@ void vmx_get_segment_register(struct vcpu *, enum x86_segment, void vmx_inject_extint(int trap); void vmx_inject_nmi(void); -void ept_p2m_init(struct p2m_domain *p2m); +int ept_p2m_init(struct p2m_domain *p2m); +void ept_p2m_uninit(struct p2m_domain *p2m); + void ept_walk_table(struct domain *d, unsigned long gfn); void setup_ept_dump(void); void update_guest_eip(void); +int alloc_p2m_hap_data(struct p2m_domain *p2m); +void free_p2m_hap_data(struct p2m_domain *p2m); +void p2m_init_hap_data(struct p2m_domain *p2m); + /* EPT violation qualifications definitions */ #define _EPT_READ_VIOLATION 0 #define EPT_READ_VIOLATION (1UL<<_EPT_READ_VIOLATION) diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h index ce26594..b6a84b6 100644 --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -277,6 +277,10 @@ struct p2m_domain { mm_lock_t lock; /* Locking of private pod structs, * * not relying on the p2m lock. */ } pod; + union { + struct ept_data ept; + /* NPT-equivalent structure could be added here. */ + }; }; /* get host p2m table */ -- 1.7.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |