[Xen-devel] [PATCH v2] x86/domain_page: implement pure per-vCPU mapping infrastructure
Rewrite the mapcache to be purely per-vCPU instead of partially
per-vCPU and partially per-domain.

The existing mapcache implementation keeps per-vCPU hash tables for
caching, but also creates a per-domain region which is shared by all
vCPUs and protected by a per-domain lock. When the vCPU count of a
domain is large, contention on the per-domain lock is high. Also,
several data structures are shared among all vCPUs, potentially
creating serious cache ping-pong effects. Performance degradation can
be seen on domains with more than 16 vCPUs.

This patch is needed to address these issues when we start relying on
the mapcache (e.g., when we do not have a direct map). It partitions
the per-domain region into per-vCPU regions so that no mapcache state
is shared among vCPUs. As a result, no locking is required and a much
simpler maphash design can be used. The performance improvement after
this patch is quite noticeable.

Benchmarks (baseline uses the direct map instead of the mapcache in
map_domain_page; "old" is the existing mapcache and "new" is after
this patch):

  Geekbench on a 32-vCPU guest, performance drop vs. baseline:

                   old      new
      single core  0.04%    0.18%
      multi core   2.43%    0.08%

  fio on a 32-vCPU guest, LVM atop 8*SSD, performance drop vs. baseline:

                   old      new
                   3.05%    0.28%

There should be room for further optimisations, but this already
improves over the old mapcache by a lot.

In the new cache design, maphash entries themselves also occupy inuse
slots. The existing configuration of 16 slots per vCPU is no longer
sufficient to hold both the maphash and a nested page table walk.
Fortunately, the per-domain inuse and garbage bit vectors are removed
from the PERDOMAIN region, giving us enough room to expand the
available mapping slots.

Signed-off-by: Hongyan Xia <hongyxia@xxxxxxxxxx>
---
Changed in v2:
- reword the commit message.
- code clean-up and style fixes.
- avoid initialising the mapcache in NDEBUG builds.
- move freeing of the maphash into pv_vcpu_destroy, because for now
  only PV domains have a mapcache.
- use is_idle and is_hvm checks in map_domain_page so that only real
  PV domains use the mapcache.
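To make the new scheme easier to follow before reading the diff, here
is a minimal, self-contained userspace sketch of the per-vCPU
maphash/inuse design (illustrative only, not part of the patch:
map_slot()/unmap_slot() are hypothetical stand-ins for
map_domain_page()/unmap_domain_page(), a plain array replaces the L1
page table entries, and the TLB flush is omitted):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAPCACHE_VCPU_ENTRIES 32
#define MAPHASH_ENTRIES       8   /* power of two, so hashing is a mask */
#define MAPHASH_HASHFN(pfn)   ((pfn) & (MAPHASH_ENTRIES - 1))
#define MAPHASHENT_NOTINUSE   (~0u)

struct mapcache_vcpu {
    unsigned long hash_mfn[MAPHASH_ENTRIES]; /* mfn cached per bucket */
    uint32_t hash_idx[MAPHASH_ENTRIES];      /* slot each bucket points at */
    uint8_t refcnt[MAPCACHE_VCPU_ENTRIES];   /* live references per slot */
    uint32_t inuse;                          /* 32-slot bitmap */
    unsigned long slot_mfn[MAPCACHE_VCPU_ENTRIES]; /* stand-in for L1 PTEs */
};

/* Sketch of the map path: return the slot now mapping @mfn. */
static unsigned int map_slot(struct mapcache_vcpu *vc, unsigned long mfn)
{
    unsigned int bucket = MAPHASH_HASHFN(mfn), idx;

    if ( vc->hash_mfn[bucket] != mfn )
    {
        uint32_t old;

        /* Miss: take the first free slot; no lock, the cache is per-vCPU. */
        for ( idx = 0; idx < MAPCACHE_VCPU_ENTRIES; idx++ )
            if ( !(vc->inuse & (1u << idx)) )
                break;
        assert(idx < MAPCACHE_VCPU_ENTRIES);  /* BUG_ON() in the patch */
        vc->inuse |= 1u << idx;
        vc->slot_mfn[idx] = mfn;       /* the patch writes an L1 PTE here */

        /* Evict the bucket's old occupant; free its slot if unreferenced. */
        old = vc->hash_idx[bucket];
        if ( old != MAPHASHENT_NOTINUSE && !vc->refcnt[old] )
            vc->inuse &= ~(1u << old);

        vc->hash_mfn[bucket] = mfn;
        vc->hash_idx[bucket] = idx;
    }
    else
        idx = vc->hash_idx[bucket];    /* Hit: reuse the cached slot. */

    vc->refcnt[idx]++;
    return idx;  /* global index = idx + vcpu_id * MAPCACHE_VCPU_ENTRIES */
}

/* Sketch of the unmap path: drop one reference to a slot. */
static void unmap_slot(struct mapcache_vcpu *vc, unsigned int idx)
{
    unsigned long mfn = vc->slot_mfn[idx];

    /* A slot at refcnt 0 stays inuse while a maphash bucket caches it. */
    if ( !--vc->refcnt[idx] && vc->hash_mfn[MAPHASH_HASHFN(mfn)] != mfn )
        vc->inuse &= ~(1u << idx);
}

int main(void)
{
    struct mapcache_vcpu vc;
    unsigned int i, a, b;

    memset(&vc, 0, sizeof(vc));
    for ( i = 0; i < MAPHASH_ENTRIES; i++ )
    {
        vc.hash_mfn[i] = ~0UL;             /* never valid to map */
        vc.hash_idx[i] = MAPHASHENT_NOTINUSE;
    }

    a = map_slot(&vc, 0x1234);             /* miss: allocates a slot */
    b = map_slot(&vc, 0x1234);             /* hit: same slot, refcnt 2 */
    assert(a == b && vc.refcnt[a] == 2);

    unmap_slot(&vc, a);
    unmap_slot(&vc, b);
    /* Refcnt is 0, but the slot stays cached for the next map of 0x1234. */
    assert(!vc.refcnt[a] && (vc.inuse & (1u << a)));
    printf("mfn 0x1234 cached in slot %u for reuse\n", a);
    return 0;
}

The sketch also shows why 32 slots are needed: with MAPHASH_ENTRIES = 8
and CONFIG_PAGING_LEVELS = 4, the worst case is 8 pinned maphash slots
plus a 16-entry nested page table walk, i.e. 24 slots, which the old
value of 16 cannot provide. The new BUILD_BUG_ON in mapcache_vcpu_init
encodes exactly this requirement.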
---
 xen/arch/x86/domain.c        |   2 -
 xen/arch/x86/domain_page.c   | 233 ++++++++++++-----------------------
 xen/arch/x86/pv/domain.c     |   1 +
 xen/include/asm-x86/config.h |   2 +-
 xen/include/asm-x86/domain.h |  30 +----
 5 files changed, 84 insertions(+), 184 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index f53ae5ff86..5622a26b5c 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -633,8 +633,6 @@ int arch_domain_create(struct domain *d,
     }
     else if ( is_pv_domain(d) )
     {
-        mapcache_domain_init(d);
-
         if ( (rc = pv_domain_initialise(d)) != 0 )
             goto fail;
     }
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index dd32712d2f..bec5bd09ab 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -69,12 +69,11 @@ void __init mapcache_override_current(struct vcpu *v)
 
 void *map_domain_page(mfn_t mfn)
 {
-    unsigned long flags;
-    unsigned int idx, i;
+    unsigned long flags, *phashmfn;
+    unsigned int idx, glb_idx, *phashidx;
     struct vcpu *v;
-    struct mapcache_domain *dcache;
     struct mapcache_vcpu *vcache;
-    struct vcpu_maphash_entry *hashent;
+    void *ret;
 
 #ifdef NDEBUG
     if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
@@ -82,104 +81,60 @@ void *map_domain_page(mfn_t mfn)
 #endif
 
     v = mapcache_current_vcpu();
-    if ( !v || !is_pv_vcpu(v) )
+    if ( !v || is_hvm_vcpu(v) || is_idle_vcpu(v) || !v->arch.pv.mapcache )
         return mfn_to_virt(mfn_x(mfn));
 
-    dcache = &v->domain->arch.pv.mapcache;
-    vcache = &v->arch.pv.mapcache;
-    if ( !dcache->inuse )
-        return mfn_to_virt(mfn_x(mfn));
+    vcache = v->arch.pv.mapcache;
+    phashmfn = &vcache->hash_mfn[MAPHASH_HASHFN(mfn_x(mfn))];
+    phashidx = &vcache->hash_idx[MAPHASH_HASHFN(mfn_x(mfn))];
 
     perfc_incr(map_domain_page_count);
 
     local_irq_save(flags);
 
-    hashent = &vcache->hash[MAPHASH_HASHFN(mfn_x(mfn))];
-    if ( hashent->mfn == mfn_x(mfn) )
+    if ( *phashmfn != mfn_x(mfn) )
     {
-        idx = hashent->idx;
-        ASSERT(idx < dcache->entries);
-        hashent->refcnt++;
-        ASSERT(hashent->refcnt);
-        ASSERT(mfn_eq(l1e_get_mfn(MAPCACHE_L1ENT(idx)), mfn));
-        goto out;
-    }
+        idx = find_first_zero_bit(vcache->inuse, MAPCACHE_VCPU_ENTRIES);
+        BUG_ON(idx >= MAPCACHE_VCPU_ENTRIES);
 
-    spin_lock(&dcache->lock);
+        ASSERT(vcache->refcnt[idx] == 0);
+        __set_bit(idx, vcache->inuse);
 
-    /* Has some other CPU caused a wrap? We must flush if so. */
-    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
-    {
-        vcache->shadow_epoch = dcache->epoch;
-        if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
-        {
-            perfc_incr(domain_page_tlb_flush);
-            flush_tlb_local();
-        }
-    }
+        glb_idx = idx + v->vcpu_id * MAPCACHE_VCPU_ENTRIES;
+        l1e_write(&MAPCACHE_L1ENT(glb_idx),
+                  l1e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));
+        ret = (void *)MAPCACHE_VIRT_START + pfn_to_paddr(glb_idx);
+        flush_tlb_one_local(ret);
+
+        /* Evict the entry from maphash. Clear inuse if its refcnt is 0. */
+        if ( *phashidx != MAPHASHENT_NOTINUSE && !vcache->refcnt[*phashidx] )
+            __clear_bit(*phashidx, vcache->inuse);
 
-    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
-    if ( unlikely(idx >= dcache->entries) )
+        /* Add the new slot into the maphash. */
+        *phashmfn = mfn_x(mfn);
+        *phashidx = idx;
+    }
+    else /* We hit in the maphash, just return the entry. */
     {
-        unsigned long accum = 0, prev = 0;
-
-        /* /First/, clean the garbage map and update the inuse list. */
-        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
-        {
-            accum |= prev;
-            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
-            prev = ~dcache->inuse[i];
-        }
-
-        if ( accum | (prev & BITMAP_LAST_WORD_MASK(dcache->entries)) )
-            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
-        else
-        {
-            /* Replace a hash entry instead. */
-            i = MAPHASH_HASHFN(mfn_x(mfn));
-            do {
-                hashent = &vcache->hash[i];
-                if ( hashent->idx != MAPHASHENT_NOTINUSE && !hashent->refcnt )
-                {
-                    idx = hashent->idx;
-                    ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == hashent->mfn);
-                    l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
-                    hashent->idx = MAPHASHENT_NOTINUSE;
-                    hashent->mfn = ~0UL;
-                    break;
-                }
-                if ( ++i == MAPHASH_ENTRIES )
-                    i = 0;
-            } while ( i != MAPHASH_HASHFN(mfn_x(mfn)) );
-        }
-        BUG_ON(idx >= dcache->entries);
-
-        /* /Second/, flush TLBs. */
-        perfc_incr(domain_page_tlb_flush);
-        flush_tlb_local();
-        vcache->shadow_epoch = ++dcache->epoch;
-        dcache->tlbflush_timestamp = tlbflush_current_time();
+        idx = *phashidx;
+        glb_idx = idx + v->vcpu_id * MAPCACHE_VCPU_ENTRIES;
+        ret = (void *)MAPCACHE_VIRT_START + pfn_to_paddr(glb_idx);
     }
 
-    set_bit(idx, dcache->inuse);
-    dcache->cursor = idx + 1;
+    vcache->refcnt[idx]++;
+    ASSERT(vcache->refcnt[idx]);
+    ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(glb_idx)) == mfn_x(mfn));
 
-    spin_unlock(&dcache->lock);
-
-    l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));
-
- out:
     local_irq_restore(flags);
-    return (void *)MAPCACHE_VIRT_START + pfn_to_paddr(idx);
+    return ret;
 }
 
 void unmap_domain_page(const void *ptr)
 {
-    unsigned int idx;
+    unsigned int idx, glb_idx;
     struct vcpu *v;
-    struct mapcache_domain *dcache;
-    unsigned long va = (unsigned long)ptr, mfn, flags;
-    struct vcpu_maphash_entry *hashent;
+    struct mapcache_vcpu *vcache;
+    unsigned long va = (unsigned long)ptr, mfn, hashmfn, flags;
 
     if ( va >= DIRECTMAP_VIRT_START )
         return;
@@ -189,114 +144,78 @@ void unmap_domain_page(const void *ptr)
 
     v = mapcache_current_vcpu();
     ASSERT(v && is_pv_vcpu(v));
 
-    dcache = &v->domain->arch.pv.mapcache;
-    ASSERT(dcache->inuse);
+    vcache = v->arch.pv.mapcache;
+    ASSERT(vcache);
 
-    idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
-    mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
-    hashent = &v->arch.pv.mapcache.hash[MAPHASH_HASHFN(mfn)];
+    glb_idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
+    idx = glb_idx - v->vcpu_id * MAPCACHE_VCPU_ENTRIES;
 
     local_irq_save(flags);
 
-    if ( hashent->idx == idx )
-    {
-        ASSERT(hashent->mfn == mfn);
-        ASSERT(hashent->refcnt);
-        hashent->refcnt--;
-    }
-    else if ( !hashent->refcnt )
-    {
-        if ( hashent->idx != MAPHASHENT_NOTINUSE )
-        {
-            /* /First/, zap the PTE. */
-            ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(hashent->idx)) ==
-                   hashent->mfn);
-            l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
-            /* /Second/, mark as garbage. */
-            set_bit(hashent->idx, dcache->garbage);
-        }
-
-        /* Add newly-freed mapping to the maphash. */
-        hashent->mfn = mfn;
-        hashent->idx = idx;
-    }
-    else
-    {
-        /* /First/, zap the PTE. */
-        l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
-        /* /Second/, mark as garbage. */
-        set_bit(idx, dcache->garbage);
-    }
+    mfn = l1e_get_pfn(MAPCACHE_L1ENT(glb_idx));
+    hashmfn = vcache->hash_mfn[MAPHASH_HASHFN(mfn)];
+
+    vcache->refcnt[idx]--;
+    /* If refcnt drops to 0, we only clear inuse when it's not in the maphash. */
+    if ( hashmfn != mfn && !vcache->refcnt[idx] )
+        __clear_bit(idx, vcache->inuse);
 
     local_irq_restore(flags);
 }
 
-int mapcache_domain_init(struct domain *d)
+int mapcache_vcpu_init(struct vcpu *v)
 {
+    struct domain *d = v->domain;
     struct mapcache_domain *dcache = &d->arch.pv.mapcache;
-    unsigned int bitmap_pages;
-
-    ASSERT(is_pv_domain(d));
+    unsigned long i;
+    unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
 
 #ifdef NDEBUG
     if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
         return 0;
 #endif
 
-    BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
-                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
-                 MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
-    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
-    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
-    dcache->garbage = dcache->inuse +
-                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
-
-    spin_lock_init(&dcache->lock);
-
-    return create_perdomain_mapping(d, (unsigned long)dcache->inuse,
-                                    2 * bitmap_pages + 1,
-                                    NIL(l1_pgentry_t *), NULL);
-}
-
-int mapcache_vcpu_init(struct vcpu *v)
-{
-    struct domain *d = v->domain;
-    struct mapcache_domain *dcache = &d->arch.pv.mapcache;
-    unsigned long i;
-    unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
-    unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));
-
-    if ( !is_pv_vcpu(v) || !dcache->inuse )
+    if ( is_idle_vcpu(v) || is_hvm_vcpu(v) )
        return 0;
 
+    BUILD_BUG_ON(MAPCACHE_VIRT_END > ARG_XLAT_VIRT_START);
+
     if ( ents > dcache->entries )
     {
+        int rc;
+
+        ASSERT(ents * PAGE_SIZE <= (PERDOMAIN_SLOT_MBYTES << 20));
+
         /* Populate page tables. */
-        int rc = create_perdomain_mapping(d, MAPCACHE_VIRT_START, ents,
+        rc = create_perdomain_mapping(d, MAPCACHE_VIRT_START, ents,
                                           NIL(l1_pgentry_t *), NULL);
 
-        /* Populate bit maps. */
-        if ( !rc )
-            rc = create_perdomain_mapping(d, (unsigned long)dcache->inuse,
-                                          nr, NULL, NIL(struct page_info *));
-        if ( !rc )
-            rc = create_perdomain_mapping(d, (unsigned long)dcache->garbage,
-                                          nr, NULL, NIL(struct page_info *));
-
         if ( rc )
            return rc;
 
         dcache->entries = ents;
     }
 
-    /* Mark all maphash entries as not in use. */
     BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
+    /* MAPHASH_ENTRIES has to be power-of-two to make hashing work. */
+    BUILD_BUG_ON(MAPHASH_ENTRIES & (MAPHASH_ENTRIES - 1));
+    /*
+     * Since entries in the maphash also occupy inuse slots, we have to make
+     * sure MAPCACHE_VCPU_ENTRIES is large enough to accommodate both the
+     * maphash and a nested page table walk.
+     */
+    BUILD_BUG_ON(MAPCACHE_VCPU_ENTRIES - MAPHASH_ENTRIES <
+                 CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS);
+
+    v->arch.pv.mapcache = xzalloc(struct mapcache_vcpu);
+    if ( !v->arch.pv.mapcache )
+        return -ENOMEM;
+
+    /* Mark all maphash entries as not in use. */
     for ( i = 0; i < MAPHASH_ENTRIES; i++ )
     {
-        struct vcpu_maphash_entry *hashent = &v->arch.pv.mapcache.hash[i];
-
-        hashent->mfn = ~0UL; /* never valid to map */
-        hashent->idx = MAPHASHENT_NOTINUSE;
+        v->arch.pv.mapcache->hash_mfn[i] = ~0UL;
+        v->arch.pv.mapcache->hash_idx[i] = MAPHASHENT_NOTINUSE;
     }
 
     return 0;
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index c3473b9a47..be819ddfac 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -234,6 +234,7 @@ void pv_vcpu_destroy(struct vcpu *v)
 
     pv_destroy_gdt_ldt_l1tab(v);
     XFREE(v->arch.pv.trap_ctxt);
+    XFREE(v->arch.pv.mapcache);
 }
 
 int pv_vcpu_initialise(struct vcpu *v)
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index a34053c4c0..ef48190abf 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -296,7 +296,7 @@ extern unsigned long xen_phys_start;
     (GDT_VIRT_START(v) + (64*1024))
 
 /* map_domain_page() map cache. The second per-domain-mapping sub-area. */
-#define MAPCACHE_VCPU_ENTRIES    (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS)
+#define MAPCACHE_VCPU_ENTRIES    32
 #define MAPCACHE_ENTRIES         (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
 #define MAPCACHE_VIRT_START      PERDOMAIN_VIRT_SLOT(1)
 #define MAPCACHE_VIRT_END        (MAPCACHE_VIRT_START + \
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index a3ae5d9a20..367bba7110 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -40,35 +40,17 @@ struct trap_bounce {
 #define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
 #define MAPHASHENT_NOTINUSE ((u32)~0U)
 struct mapcache_vcpu {
-    /* Shadow of mapcache_domain.epoch. */
-    unsigned int shadow_epoch;
-
-    /* Lock-free per-VCPU hash of recently-used mappings. */
-    struct vcpu_maphash_entry {
-        unsigned long mfn;
-        uint32_t      idx;
-        uint32_t      refcnt;
-    } hash[MAPHASH_ENTRIES];
+    unsigned long hash_mfn[MAPHASH_ENTRIES];
+    uint32_t hash_idx[MAPHASH_ENTRIES];
+
+    uint8_t refcnt[MAPCACHE_VCPU_ENTRIES];
+    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_VCPU_ENTRIES)];
 };
 
 struct mapcache_domain {
-    /* The number of array entries, and a cursor into the array. */
     unsigned int entries;
-    unsigned int cursor;
-
-    /* Protects map_domain_page(). */
-    spinlock_t lock;
-
-    /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
-    unsigned int epoch;
-    u32 tlbflush_timestamp;
-
-    /* Which mappings are in use, and which are garbage to reap next epoch? */
-    unsigned long *inuse;
-    unsigned long *garbage;
 };
 
-int mapcache_domain_init(struct domain *);
 int mapcache_vcpu_init(struct vcpu *);
 void mapcache_override_current(struct vcpu *);
 
@@ -473,7 +455,7 @@ struct arch_domain
 struct pv_vcpu
 {
     /* map_domain_page() mapping cache. */
-    struct mapcache_vcpu mapcache;
+    struct mapcache_vcpu *mapcache;
 
     unsigned int vgc_flags;
-- 
2.17.1