
Re: [Xen-devel] [PATCH] x86/domain_page: implement pure per-vCPU mapping infrastructure



On Tue, 2020-02-04 at 14:00 +0000, Wei Liu wrote:
> On Mon, Feb 03, 2020 at 06:36:53PM +0000, Hongyan Xia wrote:
> [...]
> > diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
> > index dd32712d2f..52971d2ecc 100644
> > --- a/xen/arch/x86/domain_page.c
> > +++ b/xen/arch/x86/domain_page.c
> > @@ -69,12 +69,11 @@ void __init mapcache_override_current(struct vcpu *v)
> >  
> >  void *map_domain_page(mfn_t mfn)
> >  {
> > -    unsigned long flags;
> > -    unsigned int idx, i;
> > +    unsigned long flags, *phashmfn;
> > +    unsigned int idx, glb_idx, *phashidx, ohashidx;
> >      struct vcpu *v;
> > -    struct mapcache_domain *dcache;
> >      struct mapcache_vcpu *vcache;
> > -    struct vcpu_maphash_entry *hashent;
> > +    void *ret;
> >  
> >  #ifdef NDEBUG
> >      if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> > @@ -82,104 +81,59 @@ void *map_domain_page(mfn_t mfn)
> >  #endif
> >  
> >      v = mapcache_current_vcpu();
> > -    if ( !v || !is_pv_vcpu(v) )
> > +    if ( !v || !is_pv_vcpu(v) || !v->arch.pv.mapcache )
> >          return mfn_to_virt(mfn_x(mfn));
> >  
> > -    dcache = &v->domain->arch.pv.mapcache;
> > -    vcache = &v->arch.pv.mapcache;
> > -    if ( !dcache->inuse )
> > -        return mfn_to_virt(mfn_x(mfn));
> > +    vcache = v->arch.pv.mapcache;
> > +    phashmfn = &vcache->hash_mfn[MAPHASH_HASHFN(mfn_x(mfn))];
> > +    phashidx = &vcache->hash_idx[MAPHASH_HASHFN(mfn_x(mfn))];
> >  
> >      perfc_incr(map_domain_page_count);
> >  
> >      local_irq_save(flags);
> >  
> > -    hashent = &vcache->hash[MAPHASH_HASHFN(mfn_x(mfn))];
> > -    if ( hashent->mfn == mfn_x(mfn) )
> > +    ohashidx = *phashidx;
> 
> This is a bit redundant because you never rewrite *phashidx until the
> last minute. I think ohashidx can be removed, but it's up to you.

I actually just want to tell the compiler that ohashidx does not change,
so it does not have to re-read *phashidx every time in case it attempts
to do so. Functionally speaking, I agree that removing it changes
nothing.
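
In other words, the intent is roughly this pattern (a minimal sketch of
the idea, not the exact code in the patch):

    unsigned int ohashidx = *phashidx;      /* read memory once */

    /* Later uses can stay in a register; the compiler is not forced
     * to reload *phashidx before each use. */
    if ( ohashidx != MAPHASHENT_NOTINUSE && !vcache->refcnt[ohashidx] )
        __clear_bit(ohashidx, vcache->inuse);
    ...
    *phashidx = idx;                        /* the only write-back */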

> 
> > +    if ( *phashmfn != mfn_x(mfn) )
> >      {
> > -        idx = hashent->idx;
> > -        ASSERT(idx < dcache->entries);
> > -        hashent->refcnt++;
> > -        ASSERT(hashent->refcnt);
> > -        ASSERT(mfn_eq(l1e_get_mfn(MAPCACHE_L1ENT(idx)), mfn));
> > -        goto out;
> > -    }
> > +        idx = find_first_zero_bit(vcache->inuse, MAPCACHE_VCPU_ENTRIES);
> > +        BUG_ON(idx >= MAPCACHE_VCPU_ENTRIES);
> >  
> > -    spin_lock(&dcache->lock);
> > +        ASSERT(vcache->refcnt[idx] == 0);
> > +        __set_bit(idx, vcache->inuse);
> >  
> > -    /* Has some other CPU caused a wrap? We must flush if so. */
> 
> [...]
> >  void unmap_domain_page(const void *ptr)
> >  {
> > -    unsigned int idx;
> > +    unsigned int idx, glb_idx;
> >      struct vcpu *v;
> > -    struct mapcache_domain *dcache;
> > -    unsigned long va = (unsigned long)ptr, mfn, flags;
> > -    struct vcpu_maphash_entry *hashent;
> > +    struct mapcache_vcpu *vcache;
> > +    unsigned long va = (unsigned long)ptr, mfn, hashmfn, flags;
> >  
> >      if ( va >= DIRECTMAP_VIRT_START )
> >          return;
> > @@ -189,73 +143,21 @@ void unmap_domain_page(const void *ptr)
> >      v = mapcache_current_vcpu();
> >      ASSERT(v && is_pv_vcpu(v));
> >  
> > -    dcache = &v->domain->arch.pv.mapcache;
> > -    ASSERT(dcache->inuse);
> > -
> > -    idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
> > -    mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
> > -    hashent = &v->arch.pv.mapcache.hash[MAPHASH_HASHFN(mfn)];
> > +    vcache = v->arch.pv.mapcache;
> > +    ASSERT(vcache);
> >  
> > +    glb_idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
> > +    idx = glb_idx - v->vcpu_id * MAPCACHE_VCPU_ENTRIES;
> 
> Add a blank line here please.
> 
> > ...
> > -
> > -int mapcache_domain_init(struct domain *d)
> > -{
> > -    struct mapcache_domain *dcache = &d->arch.pv.mapcache;
> > -    unsigned int bitmap_pages;
> > +    mfn = l1e_get_pfn(MAPCACHE_L1ENT(glb_idx));
> > +    hashmfn = vcache->hash_mfn[MAPHASH_HASHFN(mfn)];
> >  
> > -    ASSERT(is_pv_domain(d));
> > +    vcache->refcnt[idx]--;
> > +    if ( hashmfn != mfn && !vcache->refcnt[idx] )
> > +        __clear_bit(idx, vcache->inuse);
> >  
> > -#ifdef NDEBUG
> > -    if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> > -        return 0;
> > -#endif
> 
> Would be great if some form or shape of this check can be added to
> mapcache_vcpu_init such that you don't unnecessarily initialise data
> structures that will never be used.

Good idea.
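Something like this at the top of mapcache_vcpu_init then (an untested
sketch of your suggestion, reusing the check above):

    int mapcache_vcpu_init(struct vcpu *v)
    {
    #ifdef NDEBUG
        /* All RAM is covered by the directmap, so the mapcache is
         * never exercised; skip setting it up. */
        if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
            return 0;
    #endif
        ...
    }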

> > ...
> > diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
> > index d0cfbb70a8..4b2217151b 100644
> > --- a/xen/include/asm-x86/config.h
> > +++ b/xen/include/asm-x86/config.h
> > @@ -296,7 +296,7 @@ extern unsigned long xen_phys_start;
> >      (GDT_VIRT_START(v) + (64*1024))
> >  
> > /* map_domain_page() map cache. The second per-domain-mapping sub-area. */
> > -#define MAPCACHE_VCPU_ENTRIES    (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS)
> > +#define MAPCACHE_VCPU_ENTRIES    32
> >  #define MAPCACHE_ENTRIES         (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
> >  #define MAPCACHE_VIRT_START      PERDOMAIN_VIRT_SLOT(1)
> >  #define MAPCACHE_VIRT_END        (MAPCACHE_VIRT_START + \
> > diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
> > index a3ae5d9a20..367bba7110 100644
> > --- a/xen/include/asm-x86/domain.h
> > +++ b/xen/include/asm-x86/domain.h
> > @@ -40,35 +40,17 @@ struct trap_bounce {
> >  #define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
> >  #define MAPHASHENT_NOTINUSE ((u32)~0U)
> >  struct mapcache_vcpu {
> > -    /* Shadow of mapcache_domain.epoch. */
> > -    unsigned int shadow_epoch;
> > -
> > -    /* Lock-free per-VCPU hash of recently-used mappings. */
> > -    struct vcpu_maphash_entry {
> > -        unsigned long mfn;
> > -        uint32_t      idx;
> > -        uint32_t      refcnt;
> > -    } hash[MAPHASH_ENTRIES];
> > +    unsigned long hash_mfn[MAPHASH_ENTRIES];
> > +    uint32_t hash_idx[MAPHASH_ENTRIES];
> > +
> > +    uint8_t refcnt[MAPCACHE_VCPU_ENTRIES];
> 
> What's the reason for breaking vcpu_maphash_entry into three distinct
> arrays?
> 
> Would it make the code shorter/cleaner if you make everything
> MAPCACHE_VCPU_ENTRIES? A vcpu now only needs to manage its own
> subregion
> after all.

In this design I cannot. MAPCACHE_VCPU_ENTRIES says how many slots a
vcpu can use, while MAPHASH_ENTRIES says how many entries the per-vcpu
hash table (the true cache) has. The maphash does not immediately clear
the inuse bit when a refcnt drops to 0, so in the worst case, even
after you unmap everything, only MAPCACHE_VCPU_ENTRIES -
MAPHASH_ENTRIES slots are guaranteed free; hence MAPCACHE_VCPU_ENTRIES
has to be larger than MAPHASH_ENTRIES. Also, I refcount all slots, not
just the entries in the maphash.
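
To put numbers on the worst case (assuming MAPHASH_ENTRIES is 8, with
the MAPCACHE_VCPU_ENTRIES of 32 from this patch): after unmapping
everything, up to 8 slots can still have their inuse bit set because
the maphash holds on to them, so only 24 slots are guaranteed free. In
effect the headers need to guarantee something like:

    BUILD_BUG_ON(MAPCACHE_VCPU_ENTRIES <= MAPHASH_ENTRIES);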

hash_mfn and hash_idx could be combined into a single structure, but I
don't like the 25% of memory wasted on padding when an unsigned long is
followed by a u32 in a struct...
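
Concretely (hypothetical combined struct, x86-64 sizes):

    struct hashent {
        unsigned long mfn;  /* 8 bytes */
        uint32_t      idx;  /* 4 bytes, plus 4 bytes tail padding */
    };                      /* sizeof == 16, i.e. 4/16 == 25% padding */

versus 12 bytes per hash entry with the two parallel arrays.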

> > +    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_VCPU_ENTRIES)];
> >  };
> >  
> >  struct mapcache_domain {
> > -    /* The number of array entries, and a cursor into the array. */
> >      unsigned int entries;
> > -    unsigned int cursor;
> > -
> > -    /* Protects map_domain_page(). */
> > -    spinlock_t lock;
> > -
> > -    /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
> > -    unsigned int epoch;
> > -    u32 tlbflush_timestamp;
> > -
> > -    /* Which mappings are in use, and which are garbage to reap next epoch? */
> > -    unsigned long *inuse;
> > -    unsigned long *garbage;
> >  };
> >  
> > -int mapcache_domain_init(struct domain *);
> >  int mapcache_vcpu_init(struct vcpu *);
> >  void mapcache_override_current(struct vcpu *);
> >  
> > @@ -473,7 +455,7 @@ struct arch_domain
> >  struct pv_vcpu
> >  {
> >      /* map_domain_page() mapping cache. */
> > -    struct mapcache_vcpu mapcache;
> > +    struct mapcache_vcpu *mapcache;
> 
> And why do you need to change this to a pointer? Is it now very large
> and causing issue(s)?

Yes. It is now pushing struct vcpu close to the size of a page, which
we cannot exceed. Also, changing it into a pointer allows us to
experiment with larger caches later.
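
The allocation then moves into mapcache_vcpu_init, roughly (a sketch,
omitting the surrounding code):

    v->arch.pv.mapcache = xzalloc(struct mapcache_vcpu);
    if ( !v->arch.pv.mapcache )
        return -ENOMEM;

and trying a larger cache later only means allocating a bigger
structure instead of growing struct vcpu.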

Hongyan