
Re: [Xen-devel] [PATCH] x86/domain_page: implement pure per-vCPU mapping infrastructure



On Tue, 2020-02-04 at 14:00 +0000, Wei Liu wrote:
> On Mon, Feb 03, 2020 at 06:36:53PM +0000, Hongyan Xia wrote:
> [...]
> > diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
> > index dd32712d2f..52971d2ecc 100644
> > --- a/xen/arch/x86/domain_page.c
> > +++ b/xen/arch/x86/domain_page.c
> > @@ -69,12 +69,11 @@ void __init mapcache_override_current(struct vcpu *v)
> >  
> >  void *map_domain_page(mfn_t mfn)
> >  {
> > -    unsigned long flags;
> > -    unsigned int idx, i;
> > +    unsigned long flags, *phashmfn;
> > +    unsigned int idx, glb_idx, *phashidx, ohashidx;
> >      struct vcpu *v;
> > -    struct mapcache_domain *dcache;
> >      struct mapcache_vcpu *vcache;
> > -    struct vcpu_maphash_entry *hashent;
> > +    void *ret;
> >  
> >  #ifdef NDEBUG
> >      if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> > @@ -82,104 +81,59 @@ void *map_domain_page(mfn_t mfn)
> >  #endif
> >  
> >      v = mapcache_current_vcpu();
> > -    if ( !v || !is_pv_vcpu(v) )
> > +    if ( !v || !is_pv_vcpu(v) || !v->arch.pv.mapcache )
> >          return mfn_to_virt(mfn_x(mfn));
> >  
> > -    dcache = &v->domain->arch.pv.mapcache;
> > -    vcache = &v->arch.pv.mapcache;
> > -    if ( !dcache->inuse )
> > -        return mfn_to_virt(mfn_x(mfn));
> > +    vcache = v->arch.pv.mapcache;
> > +    phashmfn = &vcache->hash_mfn[MAPHASH_HASHFN(mfn_x(mfn))];
> > +    phashidx = &vcache->hash_idx[MAPHASH_HASHFN(mfn_x(mfn))];
> >  
> >      perfc_incr(map_domain_page_count);
> >  
> >      local_irq_save(flags);
> >  
> > -    hashent = &vcache->hash[MAPHASH_HASHFN(mfn_x(mfn))];
> > -    if ( hashent->mfn == mfn_x(mfn) )
> > +    ohashidx = *phashidx;
> 
> This is a bit redundant because you never rewrite *phashidx until the
> last minute. I think ohashidx can be removed, but it's up to you.

I actually just want to tell the compiler that ohashidx does not change,
so it does not have to re-read *phashidx every time in case it attempts
to do so. Functionally speaking, I agree that removing it changes
nothing.
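
In other words, the intent is roughly this pattern (a minimal sketch of
the idea, not the exact code in the patch):

    unsigned int ohashidx = *phashidx;      /* read memory once */

    /* Later uses can stay in a register; the compiler is not forced
     * to reload *phashidx before each use. */
    if ( ohashidx != MAPHASHENT_NOTINUSE && !vcache->refcnt[ohashidx] )
        __clear_bit(ohashidx, vcache->inuse);
    ...
    *phashidx = idx;                        /* the only write-back */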

> 
> > +    if ( *phashmfn != mfn_x(mfn) )
> >      {
> > -        idx = hashent->idx;
> > -        ASSERT(idx < dcache->entries);
> > -        hashent->refcnt++;
> > -        ASSERT(hashent->refcnt);
> > -        ASSERT(mfn_eq(l1e_get_mfn(MAPCACHE_L1ENT(idx)), mfn));
> > -        goto out;
> > -    }
> > +        idx = find_first_zero_bit(vcache->inuse, MAPCACHE_VCPU_ENTRIES);
> > +        BUG_ON(idx >= MAPCACHE_VCPU_ENTRIES);
> >  
> > -    spin_lock(&dcache->lock);
> > +        ASSERT(vcache->refcnt[idx] == 0);
> > +        __set_bit(idx, vcache->inuse);
> >  
> > -    /* Has some other CPU caused a wrap? We must flush if so. */
> 
> [...]
> >  void unmap_domain_page(const void *ptr)
> >  {
> > -    unsigned int idx;
> > +    unsigned int idx, glb_idx;
> >      struct vcpu *v;
> > -    struct mapcache_domain *dcache;
> > -    unsigned long va = (unsigned long)ptr, mfn, flags;
> > -    struct vcpu_maphash_entry *hashent;
> > +    struct mapcache_vcpu *vcache;
> > +    unsigned long va = (unsigned long)ptr, mfn, hashmfn, flags;
> >  
> >      if ( va >= DIRECTMAP_VIRT_START )
> >          return;
> > @@ -189,73 +143,21 @@ void unmap_domain_page(const void *ptr)
> >      v = mapcache_current_vcpu();
> >      ASSERT(v && is_pv_vcpu(v));
> >  
> > -    dcache = &v->domain->arch.pv.mapcache;
> > -    ASSERT(dcache->inuse);
> > -
> > -    idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
> > -    mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
> > -    hashent = &v->arch.pv.mapcache.hash[MAPHASH_HASHFN(mfn)];
> > +    vcache = v->arch.pv.mapcache;
> > +    ASSERT(vcache);
> >  
> > +    glb_idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
> > +    idx = glb_idx - v->vcpu_id * MAPCACHE_VCPU_ENTRIES;
> 
> Add a blank line here please.
> 
> > ...
> > -
> > -int mapcache_domain_init(struct domain *d)
> > -{
> > -    struct mapcache_domain *dcache = &d->arch.pv.mapcache;
> > -    unsigned int bitmap_pages;
> > +    mfn = l1e_get_pfn(MAPCACHE_L1ENT(glb_idx));
> > +    hashmfn = vcache->hash_mfn[MAPHASH_HASHFN(mfn)];
> >  
> > -    ASSERT(is_pv_domain(d));
> > +    vcache->refcnt[idx]--;
> > +    if ( hashmfn != mfn && !vcache->refcnt[idx] )
> > +        __clear_bit(idx, vcache->inuse);
> >  
> > -#ifdef NDEBUG
> > -    if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> > -        return 0;
> > -#endif
> 
> Would be great if some form or shape of this check can be added to
> mapcache_vcpu_init such that you don't unnecessarily initialise data
> structures that will never be used.

Good idea.
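Something like this at the top of mapcache_vcpu_init then (an untested
sketch of your suggestion, reusing the check above):

    int mapcache_vcpu_init(struct vcpu *v)
    {
    #ifdef NDEBUG
        /* All RAM is covered by the directmap, so the mapcache is
         * never exercised; skip setting it up. */
        if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
            return 0;
    #endif
        ...
    }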

> > ...
> > diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
> > index d0cfbb70a8..4b2217151b 100644
> > --- a/xen/include/asm-x86/config.h
> > +++ b/xen/include/asm-x86/config.h
> > @@ -296,7 +296,7 @@ extern unsigned long xen_phys_start;
> >      (GDT_VIRT_START(v) + (64*1024))
> >  
> > /* map_domain_page() map cache. The second per-domain-mapping sub-area. */
> > -#define MAPCACHE_VCPU_ENTRIES    (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS)
> > +#define MAPCACHE_VCPU_ENTRIES    32
> >  #define MAPCACHE_ENTRIES         (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
> >  #define MAPCACHE_VIRT_START      PERDOMAIN_VIRT_SLOT(1)
> >  #define MAPCACHE_VIRT_END        (MAPCACHE_VIRT_START + \
> > diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
> > index a3ae5d9a20..367bba7110 100644
> > --- a/xen/include/asm-x86/domain.h
> > +++ b/xen/include/asm-x86/domain.h
> > @@ -40,35 +40,17 @@ struct trap_bounce {
> >  #define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
> >  #define MAPHASHENT_NOTINUSE ((u32)~0U)
> >  struct mapcache_vcpu {
> > -    /* Shadow of mapcache_domain.epoch. */
> > -    unsigned int shadow_epoch;
> > -
> > -    /* Lock-free per-VCPU hash of recently-used mappings. */
> > -    struct vcpu_maphash_entry {
> > -        unsigned long mfn;
> > -        uint32_t      idx;
> > -        uint32_t      refcnt;
> > -    } hash[MAPHASH_ENTRIES];
> > +    unsigned long hash_mfn[MAPHASH_ENTRIES];
> > +    uint32_t hash_idx[MAPHASH_ENTRIES];
> > +
> > +    uint8_t refcnt[MAPCACHE_VCPU_ENTRIES];
> 
> What's the reason for breaking vcpu_maphash_entry into three distinct
> arrays?
> 
> Would it make the code shorter/cleaner if you make everything
> MAPCACHE_VCPU_ENTRIES? A vcpu now only needs to manage its own
> subregion
> after all.

In this design I cannot. MAPCACHE_VCPU_ENTRIES says how many slots a
vcpu can use, while MAPHASH_ENTRIES says how many entries the per-vcpu
hash table (the true cache) has. The maphash does not immediately clear
the inuse bit when a refcnt drops to 0, so in the worst case, even
after you unmap everything, only MAPCACHE_VCPU_ENTRIES -
MAPHASH_ENTRIES slots are guaranteed free; hence MAPCACHE_VCPU_ENTRIES
has to be larger than MAPHASH_ENTRIES. Also, I refcount all slots, not
just the entries in the maphash.
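
To put numbers on the worst case (assuming MAPHASH_ENTRIES is 8, with
the MAPCACHE_VCPU_ENTRIES of 32 from this patch): after unmapping
everything, up to 8 slots can still have their inuse bit set because
the maphash holds on to them, so only 24 slots are guaranteed free. In
effect the headers need to guarantee something like:

    BUILD_BUG_ON(MAPCACHE_VCPU_ENTRIES <= MAPHASH_ENTRIES);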

hash_mfn and hash_idx could be combined into a single structure, but I
don't like the 25% of memory wasted on padding when an unsigned long is
followed by a u32 in a struct...
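
Concretely (hypothetical combined struct, x86-64 sizes):

    struct hashent {
        unsigned long mfn;  /* 8 bytes */
        uint32_t      idx;  /* 4 bytes, plus 4 bytes tail padding */
    };                      /* sizeof == 16, i.e. 4/16 == 25% padding */

versus 12 bytes per hash entry with the two parallel arrays.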

> > +    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_VCPU_ENTRIES)];
> >  };
> >  
> >  struct mapcache_domain {
> > -    /* The number of array entries, and a cursor into the array. */
> >      unsigned int entries;
> > -    unsigned int cursor;
> > -
> > -    /* Protects map_domain_page(). */
> > -    spinlock_t lock;
> > -
> > -    /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
> > -    unsigned int epoch;
> > -    u32 tlbflush_timestamp;
> > -
> > -    /* Which mappings are in use, and which are garbage to reap next epoch? */
> > -    unsigned long *inuse;
> > -    unsigned long *garbage;
> >  };
> >  
> > -int mapcache_domain_init(struct domain *);
> >  int mapcache_vcpu_init(struct vcpu *);
> >  void mapcache_override_current(struct vcpu *);
> >  
> > @@ -473,7 +455,7 @@ struct arch_domain
> >  struct pv_vcpu
> >  {
> >      /* map_domain_page() mapping cache. */
> > -    struct mapcache_vcpu mapcache;
> > +    struct mapcache_vcpu *mapcache;
> 
> And why do you need to change this to a pointer? Is it now very large
> and causing issue(s)?

Yes. It is now pushing struct vcpu close to the size of a page, which
we cannot exceed. Also, changing it into a pointer allows us to
experiment with larger caches later.
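
The allocation then moves into mapcache_vcpu_init, roughly (a sketch,
omitting the surrounding code):

    v->arch.pv.mapcache = xzalloc(struct mapcache_vcpu);
    if ( !v->arch.pv.mapcache )
        return -ENOMEM;

and trying a larger cache later only means allocating a bigger
structure instead of growing struct vcpu.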

Hongyan