[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCHv8 2/3] gnttab: per-active entry locking
From: Matt Wilson <msw@xxxxxxxxxx> Introduce a per-active entry spin lock to protect active entry state instead of requiring a double maptrack lock. The grant table read lock must be locked before acquiring (locking) a active entry. The reduces lock contention on grant map/unmap to the maptrack lock during maptrack handle get/put, significantly improving performance with many concurrent map/unmap operations. Signed-off-by: Matt Wilson <msw@xxxxxxxxxx> [chegger: ported to xen-staging, split into multiple commits] Signed-off-by: Christoph Egger <chegger@xxxxxxxxx> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx> --- docs/misc/grant-tables.txt | 25 ++++++ xen/common/grant_table.c | 212 +++++++++++++++++++++++++++----------------- 2 files changed, 154 insertions(+), 83 deletions(-) diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt index a01f4bf..c30906a 100644 --- a/docs/misc/grant-tables.txt +++ b/docs/misc/grant-tables.txt @@ -63,6 +63,7 @@ is complete. act->domid : remote domain being granted rights act->frame : machine frame being granted act->pin : used to hold reference counts + act->lock : spinlock used to serialize access to active entry state Map tracking ~~~~~~~~~~~~ @@ -87,6 +88,8 @@ is complete. version, partially initialized active table pages, etc. grant_table->maptrack_lock : spinlock used to protect the maptrack state + active_grant_entry->lock : spinlock used to serialize modifications to + active entries The primary lock for the grant table is a read/write spinlock. All functions that access members of struct grant_table must acquire a @@ -105,6 +108,28 @@ is complete. The maptrack lock may be locked while holding the read or write grant table lock. + Active entries are obtained by calling active_entry_acquire(gt, ref). + This function returns a pointer to the active entry after locking its + spinlock. The caller must hold the rwlock for the gt in question + before calling active_entry_acquire(). This is because the grant + table can be dynamically extended via gnttab_grow_table() while a + domain is running and must be fully initialized. Once all access to + the active entry is complete, release the lock by calling + active_entry_release(act). + + Summary of rules for locking: + active_entry_acquire() and active_entry_release() can only be + called when holding the relevant grant table's lock. I.e.: + read_lock(>->lock); + act = active_entry_acquire(gt, ref); + ... + active_entry_release(act); + read_unlock(>->lock); + + Active entries cannot be acquired while holding the maptrack lock. + Multiple active entries can only be acquired while holding the grant + table _write_ lock. + ******************************************************************************** Granting a foreign domain access to frames diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index 54e4411..76de00e 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -157,10 +157,13 @@ struct active_grant_entry { in the page. */ unsigned length:16; /* For sub-page grants, the length of the grant. */ + spinlock_t lock; /* lock to protect access of this entry. + see docs/misc/grant-tables.txt for + locking protocol */ }; #define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry)) -#define active_entry(t, e) \ +#define _active_entry(t, e) \ ((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE]) static inline void gnttab_flush_tlb(const struct domain *d) @@ -188,6 +191,28 @@ nr_active_grant_frames(struct grant_table *gt) return num_act_frames_from_sha_frames(nr_grant_frames(gt)); } +static inline struct active_grant_entry * +active_entry_acquire(struct grant_table *t, grant_ref_t e) +{ + struct active_grant_entry *act; + + /* + * not perfect, but better than nothing for a debug build + * sanity check + */ + ASSERT(rw_is_locked(&t->lock)); + + act = &_active_entry(t, e); + spin_lock(&act->lock); + + return act; +} + +static inline void active_entry_release(struct active_grant_entry *act) +{ + spin_unlock(&act->lock); +} + /* Check if the page has been paged out, or needs unsharing. If rc == GNTST_okay, *page contains the page struct with a ref taken. Caller must do put_page(*page). @@ -228,30 +253,6 @@ static int __get_paged_frame(unsigned long gfn, unsigned long *frame, struct pag return rc; } -static inline void -double_maptrack_lock(struct grant_table *lgt, struct grant_table *rgt) -{ - if ( lgt < rgt ) - { - spin_lock(&lgt->maptrack_lock); - spin_lock(&rgt->maptrack_lock); - } - else - { - if ( lgt != rgt ) - spin_lock(&rgt->maptrack_lock); - spin_lock(&lgt->maptrack_lock); - } -} - -static inline void -double_maptrack_unlock(struct grant_table *lgt, struct grant_table *rgt) -{ - spin_unlock(&lgt->maptrack_lock); - if ( lgt != rgt ) - spin_unlock(&rgt->maptrack_lock); -} - static inline int __get_maptrack_handle( struct grant_table *t) @@ -316,7 +317,10 @@ get_maptrack_handle( return handle; } -/* Number of grant table entries. Caller must hold d's grant table lock. */ +/* + * Number of grant table entries. Caller must hold d's grant table + * read lock. + */ static unsigned int nr_grant_entries(struct grant_table *gt) { ASSERT(gt->gt_version != 0); @@ -505,26 +509,23 @@ static int grant_map_exists(const struct domain *ld, unsigned long mfn, unsigned int *ref_count) { - const struct active_grant_entry *act; + struct active_grant_entry *act; unsigned int ref, max_iter; ASSERT(rw_is_locked(&rgt->lock)); max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT), nr_grant_entries(rgt)); - for ( ref = *ref_count; ref < max_iter; ref++ ) + for ( ref = *ref_count; ref < max_iter; active_entry_release(act), ref++ ) { - act = &active_entry(rgt, ref); - - if ( !act->pin ) - continue; - - if ( act->domid != ld->domain_id ) - continue; + act = active_entry_acquire(rgt, ref); - if ( act->frame != mfn ) + if ( !act->pin || + act->domid != ld->domain_id || + act->frame != mfn ) continue; + active_entry_release(act); return 0; } @@ -537,6 +538,13 @@ static int grant_map_exists(const struct domain *ld, return -EINVAL; } +/* + * Count the number of mapped ro or rw entries tracked in the local + * grant table given a provided mfn provided by a foreign domain. + * + * This function takes the maptrack lock from the local grant table + * and must be called with the remote grant table's rwlock held. + */ static void mapcount( struct grant_table *lgt, struct domain *rd, unsigned long mfn, unsigned int *wrc, unsigned int *rdc) @@ -546,15 +554,28 @@ static void mapcount( *wrc = *rdc = 0; + /* + * N.B.: while taking the local maptrack spinlock prevents any + * mapping changes, the remote active entries could be changing + * while we are counting. The caller has to hold the grant table + * write lock, or some other mechanism should be used to prevent + * concurrent changes during this operation. This is tricky + * because we can't promote a read lock into a write lock. + */ + ASSERT(rw_is_locked(&rd->grant_table->lock)); + spin_lock(&lgt->maptrack_lock); + for ( handle = 0; handle < lgt->maptrack_limit; handle++ ) { map = &maptrack_entry(lgt, handle); if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) || map->domid != rd->domain_id ) continue; - if ( active_entry(rd->grant_table, map->ref).frame == mfn ) + if ( _active_entry(rd->grant_table, map->ref).frame == mfn ) (map->flags & GNTMAP_readonly) ? (*rdc)++ : (*wrc)++; } + + spin_unlock(&lgt->maptrack_lock); } /* @@ -576,7 +597,6 @@ __gnttab_map_grant_ref( struct page_info *pg = NULL; int rc = GNTST_okay; u32 old_pin; - u32 act_pin; unsigned int cache_flags; struct active_grant_entry *act = NULL; struct grant_mapping *mt; @@ -639,7 +659,7 @@ __gnttab_map_grant_ref( if ( unlikely(op->ref >= nr_grant_entries(rgt))) PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref); - act = &active_entry(rgt, op->ref); + act = active_entry_acquire(rgt, op->ref); shah = shared_entry_header(rgt, op->ref); if (rgt->gt_version == 1) { sha1 = &shared_entry_v1(rgt, op->ref); @@ -656,7 +676,7 @@ __gnttab_map_grant_ref( ((act->domid != ld->domain_id) || (act->pin & 0x80808080U) != 0 || (act->is_sub_page)) ) - PIN_FAIL(unlock_out, GNTST_general_error, + PIN_FAIL(act_release_out, GNTST_general_error, "Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n", act->domid, ld->domain_id, act->pin, act->is_sub_page); @@ -667,7 +687,7 @@ __gnttab_map_grant_ref( if ( (rc = _set_status(rgt->gt_version, ld->domain_id, op->flags & GNTMAP_readonly, 1, shah, act, status) ) != GNTST_okay ) - goto unlock_out; + goto act_release_out; if ( !act->pin ) { @@ -698,12 +718,9 @@ __gnttab_map_grant_ref( GNTPIN_hstr_inc : GNTPIN_hstw_inc; frame = act->frame; - act_pin = act->pin; cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) ); - read_unlock(&rgt->lock); - /* pg may be set, with a refcount included, from __get_paged_frame */ if ( !pg ) { @@ -778,8 +795,6 @@ __gnttab_map_grant_ref( goto undo_out; } - double_maptrack_lock(lgt, rgt); - if ( gnttab_need_iommu_mapping(ld) ) { unsigned int wrc, rdc; @@ -787,21 +802,20 @@ __gnttab_map_grant_ref( /* We're not translated, so we know that gmfns and mfns are the same things, so the IOMMU entry is always 1-to-1. */ mapcount(lgt, rd, frame, &wrc, &rdc); - if ( (act_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) && + if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) && !(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) ) { if ( wrc == 0 ) err = iommu_map_page(ld, frame, frame, IOMMUF_readable|IOMMUF_writable); } - else if ( act_pin && !old_pin ) + else if ( act->pin && !old_pin ) { if ( (wrc + rdc) == 0 ) err = iommu_map_page(ld, frame, frame, IOMMUF_readable); } if ( err ) { - double_maptrack_unlock(lgt, rgt); rc = GNTST_general_error; goto undo_out; } @@ -814,7 +828,7 @@ __gnttab_map_grant_ref( mt->ref = op->ref; mt->flags = op->flags; - double_maptrack_unlock(lgt, rgt); + active_entry_release(act); read_unlock(&rgt->lock); op->dev_bus_addr = (u64)frame << PAGE_SHIFT; @@ -838,10 +852,6 @@ __gnttab_map_grant_ref( put_page(pg); } - read_lock(&rgt->lock); - - act = &active_entry(rgt, op->ref); - if ( op->flags & GNTMAP_device_map ) act->pin -= (op->flags & GNTMAP_readonly) ? GNTPIN_devr_inc : GNTPIN_devw_inc; @@ -857,6 +867,9 @@ __gnttab_map_grant_ref( if ( !act->pin ) gnttab_clear_flag(_GTF_reading, status); + act_release_out: + active_entry_release(act); + unlock_out: read_unlock(&rgt->lock); op->status = rc; @@ -943,18 +956,17 @@ __gnttab_unmap_common( rgt = rd->grant_table; read_lock(&rgt->lock); - double_maptrack_lock(lgt, rgt); op->flags = op->map->flags; if ( unlikely(!op->flags) || unlikely(op->map->domid != dom) ) { gdprintk(XENLOG_WARNING, "Unstable handle %u\n", op->handle); rc = GNTST_bad_handle; - goto unmap_out; + goto read_unlock_out; } op->rd = rd; - act = &active_entry(rgt, op->map->ref); + act = active_entry_acquire(rgt, op->map->ref); if ( op->frame == 0 ) { @@ -963,7 +975,7 @@ __gnttab_unmap_common( else { if ( unlikely(op->frame != act->frame) ) - PIN_FAIL(unmap_out, GNTST_general_error, + PIN_FAIL(act_release_out, GNTST_general_error, "Bad frame number doesn't match gntref. (%lx != %lx)\n", op->frame, act->frame); if ( op->flags & GNTMAP_device_map ) @@ -982,7 +994,7 @@ __gnttab_unmap_common( if ( (rc = replace_grant_host_mapping(op->host_addr, op->frame, op->new_addr, op->flags)) < 0 ) - goto unmap_out; + goto act_release_out; ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)); op->map->flags &= ~GNTMAP_host_map; @@ -1004,7 +1016,7 @@ __gnttab_unmap_common( if ( err ) { rc = GNTST_general_error; - goto unmap_out; + goto act_release_out; } } @@ -1012,8 +1024,9 @@ __gnttab_unmap_common( if ( !(op->flags & GNTMAP_readonly) ) gnttab_mark_dirty(rd, op->frame); - unmap_out: - double_maptrack_unlock(lgt, rgt); + act_release_out: + active_entry_release(act); + read_unlock_out: read_unlock(&rgt->lock); op->status = rc; @@ -1048,9 +1061,9 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) read_lock(&rgt->lock); if ( rgt->gt_version == 0 ) - goto unmap_out; + goto unlock_out; - act = &active_entry(rgt, op->map->ref); + act = active_entry_acquire(rgt, op->map->ref); sha = shared_entry_header(rgt, op->map->ref); if ( rgt->gt_version == 1 ) @@ -1064,7 +1077,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) * Suggests that __gntab_unmap_common failed early and so * nothing further to do */ - goto unmap_out; + goto act_release_out; } pg = mfn_to_page(op->frame); @@ -1088,7 +1101,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) * Suggests that __gntab_unmap_common failed in * replace_grant_host_mapping() so nothing further to do */ - goto unmap_out; + goto act_release_out; } if ( !is_iomem_page(op->frame) ) @@ -1109,8 +1122,12 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) if ( act->pin == 0 ) gnttab_clear_flag(_GTF_reading, status); - unmap_out: + act_release_out: + active_entry_release(act); + + unlock_out: read_unlock(&rgt->lock); + if ( put_handle ) { op->map->flags = 0; @@ -1304,7 +1321,7 @@ int gnttab_grow_table(struct domain *d, unsigned int req_nr_frames) { struct grant_table *gt = d->grant_table; - unsigned int i; + unsigned int i, j; ASSERT(req_nr_frames <= max_grant_frames); @@ -1319,6 +1336,8 @@ gnttab_grow_table(struct domain *d, unsigned int req_nr_frames) if ( (gt->active[i] = alloc_xenheap_page()) == NULL ) goto active_alloc_failed; clear_page(gt->active[i]); + for ( j = 0; j < ACGNT_PER_PAGE; j++ ) + spin_lock_init(>->active[i][j].lock); } /* Shared */ @@ -1814,7 +1833,7 @@ __release_grant_for_copy( read_lock(&rgt->lock); - act = &active_entry(rgt, gref); + act = active_entry_acquire(rgt, gref); sha = shared_entry_header(rgt, gref); r_frame = act->frame; @@ -1853,6 +1872,7 @@ __release_grant_for_copy( released_read = 1; } + active_entry_release(act); read_unlock(&rgt->lock); if ( td != rd ) @@ -1914,14 +1934,14 @@ __acquire_grant_for_copy( read_lock(&rgt->lock); if ( rgt->gt_version == 0 ) - PIN_FAIL(unlock_out, GNTST_general_error, + PIN_FAIL(gnt_unlock_out, GNTST_general_error, "remote grant table not ready\n"); if ( unlikely(gref >= nr_grant_entries(rgt)) ) - PIN_FAIL(unlock_out, GNTST_bad_gntref, + PIN_FAIL(gnt_unlock_out, GNTST_bad_gntref, "Bad grant reference %ld\n", gref); - act = &active_entry(rgt, gref); + act = active_entry_acquire(rgt, gref); shah = shared_entry_header(rgt, gref); if ( rgt->gt_version == 1 ) { @@ -1986,6 +2006,7 @@ __acquire_grant_for_copy( * the right table (if rd == td), so we have to drop the * lock here and reacquire */ + active_entry_release(act); read_unlock(&rgt->lock); rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id, @@ -1993,8 +2014,11 @@ __acquire_grant_for_copy( &trans_page_off, &trans_length, 0); read_lock(&rgt->lock); + act = active_entry_acquire(rgt, gref); + if ( rc != GNTST_okay ) { __fixup_status_for_copy_pin(act, status); + active_entry_release(act); read_unlock(&rgt->lock); rcu_unlock_domain(td); return rc; @@ -2008,6 +2032,7 @@ __acquire_grant_for_copy( { __fixup_status_for_copy_pin(act, status); rcu_unlock_domain(td); + active_entry_release(act); read_unlock(&rgt->lock); put_page(*page); return __acquire_grant_for_copy(rd, gref, ldom, readonly, @@ -2076,6 +2101,7 @@ __acquire_grant_for_copy( *length = act->length; *frame = act->frame; + active_entry_release(act); read_unlock(&rgt->lock); return rc; @@ -2088,6 +2114,9 @@ __acquire_grant_for_copy( gnttab_clear_flag(_GTF_reading, status); unlock_out: + active_entry_release(act); + + gnt_unlock_out: read_unlock(&rgt->lock); return rc; @@ -2389,7 +2418,6 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop) gnttab_set_version_t op; struct domain *d = current->domain; struct grant_table *gt = d->grant_table; - struct active_grant_entry *act; grant_entry_v1_t reserved_entries[GNTTAB_NR_RESERVED_ENTRIES]; long res; int i; @@ -2414,8 +2442,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop) { for ( i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_grant_entries(gt); i++ ) { - act = &active_entry(gt, i); - if ( act->pin != 0 ) + if ( read_atomic(&_active_entry(gt, i).pin) != 0 ) { gdprintk(XENLOG_WARNING, "tried to change grant table version from %d to %d, but some grant entries still in use\n", @@ -2602,9 +2629,17 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b) { struct domain *d = rcu_lock_current_domain(); struct grant_table *gt = d->grant_table; - struct active_grant_entry *act; + struct active_grant_entry *act_a = NULL; + struct active_grant_entry *act_b = NULL; s16 rc = GNTST_okay; + if ( ref_a == ref_b ) + /* + * noop, so avoid acquiring the same active entry + * twice which would case a deadlock. + */ + return rc; + write_lock(>->lock); /* Bounds check on the grant refs */ @@ -2613,12 +2648,12 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b) if ( unlikely(ref_b >= nr_grant_entries(d->grant_table))) PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-b (%d).\n", ref_b); - act = &active_entry(gt, ref_a); - if ( act->pin ) + act_a = active_entry_acquire(gt, ref_a); + if ( act_a->pin ) PIN_FAIL(out, GNTST_eagain, "ref a %ld busy\n", (long)ref_a); - act = &active_entry(gt, ref_b); - if ( act->pin ) + act_b = active_entry_acquire(gt, ref_b); + if ( act_b->pin ) PIN_FAIL(out, GNTST_eagain, "ref b %ld busy\n", (long)ref_b); if ( gt->gt_version == 1 ) @@ -2645,6 +2680,10 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b) } out: + if ( act_b != NULL ) + active_entry_release(act_b); + if ( act_a != NULL ) + active_entry_release(act_a); write_unlock(>->lock); rcu_unlock_domain(d); @@ -2954,7 +2993,7 @@ grant_table_create( struct domain *d) { struct grant_table *t; - int i; + unsigned int i, j; if ( (t = xzalloc(struct grant_table)) == NULL ) goto no_mem_0; @@ -2974,6 +3013,8 @@ grant_table_create( if ( (t->active[i] = alloc_xenheap_page()) == NULL ) goto no_mem_2; clear_page(t->active[i]); + for ( j = 0; j < ACGNT_PER_PAGE; j++ ) + spin_lock_init(&t->active[i][j].lock); } /* Tracking of mapped foreign frames table */ @@ -3070,7 +3111,7 @@ gnttab_release_mappings( rgt = rd->grant_table; read_lock(&rgt->lock); - act = &active_entry(rgt, ref); + act = active_entry_acquire(rgt, ref); sha = shared_entry_header(rgt, ref); if (rgt->gt_version == 1) status = &sha->flags; @@ -3128,6 +3169,7 @@ gnttab_release_mappings( if ( act->pin == 0 ) gnttab_clear_flag(_GTF_reading, status); + active_entry_release(act); read_unlock(&rgt->lock); rcu_unlock_domain(rd); @@ -3190,9 +3232,12 @@ static void gnttab_usage_print(struct domain *rd) uint16_t status; uint64_t frame; - act = &active_entry(gt, ref); + act = active_entry_acquire(gt, ref); if ( !act->pin ) + { + active_entry_release(act); continue; + } sha = shared_entry_header(gt, ref); @@ -3222,6 +3267,7 @@ static void gnttab_usage_print(struct domain *rd) printk("[%3d] %5d 0x%06lx 0x%08x %5d 0x%06"PRIx64" 0x%02x\n", ref, act->domid, act->frame, act->pin, sha->domid, frame, status); + active_entry_release(act); } out: -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |