[Xen-devel] [PATCH v5 1/2] gnttab: Introduce rwlock to protect updates to grant table state
Split the grant table lock into two separate locks: a new spinlock protects
the maptrack state, while the existing lock becomes a rwlock.

The rwlock is used to prevent readers from accessing inconsistent grant
table state, such as the current version, partially initialized active
table pages, etc.

Signed-off-by: Matt Wilson <msw@xxxxxxxxxx>
[chegger: ported to xen-staging, split into multiple commits]

v5:
  * Addressed locking issue pointed out by Jan Beulich
  * Fixed git rebase merge issue introduced in v4
    (acquiring locking twice)
  * Coding style fixes
v4:
  * Coding style nits from Jan Beulich
  * Fixup read locks pointed out by Jan Beulich
  * renamed double_gt_(un)lock to double_maptrack_(un)lock
    per request from Jan Beulich
v3:
  * Addressed gnttab_swap_grant_ref() comment from Andrew Cooper
v2:
  * Add arm part per request from Julien Grall

Signed-off-by: Christoph Egger <chegger@xxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Julien Grall <julien.grall@xxxxxxxxxx>

Adjust locking
---
 docs/misc/grant-tables.txt    |   28 +++++++-
 xen/arch/arm/mm.c             |    4 +-
 xen/arch/x86/mm.c             |    4 +-
 xen/common/grant_table.c      |  146 ++++++++++++++++++++++++-----------------
 xen/include/xen/grant_table.h |   17 +++--
 5 files changed, 127 insertions(+), 72 deletions(-)

diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt
index 19db4ec..c9ae8f2 100644
--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -74,7 +74,33 @@ is complete.
  matching map track entry is then removed, as if unmap had been invoked.
  These are not used by the transfer mechanism.
   map->domid : owner of the mapped frame
- map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
+ map->ref : grant reference
+ map->flags : ro/rw, mapped for host or device access
+
+********************************************************************************
+ Locking
+ ~~~~~~~
+ Xen uses several locks to serialize access to the internal grant table state.
+
+  grant_table->lock          : rwlock used to prevent readers from accessing
+                               inconsistent grant table state such as current
+                               version, partially initialized active table
+                               pages, etc.
+  grant_table->maptrack_lock : spinlock used to protect the maptrack state
+
+ The primary lock for the grant table is a read/write spinlock. All
+ functions that access members of struct grant_table must acquire a
+ read lock around critical sections. Any modification to the members
+ of struct grant_table (e.g., nr_status_frames, nr_grant_frames,
+ active frames, etc.) must only be made if the write lock is
+ held. These elements are read-mostly, and read critical sections can
+ be large, which makes a rwlock a good choice.
+
+ The maptrack state is protected by its own spinlock. Any access (read
+ or write) of struct grant_table members that have a "maptrack_"
+ prefix must be made while holding the maptrack lock. The maptrack
+ state can be rapidly modified under some workloads, and the critical
+ sections are very small, thus we use a spinlock to protect them.
 ********************************************************************************
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 7d4ba0c..2765683 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1037,7 +1037,7 @@ int xenmem_add_to_physmap_one(
     switch ( space )
     {
     case XENMAPSPACE_grant_table:
-        spin_lock(&d->grant_table->lock);
+        write_lock(&d->grant_table->lock);
 
         if ( d->grant_table->gt_version == 0 )
             d->grant_table->gt_version = 1;
@@ -1067,7 +1067,7 @@ int xenmem_add_to_physmap_one(
 
         t = p2m_ram_rw;
 
-        spin_unlock(&d->grant_table->lock);
+        write_unlock(&d->grant_table->lock);
         break;
     case XENMAPSPACE_shared_info:
         if ( idx != 0 )
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index d4965da..af4762d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4569,7 +4569,7 @@ int xenmem_add_to_physmap_one(
             mfn = virt_to_mfn(d->shared_info);
         break;
     case XENMAPSPACE_grant_table:
-        spin_lock(&d->grant_table->lock);
+        write_lock(&d->grant_table->lock);
 
         if ( d->grant_table->gt_version == 0 )
             d->grant_table->gt_version = 1;
@@ -4591,7 +4591,7 @@ int xenmem_add_to_physmap_one(
             mfn = virt_to_mfn(d->grant_table->shared_raw[idx]);
         }
 
-        spin_unlock(&d->grant_table->lock);
+        write_unlock(&d->grant_table->lock);
         break;
     case XENMAPSPACE_gmfn_range:
     case XENMAPSPACE_gmfn:
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 1a11766..f52a51d 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -229,27 +229,27 @@ static int __get_paged_frame(unsigned long gfn, unsigned long *frame, struct pag
 }
 
 static inline void
-double_gt_lock(struct grant_table *lgt, struct grant_table *rgt)
+double_maptrack_lock(struct grant_table *lgt, struct grant_table *rgt)
 {
     if ( lgt < rgt )
     {
-        spin_lock(&lgt->lock);
-        spin_lock(&rgt->lock);
+        spin_lock(&lgt->maptrack_lock);
+        spin_lock(&rgt->maptrack_lock);
     }
     else
     {
         if ( lgt != rgt )
-            spin_lock(&rgt->lock);
-        spin_lock(&lgt->lock);
+            spin_lock(&rgt->maptrack_lock);
+        spin_lock(&lgt->maptrack_lock);
     }
 }
 
 static inline void
-double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
+double_maptrack_unlock(struct grant_table *lgt, struct grant_table *rgt)
 {
-    spin_unlock(&lgt->lock);
+    spin_unlock(&lgt->maptrack_lock);
     if ( lgt != rgt )
-        spin_unlock(&rgt->lock);
+        spin_unlock(&rgt->maptrack_lock);
 }
 
 static inline int
@@ -267,10 +267,10 @@ static inline void
 put_maptrack_handle(
     struct grant_table *t, int handle)
 {
-    spin_lock(&t->lock);
+    spin_lock(&t->maptrack_lock);
     maptrack_entry(t, handle).ref = t->maptrack_head;
     t->maptrack_head = handle;
-    spin_unlock(&t->lock);
+    spin_unlock(&t->maptrack_lock);
 }
 
 static inline int
@@ -282,7 +282,7 @@ get_maptrack_handle(
     struct grant_mapping *new_mt;
     unsigned int          new_mt_limit, nr_frames;
 
-    spin_lock(&lgt->lock);
+    spin_lock(&lgt->maptrack_lock);
 
     while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
     {
@@ -311,7 +311,7 @@ get_maptrack_handle(
                  nr_frames + 1);
     }
 
-    spin_unlock(&lgt->lock);
+    spin_unlock(&lgt->maptrack_lock);
 
     return handle;
 }
@@ -508,7 +508,7 @@ static int grant_map_exists(const struct domain *ld,
     const struct active_grant_entry *act;
     unsigned int ref, max_iter;
 
-    ASSERT(spin_is_locked(&rgt->lock));
+    ASSERT(rw_is_locked(&rgt->lock));
 
     max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT),
                    nr_grant_entries(rgt));
@@ -629,7 +629,7 @@ __gnttab_map_grant_ref(
     }
 
     rgt = rd->grant_table;
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     if ( rgt->gt_version == 0 )
         PIN_FAIL(unlock_out, GNTST_general_error,
@@ -702,8 +702,6 @@ __gnttab_map_grant_ref(
     cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
 
-    spin_unlock(&rgt->lock);
-
     /* pg may be set, with a refcount included, from __get_paged_frame */
     if ( !pg )
     {
@@ -778,7 +776,7 @@ __gnttab_map_grant_ref(
         goto undo_out;
     }
 
-    double_gt_lock(lgt, rgt);
+    double_maptrack_lock(lgt, rgt);
 
     if ( gnttab_need_iommu_mapping(ld) )
     {
@@ -801,7 +799,7 @@ __gnttab_map_grant_ref(
         }
         if ( err )
         {
-            double_gt_unlock(lgt, rgt);
+            double_maptrack_unlock(lgt, rgt);
             rc = GNTST_general_error;
             goto undo_out;
         }
@@ -814,7 +812,8 @@ __gnttab_map_grant_ref(
     mt->ref   = op->ref;
     mt->flags = op->flags;
 
-    double_gt_unlock(lgt, rgt);
+    double_maptrack_unlock(lgt, rgt);
+    read_unlock(&rgt->lock);
 
     op->dev_bus_addr = (u64)frame << PAGE_SHIFT;
     op->handle       = handle;
@@ -837,7 +836,7 @@ __gnttab_map_grant_ref(
         put_page(pg);
     }
 
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     act = &active_entry(rgt, op->ref);
 
@@ -857,7 +856,7 @@ __gnttab_map_grant_ref(
         gnttab_clear_flag(_GTF_reading, status);
 
  unlock_out:
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     op->status = rc;
     put_maptrack_handle(lgt, handle);
     rcu_unlock_domain(rd);
@@ -907,18 +906,19 @@ __gnttab_unmap_common(
     }
 
     op->map = &maptrack_entry(lgt, op->handle);
-    spin_lock(&lgt->lock);
+
+    read_lock(&lgt->lock);
 
     if ( unlikely(!op->map->flags) )
     {
-        spin_unlock(&lgt->lock);
+        read_unlock(&lgt->lock);
         gdprintk(XENLOG_INFO, "Zero flags for handle (%d).\n", op->handle);
         op->status = GNTST_bad_handle;
         return;
     }
 
     dom = op->map->domid;
-    spin_unlock(&lgt->lock);
+    read_unlock(&lgt->lock);
 
     if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) )
     {
@@ -939,7 +939,9 @@ __gnttab_unmap_common(
     TRACE_1D(TRC_MEM_PAGE_GRANT_UNMAP, dom);
 
     rgt = rd->grant_table;
-    double_gt_lock(lgt, rgt);
+
+    read_lock(&rgt->lock);
+    double_maptrack_lock(lgt, rgt);
 
     op->flags = op->map->flags;
     if ( unlikely(!op->flags) || unlikely(op->map->domid != dom) )
@@ -1009,7 +1011,9 @@ __gnttab_unmap_common(
         gnttab_mark_dirty(rd, op->frame);
 
  unmap_out:
-    double_gt_unlock(lgt, rgt);
+    double_maptrack_unlock(lgt, rgt);
+    read_unlock(&rgt->lock);
+
     op->status = rc;
     rcu_unlock_domain(rd);
 }
@@ -1039,8 +1043,8 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op)
     rcu_lock_domain(rd);
     rgt = rd->grant_table;
 
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     if ( rgt->gt_version == 0 )
         goto unmap_out;
@@ -1104,7 +1108,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op)
         gnttab_clear_flag(_GTF_reading, status);
 
  unmap_out:
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     if ( put_handle )
     {
         op->map->flags = 0;
@@ -1290,10 +1294,14 @@ gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
     gt->nr_status_frames = 0;
 }
 
+/*
+ * Grow the grant table. The caller must hold the grant table's
+ * write lock before calling this function.
+ */
 int
 gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
 {
-    /* d's grant table lock must be held by the caller */
+    /* d's grant table write lock must be held by the caller */
     struct grant_table *gt = d->grant_table;
     unsigned int i;
@@ -1398,7 +1406,7 @@ gnttab_setup_table(
     }
 
     gt = d->grant_table;
-    spin_lock(&gt->lock);
+    write_lock(&gt->lock);
 
     if ( gt->gt_version == 0 )
         gt->gt_version = 1;
@@ -1426,7 +1434,7 @@ gnttab_setup_table(
     }
 
  out3:
-    spin_unlock(&gt->lock);
+    write_unlock(&gt->lock);
  out2:
     rcu_unlock_domain(d);
  out1:
@@ -1468,13 +1476,13 @@ gnttab_query_size(
         goto query_out_unlock;
     }
 
-    spin_lock(&d->grant_table->lock);
+    read_lock(&d->grant_table->lock);
 
     op.nr_frames     = nr_grant_frames(d->grant_table);
     op.max_nr_frames = max_grant_frames;
     op.status        = GNTST_okay;
 
-    spin_unlock(&d->grant_table->lock);
+    read_unlock(&d->grant_table->lock);
 
  query_out_unlock:
@@ -1500,7 +1508,7 @@ gnttab_prepare_for_transfer(
     union grant_combo   scombo, prev_scombo, new_scombo;
     int                 retries = 0;
 
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     if ( rgt->gt_version == 0 )
     {
@@ -1551,11 +1559,11 @@ gnttab_prepare_for_transfer(
         scombo = prev_scombo;
     }
 
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     return 1;
 
  fail:
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     return 0;
 }
 
@@ -1747,7 +1755,7 @@ gnttab_transfer(
         TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id);
 
         /* Tell the guest about its new page frame. */
-        spin_lock(&e->grant_table->lock);
+        read_lock(&e->grant_table->lock);
 
         if ( e->grant_table->gt_version == 1 )
         {
@@ -1765,7 +1773,7 @@ gnttab_transfer(
         shared_entry_header(e->grant_table, gop.ref)->flags |=
             GTF_transfer_completed;
 
-        spin_unlock(&e->grant_table->lock);
+        read_unlock(&e->grant_table->lock);
 
         rcu_unlock_domain(e);
 
@@ -1803,7 +1811,7 @@ __release_grant_for_copy(
     released_read = 0;
     released_write = 0;
 
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     act = &active_entry(rgt, gref);
     sha = shared_entry_header(rgt, gref);
@@ -1844,7 +1852,7 @@ __release_grant_for_copy(
         released_read = 1;
     }
 
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
 
     if ( td != rd )
     {
@@ -1902,7 +1910,7 @@ __acquire_grant_for_copy(
 
     *page = NULL;
 
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     if ( rgt->gt_version == 0 )
         PIN_FAIL(unlock_out, GNTST_general_error,
@@ -1971,17 +1979,23 @@ __acquire_grant_for_copy(
             PIN_FAIL(unlock_out_clear, GNTST_general_error,
                      "transitive grant referenced bad domain %d\n",
                      trans_domid);
-        spin_unlock(&rgt->lock);
+
+        /*
+         * __acquire_grant_for_copy() could take the read lock on
+         * the right table (if rd == td), so we have to drop the
+         * lock here and reacquire
+         */
+        read_unlock(&rgt->lock);
 
         rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
                                       readonly, &grant_frame, page,
                                       &trans_page_off, &trans_length, 0);
 
-        spin_lock(&rgt->lock);
+        read_lock(&rgt->lock);
+
         if ( rc != GNTST_okay )
         {
             __fixup_status_for_copy_pin(act, status);
+            read_unlock(&rgt->lock);
             rcu_unlock_domain(td);
-            spin_unlock(&rgt->lock);
             return rc;
         }
 
@@ -1993,7 +2007,7 @@ __acquire_grant_for_copy(
         {
             __fixup_status_for_copy_pin(act, status);
             rcu_unlock_domain(td);
-            spin_unlock(&rgt->lock);
+            read_unlock(&rgt->lock);
             put_page(*page);
             return __acquire_grant_for_copy(rd, gref, ldom, readonly,
                                             frame, page, page_off, length,
@@ -2061,7 +2075,7 @@ __acquire_grant_for_copy(
     *length = act->length;
     *frame = act->frame;
 
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     return rc;
 
  unlock_out_clear:
@@ -2073,7 +2087,8 @@ __acquire_grant_for_copy(
         gnttab_clear_flag(_GTF_reading, status);
 
  unlock_out:
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
+
     return rc;
 }
 
@@ -2389,7 +2404,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
     if ( gt->gt_version == op.version )
         goto out;
 
-    spin_lock(&gt->lock);
+    write_lock(&gt->lock);
     /* Make sure that the grant table isn't currently in use when we
        change the version number, except for the first 8 entries which
        are allowed to be in use (xenstore/xenconsole keeps them mapped).
@@ -2475,7 +2490,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
     gt->gt_version = op.version;
 
  out_unlock:
-    spin_unlock(&gt->lock);
+    write_unlock(&gt->lock);
 
  out:
     op.version = gt->gt_version;
@@ -2531,7 +2546,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop,
 
     op.status = GNTST_okay;
 
-    spin_lock(&gt->lock);
+    read_lock(&gt->lock);
 
     for ( i = 0; i < op.nr_frames; i++ )
     {
@@ -2540,7 +2555,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop,
             op.status = GNTST_bad_virt_addr;
     }
 
-    spin_unlock(&gt->lock);
+    read_unlock(&gt->lock);
 out2:
     rcu_unlock_domain(d);
 out1:
@@ -2589,7 +2604,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b)
     struct active_grant_entry *act;
     s16 rc = GNTST_okay;
 
-    spin_lock(&gt->lock);
+    write_lock(&gt->lock);
 
     /* Bounds check on the grant refs */
     if ( unlikely(ref_a >= nr_grant_entries(d->grant_table)))
@@ -2629,7 +2644,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b)
     }
 
 out:
-    spin_unlock(&gt->lock);
+    write_unlock(&gt->lock);
 
     rcu_unlock_domain(d);
 
@@ -2700,12 +2715,12 @@ static int __gnttab_cache_flush(gnttab_cache_flush_t *cflush,
 
     if ( d != owner )
     {
-        spin_lock(&owner->grant_table->lock);
+        read_lock(&owner->grant_table->lock);
 
         ret = grant_map_exists(d, owner->grant_table, mfn, ref_count);
         if ( ret != 0 )
         {
-            spin_unlock(&owner->grant_table->lock);
+            read_unlock(&owner->grant_table->lock);
             rcu_unlock_domain(d);
             put_page(page);
             return ret;
@@ -2725,7 +2740,7 @@ static int __gnttab_cache_flush(gnttab_cache_flush_t *cflush,
     ret = 0;
 
     if ( d != owner )
-        spin_unlock(&owner->grant_table->lock);
+        read_unlock(&owner->grant_table->lock);
     unmap_domain_page(v);
     put_page(page);
 
@@ -2944,7 +2959,8 @@ grant_table_create(
         goto no_mem_0;
 
     /* Simple stuff. */
-    spin_lock_init(&t->lock);
+    rwlock_init(&t->lock);
+    spin_lock_init(&t->maptrack_lock);
     t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
 
     /* Active grant table. */
@@ -3051,7 +3067,8 @@ gnttab_release_mappings(
         }
 
         rgt = rd->grant_table;
-        spin_lock(&rgt->lock);
+        read_lock(&rgt->lock);
+        double_maptrack_lock(gt, rgt);
 
         act = &active_entry(rgt, ref);
         sha = shared_entry_header(rgt, ref);
@@ -3111,7 +3128,8 @@ gnttab_release_mappings(
         if ( act->pin == 0 )
             gnttab_clear_flag(_GTF_reading, status);
 
-        spin_unlock(&rgt->lock);
+        double_maptrack_unlock(gt, rgt);
+        read_unlock(&rgt->lock);
 
         rcu_unlock_domain(rd);
 
@@ -3129,7 +3147,9 @@ grant_table_destroy(
 
     if ( t == NULL )
         return;
-
+
+    write_lock(&t->lock);
+
     for ( i = 0; i < nr_grant_frames(t); i++ )
         free_xenheap_page(t->shared_raw[i]);
     xfree(t->shared_raw);
@@ -3146,6 +3166,8 @@ grant_table_destroy(
         free_xenheap_page(t->status[i]);
     xfree(t->status);
 
+    write_unlock(&t->lock);
+
     xfree(t);
     d->grant_table = NULL;
 }
@@ -3159,7 +3181,7 @@ static void gnttab_usage_print(struct domain *rd)
     printk(" -------- active --------    -------- shared --------\n");
     printk("[ref] localdom mfn pin   localdom gmfn flags\n");
 
-    spin_lock(&gt->lock);
+    read_lock(&gt->lock);
 
     if ( gt->gt_version == 0 )
         goto out;
@@ -3208,7 +3230,7 @@ static void gnttab_usage_print(struct domain *rd)
     }
 
  out:
-    spin_unlock(&gt->lock);
+    read_unlock(&gt->lock);
 
     if ( first )
         printk("grant-table for remote domain:%5d ... "
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 32f5786..298031e 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -82,10 +82,17 @@ struct grant_table {
     struct grant_mapping **maptrack;
     unsigned int          maptrack_head;
     unsigned int          maptrack_limit;
-    /* Lock protecting updates to active and shared grant tables. */
-    spinlock_t            lock;
-    /* The defined versions are 1 and 2.  Set to 0 if we don't know
-       what version to use yet. */
+    /* Lock protecting the maptrack page list, head, and limit */
+    spinlock_t            maptrack_lock;
+    /*
+     * Lock protecting updates to grant table state (version, active
+     * entry list, etc.)
+     */
+    rwlock_t              lock;
+    /*
+     * The defined versions are 1 and 2.  Set to 0 if we don't know
+     * what version to use yet.
+     */
     unsigned              gt_version;
 };
 
@@ -101,7 +108,7 @@ gnttab_release_mappings(
     struct domain *d);
 
 /* Increase the size of a domain's grant table.
- * Caller must hold d's grant table lock.
+ * Caller must hold d's grant table write lock.
  */
 int gnttab_grow_table(struct domain *d, unsigned int req_nr_frames);

--
1.7.9.5
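
The locking discipline described in the docs/misc/grant-tables.txt hunk above can be seen in miniature outside the hypervisor. The sketch below is illustrative only: it is not part of the patch and does not use Xen's APIs; POSIX pthread primitives and made-up demo_* names stand in for Xen's rwlock_t, spinlock_t and grant-table fields. Readers of table state take the read lock, structural updates take the write lock, and the maptrack state sits behind its own lock with short critical sections.

/*
 * Minimal user-space sketch (not Xen code) of the split described above:
 * a rwlock guards read-mostly grant table state, a separate lock guards
 * the maptrack state. Build with: cc -pthread demo.c
 */
#include <pthread.h>
#include <stdio.h>

struct demo_grant_table {
    pthread_rwlock_t lock;           /* guards gt_version, nr_grant_frames */
    pthread_mutex_t  maptrack_lock;  /* guards maptrack_head */
    unsigned int     gt_version;
    unsigned int     nr_grant_frames;
    unsigned int     maptrack_head;
};

/* Readers of table state take the read lock; many can run concurrently. */
static unsigned int demo_query_size(struct demo_grant_table *gt)
{
    unsigned int n;

    pthread_rwlock_rdlock(&gt->lock);
    n = gt->nr_grant_frames;
    pthread_rwlock_unlock(&gt->lock);
    return n;
}

/* Structural changes (e.g. growing the table) take the write lock,
 * excluding all readers for the duration of the update. */
static void demo_grow_table(struct demo_grant_table *gt, unsigned int req)
{
    pthread_rwlock_wrlock(&gt->lock);
    if ( gt->nr_grant_frames < req )
        gt->nr_grant_frames = req;
    pthread_rwlock_unlock(&gt->lock);
}

/* Maptrack state has its own lock and very short critical sections. */
static unsigned int demo_get_maptrack_handle(struct demo_grant_table *gt)
{
    unsigned int handle;

    pthread_mutex_lock(&gt->maptrack_lock);
    handle = gt->maptrack_head++;
    pthread_mutex_unlock(&gt->maptrack_lock);
    return handle;
}

int main(void)
{
    struct demo_grant_table gt = { .gt_version = 1, .nr_grant_frames = 4 };

    pthread_rwlock_init(&gt.lock, NULL);
    pthread_mutex_init(&gt.maptrack_lock, NULL);

    demo_grow_table(&gt, 8);
    printf("frames=%u handle=%u\n", demo_query_size(&gt),
           demo_get_maptrack_handle(&gt));
    return 0;
}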
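
The double_maptrack_lock()/double_maptrack_unlock() pair in the patch keeps the pre-existing address-ordered acquisition, so that two CPUs locking the same pair of grant tables from opposite sides cannot deadlock (ABBA). The following standalone sketch shows the same idiom with pthread mutexes and hypothetical demo_* names; it mirrors, but is not, the hypervisor code (which compares the table pointers directly in the same way).

/* Address-ordered "double lock" idiom, user-space illustration only. */
#include <pthread.h>

struct demo_table {
    pthread_mutex_t maptrack_lock;
};

static void demo_double_maptrack_lock(struct demo_table *lgt,
                                      struct demo_table *rgt)
{
    /* Always take the lower-addressed lock first to avoid ABBA deadlock. */
    if ( lgt < rgt )
    {
        pthread_mutex_lock(&lgt->maptrack_lock);
        pthread_mutex_lock(&rgt->maptrack_lock);
    }
    else
    {
        if ( lgt != rgt )                 /* same table: lock it only once */
            pthread_mutex_lock(&rgt->maptrack_lock);
        pthread_mutex_lock(&lgt->maptrack_lock);
    }
}

static void demo_double_maptrack_unlock(struct demo_table *lgt,
                                        struct demo_table *rgt)
{
    pthread_mutex_unlock(&lgt->maptrack_lock);
    if ( lgt != rgt )
        pthread_mutex_unlock(&rgt->maptrack_lock);
}

int main(void)
{
    struct demo_table a, b;

    pthread_mutex_init(&a.maptrack_lock, NULL);
    pthread_mutex_init(&b.maptrack_lock, NULL);

    /* Locking (&a, &b) and (&b, &a) acquire in the same global order. */
    demo_double_maptrack_lock(&a, &b);
    demo_double_maptrack_unlock(&a, &b);
    return 0;
}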