|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCHv8 2/3] gnttab: per-active entry locking
From: Matt Wilson <msw@xxxxxxxxxx>
Introduce a per-active entry spin lock to protect active entry state
instead of requiring a double maptrack lock. The grant table read
lock must be locked before acquiring (locking) a active entry.
The reduces lock contention on grant map/unmap to the maptrack lock
during maptrack handle get/put, significantly improving performance
with many concurrent map/unmap operations.
Signed-off-by: Matt Wilson <msw@xxxxxxxxxx>
[chegger: ported to xen-staging, split into multiple commits]
Signed-off-by: Christoph Egger <chegger@xxxxxxxxx>
Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
docs/misc/grant-tables.txt | 25 ++++++
xen/common/grant_table.c | 212 +++++++++++++++++++++++++++-----------------
2 files changed, 154 insertions(+), 83 deletions(-)
diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt
index a01f4bf..c30906a 100644
--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -63,6 +63,7 @@ is complete.
act->domid : remote domain being granted rights
act->frame : machine frame being granted
act->pin : used to hold reference counts
+ act->lock : spinlock used to serialize access to active entry state
Map tracking
~~~~~~~~~~~~
@@ -87,6 +88,8 @@ is complete.
version, partially initialized active table
pages,
etc.
grant_table->maptrack_lock : spinlock used to protect the maptrack state
+ active_grant_entry->lock : spinlock used to serialize modifications to
+ active entries
The primary lock for the grant table is a read/write spinlock. All
functions that access members of struct grant_table must acquire a
@@ -105,6 +108,28 @@ is complete.
The maptrack lock may be locked while holding the read or write grant
table lock.
+ Active entries are obtained by calling active_entry_acquire(gt, ref).
+ This function returns a pointer to the active entry after locking its
+ spinlock. The caller must hold the rwlock for the gt in question
+ before calling active_entry_acquire(). This is because the grant
+ table can be dynamically extended via gnttab_grow_table() while a
+ domain is running and must be fully initialized. Once all access to
+ the active entry is complete, release the lock by calling
+ active_entry_release(act).
+
+ Summary of rules for locking:
+ active_entry_acquire() and active_entry_release() can only be
+ called when holding the relevant grant table's lock. I.e.:
+ read_lock(>->lock);
+ act = active_entry_acquire(gt, ref);
+ ...
+ active_entry_release(act);
+ read_unlock(>->lock);
+
+ Active entries cannot be acquired while holding the maptrack lock.
+ Multiple active entries can only be acquired while holding the grant
+ table _write_ lock.
+
********************************************************************************
Granting a foreign domain access to frames
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 54e4411..76de00e 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -157,10 +157,13 @@ struct active_grant_entry {
in the page. */
unsigned length:16; /* For sub-page grants, the length of the
grant. */
+ spinlock_t lock; /* lock to protect access of this entry.
+ see docs/misc/grant-tables.txt for
+ locking protocol */
};
#define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry))
-#define active_entry(t, e) \
+#define _active_entry(t, e) \
((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
static inline void gnttab_flush_tlb(const struct domain *d)
@@ -188,6 +191,28 @@ nr_active_grant_frames(struct grant_table *gt)
return num_act_frames_from_sha_frames(nr_grant_frames(gt));
}
+static inline struct active_grant_entry *
+active_entry_acquire(struct grant_table *t, grant_ref_t e)
+{
+ struct active_grant_entry *act;
+
+ /*
+ * not perfect, but better than nothing for a debug build
+ * sanity check
+ */
+ ASSERT(rw_is_locked(&t->lock));
+
+ act = &_active_entry(t, e);
+ spin_lock(&act->lock);
+
+ return act;
+}
+
+static inline void active_entry_release(struct active_grant_entry *act)
+{
+ spin_unlock(&act->lock);
+}
+
/* Check if the page has been paged out, or needs unsharing.
If rc == GNTST_okay, *page contains the page struct with a ref taken.
Caller must do put_page(*page).
@@ -228,30 +253,6 @@ static int __get_paged_frame(unsigned long gfn, unsigned
long *frame, struct pag
return rc;
}
-static inline void
-double_maptrack_lock(struct grant_table *lgt, struct grant_table *rgt)
-{
- if ( lgt < rgt )
- {
- spin_lock(&lgt->maptrack_lock);
- spin_lock(&rgt->maptrack_lock);
- }
- else
- {
- if ( lgt != rgt )
- spin_lock(&rgt->maptrack_lock);
- spin_lock(&lgt->maptrack_lock);
- }
-}
-
-static inline void
-double_maptrack_unlock(struct grant_table *lgt, struct grant_table *rgt)
-{
- spin_unlock(&lgt->maptrack_lock);
- if ( lgt != rgt )
- spin_unlock(&rgt->maptrack_lock);
-}
-
static inline int
__get_maptrack_handle(
struct grant_table *t)
@@ -316,7 +317,10 @@ get_maptrack_handle(
return handle;
}
-/* Number of grant table entries. Caller must hold d's grant table lock. */
+/*
+ * Number of grant table entries. Caller must hold d's grant table
+ * read lock.
+ */
static unsigned int nr_grant_entries(struct grant_table *gt)
{
ASSERT(gt->gt_version != 0);
@@ -505,26 +509,23 @@ static int grant_map_exists(const struct domain *ld,
unsigned long mfn,
unsigned int *ref_count)
{
- const struct active_grant_entry *act;
+ struct active_grant_entry *act;
unsigned int ref, max_iter;
ASSERT(rw_is_locked(&rgt->lock));
max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT),
nr_grant_entries(rgt));
- for ( ref = *ref_count; ref < max_iter; ref++ )
+ for ( ref = *ref_count; ref < max_iter; active_entry_release(act), ref++ )
{
- act = &active_entry(rgt, ref);
-
- if ( !act->pin )
- continue;
-
- if ( act->domid != ld->domain_id )
- continue;
+ act = active_entry_acquire(rgt, ref);
- if ( act->frame != mfn )
+ if ( !act->pin ||
+ act->domid != ld->domain_id ||
+ act->frame != mfn )
continue;
+ active_entry_release(act);
return 0;
}
@@ -537,6 +538,13 @@ static int grant_map_exists(const struct domain *ld,
return -EINVAL;
}
+/*
+ * Count the number of mapped ro or rw entries tracked in the local
+ * grant table given a provided mfn provided by a foreign domain.
+ *
+ * This function takes the maptrack lock from the local grant table
+ * and must be called with the remote grant table's rwlock held.
+ */
static void mapcount(
struct grant_table *lgt, struct domain *rd, unsigned long mfn,
unsigned int *wrc, unsigned int *rdc)
@@ -546,15 +554,28 @@ static void mapcount(
*wrc = *rdc = 0;
+ /*
+ * N.B.: while taking the local maptrack spinlock prevents any
+ * mapping changes, the remote active entries could be changing
+ * while we are counting. The caller has to hold the grant table
+ * write lock, or some other mechanism should be used to prevent
+ * concurrent changes during this operation. This is tricky
+ * because we can't promote a read lock into a write lock.
+ */
+ ASSERT(rw_is_locked(&rd->grant_table->lock));
+ spin_lock(&lgt->maptrack_lock);
+
for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
{
map = &maptrack_entry(lgt, handle);
if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ||
map->domid != rd->domain_id )
continue;
- if ( active_entry(rd->grant_table, map->ref).frame == mfn )
+ if ( _active_entry(rd->grant_table, map->ref).frame == mfn )
(map->flags & GNTMAP_readonly) ? (*rdc)++ : (*wrc)++;
}
+
+ spin_unlock(&lgt->maptrack_lock);
}
/*
@@ -576,7 +597,6 @@ __gnttab_map_grant_ref(
struct page_info *pg = NULL;
int rc = GNTST_okay;
u32 old_pin;
- u32 act_pin;
unsigned int cache_flags;
struct active_grant_entry *act = NULL;
struct grant_mapping *mt;
@@ -639,7 +659,7 @@ __gnttab_map_grant_ref(
if ( unlikely(op->ref >= nr_grant_entries(rgt)))
PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref);
- act = &active_entry(rgt, op->ref);
+ act = active_entry_acquire(rgt, op->ref);
shah = shared_entry_header(rgt, op->ref);
if (rgt->gt_version == 1) {
sha1 = &shared_entry_v1(rgt, op->ref);
@@ -656,7 +676,7 @@ __gnttab_map_grant_ref(
((act->domid != ld->domain_id) ||
(act->pin & 0x80808080U) != 0 ||
(act->is_sub_page)) )
- PIN_FAIL(unlock_out, GNTST_general_error,
+ PIN_FAIL(act_release_out, GNTST_general_error,
"Bad domain (%d != %d), or risk of counter overflow %08x, or
subpage %d\n",
act->domid, ld->domain_id, act->pin, act->is_sub_page);
@@ -667,7 +687,7 @@ __gnttab_map_grant_ref(
if ( (rc = _set_status(rgt->gt_version, ld->domain_id,
op->flags & GNTMAP_readonly,
1, shah, act, status) ) != GNTST_okay )
- goto unlock_out;
+ goto act_release_out;
if ( !act->pin )
{
@@ -698,12 +718,9 @@ __gnttab_map_grant_ref(
GNTPIN_hstr_inc : GNTPIN_hstw_inc;
frame = act->frame;
- act_pin = act->pin;
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
- read_unlock(&rgt->lock);
-
/* pg may be set, with a refcount included, from __get_paged_frame */
if ( !pg )
{
@@ -778,8 +795,6 @@ __gnttab_map_grant_ref(
goto undo_out;
}
- double_maptrack_lock(lgt, rgt);
-
if ( gnttab_need_iommu_mapping(ld) )
{
unsigned int wrc, rdc;
@@ -787,21 +802,20 @@ __gnttab_map_grant_ref(
/* We're not translated, so we know that gmfns and mfns are
the same things, so the IOMMU entry is always 1-to-1. */
mapcount(lgt, rd, frame, &wrc, &rdc);
- if ( (act_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
+ if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
!(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
{
if ( wrc == 0 )
err = iommu_map_page(ld, frame, frame,
IOMMUF_readable|IOMMUF_writable);
}
- else if ( act_pin && !old_pin )
+ else if ( act->pin && !old_pin )
{
if ( (wrc + rdc) == 0 )
err = iommu_map_page(ld, frame, frame, IOMMUF_readable);
}
if ( err )
{
- double_maptrack_unlock(lgt, rgt);
rc = GNTST_general_error;
goto undo_out;
}
@@ -814,7 +828,7 @@ __gnttab_map_grant_ref(
mt->ref = op->ref;
mt->flags = op->flags;
- double_maptrack_unlock(lgt, rgt);
+ active_entry_release(act);
read_unlock(&rgt->lock);
op->dev_bus_addr = (u64)frame << PAGE_SHIFT;
@@ -838,10 +852,6 @@ __gnttab_map_grant_ref(
put_page(pg);
}
- read_lock(&rgt->lock);
-
- act = &active_entry(rgt, op->ref);
-
if ( op->flags & GNTMAP_device_map )
act->pin -= (op->flags & GNTMAP_readonly) ?
GNTPIN_devr_inc : GNTPIN_devw_inc;
@@ -857,6 +867,9 @@ __gnttab_map_grant_ref(
if ( !act->pin )
gnttab_clear_flag(_GTF_reading, status);
+ act_release_out:
+ active_entry_release(act);
+
unlock_out:
read_unlock(&rgt->lock);
op->status = rc;
@@ -943,18 +956,17 @@ __gnttab_unmap_common(
rgt = rd->grant_table;
read_lock(&rgt->lock);
- double_maptrack_lock(lgt, rgt);
op->flags = op->map->flags;
if ( unlikely(!op->flags) || unlikely(op->map->domid != dom) )
{
gdprintk(XENLOG_WARNING, "Unstable handle %u\n", op->handle);
rc = GNTST_bad_handle;
- goto unmap_out;
+ goto read_unlock_out;
}
op->rd = rd;
- act = &active_entry(rgt, op->map->ref);
+ act = active_entry_acquire(rgt, op->map->ref);
if ( op->frame == 0 )
{
@@ -963,7 +975,7 @@ __gnttab_unmap_common(
else
{
if ( unlikely(op->frame != act->frame) )
- PIN_FAIL(unmap_out, GNTST_general_error,
+ PIN_FAIL(act_release_out, GNTST_general_error,
"Bad frame number doesn't match gntref. (%lx != %lx)\n",
op->frame, act->frame);
if ( op->flags & GNTMAP_device_map )
@@ -982,7 +994,7 @@ __gnttab_unmap_common(
if ( (rc = replace_grant_host_mapping(op->host_addr,
op->frame, op->new_addr,
op->flags)) < 0 )
- goto unmap_out;
+ goto act_release_out;
ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask));
op->map->flags &= ~GNTMAP_host_map;
@@ -1004,7 +1016,7 @@ __gnttab_unmap_common(
if ( err )
{
rc = GNTST_general_error;
- goto unmap_out;
+ goto act_release_out;
}
}
@@ -1012,8 +1024,9 @@ __gnttab_unmap_common(
if ( !(op->flags & GNTMAP_readonly) )
gnttab_mark_dirty(rd, op->frame);
- unmap_out:
- double_maptrack_unlock(lgt, rgt);
+ act_release_out:
+ active_entry_release(act);
+ read_unlock_out:
read_unlock(&rgt->lock);
op->status = rc;
@@ -1048,9 +1061,9 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common
*op)
read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
- goto unmap_out;
+ goto unlock_out;
- act = &active_entry(rgt, op->map->ref);
+ act = active_entry_acquire(rgt, op->map->ref);
sha = shared_entry_header(rgt, op->map->ref);
if ( rgt->gt_version == 1 )
@@ -1064,7 +1077,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common
*op)
* Suggests that __gntab_unmap_common failed early and so
* nothing further to do
*/
- goto unmap_out;
+ goto act_release_out;
}
pg = mfn_to_page(op->frame);
@@ -1088,7 +1101,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common
*op)
* Suggests that __gntab_unmap_common failed in
* replace_grant_host_mapping() so nothing further to do
*/
- goto unmap_out;
+ goto act_release_out;
}
if ( !is_iomem_page(op->frame) )
@@ -1109,8 +1122,12 @@ __gnttab_unmap_common_complete(struct
gnttab_unmap_common *op)
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
- unmap_out:
+ act_release_out:
+ active_entry_release(act);
+
+ unlock_out:
read_unlock(&rgt->lock);
+
if ( put_handle )
{
op->map->flags = 0;
@@ -1304,7 +1321,7 @@ int
gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
{
struct grant_table *gt = d->grant_table;
- unsigned int i;
+ unsigned int i, j;
ASSERT(req_nr_frames <= max_grant_frames);
@@ -1319,6 +1336,8 @@ gnttab_grow_table(struct domain *d, unsigned int
req_nr_frames)
if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
goto active_alloc_failed;
clear_page(gt->active[i]);
+ for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+ spin_lock_init(>->active[i][j].lock);
}
/* Shared */
@@ -1814,7 +1833,7 @@ __release_grant_for_copy(
read_lock(&rgt->lock);
- act = &active_entry(rgt, gref);
+ act = active_entry_acquire(rgt, gref);
sha = shared_entry_header(rgt, gref);
r_frame = act->frame;
@@ -1853,6 +1872,7 @@ __release_grant_for_copy(
released_read = 1;
}
+ active_entry_release(act);
read_unlock(&rgt->lock);
if ( td != rd )
@@ -1914,14 +1934,14 @@ __acquire_grant_for_copy(
read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
- PIN_FAIL(unlock_out, GNTST_general_error,
+ PIN_FAIL(gnt_unlock_out, GNTST_general_error,
"remote grant table not ready\n");
if ( unlikely(gref >= nr_grant_entries(rgt)) )
- PIN_FAIL(unlock_out, GNTST_bad_gntref,
+ PIN_FAIL(gnt_unlock_out, GNTST_bad_gntref,
"Bad grant reference %ld\n", gref);
- act = &active_entry(rgt, gref);
+ act = active_entry_acquire(rgt, gref);
shah = shared_entry_header(rgt, gref);
if ( rgt->gt_version == 1 )
{
@@ -1986,6 +2006,7 @@ __acquire_grant_for_copy(
* the right table (if rd == td), so we have to drop the
* lock here and reacquire
*/
+ active_entry_release(act);
read_unlock(&rgt->lock);
rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
@@ -1993,8 +2014,11 @@ __acquire_grant_for_copy(
&trans_page_off, &trans_length, 0);
read_lock(&rgt->lock);
+ act = active_entry_acquire(rgt, gref);
+
if ( rc != GNTST_okay ) {
__fixup_status_for_copy_pin(act, status);
+ active_entry_release(act);
read_unlock(&rgt->lock);
rcu_unlock_domain(td);
return rc;
@@ -2008,6 +2032,7 @@ __acquire_grant_for_copy(
{
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
+ active_entry_release(act);
read_unlock(&rgt->lock);
put_page(*page);
return __acquire_grant_for_copy(rd, gref, ldom, readonly,
@@ -2076,6 +2101,7 @@ __acquire_grant_for_copy(
*length = act->length;
*frame = act->frame;
+ active_entry_release(act);
read_unlock(&rgt->lock);
return rc;
@@ -2088,6 +2114,9 @@ __acquire_grant_for_copy(
gnttab_clear_flag(_GTF_reading, status);
unlock_out:
+ active_entry_release(act);
+
+ gnt_unlock_out:
read_unlock(&rgt->lock);
return rc;
@@ -2389,7 +2418,6 @@
gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
gnttab_set_version_t op;
struct domain *d = current->domain;
struct grant_table *gt = d->grant_table;
- struct active_grant_entry *act;
grant_entry_v1_t reserved_entries[GNTTAB_NR_RESERVED_ENTRIES];
long res;
int i;
@@ -2414,8 +2442,7 @@
gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
{
for ( i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_grant_entries(gt); i++ )
{
- act = &active_entry(gt, i);
- if ( act->pin != 0 )
+ if ( read_atomic(&_active_entry(gt, i).pin) != 0 )
{
gdprintk(XENLOG_WARNING,
"tried to change grant table version from %d to %d,
but some grant entries still in use\n",
@@ -2602,9 +2629,17 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t
ref_b)
{
struct domain *d = rcu_lock_current_domain();
struct grant_table *gt = d->grant_table;
- struct active_grant_entry *act;
+ struct active_grant_entry *act_a = NULL;
+ struct active_grant_entry *act_b = NULL;
s16 rc = GNTST_okay;
+ if ( ref_a == ref_b )
+ /*
+ * noop, so avoid acquiring the same active entry
+ * twice which would case a deadlock.
+ */
+ return rc;
+
write_lock(>->lock);
/* Bounds check on the grant refs */
@@ -2613,12 +2648,12 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t
ref_b)
if ( unlikely(ref_b >= nr_grant_entries(d->grant_table)))
PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-b (%d).\n", ref_b);
- act = &active_entry(gt, ref_a);
- if ( act->pin )
+ act_a = active_entry_acquire(gt, ref_a);
+ if ( act_a->pin )
PIN_FAIL(out, GNTST_eagain, "ref a %ld busy\n", (long)ref_a);
- act = &active_entry(gt, ref_b);
- if ( act->pin )
+ act_b = active_entry_acquire(gt, ref_b);
+ if ( act_b->pin )
PIN_FAIL(out, GNTST_eagain, "ref b %ld busy\n", (long)ref_b);
if ( gt->gt_version == 1 )
@@ -2645,6 +2680,10 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t
ref_b)
}
out:
+ if ( act_b != NULL )
+ active_entry_release(act_b);
+ if ( act_a != NULL )
+ active_entry_release(act_a);
write_unlock(>->lock);
rcu_unlock_domain(d);
@@ -2954,7 +2993,7 @@ grant_table_create(
struct domain *d)
{
struct grant_table *t;
- int i;
+ unsigned int i, j;
if ( (t = xzalloc(struct grant_table)) == NULL )
goto no_mem_0;
@@ -2974,6 +3013,8 @@ grant_table_create(
if ( (t->active[i] = alloc_xenheap_page()) == NULL )
goto no_mem_2;
clear_page(t->active[i]);
+ for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+ spin_lock_init(&t->active[i][j].lock);
}
/* Tracking of mapped foreign frames table */
@@ -3070,7 +3111,7 @@ gnttab_release_mappings(
rgt = rd->grant_table;
read_lock(&rgt->lock);
- act = &active_entry(rgt, ref);
+ act = active_entry_acquire(rgt, ref);
sha = shared_entry_header(rgt, ref);
if (rgt->gt_version == 1)
status = &sha->flags;
@@ -3128,6 +3169,7 @@ gnttab_release_mappings(
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
+ active_entry_release(act);
read_unlock(&rgt->lock);
rcu_unlock_domain(rd);
@@ -3190,9 +3232,12 @@ static void gnttab_usage_print(struct domain *rd)
uint16_t status;
uint64_t frame;
- act = &active_entry(gt, ref);
+ act = active_entry_acquire(gt, ref);
if ( !act->pin )
+ {
+ active_entry_release(act);
continue;
+ }
sha = shared_entry_header(gt, ref);
@@ -3222,6 +3267,7 @@ static void gnttab_usage_print(struct domain *rd)
printk("[%3d] %5d 0x%06lx 0x%08x %5d 0x%06"PRIx64" 0x%02x\n",
ref, act->domid, act->frame, act->pin,
sha->domid, frame, status);
+ active_entry_release(act);
}
out:
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |