
[Xen-devel] [PATCHv6 5/5] gnttab: use per-VCPU maptrack free lists



From: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>

Performance analysis of aggregate network throughput with many VMs
shows that throughput is significantly limited by contention on the
maptrack lock when obtaining/releasing maptrack handles from the free
list.

Instead of a single free list, use a per-VCPU list. This avoids any
contention when obtaining a handle.  Handles must be released back to
their original list, and since this may occur on a different VCPU,
there is some contention on the destination VCPU's free list tail
pointer (but this is much better than a per-domain lock).
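
As a minimal standalone sketch of the release path's lock-free tail
append (using C11 atomics rather than Xen's cmpxchg(); all names here
are hypothetical, not Xen's):

    #include <stdatomic.h>
    #include <stdint.h>

    #define TAIL_SENTINEL (~0u)           /* plays the role of MAPTRACK_TAIL */

    struct entry    { uint32_t next; };             /* ~ grant_mapping.ref   */
    struct freelist { _Atomic uint32_t tail; };     /* ~ vcpu->maptrack_tail */

    /* Append 'handle' to the owning VCPU's free list. */
    static void freelist_append(struct freelist *fl, struct entry *tab,
                                uint32_t handle)
    {
        uint32_t prev = atomic_load(&fl->tail);

        /* The new entry becomes the list terminator. */
        tab[handle].next = TAIL_SENTINEL;

        /* Swing the shared tail to the new handle; retry if another
         * releaser targeting the same VCPU's list got there first. */
        while ( !atomic_compare_exchange_weak(&fl->tail, &prev, handle) )
            ;

        /* Link the old tail entry to the new one.  Until this store,
         * the old tail's .next still reads as TAIL_SENTINEL, which is
         * why the allocation side refuses to hand out the last entry
         * (see the next == MAPTRACK_TAIL check in the patch below). */
        tab[prev].next = handle;
    }

Only the tail pointer is shared between VCPUs; the head is touched
exclusively by the owning VCPU, so allocation stays contention-free.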

A domain's maptrack limit is multiplied by the number of VCPUs.  This
ensures that a worst-case domain which performs grant table operations
on only one VCPU will not see a lower maptrack limit.
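
For a rough sense of the numbers (illustrative only: assume 4 KiB
pages and the enlarged 16-byte struct grant_mapping below, so
MAPTRACK_PER_PAGE = 4096 / 16 = 256, and assume a hypothetical
max_maptrack_frames of 256):

    per-VCPU limit:  256 frames * 256 entries/frame = 65536 handles
    4-VCPU domain:   up to 4 * 65536 = 262144 handles in total

This is also why gnttab_setup_table now sizes the maptrack frame
array as max_maptrack_frames * d->max_vcpus.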

Signed-off-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 xen/common/grant_table.c      |  129 ++++++++++++++++++++++++-----------------
 xen/include/xen/grant_table.h |    8 ++-
 xen/include/xen/sched.h       |    5 ++
 3 files changed, 86 insertions(+), 56 deletions(-)

diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 8f3d125..a237d11 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -118,9 +118,9 @@ struct gnttab_unmap_common {
     ((t)->maptrack[(e)/MAPTRACK_PER_PAGE][(e)%MAPTRACK_PER_PAGE])
 
 static inline unsigned int
-nr_maptrack_frames(struct grant_table *t)
+nr_vcpu_maptrack_frames(struct vcpu *v)
 {
-    return t->maptrack_limit / MAPTRACK_PER_PAGE;
+    return v->maptrack_limit / MAPTRACK_PER_PAGE;
 }
 
 #define MAPTRACK_TAIL (~0u)
@@ -249,66 +249,91 @@ static int __get_paged_frame(unsigned long gfn, unsigned long *frame, struct pag
 
 static inline int
 __get_maptrack_handle(
-    struct grant_table *t)
+    struct grant_table *t,
+    struct vcpu *v)
 {
-    unsigned int h;
-    if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
+    unsigned int head, next;
+
+    head = v->maptrack_head;
+    if ( unlikely(head == MAPTRACK_TAIL) )
+        return -1;
+
+    next = maptrack_entry(t, head).ref;
+    if ( unlikely(next == MAPTRACK_TAIL) )
         return -1;
-    t->maptrack_head = maptrack_entry(t, h).ref;
-    return h;
+
+    v->maptrack_head = next;
+
+    return head;
 }
 
 static inline void
 put_maptrack_handle(
     struct grant_table *t, int handle)
 {
-    spin_lock(&t->maptrack_lock);
-    maptrack_entry(t, handle).ref = t->maptrack_head;
-    t->maptrack_head = handle;
-    spin_unlock(&t->maptrack_lock);
+    struct domain *d = current->domain;
+    struct vcpu *v;
+    u32 prev_tail, cur_tail;
+
+    /* Set current handle to have TAIL reference */
+    maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
+    v = d->vcpu[maptrack_entry(t, handle).vcpu];
+    cur_tail = v->maptrack_tail;
+    do {
+        prev_tail = cur_tail;
+        cur_tail = cmpxchg((u32 *)&v->maptrack_tail, prev_tail, handle);
+    } while ( cur_tail != prev_tail );
+    maptrack_entry(t, prev_tail).ref = handle;
 }
 
 static inline int
 get_maptrack_handle(
     struct grant_table *lgt)
 {
+    struct vcpu          *v = current;
     int                   i;
     grant_handle_t        handle;
     struct grant_mapping *new_mt;
     unsigned int          new_mt_limit, nr_frames;
 
-    spin_lock(&lgt->maptrack_lock);
-
-    while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
-    {
-        nr_frames = nr_maptrack_frames(lgt);
-        if ( nr_frames >= max_maptrack_frames )
-            break;
-
-        new_mt = alloc_xenheap_page();
-        if ( !new_mt )
-            break;
+    handle = __get_maptrack_handle(lgt, v);
+    if ( likely(handle != -1) )
+        return handle;
 
-        clear_page(new_mt);
+    nr_frames = nr_vcpu_maptrack_frames(v);
+    if ( nr_frames >= max_maptrack_frames )
+        return -1;
 
-        new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
+    new_mt = alloc_xenheap_page();
+    if ( !new_mt )
+        return -1;
 
-        for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
-            new_mt[i - 1].ref = lgt->maptrack_limit + i;
-        new_mt[i - 1].ref = lgt->maptrack_head;
-        lgt->maptrack_head = lgt->maptrack_limit;
+    clear_page(new_mt);
 
-        lgt->maptrack[nr_frames] = new_mt;
-        smp_wmb();
-        lgt->maptrack_limit      = new_mt_limit;
+    new_mt_limit = v->maptrack_limit + MAPTRACK_PER_PAGE;
 
-        gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
-                 nr_frames + 1);
+    for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
+    {
+        new_mt[i - 1].ref = (lgt->maptrack_pages * MAPTRACK_PER_PAGE) + i;
+        new_mt[i - 1].vcpu = v->vcpu_id;
+    }
+    /* Set last entry vcpu and ref */
+    new_mt[i - 1].ref = v->maptrack_head;
+    new_mt[i - 1].vcpu = v->vcpu_id;
+    v->maptrack_head = lgt->maptrack_pages * MAPTRACK_PER_PAGE;
+    if ( v->maptrack_tail == MAPTRACK_TAIL )
+    {
+        v->maptrack_tail = (lgt->maptrack_pages * MAPTRACK_PER_PAGE)
+            + MAPTRACK_PER_PAGE - 1;
+        new_mt[i - 1].ref = MAPTRACK_TAIL;
     }
 
+    spin_lock(&lgt->maptrack_lock);
+    lgt->maptrack[lgt->maptrack_pages++] = new_mt;
     spin_unlock(&lgt->maptrack_lock);
 
-    return handle;
+    v->maptrack_limit = new_mt_limit;
+
+    return __get_maptrack_handle(lgt, v);
 }
 
 /* 
@@ -558,7 +583,7 @@ static void mapcount(
     ASSERT(spin_is_locked(&rd->grant_table->lock));
     spin_lock(&lgt->maptrack_lock);
 
-    for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
+    for ( handle = 0; handle < lgt->maptrack_pages * MAPTRACK_PER_PAGE; handle++ )
     {
         struct active_grant_entry *act;
 
@@ -906,7 +931,7 @@ __gnttab_unmap_common(
 
     op->frame = (unsigned long)(op->dev_bus_addr >> PAGE_SHIFT);
 
-    if ( unlikely(op->handle >= lgt->maptrack_limit) )
+    if ( unlikely(op->handle >= lgt->maptrack_pages * MAPTRACK_PER_PAGE) )
     {
         gdprintk(XENLOG_INFO, "Bad handle (%d).\n", op->handle);
         op->status = GNTST_bad_handle;
@@ -1371,6 +1396,7 @@ gnttab_setup_table(
     struct gnttab_setup_table op;
     struct domain *d;
     struct grant_table *gt;
+    struct vcpu *v;
     int            i;
     xen_pfn_t  gmfn;
 
@@ -1422,6 +1448,17 @@ gnttab_setup_table(
     }
     gt->maptrack_pages = 0;
 
+    /* Tracking of mapped foreign frames table */
+    if ( (gt->maptrack = xzalloc_array(struct grant_mapping *,
+                                       max_maptrack_frames * d->max_vcpus)) == NULL )
+        goto out2;
+    for_each_vcpu( d, v )
+    {
+        v->maptrack_head = MAPTRACK_TAIL;
+        v->maptrack_tail = MAPTRACK_TAIL;
+    }
+    gt->maptrack_pages = 0;
+
     if ( gt->gt_version == 0 )
         gt->gt_version = 1;
 
@@ -2984,18 +3021,6 @@ grant_table_create(
             spin_lock_init(&t->active[i][j].lock);
     }
 
-    /* Tracking of mapped foreign frames table */
-    if ( (t->maptrack = xzalloc_array(struct grant_mapping *,
-                                      max_maptrack_frames)) == NULL )
-        goto no_mem_2;
-    if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
-        goto no_mem_3;
-    clear_page(t->maptrack[0]);
-    t->maptrack_limit = MAPTRACK_PER_PAGE;
-    for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
-        t->maptrack[0][i - 1].ref = i;
-    t->maptrack[0][i - 1].ref = MAPTRACK_TAIL;
-
     /* Shared grant table. */
     if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
         goto no_mem_3;
@@ -3026,12 +3051,10 @@ grant_table_create(
         free_xenheap_page(t->shared_raw[i]);
     xfree(t->shared_raw);
  no_mem_3:
-    free_xenheap_page(t->maptrack[0]);
-    xfree(t->maptrack);
- no_mem_2:
     for ( i = 0;
           i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
         free_xenheap_page(t->active[i]);
+ no_mem_2:
     xfree(t->active);
  no_mem_1:
     xfree(t);
@@ -3055,7 +3078,7 @@ gnttab_release_mappings(
 
     BUG_ON(!d->is_dying);
 
-    for ( handle = 0; handle < gt->maptrack_limit; handle++ )
+    for ( handle = 0; handle < (gt->maptrack_pages * MAPTRACK_PER_PAGE); handle++ )
     {
         map = &maptrack_entry(gt, handle);
         if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) )
@@ -3159,7 +3182,7 @@ grant_table_destroy(
         free_xenheap_page(t->shared_raw[i]);
     xfree(t->shared_raw);
 
-    for ( i = 0; i < nr_maptrack_frames(t); i++ )
+    for ( i = 0; i < t->maptrack_pages; i++ )
         free_xenheap_page(t->maptrack[i]);
     xfree(t->maptrack);
 
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index d78c381..feb435d 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -60,6 +60,8 @@ struct grant_mapping {
     u32      ref;           /* grant ref */
     u16      flags;         /* 0-4: GNTMAP_* ; 5-15: unused */
     domid_t  domid;         /* granting domain */
+    u32      vcpu;          /* vcpu which created the grant mapping */
+    u16      pad[2];
 };
 
 /* Per-domain grant information. */
@@ -78,10 +80,10 @@ struct grant_table {
     grant_status_t       **status;
     /* Active grant table. */
     struct active_grant_entry **active;
-    /* Mapping tracking table. */
+    /* Mapping tracking table per vcpu. */
     struct grant_mapping **maptrack;
-    unsigned int          maptrack_head;
-    unsigned int          maptrack_limit;
+    /* Total pages used for mapping tracking table */
+    unsigned int          maptrack_pages;
     /* Lock protecting the maptrack page list, head, and limit */
     spinlock_t            maptrack_lock;
     /* 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 80c6f62..7f8422e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -219,6 +219,11 @@ struct vcpu
     /* VCPU paused by system controller. */
     int              controller_pause_count;
 
+    /* Maptrack */
+    unsigned int     maptrack_head;
+    unsigned int     maptrack_tail;
+    unsigned int     maptrack_limit;
+
     /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
     evtchn_port_t    virq_to_evtchn[NR_VIRQS];
     spinlock_t       virq_lock;
-- 
1.7.10.4

