[Xen-devel] [PATCH] Segment dirty log for performance



Represent the dirty log as an array of bitmaps.
Also, when bookkeeping the valid HVM PFN ranges, break the PFNs
into two ranges -- RAM and VGA.  This allows the dirty
page bitmaps to conform to these ranges and to skip the
(sometimes large) empty PFN range between them.

Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxx>
Signed-off-by: Dave Lively <dlively@xxxxxxxxxxxxxxx>
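
As an illustration (not part of the patch), here is a minimal userspace
sketch of what the per-range dirty-page lookup amounts to.  The struct
mirrors the one added to xen/include/asm-x86/domain.h below, and
test_bit_sketch() is a hypothetical stand-in for Xen's test_bit():

/* Sketch only: a pfn is dirty iff some range covers it and its
 * range-relative bit is set in that range's private bitmap. */
#include <stdint.h>

#define PFN_RANGE_NR 4

struct pfn_range {
    unsigned long start;            /* first pfn covered by this range */
    unsigned long len;              /* number of pfns covered */
    uint8_t *dirty_bitmap;          /* one bit per pfn in [start, start+len) */
    unsigned int dirty_bitmap_size; /* in pages, bit per page */
};

/* Hypothetical stand-in for Xen's test_bit() (x86, little-endian). */
static int test_bit_sketch(unsigned long nr, const uint8_t *addr)
{
    return (addr[nr / 8] >> (nr % 8)) & 1;
}

/* Same shape as the new sh_mfn_is_dirty() loop below: scan the few
 * ranges in ascending pfn order, translate pfn to a range-relative
 * bit, and never touch the (possibly huge) gaps between ranges. */
static int pfn_is_dirty(const struct pfn_range *pr0, unsigned long pfn)
{
    const struct pfn_range *pr;

    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++ )
        if ( pr->start <= pfn && pfn < pr->start + pr->len )
            return test_bit_sketch(pfn - pr->start, pr->dirty_bitmap);

    return 0; /* pfn falls in a gap no range covers */
}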

diff -r 9bdb3e7a99c9 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/p2m.c     Tue Oct 23 12:58:25 2007 -0400
@@ -241,9 +241,12 @@ set_p2m_entry(struct domain *d, unsigned
                                0, L1_PAGETABLE_ENTRIES);
     ASSERT(p2m_entry);
 
-    /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) )
-        d->arch.p2m.max_mapped_pfn = gfn;
+    if ( mfn_valid(mfn) ) {
+        paging_pfn_range_append(d, gfn);
+        /* Track the highest gfn for which we have ever had a valid mapping */
+        if ( gfn > d->arch.p2m.max_mapped_pfn )
+            d->arch.p2m.max_mapped_pfn = gfn;
+    }
 
     if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
         entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c  Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/paging.c  Tue Oct 23 12:59:41 2007 -0400
@@ -96,36 +96,98 @@
         spin_unlock(&(_d)->arch.paging.log_dirty.lock);                   \
     } while (0)
 
+void paging_pfn_range_append(struct domain *d, unsigned long gfn)
+{
+    /* Maintain a very small number of pfn ranges, i.e. 4.
+     * Don't bother with an optimal representation (by consolidating ranges, etc.)
+     * because in practice it isn't required. */
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++ )
+    {
+        int last_range;
+
+        /* unused range? */
+        if ( unlikely(pr->len == 0) ) {
+            *pr = (struct pfn_range){ gfn, 1 };
+            return;
+        }
+
+        last_range = (pr == pr0 + PFN_RANGE_NR - 1);
+
+#define CLOSE_ENOUGH 0x20 /* keep 0x00..0x9f and 0xc0... in same range */
+
+        /* gfn precedes existing range by a substantial amount? */
+        if ( unlikely(gfn + CLOSE_ENOUGH < pr->start && !last_range) ) /* yes */
+        {
+            /* insert a new range */
+            memmove(pr+1, pr, (PFN_RANGE_NR - (pr - pr0) - 1) * sizeof(*pr));
+            *pr = (struct pfn_range){ gfn, 1 };
+            return;
+        }
+
+        /* gfn precedes existing range? */
+        if ( unlikely(gfn < pr->start) ) /* yes */
+        {
+            /* extend start of range */
+            pr->len += pr->start - gfn;
+            pr->start = gfn;
+            return;
+        }
+
+        /* gfn within existing range? */
+        if ( unlikely(pr->start <= gfn && gfn < pr->start + pr->len) ) /* yes */
+            return;
+
+        /* gfn abuts or closely follows existing range? or is this the last range? */
+        if ( likely(gfn <= pr->start + pr->len + CLOSE_ENOUGH || last_range) )
+        {
+            /* extend end of range */
+            pr->len = gfn - pr->start + 1;
+            return;
+        }
+    }
+    BUG();
+}
+
 /* allocate bitmap resources for log dirty */
 int paging_alloc_log_dirty_bitmap(struct domain *d)
 {
-    if ( d->arch.paging.log_dirty.bitmap != NULL )
-        return 0;
-
-    d->arch.paging.log_dirty.bitmap_size =
-        (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
-    d->arch.paging.log_dirty.bitmap =
-        xmalloc_array(unsigned long,
-                      d->arch.paging.log_dirty.bitmap_size / BITS_PER_LONG);
-    if ( d->arch.paging.log_dirty.bitmap == NULL )
-    {
-        d->arch.paging.log_dirty.bitmap_size = 0;
-        return -ENOMEM;
-    }
-    memset(d->arch.paging.log_dirty.bitmap, 0,
-           d->arch.paging.log_dirty.bitmap_size/8);
-
-    return 0;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++ )
+    {
+        ASSERT(pr->dirty_bitmap == NULL);
+        pr->dirty_bitmap_size = (pr->len + (BITS_PER_LONG - 1)) &
+            ~(BITS_PER_LONG - 1);
+        printk("%s: allocating %uKB for domain %d dirty log range %ld\n",
+               __FUNCTION__, (pr->dirty_bitmap_size + 8191)/8192,
+               d->domain_id, (long)(pr - pr0));
+        pr->dirty_bitmap =
+            xmalloc_array(uint8_t, pr->dirty_bitmap_size/8);
+        if ( pr->dirty_bitmap == NULL )
+        {
+            pr->dirty_bitmap_size = 0;
+            return -ENOMEM;
+        }
+
+        memset(pr->dirty_bitmap, 0x0, pr->dirty_bitmap_size/8);
+    }
+    return 0;
 }
 
 /* free bitmap resources */
 void paging_free_log_dirty_bitmap(struct domain *d)
 {
-    d->arch.paging.log_dirty.bitmap_size = 0;
-    if ( d->arch.paging.log_dirty.bitmap )
-    {
-        xfree(d->arch.paging.log_dirty.bitmap);
-        d->arch.paging.log_dirty.bitmap = NULL;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++ )
+    {
+        pr->dirty_bitmap_size = 0;
+        if ( pr->dirty_bitmap )
+        {
+            xfree(pr->dirty_bitmap);
+            pr->dirty_bitmap = NULL;
+        }
     }
 }
 
@@ -174,7 +236,7 @@ int paging_log_dirty_disable(struct doma
     /* Safe because the domain is paused. */
     ret = d->arch.paging.log_dirty.disable_log_dirty(d);
     log_dirty_lock(d);
-    if ( !paging_mode_log_dirty(d) )
+    if ( !ret )
         paging_free_log_dirty_bitmap(d);
     log_dirty_unlock(d);
     domain_unpause(d);
@@ -187,6 +249,7 @@ void paging_mark_dirty(struct domain *d,
 {
     unsigned long pfn;
     mfn_t gmfn;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
 
     gmfn = _mfn(guest_mfn);
 
@@ -211,30 +274,40 @@ void paging_mark_dirty(struct domain *d,
         return;
     }
 
-    if ( likely(pfn < d->arch.paging.log_dirty.bitmap_size) )
-    {
-        if ( !__test_and_set_bit(pfn, d->arch.paging.log_dirty.bitmap) )
-        {
-            PAGING_DEBUG(LOGDIRTY,
-                         "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
-                         mfn_x(gmfn), pfn, d->domain_id);
-            d->arch.paging.log_dirty.dirty_count++;
-        }
-    }
-    else
-    {
-        PAGING_PRINTK("mark_dirty OOR! "
-                      "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
-                      "owner=%d c=%08x t=%" PRtype_info "\n",
-                      mfn_x(gmfn),
-                      pfn,
-                      d->arch.paging.log_dirty.bitmap_size,
-                      d->domain_id,
-                      (page_get_owner(mfn_to_page(gmfn))
-                       ? page_get_owner(mfn_to_page(gmfn))->domain_id
-                       : -1),
-                      mfn_to_page(gmfn)->count_info,
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++ )
+    {
+        ASSERT(pr->dirty_bitmap != NULL);
+        if ( likely(pr->start <= pfn && pfn < pr->start + pr->len) )
+        {
+            if ( !__test_and_set_bit(pfn - pr->start, pr->dirty_bitmap) )
+            {
+                PAGING_DEBUG(LOGDIRTY,
+                             "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
+                              mfn_x(gmfn), pfn, d->domain_id);
+                d->arch.paging.log_dirty.dirty_count++;
+            }
+            log_dirty_unlock(d);
+            return;
+        }
+    }
+
+    PAGING_PRINTK("mark_dirty OOR! "
+                  "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
+                  "owner=%d c=%08x t=%" PRtype_info "\n",
+                  mfn_x(gmfn),
+                  pfn,
+                  d->arch.paging.log_dirty.bitmap_size,
+                  d->domain_id,
+                  (page_get_owner(mfn_to_page(gmfn))
+                   ? page_get_owner(mfn_to_page(gmfn))->domain_id
+                   : -1),
+                  mfn_to_page(gmfn)->count_info,
                       mfn_to_page(gmfn)->u.inuse.type_info);
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++ )
+    {
+        PAGING_PRINTK("   pfn_range[%ld] start:0x%"
+                      PRI_mfn " len:0x%" PRI_mfn "\n",
+                      pr - pr0, pr->start, pr->len);
     }
 
     log_dirty_unlock(d);
@@ -245,6 +318,8 @@ int paging_log_dirty_op(struct domain *d
 int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
 {
     int i, rv = 0, clean = 0, peek = 1;
+    int bits;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
 
     domain_pause(d);
     log_dirty_lock(d);
@@ -270,37 +345,70 @@ int paging_log_dirty_op(struct domain *d
         /* caller may have wanted just to clean the state or access stats. */
         peek = 0;
 
-    if ( (peek || clean) && (d->arch.paging.log_dirty.bitmap == NULL) )
-    {
-        rv = -EINVAL; /* perhaps should be ENOMEM? */
-        goto out;
-    }
-
-    if ( sc->pages > d->arch.paging.log_dirty.bitmap_size )
-        sc->pages = d->arch.paging.log_dirty.bitmap_size;
-
-#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
-    for ( i = 0; i < sc->pages; i += CHUNK )
-    {
-        int bytes = ((((sc->pages - i) > CHUNK)
-                      ? CHUNK
-                      : (sc->pages - i)) + 7) / 8;
-
-        if ( likely(peek) )
-        {
+    pr = pr0;
+#define BITS_PER_BYTE 8
+#define CHUNK (BITS_PER_BYTE*1024) /* Transfer and clear in 8kB chunks for L1 cache. */
+    for ( i = 0; i < sc->pages; i += bits )
+    {
+        /* In gap between ranges? */
+        if ( i < pr->start )      /* yes */
+        {
+            static uint8_t zeroes[CHUNK];
+            uint8_t *pzeroes = zeroes;
+            /* copy zeroes to guest */
+            bits = pr->start - i;
+            if ( bits > sc->pages - i )
+                bits = sc->pages - i;
+            if ( bits > CHUNK * BITS_PER_BYTE )
+                bits = CHUNK * BITS_PER_BYTE;
+            bits = (bits + BITS_PER_BYTE - 1) & ~(BITS_PER_BYTE - 1);
             if ( copy_to_guest_offset(
-                sc->dirty_bitmap, i/8,
-                (uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), bytes) )
+                     sc->dirty_bitmap, 
+                     i/BITS_PER_BYTE,
+                     pzeroes,
+                     bits/BITS_PER_BYTE) )
             {
                 rv = -EFAULT;
                 goto out;
             }
         }
 
-        if ( clean )
-            memset((uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), 0, bytes);
+        /* Within a range? */
+        else if ( i < pr->start + pr->len ) /* yes */
+        {
+            bits = pr->start + pr->len - i;
+            if ( bits > sc->pages - i )
+                bits = sc->pages - i;
+            if ( bits > CHUNK * BITS_PER_BYTE )
+                bits = CHUNK * BITS_PER_BYTE;
+            bits = (bits + BITS_PER_BYTE - 1) & ~(BITS_PER_BYTE - 1);
+            if ( copy_to_guest_offset(
+                     sc->dirty_bitmap, 
+                     i/BITS_PER_BYTE,
+                     pr->dirty_bitmap + ((i - pr->start)/BITS_PER_BYTE),
+                     bits/BITS_PER_BYTE) )
+            {
+                rv = -EFAULT;
+                goto out;
+            }
+            if ( clean )
+                memset(pr->dirty_bitmap + ((i - pr->start)/BITS_PER_BYTE),
+                       0, bits/BITS_PER_BYTE);
+        }
+        /* Last range? */
+        else if ( pr - pr0 == PFN_RANGE_NR - 1 ) /* yes */
+        {
+            sc->pages = pr->start + pr->len;
+            break;
+        }
+        else
+        { /* Use next range */
+            pr++;
+            bits = 0;
+        }
     }
 #undef CHUNK
+#undef BITS_PER_BYTE
 
     log_dirty_unlock(d);
 
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/shadow/private.h  Tue Oct 23 12:58:25 2007 -0400
@@ -491,17 +491,22 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 
 /* Is this guest page dirty?  Call only in log-dirty mode. */
 {
     unsigned long pfn;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
     ASSERT(shadow_mode_log_dirty(d));
-    ASSERT(d->arch.paging.log_dirty.bitmap != NULL);
 
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
-    if ( likely(VALID_M2P(pfn))
-         && likely(pfn < d->arch.paging.log_dirty.bitmap_size) 
-         && test_bit(pfn, d->arch.paging.log_dirty.bitmap) )
-        return 1;
-
-    return 0;
+    if ( unlikely(!VALID_M2P(pfn)) )
+        return 0;
+
+    for ( pr = pr0; likely(pr - pr0 != PFN_RANGE_NR && pr->len > 0); pr++ )
+    {
+        ASSERT(pr->dirty_bitmap != NULL);
+        if ( likely(pr->start <= pfn && pfn < pr->start + pr->len) )
+            return test_bit(pfn - pr->start, pr->dirty_bitmap);
+    }
+    return 0; /* shouldn't get here */
 }
 
 
diff -r 9bdb3e7a99c9 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/include/asm-x86/domain.h      Tue Oct 23 12:58:25 2007 -0400
@@ -158,13 +158,18 @@ struct log_dirty_domain {
     int            locker; /* processor that holds the lock */
     const char    *locker_function; /* func that took it */
 
-    /* log-dirty bitmap to record dirty pages */
-    unsigned long *bitmap;
-    unsigned int   bitmap_size;  /* in pages, bit per page */
-
     /* log-dirty mode stats */
     unsigned int   fault_count;
     unsigned int   dirty_count;
+
+    /* segmented log-dirty bitmap to record dirty pages */
+#define PFN_RANGE_NR 4
+    struct pfn_range {
+        unsigned long start;
+        unsigned long len;
+        uint8_t *dirty_bitmap;
+        unsigned int dirty_bitmap_size;  /* in pages, bit per page */
+    } pfn_range[PFN_RANGE_NR];
 
     /* functions which are paging mode specific */
     int            (*enable_log_dirty   )(struct domain *d);
diff -r 9bdb3e7a99c9 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h      Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/include/asm-x86/paging.h      Tue Oct 23 12:58:25 2007 -0400
@@ -258,6 +258,8 @@ static inline int paging_cmpxchg_guest_e
         return (!cmpxchg_user(p, *old, new));
 }
 
+void paging_pfn_range_append(struct domain *d, unsigned long gfn);
+
 /* Helper function that writes a pte in such a way that a concurrent read 
  * never sees a half-written entry that has _PAGE_PRESENT set */
 static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
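
The range-coalescing policy can be exercised outside Xen.  The standalone
program below (illustration only, not part of the patch) replicates the
paging_pfn_range_append() logic; the pfn layout fed to it is hypothetical
but shaped like a typical HVM guest:

/* Standalone sketch, not part of the patch: it replicates the
 * paging_pfn_range_append() policy above so the two-range outcome
 * described in the changelog can be observed outside Xen. */
#include <stdio.h>
#include <string.h>

#define PFN_RANGE_NR 4
#define CLOSE_ENOUGH 0x20 /* keep 0x00..0x9f and 0xc0... in same range */

struct pfn_range { unsigned long start, len; };

static struct pfn_range ranges[PFN_RANGE_NR];

static void append(unsigned long gfn)
{
    struct pfn_range *pr, *pr0 = ranges;

    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++ )
    {
        int last_range = (pr == pr0 + PFN_RANGE_NR - 1);

        if ( pr->len == 0 )  /* unused slot: start a new range here */
        {
            *pr = (struct pfn_range){ gfn, 1 };
            return;
        }
        if ( gfn + CLOSE_ENOUGH < pr->start && !last_range )
        {
            /* gfn lies well before this range: insert a range ahead of it */
            memmove(pr + 1, pr, (PFN_RANGE_NR - (pr - pr0) - 1) * sizeof(*pr));
            *pr = (struct pfn_range){ gfn, 1 };
            return;
        }
        if ( gfn < pr->start )  /* just before: extend range start */
        {
            pr->len += pr->start - gfn;
            pr->start = gfn;
            return;
        }
        if ( gfn < pr->start + pr->len )  /* already covered */
            return;
        if ( gfn <= pr->start + pr->len + CLOSE_ENOUGH || last_range )
        {
            pr->len = gfn - pr->start + 1;  /* just after: extend range end */
            return;
        }
    }
}

int main(void)
{
    unsigned long gfn;
    int j;

    /* Hypothetical HVM layout: low RAM at pfns 0x00..0x9f, the legacy
     * VGA/BIOS area from pfn 0xc0, and a framebuffer mapped at pfn 0xf0000. */
    for ( gfn = 0x00; gfn <= 0x9f; gfn++ )
        append(gfn);
    append(0xc0);     /* within CLOSE_ENOUGH of range 0: absorbed into it */
    append(0xf0000);  /* far beyond range 0: opens a second range */

    for ( j = 0; j < PFN_RANGE_NR; j++ )
        printf("range[%d] start 0x%lx len 0x%lx\n",
               j, ranges[j].start, ranges[j].len);
    return 0;
}

With CLOSE_ENOUGH = 0x20, the unmapped hole at pfns 0xa0..0xbf is absorbed
into range 0 (start 0x0, len 0xc1), while the distant framebuffer pfn opens
range 1 (start 0xf0000, len 0x1).  Only those two bitmaps are ever
allocated, so the empty span between pfn 0xc0 and the framebuffer costs
no bitmap memory at all.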
