[Xen-devel] [PATCH] Segment dirty log for performance
Represent the dirty log as an array of bitmaps.  Also, when bookkeeping the
valid HVM PFN ranges, break the PFNs into two ranges -- RAM and VGA.
This allows the dirty page bitmaps to conform to these ranges and to
skip the (sometimes large) empty PFN range between them.

Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxx>
Signed-off-by: Dave Lively <dlively@xxxxxxxxxxxxxxx>

diff -r 9bdb3e7a99c9 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/p2m.c     Tue Oct 23 12:58:25 2007 -0400
@@ -241,9 +241,12 @@ set_p2m_entry(struct domain *d, unsigned
                                        0, L1_PAGETABLE_ENTRIES);
     ASSERT(p2m_entry);
 
-    /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) )
-        d->arch.p2m.max_mapped_pfn = gfn;
+    if ( mfn_valid(mfn) ) {
+        paging_pfn_range_append(d, gfn);
+        /* Track the highest gfn for which we have ever had a valid mapping */
+        if (gfn > d->arch.p2m.max_mapped_pfn )
+            d->arch.p2m.max_mapped_pfn = gfn;
+    }
 
     if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
         entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c  Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/paging.c  Tue Oct 23 12:59:41 2007 -0400
@@ -96,36 +96,98 @@
         spin_unlock(&(_d)->arch.paging.log_dirty.lock);                \
     } while (0)
 
+void paging_pfn_range_append(struct domain *d, unsigned long gfn)
+{
+    /* Maintain a very small number of pfn ranges; ie 4
+     * Don't bother with an optimal representation (by consolidating ranges, etc.)
+     * because in practice it isn't required. */
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
+    for (pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++)
+    {
+        int last_range;
+
+        /* unused range? */
+        if ( unlikely(pr->len == 0) ) {
+            *pr = (struct pfn_range){ gfn, 1 };
+            return;
+        }
+
+        last_range = (pr == pr0 + PFN_RANGE_NR - 1);
+
+#define CLOSE_ENOUGH 0x20 /* keep 0x00..0x9f and 0xc0... in same range */
+
+        /* gfn precedes existing range by a substantial amount? */
+        if (unlikely(gfn + CLOSE_ENOUGH < pr->start && !last_range)) /* yes */
+        {
+            /* insert a new range */
+            memmove(pr+1, pr, (PFN_RANGE_NR - (pr - pr0) - 1) * sizeof(*pr));
+            *pr = (struct pfn_range){ gfn, 1 };
+            return;
+        }
+
+        /* gfn precedes existing range? */
+        if (unlikely(gfn < pr->start)) /* yes */
+        {
+            /* extend start of range */
+            pr->len += pr->start - gfn;
+            pr->start = gfn;
+            return;
+        }
+
+        /* gfn within existing range? */
+        if ( unlikely(pr->start <= gfn && gfn < pr->start + pr->len) ) /* yes */
+            return;
+
+        /* gfn abuts or closely follows existing range?  or this is last range?
+         */
+        if ( likely(gfn <= pr->start + pr->len + CLOSE_ENOUGH || last_range) )
+        {
+            /* extend end of range */
+            pr->len = gfn - pr->start + 1;
+            return;
+        }
+    }
+    BUG();
+}
+
 /* allocate bitmap resources for log dirty */
 int paging_alloc_log_dirty_bitmap(struct domain *d)
 {
-    if ( d->arch.paging.log_dirty.bitmap != NULL )
-        return 0;
-
-    d->arch.paging.log_dirty.bitmap_size =
-        (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
-    d->arch.paging.log_dirty.bitmap =
-        xmalloc_array(unsigned long,
-                      d->arch.paging.log_dirty.bitmap_size / BITS_PER_LONG);
-    if ( d->arch.paging.log_dirty.bitmap == NULL )
-    {
-        d->arch.paging.log_dirty.bitmap_size = 0;
-        return -ENOMEM;
-    }
-    memset(d->arch.paging.log_dirty.bitmap, 0,
-           d->arch.paging.log_dirty.bitmap_size/8);
-
-    return 0;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
+    for (pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++)
+    {
+        ASSERT(pr->dirty_bitmap == NULL);
+        pr->dirty_bitmap_size = (pr->len + (BITS_PER_LONG - 1)) &
+            ~(BITS_PER_LONG - 1);
+        printk("%s: allocating %dKB for domain %d dirty log range %ld\n",
+               __FUNCTION__, (pr->dirty_bitmap_size + 8195)/8196,
+               d->domain_id, pr - pr0);
+        pr->dirty_bitmap =
+            xmalloc_array(uint8_t, pr->dirty_bitmap_size/8);
+        if ( pr->dirty_bitmap == NULL )
+        {
+            pr->dirty_bitmap_size = 0;
+            return -ENOMEM;
+        }
+
+        memset(pr->dirty_bitmap, 0x0, pr->dirty_bitmap_size/8);
+    }
+    return 0;
 }
 
 /* free bitmap resources */
 void paging_free_log_dirty_bitmap(struct domain *d)
 {
-    d->arch.paging.log_dirty.bitmap_size = 0;
-    if ( d->arch.paging.log_dirty.bitmap )
-    {
-        xfree(d->arch.paging.log_dirty.bitmap);
-        d->arch.paging.log_dirty.bitmap = NULL;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+    for (pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++)
+    {
+        pr->dirty_bitmap_size = 0;
+        if ( pr->dirty_bitmap )
+        {
+            xfree(pr->dirty_bitmap);
+            pr->dirty_bitmap = NULL;
+        }
     }
 }
@@ -174,7 +236,7 @@ int paging_log_dirty_disable(struct doma
     /* Safe because the domain is paused. */
     ret = d->arch.paging.log_dirty.disable_log_dirty(d);
     log_dirty_lock(d);
-    if ( !paging_mode_log_dirty(d) )
+    if ( !ret )
        paging_free_log_dirty_bitmap(d);
     log_dirty_unlock(d);
     domain_unpause(d);
@@ -187,6 +249,7 @@ void paging_mark_dirty(struct domain *d,
 {
     unsigned long pfn;
     mfn_t gmfn;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
 
     gmfn = _mfn(guest_mfn);
 
@@ -211,30 +274,40 @@ void paging_mark_dirty(struct domain *d,
         return;
     }
 
-    if ( likely(pfn < d->arch.paging.log_dirty.bitmap_size) )
-    {
-        if ( !__test_and_set_bit(pfn, d->arch.paging.log_dirty.bitmap) )
-        {
-            PAGING_DEBUG(LOGDIRTY,
-                         "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
-                         mfn_x(gmfn), pfn, d->domain_id);
-            d->arch.paging.log_dirty.dirty_count++;
-        }
-    }
-    else
-    {
-        PAGING_PRINTK("mark_dirty OOR! "
-                      "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
-                      "owner=%d c=%08x t=%" PRtype_info "\n",
-                      mfn_x(gmfn),
-                      pfn,
-                      d->arch.paging.log_dirty.bitmap_size,
-                      d->domain_id,
-                      (page_get_owner(mfn_to_page(gmfn))
-                       ? page_get_owner(mfn_to_page(gmfn))->domain_id
-                       : -1),
-                      mfn_to_page(gmfn)->count_info,
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++ )
+    {
+        ASSERT(pr->dirty_bitmap != NULL);
+        if ( likely(pr->start <= pfn && pfn < pr->start + pr->len) )
+        {
+            if ( !__test_and_set_bit(pfn - pr->start, pr->dirty_bitmap) )
+            {
+                PAGING_DEBUG(LOGDIRTY,
+                             "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
+                             mfn_x(gmfn), pfn, d->domain_id);
+                d->arch.paging.log_dirty.dirty_count++;
+            }
+            log_dirty_unlock(d);
+            return;
+        }
+    }
+
+    PAGING_PRINTK("mark_dirty OOR! "
+                  "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
+                  "owner=%d c=%08x t=%" PRtype_info "\n",
+                  mfn_x(gmfn),
+                  pfn,
+                  d->arch.paging.log_dirty.bitmap_size,
+                  d->domain_id,
+                  (page_get_owner(mfn_to_page(gmfn))
+                   ? page_get_owner(mfn_to_page(gmfn))->domain_id
+                   : -1),
+                  mfn_to_page(gmfn)->count_info,
                   mfn_to_page(gmfn)->u.inuse.type_info);
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++ )
+    {
+        PAGING_PRINTK(" pfn_range[%ld] start:0x%"
+                      PRI_mfn " len:0x%" PRI_mfn "\n",
+                      pr - pr0, pr->start, pr->len);
     }
 
     log_dirty_unlock(d);
@@ -245,6 +318,8 @@ int paging_log_dirty_op(struct domain *d
 int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
 {
     int i, rv = 0, clean = 0, peek = 1;
+    int bits;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
 
     domain_pause(d);
     log_dirty_lock(d);
@@ -270,37 +345,70 @@ int paging_log_dirty_op(struct domain *d
         /* caller may have wanted just to clean the state or access stats. */
         peek = 0;
 
-    if ( (peek || clean) && (d->arch.paging.log_dirty.bitmap == NULL) )
-    {
-        rv = -EINVAL; /* perhaps should be ENOMEM? */
-        goto out;
-    }
-
-    if ( sc->pages > d->arch.paging.log_dirty.bitmap_size )
-        sc->pages = d->arch.paging.log_dirty.bitmap_size;
-
-#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
-    for ( i = 0; i < sc->pages; i += CHUNK )
-    {
-        int bytes = ((((sc->pages - i) > CHUNK)
-                      ? CHUNK
-                      : (sc->pages - i)) + 7) / 8;
-
-        if ( likely(peek) )
-        {
+    pr = pr0;
+#define BITS_PER_BYTE 8
+#define CHUNK (BITS_PER_BYTE*1024) /* Transfer and clear in 8kB chunks for L1 cache. */
+    for ( i = 0; i < sc->pages; i += bits )
+    {
+        /* In gap between ranges? */
+        if ( i < pr->start ) /* yes */
+        {
+            static uint8_t zeroes[CHUNK];
+            uint8_t *pzeroes = zeroes;
+            /* copy zeroes to guest */
+            bits = pr->start - i;
+            if ( bits > sc->pages - i )
+                bits = sc->pages - i;
+            if ( bits > CHUNK * BITS_PER_BYTE )
+                bits = CHUNK * BITS_PER_BYTE;
+            bits = (bits + BITS_PER_BYTE - 1) & ~(BITS_PER_BYTE - 1);
             if ( copy_to_guest_offset(
-                sc->dirty_bitmap, i/8,
-                (uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), bytes) )
+                sc->dirty_bitmap,
+                i/BITS_PER_BYTE,
+                pzeroes,
+                bits/BITS_PER_BYTE) )
             {
-                rv = -EFAULT;
+                rv = -EINVAL;
                 goto out;
             }
         }
-
-        if ( clean )
-            memset((uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), 0, bytes);
+        /* Within a range? */
+        else if ( i < pr->start + pr->len ) /* yes */
+        {
+            bits = pr->start + pr->len - i;
+            if ( bits > sc->pages - i )
+                bits = sc->pages - i;
+            if ( bits > CHUNK * BITS_PER_BYTE )
+                bits = CHUNK * BITS_PER_BYTE;
+            bits = (bits + BITS_PER_BYTE - 1) & ~(BITS_PER_BYTE - 1);
+            if ( copy_to_guest_offset(
+                sc->dirty_bitmap,
+                i/BITS_PER_BYTE,
+                pr->dirty_bitmap + ((i - pr->start)/BITS_PER_BYTE),
+                bits/BITS_PER_BYTE) )
+            {
+                rv = -EINVAL;
+                goto out;
+            }
+            if ( clean )
+                memset(pr->dirty_bitmap + ((i - pr->start)/BITS_PER_BYTE),
+                       0, bits/BITS_PER_BYTE);
+        }
+        /* Last range?
+         */
+        else if (pr - pr0 == PFN_RANGE_NR-1) /* yes */
+        {
+            sc->pages = pr->start + pr->len;
+            break;
+        }
+        else
+        {   /* Use next range */
+            pr++;
+            bits = 0;
+        }
     }
 #undef CHUNK
+#undef BITS_PER_BYTE
 
     log_dirty_unlock(d);
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/shadow/private.h  Tue Oct 23 12:58:25 2007 -0400
@@ -491,17 +491,22 @@ sh_mfn_is_dirty(struct domain *d, mfn_t
 /* Is this guest page dirty?  Call only in log-dirty mode. */
 {
     unsigned long pfn;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
     ASSERT(shadow_mode_log_dirty(d));
-    ASSERT(d->arch.paging.log_dirty.bitmap != NULL);
 
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
-    if ( likely(VALID_M2P(pfn))
-         && likely(pfn < d->arch.paging.log_dirty.bitmap_size)
-         && test_bit(pfn, d->arch.paging.log_dirty.bitmap) )
-        return 1;
-
-    return 0;
+    if ( unlikely(!VALID_M2P(pfn)) )
+        return 0;
+
+    for (pr = pr0; likely(pr - pr0 != PFN_RANGE_NR && pr->len > 0); pr++)
+    {
+        ASSERT(pr->dirty_bitmap != NULL);
+        if ( likely(pr->start <= pfn && pfn < pr->start + pr->len) )
+            return test_bit(pfn - pr->start, pr->dirty_bitmap);
+    }
+    return 0; /* shouldn't get here */
 }
diff -r 9bdb3e7a99c9 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/include/asm-x86/domain.h      Tue Oct 23 12:58:25 2007 -0400
@@ -158,13 +158,18 @@ struct log_dirty_domain {
     int            locker; /* processor that holds the lock */
     const char    *locker_function; /* func that took it */
 
-    /* log-dirty bitmap to record dirty pages */
-    unsigned long *bitmap;
-    unsigned int   bitmap_size;  /* in pages, bit per page */
-
     /* log-dirty mode stats */
     unsigned int   fault_count;
     unsigned int   dirty_count;
+
+    /* segmented log-dirty bitmap to record dirty pages */
+#define PFN_RANGE_NR 4
+    struct pfn_range {
+        unsigned long start;
+        unsigned long len;
+        uint8_t *dirty_bitmap;
+        unsigned int dirty_bitmap_size; /* in pages, bit per page */
+    } pfn_range[PFN_RANGE_NR];
 
     /* functions which are paging mode specific */
     int            (*enable_log_dirty   )(struct domain *d);
diff -r 9bdb3e7a99c9 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h      Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/include/asm-x86/paging.h      Tue Oct 23 12:58:25 2007 -0400
@@ -258,6 +258,8 @@ static inline int paging_cmpxchg_guest_e
     return (!cmpxchg_user(p, *old, new));
 }
 
+void paging_pfn_range_append(struct domain *d, unsigned long gfn);
+
 /* Helper function that writes a pte in such a way that a concurrent read
  * never sees a half-written entry that has _PAGE_PRESENT set */
 static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
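As a rough stand-alone illustration of the policy paging_pfn_range_append() implements, the sketch below (hypothetical names and an invented PFN layout, not Xen code) feeds a low block of PFNs and a distant block through the same append rules: a new PFN claims an empty slot, extends a range it falls within CLOSE_ENOUGH of, or is inserted as its own range, so the far-away block lands in a separate range and nothing is ever allocated for the gap between them. Built with a C99 compiler it prints two ranges, loosely mirroring the RAM/VGA split described in the commit message.

/* Stand-alone sketch of segmented PFN ranges; illustrative only, not Xen code. */
#include <stdio.h>
#include <string.h>

#define PFN_RANGE_NR 4      /* at most four ranges, as in the patch */
#define CLOSE_ENOUGH 0x20   /* merge PFNs this close to an existing range */

struct pfn_range {
    unsigned long start;
    unsigned long len;
};

static struct pfn_range ranges[PFN_RANGE_NR];

/* Simplified mirror of the patch's append logic: no bitmaps, no locking. */
static void range_append(unsigned long gfn)
{
    for (int i = 0; i < PFN_RANGE_NR; i++) {
        struct pfn_range *pr = &ranges[i];
        int last = (i == PFN_RANGE_NR - 1);

        if (pr->len == 0) {                 /* unused slot: claim it */
            pr->start = gfn;
            pr->len = 1;
            return;
        }
        if (gfn + CLOSE_ENOUGH < pr->start && !last) {
            /* well before this range: shift the tail and insert a new range */
            memmove(pr + 1, pr, (PFN_RANGE_NR - i - 1) * sizeof(*pr));
            pr->start = gfn;
            pr->len = 1;
            return;
        }
        if (gfn < pr->start) {              /* just before: extend the start */
            pr->len += pr->start - gfn;
            pr->start = gfn;
            return;
        }
        if (gfn < pr->start + pr->len)      /* already covered */
            return;
        if (gfn <= pr->start + pr->len + CLOSE_ENOUGH || last) {
            pr->len = gfn - pr->start + 1;  /* just after: extend the end */
            return;
        }
        /* otherwise try the next slot */
    }
}

int main(void)
{
    /* Made-up layout: low guest RAM, then a framebuffer block mapped far
     * above it; the large gap in between gets no range at all. */
    for (unsigned long g = 0; g < 0x400; g++)
        range_append(g);
    for (unsigned long g = 0xf0000; g < 0xf0800; g++)
        range_append(g);

    for (int i = 0; i < PFN_RANGE_NR; i++)
        if (ranges[i].len)
            printf("range[%d] start=0x%lx len=0x%lx\n",
                   i, ranges[i].start, ranges[i].len);
    return 0;
}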