[Xen-devel] [PATCH v2 3/3] xen: use idle vcpus to scrub pages
Scrub dirty free pages from the idle vcpu loop.  To cope with heavy heap_lock
contention, each CPU works on two percpu lists:
 - Delist a batch of pages from the _heap[] free lists onto a percpu scrub
   list.
 - Scrub the pages on that percpu list and add them to a percpu free list.
 - Free those clean pages back to _heap[], merging with other chunks if
   needed.

v2:
 * Avoid having two hyperthreads within the same core scrubbing at the same
   time
 * Limit the percpu list to (1<<SCRUB_BATCH_ORDER) pages in one go
 * Don't spin on the heap lock when there is nothing to scrub
 * Partially NUMA-aware

Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
---
 xen/arch/arm/domain.c   |    1 +
 xen/arch/x86/domain.c   |    1 +
 xen/common/page_alloc.c |  130 +++++++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/mm.h    |    1 +
 4 files changed, 133 insertions(+)
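For reference, the take-a-batch / scrub-unlocked / give-back pattern described
above can be modelled in plain user-space C.  The snippet below is only an
illustrative sketch, not code from this patch: a toy struct page and a pthread
mutex stand in for struct page_info and heap_lock, and scrub_batch() is a
made-up helper name.

/*
 * Illustrative sketch only -- NOT part of the patch.  A pthread mutex
 * stands in for heap_lock and a toy struct page stands in for Xen's
 * struct page_info; scrub_batch() is a made-up helper name.
 */
#include <pthread.h>
#include <stdio.h>

#define NR_PAGES 64
#define BATCH     8

struct page {
    int dirty;
    struct page *next;
};

static struct page pages[NR_PAGES];
static struct page *global_dirty;   /* pages still needing scrub */
static struct page *global_clean;   /* scrubbed pages handed back */
static pthread_mutex_t heap_lock = PTHREAD_MUTEX_INITIALIZER;

static void scrub_batch(void)
{
    struct page *batch = NULL, *pg;
    int n = 0;

    /* 1. Delist a batch onto a private (percpu) list under the lock. */
    pthread_mutex_lock(&heap_lock);
    while ( global_dirty && n++ < BATCH )
    {
        pg = global_dirty;
        global_dirty = pg->next;
        pg->next = batch;
        batch = pg;
    }
    pthread_mutex_unlock(&heap_lock);

    /* 2. Scrub the private list with the lock dropped. */
    for ( pg = batch; pg; pg = pg->next )
        pg->dirty = 0;              /* stands in for scrub_one_page() */

    /* 3. Hand the clean pages back under the lock. */
    pthread_mutex_lock(&heap_lock);
    while ( batch )
    {
        pg = batch;
        batch = pg->next;
        pg->next = global_clean;
        global_clean = pg;
    }
    pthread_mutex_unlock(&heap_lock);
}

int main(void)
{
    int i, clean = 0;
    struct page *pg;

    /* Start with every page dirty and on the shared list. */
    for ( i = 0; i < NR_PAGES; i++ )
    {
        pages[i].dirty = 1;
        pages[i].next = global_dirty;
        global_dirty = &pages[i];
    }

    while ( global_dirty )
        scrub_batch();

    for ( pg = global_clean; pg; pg = pg->next )
        clean += !pg->dirty;
    printf("scrubbed %d/%d pages\n", clean, NR_PAGES);
    return 0;
}

The patch below does the same per CPU with page_list_head lists, bails out of
both phases as soon as softirq_pending(cpu) is set, and skips scrubbing
entirely if a hyperthread sibling is already at it.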
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 04d0cd0..b6bc3ac 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -44,6 +44,7 @@ void idle_loop(void)
         if ( cpu_is_offline(smp_processor_id()) )
             stop_cpu();
 
+        scrub_free_pages();
         local_irq_disable();
         if ( cpu_is_haltable(smp_processor_id()) )
         {
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e896210..e8d4fe7 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -116,6 +116,7 @@ static void idle_loop(void)
     {
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
+        scrub_free_pages();
        (*pm_idle)();
         do_tasklet();
         do_softirq();
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index ab293c8..6ab1d1d 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -86,6 +86,12 @@ PAGE_LIST_HEAD(page_offlined_list);
 /* Broken page list, protected by heap_lock. */
 PAGE_LIST_HEAD(page_broken_list);
 
+/* A rough flag to indicate whether a node has need_scrub pages */
+static bool_t node_need_scrub[MAX_NUMNODES];
+static DEFINE_PER_CPU(bool_t, is_scrubbing);
+static DEFINE_PER_CPU(struct page_list_head, scrub_list_cpu);
+static DEFINE_PER_CPU(struct page_list_head, free_list_cpu);
+
 /*************************
  * BOOT-TIME ALLOCATOR
  */
@@ -948,6 +954,7 @@ static void free_heap_pages(
     {
         if ( !tainted )
         {
+            node_need_scrub[node] = 1;
             for ( i = 0; i < (1 << order); i++ )
                 pg[i].count_info |= PGC_need_scrub;
         }
@@ -1525,7 +1532,130 @@ void __init scrub_heap_pages(void)
     setup_low_mem_virq();
 }
 
+#define SCRUB_BATCH_ORDER 12
+static void __scrub_free_pages(unsigned int node, unsigned int cpu)
+{
+    struct page_info *pg, *tmp;
+    unsigned int i;
+    int order;
+    struct page_list_head *local_scrub_list = &this_cpu(scrub_list_cpu);
+    struct page_list_head *local_free_list = &this_cpu(free_list_cpu);
+
+    /* Scrub the percpu scrub list */
+    while ( !page_list_empty(local_scrub_list) )
+    {
+        pg = page_list_remove_head(local_scrub_list);
+        order = PFN_ORDER(pg);
+        ASSERT( pg && order <= SCRUB_BATCH_ORDER );
+        for ( i = 0; i < (1 << order); i++ )
+        {
+            ASSERT( test_bit(_PGC_need_scrub, &pg[i].count_info) );
+            scrub_one_page(&pg[i]);
+        }
+        page_list_add_tail(pg, local_free_list);
+        if ( softirq_pending(cpu) )
+            return;
+    }
+
+    /* Free the percpu free list */
+    if ( !page_list_empty(local_free_list) )
+    {
+        spin_lock(&heap_lock);
+        page_list_for_each_safe( pg, tmp, local_free_list )
+        {
+            order = PFN_ORDER(pg);
+            page_list_del(pg, local_free_list);
+            for ( i = 0; i < (1 << order); i++ )
+            {
+                pg[i].count_info |= PGC_state_free;
+                pg[i].count_info &= ~PGC_need_scrub;
+            }
+            merge_free_trunks(pg, order, node, page_to_zone(pg), 0);
+        }
+        spin_unlock(&heap_lock);
+    }
+}
+
+void scrub_free_pages(void)
+{
+    int order;
+    struct page_info *pg, *tmp;
+    unsigned int i, zone, nr_delisted = 0;
+    unsigned int cpu = smp_processor_id();
+    unsigned int node = cpu_to_node(cpu);
+    struct page_list_head *local_scrub_list = &this_cpu(scrub_list_cpu);
+
+    /* Return if our sibling already started scrubbing */
+    for_each_cpu( i, per_cpu(cpu_sibling_mask, cpu) )
+        if ( per_cpu(is_scrubbing, i) )
+            return;
+    this_cpu(is_scrubbing) = 1;
+
+    while ( !softirq_pending(cpu) )
+    {
+        if ( !node_need_scrub[node] )
+        {
+            /* Free the local percpu list before we exit */
+            __scrub_free_pages(node, cpu);
+            goto out;
+        }
+
+        /* Delist a batch of pages from the global free lists */
+        if ( page_list_empty(local_scrub_list) )
+        {
+            spin_lock(&heap_lock);
+            for ( zone = 0; zone < NR_ZONES; zone++ )
+            {
+                for ( order = MAX_ORDER; order >= 0; order-- )
+                {
+                    page_list_for_each_safe( pg, tmp, &heap(node, zone, order) )
+                    {
+                        if ( !test_bit(_PGC_need_scrub, &(pg->count_info)) )
+                            continue;
+
+                        page_list_del( pg, &heap(node, zone, order) );
+                        if ( order > SCRUB_BATCH_ORDER )
+                        {
+                            /* Put back the extra pages */
+                            i = order;
+                            while ( i != SCRUB_BATCH_ORDER )
+                            {
+                                PFN_ORDER(pg) = --i;
+                                page_list_add_tail(pg, &heap(node, zone, i));
+                                pg += 1 << i;
+                            }
+                            PFN_ORDER(pg) = SCRUB_BATCH_ORDER;
+                        }
+
+                        for ( i = 0; i < (1 << PFN_ORDER(pg)); i++ )
+                        {
+                            ASSERT( test_bit(_PGC_need_scrub, &pg[i].count_info) );
+                            ASSERT( !test_bit(_PGC_broken, &pg[i].count_info) );
+                            mark_page_offline(&pg[i], 0);
+                        }
+                        page_list_add_tail(pg, local_scrub_list);
+                        nr_delisted += ( 1 << PFN_ORDER(pg) );
+                        if ( nr_delisted >= (1 << SCRUB_BATCH_ORDER) )
+                        {
+                            nr_delisted = 0;
+                            spin_unlock(&heap_lock);
+                            goto start_scrub;
+                        }
+                    }
+                }
+            }
+
+            node_need_scrub[node] = 0;
+            spin_unlock(&heap_lock);
+        }
+ start_scrub:
+        __scrub_free_pages(node, cpu);
+    }
+
+ out:
+    this_cpu(is_scrubbing) = 0;
+}
 
 /*************************
  * XEN-HEAP SUB-ALLOCATOR
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index b183189..1fa8c3d 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -78,6 +78,7 @@ int query_page_offline(unsigned long mfn, uint32_t *status);
 unsigned long total_free_pages(void);
 
 void scrub_heap_pages(void);
+void scrub_free_pages(void);
 
 int assign_pages(
     struct domain *d,
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel