diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index b36a66e..0c9f12f 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -64,6 +64,8 @@ string_param("badpage", opt_badpage);
 static bool_t opt_bootscrub __initdata = 1;
 boolean_param("bootscrub", opt_bootscrub);
 
+static bool_t opt_nonsmt __initdata = 0;
+boolean_param("nonsmt", opt_nonsmt);
 /*
  * bootscrub_blocksize -> Size (bytes) of mem block to scrub with heaplock held
  */
@@ -103,6 +105,7 @@ struct scrub_region {
     u64 start;
     u64 chunk_size;
     u64 cpu_block_size;
+    cpumask_t cpu;
 };
 
 static struct scrub_region __initdata region[MAX_NUMNODES];
@@ -1286,6 +1289,7 @@ void __init smp_scrub_heap_pages(void *data)
     /* Determine if we are scrubbing using the boot CPU */
     if ( region->cpu_block_size != ~0ULL )
         /* Determine the current CPU's index into CPU's linked to this node*/
+        /* TODO: Ignore the siblings! */
        for_each_cpu( temp_cpu, &node_to_cpumask(local_node) )
        {
            if ( cpu == temp_cpu )
@@ -1304,7 +1308,6 @@ void __init smp_scrub_heap_pages(void *data)
     else
         end_mfn = start_mfn + region->chunk_size;
 
-
     for ( mfn = start_mfn; mfn < end_mfn; mfn++ )
     {
         pg = mfn_to_page(mfn);
@@ -1313,10 +1316,9 @@ void __init smp_scrub_heap_pages(void *data)
         if ( !mfn_valid(mfn) || !page_state_is(pg, free) )
             continue;
 
-        /* Every 100MB, print a progress dot. */
-        if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
+        /* Every 1G, print a progress dot. */
+        if ( (mfn % ((1024*1024*1024)/PAGE_SIZE)) == 0 )
             printk(".");
-
         /* Do the scrub if possible */
         if ( page_state_is(pg, free) )
             scrub_one_page(pg);
@@ -1331,23 +1333,26 @@
  */
 void __init scrub_heap_pages(void)
 {
-    cpumask_t node_cpus, total_node_cpus_mask = {{ 0 }};
-    unsigned int i, boot_cpu_node, total_node_cpus, cpu = smp_processor_id();
+    cpumask_t node_cpus, node_cpus_nonsmt, total_node_cpus_mask = {{ 0 }};
+    unsigned int i, j, boot_cpu_node, total_cpus, cpu = smp_processor_id(), sibling;
     unsigned long mfn, mfn_off, chunk_size, max_cpu_blk_size = 0;
     unsigned long mem_start, mem_end;
+    s_time_t start, end;
 
     if ( !opt_bootscrub )
         return;
 
     boot_cpu_node = cpu_to_node(cpu);
 
-    printk("Scrubbing Free RAM: ");
+    printk("Scrubbing Free RAM on %d nodes\n", num_online_nodes());
 
     /* Scrub block size */
     chunk_size = opt_bootscrub_blocksize >> PAGE_SHIFT;
     if ( chunk_size == 0 )
         chunk_size = 1;
 
+    printk("CPUs have %d threads.\n", cpumask_weight(per_cpu(cpu_sibling_mask, 0)));
+    printk("CPUs have %d cores.\n", cpumask_weight(per_cpu(cpu_core_mask, 0)) / cpumask_weight(per_cpu(cpu_sibling_mask, 0)));
     /* Determine the amount of memory to scrub, per CPU on each Node */
     for_each_online_node ( i )
     {
@@ -1359,27 +1364,49 @@ void __init scrub_heap_pages(void)
         /* It's possible a node has no CPU's */
         if ( cpumask_empty(&node_cpus) )
             continue;
 
-        cpumask_or(&total_node_cpus_mask, &total_node_cpus_mask, &node_cpus);
+        node_cpus_nonsmt = node_to_cpumask(i);
+        for_each_cpu(j, &node_cpus)
+        {
+            cpu = 0;
+            for_each_cpu(sibling, per_cpu(cpu_sibling_mask, j)) {
+                if (cpu++ == 0) /* Skip core */
+                    continue;
+                cpumask_clear_cpu(sibling, &node_cpus_nonsmt);
+            }
+        }
+        printk("node%d has %d CPUs non-SMT\n", i, cpumask_weight(&node_cpus_nonsmt));
+        for_each_cpu(j, &node_cpus_nonsmt)
+            printk("#%d,", j);
+
+        printk("\n");
+        if (opt_nonsmt)
+            cpumask_copy(&node_cpus, &node_cpus_nonsmt);
+
+        cpumask_or(&total_node_cpus_mask, &total_node_cpus_mask, &node_cpus);
         region[i].cpu_block_size = (mem_end - mem_start) /
                                    cpumask_weight(&node_cpus);
         region[i].start = mem_start;
+        cpumask_copy(&region[i].cpu, &node_cpus);
+        printk("NODE%d scrubbing %lx PFNs spread across %d CPUs\n", i, mem_end - mem_start, cpumask_weight(&node_cpus));
 
         if ( region[i].cpu_block_size > max_cpu_blk_size )
             max_cpu_blk_size = region[i].cpu_block_size;
     }
-
+    cpu = smp_processor_id(); /* We re-used it in the loop. */
     /* Round default chunk size down if required */
     if ( max_cpu_blk_size && chunk_size > max_cpu_blk_size )
         chunk_size = max_cpu_blk_size;
 
-    total_node_cpus = cpumask_weight(&total_node_cpus_mask);
+    total_cpus = cpumask_weight(&total_node_cpus_mask);
+    printk("Using a total of %d CPUs.\n", total_cpus);
+    start = NOW();
 
     /* Start all CPU's scrubbing memory, chunk_size at a time */
     for ( mfn_off = 0; mfn_off < max_cpu_blk_size; mfn_off += chunk_size )
     {
         process_pending_softirqs();
 
-        atomic_set(&bootscrub_count, total_node_cpus);
+        atomic_set(&bootscrub_count, total_cpus);
 
         spin_lock(&heap_lock);
@@ -1388,7 +1415,7 @@ void __init scrub_heap_pages(void)
         {
             region[i].chunk_size = chunk_size;
             region[i].offset = mfn_off;
-            node_cpus = node_to_cpumask(i);
+            cpumask_copy(&node_cpus, &region[i].cpu);
             /* Clear local cpu ID */
             cpumask_clear_cpu(cpu, &node_cpus);
             /* Start page scrubbing on all other CPU's */
@@ -1406,6 +1433,10 @@ void __init scrub_heap_pages(void)
         spin_unlock(&heap_lock);
     }
 
+    end = NOW();
+    printk("Done SMP scrubbing (%d seconds). Boot scrub on BSP:\n",
+           (u32)((end - start) >> 30));
+    start = NOW();
     /* Use the boot CPU to scrub any nodes which have no CPU's linked to them */
     for_each_online_node ( i )
     {
@@ -1416,6 +1447,7 @@ void __init scrub_heap_pages(void)
 
         mem_start = max(node_start_pfn(i), first_valid_mfn);
         mem_end = min(mem_start + node_spanned_pages(i), max_page);
+        printk("NODE%d scrubbing %lx->%lx\n", i, mem_start, mem_end);
 
         region[0].offset = 0;
         region[0].cpu_block_size = ~0ULL;
@@ -1435,7 +1467,8 @@ void __init scrub_heap_pages(void)
             process_pending_softirqs();
         }
     }
-    printk("done.\n");
+    end = NOW();
+    printk("done. (%d seconds)\n", (u32)((end - start) >> 30));
 
     /* Now that the heap is initialized, run checks and set bounds
      * for the low mem virq algorithm. */