|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] Xen: Spread boot time page scrubbing across all available CPU's (v5)
>>> On 04.06.14 at 15:29, <konrad.wilk@xxxxxxxxxx> wrote:
> --- a/docs/misc/xen-command-line.markdown
> +++ b/docs/misc/xen-command-line.markdown
> @@ -198,6 +198,16 @@ Scrub free RAM during boot. This is a safety feature to
> prevent
> accidentally leaking sensitive VM data into other VMs if Xen crashes
> and reboots.
>
> +### bootscrub_chunk_
Looking at other examples in that file, underscores appear to need
backslash escaping here. And I don't think the trailing one should
be there.
> +> `= <size>`
> +
> +> Default: `128MB`
> +
> +Maximum RAM block size chunks to be scrubbed whilst holding the page heap
> lock
> +and not running softirqs. Reduce this if softirqs are not being run
> frequently
> +enough. Setting this to a high value may cause cause boot failure,
> particularly
Double "cause".
> +static void __init smp_scrub_heap_pages(void *data)
> +{
> + unsigned long mfn, start, end;
> + struct page_info *pg;
> + struct scrub_region *r;
> + unsigned int temp_cpu, node, cpu_idx = 0;
> + unsigned int cpu = smp_processor_id();
> +
> + if ( data )
> + r = data;
> + else
> + {
> + node = cpu_to_node(cpu);
> + if ( node == NUMA_NO_NODE )
> + return;
> + r = ®ion[node];
> + }
> +
> + /* Determine the current CPU's index into CPU's linked to this node. */
> + for_each_cpu ( temp_cpu, &r->cpus )
> + {
> + if ( cpu == temp_cpu )
> + break;
> + cpu_idx++;
> + }
> +
> + /* Calculate the starting mfn for this CPU's memory block. */
> + start = r->start + (r->per_cpu_sz * cpu_idx) + r->offset;
> +
> + /* Calculate the end mfn into this CPU's memory block for this
> iteration. */
> + if ( r->offset + chunk_size >= r->per_cpu_sz )
> + {
> + end = r->start + (r->per_cpu_sz * cpu_idx) + r->per_cpu_sz;
> +
> + if ( r->rem && ((cpumask_weight(&r->cpus) - 1 == cpu_idx )) )
> + end += r->rem;
> + }
> + else
> + end = start + chunk_size;
> +
> + for ( mfn = start; mfn < end; mfn++ )
> + {
> + pg = mfn_to_page(mfn);
> +
> + /* Check the mfn is valid and page is free. */
> + if ( !mfn_valid(mfn) || !page_state_is(pg, free) )
> + continue;
> +
> + scrub_one_page(pg);
> + }
> +}
> +
> +static int __init find_non_smt(unsigned int node, cpumask_t *dest)
> +{
> + cpumask_t node_cpus;
> + unsigned int i, cpu;
> +
> + cpumask_and(&node_cpus, &node_to_cpumask(node), &cpu_online_map);
> + cpumask_clear(dest);
> + for_each_cpu ( i, &node_cpus )
> + {
> + if ( cpumask_intersects(dest, per_cpu(cpu_sibling_mask, i)) )
> + continue;
> + cpu = cpumask_first(per_cpu(cpu_sibling_mask, i));
> + cpumask_set_cpu(cpu, dest);
> + }
> + return cpumask_weight(dest);
> +}
> +
> /*
> - * Scrub all unallocated pages in all heap zones. This function is more
> - * convoluted than appears necessary because we do not want to continuously
> - * hold the lock while scrubbing very large memory areas.
> + * Scrub all unallocated pages in all heap zones. This function uses all
> + * online cpu's to scrub the memory in parallel.
> */
> void __init scrub_heap_pages(void)
> {
> - unsigned long mfn;
> - struct page_info *pg;
> + cpumask_t node_cpus, all_worker_cpus;
> + unsigned int i, j;
> + unsigned long offset, max_per_cpu_sz = 0;
> + unsigned long start, end;
> + unsigned long rem = 0;
> + int last_distance, best_node;
>
> if ( !opt_bootscrub )
> return;
>
> - printk("Scrubbing Free RAM: ");
> + cpumask_clear(&all_worker_cpus);
> + /* Scrub block size. */
> + chunk_size = opt_bootscrub_chunk >> PAGE_SHIFT;
> + if ( chunk_size == 0 )
> + chunk_size = MB(128);
> +
> + /* Round #0 - figure out amounts and which CPUs to use. */
> + for_each_online_node ( i )
> + {
> + if ( !node_spanned_pages(i) )
> + continue;
> + /* Calculate Node memory start and end address. */
> + start = max(node_start_pfn(i), first_valid_mfn);
This implies you're aware that possibly node_start_pfn(i) <
first_valid_mfn.
> + end = min(node_start_pfn(i) + node_spanned_pages(i), max_page);
Which in turn means that this may yield end < start. Is all the rest
of the code prepared to deal with this? At least the divide and
modulo operations on the difference further down doesn't look like
so.
> + /* CPUs that are online and on this node (if none, that it is OK). */
> + find_non_smt(i, &node_cpus);
Here you could latch the weight, avoiding ...
> + cpumask_or(&all_worker_cpus, &all_worker_cpus, &node_cpus);
> + if ( cpumask_empty(&node_cpus) )
... the extra operation on the mask here and the explicit use of
cpumask_weight() on node_cpus in the else path.
> + {
> + /* No CPUs on this node. Round #2 will take of it. */
> + rem = 0;
> + region[i].per_cpu_sz = (end - start);
> + } else {
Coding style - this takes 3 lines.
> + rem = (end - start) % cpumask_weight(&node_cpus);
> + region[i].per_cpu_sz = (end - start) /
> cpumask_weight(&node_cpus);
> + if ( region[i].per_cpu_sz > max_per_cpu_sz )
> + max_per_cpu_sz = region[i].per_cpu_sz;
> + }
> + region[i].start = start;
> + region[i].rem = rem;
> + cpumask_copy(®ion[i].cpus, &node_cpus);
> +
> + }
> +
> + printk("Scrubbing Free RAM on %u nodes using %u CPUs\n",
> num_online_nodes(),
> + cpumask_weight(&all_worker_cpus));
>
> - for ( mfn = first_valid_mfn; mfn < max_page; mfn++ )
> + /* Round: #1 - do NUMA nodes with CPUs. */
> + for ( offset = 0; offset < max_per_cpu_sz; offset += chunk_size )
> {
> + for_each_online_node ( i )
> + region[i].offset = offset;
> +
> process_pending_softirqs();
>
> - pg = mfn_to_page(mfn);
> + spin_lock(&heap_lock);
> + on_selected_cpus(&all_worker_cpus, smp_scrub_heap_pages, NULL, 1);
> + spin_unlock(&heap_lock);
>
> - /* Quick lock-free check. */
> - if ( !mfn_valid(mfn) || !page_state_is(pg, free) )
> + printk(".");
> + }
> +
> + /*
> + * Round #2: NUMA nodes with no CPUs get scrubbed with CPUs on the node
> + * closest to us and with CPUs.
> + */
> + for_each_online_node ( i )
> + {
> + node_cpus = node_to_cpumask(i);
> +
> + if ( !cpumask_empty(&node_cpus) )
> continue;
>
> - /* Every 100MB, print a progress dot. */
> - if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
> - printk(".");
> + last_distance = INT_MAX;
> + best_node = first_node(node_online_map);
> + /* Figure out which NODE CPUs are close. */
> + for_each_online_node ( j )
> + {
> + int distance;
>
> - spin_lock(&heap_lock);
> + if ( i == j )
> + continue;
This could be replaced with cpumask_empty(&node_to_cpumask(j)),
allowing ...
> + distance = __node_distance(i, j);
> + if ( distance < last_distance )
> + {
> + if ( cpumask_empty(&node_to_cpumask(j)) )
> + continue;
this check to be dropped.
Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |