[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] x86: introduce and use prefill_possible_map()
Jan, If you look in the current xen-unstable staging tree, you'll see I've added a cpu notifier chain to Xen. We can use this to dynamically allocate/initialise/de-allocate subsystem state in response to CPU hotplug events, which is better than static initialisation off cpu_possible_map. My aim is to get rid of cpu_possible_map from the x86 build altogether (sio it is not used in common or x86 code) and to restrict cpu_present_map only to the CPU hotplug subsystem. Next thing in my sights is to improve the percpu memory area management, but any other uses of cpu_possible_map, cpu_present_map, or bad for-loops 0...NR_CPUS, please do provide patches to switch to cpu notifiers if you have time! -- Keir On 04/05/2010 17:00, "Jan Beulich" <JBeulich@xxxxxxxxxx> wrote: > With the larger default NR_CPUS config setting (and the more with > build time settings exceeding this default) the wasting of memory (and > potentially other resources) just because cpu_possible_map doesn't get > set up properly increases. Use Linux' (accordingly modified to fit > Xen) prefill_possible_map() to overcome this. > > This makes necessary an adjustment to tasklet initialization (which > must not happen before cpu_possible_map is guaranteed to be fully set > up - according to my static code analysis this was a problem on ia64 > anyway). > > Tracing code also needed a minor adjustment, as it still had a simple > counted loop accessing per-CPU data in its body. > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx> > > --- 2010-05-04.orig/xen/arch/ia64/linux-xen/smpboot.c 2010-05-04 > 16:04:09.000000000 +0200 > +++ 2010-05-04/xen/arch/ia64/linux-xen/smpboot.c 2010-05-04 > 13:22:06.000000000 +0200 > @@ -776,7 +776,7 @@ void __cpu_die(unsigned int cpu) > #endif /* CONFIG_HOTPLUG_CPU */ > > void > -smp_cpus_done (unsigned int dummy) > +smp_cpus_done(void) > { > int cpu; > unsigned long bogosum = 0; > --- 2010-05-04.orig/xen/arch/ia64/xen/xensetup.c 2010-05-04 > 16:04:09.000000000 +0200 > +++ 2010-05-04/xen/arch/ia64/xen/xensetup.c 2010-05-04 16:42:36.000000000 > +0200 > @@ -562,10 +562,12 @@ skip_move: > end_boot_allocator(); > > softirq_init(); > - tasklet_subsys_init(); > + tasklet_early_init(); > > late_setup_arch(&cmdline); > > + tasklet_subsys_init(); > + > scheduler_init(); > idle_vcpu[0] = (struct vcpu*) ia64_r13; > idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0); > @@ -626,7 +628,7 @@ printk("num_online_cpus=%d, max_cpus=%d\ > local_irq_disable(); > > printk("Brought up %ld CPUs\n", (long)num_online_cpus()); > - smp_cpus_done(max_cpus); > + smp_cpus_done(); > #endif > > initialise_gdb(); /* could be moved earlier */ > --- 2010-05-04.orig/xen/arch/x86/mpparse.c 2010-05-04 16:04:09.000000000 > +0200 > +++ 2010-05-04/xen/arch/x86/mpparse.c 2010-05-04 13:22:06.000000000 +0200 > @@ -35,7 +35,6 @@ > > /* Have we found an MP table */ > int smp_found_config; > -unsigned int __devinitdata maxcpus = NR_CPUS; > > /* > * Various Linux-internal data structures created from the > @@ -66,7 +65,7 @@ unsigned int def_to_bigsmp = 0; > /* Processor that is doing the boot up */ > unsigned int boot_cpu_physical_apicid = -1U; > /* Internal processor count */ > -static unsigned int __devinitdata num_processors; > +unsigned int __devinitdata num_processors; > > /* Bitmask of physically existing CPUs */ > physid_mask_t phys_cpu_present_map; > @@ -105,8 +104,10 @@ static int __devinit MP_processor_info ( > int ver, apicid, cpu = 0; > physid_mask_t phys_cpu; > > - if (!(m->mpc_cpuflag & CPU_ENABLED)) > + if (!(m->mpc_cpuflag & CPU_ENABLED)) { > + ++disabled_cpus; > return -EINVAL; > + } > > apicid = mpc_apic_id(m, translation_table[mpc_record]); > > @@ -185,9 +186,9 @@ static int __devinit MP_processor_info ( > return -ENOSPC; > } > > - if (num_processors >= maxcpus) { > - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." > - " Processor ignored.\n", maxcpus); > + if (max_cpus && num_processors >= max_cpus) { > + printk(KERN_WARNING "WARNING: maxcpus limit of %u reached." > + " Processor ignored.\n", max_cpus); > return -ENOSPC; > } > > --- 2010-05-04.orig/xen/arch/x86/setup.c 2010-05-04 16:04:09.000000000 > +0200 > +++ 2010-05-04/xen/arch/x86/setup.c 2010-05-04 16:43:22.000000000 +0200 > @@ -61,7 +61,7 @@ static int __initdata opt_nosmp = 0; > boolean_param("nosmp", opt_nosmp); > > /* maxcpus: maximum number of CPUs to activate. */ > -static unsigned int __initdata max_cpus = NR_CPUS; > +unsigned int __devinitdata max_cpus; > integer_param("maxcpus", max_cpus); > > /* opt_watchdog: If true, run a watchdog NMI on each processor. */ > @@ -568,6 +568,11 @@ void __init __start_xen(unsigned long mb > if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 ) > EARLY_FAIL("Misaligned CPU0 stack.\n"); > > + if ( opt_nosmp ) > + max_cpus = prefill_possible_map(1); > + else if ( max_cpus ) > + max_cpus = prefill_possible_map(max_cpus); > + > if ( e820_raw_nr != 0 ) > { > memmap_type = "Xen-e820"; > @@ -978,7 +983,7 @@ void __init __start_xen(unsigned long mb > #endif > > softirq_init(); > - tasklet_subsys_init(); > + tasklet_early_init(); > > early_cpu_init(); > > @@ -1017,6 +1022,11 @@ void __init __start_xen(unsigned long mb > zap_low_mappings(); > #endif > > + if ( !max_cpus ) > + max_cpus = prefill_possible_map(0); > + > + tasklet_subsys_init(); > + > init_apic_mappings(); > > percpu_free_unused_areas(); > @@ -1049,12 +1059,9 @@ void __init __start_xen(unsigned long mb > vesa_mtrr_init(); > #endif > > - if ( opt_nosmp ) > - max_cpus = 0; > - > iommu_setup(); /* setup iommu if available */ > > - smp_prepare_cpus(max_cpus); > + smp_prepare_cpus(!opt_nosmp * max_cpus); > > spin_debug_enable(); > > @@ -1087,7 +1094,7 @@ void __init __start_xen(unsigned long mb > } > > printk("Brought up %ld CPUs\n", (long)num_online_cpus()); > - smp_cpus_done(max_cpus); > + smp_cpus_done(); > > initialise_gdb(); /* could be moved earlier */ > > --- 2010-05-04.orig/xen/arch/x86/smpboot.c 2010-05-04 16:04:09.000000000 > +0200 > +++ 2010-05-04/xen/arch/x86/smpboot.c 2010-05-04 13:22:06.000000000 +0200 > @@ -83,10 +83,12 @@ EXPORT_SYMBOL(cpu_online_map); > cpumask_t cpu_callin_map; > cpumask_t cpu_callout_map; > EXPORT_SYMBOL(cpu_callout_map); > -cpumask_t cpu_possible_map = CPU_MASK_ALL; > +cpumask_t cpu_possible_map; > EXPORT_SYMBOL(cpu_possible_map); > static cpumask_t smp_commenced_mask; > > +unsigned int __devinitdata disabled_cpus; > + > /* TSC's upper 32 bits can't be written in eariler CPU (before prescott), > there > * is no way to resync one AP against BP. TBD: for prescott and above, we > * should use IA64's algorithm > @@ -829,7 +831,11 @@ int alloc_cpu_id(void) > { > cpumask_t tmp_map; > int cpu; > - cpus_complement(tmp_map, cpu_present_map); > + > + if (max_cpus) > + cpus_andnot(tmp_map, cpu_possible_map, cpu_present_map); > + else > + cpus_complement(tmp_map, cpu_present_map); > cpu = first_cpu(tmp_map); > if (cpu >= NR_CPUS) > return -ENODEV; > @@ -1243,6 +1249,52 @@ void __devinit smp_prepare_boot_cpu(void > per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; > } > > +/* > + * cpu_possible_mask should be static, it cannot change as cpu's > + * are onlined, or offlined. The reason is per-cpu data-structures > + * are allocated by some modules at init time, and dont expect to > + * do this dynamically on cpu arrival/departure. > + * cpu_present_mask on the other hand can change dynamically. > + * In case when cpu_hotplug is not compiled, then we resort to current > + * behaviour, which is cpu_possible == cpu_present. > + * - Ashok Raj > + * > + * Three ways to find out the number of additional hotplug CPUs: > + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. > + * - The user can overwrite it with max_cpus=NUM > + * - Otherwise don't reserve additional CPUs. > + * We do this because additional CPUs waste a lot of memory. > + * -AK > + */ > +unsigned int __init prefill_possible_map(unsigned int max_cpus) > +{ > + unsigned int i, possible; > + > + /* no processor from mptable or madt */ > + if (!num_processors) > + num_processors = 1; > + > + if (!max_cpus) > + possible = num_processors + disabled_cpus; > + else > + possible = max_cpus; > + > + if (possible > NR_CPUS) { > + printk(KERN_WARNING > + "%u processors exceeds NR_CPUS limit of %d\n", > + possible, NR_CPUS); > + possible = NR_CPUS; > + } > + > + printk(KERN_INFO "SMP: Allowing %u CPUs, %d hotplug CPUs\n", > + possible, max_t(int, possible - num_processors, 0)); > + > + for (i = 0; i < possible; i++) > + cpu_set(i, cpu_possible_map); > + > + return possible; > +} > + > static void > remove_siblinginfo(int cpu) > { > @@ -1568,7 +1620,7 @@ int __devinit __cpu_up(unsigned int cpu) > } > > > -void __init smp_cpus_done(unsigned int max_cpus) > +void __init smp_cpus_done(void) > { > #ifdef CONFIG_X86_IO_APIC > setup_ioapic_dest(); > --- 2010-05-04.orig/xen/common/tasklet.c 2010-04-22 14:43:25.000000000 > +0200 > +++ 2010-05-04/xen/common/tasklet.c 2010-05-04 16:46:03.000000000 +0200 > @@ -18,7 +18,7 @@ > #include <xen/tasklet.h> > > /* Some subsystems call into us before we are initialised. We ignore them. */ > -static bool_t tasklets_initialised; > +static unsigned int __read_mostly tasklets_initialised = UINT_MAX; > > /* > * NB. Any modification to a tasklet_list requires the scheduler to run > @@ -35,7 +35,8 @@ void tasklet_schedule_on_cpu(struct task > > spin_lock_irqsave(&tasklet_lock, flags); > > - if ( tasklets_initialised && !t->is_dead ) > + if ( (tasklets_initialised == NR_CPUS || tasklets_initialised == cpu) && > + !t->is_dead ) > { > t->scheduled_on = cpu; > if ( !t->is_running ) > @@ -161,14 +162,24 @@ void tasklet_init( > t->data = data; > } > > +void __init tasklet_early_init(void) > +{ > + unsigned int cpu = smp_processor_id(); > + > + INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu)); > + > + tasklets_initialised = cpu; > +} > + > void __init tasklet_subsys_init(void) > { > unsigned int cpu; > > for_each_possible_cpu ( cpu ) > - INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu)); > + if ( cpu != tasklets_initialised ) > + INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu)); > > - tasklets_initialised = 1; > + tasklets_initialised = NR_CPUS; > } > > /* > --- 2010-05-04.orig/xen/common/trace.c 2010-04-22 14:43:25.000000000 +0200 > +++ 2010-05-04/xen/common/trace.c 2010-05-04 16:26:50.000000000 +0200 > @@ -289,7 +289,7 @@ void __init init_trace_bufs(void) > return; > } > > - for(i = 0; i < NR_CPUS; i++) > + for_each_possible_cpu(i) > spin_lock_init(&per_cpu(t_lock, i)); > > for(i=0; i<T_INFO_PAGES; i++) > --- 2010-05-04.orig/xen/include/asm-x86/smp.h 2010-05-04 16:04:09.000000000 > +0200 > +++ 2010-05-04/xen/include/asm-x86/smp.h 2010-05-04 13:22:06.000000000 > +0200 > @@ -34,6 +34,10 @@ extern void smp_alloc_memory(void); > DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); > DECLARE_PER_CPU(cpumask_t, cpu_core_map); > > +extern unsigned int max_cpus, num_processors, disabled_cpus; > + > +unsigned int prefill_possible_map(unsigned int max_cpus); > + > void smp_send_nmi_allbutself(void); > > void send_IPI_mask(const cpumask_t *mask, int vector); > --- 2010-05-04.orig/xen/include/xen/smp.h 2010-05-04 16:04:09.000000000 > +0200 > +++ 2010-05-04/xen/include/xen/smp.h 2010-05-04 13:22:06.000000000 +0200 > @@ -26,7 +26,7 @@ extern int __cpu_up(unsigned int cpunum) > /* > * Final polishing of CPUs > */ > -extern void smp_cpus_done(unsigned int max_cpus); > +extern void smp_cpus_done(void); > > /* > * Call a function on all other processors > --- 2010-05-04.orig/xen/include/xen/tasklet.h 2010-04-22 14:43:25.000000000 > +0200 > +++ 2010-05-04/xen/include/xen/tasklet.h 2010-05-04 16:42:13.000000000 > +0200 > @@ -32,6 +32,7 @@ void tasklet_kill(struct tasklet *t); > void migrate_tasklets_from_cpu(unsigned int cpu); > void tasklet_init( > struct tasklet *t, void (*func)(unsigned long), unsigned long data); > +void tasklet_early_init(void); > void tasklet_subsys_init(void); > > #endif /* __XEN_TASKLET_H__ */ > > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |