[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merge i386/x86_64 smpboot.c into a simplified common Xen version.
# HG changeset patch # User kaf24@xxxxxxxxxxxxxxxxxxxx # Node ID 3d27ee7da0c17a37621e5cded2f0c7d18a6cdc5a # Parent 7169e31606bdff194606d5a991655ef4eed4324f Merge i386/x86_64 smpboot.c into a simplified common Xen version. Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx> diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Tue Oct 18 14:40:29 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Tue Oct 18 16:40:31 2005 @@ -27,7 +27,7 @@ c-obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o c-obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_X86_SMP) += smp.o smpboot.o +obj-$(CONFIG_X86_SMP) += smp.o #obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Tue Oct 18 14:40:29 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Tue Oct 18 16:40:31 2005 @@ -40,7 +40,7 @@ ENTRY(startup_32) movl %esi,xen_start_info -#ifdef CONFIG_SMP +#if 0 ENTRY(startup_32_smp) #endif /* CONFIG_SMP */ @@ -78,7 +78,7 @@ movl %eax,%gs cld # gcc2 wants the direction flag cleared at all times -#ifdef CONFIG_SMP +#if 0 movb ready, %cl cmpb $1,%cl je 1f # the first CPU calls start_kernel diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Tue Oct 18 14:40:29 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Tue Oct 18 16:40:31 2005 @@ -136,9 +136,6 @@ #endif #ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_callout_map); EXPORT_SYMBOL(__write_lock_failed); EXPORT_SYMBOL(__read_lock_failed); diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/arch/xen/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Tue Oct 18 14:40:29 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Tue Oct 18 16:40:31 2005 @@ -15,4 +15,4 @@ obj-$(CONFIG_PROC_FS) += xen_proc.o obj-$(CONFIG_NET) += skbuff.o -obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smp.o smpboot.o diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Oct 18 14:40:29 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Oct 18 16:40:31 2005 @@ -25,7 +25,7 @@ c-obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_SMP) += smp.o smpboot.o +obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c Tue Oct 18 14:40:29 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c Tue Oct 18 16:40:31 2005 @@ -113,14 +113,11 @@ EXPORT_SYMBOL(cpu_pda); #ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(__write_lock_failed); EXPORT_SYMBOL(__read_lock_failed); EXPORT_SYMBOL(synchronize_irq); EXPORT_SYMBOL(smp_call_function); -EXPORT_SYMBOL(cpu_callout_map); #endif #ifdef CONFIG_VT diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h Tue Oct 18 14:40:29 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h Tue Oct 18 16:40:31 2005 @@ -34,7 +34,6 @@ extern cpumask_t cpu_present_mask; extern cpumask_t cpu_possible_map; extern cpumask_t cpu_online_map; -extern cpumask_t cpu_callout_map; /* * Private routines/data @@ -52,8 +51,8 @@ void smp_stop_cpu(void); extern cpumask_t cpu_sibling_map[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; -extern u8 phys_proc_id[NR_CPUS]; -extern u8 cpu_core_id[NR_CPUS]; +extern int phys_proc_id[NR_CPUS]; +extern int cpu_core_id[NR_CPUS]; #define SMP_TRAMPOLINE_BASE 0x6000 @@ -65,7 +64,7 @@ static inline int num_booting_cpus(void) { - return cpus_weight(cpu_callout_map); + return cpus_weight(cpu_possible_map); } #define __smp_processor_id() read_pda(cpunumber) diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c --- /dev/null Tue Oct 18 14:40:29 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c Tue Oct 18 16:40:31 2005 @@ -0,0 +1,381 @@ +/* + * Xen SMP booting functions + * + * See arch/i386/kernel/smpboot.c for copyright and credits for derived + * portions of this file. + */ + +#include <linux/module.h> +#include <linux/config.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/kernel_stat.h> +#include <linux/smp_lock.h> +#include <linux/irq.h> +#include <linux/bootmem.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/percpu.h> +#include <asm/desc.h> +#include <asm/arch_hooks.h> +#include <asm/pgalloc.h> +#include <asm-xen/evtchn.h> +#include <asm-xen/xen-public/vcpu.h> +#include <asm-xen/xenbus.h> + +#ifdef CONFIG_SMP_ALTERNATIVES +#include <asm/smp_alt.h> +#endif + +extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *); +extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *); + +extern void local_setup_timer(unsigned int cpu); +extern void local_teardown_timer(unsigned int cpu); + +extern void hypervisor_callback(void); +extern void failsafe_callback(void); +extern void system_call(void); +extern void smp_trap_init(trap_info_t *); + +extern cpumask_t cpu_initialized; + +/* Number of siblings per CPU package */ +int smp_num_siblings = 1; +int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ +EXPORT_SYMBOL(phys_proc_id); +int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ +EXPORT_SYMBOL(cpu_core_id); + +cpumask_t cpu_online_map; +EXPORT_SYMBOL(cpu_online_map); +cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + +struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_data); + +#ifdef CONFIG_HOTPLUG_CPU +DEFINE_PER_CPU(int, cpu_state) = { 0 }; +#endif + +static DEFINE_PER_CPU(int, resched_irq); +static DEFINE_PER_CPU(int, callfunc_irq); +static char resched_name[NR_CPUS][15]; +static char callfunc_name[NR_CPUS][15]; + +u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; + +void *xquad_portio; + +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_core_map); + +#ifdef __i386__ +u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff }; +EXPORT_SYMBOL(x86_cpu_to_apicid); +#else +unsigned int maxcpus = NR_CPUS; +#endif + +void __init smp_alloc_memory(void) +{ +} + +static void xen_smp_intr_init(unsigned int cpu) +{ + per_cpu(resched_irq, cpu) = + bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu); + sprintf(resched_name[cpu], "resched%d", cpu); + BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt, + SA_INTERRUPT, resched_name[cpu], NULL)); + + per_cpu(callfunc_irq, cpu) = + bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu); + sprintf(callfunc_name[cpu], "callfunc%d", cpu); + BUG_ON(request_irq(per_cpu(callfunc_irq, cpu), + smp_call_function_interrupt, + SA_INTERRUPT, callfunc_name[cpu], NULL)); + + if (cpu != 0) + local_setup_timer(cpu); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void xen_smp_intr_exit(unsigned int cpu) +{ + if (cpu != 0) + local_teardown_timer(cpu); + + free_irq(per_cpu(resched_irq, cpu), NULL); + unbind_ipi_from_irq(RESCHEDULE_VECTOR, cpu); + + free_irq(per_cpu(callfunc_irq, cpu), NULL); + unbind_ipi_from_irq(CALL_FUNCTION_VECTOR, cpu); +} +#endif + +static void cpu_bringup(void) +{ + if (!cpu_isset(smp_processor_id(), cpu_initialized)) + cpu_init(); + local_irq_enable(); + cpu_idle(); +} + +void vcpu_prepare(int vcpu) +{ + vcpu_guest_context_t ctxt; + struct task_struct *idle = idle_task(vcpu); + + if (vcpu == 0) + return; + + memset(&ctxt, 0, sizeof(ctxt)); + + ctxt.flags = VGCF_IN_KERNEL; + ctxt.user_regs.ds = __USER_DS; + ctxt.user_regs.es = __USER_DS; + ctxt.user_regs.fs = 0; + ctxt.user_regs.gs = 0; + ctxt.user_regs.ss = __KERNEL_DS; + ctxt.user_regs.eip = (unsigned long)cpu_bringup; + ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */ + + memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); + + smp_trap_init(ctxt.trap_ctxt); + + ctxt.ldt_ents = 0; + + ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[vcpu].address); + ctxt.gdt_ents = cpu_gdt_descr[vcpu].size / 8; + +#ifdef __i386__ + ctxt.user_regs.cs = __KERNEL_CS; + ctxt.user_regs.esp = idle->thread.esp; + + ctxt.kernel_ss = __KERNEL_DS; + ctxt.kernel_sp = idle->thread.esp0; + + ctxt.event_callback_cs = __KERNEL_CS; + ctxt.event_callback_eip = (unsigned long)hypervisor_callback; + ctxt.failsafe_callback_cs = __KERNEL_CS; + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; + + ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT; +#else + ctxt.user_regs.cs = __KERNEL_CS | 3; + ctxt.user_regs.esp = idle->thread.rsp; + + ctxt.kernel_ss = __KERNEL_DS; + ctxt.kernel_sp = idle->thread.rsp0; + + ctxt.event_callback_eip = (unsigned long)hypervisor_callback; + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; + ctxt.syscall_callback_eip = (unsigned long)system_call; + + ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT; +#endif + + BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt)); +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + int cpu; + struct task_struct *idle; + + if (max_cpus == 0) { + xen_start_info->n_vcpu = 1; + return; + } + + if (max_cpus < xen_start_info->n_vcpu) + xen_start_info->n_vcpu = max_cpus; + + xen_smp_intr_init(0); + + for (cpu = 1; cpu < xen_start_info->n_vcpu; cpu++) { + cpu_data[cpu] = boot_cpu_data; + cpu_2_logical_apicid[cpu] = cpu; + x86_cpu_to_apicid[cpu] = cpu; + + idle = fork_idle(cpu); + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + + irq_ctx_init(cpu); + + cpu_gdt_descr[cpu].address = + __get_free_page(GFP_KERNEL|__GFP_ZERO); + BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE); + cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size; + memcpy((void *)cpu_gdt_descr[cpu].address, + (void *)cpu_gdt_descr[0].address, + cpu_gdt_descr[0].size); + make_page_readonly((void *)cpu_gdt_descr[cpu].address); + + cpu_set(cpu, cpu_possible_map); + + vcpu_prepare(cpu); + } + + /* Currently, Xen gives no dynamic NUMA/HT info. */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + } + +#ifdef CONFIG_X86_IO_APIC + /* + * Here we can be sure that there is an IO-APIC in the system. Let's + * go and set it up: + */ + if (!skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +#endif +} + +void __devinit smp_prepare_boot_cpu(void) +{ + cpu_possible_map = cpumask_of_cpu(0); + cpu_online_map = cpumask_of_cpu(0); + + cpu_data[0] = boot_cpu_data; + cpu_2_logical_apicid[0] = 0; + x86_cpu_to_apicid[0] = 0; + + current_thread_info()->cpu = 0; + cpus_clear(cpu_sibling_map[0]); + cpu_set(0, cpu_sibling_map[0]); + + cpus_clear(cpu_core_map[0]); + cpu_set(0, cpu_core_map[0]); +} + +#ifdef CONFIG_HOTPLUG_CPU + +static void handle_vcpu_hotplug_event( + struct xenbus_watch *watch, const char **vec, unsigned int len) +{ + int err, cpu; + char dir[32], state[32]; + char *cpustr; + const char *node = vec[XS_WATCH_PATH]; + + if ((cpustr = strstr(node, "cpu/")) == NULL) + return; + + sscanf(cpustr, "cpu/%d", &cpu); + + sprintf(dir, "cpu/%d", cpu); + err = xenbus_scanf(NULL, dir, "availability", "%s", state); + if (err != 1) { + printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); + return; + } + + if (strcmp(state, "online") == 0) + (void)cpu_up(cpu); + else if (strcmp(state, "offline") == 0) + (void)cpu_down(cpu); + else + printk(KERN_ERR "XENBUS: unknown state(%s) on node(%s)\n", + state, node); +} + +static int setup_cpu_watcher(struct notifier_block *notifier, + unsigned long event, void *data) +{ + static struct xenbus_watch cpu_watch = { + .node = "cpu", + .callback = handle_vcpu_hotplug_event }; + (void)register_xenbus_watch(&cpu_watch); + return NOTIFY_DONE; +} + +static int __init setup_vcpu_hotplug_event(void) +{ + static struct notifier_block xsn_cpu = { + .notifier_call = setup_cpu_watcher }; + register_xenstore_notifier(&xsn_cpu); + return 0; +} + +subsys_initcall(setup_vcpu_hotplug_event); + +int __cpu_disable(void) +{ + cpumask_t map = cpu_online_map; + int cpu = smp_processor_id(); + + if (cpu == 0) + return -EBUSY; + + cpu_clear(cpu, map); + fixup_irqs(map); + cpu_clear(cpu, cpu_online_map); + + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ/10); + } + + xen_smp_intr_exit(cpu); + +#ifdef CONFIG_SMP_ALTERNATIVES + if (num_online_cpus() == 1) + unprepare_for_smp(); +#endif +} + +#else /* ... !CONFIG_HOTPLUG_CPU */ + +int __cpu_disable(void) +{ + return -ENOSYS; +} + +void __cpu_die(unsigned int cpu) +{ + BUG(); +} + +#endif /* CONFIG_HOTPLUG_CPU */ + +int __devinit __cpu_up(unsigned int cpu) +{ +#ifdef CONFIG_SMP_ALTERNATIVES + if (num_online_cpus() == 1) + prepare_for_smp(); +#endif + + xen_smp_intr_init(cpu); + cpu_set(cpu, cpu_online_map); + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + + return 0; +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/include/asm-xen/asm-i386/smp.h --- /dev/null Tue Oct 18 14:40:29 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/smp.h Tue Oct 18 16:40:31 2005 @@ -0,0 +1,92 @@ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +/* + * We need the APIC definitions automatically as part of 'smp.h' + */ +#ifndef __ASSEMBLY__ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/threads.h> +#include <linux/cpumask.h> +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +#ifndef __ASSEMBLY__ +#include <asm/fixmap.h> +#include <asm/bitops.h> +#include <asm/mpspec.h> +#ifdef CONFIG_X86_IO_APIC +#include <asm/io_apic.h> +#endif +#include <asm/apic.h> +#endif +#endif + +#define BAD_APICID 0xFFu +#ifdef CONFIG_SMP +#ifndef __ASSEMBLY__ + +/* + * Private routines/data + */ + +extern void smp_alloc_memory(void); +extern int pic_mode; +extern int smp_num_siblings; +extern cpumask_t cpu_sibling_map[]; +extern cpumask_t cpu_core_map[]; + +extern void smp_flush_tlb(void); +extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); +extern void smp_invalidate_rcv(void); /* Process an NMI */ +extern void (*mtrr_hook) (void); +extern void zap_low_mappings (void); + +#define MAX_APICID 256 +extern u8 x86_cpu_to_apicid[]; + +/* + * This function is needed by all SMP systems. It must _always_ be valid + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. + */ +#define __smp_processor_id() (current_thread_info()->cpu) + +extern cpumask_t cpu_possible_map; + +/* We don't mark CPUs online until __cpu_up(), so we need another measure */ +static inline int num_booting_cpus(void) +{ + return cpus_weight(cpu_possible_map); +} + +#ifdef CONFIG_X86_LOCAL_APIC + +#ifdef APIC_DEFINITION +extern int hard_smp_processor_id(void); +#else +#include <mach_apicdef.h> +static inline int hard_smp_processor_id(void) +{ + /* we don't want to mark this access volatile - bad code generation */ + return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); +} +#endif + +static __inline int logical_smp_processor_id(void) +{ + /* we don't want to mark this access volatile - bad code generation */ + return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); +} + +#endif + +extern int __cpu_disable(void); +extern void __cpu_die(unsigned int cpu); +#endif /* !__ASSEMBLY__ */ + +#define NO_PROC_ID 0xFF /* No processor magic marker */ + +#endif +#endif diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Tue Oct 18 14:40:29 2005 +++ /dev/null Tue Oct 18 16:40:31 2005 @@ -1,1520 +0,0 @@ -/* - * x86 SMP booting functions - * - * (c) 1995 Alan Cox, Building #3 <alan@xxxxxxxxxx> - * (c) 1998, 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx> - * - * Much of the core SMP work is based on previous work by Thomas Radke, to - * whom a great many thanks are extended. - * - * Thanks to Intel for making available several different Pentium, - * Pentium Pro and Pentium-II/Xeon MP machines. - * Original development of Linux SMP code supported by Caldera. - * - * This code is released under the GNU General Public License version 2 or - * later. - * - * Fixes - * Felix Koop : NR_CPUS used properly - * Jose Renau : Handle single CPU case. - * Alan Cox : By repeated request 8) - Total BogoMIPS report. - * Greg Wright : Fix for kernel stacks panic. - * Erich Boleyn : MP v1.4 and additional changes. - * Matthias Sattler : Changes for 2.1 kernel map. - * Michel Lespinasse : Changes for 2.1 kernel map. - * Michael Chastain : Change trampoline.S to gnu as. - * Alan Cox : Dumb bug: 'B' step PPro's are fine - * Ingo Molnar : Added APIC timers, based on code - * from Jose Renau - * Ingo Molnar : various cleanups and rewrites - * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. - * Maciej W. Rozycki : Bits for genuine 82489DX APICs - * Martin J. Bligh : Added support for multi-quad systems - * Dave Jones : Report invalid combinations of Athlon CPUs. -* Rusty Russell : Hacked into shape for new "hotplug" boot process. */ - -#include <linux/module.h> -#include <linux/config.h> -#include <linux/init.h> -#include <linux/kernel.h> - -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/kernel_stat.h> -#include <linux/smp_lock.h> -#include <linux/irq.h> -#include <linux/bootmem.h> -#include <linux/notifier.h> -#include <linux/cpu.h> -#include <linux/percpu.h> - -#include <linux/delay.h> -#include <linux/mc146818rtc.h> -#include <asm/tlbflush.h> -#include <asm/desc.h> -#include <asm/arch_hooks.h> - -#include <asm/smp_alt.h> - -#ifndef CONFIG_X86_IO_APIC -#define Dprintk(args...) -#endif -#include <mach_wakecpu.h> -#include <smpboot_hooks.h> - -#include <asm-xen/evtchn.h> -#include <asm-xen/xen-public/vcpu.h> -#include <asm-xen/xenbus.h> - -static void xen_smp_intr_init(unsigned int cpu); -static void xen_smp_intr_exit(unsigned int cpu); - -/* Set if we find a B stepping CPU */ -static int __initdata smp_b_stepping; - -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; -int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ -EXPORT_SYMBOL(phys_proc_id); -int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ -EXPORT_SYMBOL(cpu_core_id); - -/* bitmap of online cpus */ -cpumask_t cpu_online_map; - -cpumask_t cpu_callin_map; -cpumask_t cpu_callout_map; -static cpumask_t smp_commenced_mask; - -/* Per CPU bogomips and other parameters */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; - -u8 x86_cpu_to_apicid[NR_CPUS] = - { [0 ... NR_CPUS-1] = 0xff }; -EXPORT_SYMBOL(x86_cpu_to_apicid); - -#if 0 -/* - * Trampoline 80x86 program as an array. - */ - -extern unsigned char trampoline_data []; -extern unsigned char trampoline_end []; -static unsigned char *trampoline_base; -static int trampoline_exec; -#endif - -#ifdef CONFIG_HOTPLUG_CPU -/* State of each CPU. */ -DEFINE_PER_CPU(int, cpu_state) = { 0 }; -#endif - -static DEFINE_PER_CPU(int, resched_irq); -static DEFINE_PER_CPU(int, callfunc_irq); -static char resched_name[NR_CPUS][15]; -static char callfunc_name[NR_CPUS][15]; - -#if 0 -/* - * Currently trivial. Write the real->protected mode - * bootstrap into the page concerned. The caller - * has made sure it's suitably aligned. - */ - -static unsigned long __init setup_trampoline(void) -{ - memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); - return virt_to_phys(trampoline_base); -} -#endif - -static void map_cpu_to_logical_apicid(void); - -/* - * We are called very early to get the low memory for the - * SMP bootup trampoline page. - */ -void __init smp_alloc_memory(void) -{ -#if 0 - trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE); - /* - * Has to be in very low memory so we can execute - * real-mode AP code. - */ - if (__pa(trampoline_base) >= 0x9F000) - BUG(); - /* - * Make the SMP trampoline executable: - */ - trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1); -#endif -} - -/* - * The bootstrap kernel entry code has set these up. Save them for - * a given CPU - */ - -static void __init smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = cpu_data + id; - - *c = boot_cpu_data; - if (id!=0) - identify_cpu(c); - /* - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && - c->x86_model <= 3) - /* - * Remember we have B step Pentia with bugs - */ - smp_b_stepping = 1; - - /* - * Certain Athlons might work (for various values of 'work') in SMP - * but they are not certified as MP capable. - */ - if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { - - /* Athlon 660/661 is valid. */ - if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) - goto valid_k7; - - /* Duron 670 is valid */ - if ((c->x86_model==7) && (c->x86_mask==0)) - goto valid_k7; - - /* - * Athlon 662, Duron 671, and Athlon >model 7 have capability bit. - * It's worth noting that the A5 stepping (662) of some Athlon XP's - * have the MP bit set. - * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more. - */ - if (((c->x86_model==6) && (c->x86_mask>=2)) || - ((c->x86_model==7) && (c->x86_mask>=1)) || - (c->x86_model> 7)) - if (cpu_has_mp) - goto valid_k7; - - /* If we get here, it's not a certified SMP capable AMD system. */ - tainted |= TAINT_UNSAFE_SMP; - } - -valid_k7: - ; -} - -#if 0 -/* - * TSC synchronization. - * - * We first check whether all CPUs have their TSC's synchronized, - * then we print a warning if not, and always resync. - */ - -static atomic_t tsc_start_flag = ATOMIC_INIT(0); -static atomic_t tsc_count_start = ATOMIC_INIT(0); -static atomic_t tsc_count_stop = ATOMIC_INIT(0); -static unsigned long long tsc_values[NR_CPUS]; - -#define NR_LOOPS 5 - -static void __init synchronize_tsc_bp (void) -{ - int i; - unsigned long long t0; - unsigned long long sum, avg; - long long delta; - unsigned long one_usec; - int buggy = 0; - - printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); - - /* convert from kcyc/sec to cyc/usec */ - one_usec = cpu_khz / 1000; - - atomic_set(&tsc_start_flag, 1); - wmb(); - - /* - * We loop a few times to get a primed instruction cache, - * then the last pass is more or less synchronized and - * the BP and APs set their cycle counters to zero all at - * once. This reduces the chance of having random offsets - * between the processors, and guarantees that the maximum - * delay between the cycle counters is never bigger than - * the latency of information-passing (cachelines) between - * two CPUs. - */ - for (i = 0; i < NR_LOOPS; i++) { - /* - * all APs synchronize but they loop on '== num_cpus' - */ - while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) - mb(); - atomic_set(&tsc_count_stop, 0); - wmb(); - /* - * this lets the APs save their current TSC: - */ - atomic_inc(&tsc_count_start); - - rdtscll(tsc_values[smp_processor_id()]); - /* - * We clear the TSC in the last loop: - */ - if (i == NR_LOOPS-1) - write_tsc(0, 0); - - /* - * Wait for all APs to leave the synchronization point: - */ - while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) - mb(); - atomic_set(&tsc_count_start, 0); - wmb(); - atomic_inc(&tsc_count_stop); - } - - sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_isset(i, cpu_callout_map)) { - t0 = tsc_values[i]; - sum += t0; - } - } - avg = sum; - do_div(avg, num_booting_cpus()); - - sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - delta = tsc_values[i] - avg; - if (delta < 0) - delta = -delta; - /* - * We report bigger than 2 microseconds clock differences. - */ - if (delta > 2*one_usec) { - long realdelta; - if (!buggy) { - buggy = 1; - printk("\n"); - } - realdelta = delta; - do_div(realdelta, one_usec); - if (tsc_values[i] < avg) - realdelta = -realdelta; - - printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta); - } - - sum += delta; - } - if (!buggy) - printk("passed.\n"); -} - -static void __init synchronize_tsc_ap (void) -{ - int i; - - /* - * Not every cpu is online at the time - * this gets called, so we first wait for the BP to - * finish SMP initialization: - */ - while (!atomic_read(&tsc_start_flag)) mb(); - - for (i = 0; i < NR_LOOPS; i++) { - atomic_inc(&tsc_count_start); - while (atomic_read(&tsc_count_start) != num_booting_cpus()) - mb(); - - rdtscll(tsc_values[smp_processor_id()]); - if (i == NR_LOOPS-1) - write_tsc(0, 0); - - atomic_inc(&tsc_count_stop); - while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); - } -} -#undef NR_LOOPS -#endif - -extern void calibrate_delay(void); - -static atomic_t init_deasserted; - -static void __init smp_callin(void) -{ - int cpuid, phys_id; -#if 0 - unsigned long timeout; - - /* - * If waken up by an INIT in an 82489DX configuration - * we may get here before an INIT-deassert IPI reaches - * our local APIC. We have to wait for the IPI or we'll - * lock up on an APIC access. - */ - wait_for_init_deassert(&init_deasserted); -#endif - - /* - * (This works even if the APIC is not enabled.) - */ - phys_id = smp_processor_id(); - cpuid = smp_processor_id(); - if (cpu_isset(cpuid, cpu_callin_map)) { - printk("huh, phys CPU#%d, CPU#%d already present??\n", - phys_id, cpuid); - BUG(); - } - Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); - -#if 0 - /* - * STARTUP IPIs are fragile beasts as they might sometimes - * trigger some glue motherboard logic. Complete APIC bus - * silence for 1 second, this overestimates the time the - * boot CPU is spending to send the up to 2 STARTUP IPIs - * by a factor of two. This should be enough. - */ - - /* - * Waiting 2s total for startup (udelay is not yet working) - */ - timeout = jiffies + 2*HZ; - while (time_before(jiffies, timeout)) { - /* - * Has the boot CPU finished it's STARTUP sequence? - */ - if (cpu_isset(cpuid, cpu_callout_map)) - break; - rep_nop(); - } - - if (!time_before(jiffies, timeout)) { - printk("BUG: CPU%d started up but did not get a callout!\n", - cpuid); - BUG(); - } - - /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. (this is probably redundant on most - * boards) - */ - - Dprintk("CALLIN, before setup_local_APIC().\n"); - smp_callin_clear_local_apic(); - setup_local_APIC(); -#endif - map_cpu_to_logical_apicid(); - - /* - * Get our bogomips. - */ - calibrate_delay(); - Dprintk("Stack at about %p\n",&cpuid); - - /* - * Save our processor parameters - */ - smp_store_cpu_info(cpuid); - -#if 0 - disable_APIC_timer(); -#endif - - /* - * Allow the master to continue. - */ - cpu_set(cpuid, cpu_callin_map); - -#if 0 - /* - * Synchronize the TSC with the BP - */ - if (cpu_has_tsc && cpu_khz) - synchronize_tsc_ap(); -#endif -} - -static int cpucount; - -/* - * Activate a secondary processor. - */ -static void __init start_secondary(void *unused) -{ - /* - * Dont put anything before smp_callin(), SMP - * booting is too fragile that we want to limit the - * things done here to the most necessary things. - */ - cpu_init(); - smp_callin(); - while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) - rep_nop(); - local_irq_enable(); - /* - * low-memory mappings have been cleared, flush them from - * the local TLBs too. - */ - local_flush_tlb(); - cpu_set(smp_processor_id(), cpu_online_map); - - /* We can take interrupts now: we're officially "up". */ - local_irq_enable(); - - wmb(); - cpu_idle(); -} - -/* - * Everything has been set up for the secondary - * CPUs - they just need to reload everything - * from the task structure - * This function must not return. - */ -void __init initialize_secondary(void) -{ - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the eip. - */ - - asm volatile( - "movl %0,%%esp\n\t" - "jmp *%1" - : - :"r" (current->thread.esp),"r" (current->thread.eip)); -} - -extern struct { - void * esp; - unsigned short ss; -} stack_start; - -#ifdef CONFIG_NUMA - -/* which logical CPUs are on which nodes */ -cpumask_t node_2_cpu_mask[MAX_NUMNODES] = - { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; -/* which node each logical CPU is on */ -int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; -EXPORT_SYMBOL(cpu_2_node); - -/* set up a mapping between cpu and node. */ -static inline void map_cpu_to_node(int cpu, int node) -{ - printk("Mapping cpu %d to node %d\n", cpu, node); - cpu_set(cpu, node_2_cpu_mask[node]); - cpu_2_node[cpu] = node; -} - -/* undo a mapping between cpu and node. */ -static inline void unmap_cpu_to_node(int cpu) -{ - int node; - - printk("Unmapping cpu %d from all nodes\n", cpu); - for (node = 0; node < MAX_NUMNODES; node ++) - cpu_clear(cpu, node_2_cpu_mask[node]); - cpu_2_node[cpu] = 0; -} -#else /* !CONFIG_NUMA */ - -#define map_cpu_to_node(cpu, node) ({}) -#define unmap_cpu_to_node(cpu) ({}) - -#endif /* CONFIG_NUMA */ - -u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; - -static void map_cpu_to_logical_apicid(void) -{ - int cpu = smp_processor_id(); - int apicid = smp_processor_id(); - - cpu_2_logical_apicid[cpu] = apicid; - map_cpu_to_node(cpu, apicid_to_node(apicid)); -} - -static void unmap_cpu_to_logical_apicid(int cpu) -{ - cpu_2_logical_apicid[cpu] = BAD_APICID; - unmap_cpu_to_node(cpu); -} - -#if APIC_DEBUG -static inline void __inquire_remote_apic(int apicid) -{ - int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; - char *names[] = { "ID", "VERSION", "SPIV" }; - int timeout, status; - - printk("Inquiring remote APIC #%d...\n", apicid); - - for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { - printk("... APIC #%d %s: ", apicid, names[i]); - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); - - timeout = 0; - do { - udelay(100); - status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; - } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); - - switch (status) { - case APIC_ICR_RR_VALID: - status = apic_read(APIC_RRR); - printk("%08x\n", status); - break; - default: - printk("failed\n"); - } - } -} -#endif - -#if 0 -#ifdef WAKE_SECONDARY_VIA_NMI -/* - * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal - * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this - * won't ... remember to clear down the APIC, etc later. - */ -static int __init -wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) -{ - unsigned long send_status = 0, accept_status = 0; - int timeout, maxlvt; - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); - - /* Boot on the stack */ - /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ - maxlvt = get_maxlvt(); - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - } - accept_status = (apic_read(APIC_ESR) & 0xEF); - Dprintk("NMI sent.\n"); - - if (send_status) - printk("APIC never delivered???\n"); - if (accept_status) - printk("APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} -#endif /* WAKE_SECONDARY_VIA_NMI */ - -#ifdef WAKE_SECONDARY_VIA_INIT -static int __init -wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) -{ - unsigned long send_status = 0, accept_status = 0; - int maxlvt, timeout, num_starts, j; - - /* - * Be paranoid about clearing APIC errors. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } - - Dprintk("Asserting INIT.\n"); - - /* - * Turn INIT on target chip - */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* - * Send IPI - */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - mdelay(10); - - Dprintk("Deasserting INIT.\n"); - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - atomic_set(&init_deasserted, 1); - - /* - * Should we send STARTUP IPIs ? - * - * Determine this based on the APIC version. - * If we don't have an integrated APIC, don't send the STARTUP IPIs. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) - num_starts = 2; - else - num_starts = 0; - - /* - * Run STARTUP IPI loop. - */ - Dprintk("#startup loops: %d.\n", num_starts); - - maxlvt = get_maxlvt(); - - for (j = 1; j <= num_starts; j++) { - Dprintk("Sending STARTUP #%d.\n",j); - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - Dprintk("After apic_write.\n"); - - /* - * STARTUP IPI - */ - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Boot on the stack */ - /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_eip >> 12)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(300); - - Dprintk("Startup point 1.\n"); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - } - accept_status = (apic_read(APIC_ESR) & 0xEF); - if (send_status || accept_status) - break; - } - Dprintk("After Startup.\n"); - - if (send_status) - printk("APIC never delivered???\n"); - if (accept_status) - printk("APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} -#endif /* WAKE_SECONDARY_VIA_INIT */ -#endif - -extern cpumask_t cpu_initialized; - -static int __init do_boot_cpu(int apicid) -/* - * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad - * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. - */ -{ - struct task_struct *idle; - unsigned long boot_error; - int timeout, cpu; - unsigned long start_eip; -#if 0 - unsigned short nmi_high = 0, nmi_low = 0; -#endif - vcpu_guest_context_t ctxt; - extern void startup_32_smp(void); - extern void hypervisor_callback(void); - extern void failsafe_callback(void); - extern void smp_trap_init(trap_info_t *); - - cpu = ++cpucount; - /* - * We can't use kernel_thread since we must avoid to - * reschedule the child. - */ - idle = fork_idle(cpu); - if (IS_ERR(idle)) - panic("failed fork for CPU %d", cpu); - idle->thread.eip = (unsigned long) start_secondary; - /* start_eip had better be page-aligned! */ - start_eip = (unsigned long)startup_32_smp; - - /* So we see what's up */ - printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); - /* Stack for startup_32 can be just as for start_secondary onwards */ - stack_start.esp = (void *) idle->thread.esp; - - irq_ctx_init(cpu); - - /* - * This grunge runs the startup process for - * the targeted processor. - */ - - atomic_set(&init_deasserted, 0); - -#if 1 - cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL|__GFP_ZERO); - BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE); - cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size; - memcpy((void *)cpu_gdt_descr[cpu].address, - (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size); - - memset(&ctxt, 0, sizeof(ctxt)); - - ctxt.user_regs.ds = __USER_DS; - ctxt.user_regs.es = __USER_DS; - ctxt.user_regs.fs = 0; - ctxt.user_regs.gs = 0; - ctxt.user_regs.ss = __KERNEL_DS; - ctxt.user_regs.cs = __KERNEL_CS; - ctxt.user_regs.eip = start_eip; - ctxt.user_regs.esp = idle->thread.esp; -#define X86_EFLAGS_IOPL_RING1 0x1000 - ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1; - - /* FPU is set up to default initial state. */ - memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); - - smp_trap_init(ctxt.trap_ctxt); - - /* No LDT. */ - ctxt.ldt_ents = 0; - - { - unsigned long va; - int f; - - for (va = cpu_gdt_descr[cpu].address, f = 0; - va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size; - va += PAGE_SIZE, f++) { - ctxt.gdt_frames[f] = virt_to_mfn(va); - make_page_readonly((void *)va); - } - ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8; - } - - /* Ring 1 stack is the initial stack. */ - ctxt.kernel_ss = __KERNEL_DS; - ctxt.kernel_sp = idle->thread.esp; - - /* Callback handlers. */ - ctxt.event_callback_cs = __KERNEL_CS; - ctxt.event_callback_eip = (unsigned long)hypervisor_callback; - ctxt.failsafe_callback_cs = __KERNEL_CS; - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; - - ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT; - - boot_error = HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt); - if (boot_error) - printk("boot error: %ld\n", boot_error); - - if (!boot_error) { - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); - - /* - * allow APs to start initializing. - */ - Dprintk("Before Callout %d.\n", cpu); - cpu_set(cpu, cpu_callout_map); - Dprintk("After Callout %d.\n", cpu); - - /* - * Wait 5s total for a response - */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_isset(cpu, cpu_callin_map)) - break; /* It has booted */ - udelay(100); - } - - if (cpu_isset(cpu, cpu_callin_map)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - Dprintk("OK.\n"); - printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data[cpu]); - Dprintk("CPU has booted.\n"); - } else { - boot_error= 1; - } - } - x86_cpu_to_apicid[cpu] = apicid; - if (boot_error) { - /* Try to put things back the way they were before ... */ - unmap_cpu_to_logical_apicid(cpu); - cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ - cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ - cpucount--; - } - -#else - Dprintk("Setting warm reset code and vector.\n"); - - store_NMI_vector(&nmi_high, &nmi_low); - - smpboot_setup_warm_reset_vector(start_eip); - - /* - * Starting actual IPI sequence... - */ - boot_error = wakeup_secondary_cpu(apicid, start_eip); - - if (!boot_error) { - /* - * allow APs to start initializing. - */ - Dprintk("Before Callout %d.\n", cpu); - cpu_set(cpu, cpu_callout_map); - Dprintk("After Callout %d.\n", cpu); - - /* - * Wait 5s total for a response - */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_isset(cpu, cpu_callin_map)) - break; /* It has booted */ - udelay(100); - } - - if (cpu_isset(cpu, cpu_callin_map)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - Dprintk("OK.\n"); - printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data[cpu]); - Dprintk("CPU has booted.\n"); - } else { - boot_error= 1; - if (*((volatile unsigned char *)trampoline_base) - == 0xA5) - /* trampoline started but...? */ - printk("Stuck ??\n"); - else - /* trampoline code not run */ - printk("Not responding.\n"); - inquire_remote_apic(apicid); - } - } - x86_cpu_to_apicid[cpu] = apicid; - if (boot_error) { - /* Try to put things back the way they were before ... */ - unmap_cpu_to_logical_apicid(cpu); - cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ - cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ - cpucount--; - } - - /* mark "stuck" area as not stuck */ - *((volatile unsigned long *)trampoline_base) = 0; -#endif - - return boot_error; -} - -static void smp_tune_scheduling (void) -{ - unsigned long cachesize; /* kB */ - unsigned long bandwidth = 350; /* MB/s */ - /* - * Rough estimation for SMP scheduling, this is the number of - * cycles it takes for a fully memory-limited process to flush - * the SMP-local cache. - * - * (For a P5 this pretty much means we will choose another idle - * CPU almost always at wakeup time (this is due to the small - * L1 cache), on PIIs it's around 50-100 usecs, depending on - * the cache size) - */ - - if (!cpu_khz) { - /* - * this basically disables processor-affinity - * scheduling on SMP without a TSC. - */ - return; - } else { - cachesize = boot_cpu_data.x86_cache_size; - if (cachesize == -1) { - cachesize = 16; /* Pentiums, 2x8kB cache */ - bandwidth = 100; - } - } -} - -/* - * Cycle through the processors sending APIC IPIs to boot each. - */ - -#if 0 -static int boot_cpu_logical_apicid; -#endif -/* Where the IO area was mapped on multiquad, always 0 otherwise */ -void *xquad_portio; - -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_core_map); - -static void __init smp_boot_cpus(unsigned int max_cpus) -{ - int cpu, kicked; - unsigned long bogosum = 0; -#if 0 - int apicid, bit; -#endif - - /* - * Setup boot CPU information - */ - smp_store_cpu_info(0); /* Final full version of the data */ - printk("CPU%d: ", 0); - print_cpu_info(&cpu_data[0]); - -#if 0 - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - boot_cpu_logical_apicid = logical_smp_processor_id(); - x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; -#else - // boot_cpu_physical_apicid = 0; - // boot_cpu_logical_apicid = 0; - x86_cpu_to_apicid[0] = 0; -#endif - - current_thread_info()->cpu = 0; - smp_tune_scheduling(); - cpus_clear(cpu_sibling_map[0]); - cpu_set(0, cpu_sibling_map[0]); - - cpus_clear(cpu_core_map[0]); - cpu_set(0, cpu_core_map[0]); - -#ifdef CONFIG_X86_IO_APIC - /* - * If we couldn't find an SMP configuration at boot time, - * get out of here now! - */ - if (!smp_found_config && !acpi_lapic) { - printk(KERN_NOTICE "SMP motherboard not detected.\n"); - smpboot_clear_io_apic_irqs(); -#if 0 - phys_cpu_present_map = physid_mask_of_physid(0); -#endif -#ifdef CONFIG_X86_LOCAL_APIC - if (APIC_init_uniprocessor()) - printk(KERN_NOTICE "Local APIC not detected." - " Using dummy APIC emulation.\n"); -#endif - map_cpu_to_logical_apicid(); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); - return; - } -#endif - -#if 0 - /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. - * Makes no sense to do this check in clustered apic mode, so skip it - */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk("weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); - return; - } - - verify_local_APIC(); -#endif - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - xen_start_info->n_vcpu = 1; - printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); - smpboot_clear_io_apic_irqs(); -#if 0 - phys_cpu_present_map = physid_mask_of_physid(0); -#endif - return; - } - - xen_smp_intr_init(0); - -#if 0 - connect_bsp_APIC(); - setup_local_APIC(); -#endif - map_cpu_to_logical_apicid(); -#if 0 - - - setup_portio_remap(); - - /* - * Scan the CPU present map and fire up the other CPUs via do_boot_cpu - * - * In clustered apic mode, phys_cpu_present_map is a constructed thus: - * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the - * clustered apic ID. - */ - Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map)); -#endif - Dprintk("CPU present map: %lx\n", (1UL << xen_start_info->n_vcpu) - 1); - - kicked = 1; - for (cpu = 1; kicked < NR_CPUS && cpu < xen_start_info->n_vcpu; cpu++) { - if (max_cpus <= cpucount+1) - continue; - -#ifdef CONFIG_SMP_ALTERNATIVES - if (kicked == 1) - prepare_for_smp(); -#endif - if (do_boot_cpu(cpu)) - printk("CPU #%d not responding - cannot use it.\n", - cpu); - else - ++kicked; - } - -#if 0 - /* - * Cleanup possible dangling ends... - */ - smpboot_restore_warm_reset_vector(); -#endif - - /* - * Allow the user to impress friends. - */ - Dprintk("Before bogomips.\n"); - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (cpu_isset(cpu, cpu_callout_map)) - bogosum += cpu_data[cpu].loops_per_jiffy; - printk(KERN_INFO - "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - cpucount+1, - bogosum/(500000/HZ), - (bogosum/(5000/HZ))%100); - - Dprintk("Before bogocount - setting activated=1.\n"); - - if (smp_b_stepping) - printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n"); - - /* - * Don't taint if we are running SMP kernel on a single non-MP - * approved Athlon - */ - if (tainted & TAINT_UNSAFE_SMP) { - if (cpucount) - printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n"); - else - tainted &= ~TAINT_UNSAFE_SMP; - } - - Dprintk("Boot done.\n"); - - /* - * construct cpu_sibling_map[], so that we can tell sibling CPUs - * efficiently. - */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - } - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - struct cpuinfo_x86 *c = cpu_data + cpu; - int siblings = 0; - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; - - if (smp_num_siblings > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (cpu_core_id[cpu] == cpu_core_id[i]) { - siblings++; - cpu_set(i, cpu_sibling_map[cpu]); - } - } - } else { - siblings++; - cpu_set(cpu, cpu_sibling_map[cpu]); - } - - if (siblings != smp_num_siblings) { - printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); - smp_num_siblings = siblings; - } - - if (c->x86_num_cores > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_set(i, cpu_core_map[cpu]); - } - } - } else { - cpu_core_map[cpu] = cpu_sibling_map[cpu]; - } - } - - smpboot_setup_io_apic(); - -#if 0 - setup_boot_APIC_clock(); - - /* - * Synchronize the TSC with the AP - */ - if (cpu_has_tsc && cpucount && cpu_khz) - synchronize_tsc_bp(); -#endif -} - -/* These are wrappers to interface to the new boot process. Someone - who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ -void __init smp_prepare_cpus(unsigned int max_cpus) -{ - smp_commenced_mask = cpumask_of_cpu(0); - cpu_callin_map = cpumask_of_cpu(0); - mb(); - smp_boot_cpus(max_cpus); -} - -void __devinit smp_prepare_boot_cpu(void) -{ - cpu_set(smp_processor_id(), cpu_online_map); - cpu_set(smp_processor_id(), cpu_callout_map); -} - -#ifdef CONFIG_HOTPLUG_CPU - -static void handle_vcpu_hotplug_event( - struct xenbus_watch *watch, const char **vec, unsigned int len) -{ - int err, cpu; - char dir[32], state[32]; - char *cpustr; - const char *node = vec[XS_WATCH_PATH]; - - if ((cpustr = strstr(node, "cpu/")) == NULL) - return; - - sscanf(cpustr, "cpu/%d", &cpu); - - sprintf(dir, "cpu/%d", cpu); - err = xenbus_scanf(NULL, dir, "availability", "%s", state); - if (err != 1) { - printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); - return; - } - - if (strcmp(state, "online") == 0) - (void)cpu_up(cpu); - else if (strcmp(state, "offline") == 0) - (void)cpu_down(cpu); - else - printk(KERN_ERR "XENBUS: unknown state(%s) on node(%s)\n", - state, node); -} - -static int setup_cpu_watcher(struct notifier_block *notifier, - unsigned long event, void *data) -{ - static struct xenbus_watch cpu_watch = { - .node = "cpu", - .callback = handle_vcpu_hotplug_event }; - (void)register_xenbus_watch(&cpu_watch); - return NOTIFY_DONE; -} - -static int __init setup_vcpu_hotplug_event(void) -{ - static struct notifier_block xsn_cpu = { - .notifier_call = setup_cpu_watcher }; - register_xenstore_notifier(&xsn_cpu); - return 0; -} - -subsys_initcall(setup_vcpu_hotplug_event); - -int __cpu_disable(void) -{ - cpumask_t map = cpu_online_map; - int cpu = smp_processor_id(); - - /* - * Perhaps use cpufreq to drop frequency, but that could go - * into generic code. - * - * We won't take down the boot processor on i386 due to some - * interrupts only being able to be serviced by the BSP. - * Especially so if we're not using an IOAPIC -zwane - */ - if (cpu == 0) - return -EBUSY; - - cpu_clear(cpu, map); - fixup_irqs(map); - - /* It's now safe to remove this processor from the online map */ - cpu_clear(cpu, cpu_online_map); - - return 0; -} - -void __cpu_die(unsigned int cpu) -{ - while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ/10); - } - - xen_smp_intr_exit(cpu); - -#ifdef CONFIG_SMP_ALTERNATIVES - if (num_online_cpus() == 1) - unprepare_for_smp(); -#endif -} - -#else /* ... !CONFIG_HOTPLUG_CPU */ -int __cpu_disable(void) -{ - return -ENOSYS; -} - -void __cpu_die(unsigned int cpu) -{ - /* We said "no" in __cpu_disable */ - BUG(); -} -#endif /* CONFIG_HOTPLUG_CPU */ - -int __devinit __cpu_up(unsigned int cpu) -{ - /* In case one didn't come up */ - if (!cpu_isset(cpu, cpu_callin_map)) { - printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); - local_irq_enable(); - return -EIO; - } - -#ifdef CONFIG_SMP_ALTERNATIVES - if (num_online_cpus() == 1) - prepare_for_smp(); -#endif - - xen_smp_intr_init(cpu); - cpu_set(cpu, smp_commenced_mask); - cpu_set(cpu, cpu_online_map); - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); - - return 0; -} - -void __init smp_cpus_done(unsigned int max_cpus) -{ -#if 1 -#else -#ifdef CONFIG_X86_IO_APIC - setup_ioapic_dest(); -#endif - zap_low_mappings(); - /* - * Disable executability of the SMP trampoline: - */ - set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); -#endif -} - -extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *); -extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *); - -extern void local_setup_timer(unsigned int cpu); -extern void local_teardown_timer(unsigned int cpu); - -static void xen_smp_intr_init(unsigned int cpu) -{ - per_cpu(resched_irq, cpu) = - bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu); - sprintf(resched_name[cpu], "resched%d", cpu); - BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt, - SA_INTERRUPT, resched_name[cpu], NULL)); - - per_cpu(callfunc_irq, cpu) = - bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu); - sprintf(callfunc_name[cpu], "callfunc%d", cpu); - BUG_ON(request_irq(per_cpu(callfunc_irq, cpu), - smp_call_function_interrupt, - SA_INTERRUPT, callfunc_name[cpu], NULL)); - - if (cpu != 0) - local_setup_timer(cpu); -} - -static void xen_smp_intr_exit(unsigned int cpu) -{ - if (cpu != 0) - local_teardown_timer(cpu); - - free_irq(per_cpu(resched_irq, cpu), NULL); - unbind_ipi_from_irq(RESCHEDULE_VECTOR, cpu); - - free_irq(per_cpu(callfunc_irq, cpu), NULL); - unbind_ipi_from_irq(CALL_FUNCTION_VECTOR, cpu); -} - -void vcpu_prepare(int vcpu) -{ - extern void hypervisor_callback(void); - extern void failsafe_callback(void); - extern void smp_trap_init(trap_info_t *); - vcpu_guest_context_t ctxt; - struct task_struct *idle = idle_task(vcpu); - - if (vcpu == 0) - return; - - memset(&ctxt, 0, sizeof(ctxt)); - - ctxt.user_regs.ds = __USER_DS; - ctxt.user_regs.es = __USER_DS; - ctxt.user_regs.fs = 0; - ctxt.user_regs.gs = 0; - ctxt.user_regs.ss = __KERNEL_DS; - ctxt.user_regs.cs = __KERNEL_CS; - ctxt.user_regs.eip = (unsigned long)cpu_idle; - ctxt.user_regs.esp = idle->thread.esp; - ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1; - - memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); - - smp_trap_init(ctxt.trap_ctxt); - - ctxt.ldt_ents = 0; - - ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[vcpu].address); - ctxt.gdt_ents = cpu_gdt_descr[vcpu].size / 8; - - ctxt.kernel_ss = __KERNEL_DS; - ctxt.kernel_sp = idle->thread.esp0; - - ctxt.event_callback_cs = __KERNEL_CS; - ctxt.event_callback_eip = (unsigned long)hypervisor_callback; - ctxt.failsafe_callback_cs = __KERNEL_CS; - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; - - ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT; - - (void)HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt); -} - -/* - * Local variables: - * c-file-style: "linux" - * indent-tabs-mode: t - * c-indent-level: 8 - * c-basic-offset: 8 - * tab-width: 8 - * End: - */ diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Tue Oct 18 14:40:29 2005 +++ /dev/null Tue Oct 18 16:40:31 2005 @@ -1,1253 +0,0 @@ -/* - * x86 SMP booting functions - * - * (c) 1995 Alan Cox, Building #3 <alan@xxxxxxxxxx> - * (c) 1998, 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx> - * Copyright 2001 Andi Kleen, SuSE Labs. - * - * Much of the core SMP work is based on previous work by Thomas Radke, to - * whom a great many thanks are extended. - * - * Thanks to Intel for making available several different Pentium, - * Pentium Pro and Pentium-II/Xeon MP machines. - * Original development of Linux SMP code supported by Caldera. - * - * This code is released under the GNU General Public License version 2 - * - * Fixes - * Felix Koop : NR_CPUS used properly - * Jose Renau : Handle single CPU case. - * Alan Cox : By repeated request 8) - Total BogoMIP report. - * Greg Wright : Fix for kernel stacks panic. - * Erich Boleyn : MP v1.4 and additional changes. - * Matthias Sattler : Changes for 2.1 kernel map. - * Michel Lespinasse : Changes for 2.1 kernel map. - * Michael Chastain : Change trampoline.S to gnu as. - * Alan Cox : Dumb bug: 'B' step PPro's are fine - * Ingo Molnar : Added APIC timers, based on code - * from Jose Renau - * Ingo Molnar : various cleanups and rewrites - * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. - * Maciej W. Rozycki : Bits for genuine 82489DX APICs - * Andi Kleen : Changed for SMP boot into long mode. - * Rusty Russell : Hacked into shape for new "hotplug" boot process. - * Andi Kleen : Converted to new state machine. - * Various cleanups. - * Probably mostly hotplug CPU ready now. - */ - - -#include <linux/config.h> -#include <linux/init.h> - -#include <linux/mm.h> -#include <linux/kernel_stat.h> -#include <linux/smp_lock.h> -#include <linux/irq.h> -#include <linux/bootmem.h> -#include <linux/thread_info.h> -#include <linux/module.h> -#ifdef CONFIG_XEN -#include <linux/interrupt.h> -#endif - -#include <linux/delay.h> -#include <linux/mc146818rtc.h> -#include <asm/mtrr.h> -#include <asm/pgalloc.h> -#include <asm/desc.h> -#include <asm/kdebug.h> -#include <asm/tlbflush.h> -#include <asm/proto.h> -#include <asm/nmi.h> -#ifdef CONFIG_XEN -#include <asm/arch_hooks.h> -#include <asm-xen/evtchn.h> -#include <asm-xen/xen-public/vcpu.h> -#endif - -/* Change for real CPU hotplug. Note other files need to be fixed - first too. */ -#define __cpuinit __init -#define __cpuinitdata __initdata - -#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST) - unsigned int maxcpus = NR_CPUS; -#endif - -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; -/* Package ID of each logical CPU */ -u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; -u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; -EXPORT_SYMBOL(phys_proc_id); -EXPORT_SYMBOL(cpu_core_id); - -/* Bitmask of currently online CPUs */ -cpumask_t cpu_online_map; - -EXPORT_SYMBOL(cpu_online_map); - -/* - * Private maps to synchronize booting between AP and BP. - * Probably not needed anymore, but it makes for easier debugging. -AK - */ -cpumask_t cpu_callin_map; -cpumask_t cpu_callout_map; - -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); - -/* Per CPU bogomips and other parameters */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; - -/* Set when the idlers are all forked */ -int smp_threads_ready; - -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_core_map); - -#ifndef CONFIG_XEN -/* - * Trampoline 80x86 program as an array. - */ - -extern unsigned char trampoline_data[]; -extern unsigned char trampoline_end[]; - -/* - * Currently trivial. Write the real->protected mode - * bootstrap into the page concerned. The caller - * has made sure it's suitably aligned. - */ - -static unsigned long __cpuinit setup_trampoline(void) -{ - void *tramp = __va(SMP_TRAMPOLINE_BASE); - memcpy(tramp, trampoline_data, trampoline_end - trampoline_data); - return virt_to_phys(tramp); -} -#endif - -/* - * The bootstrap kernel entry code has set these up. Save them for - * a given CPU - */ - -static void __cpuinit smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = cpu_data + id; - - *c = boot_cpu_data; - identify_cpu(c); - print_cpu_info(c); -} - -#ifndef CONFIG_XEN -/* - * New Funky TSC sync algorithm borrowed from IA64. - * Main advantage is that it doesn't reset the TSCs fully and - * in general looks more robust and it works better than my earlier - * attempts. I believe it was written by David Mosberger. Some minor - * adjustments for x86-64 by me -AK - * - * Original comment reproduced below. - * - * Synchronize TSC of the current (slave) CPU with the TSC of the - * MASTER CPU (normally the time-keeper CPU). We use a closed loop to - * eliminate the possibility of unaccounted-for errors (such as - * getting a machine check in the middle of a calibration step). The - * basic idea is for the slave to ask the master what itc value it has - * and to read its own itc before and after the master responds. Each - * iteration gives us three timestamps: - * - * slave master - * - * t0 ---\ - * ---\ - * ---> - * tm - * /--- - * /--- - * t1 <--- - * - * - * The goal is to adjust the slave's TSC such that tm falls exactly - * half-way between t0 and t1. If we achieve this, the clocks are - * synchronized provided the interconnect between the slave and the - * master is symmetric. Even if the interconnect were asymmetric, we - * would still know that the synchronization error is smaller than the - * roundtrip latency (t0 - t1). - * - * When the interconnect is quiet and symmetric, this lets us - * synchronize the TSC to within one or two cycles. However, we can - * only *guarantee* that the synchronization is accurate to within a - * round-trip time, which is typically in the range of several hundred - * cycles (e.g., ~500 cycles). In practice, this means that the TSCs - * are usually almost perfectly synchronized, but we shouldn't assume - * that the accuracy is much better than half a micro second or so. - * - * [there are other errors like the latency of RDTSC and of the - * WRMSR. These can also account to hundreds of cycles. So it's - * probably worse. It claims 153 cycles error on a dual Opteron, - * but I suspect the numbers are actually somewhat worse -AK] - */ - -#define MASTER 0 -#define SLAVE (SMP_CACHE_BYTES/8) - -/* Intentionally don't use cpu_relax() while TSC synchronization - because we don't want to go into funky power save modi or cause - hypervisors to schedule us away. Going to sleep would likely affect - latency and low latency is the primary objective here. -AK */ -#define no_cpu_relax() barrier() - -static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); -static volatile __cpuinitdata unsigned long go[SLAVE + 1]; -static int notscsync __cpuinitdata; - -#undef DEBUG_TSC_SYNC - -#define NUM_ROUNDS 64 /* magic value */ -#define NUM_ITERS 5 /* likewise */ - -/* Callback on boot CPU */ -static __cpuinit void sync_master(void *arg) -{ - unsigned long flags, i; - - if (smp_processor_id() != boot_cpu_id) - return; - - go[MASTER] = 0; - - local_irq_save(flags); - { - for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { - while (!go[MASTER]) - no_cpu_relax(); - go[MASTER] = 0; - rdtscll(go[SLAVE]); - } - } - local_irq_restore(flags); -} - -/* - * Return the number of cycles by which our tsc differs from the tsc - * on the master (time-keeper) CPU. A positive number indicates our - * tsc is ahead of the master, negative that it is behind. - */ -static inline long -get_delta(long *rt, long *master) -{ - unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; - unsigned long tcenter, t0, t1, tm; - int i; - - for (i = 0; i < NUM_ITERS; ++i) { - rdtscll(t0); - go[MASTER] = 1; - while (!(tm = go[SLAVE])) - no_cpu_relax(); - go[SLAVE] = 0; - rdtscll(t1); - - if (t1 - t0 < best_t1 - best_t0) - best_t0 = t0, best_t1 = t1, best_tm = tm; - } - - *rt = best_t1 - best_t0; - *master = best_tm - best_t0; - - /* average best_t0 and best_t1 without overflow: */ - tcenter = (best_t0/2 + best_t1/2); - if (best_t0 % 2 + best_t1 % 2 == 2) - ++tcenter; - return tcenter - best_tm; -} - -static __cpuinit void sync_tsc(void) -{ - int i, done = 0; - long delta, adj, adjust_latency = 0; - unsigned long flags, rt, master_time_stamp, bound; -#if DEBUG_TSC_SYNC - static struct syncdebug { - long rt; /* roundtrip time */ - long master; /* master's timestamp */ - long diff; /* difference between midpoint and master's timestamp */ - long lat; /* estimate of tsc adjustment latency */ - } t[NUM_ROUNDS] __cpuinitdata; -#endif - - go[MASTER] = 1; - - smp_call_function(sync_master, NULL, 1, 0); - - while (go[MASTER]) /* wait for master to be ready */ - no_cpu_relax(); - - spin_lock_irqsave(&tsc_sync_lock, flags); - { - for (i = 0; i < NUM_ROUNDS; ++i) { - delta = get_delta(&rt, &master_time_stamp); - if (delta == 0) { - done = 1; /* let's lock on to this... */ - bound = rt; - } - - if (!done) { - unsigned long t; - if (i > 0) { - adjust_latency += -delta; - adj = -delta + adjust_latency/4; - } else - adj = -delta; - - rdtscll(t); - wrmsrl(MSR_IA32_TSC, t + adj); - } -#if DEBUG_TSC_SYNC - t[i].rt = rt; - t[i].master = master_time_stamp; - t[i].diff = delta; - t[i].lat = adjust_latency/4; -#endif - } - } - spin_unlock_irqrestore(&tsc_sync_lock, flags); - -#if DEBUG_TSC_SYNC - for (i = 0; i < NUM_ROUNDS; ++i) - printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", - t[i].rt, t[i].master, t[i].diff, t[i].lat); -#endif - - printk(KERN_INFO - "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, " - "maxerr %lu cycles)\n", - smp_processor_id(), boot_cpu_id, delta, rt); -} - -static void __cpuinit tsc_sync_wait(void) -{ - if (notscsync || !cpu_has_tsc) - return; - printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(), - boot_cpu_id); - sync_tsc(); -} - -static __init int notscsync_setup(char *s) -{ - notscsync = 1; - return 0; -} -__setup("notscsync", notscsync_setup); -#endif - -static atomic_t init_deasserted __cpuinitdata; - -/* - * Report back to the Boot Processor. - * Running on AP. - */ -void __cpuinit smp_callin(void) -{ - int cpuid, phys_id; - unsigned long timeout; - -#ifndef CONFIG_XEN - /* - * If waken up by an INIT in an 82489DX configuration - * we may get here before an INIT-deassert IPI reaches - * our local APIC. We have to wait for the IPI or we'll - * lock up on an APIC access. - */ - while (!atomic_read(&init_deasserted)) - cpu_relax(); - -#endif - /* - * (This works even if the APIC is not enabled.) - */ -#ifndef CONFIG_XEN - phys_id = GET_APIC_ID(apic_read(APIC_ID)); -#else - phys_id = smp_processor_id(); -#endif - cpuid = smp_processor_id(); - if (cpu_isset(cpuid, cpu_callin_map)) { - panic("smp_callin: phys CPU#%d, CPU#%d already present??\n", - phys_id, cpuid); - } - Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); - - /* - * STARTUP IPIs are fragile beasts as they might sometimes - * trigger some glue motherboard logic. Complete APIC bus - * silence for 1 second, this overestimates the time the - * boot CPU is spending to send the up to 2 STARTUP IPIs - * by a factor of two. This should be enough. - */ - - /* - * Waiting 2s total for startup (udelay is not yet working) - */ - timeout = jiffies + 2*HZ; - while (time_before(jiffies, timeout)) { - /* - * Has the boot CPU finished it's STARTUP sequence? - */ - if (cpu_isset(cpuid, cpu_callout_map)) - break; - cpu_relax(); - } - - if (!time_before(jiffies, timeout)) { - panic("smp_callin: CPU%d started up but did not get a callout!\n", - cpuid); - } - -#ifndef CONFIG_XEN - /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. (this is probably redundant on most - * boards) - */ - - Dprintk("CALLIN, before setup_local_APIC().\n"); - setup_local_APIC(); -#endif - - /* - * Get our bogomips. - */ - calibrate_delay(); - Dprintk("Stack at about %p\n",&cpuid); - -#ifndef CONFIG_XEN - disable_APIC_timer(); -#endif - - /* - * Save our processor parameters - */ - smp_store_cpu_info(cpuid); - - /* - * Allow the master to continue. - */ - cpu_set(cpuid, cpu_callin_map); -} - -#ifdef CONFIG_XEN -extern void local_setup_timer(unsigned int cpu); -#endif - -/* - * Setup code on secondary processor (after comming out of the trampoline) - */ -void __cpuinit start_secondary(void) -{ - /* - * Dont put anything before smp_callin(), SMP - * booting is too fragile that we want to limit the - * things done here to the most necessary things. - */ - cpu_init(); - smp_callin(); - - /* otherwise gcc will move up the smp_processor_id before the cpu_init */ - barrier(); - -#ifndef CONFIG_XEN - Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); - setup_secondary_APIC_clock(); - - Dprintk("cpu %d: enabling apic timer\n", smp_processor_id()); - - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - enable_NMI_through_LVT0(NULL); - enable_8259A_irq(0); - } - - enable_APIC_timer(); -#else - local_setup_timer(smp_processor_id()); - smp_intr_init(); - local_irq_enable(); -#endif - - /* - * Allow the master to continue. - */ - cpu_set(smp_processor_id(), cpu_online_map); - mb(); - -#ifndef CONFIG_XEN - /* Wait for TSC sync to not schedule things before. - We still process interrupts, which could see an inconsistent - time in that window unfortunately. */ - tsc_sync_wait(); -#endif - - cpu_idle(); -} - -extern volatile unsigned long init_rsp; -extern void (*initial_code)(void); - -#ifndef CONFIG_XEN -#if APIC_DEBUG -static void inquire_remote_apic(int apicid) -{ - unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; - char *names[] = { "ID", "VERSION", "SPIV" }; - int timeout, status; - - printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); - - for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { - printk("... APIC #%d %s: ", apicid, names[i]); - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); - - timeout = 0; - do { - udelay(100); - status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; - } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); - - switch (status) { - case APIC_ICR_RR_VALID: - status = apic_read(APIC_RRR); - printk("%08x\n", status); - break; - default: - printk("failed\n"); - } - } -} -#endif - -/* - * Kick the secondary to wake up. - */ -static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) -{ - unsigned long send_status = 0, accept_status = 0; - int maxlvt, timeout, num_starts, j; - - Dprintk("Asserting INIT.\n"); - - /* - * Turn INIT on target chip - */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* - * Send IPI - */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - mdelay(10); - - Dprintk("Deasserting INIT.\n"); - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - atomic_set(&init_deasserted, 1); - - /* - * Should we send STARTUP IPIs ? - * - * Determine this based on the APIC version. - * If we don't have an integrated APIC, don't send the STARTUP IPIs. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) - num_starts = 2; - else - num_starts = 0; - - /* - * Run STARTUP IPI loop. - */ - Dprintk("#startup loops: %d.\n", num_starts); - - maxlvt = get_maxlvt(); - - for (j = 1; j <= num_starts; j++) { - Dprintk("Sending STARTUP #%d.\n",j); - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - Dprintk("After apic_write.\n"); - - /* - * STARTUP IPI - */ - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Boot on the stack */ - /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_rip >> 12)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(300); - - Dprintk("Startup point 1.\n"); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - } - accept_status = (apic_read(APIC_ESR) & 0xEF); - if (send_status || accept_status) - break; - } - Dprintk("After Startup.\n"); - - if (send_status) - printk(KERN_ERR "APIC never delivered???\n"); - if (accept_status) - printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} -#endif - -/* - * Boot one CPU. - */ -static int __cpuinit do_boot_cpu(int cpu, int apicid) -{ - struct task_struct *idle; - unsigned long boot_error; - int timeout; - unsigned long start_rip; -#ifdef CONFIG_XEN - vcpu_guest_context_t ctxt; - extern void startup_64_smp(void); - extern void hypervisor_callback(void); - extern void failsafe_callback(void); - extern void smp_trap_init(trap_info_t *); -#endif - /* - * We can't use kernel_thread since we must avoid to - * reschedule the child. - */ - idle = fork_idle(cpu); - if (IS_ERR(idle)) { - printk("failed fork for CPU %d\n", cpu); - return PTR_ERR(idle); - } - - cpu_pda[cpu].pcurrent = idle; - -#ifndef CONFIG_XEN - start_rip = setup_trampoline(); -#else - start_rip = (unsigned long)startup_64_smp; -#endif - - init_rsp = idle->thread.rsp; - per_cpu(init_tss,cpu).rsp0 = init_rsp; - initial_code = start_secondary; - clear_ti_thread_flag(idle->thread_info, TIF_FORK); - - printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid, - start_rip, init_rsp); - - /* - * This grunge runs the startup process for - * the targeted processor. - */ - - atomic_set(&init_deasserted, 0); - -#ifdef CONFIG_XEN - cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL|__GFP_ZERO); - BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE); - cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size; - memcpy((void *)cpu_gdt_descr[cpu].address, - (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size); - - memset(&ctxt, 0, sizeof(ctxt)); - - ctxt.flags = VGCF_IN_KERNEL; - ctxt.user_regs.ds = __USER_DS; - ctxt.user_regs.es = __USER_DS; - ctxt.user_regs.fs = 0; - ctxt.user_regs.gs = 0; - ctxt.user_regs.ss = __KERNEL_DS|0x3; - ctxt.user_regs.cs = __KERNEL_CS|0x3; - ctxt.user_regs.rip = start_rip; - ctxt.user_regs.rsp = idle->thread.rsp; -#define X86_EFLAGS_IOPL_RING3 0x3000 - ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING3; - - /* FPU is set up to default initial state. */ - memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); - - smp_trap_init(ctxt.trap_ctxt); - - /* No LDT. */ - ctxt.ldt_ents = 0; - - { - unsigned long va; - int f; - - for (va = cpu_gdt_descr[cpu].address, f = 0; - va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size; - va += PAGE_SIZE, f++) { - ctxt.gdt_frames[f] = virt_to_mfn(va); - make_page_readonly((void *)va); - } - ctxt.gdt_ents = GDT_ENTRIES; - } - - /* Ring 1 stack is the initial stack. */ - ctxt.kernel_ss = __KERNEL_DS; - ctxt.kernel_sp = idle->thread.rsp; - - /* Callback handlers. */ - ctxt.event_callback_eip = (unsigned long)hypervisor_callback; - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; - ctxt.syscall_callback_eip = (unsigned long)system_call; - - ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT; - - boot_error = HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt); - if (boot_error) - printk("boot error: %ld\n", boot_error); - - if (!boot_error) { - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); - - /* - * allow APs to start initializing. - */ - Dprintk("Before Callout %d.\n", cpu); - cpu_set(cpu, cpu_callout_map); - Dprintk("After Callout %d.\n", cpu); - - /* - * Wait 5s total for a response - */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_isset(cpu, cpu_callin_map)) - break; /* It has booted */ - udelay(100); - } - - if (cpu_isset(cpu, cpu_callin_map)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - Dprintk("CPU has booted.\n"); - } else { - boot_error= 1; - } - } - x86_cpu_to_apicid[cpu] = apicid; -#else - Dprintk("Setting warm reset code and vector.\n"); - - CMOS_WRITE(0xa, 0xf); - local_flush_tlb(); - Dprintk("1.\n"); - *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4; - Dprintk("2.\n"); - *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf; - Dprintk("3.\n"); - - /* - * Be paranoid about clearing APIC errors. - */ - if (APIC_INTEGRATED(apic_version[apicid])) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } - - /* - * Status is now clean - */ - boot_error = 0; - - /* - * Starting actual IPI sequence... - */ - boot_error = wakeup_secondary_via_INIT(apicid, start_rip); - - if (!boot_error) { - /* - * allow APs to start initializing. - */ - Dprintk("Before Callout %d.\n", cpu); - cpu_set(cpu, cpu_callout_map); - Dprintk("After Callout %d.\n", cpu); - - /* - * Wait 5s total for a response - */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_isset(cpu, cpu_callin_map)) - break; /* It has booted */ - udelay(100); - } - - if (cpu_isset(cpu, cpu_callin_map)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - Dprintk("CPU has booted.\n"); - } else { - boot_error = 1; - if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE)) - == 0xA5) - /* trampoline started but...? */ - printk("Stuck ??\n"); - else - /* trampoline code not run */ - printk("Not responding.\n"); -#if APIC_DEBUG - inquire_remote_apic(apicid); -#endif - } - } -#endif - if (boot_error) { - cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ - clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ - cpu_clear(cpu, cpu_present_map); - cpu_clear(cpu, cpu_possible_map); - x86_cpu_to_apicid[cpu] = BAD_APICID; - x86_cpu_to_log_apicid[cpu] = BAD_APICID; - return -EIO; - } - - return 0; -} - -cycles_t cacheflush_time; -unsigned long cache_decay_ticks; - -/* - * Construct cpu_sibling_map[], so that we can tell the sibling CPU - * on SMT systems efficiently. - */ -static __cpuinit void detect_siblings(void) -{ - int cpu; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - } - - for_each_online_cpu (cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; - int siblings = 0; - int i; - if (smp_num_siblings > 1) { - for_each_online_cpu (i) { - if (cpu_core_id[cpu] == cpu_core_id[i]) { - siblings++; - cpu_set(i, cpu_sibling_map[cpu]); - } - } - } else { - siblings++; - cpu_set(cpu, cpu_sibling_map[cpu]); - } - - if (siblings != smp_num_siblings) { - printk(KERN_WARNING - "WARNING: %d siblings found for CPU%d, should be %d\n", - siblings, cpu, smp_num_siblings); - smp_num_siblings = siblings; - } - if (c->x86_num_cores > 1) { - for_each_online_cpu(i) { - if (phys_proc_id[cpu] == phys_proc_id[i]) - cpu_set(i, cpu_core_map[cpu]); - } - } else - cpu_core_map[cpu] = cpu_sibling_map[cpu]; - } -} - -#ifndef CONFIG_XEN -/* - * Cleanup possible dangling ends... - */ -static __cpuinit void smp_cleanup_boot(void) -{ - /* - * Paranoid: Set warm reset code and vector here back - * to default values. - */ - CMOS_WRITE(0, 0xf); - - /* - * Reset trampoline flag - */ - *((volatile int *) phys_to_virt(0x467)) = 0; - -#ifndef CONFIG_HOTPLUG_CPU - /* - * Free pages reserved for SMP bootup. - * When you add hotplug CPU support later remove this - * Note there is more work to be done for later CPU bootup. - */ - - free_page((unsigned long) __va(PAGE_SIZE)); - free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE)); -#endif -} -#endif - -/* - * Fall back to non SMP mode after errors. - * - * RED-PEN audit/test this more. I bet there is more state messed up here. - */ -static __cpuinit void disable_smp(void) -{ - cpu_present_map = cpumask_of_cpu(0); - cpu_possible_map = cpumask_of_cpu(0); -#ifndef CONFIG_XEN - if (smp_found_config) - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); - else - phys_cpu_present_map = physid_mask_of_physid(0); -#endif - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); -} - -/* - * Handle user cpus=... parameter. - */ -static __cpuinit void enforce_max_cpus(unsigned max_cpus) -{ - int i, k; - k = 0; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; - if (++k > max_cpus) { - cpu_clear(i, cpu_possible_map); - cpu_clear(i, cpu_present_map); - } - } -} - -/* - * Various sanity checks. - */ -static int __cpuinit smp_sanity_check(unsigned max_cpus) -{ -#ifndef CONFIG_XEN - if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { - printk("weird, boot CPU (#%d) not listed by the BIOS.\n", - hard_smp_processor_id()); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find an SMP configuration at boot time, - * get out of here now! - */ - if (!smp_found_config) { - printk(KERN_NOTICE "SMP motherboard not detected.\n"); - disable_smp(); - if (APIC_init_uniprocessor()) - printk(KERN_NOTICE "Local APIC not detected." - " Using dummy APIC emulation.\n"); - return -1; - } - - /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. - */ - if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) { - printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_id); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_id); - printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); - nr_ioapics = 0; - return -1; - } -#endif - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { -#ifdef CONFIG_XEN - xen_start_info->n_vcpu = 1; -#endif - printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); -#ifndef CONFIG_XEN - nr_ioapics = 0; -#endif - return -1; - } - - return 0; -} - -/* - * Prepare for SMP bootup. The MP table or ACPI has been read - * earlier. Just do some sanity checking here and enable APIC mode. - */ -void __cpuinit smp_prepare_cpus(unsigned int max_cpus) -{ - int i; - -#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST) -#else - nmi_watchdog_default(); -#endif - current_cpu_data = boot_cpu_data; - current_thread_info()->cpu = 0; /* needed? */ - - enforce_max_cpus(max_cpus); - - /* - * Fill in cpu_present_mask - */ - for (i = 0; i < NR_CPUS; i++) { -#ifndef CONFIG_XEN - int apicid = cpu_present_to_apicid(i); - if (physid_isset(apicid, phys_cpu_present_map)) { -#else - if (i < xen_start_info->n_vcpu) { -#endif - cpu_set(i, cpu_present_map); - /* possible map would be different if we supported real - CPU hotplug. */ - cpu_set(i, cpu_possible_map); - } - } - - if (smp_sanity_check(max_cpus) < 0) { - printk(KERN_INFO "SMP disabled\n"); - disable_smp(); - return; - } - -#ifdef CONFIG_XEN - smp_intr_init(); -#else - - /* - * Switch from PIC to APIC mode. - */ - connect_bsp_APIC(); - setup_local_APIC(); - - if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) { - panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); - /* Or can we switch back to PIC here? */ - } -#endif - - /* - * Now start the IO-APICs - */ -#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST) -#else - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else - nr_ioapics = 0; -#endif - - /* - * Set up local APIC timer on boot CPU. - */ - -#ifndef CONFIG_XEN - setup_boot_APIC_clock(); -#endif -} - -/* - * Early setup to make printk work. - */ -void __init smp_prepare_boot_cpu(void) -{ - int me = smp_processor_id(); - cpu_set(me, cpu_online_map); - cpu_set(me, cpu_callout_map); -} - -/* - * Entry point to boot a CPU. - * - * This is all __cpuinit, not __devinit for now because we don't support - * CPU hotplug (yet). - */ -int __cpuinit __cpu_up(unsigned int cpu) -{ - int err; -#ifndef CONFIG_XEN - int apicid = cpu_present_to_apicid(cpu); -#else - int apicid = cpu; -#endif - - WARN_ON(irqs_disabled()); - - Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); - -#ifndef CONFIG_XEN - if (apicid == BAD_APICID || apicid == boot_cpu_id || - !physid_isset(apicid, phys_cpu_present_map)) { - printk("__cpu_up: bad cpu %d\n", cpu); - return -EINVAL; - } -#endif - - /* Boot it! */ - err = do_boot_cpu(cpu, apicid); - if (err < 0) { - Dprintk("do_boot_cpu failed %d\n", err); - return err; - } - - /* Unleash the CPU! */ - Dprintk("waiting for cpu %d\n", cpu); - - while (!cpu_isset(cpu, cpu_online_map)) - cpu_relax(); - return 0; -} - -/* - * Finish the SMP boot. - */ -void __cpuinit smp_cpus_done(unsigned int max_cpus) -{ -#ifndef CONFIG_XEN - zap_low_mappings(); - smp_cleanup_boot(); - -#ifdef CONFIG_X86_IO_APIC - setup_ioapic_dest(); -#endif -#endif - - detect_siblings(); -#ifndef CONFIG_XEN - time_init_gtod(); - - check_nmi_watchdog(); -#endif -} - -#ifdef CONFIG_XEN -extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *); -extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *); - -static DEFINE_PER_CPU(int, resched_irq); -static DEFINE_PER_CPU(int, callfunc_irq); -static char resched_name[NR_CPUS][15]; -static char callfunc_name[NR_CPUS][15]; - -void smp_intr_init(void) -{ - int cpu = smp_processor_id(); - - per_cpu(resched_irq, cpu) = - bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu); - sprintf(resched_name[cpu], "resched%d", cpu); - BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt, - SA_INTERRUPT, resched_name[cpu], NULL)); - - per_cpu(callfunc_irq, cpu) = - bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu); - sprintf(callfunc_name[cpu], "callfunc%d", cpu); - BUG_ON(request_irq(per_cpu(callfunc_irq, cpu), - smp_call_function_interrupt, - SA_INTERRUPT, callfunc_name[cpu], NULL)); -} - -static void smp_intr_exit(void) -{ - int cpu = smp_processor_id(); - - free_irq(per_cpu(resched_irq, cpu), NULL); - unbind_ipi_from_irq(RESCHEDULE_VECTOR, cpu); - - free_irq(per_cpu(callfunc_irq, cpu), NULL); - unbind_ipi_from_irq(CALL_FUNCTION_VECTOR, cpu); -} - -void vcpu_prepare(int vcpu) -{ -} - -#endif diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h Tue Oct 18 14:40:29 2005 +++ /dev/null Tue Oct 18 16:40:31 2005 @@ -1,55 +0,0 @@ -/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws - * which needs to alter them. */ - -static inline void smpboot_clear_io_apic_irqs(void) -{ -#ifdef CONFIG_X86_IO_APIC - io_apic_irqs = 0; -#endif -} - -static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) -{ -#if 1 - printk("smpboot_setup_warm_reset_vector\n"); -#else - CMOS_WRITE(0xa, 0xf); - local_flush_tlb(); - Dprintk("1.\n"); - *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; - Dprintk("2.\n"); - *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; - Dprintk("3.\n"); -#endif -} - -static inline void smpboot_restore_warm_reset_vector(void) -{ - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - - /* - * Paranoid: Set warm reset code and vector here back - * to default values. - */ - CMOS_WRITE(0, 0xf); - - *((volatile long *) phys_to_virt(0x467)) = 0; -} - -static inline void smpboot_setup_io_apic(void) -{ -#ifdef CONFIG_X86_IO_APIC - /* - * Here we can be sure that there is an IO-APIC in the system. Let's - * go and set it up: - */ - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -#endif -} - - -#define smp_found_config (xen_start_info->n_vcpu > 1) diff -r 7169e31606bd -r 3d27ee7da0c1 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h Tue Oct 18 14:40:29 2005 +++ /dev/null Tue Oct 18 16:40:31 2005 @@ -1,55 +0,0 @@ -/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws - * which needs to alter them. */ - -static inline void smpboot_clear_io_apic_irqs(void) -{ -#ifdef CONFIG_X86_IO_APIC - io_apic_irqs = 0; -#endif -} - -static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) -{ -#if 1 - printk("smpboot_setup_warm_reset_vector\n"); -#else - CMOS_WRITE(0xa, 0xf); - local_flush_tlb(); - Dprintk("1.\n"); - *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; - Dprintk("2.\n"); - *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; - Dprintk("3.\n"); -#endif -} - -static inline void smpboot_restore_warm_reset_vector(void) -{ - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - - /* - * Paranoid: Set warm reset code and vector here back - * to default values. - */ - CMOS_WRITE(0, 0xf); - - *((volatile long *) phys_to_virt(0x467)) = 0; -} - -static inline void smpboot_setup_io_apic(void) -{ -#ifdef CONFIG_X86_IO_APIC - /* - * Here we can be sure that there is an IO-APIC in the system. Let's - * go and set it up: - */ - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -#endif -} - - -#define smp_found_config (xen_start_info->n_vcpu > 1) _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |