[Xen-changelog] [xen-unstable] Pull necessary Linux CPU hotplug logic into Xen.
# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1184171289 -3600
# Node ID e00547dcda097c10e4c1390f0e2873deee741c0c
# Parent  ad11f74d298c3afe7a5778b9fcf4f8a000a9d0eb
Pull necessary Linux CPU hotplug logic into Xen. Due to distinct
differences in basic hierarchy and features, this pull is done at the
function level.

Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
---
 xen/arch/x86/cpu/common.c |   12 ++
 xen/arch/x86/domain.c     |   26 +++++
 xen/arch/x86/irq.c        |   41 ++++++++
 xen/arch/x86/smp.c        |   10 ++
 xen/arch/x86/smpboot.c    |  210 ++++++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/irq.h     |   10 ++
 6 files changed, 309 insertions(+)

diff -r ad11f74d298c -r e00547dcda09 xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c	Wed Jul 11 17:23:09 2007 +0100
+++ b/xen/arch/x86/cpu/common.c	Wed Jul 11 17:28:09 2007 +0100
@@ -594,3 +594,15 @@ void __devinit cpu_init(void)
 	/* Install correct page table. */
 	write_ptbase(current);
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __cpuinit cpu_uninit(void)
+{
+	int cpu = raw_smp_processor_id();
+	cpu_clear(cpu, cpu_initialized);
+
+	/* lazy TLB state */
+	per_cpu(cpu_tlbstate, cpu).state = 0;
+	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
diff -r ad11f74d298c -r e00547dcda09 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Wed Jul 11 17:23:09 2007 +0100
+++ b/xen/arch/x86/domain.c	Wed Jul 11 17:28:09 2007 +0100
@@ -76,6 +76,32 @@ static void default_idle(void)
     local_irq_enable();
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+	/* This must be done before dead CPU ack */
+	cpu_exit_clear();
+	wbinvd();
+	mb();
+	/* Ack it */
+	__get_cpu_var(cpu_state) = CPU_DEAD;
+
+	/*
+	 * With physical CPU hotplug, we should halt the cpu
+	 */
+	local_irq_disable();
+	while (1)
+		halt();
+}
+#else
+static inline void play_dead(void)
+{
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
 void idle_loop(void)
 {
     for ( ; ; )
diff -r ad11f74d298c -r e00547dcda09 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c	Wed Jul 11 17:23:09 2007 +0100
+++ b/xen/arch/x86/irq.c	Wed Jul 11 17:28:09 2007 +0100
@@ -654,3 +654,44 @@ static int __init setup_dump_irqs(void)
     return 0;
 }
 __initcall(setup_dump_irqs);
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <mach_apic.h>
+
+void fixup_irqs(cpumask_t map)
+{
+	unsigned int irq;
+	static int warned;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		cpumask_t mask;
+		if (irq == 2)
+			continue;
+
+		cpus_and(mask, irq_desc[irq].affinity, map);
+		if (any_online_cpu(mask) == NR_CPUS) {
+			printk("Breaking affinity for irq %i\n", irq);
+			mask = map;
+		}
+		if (irq_desc[irq].chip->set_affinity)
+			irq_desc[irq].chip->set_affinity(irq, mask);
+		else if (irq_desc[irq].action && !(warned++))
+			printk("Cannot set affinity for irq %i\n", irq);
+	}
+
+#if 0
+	barrier();
+	/* Ingo Molnar says: "after the IO-APIC masks have been redirected
+	   [note the nop - the interrupt-enable boundary on x86 is two
+	   instructions from sti] - to flush out pending hardirqs and
+	   IPIs. After this point nothing is supposed to reach this CPU." */
+	__asm__ __volatile__("sti; nop; cli");
+	barrier();
+#else
+	/* That doesn't seem sufficient.  Give it 1ms. */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+#endif
+}
+#endif
diff -r ad11f74d298c -r e00547dcda09 xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c	Wed Jul 11 17:23:09 2007 +0100
+++ b/xen/arch/x86/smp.c	Wed Jul 11 17:28:09 2007 +0100
@@ -256,6 +256,16 @@ static DEFINE_SPINLOCK(call_lock);
 static DEFINE_SPINLOCK(call_lock);
 static struct call_data_struct *call_data;
 
+void lock_ipi_call_lock(void)
+{
+    spin_lock_irq(&call_lock);
+}
+
+void unlock_ipi_call_lock(void)
+{
+    spin_unlock_irq(&call_lock);
+}
+
 int smp_call_function(
     void (*func) (void *info),
     void *info,
diff -r ad11f74d298c -r e00547dcda09 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c	Wed Jul 11 17:23:09 2007 +0100
+++ b/xen/arch/x86/smpboot.c	Wed Jul 11 17:28:09 2007 +0100
@@ -897,6 +897,85 @@ static int __devinit do_boot_cpu(int api
 	return boot_error;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void cpu_exit_clear(void)
+{
+	int cpu = raw_smp_processor_id();
+
+	idle_task_exit();
+
+	cpucount --;
+	cpu_uninit();
+	irq_ctx_exit(cpu);
+
+	cpu_clear(cpu, cpu_callout_map);
+	cpu_clear(cpu, cpu_callin_map);
+
+	cpu_clear(cpu, smp_commenced_mask);
+	unmap_cpu_to_logical_apicid(cpu);
+}
+
+struct warm_boot_cpu_info {
+	struct completion *complete;
+	int apicid;
+	int cpu;
+};
+
+static void __cpuinit do_warm_boot_cpu(void *p)
+{
+	struct warm_boot_cpu_info *info = p;
+	do_boot_cpu(info->apicid, info->cpu);
+	complete(info->complete);
+}
+
+static int __cpuinit __smp_prepare_cpu(int cpu)
+{
+	DECLARE_COMPLETION(done);
+	struct warm_boot_cpu_info info;
+	struct work_struct task;
+	int apicid, ret;
+	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+
+	apicid = x86_cpu_to_apicid[cpu];
+	if (apicid == BAD_APICID) {
+		ret = -ENODEV;
+		goto exit;
+	}
+
+	/*
+	 * the CPU isn't initialized at boot time, allocate gdt table here.
+	 * cpu_init will initialize it
+	 */
+	if (!cpu_gdt_descr->address) {
+		cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL);
+		if (!cpu_gdt_descr->address)
+			printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	info.complete = &done;
+	info.apicid = apicid;
+	info.cpu = cpu;
+	INIT_WORK(&task, do_warm_boot_cpu, &info);
+
+	tsc_sync_disabled = 1;
+
+	/* init low mem mapping */
+	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+			KERNEL_PGD_PTRS);
+	flush_tlb_all();
+	schedule_work(&task);
+	wait_for_completion(&done);
+
+	tsc_sync_disabled = 0;
+	zap_low_mappings();
+	ret = 0;
exit:
+	return ret;
+}
+#endif
+
 /*
  * Cycle through the processors sending APIC IPIs to boot each.
  */
@@ -1097,6 +1176,136 @@ void __devinit smp_prepare_boot_cpu(void
 	/*per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;*/
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static void
+remove_siblinginfo(int cpu)
+{
+	int sibling;
+	struct cpuinfo_x86 *c = cpu_data;
+
+	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+		cpu_clear(cpu, cpu_core_map[sibling]);
+		/*
+		 * last thread sibling in this cpu core going down
+		 */
+		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+			c[sibling].booted_cores--;
+	}
+
+	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
+		cpu_clear(cpu, cpu_sibling_map[sibling]);
+	cpus_clear(cpu_sibling_map[cpu]);
+	cpus_clear(cpu_core_map[cpu]);
+	c[cpu].phys_proc_id = 0;
+	c[cpu].cpu_core_id = 0;
+	cpu_clear(cpu, cpu_sibling_setup_map);
+}
+
+int __cpu_disable(void)
+{
+	cpumask_t map = cpu_online_map;
+	int cpu = smp_processor_id();
+
+	/*
+	 * Perhaps use cpufreq to drop frequency, but that could go
+	 * into generic code.
+	 *
+	 * We won't take down the boot processor on i386 due to some
+	 * interrupts only being able to be serviced by the BSP.
+	 * Especially so if we're not using an IOAPIC	-zwane
+	 */
+	if (cpu == 0)
+		return -EBUSY;
+
+	clear_local_APIC();
+	/* Allow any queued timer interrupts to get serviced */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+
+	remove_siblinginfo(cpu);
+
+	cpu_clear(cpu, map);
+	fixup_irqs(map);
+	/* It's now safe to remove this processor from the online map */
+	cpu_clear(cpu, cpu_online_map);
+	return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	/* We don't do anything here: idle task is faking death itself. */
+	unsigned int i;
+
+	for (i = 0; i < 10; i++) {
+		/* They ack this in play_dead by setting CPU_DEAD */
+		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+			printk ("CPU %d is now offline\n", cpu);
+			if (1 == num_online_cpus())
+				alternatives_smp_switch(0);
+			return;
+		}
+		msleep(100);
+	}
+	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+
+/* From kernel/power/main.c */
+/* This is protected by pm_sem semaphore */
+static cpumask_t frozen_cpus;
+
+void disable_nonboot_cpus(void)
+{
+	int cpu, error;
+
+	error = 0;
+	cpus_clear(frozen_cpus);
+	printk("Freezing cpus ...\n");
+	for_each_online_cpu(cpu) {
+		if (cpu == 0)
+			continue;
+		error = cpu_down(cpu);
+		if (!error) {
+			cpu_set(cpu, frozen_cpus);
+			printk("CPU%d is down\n", cpu);
+			continue;
+		}
+		printk("Error taking cpu %d down: %d\n", cpu, error);
+	}
+	BUG_ON(raw_smp_processor_id() != 0);
+	if (error)
+		panic("cpus not sleeping");
+}
+
+void enable_nonboot_cpus(void)
+{
+	int cpu, error;
+
+	printk("Thawing cpus ...\n");
+	for_each_cpu_mask(cpu, frozen_cpus) {
+		error = cpu_up(cpu);
+		if (!error) {
+			printk("CPU%d is up\n", cpu);
+			continue;
+		}
+		printk("Error taking cpu %d up: %d\n", cpu, error);
+		panic("Not enough cpus");
+	}
+	cpus_clear(frozen_cpus);
+}
+#else /* ... !CONFIG_HOTPLUG_CPU */
+int __cpu_disable(void)
+{
+	return -ENOSYS;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	/* We said "no" in __cpu_disable */
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
 int __devinit __cpu_up(unsigned int cpu)
 {
 	/* In case one didn't come up */
@@ -1117,6 +1326,7 @@ int __devinit __cpu_up(unsigned int cpu)
 
 	return 0;
 }
+
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 #ifdef CONFIG_X86_IO_APIC
diff -r ad11f74d298c -r e00547dcda09 xen/include/xen/irq.h
--- a/xen/include/xen/irq.h	Wed Jul 11 17:23:09 2007 +0100
+++ b/xen/include/xen/irq.h	Wed Jul 11 17:28:09 2007 +0100
@@ -57,6 +57,7 @@ typedef struct {
     struct irqaction *action;	/* IRQ action list */
     unsigned int depth;		/* nested irq disables */
    spinlock_t lock;
+    cpumask_t affinity;
 } __cacheline_aligned irq_desc_t;
 
 extern irq_desc_t irq_desc[NR_IRQS];
@@ -74,4 +75,13 @@ extern int pirq_guest_bind(struct vcpu *
 extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
 extern int pirq_guest_unbind(struct domain *d, int irq);
 
+static inline void set_native_irq_info(int irq, cpumask_t mask)
+{
+    irq_desc[irq].affinity = mask;
+}
+
+static inline void set_irq_info(int irq, cpumask_t mask)
+{
+    set_native_irq_info(irq, mask);
+}
 #endif /* __XEN_IRQ_H__ */
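The hunks above pull in the per-CPU hotplug hooks, but not the generic
cpu_down()-style caller that drives them (disable_nonboot_cpus() above
assumes cpu_down() is provided elsewhere). A rough sketch of the split
those hooks expect is below; the function names sketch_dying_cpu_path()
and sketch_requesting_cpu_path() are illustrative stand-ins, not code
from this changeset or from Xen:

	/*
	 * Sketch only: how a cpu_down()-style caller is expected to use
	 * the hooks added above, split by the CPU each piece runs on.
	 */

	/* Runs on the CPU being offlined (e.g. from a stop-machine/IPI
	 * handler scheduled there by the requesting CPU). */
	static int sketch_dying_cpu_path(void)
	{
		int err;

		/* Leave cpu_online_map and reroute IRQ affinity
		 * (remove_siblinginfo() + fixup_irqs()). */
		err = __cpu_disable();
		if (err)
			return err;

		/*
		 * The CPU then drops back into its idle loop, which is
		 * expected to notice it is offline and call play_dead():
		 * that sets cpu_state to CPU_DEAD and halts with
		 * interrupts disabled.
		 */
		return 0;
	}

	/* Runs on the CPU that requested the offline. */
	static void sketch_requesting_cpu_path(unsigned int cpu)
	{
		/* Poll up to ~1s for the CPU_DEAD ack set by play_dead(). */
		__cpu_die(cpu);
	}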
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog