[Xen-changelog] [xen-unstable] x86: Dynamically allocate percpu data area when a CPU comes online.
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1274192025 -3600
# Node ID 8d7eba5be1d83f80ee2f4633d5ad7d00c4ef55a7
# Parent  c1b6647c682816f30f2b6c13da62c3b6cc617ff7
x86: Dynamically allocate percpu data area when a CPU comes online.

At the same time, the data area starts life zeroed.

Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
 xen/arch/x86/Makefile                        |    1 
 xen/arch/x86/irq.c                           |    9 +--
 xen/arch/x86/nmi.c                           |    2 
 xen/arch/x86/percpu.c                        |   69 ++++++++++++++++++++++++
 xen/arch/x86/setup.c                         |   75 +--------------------------
 xen/arch/x86/smpboot.c                       |    2 
 xen/arch/x86/tboot.c                         |   10 +--
 xen/arch/x86/traps.c                         |   10 +++
 xen/arch/x86/x86_32/supervisor_mode_kernel.S |    2 
 xen/arch/x86/xen.lds.S                       |   31 +++-------
 xen/common/rcupdate.c                        |    2 
 xen/common/sched_credit.c                    |    2 
 xen/drivers/cpufreq/utility.c                |    4 +
 xen/include/asm-x86/percpu.h                 |   11 ++-
 xen/include/asm-x86/x86_32/asm_defns.h       |    2 
 xen/include/asm-x86/x86_64/asm_defns.h       |    2 
 xen/xsm/flask/avc.c                          |    2 
 17 files changed, 119 insertions(+), 117 deletions(-)

diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile	Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/Makefile	Tue May 18 15:13:45 2010 +0100
@@ -36,6 +36,7 @@ obj-y += nmi.o
 obj-y += nmi.o
 obj-y += numa.o
 obj-y += pci.o
+obj-y += percpu.o
 obj-y += physdev.o
 obj-y += setup.o
 obj-y += shutdown.o
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c	Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/irq.c	Tue May 18 15:13:45 2010 +0100
@@ -50,9 +50,7 @@ static struct timer *__read_mostly irq_g
 
 static DEFINE_SPINLOCK(vector_lock);
 
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
-    [0 ... NR_VECTORS - 1] = -1
-};
+DEFINE_PER_CPU(vector_irq_t, vector_irq);
 
 DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs);
 
@@ -269,7 +267,10 @@ int init_irq_data(void)
 {
     struct irq_desc *desc;
     struct irq_cfg *cfg;
-    int irq;
+    int irq, vector;
+
+    for (vector = 0; vector < NR_VECTORS; ++vector)
+        this_cpu(vector_irq)[vector] = -1;
 
     irq_desc = xmalloc_array(struct irq_desc, nr_irqs);
     irq_cfg = xmalloc_array(struct irq_cfg, nr_irqs);
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/nmi.c
--- a/xen/arch/x86/nmi.c	Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/nmi.c	Tue May 18 15:13:45 2010 +0100
@@ -230,7 +230,7 @@ static inline void write_watchdog_counte
     do_div(count, nmi_hz);
 
     if(descr)
-        Dprintk("setting %s to -0x%08Lx\n", descr, count);
+        Dprintk("setting %s to -0x%"PRIx64"\n", descr, count);
     wrmsrl(nmi_perfctr_msr, 0 - count);
 }
 
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/percpu.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/percpu.c	Tue May 18 15:13:45 2010 +0100
@@ -0,0 +1,69 @@
+#include <xen/config.h>
+#include <xen/percpu.h>
+#include <xen/cpu.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+
+unsigned long __per_cpu_offset[NR_CPUS];
+#define INVALID_PERCPU_AREA (-(long)__per_cpu_start)
+#define PERCPU_ORDER (get_order_from_bytes(__per_cpu_data_end-__per_cpu_start))
+
+void __init percpu_init_areas(void)
+{
+    unsigned int cpu;
+    for ( cpu = 1; cpu < NR_CPUS; cpu++ )
+        __per_cpu_offset[cpu] = INVALID_PERCPU_AREA;
+}
+
+static int init_percpu_area(unsigned int cpu)
+{
+    char *p;
+    if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA )
+        return 0;
+    if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL )
+        return -ENOMEM;
+    memset(p, 0, __per_cpu_data_end - __per_cpu_start);
+    __per_cpu_offset[cpu] = p - __per_cpu_start;
+    return 0;
+}
+
+static void free_percpu_area(unsigned int cpu)
+{
+    char *p = __per_cpu_start + __per_cpu_offset[cpu];
+    free_xenheap_pages(p, PERCPU_ORDER);
+    __per_cpu_offset[cpu] = INVALID_PERCPU_AREA;
+}
+
+static int cpu_percpu_callback(
+    struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+    unsigned int cpu = (unsigned long)hcpu;
+    int rc = 0;
+
+    switch ( action )
+    {
+    case CPU_UP_PREPARE:
+        rc = init_percpu_area(cpu);
+        break;
+    case CPU_UP_CANCELED:
+    case CPU_DEAD:
+        free_percpu_area(cpu);
+        break;
+    default:
+        break;
+    }
+
+    return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
+}
+
+static struct notifier_block cpu_percpu_nfb = {
+    .notifier_call = cpu_percpu_callback,
+    .priority = 100 /* highest priority */
+};
+
+static int __init percpu_presmp_init(void)
+{
+    register_cpu_notifier(&cpu_percpu_nfb);
+    return 0;
+}
+presmp_initcall(percpu_presmp_init);
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c	Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/setup.c	Tue May 18 15:13:45 2010 +0100
@@ -107,12 +107,6 @@ unsigned long __read_mostly xenheap_phys
 unsigned long __read_mostly xenheap_phys_end;
 #endif
 
-DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
-#ifdef CONFIG_COMPAT
-DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table)
-    = boot_cpu_compat_gdt_table;
-#endif
-
 DEFINE_PER_CPU(struct tss_struct, init_tss);
 
 char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
@@ -192,46 +186,6 @@ static void free_xen_data(char *s, char 
 }
 
 extern char __init_begin[], __init_end[], __bss_start[];
-extern char __per_cpu_start[], __per_cpu_data_end[];
-
-static void __init percpu_init_areas(void)
-{
-    unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
-
-    BUG_ON((unsigned long)__per_cpu_start & ~PAGE_MASK);
-    BUG_ON((unsigned long)__per_cpu_data_end & ~PAGE_MASK);
-    BUG_ON(data_size > PERCPU_SIZE);
-
-    /* Initialise per-cpu data area for all possible secondary CPUs. */
-    for ( i = 1; i < NR_CPUS; i++ )
-        memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
-               __per_cpu_start,
-               data_size);
-}
-
-static void __init percpu_free_unused_areas(void)
-{
-    unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
-    unsigned int first_unused;
-
-    /* Find first 'impossible' secondary CPU. */
-    for ( i = 1; i < NR_CPUS; i++ )
-        if ( !cpu_possible(i) )
-            break;
-    first_unused = i;
-
-    /* Check that there are no holes in cpu_possible_map. */
-    for ( ; i < NR_CPUS; i++ )
-        BUG_ON(cpu_possible(i));
-
-    /* Free all unused per-cpu data areas. */
-    free_xen_data(&__per_cpu_start[first_unused << PERCPU_SHIFT], __bss_start);
-
-    if ( data_size != PERCPU_SIZE )
-        for ( i = 0; i < first_unused; i++ )
-            free_xen_data(&__per_cpu_start[(i << PERCPU_SHIFT) + data_size],
-                          &__per_cpu_start[(i+1) << PERCPU_SHIFT]);
-}
 
 static void __init init_idle_domain(void)
 {
@@ -1013,8 +967,6 @@ void __init __start_xen(unsigned long mb
 
     init_apic_mappings();
 
-    percpu_free_unused_areas();
-
     init_IRQ();
 
     xsm_init(&initrdidx, mbi, initial_images_start);
@@ -1200,7 +1152,7 @@ int xen_in_range(unsigned long mfn)
     paddr_t start, end;
     int i;
 
-    enum { region_s3, region_text, region_percpu, region_bss, nr_regions };
+    enum { region_s3, region_text, region_bss, nr_regions };
    static struct {
        paddr_t s, e;
    } xen_regions[nr_regions];
@@ -1214,10 +1166,6 @@ int xen_in_range(unsigned long mfn)
        /* hypervisor code + data */
        xen_regions[region_text].s =__pa(&_stext);
        xen_regions[region_text].e = __pa(&__init_begin);
-       /* per-cpu data */
-       xen_regions[region_percpu].s = __pa(__per_cpu_start);
-       xen_regions[region_percpu].e = xen_regions[region_percpu].s +
-           (((paddr_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT);
        /* bss */
        xen_regions[region_bss].s = __pa(&__bss_start);
        xen_regions[region_bss].e = __pa(&_end);
@@ -1226,25 +1174,8 @@ int xen_in_range(unsigned long mfn)
     start = (paddr_t)mfn << PAGE_SHIFT;
     end = start + PAGE_SIZE;
     for ( i = 0; i < nr_regions; i++ )
-    {
-        if ( (start >= xen_regions[i].e) || (end <= xen_regions[i].s) )
-            continue;
-
-        if ( i == region_percpu )
-        {
-            /*
-             * Check if the given page falls into an unused (and therefore
-             * freed) section of the per-cpu data space. Each CPU's data
-             * area is page-aligned, so the following arithmetic is safe.
-             */
-            unsigned int off = ((start - __pa(__per_cpu_start))
-                                & (PERCPU_SIZE - 1));
-            unsigned int data_sz = __per_cpu_data_end - __per_cpu_start;
-            return off < data_sz;
-        }
-
-        return 1;
-    }
+        if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) )
+            return 1;
 
     return 0;
 }
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c	Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/smpboot.c	Tue May 18 15:13:45 2010 +0100
@@ -487,7 +487,7 @@ static int wakeup_secondary_cpu(int phys
 
     for ( i = 0; i < num_starts; i++ )
     {
-        Dprintk("Sending STARTUP #%d.\n",j);
+        Dprintk("Sending STARTUP #%d.\n", i+1);
         apic_read_around(APIC_SPIV);
         apic_write(APIC_ESR, 0);
         apic_read(APIC_ESR);
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/tboot.c
--- a/xen/arch/x86/tboot.c	Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/tboot.c	Tue May 18 15:13:45 2010 +0100
@@ -357,7 +357,7 @@ void tboot_shutdown(uint32_t shutdown_ty
     /*
      * Xen regions for tboot to MAC
      */
-    g_tboot_shared->num_mac_regions = 4;
+    g_tboot_shared->num_mac_regions = 3;
     /* S3 resume code (and other real mode trampoline code) */
     g_tboot_shared->mac_regions[0].start = bootsym_phys(trampoline_start);
     g_tboot_shared->mac_regions[0].size = bootsym_phys(trampoline_end) -
@@ -366,13 +366,9 @@ void tboot_shutdown(uint32_t shutdown_ty
     g_tboot_shared->mac_regions[1].start = (uint64_t)__pa(&_stext);
     g_tboot_shared->mac_regions[1].size = __pa(&__init_begin) -
                                           __pa(&_stext);
-    /* per-cpu data */
-    g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__per_cpu_start);
-    g_tboot_shared->mac_regions[2].size =
-        (((uint64_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT);
     /* bss */
-    g_tboot_shared->mac_regions[3].start = (uint64_t)__pa(&__bss_start);
-    g_tboot_shared->mac_regions[3].size = __pa(&_end) - __pa(&__bss_start);
+    g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__bss_start);
+    g_tboot_shared->mac_regions[2].size = __pa(&_end) - __pa(&__bss_start);
 
     /*
      * MAC domains and other Xen memory
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c	Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/traps.c	Tue May 18 15:13:45 2010 +0100
@@ -82,6 +82,11 @@ string_param("nmi", opt_nmi);
 string_param("nmi", opt_nmi);
 
 DEFINE_PER_CPU_READ_MOSTLY(u32, ler_msr);
+
+DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table);
+#ifdef CONFIG_COMPAT
+DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table);
+#endif
 
 /* Master table, used by CPU0. */
 idt_entry_t idt_table[IDT_ENTRIES];
@@ -3290,6 +3295,11 @@ void __init trap_init(void)
     /* CPU0 uses the master IDT. */
     idt_tables[0] = idt_table;
 
+    this_cpu(gdt_table) = boot_cpu_gdt_table;
+#ifdef CONFIG_COMPAT
+    this_cpu(compat_gdt_table) = boot_cpu_compat_gdt_table;
+#endif
+
     percpu_traps_init();
 
     cpu_init();
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/x86_32/supervisor_mode_kernel.S
--- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S	Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S	Tue May 18 15:13:45 2010 +0100
@@ -102,7 +102,7 @@ ENTRY(fixup_ring0_guest_stack)
 
         movl  $PER_CPU_GDT_ENTRY*8,%ecx
         lsll  %ecx,%ecx
-        shll  $PERCPU_SHIFT,%ecx
+        movl  __per_cpu_offset(,%ecx,4),%ecx
         addl  $per_cpu__init_tss,%ecx
 
         # Load Xen stack from TSS.
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/xen.lds.S
--- a/xen/arch/x86/xen.lds.S	Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/xen.lds.S	Tue May 18 15:13:45 2010 +0100
@@ -59,7 +59,7 @@ SECTIONS
        CONSTRUCTORS
   } :text
 
-  . = ALIGN(128);
+  . = ALIGN(SMP_CACHE_BYTES);
   .data.read_mostly : {
        *(.data.read_mostly)
   } :text
@@ -71,7 +71,7 @@ SECTIONS
     __lock_profile_end = .;
 #endif
 
-  . = ALIGN(4096);        /* Init code and data */
+  . = ALIGN(PAGE_SIZE);   /* Init code and data */
   __init_begin = .;
   .init.text : {
        _sinittext = .;
@@ -99,33 +99,22 @@ SECTIONS
        *(.xsm_initcall.init)
        __xsm_initcall_end = .;
   } :text
-  . = ALIGN(PAGE_SIZE);
+  . = ALIGN(STACK_SIZE);
   __init_end = .;
 
-  __per_cpu_shift = PERCPU_SHIFT; /* kdump assist */
-  .data.percpu : {
-       __per_cpu_start = .;
-       *(.data.percpu)
-       . = ALIGN(SMP_CACHE_BYTES);
-       *(.data.percpu.read_mostly)
-       . = ALIGN(PAGE_SIZE);
-       __per_cpu_data_end = .;
-  } :text
-  . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT);
-  . = ALIGN(PAGE_SIZE);
-
-  /*
-   * Do not insert anything here - the unused portion of .data.percpu
-   * will be freed/unmapped up to __bss_start (defined below).
-   */
-
   .bss : {                     /* BSS */
-       . = ALIGN(STACK_SIZE);
        __bss_start = .;
        *(.bss.stack_aligned)
        . = ALIGN(PAGE_SIZE);
        *(.bss.page_aligned)
        *(.bss)
+       . = ALIGN(SMP_CACHE_BYTES);
+       __per_cpu_start = .;
+       *(.bss.percpu)
+       . = ALIGN(SMP_CACHE_BYTES);
+       *(.bss.percpu.read_mostly)
+       . = ALIGN(SMP_CACHE_BYTES);
+       __per_cpu_data_end = .;
   } :text
 
   _end = . ;
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/common/rcupdate.c
--- a/xen/common/rcupdate.c	Tue May 18 15:05:54 2010 +0100
+++ b/xen/common/rcupdate.c	Tue May 18 15:13:45 2010 +0100
@@ -53,7 +53,7 @@ struct rcu_ctrlblk rcu_ctrlblk = {
     .cpumask = CPU_MASK_NONE,
 };
 
-DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
+DEFINE_PER_CPU(struct rcu_data, rcu_data);
 
 static int blimit = 10;
 static int qhimark = 10000;
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Tue May 18 15:05:54 2010 +0100
+++ b/xen/common/sched_credit.c	Tue May 18 15:13:45 2010 +0100
@@ -231,7 +231,7 @@ static int opt_tickle_one_idle __read_mo
 static int opt_tickle_one_idle __read_mostly = 1;
 boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle);
 
-DEFINE_PER_CPU(unsigned int, last_tickle_cpu) = 0;
+DEFINE_PER_CPU(unsigned int, last_tickle_cpu);
 
 static inline void
 __runq_tickle(unsigned int cpu, struct csched_vcpu *new)
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/drivers/cpufreq/utility.c
--- a/xen/drivers/cpufreq/utility.c	Tue May 18 15:05:54 2010 +0100
+++ b/xen/drivers/cpufreq/utility.c	Tue May 18 15:13:45 2010 +0100
@@ -36,7 +36,7 @@ struct processor_pminfo *__read_mostly p
 struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
 struct cpufreq_policy *__read_mostly cpufreq_cpu_policy[NR_CPUS];
 
-DEFINE_PER_CPU(spinlock_t, cpufreq_statistic_lock) = SPIN_LOCK_UNLOCKED;
+DEFINE_PER_CPU(spinlock_t, cpufreq_statistic_lock);
 
 /*********************************************************************
  *                    Px STATISTIC INFO                              *
@@ -94,6 +94,8 @@ int cpufreq_statistic_init(unsigned int 
     const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
     spinlock_t *cpufreq_statistic_lock =
                           &per_cpu(cpufreq_statistic_lock, cpuid);
+
+    spin_lock_init(cpufreq_statistic_lock);
 
     if ( !pmpt )
         return -EINVAL;
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/include/asm-x86/percpu.h
--- a/xen/include/asm-x86/percpu.h	Tue May 18 15:05:54 2010 +0100
+++ b/xen/include/asm-x86/percpu.h	Tue May 18 15:13:45 2010 +0100
@@ -1,17 +1,20 @@
 #ifndef __X86_PERCPU_H__
 #define __X86_PERCPU_H__
 
-#define PERCPU_SHIFT 13
-#define PERCPU_SIZE  (1UL << PERCPU_SHIFT)
+#ifndef __ASSEMBLY__
+extern char __per_cpu_start[], __per_cpu_data_end[];
+extern unsigned long __per_cpu_offset[NR_CPUS];
+void percpu_init_areas(void);
+#endif
 
 /* Separate out the type, so (int[3], foo) works. */
 #define __DEFINE_PER_CPU(type, name, suffix)                    \
-    __attribute__((__section__(".data.percpu" #suffix)))       \
+    __attribute__((__section__(".bss.percpu" #suffix)))         \
     __typeof__(type) per_cpu_##name
 
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu)  \
-    (*RELOC_HIDE(&per_cpu__##var, ((unsigned int)(cpu))<<PERCPU_SHIFT))
+    (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
 
 #define __get_cpu_var(var) \
     (per_cpu(var, smp_processor_id()))
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h	Tue May 18 15:05:54 2010 +0100
+++ b/xen/include/asm-x86/x86_32/asm_defns.h	Tue May 18 15:13:45 2010 +0100
@@ -82,7 +82,7 @@ 1: addl $4,%esp;
 #define PERFC_INCR(_name,_idx,_cur)             \
         pushl _cur;                             \
         movl VCPU_processor(_cur),_cur;         \
-        shll $PERCPU_SHIFT,_cur;                \
+        movl __per_cpu_offset(,_cur,4),_cur;    \
         incl per_cpu__perfcounters+_name*4(_cur,_idx,4);\
         popl _cur
 #else
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/include/asm-x86/x86_64/asm_defns.h
--- a/xen/include/asm-x86/x86_64/asm_defns.h	Tue May 18 15:05:54 2010 +0100
+++ b/xen/include/asm-x86/x86_64/asm_defns.h	Tue May 18 15:13:45 2010 +0100
@@ -68,7 +68,7 @@ 1: addq $8,%rsp;
         movslq VCPU_processor(_cur),_cur;       \
         pushq %rdx;                             \
         leaq per_cpu__perfcounters(%rip),%rdx;  \
-        shlq $PERCPU_SHIFT,_cur;                \
+        movq __per_cpu_offset(,_cur,8),_cur;    \
         addq %rdx,_cur;                         \
         popq %rdx;                              \
         incl _name*4(_cur,_idx,4);              \
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/xsm/flask/avc.c
--- a/xen/xsm/flask/avc.c	Tue May 18 15:05:54 2010 +0100
+++ b/xen/xsm/flask/avc.c	Tue May 18 15:13:45 2010 +0100
@@ -118,7 +118,7 @@ unsigned int avc_cache_threshold = AVC_D
 unsigned int avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD;
 
 #ifdef FLASK_AVC_STATS
-DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 };
+DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats);
 #endif
 
 static struct avc_cache avc_cache;
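The mechanism in brief: after this change there is a single link-time copy of each per-CPU variable, placed in .bss.percpu, and per_cpu(var, cpu) resolves to that copy's address plus __per_cpu_offset[cpu]. CPU0's offset stays zero, so the boot CPU keeps using the in-image copy even before any allocator exists; each secondary CPU receives a freshly allocated, zeroed xenheap area at CPU_UP_PREPARE, and the area is handed back on CPU_UP_CANCELED or CPU_DEAD. Below is a minimal standalone C sketch of that lifecycle -- userspace only, with malloc() standing in for alloc_xenheap_pages(), a static buffer standing in for the linker-provided section, and an invented example variable "ticks"; it is illustrative, not Xen code.

/*
 * Standalone userspace sketch (not Xen code) of the offset-based
 * percpu scheme introduced by this changeset.  Assumptions: malloc()
 * replaces alloc_xenheap_pages(), a static buffer replaces the
 * linker-provided .bss.percpu section, and "ticks" is invented.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_CPUS 4

/* Stand-in for the section bounded by __per_cpu_start/__per_cpu_data_end. */
static char __per_cpu_start[sizeof(unsigned long)];
#define __per_cpu_data_end (__per_cpu_start + sizeof(__per_cpu_start))

/* Offset that relocates the section to (roughly) address zero. */
#define INVALID_PERCPU_AREA (-(long)__per_cpu_start)

static long __per_cpu_offset[NR_CPUS];

/* Master copy of a "percpu variable" living in the fake section. */
#define per_cpu__ticks (*(unsigned long *)__per_cpu_start)

/* per_cpu(): the master copy's address plus the CPU's offset. */
#define per_cpu(var, cpu) \
    (*(unsigned long *)((char *)&(var) + __per_cpu_offset[(cpu)]))

static void percpu_init_areas(void)
{
    unsigned int cpu;
    /* CPU0 keeps offset 0: it uses the in-image copy directly. */
    for ( cpu = 1; cpu < NR_CPUS; cpu++ )
        __per_cpu_offset[cpu] = INVALID_PERCPU_AREA;
}

/* Models CPU_UP_PREPARE: allocate a zeroed area, record its offset. */
static int init_percpu_area(unsigned int cpu)
{
    char *p;
    if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA )
        return 0;
    if ( (p = malloc(__per_cpu_data_end - __per_cpu_start)) == NULL )
        return -ENOMEM;
    memset(p, 0, __per_cpu_data_end - __per_cpu_start);
    __per_cpu_offset[cpu] = p - __per_cpu_start; /* flat-address trick */
    return 0;
}

/* Models CPU_UP_CANCELED / CPU_DEAD: free and invalidate the offset. */
static void free_percpu_area(unsigned int cpu)
{
    free(__per_cpu_start + __per_cpu_offset[cpu]);
    __per_cpu_offset[cpu] = INVALID_PERCPU_AREA;
}

int main(void)
{
    unsigned int cpu;

    percpu_init_areas();
    for ( cpu = 1; cpu < NR_CPUS; cpu++ )
        if ( init_percpu_area(cpu) )
            return 1;

    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
        per_cpu(per_cpu__ticks, cpu) = 100 * cpu;
    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
        printf("cpu%u: ticks=%lu\n", cpu, per_cpu(per_cpu__ticks, cpu));

    for ( cpu = 1; cpu < NR_CPUS; cpu++ )
        free_percpu_area(cpu);
    return 0;
}

The sketch mirrors the patch's encoding of "no area yet": INVALID_PERCPU_AREA is minus the section's own address, so per_cpu() through an offline CPU's offset resolves to roughly address zero, which in the hypervisor is presumably unmapped and faults early rather than silently corrupting memory. The notifier priority of 100, commented "highest priority" in the patch, serves the ordering this scheme requires: the area must exist, zeroed, before any other CPU_UP_PREPARE handler touches per-CPU state for the new CPU.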