[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen stable-4.7] x86: distinguish CPU offlining from CPU removal
commit 97aff087fd0bf7d2c64c949de1c3a0282609b3e1 Author: Jan Beulich <jbeulich@xxxxxxxx> AuthorDate: Mon Jul 30 14:10:58 2018 +0200 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Mon Jul 30 14:10:58 2018 +0200 x86: distinguish CPU offlining from CPU removal In order to be able to service #MC on offlined CPUs, the GDT, IDT, stack, and per-CPU data (which includes the TSS) need to be kept allocated. They should only be freed upon CPU removal (which we currently don't support, so some code is becoming effectively dead for the moment). Note that for now park_offline_cpus doesn't get set to true anywhere - this is going to be the subject of a subsequent patch. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Reviewed-by: Wei Liu <wei.liu2@xxxxxxxxxx> Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> master commit: 2e6c8f182c9c50129b1c7a620242861e6ad6a9fb master date: 2018-07-19 13:43:33 +0100 --- xen/arch/x86/cpu/mcheck/mce.c | 14 ++++++++-- xen/arch/x86/domain.c | 9 +++--- xen/arch/x86/genapic/x2apic.c | 9 ++++-- xen/arch/x86/percpu.c | 9 ++++-- xen/arch/x86/smpboot.c | 65 ++++++++++++++++++++++++++++--------------- xen/include/asm-x86/smp.h | 2 ++ xen/include/xen/cpu.h | 2 ++ xen/include/xen/cpumask.h | 23 +++++++++++++++ xen/include/xen/mm.h | 8 ++++++ xen/include/xen/xmalloc.h | 6 ++++ 10 files changed, 111 insertions(+), 36 deletions(-) diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c index 733a49157b..0a159f13ff 100644 --- a/xen/arch/x86/cpu/mcheck/mce.c +++ b/xen/arch/x86/cpu/mcheck/mce.c @@ -685,12 +685,15 @@ static void cpu_bank_free(unsigned int cpu) mcabanks_free(poll); mcabanks_free(clr); + + per_cpu(poll_bankmask, cpu) = NULL; + per_cpu(mce_clear_banks, cpu) = NULL; } static int cpu_bank_alloc(unsigned int cpu) { - struct mca_banks *poll = mcabanks_alloc(); - struct mca_banks *clr = mcabanks_alloc(); + struct mca_banks *poll = per_cpu(poll_bankmask, cpu) ?: mcabanks_alloc(); + struct mca_banks *clr = per_cpu(mce_clear_banks, 
cpu) ?: mcabanks_alloc(); if ( !poll || !clr ) { @@ -717,7 +720,12 @@ static int cpu_callback( break; case CPU_UP_CANCELED: case CPU_DEAD: - cpu_bank_free(cpu); + if ( !park_offline_cpus ) + cpu_bank_free(cpu); + break; + case CPU_REMOVE: + if ( park_offline_cpus ) + cpu_bank_free(cpu); break; default: break; diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 596be5c4f5..b6bb1bed60 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -153,10 +153,11 @@ static void play_dead(void) local_irq_disable(); /* - * NOTE: After cpu_exit_clear, per-cpu variables are no longer accessible, - * as they may be freed at any time. In this case, heap corruption or - * #PF can occur (when heap debugging is enabled). For example, even - * printk() can involve tasklet scheduling, which touches per-cpu vars. + * NOTE: After cpu_exit_clear, per-cpu variables may no longer be accessible, + * as they may be freed at any time if offline CPUs don't get parked. In + * this case, heap corruption or #PF can occur (when heap debugging is + * enabled). For example, even printk() can involve tasklet scheduling, + * which touches per-cpu vars. * * Consider very carefully when adding code to *dead_idle. Most hypervisor * subsystems are unsafe to call. 
diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c index d894a98ebd..adce783046 100644 --- a/xen/arch/x86/genapic/x2apic.c +++ b/xen/arch/x86/genapic/x2apic.c @@ -202,18 +202,21 @@ static int update_clusterinfo( if ( !cluster_cpus_spare ) cluster_cpus_spare = xzalloc(cpumask_t); if ( !cluster_cpus_spare || - !alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) ) + !cond_alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) ) err = -ENOMEM; break; case CPU_UP_CANCELED: case CPU_DEAD: + case CPU_REMOVE: + if ( park_offline_cpus == (action != CPU_REMOVE) ) + break; if ( per_cpu(cluster_cpus, cpu) ) { cpumask_clear_cpu(cpu, per_cpu(cluster_cpus, cpu)); if ( cpumask_empty(per_cpu(cluster_cpus, cpu)) ) - xfree(per_cpu(cluster_cpus, cpu)); + XFREE(per_cpu(cluster_cpus, cpu)); } - free_cpumask_var(per_cpu(scratch_mask, cpu)); + FREE_CPUMASK_VAR(per_cpu(scratch_mask, cpu)); break; } diff --git a/xen/arch/x86/percpu.c b/xen/arch/x86/percpu.c index 1c1dad9011..bd87855a9c 100644 --- a/xen/arch/x86/percpu.c +++ b/xen/arch/x86/percpu.c @@ -27,7 +27,7 @@ static int init_percpu_area(unsigned int cpu) { char *p; if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) - return -EBUSY; + return 0; if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) return -ENOMEM; memset(p, 0, __per_cpu_data_end - __per_cpu_start); @@ -70,9 +70,12 @@ static int cpu_percpu_callback( break; case CPU_UP_CANCELED: case CPU_DEAD: - free_percpu_area(cpu); + if ( !park_offline_cpus ) + free_percpu_area(cpu); break; - default: + case CPU_REMOVE: + if ( park_offline_cpus ) + free_percpu_area(cpu); break; } diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 7874e694ed..0febeb1d34 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -60,6 +60,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_mask); cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); +bool_t __read_mostly park_offline_cpus; + unsigned int __read_mostly nr_sockets; cpumask_t 
**__read_mostly socket_cpumask; static cpumask_t *secondary_socket_cpumask; @@ -885,7 +887,14 @@ static void cleanup_cpu_root_pgt(unsigned int cpu) } } -static void cpu_smpboot_free(unsigned int cpu) +/* + * The 'remove' boolean controls whether a CPU is just getting offlined (and + * parked), or outright removed / offlined without parking. Parked CPUs need + * things like their stack, GDT, IDT, TSS, and per-CPU data still available. + * A few other items, in particular CPU masks, are also retained, as it's + * difficult to prove that they're entirely unreferenced from parked CPUs. + */ +static void cpu_smpboot_free(unsigned int cpu, bool_t remove) { unsigned int order, socket = cpu_to_socket(cpu); struct cpuinfo_x86 *c = cpu_data; @@ -896,13 +905,17 @@ static void cpu_smpboot_free(unsigned int cpu) socket_cpumask[socket] = NULL; } - c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID; - c[cpu].cpu_core_id = XEN_INVALID_CORE_ID; - c[cpu].compute_unit_id = INVALID_CUID; cpumask_clear_cpu(cpu, &cpu_sibling_setup_map); - free_cpumask_var(per_cpu(cpu_sibling_mask, cpu)); - free_cpumask_var(per_cpu(cpu_core_mask, cpu)); + if ( remove ) + { + c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID; + c[cpu].cpu_core_id = XEN_INVALID_CORE_ID; + c[cpu].compute_unit_id = INVALID_CUID; + + FREE_CPUMASK_VAR(per_cpu(cpu_sibling_mask, cpu)); + FREE_CPUMASK_VAR(per_cpu(cpu_core_mask, cpu)); + } cleanup_cpu_root_pgt(cpu); @@ -924,19 +937,21 @@ static void cpu_smpboot_free(unsigned int cpu) } order = get_order_from_pages(NR_RESERVED_GDT_PAGES); - free_xenheap_pages(per_cpu(gdt_table, cpu), order); + if ( remove ) + FREE_XENHEAP_PAGES(per_cpu(gdt_table, cpu), order); free_xenheap_pages(per_cpu(compat_gdt_table, cpu), order); - order = get_order_from_bytes(IDT_ENTRIES * sizeof(idt_entry_t)); - free_xenheap_pages(idt_tables[cpu], order); - idt_tables[cpu] = NULL; - - if ( stack_base[cpu] != NULL ) + if ( remove ) { - memguard_unguard_stack(stack_base[cpu]); - free_xenheap_pages(stack_base[cpu], 
STACK_ORDER); - stack_base[cpu] = NULL; + order = get_order_from_bytes(IDT_ENTRIES * sizeof(idt_entry_t)); + FREE_XENHEAP_PAGES(idt_tables[cpu], order); + + if ( stack_base[cpu] ) + { + memguard_unguard_stack(stack_base[cpu]); + FREE_XENHEAP_PAGES(stack_base[cpu], STACK_ORDER); + } } } @@ -950,15 +965,17 @@ static int cpu_smpboot_alloc(unsigned int cpu) if ( node != NUMA_NO_NODE ) memflags = MEMF_node(node); - stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, memflags); + if ( stack_base[cpu] == NULL ) + stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, memflags); if ( stack_base[cpu] == NULL ) goto oom; memguard_guard_stack(stack_base[cpu]); order = get_order_from_pages(NR_RESERVED_GDT_PAGES); - per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order, memflags); + gdt = per_cpu(gdt_table, cpu) ?: alloc_xenheap_pages(order, memflags); if ( gdt == NULL ) goto oom; + per_cpu(gdt_table, cpu) = gdt; memcpy(gdt, boot_cpu_gdt_table, NR_RESERVED_GDT_PAGES * PAGE_SIZE); BUILD_BUG_ON(NR_CPUS > 0x10000); gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu; @@ -970,7 +987,8 @@ static int cpu_smpboot_alloc(unsigned int cpu) gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu; order = get_order_from_bytes(IDT_ENTRIES * sizeof(idt_entry_t)); - idt_tables[cpu] = alloc_xenheap_pages(order, memflags); + if ( idt_tables[cpu] == NULL ) + idt_tables[cpu] = alloc_xenheap_pages(order, memflags); if ( idt_tables[cpu] == NULL ) goto oom; memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t)); @@ -999,12 +1017,12 @@ static int cpu_smpboot_alloc(unsigned int cpu) (secondary_socket_cpumask = xzalloc(cpumask_t)) == NULL ) goto oom; - if ( zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, cpu)) && - zalloc_cpumask_var(&per_cpu(cpu_core_mask, cpu)) ) + if ( cond_zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, cpu)) && + cond_zalloc_cpumask_var(&per_cpu(cpu_core_mask, cpu)) ) return 0; oom: - cpu_smpboot_free(cpu); + cpu_smpboot_free(cpu, 1); return -ENOMEM; } @@ -1021,9 
+1039,10 @@ static int cpu_smpboot_callback( break; case CPU_UP_CANCELED: case CPU_DEAD: - cpu_smpboot_free(cpu); + cpu_smpboot_free(cpu, !park_offline_cpus); break; - default: + case CPU_REMOVE: + cpu_smpboot_free(cpu, 1); break; } diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h index 33c2c32f42..e5d5f719b1 100644 --- a/xen/include/asm-x86/smp.h +++ b/xen/include/asm-x86/smp.h @@ -28,6 +28,8 @@ extern void smp_alloc_memory(void); DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_mask); DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask); +extern bool_t park_offline_cpus; + void smp_send_nmi_allbutself(void); void send_IPI_mask(const cpumask_t *, int vector); diff --git a/xen/include/xen/cpu.h b/xen/include/xen/cpu.h index ffefc09f8e..2fe3ec05d8 100644 --- a/xen/include/xen/cpu.h +++ b/xen/include/xen/cpu.h @@ -47,6 +47,8 @@ void register_cpu_notifier(struct notifier_block *nb); #define CPU_DYING (0x0007 | NOTIFY_REVERSE) /* CPU_DEAD: CPU is dead. */ #define CPU_DEAD (0x0008 | NOTIFY_REVERSE) +/* CPU_REMOVE: CPU was removed. */ +#define CPU_REMOVE (0x0009 | NOTIFY_REVERSE) /* Perform CPU hotplug. May return -EAGAIN. 
*/ int cpu_down(unsigned int cpu); diff --git a/xen/include/xen/cpumask.h b/xen/include/xen/cpumask.h index 0e7108c699..42c9f9d67b 100644 --- a/xen/include/xen/cpumask.h +++ b/xen/include/xen/cpumask.h @@ -341,16 +341,35 @@ static inline bool_t alloc_cpumask_var(cpumask_var_t *mask) return *mask != NULL; } +static inline bool_t cond_alloc_cpumask_var(cpumask_var_t *mask) +{ + if (*mask == NULL) + *mask = _xmalloc(nr_cpumask_bits / 8, sizeof(long)); + return *mask != NULL; +} + static inline bool_t zalloc_cpumask_var(cpumask_var_t *mask) { *(void **)mask = _xzalloc(nr_cpumask_bits / 8, sizeof(long)); return *mask != NULL; } +static inline bool_t cond_zalloc_cpumask_var(cpumask_var_t *mask) +{ + if (*mask == NULL) + *mask = _xzalloc(nr_cpumask_bits / 8, sizeof(long)); + else + cpumask_clear(*mask); + return *mask != NULL; +} + static inline void free_cpumask_var(cpumask_var_t mask) { xfree(mask); } + +/* Free an allocated mask, and zero the pointer to it. */ +#define FREE_CPUMASK_VAR(m) XFREE(m) #else typedef cpumask_t cpumask_var_t[1]; @@ -358,16 +377,20 @@ static inline bool_t alloc_cpumask_var(cpumask_var_t *mask) { return 1; } +#define cond_alloc_cpumask_var alloc_cpumask_var static inline bool_t zalloc_cpumask_var(cpumask_var_t *mask) { cpumask_clear(*mask); return 1; } +#define cond_zalloc_cpumask_var zalloc_cpumask_var static inline void free_cpumask_var(cpumask_var_t mask) { } + +#define FREE_CPUMASK_VAR(m) free_cpumask_var(m) #endif #if NR_CPUS > 1 diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h index 1100409ae8..3d2ef0a1af 100644 --- a/xen/include/xen/mm.h +++ b/xen/include/xen/mm.h @@ -98,6 +98,14 @@ void *alloc_xenheap_pages(unsigned int order, unsigned int memflags); void free_xenheap_pages(void *v, unsigned int order); #define alloc_xenheap_page() (alloc_xenheap_pages(0,0)) #define free_xenheap_page(v) (free_xenheap_pages(v,0)) + +/* Free an allocation, and zero the pointer to it. 
*/ +#define FREE_XENHEAP_PAGES(p, o) do { \ + free_xenheap_pages(p, o); \ + (p) = NULL; \ +} while ( 0 ) +#define FREE_XENHEAP_PAGE(p) FREE_XENHEAP_PAGES(p, 0) + /* Map machine page range in Xen virtual address space. */ int map_pages_to_xen( unsigned long virt, diff --git a/xen/include/xen/xmalloc.h b/xen/include/xen/xmalloc.h index 24a99ac244..ab093935c6 100644 --- a/xen/include/xen/xmalloc.h +++ b/xen/include/xen/xmalloc.h @@ -26,6 +26,12 @@ /* Free any of the above. */ extern void xfree(void *); +/* Free an allocation, and zero the pointer to it. */ +#define XFREE(p) do { \ + xfree(p); \ + (p) = NULL; \ +} while ( 0 ) + /* Underlying functions */ extern void *_xmalloc(unsigned long size, unsigned long align); extern void *_xzalloc(unsigned long size, unsigned long align); -- generated by git-patchbot for /home/xen/git/xen.git#stable-4.7 _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |