[Xen-devel] [PATCH RFC 39/44] x86/smp: Introduce get_smp_ipi_buf() and take more IPI parameters off the stack
A number of hypercalls and softirq tasks pass small stack buffers via IPI.
These operate sequentially on a single CPU, so introduce a shared PER_CPU
buffer for them to use.  Access to the buffer is via get_smp_ipi_buf(),
which checks at compile time that the object fits within the buffer.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 xen/arch/x86/acpi/cpu_idle.c         | 30 +++++++++----------
 xen/arch/x86/acpi/cpufreq/cpufreq.c  | 57 ++++++++++++++++++------------------
 xen/arch/x86/acpi/cpufreq/powernow.c | 26 ++++++++--------
 xen/arch/x86/platform_hypercall.c    | 40 ++++++++++++-------------
 xen/arch/x86/psr.c                   |  9 +++---
 xen/arch/x86/pv/pt-shadow.c          | 12 ++++----
 xen/arch/x86/smp.c                   |  2 ++
 xen/arch/x86/sysctl.c                | 10 +++----
 xen/include/asm-x86/smp.h            | 20 +++++++++++++
 9 files changed, 114 insertions(+), 92 deletions(-)

diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index cb1c5da..0479826 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -246,23 +246,23 @@ static void get_hw_residencies(uint32_t cpu, struct hw_residencies *hw_res)
 
 static void print_hw_residencies(uint32_t cpu)
 {
-    struct hw_residencies hw_res;
+    struct hw_residencies *hw_res = get_smp_ipi_buf(struct hw_residencies);
 
-    get_hw_residencies(cpu, &hw_res);
+    get_hw_residencies(cpu, hw_res);
 
-    if ( hw_res.mc0 | hw_res.mc6 )
+    if ( hw_res->mc0 | hw_res->mc6 )
         printk("MC0[%"PRIu64"] MC6[%"PRIu64"]\n",
-               hw_res.mc0, hw_res.mc6);
+               hw_res->mc0, hw_res->mc6);
     printk("PC2[%"PRIu64"] PC%d[%"PRIu64"] PC6[%"PRIu64"] PC7[%"PRIu64"]\n",
-           hw_res.pc2,
-           hw_res.pc4 ? 4 : 3, hw_res.pc4 ?: hw_res.pc3,
-           hw_res.pc6, hw_res.pc7);
-    if ( hw_res.pc8 | hw_res.pc9 | hw_res.pc10 )
+           hw_res->pc2,
+           hw_res->pc4 ? 4 : 3, hw_res->pc4 ?: hw_res->pc3,
+           hw_res->pc6, hw_res->pc7);
+    if ( hw_res->pc8 | hw_res->pc9 | hw_res->pc10 )
         printk("PC8[%"PRIu64"] PC9[%"PRIu64"] PC10[%"PRIu64"]\n",
-               hw_res.pc8, hw_res.pc9, hw_res.pc10);
+               hw_res->pc8, hw_res->pc9, hw_res->pc10);
     printk("CC%d[%"PRIu64"] CC6[%"PRIu64"] CC7[%"PRIu64"]\n",
-           hw_res.cc1 ? 1 : 3, hw_res.cc1 ?: hw_res.cc3,
-           hw_res.cc6, hw_res.cc7);
+           hw_res->cc1 ? 1 : 3, hw_res->cc1 ?: hw_res->cc3,
+           hw_res->cc6, hw_res->cc7);
 }
 
 static char* acpi_cstate_method_name[] =
@@ -1251,7 +1251,7 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
     }
     else
     {
-        struct hw_residencies hw_res;
+        struct hw_residencies *hw_res = get_smp_ipi_buf(struct hw_residencies);
         signed int last_state_idx;
 
         stat->nr = power->count;
@@ -1285,13 +1285,13 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
             idle_res += res[i];
         }
 
-        get_hw_residencies(cpuid, &hw_res);
+        get_hw_residencies(cpuid, hw_res);
 
 #define PUT_xC(what, n) do { \
         if ( stat->nr_##what >= n && \
-             copy_to_guest_offset(stat->what, n - 1, &hw_res.what##n, 1) ) \
+             copy_to_guest_offset(stat->what, n - 1, &hw_res->what##n, 1) ) \
             return -EFAULT; \
-        if ( hw_res.what##n ) \
+        if ( hw_res->what##n ) \
             nr_##what = n; \
     } while ( 0 )
 #define PUT_PC(n) PUT_xC(pc, n)
diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index 1f8d02a..f295e1e 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -198,7 +198,7 @@ static u32 get_cur_val(const cpumask_t *mask)
 {
     struct cpufreq_policy *policy;
     struct processor_performance *perf;
-    struct drv_cmd cmd;
+    struct drv_cmd *cmd = get_smp_ipi_buf(struct drv_cmd);
     unsigned int cpu = smp_processor_id();
 
     if (unlikely(cpumask_empty(mask)))
@@ -215,23 +215,23 @@ static u32 get_cur_val(const cpumask_t *mask)
     switch (cpufreq_drv_data[policy->cpu]->arch_cpu_flags) {
     case SYSTEM_INTEL_MSR_CAPABLE:
-        cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-        cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
+        cmd->type = SYSTEM_INTEL_MSR_CAPABLE;
+        cmd->addr.msr.reg = MSR_IA32_PERF_STATUS;
         break;
     case SYSTEM_IO_CAPABLE:
-        cmd.type = SYSTEM_IO_CAPABLE;
+        cmd->type = SYSTEM_IO_CAPABLE;
         perf = cpufreq_drv_data[policy->cpu]->acpi_data;
-        cmd.addr.io.port = perf->control_register.address;
-        cmd.addr.io.bit_width = perf->control_register.bit_width;
+        cmd->addr.io.port = perf->control_register.address;
+        cmd->addr.io.bit_width = perf->control_register.bit_width;
         break;
     default:
         return 0;
     }
 
-    cmd.mask = cpumask_of(cpu);
+    cmd->mask = cpumask_of(cpu);
 
-    drv_read(&cmd);
-    return cmd.val;
+    drv_read(cmd);
+    return cmd->val;
 }
 
 struct perf_pair {
@@ -270,7 +270,7 @@ static void read_measured_perf_ctrs(void *_readin)
 unsigned int get_measured_perf(unsigned int cpu, unsigned int flag)
 {
     struct cpufreq_policy *policy;
-    struct perf_pair readin, cur, *saved;
+    struct perf_pair *readin = get_smp_ipi_buf(struct perf_pair), cur, *saved;
     unsigned int perf_percent;
     unsigned int retval;
 
@@ -298,16 +298,15 @@ unsigned int get_measured_perf(unsigned int cpu, unsigned int flag)
     }
 
     if (cpu == smp_processor_id()) {
-        read_measured_perf_ctrs((void *)&readin);
+        read_measured_perf_ctrs(readin);
     } else {
-        on_selected_cpus(cpumask_of(cpu), read_measured_perf_ctrs,
-                         &readin, 1);
+        on_selected_cpus(cpumask_of(cpu), read_measured_perf_ctrs, readin, 1);
     }
 
-    cur.aperf.whole = readin.aperf.whole - saved->aperf.whole;
-    cur.mperf.whole = readin.mperf.whole - saved->mperf.whole;
-    saved->aperf.whole = readin.aperf.whole;
-    saved->mperf.whole = readin.mperf.whole;
+    cur.aperf.whole = readin->aperf.whole - saved->aperf.whole;
+    cur.mperf.whole = readin->mperf.whole - saved->mperf.whole;
+    saved->aperf.whole = readin->aperf.whole;
+    saved->mperf.whole = readin->mperf.whole;
 
     if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
         int shift_count = 7;
@@ -389,7 +388,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
     struct processor_performance *perf;
     struct cpufreq_freqs freqs;
     cpumask_t online_policy_cpus;
-    struct drv_cmd cmd;
+    struct drv_cmd *cmd = get_smp_ipi_buf(struct drv_cmd);
     unsigned int next_state = 0; /* Index into freq_table */
     unsigned int next_perf_state = 0; /* Index into perf table */
     unsigned int j;
@@ -424,31 +423,31 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 
     switch (data->arch_cpu_flags) {
     case SYSTEM_INTEL_MSR_CAPABLE:
-        cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-        cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
-        cmd.val = (u32) perf->states[next_perf_state].control;
+        cmd->type = SYSTEM_INTEL_MSR_CAPABLE;
+        cmd->addr.msr.reg = MSR_IA32_PERF_CTL;
+        cmd->val = (u32) perf->states[next_perf_state].control;
         break;
     case SYSTEM_IO_CAPABLE:
-        cmd.type = SYSTEM_IO_CAPABLE;
-        cmd.addr.io.port = perf->control_register.address;
-        cmd.addr.io.bit_width = perf->control_register.bit_width;
-        cmd.val = (u32) perf->states[next_perf_state].control;
+        cmd->type = SYSTEM_IO_CAPABLE;
+        cmd->addr.io.port = perf->control_register.address;
+        cmd->addr.io.bit_width = perf->control_register.bit_width;
+        cmd->val = (u32) perf->states[next_perf_state].control;
         break;
     default:
         return -ENODEV;
     }
 
     if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
-        cmd.mask = &online_policy_cpus;
+        cmd->mask = &online_policy_cpus;
     else
-        cmd.mask = cpumask_of(policy->cpu);
+        cmd->mask = cpumask_of(policy->cpu);
 
     freqs.old = perf->states[perf->state].core_frequency * 1000;
     freqs.new = data->freq_table[next_state].frequency;
 
-    drv_write(&cmd);
+    drv_write(cmd);
 
-    if (acpi_pstate_strict && !check_freqs(cmd.mask, freqs.new, data)) {
+    if (acpi_pstate_strict && !check_freqs(cmd->mask, freqs.new, data)) {
         printk(KERN_WARNING "Fail transfer to new freq %d\n", freqs.new);
         return -EAGAIN;
     }
diff --git a/xen/arch/x86/acpi/cpufreq/powernow.c b/xen/arch/x86/acpi/cpufreq/powernow.c
index 8f1ac74..72d95b7 100644
--- a/xen/arch/x86/acpi/cpufreq/powernow.c
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c
@@ -94,7 +94,7 @@ static int powernow_cpufreq_target(struct cpufreq_policy *policy,
     struct acpi_cpufreq_data *data = cpufreq_drv_data[policy->cpu];
     struct processor_performance *perf;
     unsigned int next_state; /* Index into freq_table */
-    unsigned int next_perf_state; /* Index into perf table */
+    unsigned int *next_perf_state = get_smp_ipi_buf(unsigned int);
     int result;
 
     if (unlikely(data == NULL ||
@@ -110,8 +110,8 @@ static int powernow_cpufreq_target(struct cpufreq_policy *policy,
     if (unlikely(result))
         return result;
 
-    next_perf_state = data->freq_table[next_state].index;
-    if (perf->state == next_perf_state) {
+    *next_perf_state = data->freq_table[next_state].index;
+    if (perf->state == *next_perf_state) {
         if (unlikely(data->arch_cpu_flags & ARCH_CPU_FLAG_RESUME))
             data->arch_cpu_flags &= ~ARCH_CPU_FLAG_RESUME;
         else
@@ -120,8 +120,8 @@ static int powernow_cpufreq_target(struct cpufreq_policy *policy,
 
     if (policy->shared_type == CPUFREQ_SHARED_TYPE_HW &&
         likely(policy->cpu == smp_processor_id())) {
-        transition_pstate(&next_perf_state);
-        cpufreq_statistic_update(policy->cpu, perf->state, next_perf_state);
+        transition_pstate(next_perf_state);
+        cpufreq_statistic_update(policy->cpu, perf->state, *next_perf_state);
     } else {
         cpumask_t online_policy_cpus;
         unsigned int cpu;
@@ -131,15 +131,15 @@ static int powernow_cpufreq_target(struct cpufreq_policy *policy,
         if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
             unlikely(policy->cpu != smp_processor_id()))
             on_selected_cpus(&online_policy_cpus, transition_pstate,
-                             &next_perf_state, 1);
+                             next_perf_state, 1);
         else
-            transition_pstate(&next_perf_state);
+            transition_pstate(next_perf_state);
 
         for_each_cpu(cpu, &online_policy_cpus)
-            cpufreq_statistic_update(cpu, perf->state, next_perf_state);
+            cpufreq_statistic_update(cpu, perf->state, *next_perf_state);
     }
 
-    perf->state = next_perf_state;
+    perf->state = *next_perf_state;
     policy->cur = data->freq_table[next_state].frequency;
 
     return 0;
@@ -236,7 +236,7 @@ static int powernow_cpufreq_cpu_init(struct cpufreq_policy *policy)
     struct acpi_cpufreq_data *data;
     unsigned int result = 0;
     struct processor_performance *perf;
-    struct amd_cpu_data info;
+    struct amd_cpu_data *info = get_smp_ipi_buf(struct amd_cpu_data);
     struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
 
     data = xzalloc(struct acpi_cpufreq_data);
@@ -247,7 +247,7 @@ static int powernow_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
     data->acpi_data = &processor_pminfo[cpu]->perf;
 
-    info.perf = perf = data->acpi_data;
+    info->perf = perf = data->acpi_data;
     policy->shared_type = perf->shared_type;
 
     if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
@@ -293,10 +293,10 @@ static int powernow_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
     policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
 
-    on_selected_cpus(cpumask_of(cpu), get_cpu_data, &info, 1);
+    on_selected_cpus(cpumask_of(cpu), get_cpu_data, info, 1);
 
     /* table init */
-    for (i = 0; i < perf->state_count && i <= info.max_hw_pstate; i++) {
+    for (i = 0; i < perf->state_count && i <= info->max_hw_pstate; i++) {
         if (i > 0 && perf->states[i].core_frequency >=
             data->freq_table[valid_states-1].frequency / 1000)
             continue;
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index ebc2f39..4439bf9 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -728,21 +728,21 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
 
     case XENPF_resource_op:
     {
-        struct resource_access ra;
+        struct resource_access *ra = get_smp_ipi_buf(struct resource_access);
         unsigned int cpu;
         XEN_GUEST_HANDLE(xenpf_resource_entry_t) guest_entries;
 
-        ra.nr_entries = op->u.resource_op.nr_entries;
-        if ( ra.nr_entries == 0 )
+        ra->nr_entries = op->u.resource_op.nr_entries;
+        if ( ra->nr_entries == 0 )
             break;
-        if ( ra.nr_entries > RESOURCE_ACCESS_MAX_ENTRIES )
+        if ( ra->nr_entries > RESOURCE_ACCESS_MAX_ENTRIES )
         {
             ret = -EINVAL;
             break;
         }
 
-        ra.entries = xmalloc_array(xenpf_resource_entry_t, ra.nr_entries);
-        if ( !ra.entries )
+        ra->entries = xmalloc_array(xenpf_resource_entry_t, ra->nr_entries);
+        if ( !ra->entries )
         {
             ret = -ENOMEM;
             break;
@@ -750,46 +750,46 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
 
         guest_from_compat_handle(guest_entries, op->u.resource_op.entries);
 
-        if ( copy_from_guest(ra.entries, guest_entries, ra.nr_entries) )
+        if ( copy_from_guest(ra->entries, guest_entries, ra->nr_entries) )
         {
-            xfree(ra.entries);
+            xfree(ra->entries);
             ret = -EFAULT;
             break;
         }
 
        /* Do sanity check earlier to omit the potential IPI overhead. */
-        check_resource_access(&ra);
-        if ( ra.nr_done == 0 )
+        check_resource_access(ra);
+        if ( ra->nr_done == 0 )
         {
             /* Copy the return value for entry 0 if it failed. */
-            if ( __copy_to_guest(guest_entries, ra.entries, 1) )
+            if ( __copy_to_guest(guest_entries, ra->entries, 1) )
                 ret = -EFAULT;
 
-            xfree(ra.entries);
+            xfree(ra->entries);
             break;
         }
 
         cpu = op->u.resource_op.cpu;
         if ( (cpu >= nr_cpu_ids) || !cpu_online(cpu) )
         {
-            xfree(ra.entries);
+            xfree(ra->entries);
             ret = -ENODEV;
             break;
         }
 
         if ( cpu == smp_processor_id() )
-            resource_access(&ra);
+            resource_access(ra);
         else
-            on_selected_cpus(cpumask_of(cpu), resource_access, &ra, 1);
+            on_selected_cpus(cpumask_of(cpu), resource_access, ra, 1);
 
         /* Copy all if succeeded or up to the failed entry. */
-        if ( __copy_to_guest(guest_entries, ra.entries,
-                             ra.nr_done < ra.nr_entries ? ra.nr_done + 1
-                                                        : ra.nr_entries) )
+        if ( __copy_to_guest(guest_entries, ra->entries,
+                             ra->nr_done < ra->nr_entries ? ra->nr_done + 1
+                                                          : ra->nr_entries) )
             ret = -EFAULT;
         else
-            ret = ra.nr_done;
+            ret = ra->nr_done;
 
-        xfree(ra.entries);
+        xfree(ra->entries);
     }
     break;
 
diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
index 0ba8ef8..a6f6fb3 100644
--- a/xen/arch/x86/psr.c
+++ b/xen/arch/x86/psr.c
@@ -1285,8 +1285,9 @@ static int write_psr_msrs(unsigned int socket, unsigned int cos,
                           enum psr_feat_type feat_type)
 {
     struct psr_socket_info *info = get_socket_info(socket);
-    struct cos_write_info data =
-    {
+    struct cos_write_info *data = get_smp_ipi_buf(struct cos_write_info);
+
+    *data = (struct cos_write_info){
         .cos = cos,
         .val = val,
         .array_len = array_len,
@@ -1296,14 +1297,14 @@ static int write_psr_msrs(unsigned int socket, unsigned int cos,
         return -EINVAL;
 
     if ( socket == cpu_to_socket(smp_processor_id()) )
-        do_write_psr_msrs(&data);
+        do_write_psr_msrs(data);
     else
     {
         unsigned int cpu = get_socket_cpu(socket);
 
         if ( cpu >= nr_cpu_ids )
             return -ENOTSOCK;
-        on_selected_cpus(cpumask_of(cpu), do_write_psr_msrs, &data, 1);
+        on_selected_cpus(cpumask_of(cpu), do_write_psr_msrs, data, 1);
     }
 
     return 0;
diff --git a/xen/arch/x86/pv/pt-shadow.c b/xen/arch/x86/pv/pt-shadow.c
index b4f2b86..d550ae1 100644
--- a/xen/arch/x86/pv/pt-shadow.c
+++ b/xen/arch/x86/pv/pt-shadow.c
@@ -367,35 +367,35 @@ static void _pt_shadow_ipi(void *arg)
 void pt_shadow_l4_write(const struct domain *d, const struct page_info *pg,
                         unsigned int slot)
 {
-    struct ptsh_ipi_info info;
+    struct ptsh_ipi_info *info = get_smp_ipi_buf(struct ptsh_ipi_info);
 
     if ( !pt_need_shadow(d) )
         return;
 
-    info = (struct ptsh_ipi_info){
+    *info = (struct ptsh_ipi_info){
         .d = d,
         .pg = pg,
         .op = PTSH_IPI_WRITE,
         .slot = slot,
     };
 
-    on_selected_cpus(d->domain_dirty_cpumask, _pt_shadow_ipi, &info, 1);
+    on_selected_cpus(d->domain_dirty_cpumask, _pt_shadow_ipi, info, 1);
 }
 
 void pt_shadow_l4_invlpg(const struct domain *d, const struct page_info *pg)
 {
-    struct ptsh_ipi_info info;
+    struct ptsh_ipi_info *info = get_smp_ipi_buf(struct ptsh_ipi_info);
 
     if ( !pt_need_shadow(d) )
         return;
 
-    info = (struct ptsh_ipi_info){
+    *info = (struct ptsh_ipi_info){
         .d = d,
         .pg = pg,
         .op = PTSH_IPI_INVLPG,
     };
 
-    on_selected_cpus(d->domain_dirty_cpumask, _pt_shadow_ipi, &info, 1);
+    on_selected_cpus(d->domain_dirty_cpumask, _pt_shadow_ipi, info, 1);
 }
 
 /*
diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index fd6d254..68d3af0 100644
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -22,6 +22,8 @@
 #include <asm/hvm/support.h>
 #include <mach_apic.h>
 
+DEFINE_PER_CPU(struct smp_ipi_buf, smp_ipi_buf);
+
 /*
  * send_IPI_mask(cpumask, vector): sends @vector IPI to CPUs in @cpumask,
  * excluding the local CPU. @cpumask may be empty.
diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
index 4d372db..7ecf8df 100644
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -139,7 +139,7 @@ long arch_do_sysctl(
         break;
 
     case XEN_SYSCTL_PSR_CMT_get_l3_cache_size:
     {
-        struct l3_cache_info info;
+        struct l3_cache_info *info = get_smp_ipi_buf(struct l3_cache_info);
         unsigned int cpu = sysctl->u.psr_cmt_op.u.l3_cache.cpu;
 
         if ( (cpu >= nr_cpu_ids) || !cpu_online(cpu) )
@@ -149,12 +149,12 @@ long arch_do_sysctl(
             break;
         }
         if ( cpu == smp_processor_id() )
-            l3_cache_get(&info);
+            l3_cache_get(info);
         else
-            on_selected_cpus(cpumask_of(cpu), l3_cache_get, &info, 1);
+            on_selected_cpus(cpumask_of(cpu), l3_cache_get, info, 1);
 
-        ret = info.ret;
-        sysctl->u.psr_cmt_op.u.data = (ret ? 0 : info.size);
+        ret = info->ret;
+        sysctl->u.psr_cmt_op.u.data = (ret ? 0 : info->size);
         break;
     }
     case XEN_SYSCTL_PSR_CMT_get_l3_event_mask:
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index 46bbf0d..d915c1e 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -13,6 +13,7 @@
 #ifndef __ASSEMBLY__
 #include <xen/bitops.h>
 #include <asm/mpspec.h>
+#include <asm/hardirq.h>
 #endif
 
 #define BAD_APICID (-1U)
@@ -89,6 +90,25 @@ static inline bool arch_ipi_param_ok(const void *_param)
             l4_table_offset(param) != l4_table_offset(PERCPU_LINEAR_START));
 }
 
+struct smp_ipi_buf {
+#define SMP_IPI_BUF_SZ 0x70
+    char OPAQUE[SMP_IPI_BUF_SZ];
+};
+DECLARE_PER_CPU(struct smp_ipi_buf, smp_ipi_buf);
+
+/*
+ * Wrapper to obtain an IPI bounce buffer, checking that there is sufficient
+ * size.  The choice of SMP_IPI_BUF_SZ is arbitrary, and should be the size of
+ * the largest object passed into an IPI.
+ */
+#define get_smp_ipi_buf(obj)                                            \
+    ({                                                                  \
+        typeof(obj) *_o = (void *)this_cpu(smp_ipi_buf).OPAQUE;         \
+        BUILD_BUG_ON(sizeof(obj) > SMP_IPI_BUF_SZ);                     \
+        ASSERT(!in_irq());                                              \
+        _o;                                                             \
+    })
+
 #endif /* !__ASSEMBLY__ */
 
 #endif
-- 
2.1.4
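
For readers who want the mechanism in isolation, here is a minimal
standalone sketch of the bounce-buffer pattern in plain C11 (plus GNU
statement expressions).  All names in it (NR_CPUS, IPI_BUF_SZ, ipi_buf,
this_cpu_id, get_ipi_buf) are hypothetical stand-ins for illustration, not
Xen's real API; the real get_smp_ipi_buf() additionally ASSERT()s
!in_irq(), since reuse from interrupt context would corrupt a buffer
already in flight.

/*
 * Standalone sketch of a per-CPU IPI bounce buffer with a compile-time
 * size check.  Illustrative only; not Xen code.
 */
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>

#define NR_CPUS    4
#define IPI_BUF_SZ 0x70  /* must cover the largest object passed via IPI */

struct ipi_buf {
    /* Align for any small struct placed in the buffer. */
    _Alignas(max_align_t) char opaque[IPI_BUF_SZ];
};

static struct ipi_buf ipi_buf[NR_CPUS];  /* one bounce buffer per CPU */
static unsigned int this_cpu_id;         /* stand-in for smp_processor_id() */

/*
 * Hand out the local CPU's bounce buffer as a typed pointer, rejecting
 * oversized types at build time rather than overflowing at run time.
 */
#define get_ipi_buf(type)                                       \
    ({                                                          \
        _Static_assert(sizeof(type) <= IPI_BUF_SZ,              \
                       "type too large for IPI bounce buffer"); \
        (type *)ipi_buf[this_cpu_id].opaque;                    \
    })

struct hw_residencies {
    uint64_t pc2, pc6, cc6;
};

int main(void)
{
    /*
     * Caller-side pattern from the patch: a pointer into the per-CPU
     * buffer replaces what used to be a struct on the stack.
     */
    struct hw_residencies *hw_res = get_ipi_buf(struct hw_residencies);

    hw_res->pc2 = 100;
    hw_res->pc6 = 200;
    hw_res->cc6 = 300;

    printf("PC2[%"PRIu64"] PC6[%"PRIu64"] CC6[%"PRIu64"]\n",
           hw_res->pc2, hw_res->pc6, hw_res->cc6);

    return 0;
}

Built with gcc -std=gnu11, asking for a type larger than IPI_BUF_SZ fails
the compile, which is the same safety property the BUILD_BUG_ON() in
get_smp_ipi_buf() provides.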