[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH v5 11/12] cpufreq: add hwdom-cpufreq driver
This driver uses hwdom to change frequencies on physical CPUs.

Workflow:
 * cpufreq governor driver in Xen wants to change the
   frequency of the physical CPU
 * hwdom-cpufreq driver sets parameters in the shared
   memory
 * hwdom-cpufreq driver sends an event via event channel
   to notify the hardware domain
 * cpufreq driver in the hardware domain reads parameters
   from the shared memory, changes frequency and copies
   the result of the operation to the shared memory
 * cpufreq driver in the hwdom sends an event via event
   channel to notify the hwdom-cpufreq driver

Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
---
 xen/Rules.mk                        |   1 +
 xen/common/sysctl.c                 |   8 +
 xen/drivers/cpufreq/Makefile        |   1 +
 xen/drivers/cpufreq/hwdom-cpufreq.c | 471 ++++++++++++++++++++++++++++++++++++
 xen/include/xen/cpufreq.h           |   2 +
 5 files changed, 483 insertions(+)
 create mode 100644 xen/drivers/cpufreq/hwdom-cpufreq.c

diff --git a/xen/Rules.mk b/xen/Rules.mk
index 3b0b89b..cccbc72 100644
--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -56,6 +56,7 @@ CFLAGS-$(perfc_arrays) += -DPERF_ARRAYS
 CFLAGS-$(lock_profile) += -DLOCK_PROFILE
 CFLAGS-$(HAS_ACPI) += -DHAS_ACPI
 CFLAGS-$(HAS_CPUFREQ) += -DHAS_CPUFREQ
+CFLAGS-$(HAS_HWDOM_CPUFREQ) += -DHAS_HWDOM_CPUFREQ
 CFLAGS-$(HAS_PM) += -DHAS_PM
 CFLAGS-$(HAS_CPU_TURBO) += -DHAS_CPU_TURBO
 CFLAGS-$(HAS_GDBSX) += -DHAS_GDBSX
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 0dcf06a..fd0cd0d 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -27,6 +27,7 @@
 #include <xsm/xsm.h>
 #include <xen/pmstat.h>
 #include <xen/gcov.h>
+#include <xen/cpufreq.h>
 
 long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
 {
@@ -362,6 +363,13 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
         break;
 #endif
 
+#ifdef HAS_HWDOM_CPUFREQ
+    case XEN_SYSCTL_cpufreq_op:
+        ret = sysctl_cpufreq_op(&op->u.cpufreq_op);
+        copyback = 1;
+        break;
+#endif
+
     default:
         ret = arch_do_sysctl(op, u_sysctl);
         copyback = 0;
diff --git a/xen/drivers/cpufreq/Makefile b/xen/drivers/cpufreq/Makefile
index b87d127..891997c 100644
--- a/xen/drivers/cpufreq/Makefile
+++ b/xen/drivers/cpufreq/Makefile
@@ -2,3 +2,4 @@ obj-y += cpufreq.o
 obj-y += cpufreq_ondemand.o
 obj-y += cpufreq_misc_governors.o
 obj-y += utility.o
+obj-$(HAS_HWDOM_CPUFREQ) += hwdom-cpufreq.o
diff --git a/xen/drivers/cpufreq/hwdom-cpufreq.c b/xen/drivers/cpufreq/hwdom-cpufreq.c
new file mode 100644
index 0000000..3932dca
--- /dev/null
+++ b/xen/drivers/cpufreq/hwdom-cpufreq.c
@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx>
+ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@xxxxxxxx>
+ * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@xxxxxxxxx>
+ *
+ * Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ * porting acpi-cpufreq.c from Linux 2.6.23 to Xen hypervisor
+ *
+ * Copyright (C) 2014 GlobalLogic Inc.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/irq.h>
+#include <xen/spinlock.h>
+#include <xen/cpufreq.h>
+#include <xen/err.h>
+#include <xen/timer.h>
+#include <asm/shared.h>
+#include <asm/current.h>
+#include <asm/system.h>
+
+/* How long to wait for the hardware domain to answer a request. */
+#define WAIT_HWDOM_ANSWER_TOUT (2000) /* ms */
+
+/* Per-CPU driver data, allocated in hwdom_cpufreq_cpu_init(). */
+struct hwdom_cpufreq_cpu_data {
+    struct processor_performance *perf_data;
+    struct cpufreq_frequency_table *freq_table;
+};
+
+/* Driver-wide state; a single static instance exists. */
+struct hwdom_cpufreq {
+    struct hwdom_cpufreq_cpu_data *cpu_data[NR_CPUS];
+    struct domain *domain;      /* domain to notify */
+    spinlock_t drv_lock;        /* protects domain and port */
+    spinlock_t hwdom_res_lock;  /* protects hwdom_res */
+    bool_t is_timer_active;     /* a request awaits its answer */
+    spinlock_t timer_lock;      /* protects is_timer_active */
+    struct timer timer;         /* answer timeout */
+    uint32_t port;              /* event channel port */
+    int32_t hwdom_res;          /* result of the last request */
+};
+
+static struct hwdom_cpufreq hwdom_cpufreq;
+
+int cpufreq_cpu_init(unsigned int cpuid)
+{
+    return cpufreq_add_cpu(cpuid);
+}
+
+/*
+ * Notify the hwdom (to do some command).
+ *
+ * Nothing is sent while no event channel is registered, i.e. while
+ * hwdom_cpufreq.domain is NULL; a pending request then ends with -ETIME
+ * once the answer timeout fires.
+ */
+static void notify_cpufreq_domain(void)
+{
+    uint32_t port;
+    struct domain *domain;
+
+    spin_lock(&hwdom_cpufreq.drv_lock);
+    port = hwdom_cpufreq.port;
+    domain = hwdom_cpufreq.domain;
+    spin_unlock(&hwdom_cpufreq.drv_lock);
+
+    if ( domain == NULL )
+        return;
+
+    notify_via_xen_event_channel(domain, port);
+}
+
+/* Cancel the answer timeout and publish CPUFREQ_CMD_idle to the hwdom. */
+static void cpufreq_hwdom_idle(void)
+{
+    struct cpufreq_sh_info *cpufreq_info;
+
+    stop_timer(&hwdom_cpufreq.timer);
+
+    spin_lock(&hwdom_cpufreq.timer_lock);
+    hwdom_cpufreq.is_timer_active = false;
+    spin_unlock(&hwdom_cpufreq.timer_lock);
+
+    cpufreq_info = arch_get_cpufreq_addr(dom0);
+
+    cpufreq_info->cmd = CPUFREQ_CMD_idle;
+
+    smp_wmb(); /* above must be visible to the hwdom */
+
+    /* Notification is not needed in case CPUFREQ_CMD_idle */
+}
+
+/*
+ * Arm the answer timeout, write a change-frequency request to the shared
+ * memory and notify the hwdom.
+ */
+static void cpufreq_hwdom_change_freq(uint32_t cpu, uint32_t freq,
+                                      uint32_t relation)
+{
+    struct cpufreq_sh_info *cpufreq_info;
+
+    spin_lock(&hwdom_cpufreq.timer_lock);
+    hwdom_cpufreq.is_timer_active = true;
+    spin_unlock(&hwdom_cpufreq.timer_lock);
+
+    set_timer(&hwdom_cpufreq.timer, NOW() + MILLISECS(WAIT_HWDOM_ANSWER_TOUT));
+
+    cpufreq_info = arch_get_cpufreq_addr(dom0);
+
+    cpufreq_info->cpu = cpu;
+    cpufreq_info->freq = freq;
+    cpufreq_info->relation = relation;
+    cpufreq_info->cmd = CPUFREQ_CMD_change_freq;
+
+    smp_wmb(); /* above must be visible before notify_cpufreq_domain() */
+
+    notify_cpufreq_domain();
+}
+
+/* Return true while a request is waiting for the hwdom's answer. */
+static bool_t cpufreq_is_waiting_answer(void)
+{
+    bool_t ret;
+
+    spin_lock(&hwdom_cpufreq.timer_lock);
+    ret = hwdom_cpufreq.is_timer_active;
+    spin_unlock(&hwdom_cpufreq.timer_lock);
+
+    return ret;
+}
+
+/* Record the result of the last request. */
+static void cpufreq_set_hwdom_res(int32_t result)
+{
+    spin_lock(&hwdom_cpufreq.hwdom_res_lock);
+    hwdom_cpufreq.hwdom_res = result;
+    spin_unlock(&hwdom_cpufreq.hwdom_res_lock);
+}
+
+/* Return the recorded result of the last request. */
+static int32_t cpufreq_get_hwdom_res(void)
+{
+    int32_t ret;
+
+    spin_lock(&hwdom_cpufreq.hwdom_res_lock);
+    ret = hwdom_cpufreq.hwdom_res;
+    spin_unlock(&hwdom_cpufreq.hwdom_res_lock);
+
+    return ret;
+}
+
+/* Timer handler: the hwdom did not answer in time, record -ETIME. */
+static void cpufreq_hwdom_answer_tout(void *data)
+{
+    cpufreq_hwdom_idle();
+    cpufreq_set_hwdom_res(-ETIME);
+}
+
+/* Notification from the hwdom (frequency changed) */
+static void cpufreq_notification(struct vcpu *v, unsigned int port)
+{
+    struct cpufreq_sh_info *cpufreq_info;
+
+    /* if we are not waiting answer just skip strange notifications */
+    if ( !cpufreq_is_waiting_answer() )
+        return;
+
+    cpufreq_hwdom_idle();
+
+    cpufreq_info = arch_get_cpufreq_addr(dom0);
+
+    /* Read the result written by the hwdom and record it */
+    smp_rmb();
+    cpufreq_set_hwdom_res(cpufreq_info->result);
+}
+
+/*
+ * Handle XEN_SYSCTL_cpufreq_op: allocate (event_start) or free (event_stop)
+ * the event channel used to exchange notifications with the calling domain.
+ *
+ * On event_start the allocated port is returned in op->port.  Returns 0 on
+ * success, -ESRCH if the calling domain cannot be found, -EOPNOTSUPP for an
+ * unknown command, or the error from alloc_unbound_xen_event_channel().
+ */
+int sysctl_cpufreq_op(xen_sysctl_cpufreq_op_t *op)
+{
+    int ret = 0;
+    uint32_t domain_id = current->domain->domain_id;
+    uint32_t port;
+    struct domain *d;
+
+    switch ( op->cmd )
+    {
+    case XEN_SYSCTL_CPUFREQ_event_start:
+    case XEN_SYSCTL_CPUFREQ_event_stop:
+        d = rcu_lock_domain_by_id(domain_id);
+        if ( d == NULL )
+            return -ESRCH;
+        break;
+
+    default:
+        return -EOPNOTSUPP;
+    }
+
+    switch ( op->cmd )
+    {
+    case XEN_SYSCTL_CPUFREQ_event_start:
+        /* Allocate event channel */
+        ret = alloc_unbound_xen_event_channel(d->vcpu[0], domain_id,
+                                              cpufreq_notification);
+        if (ret < 0)
+            goto out;
+
+        op->port = ret;
+
+        spin_lock(&hwdom_cpufreq.drv_lock);
+        hwdom_cpufreq.port = ret;
+        hwdom_cpufreq.domain = d;
+        spin_unlock(&hwdom_cpufreq.drv_lock);
+
+        ret = 0;
+        break;
+
+    case XEN_SYSCTL_CPUFREQ_event_stop:
+        spin_lock(&hwdom_cpufreq.drv_lock);
+        port = hwdom_cpufreq.port;
+        hwdom_cpufreq.port = 0;
+        hwdom_cpufreq.domain = NULL;
+        spin_unlock(&hwdom_cpufreq.drv_lock);
+
+        /* Free hwdom's event channel and leave the other one unbound */
+        free_xen_event_channel(d->vcpu[0], port);
+        break;
+    }
+out:
+    rcu_unlock_domain(d);
+    return ret;
+}
+
+/*
+ * Check the policy against the platform limit and the frequency table.
+ * Returns -EINVAL if the policy or the per-CPU data are missing.
+ */
+static int hwdom_cpufreq_verify(struct cpufreq_policy *policy)
+{
+    struct hwdom_cpufreq_cpu_data *data;
+    struct processor_performance *perf;
+
+    if ( !policy || !(data = hwdom_cpufreq.cpu_data[policy->cpu]) ||
+         !processor_pminfo[policy->cpu] )
+        return -EINVAL;
+
+    perf = &processor_pminfo[policy->cpu]->perf;
+
+    cpufreq_verify_within_limits(policy, 0,
+        perf->states[perf->platform_limit].core_frequency * 1000);
+
+    return cpufreq_frequency_table_verify(policy, data->freq_table);
+}
+
+/*
+ * Ask the hwdom to switch policy->cpu to the table entry matching
+ * target_freq/relation.
+ *
+ * The request is asynchronous: the value returned is the result of the
+ * previous request, not of the one being sent.  Returns -EAGAIN while the
+ * previous request is still waiting for its answer and -ENODEV if the CPU
+ * has no driver data or the frequency table lookup fails.
+ */
+static int hwdom_cpufreq_target(struct cpufreq_policy *policy,
+                                unsigned int target_freq, unsigned int relation)
+{
+    struct hwdom_cpufreq_cpu_data *data = hwdom_cpufreq.cpu_data[policy->cpu];
+    struct processor_performance *perf;
+    struct cpufreq_freqs freqs;
+    cpumask_t online_policy_cpus;
+    unsigned int next_state = 0; /* Index into freq_table */
+    unsigned int next_perf_state = 0; /* Index into perf table */
+    unsigned int j;
+    int ret = 0;
+
+    if ( unlikely(data == NULL ||
+        data->perf_data == NULL || data->freq_table == NULL) )
+        return -ENODEV;
+
+    perf = data->perf_data;
+    ret = cpufreq_frequency_table_target(policy,
+                                         data->freq_table,
+                                         target_freq,
+                                         relation, &next_state);
+    if ( unlikely(ret) )
+        return -ENODEV;
+
+    cpumask_and(&online_policy_cpus, &cpu_online_map, policy->cpus);
+
+    next_perf_state = data->freq_table[next_state].index;
+    if ( perf->state == next_perf_state )
+    {
+        if ( unlikely(policy->resume) )
+            policy->resume = 0;
+        else
+            return 0;
+    }
+
+    freqs.old = perf->states[perf->state].core_frequency * 1000;
+    freqs.new = data->freq_table[next_state].frequency;
+
+    if ( cpufreq_is_waiting_answer() )
+        return -EAGAIN;
+
+    /* return previous result */
+    ret = cpufreq_get_hwdom_res();
+
+    /* Do send cmd for Hardware domain */
+    cpufreq_hwdom_change_freq(policy->cpu, freqs.new, (uint32_t)relation);
+
+    for_each_cpu( j, &online_policy_cpus )
+        cpufreq_statistic_update(j, perf->state, next_perf_state);
+
+    perf->state = next_perf_state;
+    policy->cur = freqs.new;
+
+    return ret;
+}
+
+/*
+ * Build the frequency table for policy->cpu from its performance states and
+ * request the minimal frequency.  All per-CPU data is released on failure.
+ */
+static int hwdom_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+    struct processor_performance *perf;
+    struct hwdom_cpufreq_cpu_data *data;
+    unsigned int cpu = policy->cpu;
+    unsigned int valid_states = 0;
+    int i;
+    int ret = 0;
+
+    data = xzalloc(struct hwdom_cpufreq_cpu_data);
+    if ( !data )
+        return -ENOMEM;
+
+    hwdom_cpufreq.cpu_data[cpu] = data;
+
+    data->perf_data = &processor_pminfo[cpu]->perf;
+
+    perf = data->perf_data;
+    policy->shared_type = perf->shared_type;
+
+    data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
+                                     (perf->state_count+1));
+    if ( !data->freq_table )
+    {
+        ret = -ENOMEM;
+        goto err_unreg;
+    }
+
+    /* detect transition latency */
+    policy->cpuinfo.transition_latency = 0;
+    for ( i = 0; i < perf->state_count; i++ )
+    {
+        if ( (perf->states[i].transition_latency * 1000) >
+             policy->cpuinfo.transition_latency )
+            policy->cpuinfo.transition_latency =
+                perf->states[i].transition_latency * 1000;
+    }
+
+    policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
+
+    /* table init */
+    for ( i = 0; i < perf->state_count; i++ )
+    {
+        if ( i > 0 && perf->states[i].core_frequency >=
+             data->freq_table[valid_states-1].frequency / 1000 )
+            continue;
+
+        data->freq_table[valid_states].index = i;
+        data->freq_table[valid_states].frequency =
+            perf->states[i].core_frequency * 1000;
+        valid_states++;
+    }
+    data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
+    perf->state = 0;
+
+    ret = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
+    if ( ret )
+        goto err_freqfree;
+
+
+    /* We will set the minimal frequency now. So set policy->resume to 0 */
+    policy->resume = 0;
+
+    /* Set the minimal frequency */
+    ret = hwdom_cpufreq_target(policy, policy->min, CPUFREQ_RELATION_L);
+    if ( !ret )
+        return 0;
+
+    /* The request failed: release the per-CPU data like any other error */
+ err_freqfree:
+    xfree(data->freq_table);
+ err_unreg:
+    xfree(data);
+    hwdom_cpufreq.cpu_data[cpu] = NULL;
+
+    return ret;
+}
+
+/* Release the per-CPU data allocated by hwdom_cpufreq_cpu_init(). */
+static int hwdom_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+    struct hwdom_cpufreq_cpu_data *data = hwdom_cpufreq.cpu_data[policy->cpu];
+
+    if ( data )
+    {
+        hwdom_cpufreq.cpu_data[policy->cpu] = NULL;
+        xfree(data->freq_table);
+        xfree(data);
+    }
+
+    return 0;
+}
+
+static struct cpufreq_driver hwdom_cpufreq_driver = {
+    .name = "hwdom-cpufreq",
+    .verify = hwdom_cpufreq_verify,
+    .target = hwdom_cpufreq_target,
+    .init = hwdom_cpufreq_cpu_init,
+    .exit = hwdom_cpufreq_cpu_exit,
+};
+
+/*
+ * Register the driver when Xen itself controls the CPU frequency
+ * (cpufreq_controller == FREQCTL_xen); otherwise do nothing.
+ */
+static int __init hwdom_cpufreq_driver_init(void)
+{
+    if ( cpufreq_controller != FREQCTL_xen )
+        return 0;
+
+    /* Everything the driver callbacks use must be ready before registering */
+    spin_lock_init(&hwdom_cpufreq.drv_lock);
+    spin_lock_init(&hwdom_cpufreq.hwdom_res_lock);
+    spin_lock_init(&hwdom_cpufreq.timer_lock);
+    init_timer(&hwdom_cpufreq.timer, cpufreq_hwdom_answer_tout, NULL, 0);
+
+    return cpufreq_register_driver(&hwdom_cpufreq_driver);
+}
+
+__initcall(hwdom_cpufreq_driver_init);
diff --git a/xen/include/xen/cpufreq.h b/xen/include/xen/cpufreq.h
index d7b6c34..0c8c19d 100644
--- a/xen/include/xen/cpufreq.h
+++ b/xen/include/xen/cpufreq.h
@@ -264,4 +264,6 @@ int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq);
 void cpufreq_dbs_timer_suspend(void);
 void cpufreq_dbs_timer_resume(void);
 
+int sysctl_cpufreq_op(xen_sysctl_cpufreq_op_t *op);
+
 #endif /* __XEN_CPUFREQ_PM_H__ */
-- 
1.9.1

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |