|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH v5 11/12] cpufreq: add hwdom-cpufreq driver
This driver uses the hardware domain (hwdom) to change the frequencies
of the physical CPUs.
Workflow:
* cpufreq governor driver in Xen wants to change the
frequency of the physical CPU
* hwdom-cpufreq driver sets parameters in the shared
memory
* hwdom-cpufreq driver sends an event via event channel
to notify the hardware domain
* cpufreq driver in the hardware domain reads parameters
from the shared memory, changes frequency and copies
the result of the operation to the shared memory
* cpufreq driver in the hwdom sends an event via event
channel to notify the hwdom-cpufreq driver
Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
---
xen/Rules.mk | 1 +
xen/common/sysctl.c | 8 +
xen/drivers/cpufreq/Makefile | 1 +
xen/drivers/cpufreq/hwdom-cpufreq.c | 422 ++++++++++++++++++++++++++++++++++++
xen/include/xen/cpufreq.h | 2 +
5 files changed, 434 insertions(+)
create mode 100644 xen/drivers/cpufreq/hwdom-cpufreq.c
diff --git a/xen/Rules.mk b/xen/Rules.mk
index 3b0b89b..cccbc72 100644
--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -56,6 +56,7 @@ CFLAGS-$(perfc_arrays) += -DPERF_ARRAYS
CFLAGS-$(lock_profile) += -DLOCK_PROFILE
CFLAGS-$(HAS_ACPI) += -DHAS_ACPI
CFLAGS-$(HAS_CPUFREQ) += -DHAS_CPUFREQ
+CFLAGS-$(HAS_HWDOM_CPUFREQ) += -DHAS_HWDOM_CPUFREQ
CFLAGS-$(HAS_PM) += -DHAS_PM
CFLAGS-$(HAS_CPU_TURBO) += -DHAS_CPU_TURBO
CFLAGS-$(HAS_GDBSX) += -DHAS_GDBSX
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 0dcf06a..fd0cd0d 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -27,6 +27,7 @@
#include <xsm/xsm.h>
#include <xen/pmstat.h>
#include <xen/gcov.h>
+#include <xen/cpufreq.h>
long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
{
@@ -362,6 +363,13 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
break;
#endif
+#ifdef HAS_HWDOM_CPUFREQ
+ case XEN_SYSCTL_cpufreq_op:
+ ret = sysctl_cpufreq_op(&op->u.cpufreq_op);
+ copyback = 1;
+ break;
+#endif
+
default:
ret = arch_do_sysctl(op, u_sysctl);
copyback = 0;
diff --git a/xen/drivers/cpufreq/Makefile b/xen/drivers/cpufreq/Makefile
index b87d127..891997c 100644
--- a/xen/drivers/cpufreq/Makefile
+++ b/xen/drivers/cpufreq/Makefile
@@ -2,3 +2,4 @@ obj-y += cpufreq.o
obj-y += cpufreq_ondemand.o
obj-y += cpufreq_misc_governors.o
obj-y += utility.o
+obj-$(HAS_HWDOM_CPUFREQ) += hwdom-cpufreq.o
diff --git a/xen/drivers/cpufreq/hwdom-cpufreq.c b/xen/drivers/cpufreq/hwdom-cpufreq.c
new file mode 100644
index 0000000..3932dca
--- /dev/null
+++ b/xen/drivers/cpufreq/hwdom-cpufreq.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx>
+ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@xxxxxxxx>
+ * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@xxxxxxxxx>
+ *
+ * Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ * porting acpi-cpufreq.c from Linux 2.6.23 to Xen hypervisor
+ *
+ * Copyright (C) 2014 GlobalLogic Inc.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/irq.h>
+#include <xen/spinlock.h>
+#include <xen/cpufreq.h>
+#include <xen/err.h>
+#include <xen/timer.h>
+#include <asm/shared.h>
+#include <asm/current.h>
+#include <asm/system.h>
+
+#define WAIT_HWDOM_ANSWER_TOUT (2000) /* ms */
+
+/* Per-CPU driver state; one pointer per CPU is kept in hwdom_cpufreq.cpu_data[]. */
+struct hwdom_cpufreq_cpu_data {
+ /* Set by hwdom_cpufreq_cpu_init() to &processor_pminfo[cpu]->perf (not owned). */
+ struct processor_performance *perf_data;
+ /* Table allocated by hwdom_cpufreq_cpu_init(), ended by CPUFREQ_TABLE_END. */
+ struct cpufreq_frequency_table *freq_table;
+};
+
+/* Global state of the hwdom-cpufreq driver (single static instance below). */
+struct hwdom_cpufreq {
+ /* Per-CPU data, indexed by policy->cpu; NULL when the CPU is not set up. */
+ struct hwdom_cpufreq_cpu_data *cpu_data[NR_CPUS];
+ /* Domain that registered the event channel; NULL after event_stop. */
+ struct domain *domain;
+ /* Taken around every access to 'port' and 'domain'. */
+ spinlock_t drv_lock;
+ /* Taken around every access to 'hwdom_res'. */
+ spinlock_t hwdom_res_lock;
+ /* True from sending a change request until its answer or timeout. */
+ bool_t is_timer_active;
+ /* Taken around every access to 'is_timer_active'. */
+ spinlock_t timer_lock;
+ /* Answer timeout timer; its handler is cpufreq_hwdom_answer_tout(). */
+ struct timer timer;
+ /* Event channel port returned by alloc_unbound_xen_event_channel(). */
+ uint32_t port;
+ /* Result of the last finished request; -ETIME if it timed out. */
+ int32_t hwdom_res;
+};
+
+static struct hwdom_cpufreq hwdom_cpufreq;
+
+/* Hand @cpuid to cpufreq_add_cpu() and pass its return value back unchanged. */
+int cpufreq_cpu_init(unsigned int cpuid)
+{
+    int rc = cpufreq_add_cpu(cpuid);
+
+    return rc;
+}
+
+/*
+ * Notify the hwdom (to do some command).
+ *
+ * The target domain and port are the ones recorded by
+ * XEN_SYSCTL_CPUFREQ_event_start; they are sampled under drv_lock.  If no
+ * channel is registered (domain is NULL) nothing is sent: the only caller,
+ * cpufreq_hwdom_change_freq(), has already armed the answer timer, so the
+ * request simply ends with -ETIME instead of dereferencing a NULL domain.
+ */
+static void notify_cpufreq_domain(void)
+{
+    uint32_t port;
+    struct domain *domain;
+
+    spin_lock(&hwdom_cpufreq.drv_lock);
+    port = hwdom_cpufreq.port;
+    domain = hwdom_cpufreq.domain;
+    spin_unlock(&hwdom_cpufreq.drv_lock);
+
+    /* No listener registered (never started, or already stopped). */
+    if ( domain == NULL )
+        return;
+
+    notify_via_xen_event_channel(domain, port);
+}
+
+/*
+ * Return the driver to the idle state: stop the answer timer, clear the
+ * "waiting for an answer" flag and store CPUFREQ_CMD_idle in the
+ * cpufreq_sh_info area returned by arch_get_cpufreq_addr().
+ * Called both from the event channel handler and from the timeout handler.
+ */
+static void cpufreq_hwdom_idle(void)
+{
+ struct cpufreq_sh_info *cpufreq_info;
+
+ stop_timer(&hwdom_cpufreq.timer);
+
+ spin_lock(&hwdom_cpufreq.timer_lock);
+ hwdom_cpufreq.is_timer_active = false;
+ spin_unlock(&hwdom_cpufreq.timer_lock);
+
+ /*
+  * NOTE(review): the shared area is looked up via dom0 while notifications
+  * go to hwdom_cpufreq.domain - confirm both always name the same domain.
+  */
+ cpufreq_info = arch_get_cpufreq_addr(dom0);
+
+ cpufreq_info->cmd = CPUFREQ_CMD_idle;
+
+ smp_wmb(); /* order the cmd store before any subsequent stores */
+
+ /* Notification is not needed in case CPUFREQ_CMD_idle */
+}
+
+/*
+ * Post a "change frequency" request to the hardware domain.
+ *
+ * Marks a request as outstanding, arms the answer timeout
+ * (WAIT_HWDOM_ANSWER_TOUT ms from now), fills in the cpufreq_sh_info area
+ * and finally raises the event channel notification.
+ *
+ * cpu:      stored in cpufreq_info->cpu
+ * freq:     stored in cpufreq_info->freq
+ * relation: stored in cpufreq_info->relation
+ */
+static void cpufreq_hwdom_change_freq(uint32_t cpu, uint32_t freq,
+ uint32_t relation)
+{
+ struct cpufreq_sh_info *cpufreq_info;
+
+ /* Flag the request as pending before the timer can possibly fire. */
+ spin_lock(&hwdom_cpufreq.timer_lock);
+ hwdom_cpufreq.is_timer_active = true;
+ spin_unlock(&hwdom_cpufreq.timer_lock);
+
+ set_timer(&hwdom_cpufreq.timer, NOW() + MILLISECS(WAIT_HWDOM_ANSWER_TOUT));
+
+ /*
+  * NOTE(review): the shared area is looked up via dom0 while the
+  * notification goes to hwdom_cpufreq.domain - confirm they always match.
+  */
+ cpufreq_info = arch_get_cpufreq_addr(dom0);
+
+ cpufreq_info->cpu = cpu;
+ cpufreq_info->freq = freq;
+ cpufreq_info->relation = relation;
+ cpufreq_info->cmd = CPUFREQ_CMD_change_freq;
+
+ smp_wmb(); /* above must be visible before notify_cpufreq_domain() */
+
+ notify_cpufreq_domain();
+}
+
+/*
+ * Tell whether a frequency change request is still outstanding, i.e. the
+ * is_timer_active flag is set.  The flag is sampled under timer_lock.
+ */
+static bool_t cpufreq_is_waiting_answer(void)
+{
+    struct hwdom_cpufreq *drv = &hwdom_cpufreq;
+    bool_t pending;
+
+    spin_lock(&drv->timer_lock);
+    pending = drv->is_timer_active;
+    spin_unlock(&drv->timer_lock);
+
+    return pending;
+}
+
+/* Record @result as the outcome of the last request (under hwdom_res_lock). */
+static void cpufreq_set_hwdom_res(int32_t result)
+{
+    struct hwdom_cpufreq *drv = &hwdom_cpufreq;
+
+    spin_lock(&drv->hwdom_res_lock);
+    drv->hwdom_res = result;
+    spin_unlock(&drv->hwdom_res_lock);
+}
+
+/* Fetch the recorded outcome of the last request (under hwdom_res_lock). */
+static int32_t cpufreq_get_hwdom_res(void)
+{
+    struct hwdom_cpufreq *drv = &hwdom_cpufreq;
+    int32_t last_result;
+
+    spin_lock(&drv->hwdom_res_lock);
+    last_result = drv->hwdom_res;
+    spin_unlock(&drv->hwdom_res_lock);
+
+    return last_result;
+}
+
+/*
+ * Timer handler: no answer arrived within WAIT_HWDOM_ANSWER_TOUT ms.
+ * Puts the driver back into the idle state and records -ETIME as the result
+ * of the request.  'data' is unused (the timer is set up with NULL).
+ */
+static void cpufreq_hwdom_answer_tout(void *data)
+{
+ cpufreq_hwdom_idle();
+ cpufreq_set_hwdom_res(-ETIME);
+}
+
+/*
+ * Notification from the hwdom (frequency changed).
+ *
+ * Event channel callback installed by XEN_SYSCTL_CPUFREQ_event_start.
+ * Neither 'v' nor 'port' is used.  If a request is outstanding, the driver
+ * is returned to idle and the 'result' field of the shared area is recorded
+ * as the outcome of that request.
+ */
+static void cpufreq_notification(struct vcpu *v, unsigned int port)
+{
+ struct cpufreq_sh_info *cpufreq_info;
+
+ /* if we are not waiting answer just skip strange notifications */
+ if ( !cpufreq_is_waiting_answer() )
+ return;
+
+ /* Stops the timeout timer and writes CPUFREQ_CMD_idle to the shared area. */
+ cpufreq_hwdom_idle();
+
+ cpufreq_info = arch_get_cpufreq_addr(dom0);
+
+ /*
+  * Read the result written by the hardware domain and store it as the
+  * outcome that the next hwdom_cpufreq_target() call will return.
+  */
+ smp_rmb();
+ cpufreq_set_hwdom_res(cpufreq_info->result);
+}
+
+/*
+ * Handle XEN_SYSCTL_cpufreq_op on behalf of the calling domain.
+ *
+ * XEN_SYSCTL_CPUFREQ_event_start:
+ *     allocate an unbound Xen event channel on the caller's vcpu 0 with
+ *     cpufreq_notification() as its handler, record the port and domain as
+ *     the notification target and hand the port back in op->port.
+ * XEN_SYSCTL_CPUFREQ_event_stop:
+ *     forget the recorded port/domain and free the event channel.
+ *
+ * Returns 0 on success, or:
+ *     -EOPNOTSUPP  unknown op->cmd
+ *     -ESRCH       the calling domain could not be looked up
+ *     -EBUSY       event_start while a channel is already registered (the
+ *                  earlier channel would otherwise be overwritten and leaked)
+ *     -ENOENT      event_stop from a domain that has no registered channel
+ *                  (port 0 would otherwise be freed)
+ *     <0           error from alloc_unbound_xen_event_channel()
+ *
+ * NOTE(review): hwdom_cpufreq.domain keeps the domain pointer after
+ * rcu_unlock_domain(); confirm the registering domain cannot go away while
+ * it is registered.
+ */
+int sysctl_cpufreq_op(xen_sysctl_cpufreq_op_t *op)
+{
+    int ret = 0;
+    uint32_t domain_id = current->domain->domain_id;
+    uint32_t port = 0;
+    struct domain *d;
+
+    /* Reject unknown commands before taking a domain reference. */
+    switch ( op->cmd )
+    {
+    case XEN_SYSCTL_CPUFREQ_event_start:
+    case XEN_SYSCTL_CPUFREQ_event_stop:
+        break;
+
+    default:
+        return -EOPNOTSUPP;
+    }
+
+    d = rcu_lock_domain_by_id(domain_id);
+    if ( d == NULL )
+        return -ESRCH;
+
+    switch ( op->cmd )
+    {
+    case XEN_SYSCTL_CPUFREQ_event_start:
+        /* Allocate event channel */
+        ret = alloc_unbound_xen_event_channel(d->vcpu[0], domain_id,
+                                              cpufreq_notification);
+        if ( ret < 0 )
+            break;
+
+        port = ret;
+
+        /* Publish the channel only if none is registered yet. */
+        spin_lock(&hwdom_cpufreq.drv_lock);
+        if ( hwdom_cpufreq.domain != NULL )
+            ret = -EBUSY;
+        else
+        {
+            hwdom_cpufreq.port = port;
+            hwdom_cpufreq.domain = d;
+            ret = 0;
+        }
+        spin_unlock(&hwdom_cpufreq.drv_lock);
+
+        if ( ret )
+        {
+            /* Lost against an existing registration: undo the allocation. */
+            free_xen_event_channel(d->vcpu[0], port);
+            break;
+        }
+
+        op->port = port;
+        break;
+
+    case XEN_SYSCTL_CPUFREQ_event_stop:
+        spin_lock(&hwdom_cpufreq.drv_lock);
+        if ( hwdom_cpufreq.domain != d )
+            ret = -ENOENT;
+        else
+        {
+            port = hwdom_cpufreq.port;
+            hwdom_cpufreq.port = 0;
+            hwdom_cpufreq.domain = NULL;
+        }
+        spin_unlock(&hwdom_cpufreq.drv_lock);
+
+        /* Free hwdom's event channel and leave the other one unbound */
+        if ( !ret )
+            free_xen_event_channel(d->vcpu[0], port);
+        break;
+    }
+
+    rcu_unlock_domain(d);
+    return ret;
+}
+
+/*
+ * cpufreq driver .verify hook.
+ *
+ * Clamps @policy to the range from 0 up to the core_frequency (times 1000)
+ * of the state selected by perf->platform_limit, then validates it against
+ * this CPU's frequency table.
+ *
+ * Returns -EINVAL if @policy is NULL or the CPU has no driver data or no
+ * processor_pminfo entry; otherwise whatever
+ * cpufreq_frequency_table_verify() returns.
+ */
+static int hwdom_cpufreq_verify(struct cpufreq_policy *policy)
+{
+    struct hwdom_cpufreq_cpu_data *cpu_data;
+    struct processor_performance *perf;
+
+    if ( !policy )
+        return -EINVAL;
+
+    cpu_data = hwdom_cpufreq.cpu_data[policy->cpu];
+    if ( !cpu_data )
+        return -EINVAL;
+
+    if ( !processor_pminfo[policy->cpu] )
+        return -EINVAL;
+
+    perf = &processor_pminfo[policy->cpu]->perf;
+
+    cpufreq_verify_within_limits(policy, 0,
+        perf->states[perf->platform_limit].core_frequency * 1000);
+
+    return cpufreq_frequency_table_verify(policy, cpu_data->freq_table);
+}
+
+/*
+ * cpufreq driver .target hook.
+ *
+ * Looks up the frequency table entry matching @target_freq / @relation and,
+ * unless the CPU is already in that state (and no resume is pending), sends
+ * a change request to the hardware domain.  The request is asynchronous:
+ * the value returned on that path is the recorded result of the *previous*
+ * request, not of the one just sent.
+ *
+ * Returns:
+ *     -ENODEV  no driver data for the CPU, or the table lookup failed
+ *     0        already in the requested state, nothing sent
+ *     -EAGAIN  an earlier request is still waiting for its answer
+ *     other    result of the previous request (0, -ETIME, or the value
+ *              reported by the hardware domain)
+ */
+static int hwdom_cpufreq_target(struct cpufreq_policy *policy,
+                                unsigned int target_freq, unsigned int relation)
+{
+    struct hwdom_cpufreq_cpu_data *cpu_data =
+        hwdom_cpufreq.cpu_data[policy->cpu];
+    struct processor_performance *perf;
+    struct cpufreq_freqs freqs;
+    cpumask_t affected_cpus;
+    unsigned int table_idx = 0; /* Index into freq_table */
+    unsigned int perf_idx = 0;  /* Index into perf table */
+    unsigned int cpu;
+    int rc = 0;
+
+    if ( unlikely(cpu_data == NULL) )
+        return -ENODEV;
+    if ( unlikely(cpu_data->perf_data == NULL) )
+        return -ENODEV;
+    if ( unlikely(cpu_data->freq_table == NULL) )
+        return -ENODEV;
+
+    perf = cpu_data->perf_data;
+
+    rc = cpufreq_frequency_table_target(policy, cpu_data->freq_table,
+                                        target_freq, relation, &table_idx);
+    if ( unlikely(rc) )
+        return -ENODEV;
+
+    cpumask_and(&affected_cpus, &cpu_online_map, policy->cpus);
+
+    perf_idx = cpu_data->freq_table[table_idx].index;
+    if ( perf->state == perf_idx )
+    {
+        /* Same state: only go on if a resume forces re-programming. */
+        if ( !policy->resume )
+            return 0;
+
+        policy->resume = 0;
+    }
+
+    freqs.old = perf->states[perf->state].core_frequency * 1000;
+    freqs.new = cpu_data->freq_table[table_idx].frequency;
+
+    /* Only one request may be in flight at a time. */
+    if ( cpufreq_is_waiting_answer() )
+        return -EAGAIN;
+
+    /* Pick up the previous result before it can be overwritten. */
+    rc = cpufreq_get_hwdom_res();
+
+    /* Do send cmd for Hardware domain */
+    cpufreq_hwdom_change_freq(policy->cpu, freqs.new, (uint32_t)relation);
+
+    for_each_cpu( cpu, &affected_cpus )
+        cpufreq_statistic_update(cpu, perf->state, perf_idx);
+
+    perf->state = perf_idx;
+    policy->cur = freqs.new;
+
+    return rc;
+}
+
+/*
+ * cpufreq driver .init hook: set up per-CPU driver data for policy->cpu.
+ *
+ * Allocates the per-CPU data and a frequency table built from
+ * processor_pminfo[cpu]->perf, fills in the policy's transition latency,
+ * governor and cpuinfo limits, and finally requests the minimal frequency.
+ *
+ * Returns 0 on success, -ENODEV if the CPU has no processor_pminfo entry,
+ * -ENOMEM on allocation failure, or the error from
+ * cpufreq_frequency_table_cpuinfo() / hwdom_cpufreq_target().  On every
+ * failure the per-CPU data is released and cpu_data[cpu] is reset to NULL,
+ * so a later hwdom_cpufreq_cpu_exit() is a harmless no-op.
+ */
+static int hwdom_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+    struct processor_performance *perf;
+    struct hwdom_cpufreq_cpu_data *data;
+    unsigned int cpu = policy->cpu;
+    unsigned int valid_states = 0;
+    int i;
+    int ret = 0;
+
+    /* hwdom_cpufreq_verify() guards this too; do not dereference NULL here. */
+    if ( !processor_pminfo[cpu] )
+        return -ENODEV;
+
+    data = xzalloc(struct hwdom_cpufreq_cpu_data);
+    if ( !data )
+        return -ENOMEM;
+
+    hwdom_cpufreq.cpu_data[cpu] = data;
+
+    data->perf_data = &processor_pminfo[cpu]->perf;
+
+    perf = data->perf_data;
+    policy->shared_type = perf->shared_type;
+
+    /* One extra slot for the CPUFREQ_TABLE_END terminator. */
+    data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
+                                     (perf->state_count+1));
+    if ( !data->freq_table )
+    {
+        ret = -ENOMEM;
+        goto err_unreg;
+    }
+
+    /* detect transition latency: the largest one over all states */
+    policy->cpuinfo.transition_latency = 0;
+    for ( i = 0; i < perf->state_count; i++ )
+    {
+        if ( (perf->states[i].transition_latency * 1000) >
+             policy->cpuinfo.transition_latency )
+            policy->cpuinfo.transition_latency =
+                perf->states[i].transition_latency * 1000;
+    }
+
+    policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
+
+    /*
+     * table init: keep only states whose frequency is strictly lower than
+     * the previously accepted one (state 0 is always accepted, so
+     * valid_states >= 1 whenever i > 0).
+     */
+    for ( i = 0; i < perf->state_count; i++ )
+    {
+        if ( i > 0 && perf->states[i].core_frequency >=
+             data->freq_table[valid_states-1].frequency / 1000 )
+            continue;
+
+        data->freq_table[valid_states].index = i;
+        data->freq_table[valid_states].frequency =
+            perf->states[i].core_frequency * 1000;
+        valid_states++;
+    }
+    data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
+    perf->state = 0;
+
+    ret = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
+    if ( ret )
+        goto err_freqfree;
+
+    /* We will set the minimal frequency now. So set policy->resume to 0 */
+    policy->resume = 0;
+
+    /*
+     * Set the minimal frequency.  A failure here (e.g. -EAGAIN while another
+     * request is in flight) fails the whole init, so the data allocated
+     * above must be released instead of being left behind.
+     */
+    ret = hwdom_cpufreq_target(policy, policy->min, CPUFREQ_RELATION_L);
+    if ( !ret )
+        return 0;
+
+ err_freqfree:
+    xfree(data->freq_table);
+ err_unreg:
+    hwdom_cpufreq.cpu_data[cpu] = NULL;
+    xfree(data);
+
+    return ret;
+}
+
+/*
+ * cpufreq driver .exit hook: drop the per-CPU data of policy->cpu, if any.
+ * Always returns 0.
+ */
+static int hwdom_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+    struct hwdom_cpufreq_cpu_data *cpu_data =
+        hwdom_cpufreq.cpu_data[policy->cpu];
+
+    if ( !cpu_data )
+        return 0;
+
+    /* Unpublish first, then release the table and the container. */
+    hwdom_cpufreq.cpu_data[policy->cpu] = NULL;
+    xfree(cpu_data->freq_table);
+    xfree(cpu_data);
+
+    return 0;
+}
+
+/*
+ * Driver descriptor handed to cpufreq_register_driver() by
+ * hwdom_cpufreq_driver_init(); it wires up the hooks defined above.
+ */
+static struct cpufreq_driver hwdom_cpufreq_driver = {
+ .name = "hwdom-cpufreq",
+ .verify = hwdom_cpufreq_verify,
+ .target = hwdom_cpufreq_target,
+ .init = hwdom_cpufreq_cpu_init,
+ .exit = hwdom_cpufreq_cpu_exit,
+};
+
+/*
+ * Initcall: set up the driver's locks and answer timer, then register the
+ * driver with the cpufreq core.
+ *
+ * Does nothing (returns 0) unless cpufreq_controller is FREQCTL_xen.
+ * Otherwise returns the result of cpufreq_register_driver().
+ */
+static int __init hwdom_cpufreq_driver_init(void)
+{
+    if ( cpufreq_controller != FREQCTL_xen )
+        return 0;
+
+    /* All three locks are used by the driver hooks; initialise every one. */
+    spin_lock_init(&hwdom_cpufreq.drv_lock);
+    spin_lock_init(&hwdom_cpufreq.hwdom_res_lock);
+    spin_lock_init(&hwdom_cpufreq.timer_lock);
+
+    /*
+     * The timer is armed from the .target hook, so it must be initialised
+     * before the driver becomes visible to the cpufreq core.
+     */
+    init_timer(&hwdom_cpufreq.timer, cpufreq_hwdom_answer_tout, NULL, 0);
+
+    return cpufreq_register_driver(&hwdom_cpufreq_driver);
+}
+
+__initcall(hwdom_cpufreq_driver_init);
diff --git a/xen/include/xen/cpufreq.h b/xen/include/xen/cpufreq.h
index d7b6c34..0c8c19d 100644
--- a/xen/include/xen/cpufreq.h
+++ b/xen/include/xen/cpufreq.h
@@ -264,4 +264,6 @@ int write_userspace_scaling_setspeed(unsigned int cpu,
unsigned int freq);
void cpufreq_dbs_timer_suspend(void);
void cpufreq_dbs_timer_resume(void);
+int sysctl_cpufreq_op(xen_sysctl_cpufreq_op_t *op);
+
#endif /* __XEN_CPUFREQ_PM_H__ */
--
1.9.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |