[Xen-devel] [RFC PATCH v5 10/10] xen/arm: cpufreq: add xen-cpufreq driver
Xen changes frequencies on physical CPUs using this high-level cpufreq
driver. Workflow:

 * cpufreq governor driver in Xen wants to change the frequency of the
   physical CPU
 * cpufreq driver in Xen sets parameters in the shared memory
 * cpufreq driver in Xen sends an event via event channel to notify
   the xen-cpufreq driver
 * xen-cpufreq driver reads parameters from the shared memory, changes
   frequency and copies the result of the operation to the shared memory
 * xen-cpufreq driver sends an event via event channel to notify the
   cpufreq driver in Xen

Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
---
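Note (not part of the diff): the workflow above communicates through a
small command block in the architectural part of the shared info page
(HYPERVISOR_shared_info->arch.cpufreq). Its actual definition is
introduced by an earlier patch in this series and is not shown here; the
sketch below is only inferred from how cpufreq_work_hnd() and
xen_cpufreq_driver_target() use it, and the field types are assumptions:

	/* Hypothetical sketch of the shared command block; the real
	 * layout comes from an earlier patch in this series. */
	struct cpufreq_sh_info {
		uint32_t cpu;      /* physical CPU whose frequency changes */
		uint32_t cmd;      /* command, e.g. CPUFREQ_CMD_change_freq */
		uint32_t freq;     /* requested target frequency, in kHz */
		uint32_t relation; /* CPUFREQ_RELATION_L / CPUFREQ_RELATION_H */
		int32_t  result;   /* written back by dom0: 0 or -errno */
	};

Xen fills in cpu, cmd, freq and relation before raising the event (hence
the smp_rmb() in the handler below); dom0 writes result back behind
smp_wmb() and re-notifies Xen over the same event channel.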
 drivers/cpufreq/Kconfig           |  20 +
 drivers/cpufreq/Makefile          |   1 +
 drivers/cpufreq/cpufreq_drv_ops.c |  13 +-
 drivers/cpufreq/cpufreq_drv_ops.h |   4 +
 drivers/cpufreq/xen-cpufreq.c     | 917 ++++++++++++++++++++++++++++++++++++++
 include/xen/interface/platform.h  |   1 +
 6 files changed, 954 insertions(+), 2 deletions(-)
 create mode 100644 drivers/cpufreq/xen-cpufreq.c

diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index f5a8f84..4847d8a 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -19,6 +19,26 @@ config CPU_FREQ
 
 	  If in doubt, say N.
 
+config XEN_CPUFREQ
+	bool "Xen Cpufreq driver"
+	depends on XEN_DOM0
+	depends on !CPUMASK_OFFSTACK
+	default n
+	select CPUFREQ_DRV_OPS
+	help
+	  This driver uploads Power Management information to the Xen
+	  hypervisor and changes CPU frequencies using CPU Frequency
+	  scaling drivers.
+
+	  To do that the driver uses CPU Frequency scaling drivers to parse
+	  the Power Management data and uploads said information to the Xen
+	  hypervisor. The Xen hypervisor can then select the proper Pxx states.
+
+	  The Xen hypervisor can then change CPU frequencies by giving
+	  commands via this driver to the CPU Frequency scaling driver.
+
+	  If in doubt, say N.
+
 if CPUFREQ_DRV_OPS
 
 config CPU_FREQ_TABLE
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index f12a0d3..c8d5037 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -1,5 +1,6 @@
 # CPUfreq core
 obj-$(CONFIG_CPU_FREQ)			+= cpufreq.o
+obj-$(CONFIG_XEN_CPUFREQ)		+= xen-cpufreq.o
 obj-$(CONFIG_CPUFREQ_DRV_OPS)		+= cpufreq_drv_ops.o
 # CPUfreq stats
 obj-$(CONFIG_CPU_FREQ_STAT)		+= cpufreq_stats.o
diff --git a/drivers/cpufreq/cpufreq_drv_ops.c b/drivers/cpufreq/cpufreq_drv_ops.c
index c971442..71c3357 100644
--- a/drivers/cpufreq/cpufreq_drv_ops.c
+++ b/drivers/cpufreq/cpufreq_drv_ops.c
@@ -18,6 +18,8 @@
 #include <linux/init.h>
 #include <linux/export.h>
 
+#include <xen/xen.h>
+
 static struct cpufreq_drv_ops *ops;
 
 struct kobject *get_cpufreq_global_kobject(void)
@@ -177,10 +179,17 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
 
 static int __init cpufreq_drv_ops_init(void)
 {
+	if (xen_initial_domain()) {
+#ifdef CONFIG_XEN_CPUFREQ
+		ops = &xen_cpufreq_drv_ops;
+		pr_debug("using xen_cpufreq_drv_ops\n");
+#endif
+	} else {
 #ifdef CONFIG_CPU_FREQ
-	ops = &kern_cpufreq_drv_ops;
-	pr_debug("using kern_cpufreq_drv_ops\n");
+		ops = &kern_cpufreq_drv_ops;
+		pr_debug("using kern_cpufreq_drv_ops\n");
 #endif
+	}
 
 	return 0;
 }
diff --git a/drivers/cpufreq/cpufreq_drv_ops.h b/drivers/cpufreq/cpufreq_drv_ops.h
index 5cc8e05..d02d509 100644
--- a/drivers/cpufreq/cpufreq_drv_ops.h
+++ b/drivers/cpufreq/cpufreq_drv_ops.h
@@ -47,4 +47,8 @@ struct cpufreq_drv_ops {
 extern struct cpufreq_drv_ops kern_cpufreq_drv_ops;
 #endif
 
+#ifdef CONFIG_XEN_CPUFREQ
+extern struct cpufreq_drv_ops xen_cpufreq_drv_ops;
+#endif
+
 #endif /* _CPUFREQ_DRV_OPS_H */
diff --git a/drivers/cpufreq/xen-cpufreq.c b/drivers/cpufreq/xen-cpufreq.c
new file mode 100644
index 0000000..b19d726
--- /dev/null
+++ b/drivers/cpufreq/xen-cpufreq.c
@@ -0,0 +1,917 @@
+/*
+ * Copyright (C) 2001 Russell King
+ * (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx>
+ *
+ * Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx>
+ *	Added handling for CPU hotplug
+ * Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx>
+ *	Fix handling for CPU hotplug -- affected CPUs
+ *
+ * (C) 2014 GlobalLogic Inc.
+ *
+ * Based on drivers/cpufreq/cpufreq.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/irq.h>
+#include <linux/workqueue.h>
+#include <linux/cpufreq.h>
+
+#include <trace/events/power.h>
+
+#include <xen/xen.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/platform.h>
+#include <xen/interface/sysctl.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include "cpufreq_drv_ops.h"
+
+static int xen_nr_cpus;
+static int xen_irq;
+
+#define for_each_xen_cpu(cpu, mask)				\
+	for ((cpu) = -1;					\
+		(cpu) = cpumask_next((cpu), (mask)),		\
+		(cpu) < xen_nr_cpus;)
+
+static struct cpufreq_driver *cpufreq_driver;
+static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
+
+static DEFINE_SPINLOCK(cpufreq_driver_lock);
+
+/*
+ * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
+ * all cpufreq/hotplug/workqueue/etc related lock issues.
+ *
+ * The rules for this semaphore:
+ * - Any routine that wants to read from the policy structure will
+ *   do a down_read on this semaphore.
+ * - Any routine that will write to the policy structure and/or may take away
+ *   the policy altogether (eg. CPU hotplug), will hold this lock in write
+ *   mode before doing so.
+ *
+ * Additional rules:
+ * - Governor routines that can be called in cpufreq hotplug path should not
+ *   take this sem as top level hotplug notifier handler takes this.
+ * - Lock should not be held across
+ *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+ */
+static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
+static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
+
+#define lock_policy_rwsem(mode, cpu)					\
+static int lock_policy_rwsem_##mode					\
+(int cpu)								\
+{									\
+	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
+	BUG_ON(policy_cpu == -1);					\
+	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
+									\
+	return 0;							\
+}
+
+lock_policy_rwsem(write, cpu);
+
+static void unlock_policy_rwsem_write(int cpu)
+{
+	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
+	BUG_ON(policy_cpu == -1);
+	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
+}
+
+/**
+ * The "transition" notifier list for kernel code that needs to handle
+ * changes to devices when the CPU clock speed changes.
+ * The mutex locks this list.
+ */
+static struct srcu_notifier_head xen_cpufreq_transition_notifier_list;
+
+static bool init_cpufreq_transition_notifier_list_called;
+static int __init init_cpufreq_transition_notifier_list(void)
+{
+	srcu_init_notifier_head(&xen_cpufreq_transition_notifier_list);
+	init_cpufreq_transition_notifier_list_called = true;
+	return 0;
+}
+pure_initcall(init_cpufreq_transition_notifier_list);
+
+static struct cpufreq_policy *xen_cpufreq_cpu_get(unsigned int cpu)
+{
+	struct cpufreq_policy *data = NULL;
+	unsigned long flags;
+
+	if (cpu >= xen_nr_cpus)
+		goto err_out;
+
+	/* get the cpufreq driver */
+	spin_lock_irqsave(&cpufreq_driver_lock, flags);
+
+	if (!cpufreq_driver)
+		goto err_out_unlock;
+
+	/* get the CPU */
+	data = per_cpu(cpufreq_cpu_data, cpu);
+
+err_out_unlock:
+	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+err_out:
+	return data;
+}
+
+static void xen_cpufreq_cpu_put(struct cpufreq_policy *data)
+{
+	module_put(cpufreq_driver->owner);
+}
+
+static int push_data_to_hypervisor(struct cpufreq_policy *policy,
+				   struct cpufreq_frequency_table *table)
+{
+	int ret = 0;
+	unsigned int i;
+	unsigned int cpu;
+	uint32_t platform_limit = 0;
+	unsigned int max_freq = 0;
+	unsigned int state_count = 0;
+	unsigned int prev_freq = 0;
+	struct xen_processor_px *dst_states;
+	struct xen_processor_performance *dst_perf;
+	struct xen_platform_op op = {
+		.cmd			= XENPF_set_processor_pminfo,
+		.interface_version	= XENPF_INTERFACE_VERSION,
+		.u.set_pminfo.type	= XEN_PM_PX,
+	};
+
+	dst_perf = &op.u.set_pminfo.perf;
+
+	/* Check freq table and find max frequency */
+	for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+		unsigned int freq = table[i].frequency;
+		if (freq == CPUFREQ_ENTRY_INVALID)
+			continue;
+
+		if (table[i].index != state_count || freq <= prev_freq) {
+			pr_err("Frequency table format error\n");
+			return -EINVAL;
+		}
+
+		prev_freq = freq;
+		state_count++;
+		if (freq > max_freq)
+			max_freq = freq;
+	}
+
+	if (!state_count)
+		return -EINVAL;
+
+	dst_perf->state_count = state_count;
+
+	dst_states = kcalloc(state_count,
+			     sizeof(struct xen_processor_px), GFP_KERNEL);
+	if (!dst_states)
+		return -ENOMEM;
+
+	set_xen_guest_handle(dst_perf->states, dst_states);
+
+	/*
+	 * Freq table should start from lower values,
+	 * dst_states should start from higher values
+	 */
+	for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+		unsigned int freq = table[i].frequency;
+		unsigned int tbl_index = state_count - 1 - table[i].index;
+		if (freq == CPUFREQ_ENTRY_INVALID)
+			continue;
+
+		if (freq == max_freq)
+			platform_limit = tbl_index;
+
+		dst_states[tbl_index].core_frequency = freq / 1000;
+		dst_states[tbl_index].transition_latency =
+			policy->cpuinfo.transition_latency / 1000;
+	}
+
+	dst_perf->shared_type = policy->shared_type;
+	dst_perf->platform_limit = platform_limit;
+	dst_perf->domain_info.domain = policy->cpu;
+	dst_perf->domain_info.num_processors = xen_nr_cpus;
+	dst_perf->flags = XEN_PX_DATA;
+
+	for_each_xen_cpu(cpu, policy->cpus) {
+		op.u.set_pminfo.id = cpu;
+		ret = HYPERVISOR_dom0_op(&op);
+		if (ret) {
+			pr_debug("Hypervisor error(%d) for CPU%u\n", ret, cpu);
+			goto err_free_states;
+		}
+		pr_debug("CPU%u - P-states uploaded\n", cpu);
+
+		for (i = 0; i < dst_perf->state_count; i++) {
+			pr_debug("     state %d: %d MHz, %d uS\n",
+				 i, (u32) dst_states[i].core_frequency,
+				 (u32) dst_states[i].transition_latency);
+		}
+	}
+
+err_free_states:
+	kfree(dst_states);
+	return ret;
+}
+
+/*
+ * Returns:
+ *   Negative: Failure
+ *   0:        Success
+ *   Positive: When we have a managed CPU and the sysfs got symlinked
+ */
+static int xen_cpufreq_add_dev_policy(unsigned int cpu,
+				      struct cpufreq_policy *policy)
+{
+	int ret = 0;
+#ifdef CONFIG_SMP
+	unsigned long flags;
+	unsigned int j;
+
+	for_each_cpu(j, policy->cpus) {
+		struct cpufreq_policy *managed_policy;
+
+		if (cpu == j)
+			continue;
+
+		/* Check for existing affected CPUs.
+		 * They may not be aware of it due to CPU Hotplug.
+		 * cpufreq_cpu_put is called when the device is removed
+		 * in __cpufreq_remove_dev()
+		 */
+		managed_policy = xen_cpufreq_cpu_get(j);
+		if (unlikely(managed_policy)) {
+			/* Set proper policy_cpu */
+			unlock_policy_rwsem_write(cpu);
+			per_cpu(cpufreq_policy_cpu, cpu) =
+				managed_policy->cpu;
+
+			if (lock_policy_rwsem_write(cpu) < 0) {
+				/* Should not go through policy unlock path */
+				if (cpufreq_driver->exit)
+					cpufreq_driver->exit(policy);
+				xen_cpufreq_cpu_put(managed_policy);
+				return -EBUSY;
+			}
+
+			spin_lock_irqsave(&cpufreq_driver_lock, flags);
+			cpumask_copy(managed_policy->cpus, policy->cpus);
+			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
+			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+			pr_debug("CPU already managed, adding link\n");
+
+			/*
+			 * Success. We only needed to be added to the mask.
+			 * Call driver->exit() because only the cpu parent of
+			 * the kobj needed to call init().
+			 */
+			if (cpufreq_driver->exit)
+				cpufreq_driver->exit(policy);
+
+			return 1;
+		}
+	}
+#endif
+	return ret;
+}
+
+/**
+ * xen_cpufreq_add_dev - add a CPU device
+ *
+ * Adds the cpufreq interface for a CPU device.
+ */
+static int xen_cpufreq_add_dev(unsigned int cpu)
+{
+	int ret = 0;
+	struct cpufreq_policy *policy;
+	unsigned long flags;
+	unsigned int j;
+
+	pr_debug("adding CPU %u\n", cpu);
+
+#ifdef CONFIG_SMP
+	/* check whether a different CPU already registered this
+	 * CPU because it is in the same boat. */
+	policy = xen_cpufreq_cpu_get(cpu);
+	if (unlikely(policy)) {
+		xen_cpufreq_cpu_put(policy);
+		return 0;
+	}
+#endif
+
+	if (!try_module_get(cpufreq_driver->owner)) {
+		ret = -EINVAL;
+		goto module_out;
+	}
+
+	ret = -ENOMEM;
+	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
+	if (!policy)
+		goto nomem_out;
+
+	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
+		goto err_free_policy;
+
+	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
+		goto err_free_cpumask;
+
+	policy->cpu = cpu;
+	cpumask_copy(policy->cpus, cpumask_of(cpu));
+
+	/* Initially set CPU itself as the policy_cpu */
+	per_cpu(cpufreq_policy_cpu, cpu) = cpu;
+	ret = (lock_policy_rwsem_write(cpu) < 0);
+	WARN_ON(ret);
+
+	/* call driver. From then on the cpufreq must be able
+	 * to accept all calls to ->verify and ->setpolicy for this CPU
+	 */
+	ret = cpufreq_driver->init(policy);
+	if (ret) {
+		pr_debug("initialization failed\n");
+		goto err_unlock_policy;
+	}
+	ret = xen_cpufreq_add_dev_policy(cpu, policy);
+	if (ret) {
+		if (ret > 0)
+			/* This is a managed cpu, symlink created,
+			   exit with 0 */
+			ret = 0;
+		goto err_unlock_policy;
+	}
+
+	spin_lock_irqsave(&cpufreq_driver_lock, flags);
+	for_each_cpu(j, policy->cpus) {
+		per_cpu(cpufreq_cpu_data, j) = policy;
+		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
+	}
+	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+	unlock_policy_rwsem_write(cpu);
+
+	module_put(cpufreq_driver->owner);
+	pr_debug("initialization complete\n");
+
+	return 0;
+
+err_unlock_policy:
+	unlock_policy_rwsem_write(cpu);
+	free_cpumask_var(policy->related_cpus);
+err_free_cpumask:
+	free_cpumask_var(policy->cpus);
+err_free_policy:
+	kfree(policy);
+nomem_out:
+	module_put(cpufreq_driver->owner);
+module_out:
+	return ret;
+}
+
+/**
+ * __cpufreq_remove_dev - remove a CPU device
+ *
+ * Removes the cpufreq interface for a CPU device.
+ * Caller should already have policy_rwsem in write mode for this CPU.
+ * This routine frees the rwsem before returning.
+ */
+static int __cpufreq_remove_dev(unsigned int cpu)
+{
+	unsigned long flags;
+	struct cpufreq_policy *data;
+#ifdef CONFIG_SMP
+	unsigned int j;
+#endif
+
+	pr_debug("unregistering CPU %u\n", cpu);
+
+	spin_lock_irqsave(&cpufreq_driver_lock, flags);
+	data = per_cpu(cpufreq_cpu_data, cpu);
+
+	if (!data) {
+		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+		unlock_policy_rwsem_write(cpu);
+		return -EINVAL;
+	}
+	per_cpu(cpufreq_cpu_data, cpu) = NULL;
+
+
+#ifdef CONFIG_SMP
+	/* if this isn't the CPU which is the parent of the kobj, we
+	 * only need to unlink, put and exit
+	 */
+	if (unlikely(cpu != data->cpu)) {
+		pr_debug("removing link\n");
+		cpumask_clear_cpu(cpu, data->cpus);
+		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+		xen_cpufreq_cpu_put(data);
+		unlock_policy_rwsem_write(cpu);
+		return 0;
+	}
+#endif
+
+#ifdef CONFIG_SMP
+
+	/* if we have other CPUs still registered, we need to unlink them,
+	 * or else wait_for_completion below will lock up. Clean the
+	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
+	 * the sysfs links afterwards.
+	 */
+	if (unlikely(cpumask_weight(data->cpus) > 1)) {
+		for_each_cpu(j, data->cpus) {
+			if (j == cpu)
+				continue;
+			per_cpu(cpufreq_cpu_data, j) = NULL;
+		}
+	}
+
+	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+	if (unlikely(cpumask_weight(data->cpus) > 1)) {
+		for_each_cpu(j, data->cpus) {
+			if (j == cpu)
+				continue;
+			pr_debug("removing link for cpu %u\n", j);
+			unlock_policy_rwsem_write(cpu);
+			lock_policy_rwsem_write(cpu);
+			xen_cpufreq_cpu_put(data);
+		}
+	}
+#else
+	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+#endif
+
+	unlock_policy_rwsem_write(cpu);
+
+	lock_policy_rwsem_write(cpu);
+	if (cpufreq_driver->exit)
+		cpufreq_driver->exit(data);
+	unlock_policy_rwsem_write(cpu);
+
+	free_cpumask_var(data->related_cpus);
+	free_cpumask_var(data->cpus);
+	kfree(data);
+
+	return 0;
+}
+
+static int cpufreq_remove_dev(unsigned int cpu)
+{
+	int retval;
+
+	if (unlikely(lock_policy_rwsem_write(cpu)))
+		BUG();
+
+	retval = __cpufreq_remove_dev(cpu);
+	return retval;
+}
+
+/*********************************************************************
+ *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
+ *********************************************************************/
+
+/**
+ * adjust_jiffies - adjust the system "loops_per_jiffy"
+ *
+ * This function alters the system "loops_per_jiffy" for the clock
+ * speed change. Note that loops_per_jiffy cannot be updated on SMP
+ * systems as each CPU might be scaled differently. So, use the arch
+ * per-CPU loops_per_jiffy value wherever possible.
+ */
+#ifndef CONFIG_SMP
+static unsigned long l_p_j_ref;
+static unsigned int l_p_j_ref_freq;
+
+static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
+{
+	if (ci->flags & CPUFREQ_CONST_LOOPS)
+		return;
+
+	if (!l_p_j_ref_freq) {
+		l_p_j_ref = loops_per_jiffy;
+		l_p_j_ref_freq = ci->old;
+		pr_debug("saving %lu as reference value for loops_per_jiffy; "
+			 "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
+	}
+	if ((val == CPUFREQ_POSTCHANGE && ci->old != ci->new) ||
+	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
+		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
+						ci->new);
+		pr_debug("scaling loops_per_jiffy to %lu "
+			 "for frequency %u kHz\n", loops_per_jiffy, ci->new);
+	}
+}
+#else
+static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
+{
+	return;
+}
+#endif
+
+
+/**
+ * xen_cpufreq_notify_transition - call notifier chain and adjust_jiffies
+ * on frequency transition.
+ *
+ * This function calls the transition notifiers and the "adjust_jiffies"
+ * function. It is called twice on all CPU frequency changes that have
+ * external effects.
+ */
+void xen_cpufreq_notify_transition(struct cpufreq_freqs *freqs,
+				   unsigned int state)
+{
+	struct cpufreq_policy *policy;
+
+	BUG_ON(irqs_disabled());
+
+	freqs->flags = cpufreq_driver->flags;
+	pr_debug("notification %u of frequency transition to %u kHz\n",
+		 state, freqs->new);
+
+	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
+	switch (state) {
+	case CPUFREQ_PRECHANGE:
+		/* detect if the driver reported a value as "old frequency"
+		 * which is not equal to what the cpufreq core thinks is
+		 * "old frequency".
+		 */
+		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
+			if ((policy) && (policy->cpu == freqs->cpu) &&
+			    (policy->cur) && (policy->cur != freqs->old)) {
+				pr_debug("Warning: CPU frequency is"
+					 " %u, cpufreq assumed %u kHz.\n",
+					 freqs->old, policy->cur);
+				freqs->old = policy->cur;
+			}
+		}
+		srcu_notifier_call_chain(&xen_cpufreq_transition_notifier_list,
+					 CPUFREQ_PRECHANGE, freqs);
+		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
+		break;
+
+	case CPUFREQ_POSTCHANGE:
+		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
+		pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new,
+			 (unsigned long)freqs->cpu);
+		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
+		trace_cpu_frequency(freqs->new, freqs->cpu);
+		srcu_notifier_call_chain(&xen_cpufreq_transition_notifier_list,
+					 CPUFREQ_POSTCHANGE, freqs);
+		if (likely(policy) && likely(policy->cpu == freqs->cpu))
+			policy->cur = freqs->new;
+		break;
+	}
+}
+
+/*********************************************************************
+ *                          GOVERNORS                                *
+ *********************************************************************/
+
+int __xen_cpufreq_driver_target(struct cpufreq_policy *policy,
+				unsigned int target_freq,
+				unsigned int relation)
+{
+	int retval = -EINVAL;
+	unsigned int old_target_freq = target_freq;
+
+	/* Make sure that target_freq is within supported range */
+	if (target_freq > policy->max)
+		target_freq = policy->max;
+	if (target_freq < policy->min)
+		target_freq = policy->min;
+
+	pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
+		 policy->cpu, target_freq, relation, old_target_freq);
+
+	if (target_freq == policy->cur)
+		return 0;
+
+	if (cpufreq_driver->target)
+		retval = cpufreq_driver->target(policy, target_freq,
+						relation);
+
+	return retval;
+}
+
+int xen_cpufreq_driver_target(struct cpufreq_policy *policy,
+			      unsigned int target_freq,
+			      unsigned int relation)
+{
+	int ret = -EINVAL;
+
+	if (!policy)
+		goto no_policy;
+
+	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
+		goto fail;
+
+	ret = __xen_cpufreq_driver_target(policy, target_freq, relation);
+
+	unlock_policy_rwsem_write(policy->cpu);
+
+fail:
+	xen_cpufreq_cpu_put(policy);
+no_policy:
+	return ret;
+}
+
+/*********************************************************************
+ *                 HANDLE COMMANDS FROM XEN                          *
+ *********************************************************************/
+static void cpufreq_work_hnd(struct work_struct *w);
+
+static struct workqueue_struct *cpufreq_wq;
+static DECLARE_WORK(cpufreq_work, cpufreq_work_hnd);
+
+static void cpufreq_work_hnd(struct work_struct *w)
+{
+	int ret;
+	struct cpufreq_policy *policy;
+	struct cpufreq_sh_info *cpufreq_info;
+
+	cpufreq_info = &HYPERVISOR_shared_info->arch.cpufreq;
+
+	policy = xen_cpufreq_cpu_get(cpufreq_info->cpu);
+
+	/* Xen sets the parameters before notifying us,
+	 * so read them only after this barrier */
+	smp_rmb();
+
+	/* accept only CPUFREQ_CMD_change_freq */
+	if (cpufreq_info->cmd != CPUFREQ_CMD_change_freq)
+		return;
+
+	ret = xen_cpufreq_driver_target(policy,
+					cpufreq_info->freq,
+					cpufreq_info->relation);
+
+	cpufreq_info->result = ret;
+	smp_wmb(); /* above must be visible before notify_remote_via_irq() */
+
+	notify_remote_via_irq(xen_irq);
+}
+
+static irqreturn_t cpufreq_interrupt(int irq, void *data)
+{
+	queue_work(cpufreq_wq, &cpufreq_work);
+	return IRQ_HANDLED;
+}
+
+/*********************************************************************
+ *                    XEN CPUFREQ EVENTS                             *
+ *********************************************************************/
+static int xen_start_cpufreq_event(uint32_t *port)
+{
+	int ret;
+	struct xen_sysctl op = {
+		.cmd			= XEN_SYSCTL_cpufreq_op,
+		.interface_version	= XEN_SYSCTL_INTERFACE_VERSION,
+		.u.cpufreq_op.cmd	= XEN_SYSCTL_CPUFREQ_event_start,
+	};
+
+	ret = HYPERVISOR_sysctl(&op);
+	if (unlikely(ret))
+		pr_err("Hypervisor cpufreq start event error (%d)\n", ret);
+	else
+		*port = op.u.cpufreq_op.port;
+
+	return ret;
+}
+
+static int xen_stop_cpufreq_event(void)
+{
+	int ret;
+	struct xen_sysctl op = {
+		.cmd			= XEN_SYSCTL_cpufreq_op,
+		.interface_version	= XEN_SYSCTL_INTERFACE_VERSION,
+		.u.cpufreq_op.cmd	= XEN_SYSCTL_CPUFREQ_event_stop,
+	};
+
+	ret = HYPERVISOR_sysctl(&op);
+	if (ret)
+		pr_err("Hypervisor cpufreq stop event error (%d)\n", ret);
+
+	return ret;
+}
+
+/*********************************************************************
+ *          REGISTER / UNREGISTER CPUFREQ DRIVER                     *
+ *********************************************************************/
+
+/**
+ * xen_cpufreq_register_driver - register a CPU Frequency driver
+ * @driver_data: A struct cpufreq_driver containing the values
+ * submitted by the CPU Frequency driver.
+ *
+ * Registers a CPU Frequency driver to this core code. This code
+ * returns zero on success, -EBUSY when another driver got here first
+ * (and isn't unregistered in the meantime).
+ *
+ */
+int xen_cpufreq_register_driver(struct cpufreq_driver *driver_data)
+{
+	unsigned long flags;
+	int ret;
+	unsigned int cpu;
+	struct cpufreq_frequency_table *table;
+	struct cpufreq_policy *policy;
+	cpumask_var_t pushed_cpus;
+	uint32_t port;
+	int irq;
+
+	if (!xen_nr_cpus)
+		return -EPROBE_DEFER;
+
+	if (!driver_data || !driver_data->verify || !driver_data->init ||
+	    (!driver_data->target))
+		return -EINVAL;
+
+	pr_debug("trying to register driver %s\n", driver_data->name);
+
+	if (driver_data->setpolicy)
+		driver_data->flags |= CPUFREQ_CONST_LOOPS;
+
+	spin_lock_irqsave(&cpufreq_driver_lock, flags);
+
+	if (cpufreq_driver) {
+		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+		return -EBUSY;
+	}
+	cpufreq_driver = driver_data;
+	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+	ret = xen_start_cpufreq_event(&port);
+	if (ret)
+		goto err_remove_drv;
+
+	irq = bind_interdomain_evtchn_to_irqhandler(DOMID_SELF, port,
+						    cpufreq_interrupt, 0,
+						    "xen_cpufreq", NULL);
+	if (irq < 0) {
+		pr_err("bind interdomain evtchn to irqhandler error (%d)\n",
+		       irq);
+		ret = irq;
+		goto err_stop_cpufreq_event;
+	}
+	xen_irq = irq;
+
+	for (cpu = 0; cpu < xen_nr_cpus; cpu++) {
+		ret = xen_cpufreq_add_dev(cpu);
+		if (ret)
+			goto err_remove_cpu;
+	}
+
+	if (!zalloc_cpumask_var(&pushed_cpus, GFP_KERNEL))
+		goto err_remove_cpu;
+
+	for (cpu = 0; cpu < xen_nr_cpus; cpu++) {
+		if (cpumask_test_cpu(cpu, pushed_cpus))
+			continue;
+
+		policy = xen_cpufreq_cpu_get(cpu);
+		if (!policy) {
+			ret = -EINVAL;
+			goto err_free_cpumask;
+		}
+
+		cpumask_or(pushed_cpus, pushed_cpus, policy->cpus);
+		table = cpufreq_frequency_get_table(policy->cpu);
+		if (!table) {
+			ret = -EINVAL;
+			goto err_free_cpumask;
+		}
+
+		ret = push_data_to_hypervisor(policy, table);
+		if (ret)
+			goto err_free_cpumask;
+	}
+
+	free_cpumask_var(pushed_cpus);
+
+	pr_debug("driver %s up and running\n", driver_data->name);
+
+	return 0;
+
+err_free_cpumask:
+	free_cpumask_var(pushed_cpus);
+err_remove_cpu:
+	for (cpu = 0; cpu < xen_nr_cpus; cpu++)
+		cpufreq_remove_dev(cpu);
+	unbind_from_irqhandler(irq, NULL);
+err_stop_cpufreq_event:
+	xen_stop_cpufreq_event();
+err_remove_drv:
+	spin_lock_irqsave(&cpufreq_driver_lock, flags);
+	cpufreq_driver = NULL;
+	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+	return ret;
+}
+
+/**
+ * xen_cpufreq_unregister_driver - unregister the current CPUFreq driver
+ *
+ * Unregister the current CPUFreq driver. Only call this if you have
+ * the right to do so, i.e. if you have succeeded in initialising before!
+ * Returns zero if successful, and -EINVAL if the cpufreq_driver is
+ * currently not initialised.
+ */
+int xen_cpufreq_unregister_driver(struct cpufreq_driver *driver)
+{
+	unsigned long flags;
+	unsigned int cpu;
+
+	if (!cpufreq_driver || (driver != cpufreq_driver))
+		return -EINVAL;
+
+	pr_debug("unregistering driver %s\n", driver->name);
+
+	unbind_from_irqhandler(xen_irq, NULL);
+	xen_stop_cpufreq_event();
+
+	for (cpu = 0; cpu < xen_nr_cpus; cpu++)
+		cpufreq_remove_dev(cpu);
+
+	spin_lock_irqsave(&cpufreq_driver_lock, flags);
+	cpufreq_driver = NULL;
+	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+	return 0;
+}
+
+struct cpufreq_drv_ops xen_cpufreq_drv_ops = {
+	.notify_transition	= xen_cpufreq_notify_transition,
+	.register_driver	= xen_cpufreq_register_driver,
+	.unregister_driver	= xen_cpufreq_unregister_driver,
+};
+
+static int __init xen_cpufreq_init(void)
+{
+	int ret;
+	int i;
+
+	struct xen_sysctl op = {
+		.cmd			= XEN_SYSCTL_physinfo,
+		.interface_version	= XEN_SYSCTL_INTERFACE_VERSION,
+	};
+
+	ret = HYPERVISOR_sysctl(&op);
+	if (ret) {
+		pr_err("Hypervisor get physinfo error (%d)\n", ret);
+		return ret;
+	}
+
+	xen_nr_cpus = op.u.physinfo.nr_cpus;
+	if (xen_nr_cpus == 0 || xen_nr_cpus > NR_CPUS) {
+		pr_err("Wrong number of CPUs (%d)\n", xen_nr_cpus);
+		xen_nr_cpus = 0;
+		return -EINVAL;
+	}
+
+	for (i = 0; i < xen_nr_cpus; i++) {
+		per_cpu(cpufreq_policy_cpu, i) = -1;
+		init_rwsem(&per_cpu(cpu_policy_rwsem, i));
+	}
+
+	cpufreq_wq = create_singlethread_workqueue("xen_cpufreq");
+	if (!cpufreq_wq) {
+		pr_err("Create workqueue error\n");
+		ret = -ENOMEM;
+		goto err_create_wq;
+	}
+
+	return 0;
+
+err_create_wq:
+	xen_nr_cpus = 0;
+	return ret;
+}
+
+MODULE_AUTHOR("Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>");
+MODULE_DESCRIPTION("Xen cpufreq driver which uploads PM data to Xen hypervisor");
+MODULE_LICENSE("GPL");
+
+core_initcall(xen_cpufreq_init);
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index c57d5f6..ee3b154 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -209,6 +209,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
 #define XEN_PX_PSS   2
 #define XEN_PX_PPC   4
 #define XEN_PX_PSD   8
+#define XEN_PX_DATA 16
 
 struct xen_power_register {
 	uint32_t space_id;
-- 
1.9.1