[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH v4 9/9] xen/arm: cpufreq: add xen-cpufreq driver



On Tue, 4 Nov 2014, Oleksandr Dmytryshyn wrote:
> Xen changes frequencies on CPUs using this high-level
> cpufreq driver.
> 
> Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>

You CC'ed the wrong email address for Rafael throughout the entire series.


>  drivers/cpufreq/Kconfig           |  20 +
>  drivers/cpufreq/Makefile          |   1 +
>  drivers/cpufreq/cpufreq_drv_ops.c |  13 +-
>  drivers/cpufreq/cpufreq_drv_ops.h |   4 +
>  drivers/cpufreq/xen-cpufreq.c     | 869 ++++++++++++++++++++++++++++++++++++++
>  include/xen/interface/platform.h  |   1 +
>  include/xen/interface/xen.h       |   1 +
>  7 files changed, 907 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/cpufreq/xen-cpufreq.c
> 
> diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
> index f5a8f84..4847d8a 100644
> --- a/drivers/cpufreq/Kconfig
> +++ b/drivers/cpufreq/Kconfig
> @@ -19,6 +19,26 @@ config CPU_FREQ
>  
>         If in doubt, say N.
>  
> +config XEN_CPUFREQ
> +     bool "Xen Cpufreq driver"
> +     depends on XEN_DOM0
> +     depends on !CPUMASK_OFFSTACK
> +     default n
> +     select CPUFREQ_DRV_OPS
> +     help
> +       This driver uploads Power Management information to the Xen
> +       hypervisor and changes CPUs frequency using CPU Frequency scaling
> +       drivers.
> +
> +       To do that the driver uses CPU Frequency scaling drivers to parse
> +       the Power Management data and uploads said information to the Xen
> +       hypervisor. Then the Xen hypervisor can select the proper Pxx states.
> +
> +       Then the Xen hypervisor can change CPUs frequency by giving commands
> +       via this driver to the CPU Frequency scaling driver.
> +
> +       If in doubt, say N.
> +
>  if CPUFREQ_DRV_OPS
>  
>  config CPU_FREQ_TABLE
> diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
> index f12a0d3..c8d5037 100644
> --- a/drivers/cpufreq/Makefile
> +++ b/drivers/cpufreq/Makefile
> @@ -1,5 +1,6 @@
>  # CPUfreq core
>  obj-$(CONFIG_CPU_FREQ)                       += cpufreq.o
> +obj-$(CONFIG_XEN_CPUFREQ)            += xen-cpufreq.o
>  obj-$(CONFIG_CPUFREQ_DRV_OPS)                += cpufreq_drv_ops.o
>  # CPUfreq stats
>  obj-$(CONFIG_CPU_FREQ_STAT)             += cpufreq_stats.o
> diff --git a/drivers/cpufreq/cpufreq_drv_ops.c b/drivers/cpufreq/cpufreq_drv_ops.c
> index c971442..71c3357 100644
> --- a/drivers/cpufreq/cpufreq_drv_ops.c
> +++ b/drivers/cpufreq/cpufreq_drv_ops.c
> @@ -18,6 +18,8 @@
>  #include <linux/init.h>
>  #include <linux/export.h>
>  
> +#include <xen/xen.h>
> +
>  static struct cpufreq_drv_ops *ops;
>  
>  struct kobject *get_cpufreq_global_kobject(void)
> @@ -177,10 +179,17 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
>  
>  static int __init cpufreq_drv_ops_init(void)
>  {
> +     if (xen_initial_domain()) {
> +#ifdef CONFIG_XEN_CPUFREQ
> +             ops = &xen_cpufreq_drv_ops;
> +             pr_debug("using xen_cpufreq_drv_ops\n");
> +#endif
> +     } else {
>  #ifdef CONFIG_CPU_FREQ
> -     ops = &kern_cpufreq_drv_ops;
> -     pr_debug("using kern_cpufreq_drv_ops\n");
> +             ops = &kern_cpufreq_drv_ops;
> +             pr_debug("using kern_cpufreq_drv_ops\n");
>  #endif
> +     }
>  
>       return 0;
>  }
> diff --git a/drivers/cpufreq/cpufreq_drv_ops.h b/drivers/cpufreq/cpufreq_drv_ops.h
> index 5cc8e05..d02d509 100644
> --- a/drivers/cpufreq/cpufreq_drv_ops.h
> +++ b/drivers/cpufreq/cpufreq_drv_ops.h
> @@ -47,4 +47,8 @@ struct cpufreq_drv_ops {
>  extern struct cpufreq_drv_ops kern_cpufreq_drv_ops;
>  #endif
>  
> +#ifdef CONFIG_XEN_CPUFREQ
> +extern struct cpufreq_drv_ops xen_cpufreq_drv_ops;
> +#endif
> +
>  #endif /* _CPUFREQ_DRV_OPS_H */
> diff --git a/drivers/cpufreq/xen-cpufreq.c b/drivers/cpufreq/xen-cpufreq.c
> new file mode 100644
> index 0000000..21062c7
> --- /dev/null
> +++ b/drivers/cpufreq/xen-cpufreq.c
> @@ -0,0 +1,869 @@
> +/*
> + *  Copyright (C) 2001 Russell King
> + *            (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx>
> + *
> + *  Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx>
> + *   Added handling for CPU hotplug
> + *  Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx>
> + *   Fix handling for CPU hotplug -- affected CPUs
> + *
> + *           (C) 2014 GlobalLogic Inc.
> + *
> + * Based on drivers/cpufreq/cpufreq.c
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/notifier.h>
> +#include <linux/types.h>
> +#include <linux/slab.h>
> +#include <linux/mutex.h>
> +#include <linux/irq.h>
> +#include <linux/workqueue.h>
> +#include <linux/cpufreq.h>
> +
> +#include <trace/events/power.h>
> +
> +#include <xen/xen.h>
> +#include <xen/events.h>
> +#include <xen/interface/xen.h>
> +#include <xen/interface/platform.h>
> +#include <xen/interface/sysctl.h>
> +#include <asm/xen/hypercall.h>
> +#include <asm/xen/hypervisor.h>
> +
> +#include "cpufreq_drv_ops.h"
> +
> +static int xen_nr_cpus;
> +static int xen_irq;
> +
> +#define for_each_xen_cpu(cpu, mask)                  \
> +     for ((cpu) = -1;                                \
> +             (cpu) = cpumask_next((cpu), (mask)),    \
> +             (cpu) < xen_nr_cpus;)
> +
> +static struct cpufreq_driver *cpufreq_driver;
> +static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
> +
> +static DEFINE_SPINLOCK(cpufreq_driver_lock);
> +
> +/*
> + * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
> + * all cpufreq/hotplug/workqueue/etc related lock issues.
> + *
> + * The rules for this semaphore:
> + * - Any routine that wants to read from the policy structure will
> + *   do a down_read on this semaphore.
> + * - Any routine that will write to the policy structure and/or may take away
> + *   the policy altogether (eg. CPU hotplug), will hold this lock in write
> + *   mode before doing so.
> + *
> + * Additional rules:
> + * - Governor routines that can be called in cpufreq hotplug path should not
> + *   take this sem as top level hotplug notifier handler takes this.
> + * - Lock should not be held across
> + *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
> + */
> +static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
> +static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
> +
> +#define lock_policy_rwsem(mode, cpu)                         \
> +static int lock_policy_rwsem_##mode                          \
> +(int cpu)                                                    \
> +{                                                            \
> +     int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);      \
> +     BUG_ON(policy_cpu == -1);                               \
> +     down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));    \
> +                                                             \
> +     return 0;                                               \
> +}
> +
> +lock_policy_rwsem(write, cpu);
> +
> +static void unlock_policy_rwsem_write(int cpu)
> +{
> +     int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
> +     BUG_ON(policy_cpu == -1);
> +     up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
> +}
> +
> +/**
> + * The "transition" notifier list for kernel code that needs to handle
> + * changes to devices when the CPU clock speed changes.
> + * The mutex locks this list.
> + */
> +static struct srcu_notifier_head xen_cpufreq_transition_notifier_list;
> +
> +static bool init_cpufreq_transition_notifier_list_called;
> +static int __init init_cpufreq_transition_notifier_list(void)
> +{
> +     srcu_init_notifier_head(&xen_cpufreq_transition_notifier_list);
> +     init_cpufreq_transition_notifier_list_called = true;
> +     return 0;
> +}
> +pure_initcall(init_cpufreq_transition_notifier_list);
> +
> +static struct cpufreq_policy *xen_cpufreq_cpu_get(unsigned int cpu)
> +{
> +     struct cpufreq_policy *data = NULL;
> +     unsigned long flags;
> +
> +     if (cpu >= xen_nr_cpus)
> +             goto err_out;
> +
> +     /* get the cpufreq driver */
> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
> +
> +     if (!cpufreq_driver)
> +             goto err_out_unlock;
> +
> +     /* get the CPU */
> +     data = per_cpu(cpufreq_cpu_data, cpu);
> +
> +err_out_unlock:
> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +err_out:
> +     return data;
> +}
> +
> +static void xen_cpufreq_cpu_put(struct cpufreq_policy *data)
> +{
> +     module_put(cpufreq_driver->owner);
> +}
> +
> +static int push_data_to_hypervisor(struct cpufreq_policy *policy,
> +                                struct cpufreq_frequency_table *table)
> +{
> +     int ret = 0;
> +     unsigned int i;
> +     unsigned int cpu;
> +     uint32_t platform_limit = 0;
> +     unsigned int max_freq = 0;
> +     unsigned int state_count = 0;
> +     unsigned int prev_freq = 0;
> +     struct xen_processor_px *dst_states;
> +     struct xen_processor_performance *dst_perf;
> +     struct xen_platform_op op = {
> +             .cmd                    = XENPF_set_processor_pminfo,
> +             .interface_version      = XENPF_INTERFACE_VERSION,
> +             .u.set_pminfo.type      = XEN_PM_PX,
> +     };
> +
> +     dst_perf = &op.u.set_pminfo.perf;
> +
> +     /* Check freq table and find max frequency */
> +     for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
> +             unsigned int freq = table[i].frequency;
> +             if (freq == CPUFREQ_ENTRY_INVALID)
> +                     continue;
> +
> +             if (table[i].index != state_count || freq <= prev_freq) {
> +                     pr_err("Frequency table format error\n");
> +                     return -EINVAL;
> +             }
> +
> +             prev_freq = freq;
> +             state_count++;
> +             if (freq > max_freq)
> +                     max_freq = freq;
> +     }
> +
> +     if (!state_count)
> +             return -EINVAL;
> +
> +     dst_perf->state_count = state_count;
> +
> +     dst_states = kcalloc(state_count,
> +                          sizeof(struct xen_processor_px), GFP_KERNEL);
> +
> +     if (!dst_states)
> +             return -ENOMEM;
> +
> +     set_xen_guest_handle(dst_perf->states, dst_states);
> +
> +     /*
> +      * Freq table should start from lower values
> +      * dst_states should start from higer values
> +      */
> +     for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
> +             unsigned int freq = table[i].frequency;
> +             unsigned int tbl_index = state_count - 1 - table[i].index;
> +             if (freq == CPUFREQ_ENTRY_INVALID)
> +                     continue;
> +
> +             if (freq == max_freq)
> +                     platform_limit = tbl_index;
> +
> +             dst_states[tbl_index].core_frequency = freq / 1000;
> +             dst_states[tbl_index].transition_latency =
> +                             policy->cpuinfo.transition_latency / 1000;
> +     }
> +
> +     dst_perf->shared_type = policy->shared_type;
> +     dst_perf->platform_limit = platform_limit;
> +     dst_perf->domain_info.domain = policy->cpu;
> +     dst_perf->domain_info.num_processors = xen_nr_cpus;
> +     dst_perf->flags = XEN_PX_DATA;
> +
> +     for_each_xen_cpu(cpu, policy->cpus) {
> +             op.u.set_pminfo.id = cpu;
> +             ret = HYPERVISOR_dom0_op(&op);
> +             if (ret) {
> +                     pr_debug("Hypervisor error(%d) for CPU%u\n", ret, cpu);
> +                     goto err_free_states;
> +             }
> +             pr_debug("CPU%u - P-states uploaded\n", cpu);
> +
> +             for (i = 0; i < dst_perf->state_count; i++) {
> +                     pr_debug("    state %d: %d MHz, %d uS\n",
> +                              i, (u32) dst_states[i].core_frequency,
> +                              (u32) dst_states[i].transition_latency);
> +             }
> +     }
> +
> +err_free_states:
> +     kfree(dst_states);
> +     return ret;
> +}
> +
> +/*
> + * Returns:
> + *   Negative: Failure
> + *   0:        Success
> + *   Positive: When we have a managed CPU and the sysfs got symlinked
> + */
> +static int xen_cpufreq_add_dev_policy(unsigned int cpu,
> +                               struct cpufreq_policy *policy)
> +{
> +     int ret = 0;
> +#ifdef CONFIG_SMP
> +     unsigned long flags;
> +     unsigned int j;
> +
> +     for_each_cpu(j, policy->cpus) {
> +             struct cpufreq_policy *managed_policy;
> +
> +             if (cpu == j)
> +                     continue;
> +
> +             /* Check for existing affected CPUs.
> +              * They may not be aware of it due to CPU Hotplug.
> +              * cpufreq_cpu_put is called when the device is removed
> +              * in __cpufreq_remove_dev()
> +              */
> +             managed_policy = xen_cpufreq_cpu_get(j);
> +             if (unlikely(managed_policy)) {
> +                     /* Set proper policy_cpu */
> +                     unlock_policy_rwsem_write(cpu);
> +                     per_cpu(cpufreq_policy_cpu, cpu) =
> +                                             managed_policy->cpu;
> +
> +                     if (lock_policy_rwsem_write(cpu) < 0) {
> +                             /* Should not go through policy unlock path */
> +                             if (cpufreq_driver->exit)
> +                                     cpufreq_driver->exit(policy);
> +                             xen_cpufreq_cpu_put(managed_policy);
> +                             return -EBUSY;
> +                     }
> +
> +                     spin_lock_irqsave(&cpufreq_driver_lock, flags);
> +                     cpumask_copy(managed_policy->cpus, policy->cpus);
> +                     per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
> +                     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +
> +                     pr_debug("CPU already managed, adding link\n");
> +
> +                     /*
> +                      * Success. We only needed to be added to the mask.
> +                      * Call driver->exit() because only the cpu parent of
> +                      * the kobj needed to call init().
> +                      */
> +                     if (cpufreq_driver->exit)
> +                             cpufreq_driver->exit(policy);
> +
> +                     return 1;
> +             }
> +     }
> +#endif
> +     return ret;
> +}
> +
> +/**
> + * xen_cpufreq_add_dev - add a CPU device
> + *
> + * Adds the cpufreq interface for a CPU device.
> + */
> +static int xen_cpufreq_add_dev(unsigned int cpu)
> +{
> +     int ret = 0;
> +     struct cpufreq_policy *policy;
> +     unsigned long flags;
> +     unsigned int j;
> +
> +     pr_debug("adding CPU %u\n", cpu);
> +
> +#ifdef CONFIG_SMP
> +     /* check whether a different CPU already registered this
> +      * CPU because it is in the same boat. */
> +     policy = xen_cpufreq_cpu_get(cpu);
> +     if (unlikely(policy)) {
> +             xen_cpufreq_cpu_put(policy);
> +             return 0;
> +     }
> +#endif
> +
> +     if (!try_module_get(cpufreq_driver->owner)) {
> +             ret = -EINVAL;
> +             goto module_out;
> +     }
> +
> +     ret = -ENOMEM;
> +     policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
> +     if (!policy)
> +             goto nomem_out;
> +
> +     if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
> +             goto err_free_policy;
> +
> +     if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
> +             goto err_free_cpumask;
> +
> +     policy->cpu = cpu;
> +     cpumask_copy(policy->cpus, cpumask_of(cpu));
> +
> +     /* Initially set CPU itself as the policy_cpu */
> +     per_cpu(cpufreq_policy_cpu, cpu) = cpu;
> +     ret = (lock_policy_rwsem_write(cpu) < 0);
> +     WARN_ON(ret);
> +
> +     /* call driver. From then on the cpufreq must be able
> +      * to accept all calls to ->verify and ->setpolicy for this CPU
> +      */
> +     ret = cpufreq_driver->init(policy);
> +     if (ret) {
> +             pr_debug("initialization failed\n");
> +             goto err_unlock_policy;
> +     }
> +     ret = xen_cpufreq_add_dev_policy(cpu, policy);
> +     if (ret) {
> +             if (ret > 0)
> +                     /* This is a managed cpu, symlink created,
> +                        exit with 0 */
> +                     ret = 0;
> +             goto err_unlock_policy;
> +     }
> +
> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
> +     for_each_cpu(j, policy->cpus) {
> +             per_cpu(cpufreq_cpu_data, j) = policy;
> +             per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
> +     }
> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +
> +     unlock_policy_rwsem_write(cpu);
> +
> +     module_put(cpufreq_driver->owner);
> +     pr_debug("initialization complete\n");
> +
> +     return 0;
> +
> +err_unlock_policy:
> +     unlock_policy_rwsem_write(cpu);
> +     free_cpumask_var(policy->related_cpus);
> +err_free_cpumask:
> +     free_cpumask_var(policy->cpus);
> +err_free_policy:
> +     kfree(policy);
> +nomem_out:
> +     module_put(cpufreq_driver->owner);
> +module_out:
> +     return ret;
> +}
> +
> +/**
> + * __cpufreq_remove_dev - remove a CPU device
> + *
> + * Removes the cpufreq interface for a CPU device.
> + * Caller should already have policy_rwsem in write mode for this CPU.
> + * This routine frees the rwsem before returning.
> + */
> +static int __cpufreq_remove_dev(unsigned int cpu)
> +{
> +     unsigned long flags;
> +     struct cpufreq_policy *data;
> +#ifdef CONFIG_SMP
> +     unsigned int j;
> +#endif
> +
> +     pr_debug("unregistering CPU %u\n", cpu);
> +
> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
> +     data = per_cpu(cpufreq_cpu_data, cpu);
> +
> +     if (!data) {
> +             spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +             unlock_policy_rwsem_write(cpu);
> +             return -EINVAL;
> +     }
> +     per_cpu(cpufreq_cpu_data, cpu) = NULL;
> +
> +
> +#ifdef CONFIG_SMP
> +     /* if this isn't the CPU which is the parent of the kobj, we
> +      * only need to unlink, put and exit
> +      */
> +     if (unlikely(cpu != data->cpu)) {
> +             pr_debug("removing link\n");
> +             cpumask_clear_cpu(cpu, data->cpus);
> +             spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +             xen_cpufreq_cpu_put(data);
> +             unlock_policy_rwsem_write(cpu);
> +             return 0;
> +     }
> +#endif
> +
> +#ifdef CONFIG_SMP
> +
> +     /* if we have other CPUs still registered, we need to unlink them,
> +      * or else wait_for_completion below will lock up. Clean the
> +      * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
> +      * the sysfs links afterwards.
> +      */
> +     if (unlikely(cpumask_weight(data->cpus) > 1)) {
> +             for_each_cpu(j, data->cpus) {
> +                     if (j == cpu)
> +                             continue;
> +                     per_cpu(cpufreq_cpu_data, j) = NULL;
> +             }
> +     }
> +
> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +
> +     if (unlikely(cpumask_weight(data->cpus) > 1)) {
> +             for_each_cpu(j, data->cpus) {
> +                     if (j == cpu)
> +                             continue;
> +                     pr_debug("removing link for cpu %u\n", j);
> +                     unlock_policy_rwsem_write(cpu);
> +                     lock_policy_rwsem_write(cpu);
> +                     xen_cpufreq_cpu_put(data);
> +             }
> +     }
> +#else
> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +#endif
> +
> +     unlock_policy_rwsem_write(cpu);
> +
> +     lock_policy_rwsem_write(cpu);
> +     if (cpufreq_driver->exit)
> +             cpufreq_driver->exit(data);
> +     unlock_policy_rwsem_write(cpu);
> +
> +     free_cpumask_var(data->related_cpus);
> +     free_cpumask_var(data->cpus);
> +     kfree(data);
> +
> +     return 0;
> +}
> +
> +static int cpufreq_remove_dev(unsigned int cpu)
> +{
> +     int retval;
> +
> +     if (unlikely(lock_policy_rwsem_write(cpu)))
> +             BUG();
> +
> +     retval = __cpufreq_remove_dev(cpu);
> +     return retval;
> +}
> +
> +/*********************************************************************
> + *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
> + *********************************************************************/
> +
> +/**
> + * adjust_jiffies - adjust the system "loops_per_jiffy"
> + *
> + * This function alters the system "loops_per_jiffy" for the clock
> + * speed change. Note that loops_per_jiffy cannot be updated on SMP
> + * systems as each CPU might be scaled differently. So, use the arch
> + * per-CPU loops_per_jiffy value wherever possible.
> + */
> +#ifndef CONFIG_SMP
> +static unsigned long l_p_j_ref;
> +static unsigned int  l_p_j_ref_freq;
> +
> +static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
> +{
> +     if (ci->flags & CPUFREQ_CONST_LOOPS)
> +             return;
> +
> +     if (!l_p_j_ref_freq) {
> +             l_p_j_ref = loops_per_jiffy;
> +             l_p_j_ref_freq = ci->old;
> +             pr_debug("saving %lu as reference value for loops_per_jiffy; "
> +                     "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
> +     }
> +     if ((val == CPUFREQ_POSTCHANGE  && ci->old != ci->new) ||
> +         (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
> +             loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
> +                                                             ci->new);
> +             pr_debug("scaling loops_per_jiffy to %lu "
> +                     "for frequency %u kHz\n", loops_per_jiffy, ci->new);
> +     }
> +}
> +#else
> +static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs 
> *ci)
> +{
> +     return;
> +}
> +#endif

There is quite a lot of code duplication with cpufreq.c; I don't think
that is going to be acceptable to the upstream maintainers.


> +/**
> + * xen_cpufreq_notify_transition - call notifier chain and adjust_jiffies
> + * on frequency transition.
> + *
> + * This function calls the transition notifiers and the "adjust_jiffies"
> + * function. It is called twice on all CPU frequency changes that have
> + * external effects.
> + */
> +void xen_cpufreq_notify_transition(struct cpufreq_freqs *freqs,
> +                                unsigned int state)
> +{
> +     struct cpufreq_policy *policy;
> +
> +     BUG_ON(irqs_disabled());
> +
> +     freqs->flags = cpufreq_driver->flags;
> +     pr_debug("notification %u of frequency transition to %u kHz\n",
> +              state, freqs->new);
> +
> +     policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
> +     switch (state) {
> +     case CPUFREQ_PRECHANGE:
> +             /* detect if the driver reported a value as "old frequency"
> +              * which is not equal to what the cpufreq core thinks is
> +              * "old frequency".
> +              */
> +             if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
> +                     if ((policy) && (policy->cpu == freqs->cpu) &&
> +                         (policy->cur) && (policy->cur != freqs->old)) {
> +                             pr_debug("Warning: CPU frequency is"
> +                                      " %u, cpufreq assumed %u kHz.\n",
> +                                      freqs->old, policy->cur);
> +                             freqs->old = policy->cur;
> +                     }
> +             }
> +             srcu_notifier_call_chain(&xen_cpufreq_transition_notifier_list,
> +                                      CPUFREQ_PRECHANGE, freqs);
> +             adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
> +             break;
> +
> +     case CPUFREQ_POSTCHANGE:
> +             adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
> +             pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new,
> +                      (unsigned long)freqs->cpu);
> +             trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
> +             trace_cpu_frequency(freqs->new, freqs->cpu);
> +             srcu_notifier_call_chain(&xen_cpufreq_transition_notifier_list,
> +                                      CPUFREQ_POSTCHANGE, freqs);
> +             if (likely(policy) && likely(policy->cpu == freqs->cpu))
> +                     policy->cur = freqs->new;
> +             break;
> +     }
> +}
> +
> +/*********************************************************************
> + *                              GOVERNORS                            *
> + *********************************************************************/
> +
> +int __xen_cpufreq_driver_target(struct cpufreq_policy *policy,
> +                             unsigned int target_freq,
> +                             unsigned int relation)
> +{
> +     int retval = -EINVAL;
> +     unsigned int old_target_freq = target_freq;
> +
> +     /* Make sure that target_freq is within supported range */
> +     if (target_freq > policy->max)
> +             target_freq = policy->max;
> +     if (target_freq < policy->min)
> +             target_freq = policy->min;
> +
> +     pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
> +              policy->cpu, target_freq, relation, old_target_freq);
> +
> +     if (target_freq == policy->cur)
> +             return 0;
> +
> +     if (cpufreq_driver->target)
> +             retval = cpufreq_driver->target(policy, target_freq,
> +                                                 relation);
> +
> +     return retval;
> +}
> +
> +int xen_cpufreq_driver_target(struct cpufreq_policy *policy,
> +                           unsigned int target_freq,
> +                           unsigned int relation)
> +{
> +     int ret = -EINVAL;
> +
> +     if (!policy)
> +             goto no_policy;
> +
> +     if (unlikely(lock_policy_rwsem_write(policy->cpu)))
> +             goto fail;
> +
> +     ret = __xen_cpufreq_driver_target(policy, target_freq, relation);
> +
> +     unlock_policy_rwsem_write(policy->cpu);
> +
> +fail:
> +     xen_cpufreq_cpu_put(policy);
> +no_policy:
> +     return ret;
> +}
> +
> +/*********************************************************************
> + *                    HANDLE COMMANDS FROM XEN                       *
> + *********************************************************************/
> +static void cpufreq_work_hnd(struct work_struct *w);
> +
> +static struct workqueue_struct *cpufreq_wq;
> +static DECLARE_WORK(cpufreq_work, cpufreq_work_hnd);
> +
> +static void cpufreq_work_hnd(struct work_struct *w)
> +{
> +     int ret;
> +     struct cpufreq_policy *policy;
> +     struct cpufreq_sh_info *cpufreq_info;
> +
> +     cpufreq_info = &HYPERVISOR_shared_info->arch.cpufreq;
> +
> +     policy = xen_cpufreq_cpu_get(cpufreq_info->cpu);
> +     ret = xen_cpufreq_driver_target(policy,
> +                                     cpufreq_info->freq,
> +                                     cpufreq_info->relation);
> +
> +     cpufreq_info->result = ret;
> +}

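No barriers? No locking? cpufreq_info points into the shared info page,
yet its fields are read and the result is written back here without any
memory barriers or locking.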


> +static irqreturn_t cpufreq_interrupt(int irq, void *data)
> +{
> +     queue_work(cpufreq_wq, &cpufreq_work);
> +     return IRQ_HANDLED;
> +}
> +
> +/*********************************************************************
> + *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
> + *********************************************************************/
> +
> +/**
> + * xen_cpufreq_register_driver - register a CPU Frequency driver
> + * @driver_data: A struct cpufreq_driver containing the values#
> + * submitted by the CPU Frequency driver.
> + *
> + *   Registers a CPU Frequency driver to this core code. This code
> + * returns zero on success, -EBUSY when another driver got here first
> + * (and isn't unregistered in the meantime).
> + *
> + */
> +int xen_cpufreq_register_driver(struct cpufreq_driver *driver_data)
> +{
> +     unsigned long flags;
> +     int ret;
> +     unsigned int cpu;
> +     struct cpufreq_frequency_table *table;
> +     struct cpufreq_policy *policy;
> +     cpumask_var_t pushed_cpus;
> +     int irq;
> +
> +     if (!xen_nr_cpus)
> +             return -EPROBE_DEFER;
> +
> +     if (!driver_data || !driver_data->verify || !driver_data->init ||
> +         (!driver_data->target))
> +             return -EINVAL;
> +
> +     pr_debug("trying to register driver %s\n", driver_data->name);
> +
> +     if (driver_data->setpolicy)
> +             driver_data->flags |= CPUFREQ_CONST_LOOPS;
> +
> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
> +
> +     if (cpufreq_driver) {
> +             spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +             return -EBUSY;
> +     }
> +     cpufreq_driver = driver_data;
> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +
> +     irq = bind_virq_to_irq(VIRQ_CPUFREQ, 0);
> +     if (irq < 0) {
> +             pr_err("Bind virq (%d) error (%d)\n", VIRQ_CPUFREQ, irq);
> +             ret = irq;
> +             goto err_remove_drv;
> +     }
> +
> +     irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN|IRQ_NOPROBE);
> +
> +     ret = request_irq(irq, cpufreq_interrupt, 0,
> +                        "xen_cpufreq", NULL);
> +
> +     if (ret < 0) {
> +             pr_err("Request irq (%d) error (%d)\n", irq, ret);
> +             goto err_unbind_from_irqhnd;
> +     }
> +
> +     xen_irq = irq;
> +
> +     for (cpu = 0; cpu < xen_nr_cpus; cpu++) {
> +             ret = xen_cpufreq_add_dev(cpu);
> +             if (ret)
> +                     goto err_remove_cpu;
> +     }
> +
> +     if (!zalloc_cpumask_var(&pushed_cpus, GFP_KERNEL))
> +             goto err_remove_cpu;
> +
> +     for (cpu = 0; cpu < xen_nr_cpus; cpu++) {
> +             if (cpumask_test_cpu(cpu, pushed_cpus))
> +                     continue;
> +
> +             policy = xen_cpufreq_cpu_get(cpu);
> +             if (!policy) {
> +                     ret = -EINVAL;
> +                     goto err_free_cpumask;
> +             }
> +
> +             cpumask_or(pushed_cpus, pushed_cpus, policy->cpus);
> +             table = cpufreq_frequency_get_table(policy->cpu);
> +             if (!table) {
> +                     ret = -EINVAL;
> +                     goto err_free_cpumask;
> +             }
> +
> +             ret = push_data_to_hypervisor(policy, table);
> +             if (ret)
> +                     goto err_free_cpumask;
> +     }
> +
> +     free_cpumask_var(pushed_cpus);
> +
> +     pr_debug("driver %s up and running\n", driver_data->name);
> +
> +     return 0;
> +
> +err_free_cpumask:
> +     free_cpumask_var(pushed_cpus);
> +err_remove_cpu:
> +     for (cpu = 0; cpu < xen_nr_cpus; cpu++)
> +             cpufreq_remove_dev(cpu);
> +err_unbind_from_irqhnd:
> +     unbind_from_irqhandler(irq, NULL);
> +err_remove_drv:
> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
> +     cpufreq_driver = NULL;
> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +     return ret;
> +}
> +
> +/**
> + * xen_cpufreq_unregister_driver - unregister the current CPUFreq driver
> + *
> + *    Unregister the current CPUFreq driver. Only call this if you have
> + * the right to do so, i.e. if you have succeeded in initialising before!
> + * Returns zero if successful, and -EINVAL if the cpufreq_driver is
> + * currently not initialised.
> + */
> +int xen_cpufreq_unregister_driver(struct cpufreq_driver *driver)
> +{
> +     unsigned long flags;
> +     unsigned int cpu;
> +
> +     if (!cpufreq_driver || (driver != cpufreq_driver))
> +             return -EINVAL;
> +
> +     pr_debug("unregistering driver %s\n", driver->name);
> +
> +     unbind_from_irqhandler(xen_irq, NULL);
> +
> +     for (cpu = 0; cpu < xen_nr_cpus; cpu++)
> +             cpufreq_remove_dev(cpu);
> +
> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
> +     cpufreq_driver = NULL;
> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +
> +     return 0;
> +}
> +
> +struct cpufreq_drv_ops xen_cpufreq_drv_ops = {
> +     .notify_transition = xen_cpufreq_notify_transition,
> +     .register_driver = xen_cpufreq_register_driver,
> +     .unregister_driver = xen_cpufreq_unregister_driver,
> +};
> +
> +static int __init xen_cpufreq_init(void)
> +{
> +     int ret;
> +     int i;
> +
> +     struct xen_sysctl op = {
> +             .cmd                    = XEN_SYSCTL_physinfo,
> +             .interface_version      = XEN_SYSCTL_INTERFACE_VERSION,
> +     };
> +
> +     ret = HYPERVISOR_sysctl(&op);
> +     if (ret) {
> +             pr_err("Hypervisor get physinfo error (%d)\n", ret);
> +             return ret;
> +     }
> +
> +     xen_nr_cpus = op.u.physinfo.nr_cpus;
> +     if (xen_nr_cpus == 0 || xen_nr_cpus > NR_CPUS) {
> +             xen_nr_cpus = 0;
> +             pr_err("Wrong CPUs amount (%d)\n", xen_nr_cpus);
> +             return -EINVAL;
> +     }
> +
> +     for (i = 0; i < xen_nr_cpus; i++) {
> +             per_cpu(cpufreq_policy_cpu, i) = -1;
> +             init_rwsem(&per_cpu(cpu_policy_rwsem, i));
> +     }
> +
> +     cpufreq_wq = alloc_workqueue("xen_cpufreq", 0, 1);
> +     if (!cpufreq_wq) {
> +             pr_err("Create workqueue error\n");
> +             ret = -ENOMEM;
> +             goto err_create_wq;
> +     }
> +
> +     return 0;
> +
> +err_create_wq:
> +     xen_nr_cpus = 0;
> +     return ret;
> +}
> +
> +MODULE_AUTHOR("Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>");
> +MODULE_DESCRIPTION("Xen cpufreq driver which uploads PM data to Xen 
> hypervisor");
> +MODULE_LICENSE("GPL");
> +
> +core_initcall(xen_cpufreq_init);
> diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
> index c57d5f6..ee3b154 100644
> --- a/include/xen/interface/platform.h
> +++ b/include/xen/interface/platform.h
> @@ -209,6 +209,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
>  #define XEN_PX_PSS   2
>  #define XEN_PX_PPC   4
>  #define XEN_PX_PSD   8
> +#define XEN_PX_DATA  16
>  
>  struct xen_power_register {
>       uint32_t     space_id;
> diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> index cf64566..9133110 100644
> --- a/include/xen/interface/xen.h
> +++ b/include/xen/interface/xen.h
> @@ -81,6 +81,7 @@
>  #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
>  #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
>  #define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */
> +#define VIRQ_CPUFREQ    14 /* (DOM0) Notify cpufreq driver                */
>  
>  /* Architecture-specific VIRQ definitions. */
>  #define VIRQ_ARCH_0    16

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.