[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH v4 10/11] cpufreq: add hwdom-cpufreq driver



On Tue, Nov 4, 2014 at 6:13 PM, Stefano Stabellini
<stefano.stabellini@xxxxxxxxxxxxx> wrote:
> On Tue, 4 Nov 2014, Oleksandr Dmytryshyn wrote:
>> This driver uses hwdom to change frequencies on CPUs
>> Workflow:
>>  * cpufreq governor driver in Xen wants to change the
>>    frequency of the physical CPU
>>  * hwdom-cpufreq driver sets parameters in the shared
>>    memory
>>  * hwdom-cpufreq driver sends a VIRQ_CPUFREQ interrupt
>>    to notify the hardware domain
>>  * cpufreq driver in the hardware domain reads parameters
>>    from the shared memory, changes frequency and copies
>>    the result of the operation to the shared memory
>>
>> Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
>> ---
>>  xen/Rules.mk                        |   1 +
>>  xen/drivers/cpufreq/Makefile        |   1 +
>>  xen/drivers/cpufreq/hwdom-cpufreq.c | 252 
>> ++++++++++++++++++++++++++++++++++++
>>  xen/include/public/xen.h            |   1 +
>>  4 files changed, 255 insertions(+)
>>  create mode 100644 xen/drivers/cpufreq/hwdom-cpufreq.c
>>
>> diff --git a/xen/Rules.mk b/xen/Rules.mk
>> index 3b0b89b..cccbc72 100644
>> --- a/xen/Rules.mk
>> +++ b/xen/Rules.mk
>> @@ -56,6 +56,7 @@ CFLAGS-$(perfc_arrays)  += -DPERF_ARRAYS
>>  CFLAGS-$(lock_profile)  += -DLOCK_PROFILE
>>  CFLAGS-$(HAS_ACPI)      += -DHAS_ACPI
>>  CFLAGS-$(HAS_CPUFREQ)   += -DHAS_CPUFREQ
>> +CFLAGS-$(HAS_HWDOM_CPUFREQ) += -DHAS_HWDOM_CPUFREQ
>>  CFLAGS-$(HAS_PM)        += -DHAS_PM
>>  CFLAGS-$(HAS_CPU_TURBO) += -DHAS_CPU_TURBO
>>  CFLAGS-$(HAS_GDBSX)     += -DHAS_GDBSX
>> diff --git a/xen/drivers/cpufreq/Makefile b/xen/drivers/cpufreq/Makefile
>> index b87d127..891997c 100644
>> --- a/xen/drivers/cpufreq/Makefile
>> +++ b/xen/drivers/cpufreq/Makefile
>> @@ -2,3 +2,4 @@ obj-y += cpufreq.o
>>  obj-y += cpufreq_ondemand.o
>>  obj-y += cpufreq_misc_governors.o
>>  obj-y += utility.o
>> +obj-$(HAS_HWDOM_CPUFREQ) += hwdom-cpufreq.o
>> diff --git a/xen/drivers/cpufreq/hwdom-cpufreq.c 
>> b/xen/drivers/cpufreq/hwdom-cpufreq.c
>> new file mode 100644
>> index 0000000..a39fc6c
>> --- /dev/null
>> +++ b/xen/drivers/cpufreq/hwdom-cpufreq.c
>> @@ -0,0 +1,252 @@
>> +/*
>> + *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx>
>> + *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
>> + *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@xxxxxxxx>
>> + *  Copyright (C) 2006        Denis Sadykov <denis.m.sadykov@xxxxxxxxx>
>> + *
>> + *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
>> + *      porting acpi-cpufreq.c from Linux 2.6.23 to Xen hypervisor
>> + *
>> + *  Copyright (C) 2014 GlobalLogic Inc.
>> + *
>> + * 
>> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> + *
>> + *  This program is free software; you can redistribute it and/or modify
>> + *  it under the terms of the GNU General Public License as published by
>> + *  the Free Software Foundation; either version 2 of the License, or (at
>> + *  your option) any later version.
>> + *
>> + *  This program is distributed in the hope that it will be useful, but
>> + *  WITHOUT ANY WARRANTY; without even the implied warranty of
>> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + *  General Public License for more details.
>> + *
>> + *  You should have received a copy of the GNU General Public License along
>> + *  with this program; if not, write to the Free Software Foundation, Inc.,
>> + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
>> + *
>> + * 
>> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> + */
>> +#include <xen/types.h>
>> +#include <xen/errno.h>
>> +#include <xen/sched.h>
>> +#include <xen/event.h>
>> +#include <xen/irq.h>
>> +#include <xen/spinlock.h>
>> +#include <xen/cpufreq.h>
>> +#include <xen/err.h>
>> +#include <asm/shared.h>
>> +#include <asm/current.h>
>> +#include <asm/system.h>
>> +
>> +struct hwdom_cpufreq_data {
>> +    struct processor_performance *perf_data;
>> +    struct cpufreq_frequency_table *freq_table;
>> +};
>> +
>> +static struct hwdom_cpufreq_data *hwdom_cpufreq_drv_data[NR_CPUS];
>> +
>> +int cpufreq_cpu_init(unsigned int cpuid)
>> +{
>> +    return cpufreq_add_cpu(cpuid);
>> +}
>> +
>> +static void notify_cpufreq_domain(void)
>> +{
>> +    send_global_virq(VIRQ_CPUFREQ);
>> +}
>> +
>> +static int hwdom_cpufreq_verify(struct cpufreq_policy *policy)
>> +{
>> +    struct hwdom_cpufreq_data *data;
>> +    struct processor_performance *perf;
>> +
>> +    if ( !policy || !(data = hwdom_cpufreq_drv_data[policy->cpu]) ||
>> +         !processor_pminfo[policy->cpu] )
>> +        return -EINVAL;
>> +
>> +    perf = &processor_pminfo[policy->cpu]->perf;
>> +
>> +    cpufreq_verify_within_limits(policy, 0,
>> +        perf->states[perf->platform_limit].core_frequency * 1000);
>> +
>> +    return cpufreq_frequency_table_verify(policy, data->freq_table);
>> +}
>> +
>> +static int hwdom_cpufreq_target(struct cpufreq_policy *policy,
>> +                               unsigned int target_freq, unsigned int 
>> relation)
>> +{
>> +    struct hwdom_cpufreq_data *data = hwdom_cpufreq_drv_data[policy->cpu];
>> +    struct processor_performance *perf;
>> +    struct cpufreq_freqs freqs;
>> +    struct cpufreq_sh_info *cpufreq_info;
>> +    cpumask_t online_policy_cpus;
>> +    unsigned int next_state = 0; /* Index into freq_table */
>> +    unsigned int next_perf_state = 0; /* Index into perf table */
>> +    unsigned int j;
>> +    int ret = 0;
>> +
>> +    if ( unlikely(data == NULL ||
>> +         data->perf_data == NULL || data->freq_table == NULL) )
>> +        return -ENODEV;
>> +
>> +    perf = data->perf_data;
>> +    ret = cpufreq_frequency_table_target(policy,
>> +                                         data->freq_table,
>> +                                         target_freq,
>> +                                         relation, &next_state);
>> +    if ( unlikely(ret) )
>> +        return -ENODEV;
>> +
>> +    cpumask_and(&online_policy_cpus, &cpu_online_map, policy->cpus);
>> +
>> +    next_perf_state = data->freq_table[next_state].index;
>> +    if ( perf->state == next_perf_state )
>> +    {
>> +        if ( unlikely(policy->resume) )
>> +            policy->resume = 0;
>> +        else
>> +            return 0;
>> +    }
>> +
>> +    freqs.old = perf->states[perf->state].core_frequency * 1000;
>> +    freqs.new = data->freq_table[next_state].frequency;
>> +
>> +     /* Do send cmd for Hardware domain */
>> +    cpufreq_info = arch_get_cpufreq_addr(dom0);
>> +
>> +    /* Set previous result in the Hardware domain then read it */
>> +    smp_rmb();
>> +
>> +     /* return previous result */
>> +    ret = cpufreq_info->result;
>
> What if the previous command hasn't completed yet?
I'll create an event channel between Xen and hwdom.
In this case the global VIRQ_CPUFREQ will not be needed, and Xen and hwdom
will be able to send notifications to each other.


>> +    cpufreq_info->cpu = policy->cpu;
>> +    cpufreq_info->freq = freqs.new;
>> +    cpufreq_info->relation = (uint32_t)relation;
>> +
>> +    smp_wmb(); /* above must be visible before notify_cpufreq_domain() */
>> +
>> +    notify_cpufreq_domain();
>> +
>> +    for_each_cpu( j, &online_policy_cpus )
>> +        cpufreq_statistic_update(j, perf->state, next_perf_state);
>> +
>> +    perf->state = next_perf_state;
>> +    policy->cur = freqs.new;
>> +
>> +    return ret;
>> +}
>> +
>> +static int
>> +hwdom_cpufreq_cpu_init(struct cpufreq_policy *policy)
>> +{
>> +    struct processor_performance *perf;
>> +    struct hwdom_cpufreq_data *data;
>> +    unsigned int cpu = policy->cpu;
>> +    unsigned int valid_states = 0;
>> +    int i;
>> +    int ret = 0;
>> +
>> +    data = xzalloc(struct hwdom_cpufreq_data);
>> +    if ( !data )
>> +        return -ENOMEM;
>> +
>> +    hwdom_cpufreq_drv_data[cpu] = data;
>> +
>> +    data->perf_data = &processor_pminfo[cpu]->perf;
>> +
>> +    perf = data->perf_data;
>> +    policy->shared_type = perf->shared_type;
>> +
>> +    data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
>> +                                     (perf->state_count+1));
>> +    if ( !data->freq_table )
>> +    {
>> +        ret = -ENOMEM;
>> +        goto err_unreg;
>> +    }
>> +
>> +    /* detect transition latency */
>> +    policy->cpuinfo.transition_latency = 0;
>> +    for ( i = 0; i < perf->state_count; i++ )
>> +    {
>> +        if ( (perf->states[i].transition_latency * 1000) >
>> +             policy->cpuinfo.transition_latency )
>> +            policy->cpuinfo.transition_latency =
>> +                perf->states[i].transition_latency * 1000;
>> +    }
>> +
>> +    policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
>> +
>> +    /* table init */
>> +    for ( i = 0; i < perf->state_count; i++ )
>> +    {
>> +        if ( i > 0 && perf->states[i].core_frequency >=
>> +            data->freq_table[valid_states-1].frequency / 1000 )
>> +            continue;
>> +
>> +        data->freq_table[valid_states].index = i;
>> +        data->freq_table[valid_states].frequency =
>> +            perf->states[i].core_frequency * 1000;
>> +        valid_states++;
>> +    }
>> +    data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
>> +    perf->state = 0;
>> +
>> +    ret = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
>> +    if ( ret )
>> +        goto err_freqfree;
>> +
>> +
>> +    /*
>> +     * the first call to ->target() should result in us actually
>> +     * send command to the Hardware domain to set frequency.
>> +     */
>> +    policy->resume = 1;
>> +
>> +    /* Set the minimal frequency */
>> +    return hwdom_cpufreq_target(policy, policy->min, CPUFREQ_RELATION_L);
>> +
>> + err_freqfree:
>> +    xfree(data->freq_table);
>> + err_unreg:
>> +    xfree(data);
>> +    hwdom_cpufreq_drv_data[cpu] = NULL;
>> +
>> +    return ret;
>> +}
>> +
>> +static int hwdom_cpufreq_cpu_exit(struct cpufreq_policy *policy)
>> +{
>> +    struct hwdom_cpufreq_data *data = hwdom_cpufreq_drv_data[policy->cpu];
>> +
>> +    if ( data )
>> +    {
>> +        hwdom_cpufreq_drv_data[policy->cpu] = NULL;
>> +        xfree(data->freq_table);
>> +        xfree(data);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static struct cpufreq_driver hwdom_cpufreq_driver = {
>> +    .name   = "hwdom-cpufreq",
>> +    .verify = hwdom_cpufreq_verify,
>> +    .target = hwdom_cpufreq_target,
>> +    .init   = hwdom_cpufreq_cpu_init,
>> +    .exit   = hwdom_cpufreq_cpu_exit,
>> +};
>> +
>> +static int __init hwdom_cpufreq_driver_init(void)
>> +{
>> +    int ret = 0;
>> +
>> +    if ( cpufreq_controller == FREQCTL_xen )
>> +        ret = cpufreq_register_driver(&hwdom_cpufreq_driver);
>> +
>> +    return ret;
>> +}
>> +
>> +__initcall(hwdom_cpufreq_driver_init);
>> diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
>> index 8c5697e..abd8438 100644
>> --- a/xen/include/public/xen.h
>> +++ b/xen/include/public/xen.h
>> @@ -160,6 +160,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
>>  #define VIRQ_MEM_EVENT  10 /* G. (DOM0) A memory event has occured          
>>  */
>>  #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient                    
>>  */
>>  #define VIRQ_ENOMEM     12 /* G. (DOM0) Low on heap memory       */
>> +#define VIRQ_CPUFREQ    14 /* G. (DOM0) Notify cpufreq driver               
>>  */
>>
>>  /* Architecture-specific VIRQ definitions. */
>>  #define VIRQ_ARCH_0    16
>> --
>> 1.9.1
>>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.