[Xen-changelog] [xen-unstable] X86 and IA64: Rebase cpufreq logic for supporting both x86 and ia64
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1222434278 -3600
# Node ID 08374be213188c10eb7c170c143ca0d0d17e55d8
# Parent  5274aa966231ad5f050987e1fb00c6f6ee75a007
X86 and IA64: Rebase cpufreq logic for supporting both x86 and ia64 arch

Rebase cpufreq logic for supporting both x86 and ia64 arch:
1. move cpufreq arch-independent logic into common dir
   (xen/drivers/acpi and xen/drivers/cpufreq dir);
2. leave cpufreq x86-dependent logic at xen/arch/x86/acpi/cpufreq dir;

Signed-off-by: Yu, Ke <ke.yu@xxxxxxxxx>
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
---
 xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c |  246 ------------------
 xen/arch/x86/acpi/cpufreq/utility.c          |  368 ---------------------------
 xen/arch/x86/acpi/pmstat.c                   |  155 -----------
 xen/arch/x86/acpi/Makefile                   |    1 
 xen/arch/x86/acpi/cpufreq/Makefile           |    2 
 xen/arch/x86/acpi/cpufreq/cpufreq.c          |  153 ----------
 xen/drivers/Makefile                         |    1 
 xen/drivers/acpi/Makefile                    |    1 
 xen/drivers/acpi/pmstat.c                    |  155 +++++++++++
 xen/drivers/cpufreq/Makefile                 |    3 
 xen/drivers/cpufreq/cpufreq.c                |  188 +++++++++++++
 xen/drivers/cpufreq/cpufreq_ondemand.c       |  246 ++++++++++++++++++
 xen/drivers/cpufreq/utility.c                |  368 +++++++++++++++++++++++++++
 xen/include/acpi/cpufreq/cpufreq.h           |    7 
 xen/include/acpi/cpufreq/processor_perf.h    |   10 
 15 files changed, 976 insertions(+), 928 deletions(-)
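The split works because everything under the new xen/drivers/cpufreq directory reaches architecture code only through the cpufreq_driver hook table: the common helpers dispatch through function pointers, so an ia64 port only has to register its own hooks. A minimal sketch of such a registration, assuming hypothetical ia64_* functions (only the hook fields, which this patch uses via cpufreq_driver->init/verify/target/getavg/exit, are real):

    /* Sketch only -- the ia64_* callbacks are hypothetical placeholders. */
    static struct cpufreq_driver ia64_acpi_cpufreq_driver = {
        .verify = ia64_cpufreq_verify,    /* bounds-check a proposed policy */
        .target = ia64_cpufreq_target,    /* program a new P-state          */
        .init   = ia64_cpufreq_cpu_init,  /* per-policy setup               */
        .exit   = ia64_cpufreq_cpu_exit,  /* per-policy teardown            */
    };

The generic code (for example __cpufreq_driver_target() in utility.c below) calls exactly these hooks, so no common file needs per-architecture #ifdefs.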
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/Makefile	Fri Sep 26 14:04:38 2008 +0100
@@ -2,4 +2,3 @@ subdir-y += cpufreq
 obj-y += boot.o
 obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o cpuidle_menu.o
-obj-y += pmstat.o
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/Makefile
--- a/xen/arch/x86/acpi/cpufreq/Makefile	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/Makefile	Fri Sep 26 14:04:38 2008 +0100
@@ -1,4 +1,2 @@ obj-y += cpufreq.o
 obj-y += cpufreq.o
-obj-y += utility.o
-obj-y += cpufreq_ondemand.o
 obj-y += powernow.o
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Sep 26 14:04:38 2008 +0100
@@ -45,9 +45,6 @@
 #include <acpi/acpi.h>
 #include <acpi/cpufreq/cpufreq.h>
 
-/* TODO: change to link list later as domain number may be sparse */
-static cpumask_t cpufreq_dom_map[NR_CPUS];
-
 enum {
     UNDEFINED_CAPABLE = 0,
     SYSTEM_INTEL_MSR_CAPABLE,
@@ -56,13 +53,6 @@ enum {
 
 #define INTEL_MSR_RANGE (0xffff)
 #define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1)
-
-struct acpi_cpufreq_data {
-    struct processor_performance *acpi_data;
-    struct cpufreq_frequency_table *freq_table;
-    unsigned int max_freq;
-    unsigned int cpu_feature;
-};
 
 static struct acpi_cpufreq_data *drv_data[NR_CPUS];
 
@@ -342,7 +332,7 @@ static int acpi_cpufreq_target(struct cp
         policy->resume = 0;
     }
     else {
-        printk(KERN_INFO "Already at target state (P%d)\n",
+        printk(KERN_DEBUG "Already at target state (P%d)\n",
                next_perf_state);
         return 0;
     }
@@ -379,7 +369,7 @@ static int acpi_cpufreq_target(struct cp
     if (!check_freqs(cmd.mask, freqs.new, data))
        return -EAGAIN;
 
-    px_statistic_update(cmd.mask, perf->state, next_perf_state);
+    cpufreq_statistic_update(cmd.mask, perf->state, next_perf_state);
 
    perf->state = next_perf_state;
    policy->cur = freqs.new;
@@ -581,145 +571,6 @@ static struct cpufreq_driver acpi_cpufre
     .exit = acpi_cpufreq_cpu_exit,
 };
 
-int cpufreq_limit_change(unsigned int cpu)
-{
-    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
-    struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
-    struct cpufreq_policy policy;
-
-    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
-        return -ENODEV;
-
-    if ((perf->platform_limit < 0) ||
-        (perf->platform_limit >= perf->state_count))
-        return -EINVAL;
-
-    memcpy(&policy, data, sizeof(struct cpufreq_policy));
-
-    policy.max =
-        perf->states[perf->platform_limit].core_frequency * 1000;
-
-    return __cpufreq_set_policy(data, &policy);
-}
-
-int cpufreq_add_cpu(unsigned int cpu)
-{
-    int ret = 0;
-    unsigned int firstcpu;
-    unsigned int dom;
-    unsigned int j;
-    struct cpufreq_policy new_policy;
-    struct cpufreq_policy *policy;
-    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
-
-    /* to protect the case when Px was not controlled by xen */
-    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
-        return 0;
-
-    if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
-        return -EINVAL;
-
-    ret = px_statistic_init(cpu);
-    if (ret)
-        return ret;
-
-    dom = perf->domain_info.domain;
-    if (cpus_weight(cpufreq_dom_map[dom])) {
-        /* share policy with the first cpu since on same boat */
-        firstcpu = first_cpu(cpufreq_dom_map[dom]);
-        policy = cpufreq_cpu_policy[firstcpu];
-
-        cpufreq_cpu_policy[cpu] = policy;
-        cpu_set(cpu, cpufreq_dom_map[dom]);
-        cpu_set(cpu, policy->cpus);
-
-        printk(KERN_EMERG"adding CPU %u\n", cpu);
-    } else {
-        /* for the first cpu, setup policy and do init work */
-        policy = xmalloc(struct cpufreq_policy);
-        if (!policy) {
-            px_statistic_exit(cpu);
-            return -ENOMEM;
-        }
-        memset(policy, 0, sizeof(struct cpufreq_policy));
-
-        cpufreq_cpu_policy[cpu] = policy;
-        cpu_set(cpu, cpufreq_dom_map[dom]);
-        cpu_set(cpu, policy->cpus);
-
-        policy->cpu = cpu;
-        ret = cpufreq_driver->init(policy);
-        if (ret)
-            goto err1;
-        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
-    }
-
-    /*
-     * After get full cpumap of the coordination domain,
-     * we can safely start gov here.
-     */
-    if (cpus_weight(cpufreq_dom_map[dom]) ==
-        perf->domain_info.num_processors) {
-        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
-        policy->governor = NULL;
-        ret = __cpufreq_set_policy(policy, &new_policy);
-        if (ret)
-            goto err2;
-    }
-
-    return 0;
-
-err2:
-    cpufreq_driver->exit(policy);
-err1:
-    for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
-        cpufreq_cpu_policy[j] = NULL;
-        px_statistic_exit(j);
-    }
-
-    cpus_clear(cpufreq_dom_map[dom]);
-    xfree(policy);
-    return ret;
-}
-
-int cpufreq_del_cpu(unsigned int cpu)
-{
-    unsigned int dom;
-    struct cpufreq_policy *policy;
-    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
-
-    /* to protect the case when Px was not controlled by xen */
-    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
-        return 0;
-
-    if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
-        return -EINVAL;
-
-    dom = perf->domain_info.domain;
-    policy = cpufreq_cpu_policy[cpu];
-
-    printk(KERN_EMERG"deleting CPU %u\n", cpu);
-
-    /* for the first cpu of the domain, stop gov */
-    if (cpus_weight(cpufreq_dom_map[dom]) ==
-        perf->domain_info.num_processors)
-        __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
-
-    cpufreq_cpu_policy[cpu] = NULL;
-    cpu_clear(cpu, policy->cpus);
-    cpu_clear(cpu, cpufreq_dom_map[dom]);
-    px_statistic_exit(cpu);
-
-    /* for the last cpu of the domain, clean room */
-    /* It's safe here to free freq_table, drv_data and policy */
-    if (!cpus_weight(cpufreq_dom_map[dom])) {
-        cpufreq_driver->exit(policy);
-        xfree(policy);
-    }
-
-    return 0;
-}
-
 static int __init cpufreq_driver_init(void)
 {
     int ret = 0;
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c	Fri Sep 26 11:12:29 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,246 +0,0 @@
-/*
- * xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
- *
- * Copyright (C) 2001 Russell King
- *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>.
- *                    Jun Nakajima <jun.nakajima@xxxxxxxxx>
- *           Feb 2008 Liu Jinsong <jinsong.liu@xxxxxxxxx>
- *           Porting cpufreq_ondemand.c from Liunx 2.6.23 to Xen hypervisor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <xen/types.h>
-#include <xen/percpu.h>
-#include <xen/cpumask.h>
-#include <xen/types.h>
-#include <xen/sched.h>
-#include <xen/timer.h>
-#include <asm/config.h>
-#include <acpi/cpufreq/cpufreq.h>
-
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-
-#define MIN_DBS_INTERVAL (MICROSECS(100))
-#define MIN_SAMPLING_MILLISECS (20)
-#define MIN_STAT_SAMPLING_RATE \
-    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
-#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000)
-#define TRANSITION_LATENCY_LIMIT (10 * 1000 )
-
-static uint64_t def_sampling_rate;
-
-/* Sampling types */
-enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
-
-static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
-
-static unsigned int dbs_enable;    /* number of CPUs using this policy */
-
-static struct dbs_tuners {
-    uint64_t sampling_rate;
-    unsigned int up_threshold;
-    unsigned int ignore_nice;
-    unsigned int powersave_bias;
-} dbs_tuners_ins = {
-    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
-    .ignore_nice = 0,
-    .powersave_bias = 0,
-};
-
-static struct timer dbs_timer[NR_CPUS];
-
-uint64_t get_cpu_idle_time(unsigned int cpu)
-{
-    uint64_t idle_ns;
-    struct vcpu *v;
-
-    if ((v = idle_vcpu[cpu]) == NULL)
-        return 0;
-
-    idle_ns = v->runstate.time[RUNSTATE_running];
-    if (v->is_running)
-        idle_ns += NOW() - v->runstate.state_entry_time;
-
-    return idle_ns;
-}
-
-static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
-{
-    unsigned int load = 0;
-    uint64_t cur_ns, idle_ns, total_ns;
-
-    struct cpufreq_policy *policy;
-    unsigned int j;
-
-    if (!this_dbs_info->enable)
-        return;
-
-    policy = this_dbs_info->cur_policy;
-
-    if (unlikely(policy->resume)) {
-        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
-        return;
-    }
-
-    cur_ns = NOW();
-    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
-    this_dbs_info->prev_cpu_wall = NOW();
-
-    if (total_ns < MIN_DBS_INTERVAL)
-        return;
-
-    /* Get Idle Time */
-    idle_ns = UINT_MAX;
-    for_each_cpu_mask(j, policy->cpus) {
-        uint64_t total_idle_ns;
-        unsigned int tmp_idle_ns;
-        struct cpu_dbs_info_s *j_dbs_info;
-
-        j_dbs_info = &per_cpu(cpu_dbs_info, j);
-        total_idle_ns = get_cpu_idle_time(j);
-        tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
-        j_dbs_info->prev_cpu_idle = total_idle_ns;
-
-        if (tmp_idle_ns < idle_ns)
-            idle_ns = tmp_idle_ns;
-    }
-
-    if (likely(total_ns > idle_ns))
-        load = (100 * (total_ns - idle_ns)) / total_ns;
-
-    /* Check for frequency increase */
-    if (load > dbs_tuners_ins.up_threshold) {
-        /* if we are already at full speed then break out early */
-        if (policy->cur == policy->max)
-            return;
-        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
-        return;
-    }
-
-    /* Check for frequency decrease */
-    /* if we cannot reduce the frequency anymore, break out early */
-    if (policy->cur == policy->min)
-        return;
-
-    /*
-     * The optimal frequency is the frequency that is the lowest that
-     * can support the current CPU usage without triggering the up
-     * policy. To be safe, we focus 10 points under the threshold.
-     */
-    if (load < (dbs_tuners_ins.up_threshold - 10)) {
-        unsigned int freq_next, freq_cur;
-
-        freq_cur = __cpufreq_driver_getavg(policy);
-        if (!freq_cur)
-            freq_cur = policy->cur;
-
-        freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
-
-        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
-    }
-}
-
-static void do_dbs_timer(void *dbs)
-{
-    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;
-
-    if (!dbs_info->enable)
-        return;
-
-    dbs_check_cpu(dbs_info);
-
-    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
-}
-
-static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
-{
-    dbs_info->enable = 1;
-
-    init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer,
-        (void *)dbs_info, dbs_info->cpu);
-
-    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
-}
-
-static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
-{
-    dbs_info->enable = 0;
-    stop_timer(&dbs_timer[dbs_info->cpu]);
-}
-
-int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
-{
-    unsigned int cpu = policy->cpu;
-    struct cpu_dbs_info_s *this_dbs_info;
-    unsigned int j;
-
-    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
-
-    switch (event) {
-    case CPUFREQ_GOV_START:
-        if ((!cpu_online(cpu)) || (!policy->cur))
-            return -EINVAL;
-
-        if (policy->cpuinfo.transition_latency >
-            (TRANSITION_LATENCY_LIMIT * 1000)) {
-            printk(KERN_WARNING "ondemand governor failed to load "
-                "due to too long transition latency\n");
-            return -EINVAL;
-        }
-        if (this_dbs_info->enable)
-            /* Already enabled */
-            break;
-
-        dbs_enable++;
-
-        for_each_cpu_mask(j, policy->cpus) {
-            struct cpu_dbs_info_s *j_dbs_info;
-            j_dbs_info = &per_cpu(cpu_dbs_info, j);
-            j_dbs_info->cur_policy = policy;
-
-            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
-            j_dbs_info->prev_cpu_wall = NOW();
-        }
-        this_dbs_info->cpu = cpu;
-
-        /*
-         * Start the timerschedule work, when this governor
-         * is used for first time
-         */
-        if (dbs_enable == 1) {
-            def_sampling_rate = policy->cpuinfo.transition_latency *
-                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
-
-            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
-                def_sampling_rate = MIN_STAT_SAMPLING_RATE;
-
-            dbs_tuners_ins.sampling_rate = def_sampling_rate;
-        }
-        dbs_timer_init(this_dbs_info);
-
-        break;
-
-    case CPUFREQ_GOV_STOP:
-        dbs_timer_exit(this_dbs_info);
-        dbs_enable--;
-
-        break;
-
-    case CPUFREQ_GOV_LIMITS:
-        if (policy->max < this_dbs_info->cur_policy->cur)
-            __cpufreq_driver_target(this_dbs_info->cur_policy,
-                policy->max, CPUFREQ_RELATION_H);
-        else if (policy->min > this_dbs_info->cur_policy->cur)
-            __cpufreq_driver_target(this_dbs_info->cur_policy,
-                policy->min, CPUFREQ_RELATION_L);
-        break;
-    }
-    return 0;
-}
-
-struct cpufreq_governor cpufreq_gov_dbs = {
-    .name = "ondemand",
-    .governor = cpufreq_governor_dbs,
-};
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/utility.c
--- a/xen/arch/x86/acpi/cpufreq/utility.c	Fri Sep 26 11:12:29 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,368 +0,0 @@
-/*
- *  utility.c - misc functions for cpufreq driver and Px statistic
- *
- *  Copyright (C) 2001 Russell King
- *            (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx>
- *
- *  Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx>
- *    Added handling for CPU hotplug
- *  Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx>
- *    Fix handling for CPU hotplug -- affected CPUs
- *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
- *    1. Merge cpufreq.c and freq_table.c of linux 2.6.23
- *    And poring to Xen hypervisor
- *    2. some Px statistic interface funcdtions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <xen/errno.h>
-#include <xen/cpumask.h>
-#include <xen/types.h>
-#include <xen/spinlock.h>
-#include <xen/percpu.h>
-#include <xen/types.h>
-#include <xen/sched.h>
-#include <xen/timer.h>
-#include <asm/config.h>
-#include <acpi/cpufreq/cpufreq.h>
-#include <public/sysctl.h>
-
-struct cpufreq_driver *cpufreq_driver;
-struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
-struct cpufreq_policy *__read_mostly cpufreq_cpu_policy[NR_CPUS];
-
-/*********************************************************************
- *                    Px STATISTIC INFO                              *
- *********************************************************************/
-
-void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
-{
-    uint32_t i;
-    uint64_t now;
-
-    now = NOW();
-
-    for_each_cpu_mask(i, cpumask) {
-        struct pm_px *pxpt = px_statistic_data[i];
-        struct processor_pminfo *pmpt = processor_pminfo[i];
-        uint64_t total_idle_ns;
-        uint64_t tmp_idle_ns;
-
-        if ( !pxpt || !pmpt )
-            continue;
-
-        total_idle_ns = get_cpu_idle_time(i);
-        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
-
-        pxpt->u.last = from;
-        pxpt->u.cur = to;
-        pxpt->u.pt[to].count++;
-        pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
-        pxpt->u.pt[from].residency -= tmp_idle_ns;
-
-        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
-
-        pxpt->prev_state_wall = now;
-        pxpt->prev_idle_wall = total_idle_ns;
-    }
-}
-
-int px_statistic_init(unsigned int cpuid)
-{
-    uint32_t i, count;
-    struct pm_px *pxpt = px_statistic_data[cpuid];
-    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
-
-    count = pmpt->perf.state_count;
-
-    if ( !pmpt )
-        return -EINVAL;
-
-    if ( !pxpt )
-    {
-        pxpt = xmalloc(struct pm_px);
-        if ( !pxpt )
-            return -ENOMEM;
-        memset(pxpt, 0, sizeof(*pxpt));
-        px_statistic_data[cpuid] = pxpt;
-    }
-
-    pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
-    if (!pxpt->u.trans_pt)
-        return -ENOMEM;
-
-    pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
-    if (!pxpt->u.pt) {
-        xfree(pxpt->u.trans_pt);
-        return -ENOMEM;
-    }
-
-    memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t)));
-    memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
-
-    pxpt->u.total = pmpt->perf.state_count;
-    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
-
-    for (i=0; i < pmpt->perf.state_count; i++)
-        pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
-
-    pxpt->prev_state_wall = NOW();
-    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
-
-    return 0;
-}
-
-void px_statistic_exit(unsigned int cpuid)
-{
-    struct pm_px *pxpt = px_statistic_data[cpuid];
-
-    if (!pxpt)
-        return;
-    xfree(pxpt->u.trans_pt);
-    xfree(pxpt->u.pt);
-    memset(pxpt, 0, sizeof(struct pm_px));
-}
-
-void px_statistic_reset(unsigned int cpuid)
-{
-    uint32_t i, j, count;
-    struct pm_px *pxpt = px_statistic_data[cpuid];
-    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
-
-    if ( !pxpt || !pmpt )
-        return;
-
-    count = pmpt->perf.state_count;
-
-    for (i=0; i < count; i++) {
-        pxpt->u.pt[i].residency = 0;
-        pxpt->u.pt[i].count = 0;
-
-        for (j=0; j < count; j++)
-            *(pxpt->u.trans_pt + i*count + j) = 0;
-    }
-
-    pxpt->prev_state_wall = NOW();
-    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
-}
-
-
-/*********************************************************************
- *                   FREQUENCY TABLE HELPERS                         *
- *********************************************************************/
-
-int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
-                                    struct cpufreq_frequency_table *table)
-{
-    unsigned int min_freq = ~0;
-    unsigned int max_freq = 0;
-    unsigned int i;
-
-    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
-        unsigned int freq = table[i].frequency;
-        if (freq == CPUFREQ_ENTRY_INVALID)
-            continue;
-        if (freq < min_freq)
-            min_freq = freq;
-        if (freq > max_freq)
-            max_freq = freq;
-    }
-
-    policy->min = policy->cpuinfo.min_freq = min_freq;
-    policy->max = policy->cpuinfo.max_freq = max_freq;
-
-    if (policy->min == ~0)
-        return -EINVAL;
-    else
-        return 0;
-}
-
-int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
-                                   struct cpufreq_frequency_table *table)
-{
-    unsigned int next_larger = ~0;
-    unsigned int i;
-    unsigned int count = 0;
-
-    if (!cpu_online(policy->cpu))
-        return -EINVAL;
-
-    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-                                 policy->cpuinfo.max_freq);
-
-    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
-        unsigned int freq = table[i].frequency;
-        if (freq == CPUFREQ_ENTRY_INVALID)
-            continue;
-        if ((freq >= policy->min) && (freq <= policy->max))
-            count++;
-        else if ((next_larger > freq) && (freq > policy->max))
-            next_larger = freq;
-    }
-
-    if (!count)
-        policy->max = next_larger;
-
-    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-                                 policy->cpuinfo.max_freq);
-
-    return 0;
-}
-
-int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
-                                   struct cpufreq_frequency_table *table,
-                                   unsigned int target_freq,
-                                   unsigned int relation,
-                                   unsigned int *index)
-{
-    struct cpufreq_frequency_table optimal = {
-        .index = ~0,
-        .frequency = 0,
-    };
-    struct cpufreq_frequency_table suboptimal = {
-        .index = ~0,
-        .frequency = 0,
-    };
-    unsigned int i;
-
-    switch (relation) {
-    case CPUFREQ_RELATION_H:
-        suboptimal.frequency = ~0;
-        break;
-    case CPUFREQ_RELATION_L:
-        optimal.frequency = ~0;
-        break;
-    }
-
-    if (!cpu_online(policy->cpu))
-        return -EINVAL;
-
-    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
-        unsigned int freq = table[i].frequency;
-        if (freq == CPUFREQ_ENTRY_INVALID)
-            continue;
-        if ((freq < policy->min) || (freq > policy->max))
-            continue;
-        switch(relation) {
-        case CPUFREQ_RELATION_H:
-            if (freq <= target_freq) {
-                if (freq >= optimal.frequency) {
-                    optimal.frequency = freq;
-                    optimal.index = i;
-                }
-            } else {
-                if (freq <= suboptimal.frequency) {
-                    suboptimal.frequency = freq;
-                    suboptimal.index = i;
-                }
-            }
-            break;
-        case CPUFREQ_RELATION_L:
-            if (freq >= target_freq) {
-                if (freq <= optimal.frequency) {
-                    optimal.frequency = freq;
-                    optimal.index = i;
-                }
-            } else {
-                if (freq >= suboptimal.frequency) {
-                    suboptimal.frequency = freq;
-                    suboptimal.index = i;
-                }
-            }
-            break;
-        }
-    }
-    if (optimal.index > i) {
-        if (suboptimal.index > i)
-            return -EINVAL;
-        *index = suboptimal.index;
-    } else
-        *index = optimal.index;
-
-    return 0;
-}
-
-
-/*********************************************************************
- *                          GOVERNORS                                *
- *********************************************************************/
-
-int __cpufreq_driver_target(struct cpufreq_policy *policy,
-                            unsigned int target_freq,
-                            unsigned int relation)
-{
-    int retval = -EINVAL;
-
-    if (cpu_online(policy->cpu) && cpufreq_driver->target)
-        retval = cpufreq_driver->target(policy, target_freq, relation);
-
-    return retval;
-}
-
-int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
-{
-    int ret = 0;
-
-    if (!policy)
-        return -EINVAL;
-
-    if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
-        ret = cpufreq_driver->getavg(policy->cpu);
-
-    return ret;
-}
-
-
-/*********************************************************************
- *                           POLICY                                  *
- *********************************************************************/
-
-/*
- * data   : current policy.
- * policy : policy to be set.
- */
-int __cpufreq_set_policy(struct cpufreq_policy *data,
-                         struct cpufreq_policy *policy)
-{
-    int ret = 0;
-
-    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
-
-    if (policy->min > data->min && policy->min > policy->max)
-        return -EINVAL;
-
-    /* verify the cpu speed can be set within this limit */
-    ret = cpufreq_driver->verify(policy);
-    if (ret)
-        return ret;
-
-    data->min = policy->min;
-    data->max = policy->max;
-
-    if (policy->governor != data->governor) {
-        /* save old, working values */
-        struct cpufreq_governor *old_gov = data->governor;
-
-        /* end old governor */
-        if (data->governor)
-            __cpufreq_governor(data, CPUFREQ_GOV_STOP);
-
-        /* start new governor */
-        data->governor = policy->governor;
-        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
-            /* new governor failed, so re-start old one */
-            if (old_gov) {
-                data->governor = old_gov;
-                __cpufreq_governor(data, CPUFREQ_GOV_START);
-            }
-            return -EINVAL;
-        }
-        /* might be a policy change, too, so fall through */
-    }
-
-    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
-}
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/pmstat.c
--- a/xen/arch/x86/acpi/pmstat.c	Fri Sep 26 11:12:29 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,155 +0,0 @@
-/*****************************************************************************
-#  pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
-#
-#  Copyright (c) 2008, Liu Jinsong <jinsong.liu@xxxxxxxxx>
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the Free
-# Software Foundation; either version 2 of the License, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc., 59
-# Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-#
-# The full GNU General Public License is included in this distribution in the
-# file called LICENSE.
-#
-*****************************************************************************/
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/event.h>
-#include <xen/irq.h>
-#include <xen/iocap.h>
-#include <xen/compat.h>
-#include <xen/guest_access.h>
-#include <asm/current.h>
-#include <public/xen.h>
-#include <xen/cpumask.h>
-#include <asm/processor.h>
-#include <xen/percpu.h>
-#include <xen/domain.h>
-
-#include <public/sysctl.h>
-#include <acpi/cpufreq/cpufreq.h>
-
-struct pm_px *__read_mostly px_statistic_data[NR_CPUS];
-
-extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
-extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
-extern int pmstat_reset_cx_stat(uint32_t cpuid);
-
-int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
-{
-    int ret = 0;
-    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
-
-    if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
-        return -EINVAL;
-
-    switch ( op->type & PMSTAT_CATEGORY_MASK )
-    {
-    case PMSTAT_CX:
-        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
-            return -ENODEV;
-        break;
-    case PMSTAT_PX:
-        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
-            return -ENODEV;
-        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
-            return -EINVAL;
-        break;
-    default:
-        return -ENODEV;
-    }
-
-    switch ( op->type )
-    {
-    case PMSTAT_get_max_px:
-    {
-        op->u.getpx.total = pmpt->perf.state_count;
-        break;
-    }
-
-    case PMSTAT_get_pxstat:
-    {
-        uint64_t now, ct;
-        uint64_t total_idle_ns;
-        uint64_t tmp_idle_ns;
-        struct pm_px *pxpt = px_statistic_data[op->cpuid];
-
-        if ( !pxpt )
-            return -ENODATA;
-
-        total_idle_ns = get_cpu_idle_time(op->cpuid);
-        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
-
-        now = NOW();
-        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
-        pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
-        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
-        pxpt->prev_state_wall = now;
-        pxpt->prev_idle_wall = total_idle_ns;
-
-        ct = pmpt->perf.state_count;
-        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
-        {
-            ret = -EFAULT;
-            break;
-        }
-
-        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
-        {
-            ret = -EFAULT;
-            break;
-        }
-
-        op->u.getpx.total = pxpt->u.total;
-        op->u.getpx.usable = pxpt->u.usable;
-        op->u.getpx.last = pxpt->u.last;
-        op->u.getpx.cur = pxpt->u.cur;
-
-        break;
-    }
-
-    case PMSTAT_reset_pxstat:
-    {
-        px_statistic_reset(op->cpuid);
-        break;
-    }
-
-    case PMSTAT_get_max_cx:
-    {
-        op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid);
-        ret = 0;
-        break;
-    }
-
-    case PMSTAT_get_cxstat:
-    {
-        ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx);
-        break;
-    }
-
-    case PMSTAT_reset_cxstat:
-    {
-        ret = pmstat_reset_cx_stat(op->cpuid);
-        break;
-    }
-
-    default:
-        printk("not defined sub-hypercall @ do_get_pm_info\n");
-        ret = -ENOSYS;
-        break;
-    }
-
-    return ret;
-}
-# -*****************************************************************************/ - -#include <xen/config.h> -#include <xen/lib.h> -#include <xen/errno.h> -#include <xen/sched.h> -#include <xen/event.h> -#include <xen/irq.h> -#include <xen/iocap.h> -#include <xen/compat.h> -#include <xen/guest_access.h> -#include <asm/current.h> -#include <public/xen.h> -#include <xen/cpumask.h> -#include <asm/processor.h> -#include <xen/percpu.h> -#include <xen/domain.h> - -#include <public/sysctl.h> -#include <acpi/cpufreq/cpufreq.h> - -struct pm_px *__read_mostly px_statistic_data[NR_CPUS]; - -extern uint32_t pmstat_get_cx_nr(uint32_t cpuid); -extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat); -extern int pmstat_reset_cx_stat(uint32_t cpuid); - -int do_get_pm_info(struct xen_sysctl_get_pmstat *op) -{ - int ret = 0; - const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid]; - - if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) ) - return -EINVAL; - - switch ( op->type & PMSTAT_CATEGORY_MASK ) - { - case PMSTAT_CX: - if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) ) - return -ENODEV; - break; - case PMSTAT_PX: - if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) ) - return -ENODEV; - if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) ) - return -EINVAL; - break; - default: - return -ENODEV; - } - - switch ( op->type ) - { - case PMSTAT_get_max_px: - { - op->u.getpx.total = pmpt->perf.state_count; - break; - } - - case PMSTAT_get_pxstat: - { - uint64_t now, ct; - uint64_t total_idle_ns; - uint64_t tmp_idle_ns; - struct pm_px *pxpt = px_statistic_data[op->cpuid]; - - if ( !pxpt ) - return -ENODATA; - - total_idle_ns = get_cpu_idle_time(op->cpuid); - tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall; - - now = NOW(); - pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit; - pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall; - pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns; - pxpt->prev_state_wall = now; - pxpt->prev_idle_wall = total_idle_ns; - - ct = pmpt->perf.state_count; - if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) ) - { - ret = -EFAULT; - break; - } - - if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) ) - { - ret = -EFAULT; - break; - } - - op->u.getpx.total = pxpt->u.total; - op->u.getpx.usable = pxpt->u.usable; - op->u.getpx.last = pxpt->u.last; - op->u.getpx.cur = pxpt->u.cur; - - break; - } - - case PMSTAT_reset_pxstat: - { - px_statistic_reset(op->cpuid); - break; - } - - case PMSTAT_get_max_cx: - { - op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid); - ret = 0; - break; - } - - case PMSTAT_get_cxstat: - { - ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx); - break; - } - - case PMSTAT_reset_cxstat: - { - ret = pmstat_reset_cx_stat(op->cpuid); - break; - } - - default: - printk("not defined sub-hypercall @ do_get_pm_info\n"); - ret = -ENOSYS; - break; - } - - return ret; -} diff -r 5274aa966231 -r 08374be21318 xen/drivers/Makefile --- a/xen/drivers/Makefile Fri Sep 26 11:12:29 2008 +0100 +++ b/xen/drivers/Makefile Fri Sep 26 14:04:38 2008 +0100 @@ -1,4 +1,5 @@ subdir-y += char subdir-y += char +subdir-y += cpufreq subdir-$(x86) += passthrough subdir-$(HAS_ACPI) += acpi subdir-$(HAS_VGA) += video diff -r 5274aa966231 -r 08374be21318 xen/drivers/acpi/Makefile --- a/xen/drivers/acpi/Makefile Fri Sep 26 11:12:29 2008 +0100 +++ b/xen/drivers/acpi/Makefile Fri Sep 26 14:04:38 2008 +0100 @@ -4,6 +4,7 @@ obj-y += tables.o obj-y += tables.o obj-y += numa.o obj-y += osl.o +obj-y += pmstat.o obj-$(x86) += hwregs.o 
diff -r 5274aa966231 -r 08374be21318 xen/drivers/acpi/Makefile
--- a/xen/drivers/acpi/Makefile	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/drivers/acpi/Makefile	Fri Sep 26 14:04:38 2008 +0100
@@ -4,6 +4,7 @@ obj-y += tables.o
 obj-y += tables.o
 obj-y += numa.o
 obj-y += osl.o
+obj-y += pmstat.o
 
 obj-$(x86) += hwregs.o
 obj-$(x86) += reboot.o
diff -r 5274aa966231 -r 08374be21318 xen/drivers/acpi/pmstat.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/acpi/pmstat.c	Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,155 @@
+/*****************************************************************************
+#  pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
+#
+#  Copyright (c) 2008, Liu Jinsong <jinsong.liu@xxxxxxxxx>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+# The full GNU General Public License is included in this distribution in the
+# file called LICENSE.
+#
+*****************************************************************************/

+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/irq.h>
+#include <xen/iocap.h>
+#include <xen/compat.h>
+#include <xen/guest_access.h>
+#include <asm/current.h>
+#include <public/xen.h>
+#include <xen/cpumask.h>
+#include <asm/processor.h>
+#include <xen/percpu.h>
+#include <xen/domain.h>
+
+#include <public/sysctl.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+struct pm_px *__read_mostly cpufreq_statistic_data[NR_CPUS];
+
+extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
+extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
+extern int pmstat_reset_cx_stat(uint32_t cpuid);
+
+int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
+{
+    int ret = 0;
+    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
+
+    if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
+        return -EINVAL;
+
+    switch ( op->type & PMSTAT_CATEGORY_MASK )
+    {
+    case PMSTAT_CX:
+        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
+            return -ENODEV;
+        break;
+    case PMSTAT_PX:
+        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+            return -ENODEV;
+        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
+            return -EINVAL;
+        break;
+    default:
+        return -ENODEV;
+    }
+
+    switch ( op->type )
+    {
+    case PMSTAT_get_max_px:
+    {
+        op->u.getpx.total = pmpt->perf.state_count;
+        break;
+    }
+
+    case PMSTAT_get_pxstat:
+    {
+        uint64_t now, ct;
+        uint64_t total_idle_ns;
+        uint64_t tmp_idle_ns;
+        struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid];
+
+        if ( !pxpt )
+            return -ENODATA;
+
+        total_idle_ns = get_cpu_idle_time(op->cpuid);
+        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
+
+        now = NOW();
+        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
+        pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
+        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
+        pxpt->prev_state_wall = now;
+        pxpt->prev_idle_wall = total_idle_ns;
+
+        ct = pmpt->perf.state_count;
+        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
+        {
+            ret = -EFAULT;
+            break;
+        }
+
+        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
+        {
+            ret = -EFAULT;
+            break;
+        }
+
+        op->u.getpx.total = pxpt->u.total;
+        op->u.getpx.usable = pxpt->u.usable;
+        op->u.getpx.last = pxpt->u.last;
+        op->u.getpx.cur = pxpt->u.cur;
+
+        break;
+    }
+
+    case PMSTAT_reset_pxstat:
+    {
+        cpufreq_statistic_reset(op->cpuid);
+        break;
+    }
+
+    case PMSTAT_get_max_cx:
+    {
+        op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid);
+        ret = 0;
+        break;
+    }
+
+    case PMSTAT_get_cxstat:
+    {
+        ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx);
+        break;
+    }
+
+    case PMSTAT_reset_cxstat:
+    {
+        ret = pmstat_reset_cx_stat(op->cpuid);
+        break;
+    }
+
+    default:
+        printk("not defined sub-hypercall @ do_get_pm_info\n");
+        ret = -ENOSYS;
+        break;
+    }
+
+    return ret;
+}
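PMSTAT_get_pxstat above charges the wall-clock time since the last P-state change to the current state's residency and then subtracts the idle time observed over the same window, so residency counts busy time only. A worked sketch of that bookkeeping, with hypothetical numbers (not part of the changeset):

    /* Mirrors the update in PMSTAT_get_pxstat; values are made up. */
    uint64_t wall_delta = now - pxpt->prev_state_wall;            /* e.g. 500ms */
    uint64_t idle_delta = total_idle_ns - pxpt->prev_idle_wall;   /* e.g. 200ms */
    pxpt->u.pt[pxpt->u.cur].residency += wall_delta - idle_delta; /* +300ms busy */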
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/Makefile	Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,3 @@
+obj-y += cpufreq.o
+obj-y += cpufreq_ondemand.o
+obj-y += utility.o
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/cpufreq.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/cpufreq.c	Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,188 @@
+/*
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@xxxxxxxx>
+ *  Copyright (C) 2006        Denis Sadykov <denis.m.sadykov@xxxxxxxxx>
+ *
+ *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *      Add cpufreq limit change handle and per-cpu cpufreq add/del
+ *      to cope with cpu hotplug
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/delay.h>
+#include <xen/cpumask.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <xen/xmalloc.h>
+#include <asm/bug.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/config.h>
+#include <asm/processor.h>
+#include <asm/percpu.h>
+#include <asm/cpufeature.h>
+#include <acpi/acpi.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+/* TODO: change to link list later as domain number may be sparse */
+static cpumask_t cpufreq_dom_map[NR_CPUS];
+
+int cpufreq_limit_change(unsigned int cpu)
+{
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
+    struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
+    struct cpufreq_policy policy;
+
+    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
+        return -ENODEV;
+
+    if ((perf->platform_limit < 0) ||
+        (perf->platform_limit >= perf->state_count))
+        return -EINVAL;
+
+    memcpy(&policy, data, sizeof(struct cpufreq_policy));
+
+    policy.max =
+        perf->states[perf->platform_limit].core_frequency * 1000;
+
+    return __cpufreq_set_policy(data, &policy);
+}
+
+int cpufreq_add_cpu(unsigned int cpu)
+{
+    int ret = 0;
+    unsigned int firstcpu;
+    unsigned int dom;
+    unsigned int j;
+    struct cpufreq_policy new_policy;
+    struct cpufreq_policy *policy;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
+
+    /* to protect the case when Px was not controlled by xen */
+    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
+        return 0;
+
+    if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
+        return -EINVAL;
+
+    ret = cpufreq_statistic_init(cpu);
+    if (ret)
+        return ret;
+
+    dom = perf->domain_info.domain;
+    if (cpus_weight(cpufreq_dom_map[dom])) {
+        /* share policy with the first cpu since on same boat */
+        firstcpu = first_cpu(cpufreq_dom_map[dom]);
+        policy = cpufreq_cpu_policy[firstcpu];
+
+        cpufreq_cpu_policy[cpu] = policy;
+        cpu_set(cpu, cpufreq_dom_map[dom]);
+        cpu_set(cpu, policy->cpus);
+
+        printk(KERN_EMERG"adding CPU %u\n", cpu);
+    } else {
+        /* for the first cpu, setup policy and do init work */
+        policy = xmalloc(struct cpufreq_policy);
+        if (!policy) {
+            cpufreq_statistic_exit(cpu);
+            return -ENOMEM;
+        }
+        memset(policy, 0, sizeof(struct cpufreq_policy));
+
+        cpufreq_cpu_policy[cpu] = policy;
+        cpu_set(cpu, cpufreq_dom_map[dom]);
+        cpu_set(cpu, policy->cpus);
+
+        policy->cpu = cpu;
+        ret = cpufreq_driver->init(policy);
+        if (ret)
+            goto err1;
+        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
+    }
+
+    /*
+     * After get full cpumap of the coordination domain,
+     * we can safely start gov here.
+     */
+    if (cpus_weight(cpufreq_dom_map[dom]) ==
+        perf->domain_info.num_processors) {
+        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+        policy->governor = NULL;
+        ret = __cpufreq_set_policy(policy, &new_policy);
+        if (ret)
+            goto err2;
+    }
+
+    return 0;
+
+err2:
+    cpufreq_driver->exit(policy);
+err1:
+    for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
+        cpufreq_cpu_policy[j] = NULL;
+        cpufreq_statistic_exit(j);
+    }
+
+    cpus_clear(cpufreq_dom_map[dom]);
+    xfree(policy);
+    return ret;
+}
+
+int cpufreq_del_cpu(unsigned int cpu)
+{
+    unsigned int dom;
+    struct cpufreq_policy *policy;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
+
+    /* to protect the case when Px was not controlled by xen */
+    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
+        return 0;
+
+    if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
+        return -EINVAL;
+
+    dom = perf->domain_info.domain;
+    policy = cpufreq_cpu_policy[cpu];
+
+    printk(KERN_EMERG"deleting CPU %u\n", cpu);
+
+    /* for the first cpu of the domain, stop gov */
+    if (cpus_weight(cpufreq_dom_map[dom]) ==
+        perf->domain_info.num_processors)
+        __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+
+    cpufreq_cpu_policy[cpu] = NULL;
+    cpu_clear(cpu, policy->cpus);
+    cpu_clear(cpu, cpufreq_dom_map[dom]);
+    cpufreq_statistic_exit(cpu);
+
+    /* for the last cpu of the domain, clean room */
+    /* It's safe here to free freq_table, drv_data and policy */
+    if (!cpus_weight(cpufreq_dom_map[dom])) {
+        cpufreq_driver->exit(policy);
+        xfree(policy);
+    }
+
+    return 0;
+}
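cpufreq_add_cpu() defers starting a governor until every CPU of a Px coordination domain has registered, and cpufreq_del_cpu() mirrors that on the way down. A hypothetical hotplug sequence for a two-processor domain (sketch, not part of the changeset) illustrates the weight checks:

    cpufreq_add_cpu(0); /* dom weight 1 of 2: policy allocated, no governor yet   */
    cpufreq_add_cpu(1); /* dom weight 2 of 2: __cpufreq_set_policy starts governor */
    cpufreq_del_cpu(1); /* full domain still mapped: governor gets CPUFREQ_GOV_STOP */
    cpufreq_del_cpu(0); /* last CPU: cpufreq_driver->exit() runs, policy is freed  */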
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/cpufreq_ondemand.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/cpufreq_ondemand.c	Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,246 @@
+/*
+ * xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
+ *
+ * Copyright (C) 2001 Russell King
+ *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>.
+ *                    Jun Nakajima <jun.nakajima@xxxxxxxxx>
+ *           Feb 2008 Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *           Porting cpufreq_ondemand.c from Liunx 2.6.23 to Xen hypervisor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <xen/types.h>
+#include <xen/percpu.h>
+#include <xen/cpumask.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <asm/config.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+#define DEF_FREQUENCY_UP_THRESHOLD (80)
+
+#define MIN_DBS_INTERVAL (MICROSECS(100))
+#define MIN_SAMPLING_MILLISECS (20)
+#define MIN_STAT_SAMPLING_RATE \
+    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
+#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000)
+#define TRANSITION_LATENCY_LIMIT (10 * 1000 )
+
+static uint64_t def_sampling_rate;
+
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
+
+static unsigned int dbs_enable;    /* number of CPUs using this policy */
+
+static struct dbs_tuners {
+    uint64_t sampling_rate;
+    unsigned int up_threshold;
+    unsigned int ignore_nice;
+    unsigned int powersave_bias;
+} dbs_tuners_ins = {
+    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
+    .ignore_nice = 0,
+    .powersave_bias = 0,
+};
+
+static struct timer dbs_timer[NR_CPUS];
+
+uint64_t get_cpu_idle_time(unsigned int cpu)
+{
+    uint64_t idle_ns;
+    struct vcpu *v;
+
+    if ((v = idle_vcpu[cpu]) == NULL)
+        return 0;
+
+    idle_ns = v->runstate.time[RUNSTATE_running];
+    if (v->is_running)
+        idle_ns += NOW() - v->runstate.state_entry_time;
+
+    return idle_ns;
+}
+
+static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
+{
+    unsigned int load = 0;
+    uint64_t cur_ns, idle_ns, total_ns;
+
+    struct cpufreq_policy *policy;
+    unsigned int j;
+
+    if (!this_dbs_info->enable)
+        return;
+
+    policy = this_dbs_info->cur_policy;
+
+    if (unlikely(policy->resume)) {
+        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
+        return;
+    }
+
+    cur_ns = NOW();
+    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
+    this_dbs_info->prev_cpu_wall = NOW();
+
+    if (total_ns < MIN_DBS_INTERVAL)
+        return;
+
+    /* Get Idle Time */
+    idle_ns = UINT_MAX;
+    for_each_cpu_mask(j, policy->cpus) {
+        uint64_t total_idle_ns;
+        unsigned int tmp_idle_ns;
+        struct cpu_dbs_info_s *j_dbs_info;
+
+        j_dbs_info = &per_cpu(cpu_dbs_info, j);
+        total_idle_ns = get_cpu_idle_time(j);
+        tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
+        j_dbs_info->prev_cpu_idle = total_idle_ns;
+
+        if (tmp_idle_ns < idle_ns)
+            idle_ns = tmp_idle_ns;
+    }
+
+    if (likely(total_ns > idle_ns))
+        load = (100 * (total_ns - idle_ns)) / total_ns;
+
+    /* Check for frequency increase */
+    if (load > dbs_tuners_ins.up_threshold) {
+        /* if we are already at full speed then break out early */
+        if (policy->cur == policy->max)
+            return;
+        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
+        return;
+    }
+
+    /* Check for frequency decrease */
+    /* if we cannot reduce the frequency anymore, break out early */
+    if (policy->cur == policy->min)
+        return;
+
+    /*
+     * The optimal frequency is the frequency that is the lowest that
+     * can support the current CPU usage without triggering the up
+     * policy. To be safe, we focus 10 points under the threshold.
+     */
+    if (load < (dbs_tuners_ins.up_threshold - 10)) {
+        unsigned int freq_next, freq_cur;
+
+        freq_cur = __cpufreq_driver_getavg(policy);
+        if (!freq_cur)
+            freq_cur = policy->cur;
+
+        freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
+
+        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
+    }
+}
+
+static void do_dbs_timer(void *dbs)
+{
+    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;
+
+    if (!dbs_info->enable)
+        return;
+
+    dbs_check_cpu(dbs_info);
+
+    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+}
+
+static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
+{
+    dbs_info->enable = 1;
+
+    init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer,
+        (void *)dbs_info, dbs_info->cpu);
+
+    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+}
+
+static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
+{
+    dbs_info->enable = 0;
+    stop_timer(&dbs_timer[dbs_info->cpu]);
+}
+
+int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
+{
+    unsigned int cpu = policy->cpu;
+    struct cpu_dbs_info_s *this_dbs_info;
+    unsigned int j;
+
+    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
+
+    switch (event) {
+    case CPUFREQ_GOV_START:
+        if ((!cpu_online(cpu)) || (!policy->cur))
+            return -EINVAL;
+
+        if (policy->cpuinfo.transition_latency >
+            (TRANSITION_LATENCY_LIMIT * 1000)) {
+            printk(KERN_WARNING "ondemand governor failed to load "
+                "due to too long transition latency\n");
+            return -EINVAL;
+        }
+        if (this_dbs_info->enable)
+            /* Already enabled */
+            break;
+
+        dbs_enable++;
+
+        for_each_cpu_mask(j, policy->cpus) {
+            struct cpu_dbs_info_s *j_dbs_info;
+            j_dbs_info = &per_cpu(cpu_dbs_info, j);
+            j_dbs_info->cur_policy = policy;
+
+            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
+            j_dbs_info->prev_cpu_wall = NOW();
+        }
+        this_dbs_info->cpu = cpu;
+
+        /*
+         * Start the timerschedule work, when this governor
+         * is used for first time
+         */
+        if (dbs_enable == 1) {
+            def_sampling_rate = policy->cpuinfo.transition_latency *
+                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
+
+            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
+                def_sampling_rate = MIN_STAT_SAMPLING_RATE;
+
+            dbs_tuners_ins.sampling_rate = def_sampling_rate;
+        }
+        dbs_timer_init(this_dbs_info);
+
+        break;
+
+    case CPUFREQ_GOV_STOP:
+        dbs_timer_exit(this_dbs_info);
+        dbs_enable--;
+
+        break;
+
+    case CPUFREQ_GOV_LIMITS:
+        if (policy->max < this_dbs_info->cur_policy->cur)
+            __cpufreq_driver_target(this_dbs_info->cur_policy,
+                policy->max, CPUFREQ_RELATION_H);
+        else if (policy->min > this_dbs_info->cur_policy->cur)
+            __cpufreq_driver_target(this_dbs_info->cur_policy,
+                policy->min, CPUFREQ_RELATION_L);
+        break;
+    }
+    return 0;
+}
+
+struct cpufreq_governor cpufreq_gov_dbs = {
+    .name = "ondemand",
+    .governor = cpufreq_governor_dbs,
+};
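The ondemand governor's decision rule reduces to integer arithmetic over one sampling window. A worked example with hypothetical numbers and the default up_threshold of 80 (sketch, not part of the changeset):

    uint64_t total_ns = 100000000;  /* 100ms sampling window (hypothetical)  */
    uint64_t idle_ns  =  70000000;  /* 70ms of it spent idle (hypothetical)  */
    unsigned int load = (100 * (total_ns - idle_ns)) / total_ns;  /* = 30    */
    /* 30 < 80 - 10, so dbs_check_cpu() scales down:                         */
    /*   freq_next = freq_cur * 30 / 70                                      */
    /* e.g. freq_cur = 2000000 (2GHz in kHz) gives freq_next ~= 857142, and  */
    /* CPUFREQ_RELATION_L then selects the lowest table entry >= that value. */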
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/utility.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/utility.c	Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,368 @@
+/*
+ *  utility.c - misc functions for cpufreq driver and Px statistic
+ *
+ *  Copyright (C) 2001 Russell King
+ *            (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx>
+ *
+ *  Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx>
+ *    Added handling for CPU hotplug
+ *  Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx>
+ *    Fix handling for CPU hotplug -- affected CPUs
+ *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *    1. Merge cpufreq.c and freq_table.c of linux 2.6.23
+ *    And poring to Xen hypervisor
+ *    2. some Px statistic interface funcdtions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <xen/errno.h>
+#include <xen/cpumask.h>
+#include <xen/types.h>
+#include <xen/spinlock.h>
+#include <xen/percpu.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <asm/config.h>
+#include <acpi/cpufreq/cpufreq.h>
+#include <public/sysctl.h>
+
+struct cpufreq_driver *cpufreq_driver;
+struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
+struct cpufreq_policy *__read_mostly cpufreq_cpu_policy[NR_CPUS];
+
+/*********************************************************************
+ *                    Px STATISTIC INFO                              *
+ *********************************************************************/
+
+void cpufreq_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
+{
+    uint32_t i;
+    uint64_t now;
+
+    now = NOW();
+
+    for_each_cpu_mask(i, cpumask) {
+        struct pm_px *pxpt = cpufreq_statistic_data[i];
+        struct processor_pminfo *pmpt = processor_pminfo[i];
+        uint64_t total_idle_ns;
+        uint64_t tmp_idle_ns;
+
+        if ( !pxpt || !pmpt )
+            continue;
+
+        total_idle_ns = get_cpu_idle_time(i);
+        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
+
+        pxpt->u.last = from;
+        pxpt->u.cur = to;
+        pxpt->u.pt[to].count++;
+        pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
+        pxpt->u.pt[from].residency -= tmp_idle_ns;
+
+        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
+
+        pxpt->prev_state_wall = now;
+        pxpt->prev_idle_wall = total_idle_ns;
+    }
+}
+
+int cpufreq_statistic_init(unsigned int cpuid)
+{
+    uint32_t i, count;
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+
+    count = pmpt->perf.state_count;
+
+    if ( !pmpt )
+        return -EINVAL;
+
+    if ( !pxpt )
+    {
+        pxpt = xmalloc(struct pm_px);
+        if ( !pxpt )
+            return -ENOMEM;
+        memset(pxpt, 0, sizeof(*pxpt));
+        cpufreq_statistic_data[cpuid] = pxpt;
+    }
+
+    pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
+    if (!pxpt->u.trans_pt)
+        return -ENOMEM;
+
+    pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
+    if (!pxpt->u.pt) {
+        xfree(pxpt->u.trans_pt);
+        return -ENOMEM;
+    }
+
+    memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t)));
+    memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
+
+    pxpt->u.total = pmpt->perf.state_count;
+    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
+
+    for (i=0; i < pmpt->perf.state_count; i++)
+        pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
+
+    pxpt->prev_state_wall = NOW();
+    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+
+    return 0;
+}
+
+void cpufreq_statistic_exit(unsigned int cpuid)
+{
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+
+    if (!pxpt)
+        return;
+    xfree(pxpt->u.trans_pt);
+    xfree(pxpt->u.pt);
+    memset(pxpt, 0, sizeof(struct pm_px));
+}
+
+void cpufreq_statistic_reset(unsigned int cpuid)
+{
+    uint32_t i, j, count;
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+
+    if ( !pxpt || !pmpt )
+        return;
+
+    count = pmpt->perf.state_count;
+
+    for (i=0; i < count; i++) {
+        pxpt->u.pt[i].residency = 0;
+        pxpt->u.pt[i].count = 0;
+
+        for (j=0; j < count; j++)
+            *(pxpt->u.trans_pt + i*count + j) = 0;
+    }
+
+    pxpt->prev_state_wall = NOW();
+    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+}
+
+
+/*********************************************************************
+ *                   FREQUENCY TABLE HELPERS                         *
+ *********************************************************************/
+
+int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
+                                    struct cpufreq_frequency_table *table)
+{
+    unsigned int min_freq = ~0;
+    unsigned int max_freq = 0;
+    unsigned int i;
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if (freq < min_freq)
+            min_freq = freq;
+        if (freq > max_freq)
+            max_freq = freq;
+    }
+
+    policy->min = policy->cpuinfo.min_freq = min_freq;
+    policy->max = policy->cpuinfo.max_freq = max_freq;
+
+    if (policy->min == ~0)
+        return -EINVAL;
+    else
+        return 0;
+}
+
+int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
+                                   struct cpufreq_frequency_table *table)
+{
+    unsigned int next_larger = ~0;
+    unsigned int i;
+    unsigned int count = 0;
+
+    if (!cpu_online(policy->cpu))
+        return -EINVAL;
+
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                 policy->cpuinfo.max_freq);
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if ((freq >= policy->min) && (freq <= policy->max))
+            count++;
+        else if ((next_larger > freq) && (freq > policy->max))
+            next_larger = freq;
+    }
+
+    if (!count)
+        policy->max = next_larger;
+
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                 policy->cpuinfo.max_freq);
+
+    return 0;
+}
+
+int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
+                                   struct cpufreq_frequency_table *table,
+                                   unsigned int target_freq,
+                                   unsigned int relation,
+                                   unsigned int *index)
+{
+    struct cpufreq_frequency_table optimal = {
+        .index = ~0,
+        .frequency = 0,
+    };
+    struct cpufreq_frequency_table suboptimal = {
+        .index = ~0,
+        .frequency = 0,
+    };
+    unsigned int i;
+
+    switch (relation) {
+    case CPUFREQ_RELATION_H:
+        suboptimal.frequency = ~0;
+        break;
+    case CPUFREQ_RELATION_L:
+        optimal.frequency = ~0;
+        break;
+    }
+
+    if (!cpu_online(policy->cpu))
+        return -EINVAL;
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if ((freq < policy->min) || (freq > policy->max))
+            continue;
+        switch(relation) {
+        case CPUFREQ_RELATION_H:
+            if (freq <= target_freq) {
+                if (freq >= optimal.frequency) {
+                    optimal.frequency = freq;
+                    optimal.index = i;
+                }
+            } else {
+                if (freq <= suboptimal.frequency) {
+                    suboptimal.frequency = freq;
+                    suboptimal.index = i;
+                }
+            }
+            break;
+        case CPUFREQ_RELATION_L:
+            if (freq >= target_freq) {
+                if (freq <= optimal.frequency) {
+                    optimal.frequency = freq;
+                    optimal.index = i;
+                }
+            } else {
+                if (freq >= suboptimal.frequency) {
+                    suboptimal.frequency = freq;
+                    suboptimal.index = i;
+                }
+            }
+            break;
+        }
+    }
+    if (optimal.index > i) {
+        if (suboptimal.index > i)
+            return -EINVAL;
+        *index = suboptimal.index;
+    } else
+        *index = optimal.index;
+
+    return 0;
+}
+
+
+/*********************************************************************
+ *                          GOVERNORS                                *
+ *********************************************************************/
+
+int __cpufreq_driver_target(struct cpufreq_policy *policy,
+                            unsigned int target_freq,
+                            unsigned int relation)
+{
+    int retval = -EINVAL;
+
+    if (cpu_online(policy->cpu) && cpufreq_driver->target)
+        retval = cpufreq_driver->target(policy, target_freq, relation);
+
+    return retval;
+}
+
+int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
+{
+    int ret = 0;
+
+    if (!policy)
+        return -EINVAL;
+
+    if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
+        ret = cpufreq_driver->getavg(policy->cpu);
+
+    return ret;
+}
+
+
+/*********************************************************************
+ *                           POLICY                                  *
+ *********************************************************************/
+
+/*
+ * data   : current policy.
+ * policy : policy to be set.
+ */
+int __cpufreq_set_policy(struct cpufreq_policy *data,
+                         struct cpufreq_policy *policy)
+{
+    int ret = 0;
+
+    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
+
+    if (policy->min > data->min && policy->min > policy->max)
+        return -EINVAL;
+
+    /* verify the cpu speed can be set within this limit */
+    ret = cpufreq_driver->verify(policy);
+    if (ret)
+        return ret;
+
+    data->min = policy->min;
+    data->max = policy->max;
+
+    if (policy->governor != data->governor) {
+        /* save old, working values */
+        struct cpufreq_governor *old_gov = data->governor;
+
+        /* end old governor */
+        if (data->governor)
+            __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+
+        /* start new governor */
+        data->governor = policy->governor;
+        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
+            /* new governor failed, so re-start old one */
+            if (old_gov) {
+                data->governor = old_gov;
+                __cpufreq_governor(data, CPUFREQ_GOV_START);
+            }
+            return -EINVAL;
+        }
+        /* might be a policy change, too, so fall through */
+    }
+
+    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+}
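cpufreq_frequency_table_target() resolves a target frequency to a table index, rounding down for CPUFREQ_RELATION_H and up for CPUFREQ_RELATION_L. Assuming a hypothetical three-entry table of 2000000, 1500000 and 1000000 kHz, all inside the policy limits (sketch, not part of the changeset):

    unsigned int idx;
    /* RELATION_H: highest entry <= 1.2GHz -> the 1000000 kHz entry */
    cpufreq_frequency_table_target(policy, table, 1200000,
                                   CPUFREQ_RELATION_H, &idx);
    /* RELATION_L: lowest entry >= 1.2GHz -> the 1500000 kHz entry */
    cpufreq_frequency_table_target(policy, table, 1200000,
                                   CPUFREQ_RELATION_L, &idx);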
diff -r 5274aa966231 -r 08374be21318 xen/include/acpi/cpufreq/cpufreq.h
--- a/xen/include/acpi/cpufreq/cpufreq.h	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/include/acpi/cpufreq/cpufreq.h	Fri Sep 26 14:04:38 2008 +0100
@@ -20,6 +20,13 @@
 #define CPUFREQ_NAME_LEN 16
 
 struct cpufreq_governor;
+
+struct acpi_cpufreq_data {
+    struct processor_performance *acpi_data;
+    struct cpufreq_frequency_table *freq_table;
+    unsigned int max_freq;
+    unsigned int cpu_feature;
+};
 
 struct cpufreq_cpuinfo {
     unsigned int max_freq;
diff -r 5274aa966231 -r 08374be21318 xen/include/acpi/cpufreq/processor_perf.h
--- a/xen/include/acpi/cpufreq/processor_perf.h	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/include/acpi/cpufreq/processor_perf.h	Fri Sep 26 14:04:38 2008 +0100
@@ -9,10 +9,10 @@ int get_cpu_id(u8);
 int get_cpu_id(u8);
 int powernow_cpufreq_init(void);
 
-void px_statistic_update(cpumask_t, uint8_t, uint8_t);
-int  px_statistic_init(unsigned int);
-void px_statistic_exit(unsigned int);
-void px_statistic_reset(unsigned int);
+void cpufreq_statistic_update(cpumask_t, uint8_t, uint8_t);
+int  cpufreq_statistic_init(unsigned int);
+void cpufreq_statistic_exit(unsigned int);
+void cpufreq_statistic_reset(unsigned int);
 
 int cpufreq_limit_change(unsigned int);
 
@@ -58,6 +58,6 @@ struct pm_px {
     uint64_t prev_idle_wall;
 };
 
-extern struct pm_px *px_statistic_data[NR_CPUS];
+extern struct pm_px *cpufreq_statistic_data[NR_CPUS];
 
 #endif /* __XEN_PROCESSOR_PM_H__ */
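For reference, the renames carried through the whole changeset, as declared in processor_perf.h above:

    /* old (x86-only naming)       new (arch-neutral naming)   */
    /* px_statistic_update    ->   cpufreq_statistic_update    */
    /* px_statistic_init      ->   cpufreq_statistic_init      */
    /* px_statistic_exit      ->   cpufreq_statistic_exit      */
    /* px_statistic_reset     ->   cpufreq_statistic_reset     */
    /* px_statistic_data      ->   cpufreq_statistic_data      */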