
[Xen-changelog] [xen-unstable] X86 and IA64: Rebase cpufreq logic to support both the x86 and ia64 architectures



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1222434278 -3600
# Node ID 08374be213188c10eb7c170c143ca0d0d17e55d8
# Parent  5274aa966231ad5f050987e1fb00c6f6ee75a007
X86 and IA64: Rebase cpufreq logic to support both the x86 and ia64 architectures

Rebase the cpufreq logic to support both the x86 and ia64 architectures:
1. Move the arch-independent cpufreq logic into common directories
   (xen/drivers/acpi and xen/drivers/cpufreq).
2. Leave the x86-specific cpufreq logic in xen/arch/x86/acpi/cpufreq.

Signed-off-by: Yu, Ke <ke.yu@xxxxxxxxx>
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
---
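[Notes for readers of this changelog; they sit below the "---" tear line
and are not part of the commit message. The rebase is mostly a mechanical
file move, but two renames are easy to miss in the diff: the Px statistic
helpers become cpufreq_statistic_* (formerly px_statistic_*), and the
per-CPU hotplug entry points leave the x86 driver so an ia64 port can
reuse them. As a sketch of the resulting common interface, the prototypes
below are copied from the moved code; collecting them into one listing is
illustrative only, the real declarations live under
xen/include/acpi/cpufreq/.]

    #include <xen/types.h>
    #include <xen/cpumask.h>

    /* Px statistics, now arch-independent: maintained in
     * xen/drivers/cpufreq/utility.c and reported via
     * xen/drivers/acpi/pmstat.c (renamed from px_statistic_*). */
    int  cpufreq_statistic_init(unsigned int cpuid);
    void cpufreq_statistic_exit(unsigned int cpuid);
    void cpufreq_statistic_reset(unsigned int cpuid);
    void cpufreq_statistic_update(cpumask_t cpumask,
                                  uint8_t from, uint8_t to);

    /* Per-CPU hotplug/limit handling, moved from the x86 driver into
     * xen/drivers/cpufreq/cpufreq.c; one policy is shared per ACPI
     * coordination domain, and the governor starts only once the whole
     * domain's cpumap has been assembled by cpufreq_add_cpu() calls. */
    int cpufreq_add_cpu(unsigned int cpu);
    int cpufreq_del_cpu(unsigned int cpu);
    int cpufreq_limit_change(unsigned int cpu);

[After this change, xen/arch/x86/acpi/cpufreq keeps only the x86 drivers
themselves (cpufreq.o and powernow.o). struct acpi_cpufreq_data is
deleted from cpufreq.c while still being used there, so its definition
presumably moves into the touched xen/include/acpi/cpufreq/cpufreq.h,
whose hunk falls outside the excerpt below.]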
 xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c |  246 ------------------
 xen/arch/x86/acpi/cpufreq/utility.c          |  368 ---------------------------
 xen/arch/x86/acpi/pmstat.c                   |  155 -----------
 xen/arch/x86/acpi/Makefile                   |    1 
 xen/arch/x86/acpi/cpufreq/Makefile           |    2 
 xen/arch/x86/acpi/cpufreq/cpufreq.c          |  153 -----------
 xen/drivers/Makefile                         |    1 
 xen/drivers/acpi/Makefile                    |    1 
 xen/drivers/acpi/pmstat.c                    |  155 +++++++++++
 xen/drivers/cpufreq/Makefile                 |    3 
 xen/drivers/cpufreq/cpufreq.c                |  188 +++++++++++++
 xen/drivers/cpufreq/cpufreq_ondemand.c       |  246 ++++++++++++++++++
 xen/drivers/cpufreq/utility.c                |  368 +++++++++++++++++++++++++++
 xen/include/acpi/cpufreq/cpufreq.h           |    7 
 xen/include/acpi/cpufreq/processor_perf.h    |   10 
 15 files changed, 976 insertions(+), 928 deletions(-)

diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile        Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/Makefile        Fri Sep 26 14:04:38 2008 +0100
@@ -2,4 +2,3 @@ subdir-y += cpufreq
 
 obj-y += boot.o
 obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o cpuidle_menu.o
-obj-y += pmstat.o
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/Makefile
--- a/xen/arch/x86/acpi/cpufreq/Makefile        Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/Makefile        Fri Sep 26 14:04:38 2008 +0100
@@ -1,4 +1,2 @@ obj-y += cpufreq.o
 obj-y += cpufreq.o
-obj-y += utility.o
-obj-y += cpufreq_ondemand.o
 obj-y += powernow.o
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Sep 26 14:04:38 2008 +0100
@@ -45,9 +45,6 @@
 #include <acpi/acpi.h>
 #include <acpi/cpufreq/cpufreq.h>
 
-/* TODO: change to link list later as domain number may be sparse */
-static cpumask_t cpufreq_dom_map[NR_CPUS];
-
 enum {
     UNDEFINED_CAPABLE = 0,
     SYSTEM_INTEL_MSR_CAPABLE,
@@ -56,13 +53,6 @@ enum {
 
 #define INTEL_MSR_RANGE         (0xffff)
 #define CPUID_6_ECX_APERFMPERF_CAPABILITY       (0x1)
-
-struct acpi_cpufreq_data {
-    struct processor_performance *acpi_data;
-    struct cpufreq_frequency_table *freq_table;
-    unsigned int max_freq;
-    unsigned int cpu_feature;
-};
 
 static struct acpi_cpufreq_data *drv_data[NR_CPUS];
 
@@ -342,7 +332,7 @@ static int acpi_cpufreq_target(struct cp
             policy->resume = 0;
         }
         else {
-            printk(KERN_INFO "Already at target state (P%d)\n", 
+            printk(KERN_DEBUG "Already at target state (P%d)\n", 
                 next_perf_state);
             return 0;
         }
@@ -379,7 +369,7 @@ static int acpi_cpufreq_target(struct cp
     if (!check_freqs(cmd.mask, freqs.new, data))
         return -EAGAIN;
 
-    px_statistic_update(cmd.mask, perf->state, next_perf_state);
+    cpufreq_statistic_update(cmd.mask, perf->state, next_perf_state);
 
     perf->state = next_perf_state;
     policy->cur = freqs.new;
@@ -581,145 +571,6 @@ static struct cpufreq_driver acpi_cpufre
     .exit   = acpi_cpufreq_cpu_exit,
 };
 
-int cpufreq_limit_change(unsigned int cpu)
-{
-    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
-    struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
-    struct cpufreq_policy policy;
-
-    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
-        return -ENODEV;
-
-    if ((perf->platform_limit < 0) || 
-        (perf->platform_limit >= perf->state_count))
-        return -EINVAL;
-
-    memcpy(&policy, data, sizeof(struct cpufreq_policy)); 
-
-    policy.max =
-        perf->states[perf->platform_limit].core_frequency * 1000;
-
-    return __cpufreq_set_policy(data, &policy);
-}
-
-int cpufreq_add_cpu(unsigned int cpu)
-{
-    int ret = 0;
-    unsigned int firstcpu;
-    unsigned int dom;
-    unsigned int j;
-    struct cpufreq_policy new_policy;
-    struct cpufreq_policy *policy;
-    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
-
-    /* to protect the case when Px was not controlled by xen */
-    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
-        return 0;
-
-    if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
-        return -EINVAL;
-
-    ret = px_statistic_init(cpu);
-    if (ret)
-        return ret;
-
-    dom = perf->domain_info.domain;
-    if (cpus_weight(cpufreq_dom_map[dom])) {
-        /* share policy with the first cpu since on same boat */
-        firstcpu = first_cpu(cpufreq_dom_map[dom]);
-        policy = cpufreq_cpu_policy[firstcpu];
-
-        cpufreq_cpu_policy[cpu] = policy;
-        cpu_set(cpu, cpufreq_dom_map[dom]);
-        cpu_set(cpu, policy->cpus);
-
-        printk(KERN_EMERG"adding CPU %u\n", cpu);
-    } else {
-        /* for the first cpu, setup policy and do init work */
-        policy = xmalloc(struct cpufreq_policy);
-        if (!policy) {
-            px_statistic_exit(cpu);
-            return -ENOMEM;
-        }
-        memset(policy, 0, sizeof(struct cpufreq_policy));
-
-        cpufreq_cpu_policy[cpu] = policy;
-        cpu_set(cpu, cpufreq_dom_map[dom]);
-        cpu_set(cpu, policy->cpus);
-
-        policy->cpu = cpu;
-        ret = cpufreq_driver->init(policy);
-        if (ret)
-            goto err1;
-        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
-    }
-
-    /*
-     * After get full cpumap of the coordination domain,
-     * we can safely start gov here.
-     */
-    if (cpus_weight(cpufreq_dom_map[dom]) ==
-        perf->domain_info.num_processors) {
-        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
-        policy->governor = NULL;
-        ret = __cpufreq_set_policy(policy, &new_policy);
-        if (ret)
-            goto err2;
-    }
-
-    return 0;
-
-err2:
-    cpufreq_driver->exit(policy);
-err1:
-    for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
-        cpufreq_cpu_policy[j] = NULL;
-        px_statistic_exit(j);
-    }
-
-    cpus_clear(cpufreq_dom_map[dom]);
-    xfree(policy);
-    return ret;
-}
-
-int cpufreq_del_cpu(unsigned int cpu)
-{
-    unsigned int dom;
-    struct cpufreq_policy *policy;
-    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
-
-    /* to protect the case when Px was not controlled by xen */
-    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
-        return 0;
-
-    if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
-        return -EINVAL;
-
-    dom = perf->domain_info.domain;
-    policy = cpufreq_cpu_policy[cpu];
-
-    printk(KERN_EMERG"deleting CPU %u\n", cpu);
-
-    /* for the first cpu of the domain, stop gov */
-    if (cpus_weight(cpufreq_dom_map[dom]) ==
-        perf->domain_info.num_processors)
-        __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
-
-    cpufreq_cpu_policy[cpu] = NULL;
-    cpu_clear(cpu, policy->cpus);
-    cpu_clear(cpu, cpufreq_dom_map[dom]);
-    px_statistic_exit(cpu);
-
-    /* for the last cpu of the domain, clean room */
-    /* It's safe here to free freq_table, drv_data and policy */
-    if (!cpus_weight(cpufreq_dom_map[dom])) {
-        cpufreq_driver->exit(policy);
-        xfree(policy);
-    }
-
-    return 0;
-}
-
 static int __init cpufreq_driver_init(void)
 {
     int ret = 0;
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c      Fri Sep 26 11:12:29 2008 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,246 +0,0 @@
-/*
- *  xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
- *
- *  Copyright (C)  2001 Russell King
- *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>.
- *                      Jun Nakajima <jun.nakajima@xxxxxxxxx>
- *             Feb 2008 Liu Jinsong <jinsong.liu@xxxxxxxxx>
- *             Porting cpufreq_ondemand.c from Liunx 2.6.23 to Xen hypervisor 
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <xen/types.h>
-#include <xen/percpu.h>
-#include <xen/cpumask.h>
-#include <xen/types.h>
-#include <xen/sched.h>
-#include <xen/timer.h>
-#include <asm/config.h>
-#include <acpi/cpufreq/cpufreq.h>
-
-#define DEF_FREQUENCY_UP_THRESHOLD              (80)
-
-#define MIN_DBS_INTERVAL                        (MICROSECS(100))
-#define MIN_SAMPLING_MILLISECS                  (20)
-#define MIN_STAT_SAMPLING_RATE                   \
-    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
-#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
-#define TRANSITION_LATENCY_LIMIT                (10 * 1000 )
-
-static uint64_t def_sampling_rate;
-
-/* Sampling types */
-enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
-
-static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
-
-static unsigned int dbs_enable;    /* number of CPUs using this policy */
-
-static struct dbs_tuners {
-    uint64_t     sampling_rate;
-    unsigned int up_threshold;
-    unsigned int ignore_nice;
-    unsigned int powersave_bias;
-} dbs_tuners_ins = {
-    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
-    .ignore_nice = 0,
-    .powersave_bias = 0,
-};
-
-static struct timer dbs_timer[NR_CPUS];
-
-uint64_t get_cpu_idle_time(unsigned int cpu)
-{
-    uint64_t idle_ns;
-    struct vcpu *v;
-
-    if ((v = idle_vcpu[cpu]) == NULL)
-        return 0;
-
-    idle_ns = v->runstate.time[RUNSTATE_running];
-    if (v->is_running)
-        idle_ns += NOW() - v->runstate.state_entry_time;
-
-    return idle_ns;
-}
-
-static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
-{
-    unsigned int load = 0;
-    uint64_t cur_ns, idle_ns, total_ns;
-
-    struct cpufreq_policy *policy;
-    unsigned int j;
-
-    if (!this_dbs_info->enable)
-        return;
-
-    policy = this_dbs_info->cur_policy;
-
-    if (unlikely(policy->resume)) {
-        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
-        return;
-    }
-
-    cur_ns = NOW();
-    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
-    this_dbs_info->prev_cpu_wall = NOW();
-
-    if (total_ns < MIN_DBS_INTERVAL)
-        return;
-
-    /* Get Idle Time */
-    idle_ns = UINT_MAX;
-    for_each_cpu_mask(j, policy->cpus) {
-        uint64_t total_idle_ns;
-        unsigned int tmp_idle_ns;
-        struct cpu_dbs_info_s *j_dbs_info;
-
-        j_dbs_info = &per_cpu(cpu_dbs_info, j);
-        total_idle_ns = get_cpu_idle_time(j);
-        tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
-        j_dbs_info->prev_cpu_idle = total_idle_ns;
-
-        if (tmp_idle_ns < idle_ns)
-            idle_ns = tmp_idle_ns;
-    }
-
-    if (likely(total_ns > idle_ns))
-        load = (100 * (total_ns - idle_ns)) / total_ns;
-
-    /* Check for frequency increase */
-    if (load > dbs_tuners_ins.up_threshold) {
-        /* if we are already at full speed then break out early */
-        if (policy->cur == policy->max)
-            return;
-        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
-        return;
-    }
-
-    /* Check for frequency decrease */
-    /* if we cannot reduce the frequency anymore, break out early */
-    if (policy->cur == policy->min)
-        return;
-
-    /*
-     * The optimal frequency is the frequency that is the lowest that
-     * can support the current CPU usage without triggering the up
-     * policy. To be safe, we focus 10 points under the threshold.
-     */
-    if (load < (dbs_tuners_ins.up_threshold - 10)) {
-        unsigned int freq_next, freq_cur;
-
-        freq_cur = __cpufreq_driver_getavg(policy);
-        if (!freq_cur)
-            freq_cur = policy->cur;
-
-        freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
-
-        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
-    }
-}
-
-static void do_dbs_timer(void *dbs)
-{
-    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;
-
-    if (!dbs_info->enable)
-        return;
-
-    dbs_check_cpu(dbs_info);
-
-    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
-}
-
-static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
-{
-    dbs_info->enable = 1;
-
-    init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, 
-        (void *)dbs_info, dbs_info->cpu);
-
-    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
-}
-
-static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
-{
-    dbs_info->enable = 0;
-    stop_timer(&dbs_timer[dbs_info->cpu]);
-}
-
-int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
-{
-    unsigned int cpu = policy->cpu;
-    struct cpu_dbs_info_s *this_dbs_info;
-    unsigned int j;
-
-    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
-
-    switch (event) {
-    case CPUFREQ_GOV_START:
-        if ((!cpu_online(cpu)) || (!policy->cur))
-            return -EINVAL;
-
-        if (policy->cpuinfo.transition_latency >
-            (TRANSITION_LATENCY_LIMIT * 1000)) {
-            printk(KERN_WARNING "ondemand governor failed to load "
-                "due to too long transition latency\n");
-            return -EINVAL;
-        }
-        if (this_dbs_info->enable)
-            /* Already enabled */
-            break;
-
-        dbs_enable++;
-
-        for_each_cpu_mask(j, policy->cpus) {
-            struct cpu_dbs_info_s *j_dbs_info;
-            j_dbs_info = &per_cpu(cpu_dbs_info, j);
-            j_dbs_info->cur_policy = policy;
-
-            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
-            j_dbs_info->prev_cpu_wall = NOW();
-        }
-        this_dbs_info->cpu = cpu;
-        /*
-         * Start the timerschedule work, when this governor
-         * is used for first time
-         */
-        if (dbs_enable == 1) {
-            def_sampling_rate = policy->cpuinfo.transition_latency *
-                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
-
-            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
-                def_sampling_rate = MIN_STAT_SAMPLING_RATE;
-
-            dbs_tuners_ins.sampling_rate = def_sampling_rate;
-        }
-        dbs_timer_init(this_dbs_info);
-
-        break;
-
-    case CPUFREQ_GOV_STOP:
-        dbs_timer_exit(this_dbs_info);
-        dbs_enable--;
-
-        break;
-
-    case CPUFREQ_GOV_LIMITS:
-        if (policy->max < this_dbs_info->cur_policy->cur)
-            __cpufreq_driver_target(this_dbs_info->cur_policy,
-                policy->max, CPUFREQ_RELATION_H);
-        else if (policy->min > this_dbs_info->cur_policy->cur)
-            __cpufreq_driver_target(this_dbs_info->cur_policy,
-                policy->min, CPUFREQ_RELATION_L);
-        break;
-    }
-    return 0;
-}
-
-struct cpufreq_governor cpufreq_gov_dbs = {
-    .name = "ondemand",
-    .governor = cpufreq_governor_dbs,
-};
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/utility.c
--- a/xen/arch/x86/acpi/cpufreq/utility.c       Fri Sep 26 11:12:29 2008 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,368 +0,0 @@
-/*
- *  utility.c - misc functions for cpufreq driver and Px statistic
- *
- *  Copyright (C) 2001 Russell King
- *            (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx>
- *
- *  Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx>
- *    Added handling for CPU hotplug
- *  Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx>
- *    Fix handling for CPU hotplug -- affected CPUs
- *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
- *    1. Merge cpufreq.c and freq_table.c of linux 2.6.23
- *    And poring to Xen hypervisor
- *    2. some Px statistic interface funcdtions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <xen/errno.h>
-#include <xen/cpumask.h>
-#include <xen/types.h>
-#include <xen/spinlock.h>
-#include <xen/percpu.h>
-#include <xen/types.h>
-#include <xen/sched.h>
-#include <xen/timer.h>
-#include <asm/config.h>
-#include <acpi/cpufreq/cpufreq.h>
-#include <public/sysctl.h>
-
-struct cpufreq_driver   *cpufreq_driver;
-struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
-struct cpufreq_policy   *__read_mostly cpufreq_cpu_policy[NR_CPUS];
-
-/*********************************************************************
- *                    Px STATISTIC INFO                              *
- *********************************************************************/
-
-void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
-{
-    uint32_t i;
-    uint64_t now;
-
-    now = NOW();
-
-    for_each_cpu_mask(i, cpumask) {
-        struct pm_px *pxpt = px_statistic_data[i];
-        struct processor_pminfo *pmpt = processor_pminfo[i];
-        uint64_t total_idle_ns;
-        uint64_t tmp_idle_ns;
-
-        if ( !pxpt || !pmpt )
-            continue;
-
-        total_idle_ns = get_cpu_idle_time(i);
-        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
-
-        pxpt->u.last = from;
-        pxpt->u.cur = to;
-        pxpt->u.pt[to].count++;
-        pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
-        pxpt->u.pt[from].residency -= tmp_idle_ns;
-
-        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
-
-        pxpt->prev_state_wall = now;
-        pxpt->prev_idle_wall = total_idle_ns;
-    }
-}
-
-int px_statistic_init(unsigned int cpuid)
-{
-    uint32_t i, count;
-    struct pm_px *pxpt = px_statistic_data[cpuid];
-    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
-
-    count = pmpt->perf.state_count;
-
-    if ( !pmpt )
-        return -EINVAL;
-
-    if ( !pxpt )
-    {
-        pxpt = xmalloc(struct pm_px);
-        if ( !pxpt )
-            return -ENOMEM;
-        memset(pxpt, 0, sizeof(*pxpt));
-        px_statistic_data[cpuid] = pxpt;
-    }
-
-    pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
-    if (!pxpt->u.trans_pt)
-        return -ENOMEM;
-
-    pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
-    if (!pxpt->u.pt) {
-        xfree(pxpt->u.trans_pt);
-        return -ENOMEM;
-    }
-
-    memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t)));
-    memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
-
-    pxpt->u.total = pmpt->perf.state_count;
-    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
-
-    for (i=0; i < pmpt->perf.state_count; i++)
-        pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
-
-    pxpt->prev_state_wall = NOW();
-    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
-
-    return 0;
-}
-
-void px_statistic_exit(unsigned int cpuid)
-{
-    struct pm_px *pxpt = px_statistic_data[cpuid];
-
-    if (!pxpt)
-        return;
-    xfree(pxpt->u.trans_pt);
-    xfree(pxpt->u.pt);
-    memset(pxpt, 0, sizeof(struct pm_px));
-}
-
-void px_statistic_reset(unsigned int cpuid)
-{
-    uint32_t i, j, count;
-    struct pm_px *pxpt = px_statistic_data[cpuid];
-    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
-
-    if ( !pxpt || !pmpt )
-        return;
-
-    count = pmpt->perf.state_count;
-
-    for (i=0; i < count; i++) {
-        pxpt->u.pt[i].residency = 0;
-        pxpt->u.pt[i].count = 0;
-
-        for (j=0; j < count; j++)
-            *(pxpt->u.trans_pt + i*count + j) = 0;
-    }
-
-    pxpt->prev_state_wall = NOW();
-    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
-}
-
-
-/*********************************************************************
- *                   FREQUENCY TABLE HELPERS                         *
- *********************************************************************/
-
-int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
-                                    struct cpufreq_frequency_table *table)
-{
-    unsigned int min_freq = ~0;
-    unsigned int max_freq = 0;
-    unsigned int i;
-
-    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
-        unsigned int freq = table[i].frequency;
-        if (freq == CPUFREQ_ENTRY_INVALID)
-            continue;
-        if (freq < min_freq)
-            min_freq = freq;
-        if (freq > max_freq)
-            max_freq = freq;
-    }
-
-    policy->min = policy->cpuinfo.min_freq = min_freq;
-    policy->max = policy->cpuinfo.max_freq = max_freq;
-
-    if (policy->min == ~0)
-        return -EINVAL;
-    else
-        return 0;
-}
-
-int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
-                                   struct cpufreq_frequency_table *table)
-{
-    unsigned int next_larger = ~0;
-    unsigned int i;
-    unsigned int count = 0;
-
-    if (!cpu_online(policy->cpu))
-        return -EINVAL;
-
-    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-                                 policy->cpuinfo.max_freq);
-
-    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
-        unsigned int freq = table[i].frequency;
-        if (freq == CPUFREQ_ENTRY_INVALID)
-            continue;
-        if ((freq >= policy->min) && (freq <= policy->max))
-            count++;
-        else if ((next_larger > freq) && (freq > policy->max))
-            next_larger = freq;
-    }
-
-    if (!count)
-        policy->max = next_larger;
-
-    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-                                 policy->cpuinfo.max_freq);
-
-    return 0;
-}
-
-int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
-                                   struct cpufreq_frequency_table *table,
-                                   unsigned int target_freq,
-                                   unsigned int relation,
-                                   unsigned int *index)
-{
-    struct cpufreq_frequency_table optimal = {
-        .index = ~0,
-        .frequency = 0,
-    };
-    struct cpufreq_frequency_table suboptimal = {
-        .index = ~0,
-        .frequency = 0,
-    };
-    unsigned int i;
-
-    switch (relation) {
-    case CPUFREQ_RELATION_H:
-        suboptimal.frequency = ~0;
-        break;
-    case CPUFREQ_RELATION_L:
-        optimal.frequency = ~0;
-        break;
-    }
-
-    if (!cpu_online(policy->cpu))
-        return -EINVAL;
-
-    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
-        unsigned int freq = table[i].frequency;
-        if (freq == CPUFREQ_ENTRY_INVALID)
-            continue;
-        if ((freq < policy->min) || (freq > policy->max))
-            continue;
-        switch(relation) {
-        case CPUFREQ_RELATION_H:
-            if (freq <= target_freq) {
-                if (freq >= optimal.frequency) {
-                    optimal.frequency = freq;
-                    optimal.index = i;
-                }
-            } else {
-                if (freq <= suboptimal.frequency) {
-                    suboptimal.frequency = freq;
-                    suboptimal.index = i;
-                }
-            }
-            break;
-        case CPUFREQ_RELATION_L:
-            if (freq >= target_freq) {
-                if (freq <= optimal.frequency) {
-                    optimal.frequency = freq;
-                    optimal.index = i;
-                }
-            } else {
-                if (freq >= suboptimal.frequency) {
-                    suboptimal.frequency = freq;
-                    suboptimal.index = i;
-                }
-            }
-            break;
-        }
-    }
-    if (optimal.index > i) {
-        if (suboptimal.index > i)
-            return -EINVAL;
-        *index = suboptimal.index;
-    } else
-        *index = optimal.index;
-
-    return 0;
-}
-
-
-/*********************************************************************
- *               GOVERNORS                                           *
- *********************************************************************/
-
-int __cpufreq_driver_target(struct cpufreq_policy *policy,
-                            unsigned int target_freq,
-                            unsigned int relation)
-{
-    int retval = -EINVAL;
-
-    if (cpu_online(policy->cpu) && cpufreq_driver->target)
-        retval = cpufreq_driver->target(policy, target_freq, relation);
-
-    return retval;
-}
-
-int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
-{
-    int ret = 0;
-
-    if (!policy)
-        return -EINVAL;
-
-    if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
-        ret = cpufreq_driver->getavg(policy->cpu);
-
-    return ret;
-}
-
-
-/*********************************************************************
- *                 POLICY                                            *
- *********************************************************************/
-
-/*
- * data   : current policy.
- * policy : policy to be set.
- */
-int __cpufreq_set_policy(struct cpufreq_policy *data,
-                                struct cpufreq_policy *policy)
-{
-    int ret = 0;
-
-    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
-
-    if (policy->min > data->min && policy->min > policy->max)
-        return -EINVAL;
-
-    /* verify the cpu speed can be set within this limit */
-    ret = cpufreq_driver->verify(policy);
-    if (ret)
-        return ret;
-
-    data->min = policy->min;
-    data->max = policy->max;
-
-    if (policy->governor != data->governor) {
-        /* save old, working values */
-        struct cpufreq_governor *old_gov = data->governor;
-
-        /* end old governor */
-        if (data->governor)
-            __cpufreq_governor(data, CPUFREQ_GOV_STOP);
-
-        /* start new governor */
-        data->governor = policy->governor;
-        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
-            /* new governor failed, so re-start old one */
-            if (old_gov) {
-                data->governor = old_gov;
-                __cpufreq_governor(data, CPUFREQ_GOV_START);
-            }
-            return -EINVAL;
-        }
-        /* might be a policy change, too, so fall through */
-    }
-
-    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
-}
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/pmstat.c
--- a/xen/arch/x86/acpi/pmstat.c        Fri Sep 26 11:12:29 2008 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,155 +0,0 @@
-/*****************************************************************************
-#  pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
-#
-#  Copyright (c) 2008, Liu Jinsong <jinsong.liu@xxxxxxxxx>
-#
-# This program is free software; you can redistribute it and/or modify it 
-# under the terms of the GNU General Public License as published by the Free 
-# Software Foundation; either version 2 of the License, or (at your option) 
-# any later version.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT 
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc., 59 
-# Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-#
-# The full GNU General Public License is included in this distribution in the
-# file called LICENSE.
-#
-*****************************************************************************/
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/event.h>
-#include <xen/irq.h>
-#include <xen/iocap.h>
-#include <xen/compat.h>
-#include <xen/guest_access.h>
-#include <asm/current.h>
-#include <public/xen.h>
-#include <xen/cpumask.h>
-#include <asm/processor.h>
-#include <xen/percpu.h>
-#include <xen/domain.h>
-
-#include <public/sysctl.h>
-#include <acpi/cpufreq/cpufreq.h>
-
-struct pm_px *__read_mostly px_statistic_data[NR_CPUS];
-
-extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
-extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
-extern int pmstat_reset_cx_stat(uint32_t cpuid);
-
-int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
-{
-    int ret = 0;
-    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
-
-    if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
-        return -EINVAL;
-
-    switch ( op->type & PMSTAT_CATEGORY_MASK )
-    {
-    case PMSTAT_CX:
-        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
-            return -ENODEV;
-        break;
-    case PMSTAT_PX:
-        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
-            return -ENODEV;
-        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
-            return -EINVAL;
-        break;
-    default:
-        return -ENODEV;
-    }
-
-    switch ( op->type )
-    {
-    case PMSTAT_get_max_px:
-    {
-        op->u.getpx.total = pmpt->perf.state_count;
-        break;
-    }
-
-    case PMSTAT_get_pxstat:
-    {
-        uint64_t now, ct;
-        uint64_t total_idle_ns;
-        uint64_t tmp_idle_ns;
-        struct pm_px *pxpt = px_statistic_data[op->cpuid];
-
-        if ( !pxpt )
-            return -ENODATA;
-
-        total_idle_ns = get_cpu_idle_time(op->cpuid);
-        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
-
-        now = NOW();
-        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
-        pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
-        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
-        pxpt->prev_state_wall = now;
-        pxpt->prev_idle_wall = total_idle_ns;
-
-        ct = pmpt->perf.state_count;
-        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
-        {
-            ret = -EFAULT;
-            break;
-        }
-
-        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
-        {
-            ret = -EFAULT;
-            break;
-        }
-
-        op->u.getpx.total = pxpt->u.total;
-        op->u.getpx.usable = pxpt->u.usable;
-        op->u.getpx.last = pxpt->u.last;
-        op->u.getpx.cur = pxpt->u.cur;
-
-        break;
-    }
-
-    case PMSTAT_reset_pxstat:
-    {
-        px_statistic_reset(op->cpuid);
-        break;
-    }
-
-    case PMSTAT_get_max_cx:
-    {
-        op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid);
-        ret = 0;
-        break;
-    }
-
-    case PMSTAT_get_cxstat:
-    {
-        ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx);
-        break;
-    }
-
-    case PMSTAT_reset_cxstat:
-    {
-        ret = pmstat_reset_cx_stat(op->cpuid);
-        break;
-    }
-
-    default:
-        printk("not defined sub-hypercall @ do_get_pm_info\n");
-        ret = -ENOSYS;
-        break;
-    }
-
-    return ret;
-}
diff -r 5274aa966231 -r 08374be21318 xen/drivers/Makefile
--- a/xen/drivers/Makefile      Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/drivers/Makefile      Fri Sep 26 14:04:38 2008 +0100
@@ -1,4 +1,5 @@ subdir-y += char
 subdir-y += char
+subdir-y += cpufreq
 subdir-$(x86) += passthrough
 subdir-$(HAS_ACPI) += acpi
 subdir-$(HAS_VGA) += video
diff -r 5274aa966231 -r 08374be21318 xen/drivers/acpi/Makefile
--- a/xen/drivers/acpi/Makefile Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/drivers/acpi/Makefile Fri Sep 26 14:04:38 2008 +0100
@@ -4,6 +4,7 @@ obj-y += tables.o
 obj-y += tables.o
 obj-y += numa.o
 obj-y += osl.o
+obj-y += pmstat.o
 
 obj-$(x86) += hwregs.o
 obj-$(x86) += reboot.o
diff -r 5274aa966231 -r 08374be21318 xen/drivers/acpi/pmstat.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/acpi/pmstat.c Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,155 @@
+/*****************************************************************************
+#  pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
+#
+#  Copyright (c) 2008, Liu Jinsong <jinsong.liu@xxxxxxxxx>
+#
+# This program is free software; you can redistribute it and/or modify it 
+# under the terms of the GNU General Public License as published by the Free 
+# Software Foundation; either version 2 of the License, or (at your option) 
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT 
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 
+# Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+# The full GNU General Public License is included in this distribution in the
+# file called LICENSE.
+#
+*****************************************************************************/
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/irq.h>
+#include <xen/iocap.h>
+#include <xen/compat.h>
+#include <xen/guest_access.h>
+#include <asm/current.h>
+#include <public/xen.h>
+#include <xen/cpumask.h>
+#include <asm/processor.h>
+#include <xen/percpu.h>
+#include <xen/domain.h>
+
+#include <public/sysctl.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+struct pm_px *__read_mostly cpufreq_statistic_data[NR_CPUS];
+
+extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
+extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
+extern int pmstat_reset_cx_stat(uint32_t cpuid);
+
+int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
+{
+    int ret = 0;
+    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
+
+    if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
+        return -EINVAL;
+
+    switch ( op->type & PMSTAT_CATEGORY_MASK )
+    {
+    case PMSTAT_CX:
+        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
+            return -ENODEV;
+        break;
+    case PMSTAT_PX:
+        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+            return -ENODEV;
+        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
+            return -EINVAL;
+        break;
+    default:
+        return -ENODEV;
+    }
+
+    switch ( op->type )
+    {
+    case PMSTAT_get_max_px:
+    {
+        op->u.getpx.total = pmpt->perf.state_count;
+        break;
+    }
+
+    case PMSTAT_get_pxstat:
+    {
+        uint64_t now, ct;
+        uint64_t total_idle_ns;
+        uint64_t tmp_idle_ns;
+        struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid];
+
+        if ( !pxpt )
+            return -ENODATA;
+
+        total_idle_ns = get_cpu_idle_time(op->cpuid);
+        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
+
+        now = NOW();
+        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
+        pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
+        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
+        pxpt->prev_state_wall = now;
+        pxpt->prev_idle_wall = total_idle_ns;
+
+        ct = pmpt->perf.state_count;
+        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
+        {
+            ret = -EFAULT;
+            break;
+        }
+
+        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
+        {
+            ret = -EFAULT;
+            break;
+        }
+
+        op->u.getpx.total = pxpt->u.total;
+        op->u.getpx.usable = pxpt->u.usable;
+        op->u.getpx.last = pxpt->u.last;
+        op->u.getpx.cur = pxpt->u.cur;
+
+        break;
+    }
+
+    case PMSTAT_reset_pxstat:
+    {
+        cpufreq_statistic_reset(op->cpuid);
+        break;
+    }
+
+    case PMSTAT_get_max_cx:
+    {
+        op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid);
+        ret = 0;
+        break;
+    }
+
+    case PMSTAT_get_cxstat:
+    {
+        ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx);
+        break;
+    }
+
+    case PMSTAT_reset_cxstat:
+    {
+        ret = pmstat_reset_cx_stat(op->cpuid);
+        break;
+    }
+
+    default:
+        printk("not defined sub-hypercall @ do_get_pm_info\n");
+        ret = -ENOSYS;
+        break;
+    }
+
+    return ret;
+}
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/Makefile      Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,3 @@
+obj-y += cpufreq.o
+obj-y += cpufreq_ondemand.o
+obj-y += utility.o
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/cpufreq.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/cpufreq.c     Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,188 @@
+/*
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@xxxxxxxx>
+ *  Copyright (C) 2006        Denis Sadykov <denis.m.sadykov@xxxxxxxxx>
+ *
+ *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *      Add cpufreq limit change handle and per-cpu cpufreq add/del
+ *      to cope with cpu hotplug
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/delay.h>
+#include <xen/cpumask.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <xen/xmalloc.h>
+#include <asm/bug.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/config.h>
+#include <asm/processor.h>
+#include <asm/percpu.h>
+#include <asm/cpufeature.h>
+#include <acpi/acpi.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+/* TODO: change to link list later as domain number may be sparse */
+static cpumask_t cpufreq_dom_map[NR_CPUS];
+
+int cpufreq_limit_change(unsigned int cpu)
+{
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
+    struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
+    struct cpufreq_policy policy;
+
+    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
+        return -ENODEV;
+
+    if ((perf->platform_limit < 0) || 
+        (perf->platform_limit >= perf->state_count))
+        return -EINVAL;
+
+    memcpy(&policy, data, sizeof(struct cpufreq_policy)); 
+
+    policy.max =
+        perf->states[perf->platform_limit].core_frequency * 1000;
+
+    return __cpufreq_set_policy(data, &policy);
+}
+
+int cpufreq_add_cpu(unsigned int cpu)
+{
+    int ret = 0;
+    unsigned int firstcpu;
+    unsigned int dom;
+    unsigned int j;
+    struct cpufreq_policy new_policy;
+    struct cpufreq_policy *policy;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
+
+    /* to protect the case when Px was not controlled by xen */
+    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
+        return 0;
+
+    if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
+        return -EINVAL;
+
+    ret = cpufreq_statistic_init(cpu);
+    if (ret)
+        return ret;
+
+    dom = perf->domain_info.domain;
+    if (cpus_weight(cpufreq_dom_map[dom])) {
+        /* share policy with the first cpu since on same boat */
+        firstcpu = first_cpu(cpufreq_dom_map[dom]);
+        policy = cpufreq_cpu_policy[firstcpu];
+
+        cpufreq_cpu_policy[cpu] = policy;
+        cpu_set(cpu, cpufreq_dom_map[dom]);
+        cpu_set(cpu, policy->cpus);
+
+        printk(KERN_EMERG"adding CPU %u\n", cpu);
+    } else {
+        /* for the first cpu, setup policy and do init work */
+        policy = xmalloc(struct cpufreq_policy);
+        if (!policy) {
+            cpufreq_statistic_exit(cpu);
+            return -ENOMEM;
+        }
+        memset(policy, 0, sizeof(struct cpufreq_policy));
+
+        cpufreq_cpu_policy[cpu] = policy;
+        cpu_set(cpu, cpufreq_dom_map[dom]);
+        cpu_set(cpu, policy->cpus);
+
+        policy->cpu = cpu;
+        ret = cpufreq_driver->init(policy);
+        if (ret)
+            goto err1;
+        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
+    }
+
+    /*
+     * After get full cpumap of the coordination domain,
+     * we can safely start gov here.
+     */
+    if (cpus_weight(cpufreq_dom_map[dom]) ==
+        perf->domain_info.num_processors) {
+        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+        policy->governor = NULL;
+        ret = __cpufreq_set_policy(policy, &new_policy);
+        if (ret)
+            goto err2;
+    }
+
+    return 0;
+
+err2:
+    cpufreq_driver->exit(policy);
+err1:
+    for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
+        cpufreq_cpu_policy[j] = NULL;
+        cpufreq_statistic_exit(j);
+    }
+
+    cpus_clear(cpufreq_dom_map[dom]);
+    xfree(policy);
+    return ret;
+}
+
+int cpufreq_del_cpu(unsigned int cpu)
+{
+    unsigned int dom;
+    struct cpufreq_policy *policy;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
+
+    /* to protect the case when Px was not controlled by xen */
+    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
+        return 0;
+
+    if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
+        return -EINVAL;
+
+    dom = perf->domain_info.domain;
+    policy = cpufreq_cpu_policy[cpu];
+
+    printk(KERN_EMERG"deleting CPU %u\n", cpu);
+
+    /* for the first cpu of the domain, stop gov */
+    if (cpus_weight(cpufreq_dom_map[dom]) ==
+        perf->domain_info.num_processors)
+        __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+
+    cpufreq_cpu_policy[cpu] = NULL;
+    cpu_clear(cpu, policy->cpus);
+    cpu_clear(cpu, cpufreq_dom_map[dom]);
+    cpufreq_statistic_exit(cpu);
+
+    /* for the last cpu of the domain, clean room */
+    /* It's safe here to free freq_table, drv_data and policy */
+    if (!cpus_weight(cpufreq_dom_map[dom])) {
+        cpufreq_driver->exit(policy);
+        xfree(policy);
+    }
+
+    return 0;
+}
+
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/cpufreq_ondemand.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/cpufreq_ondemand.c    Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,246 @@
+/*
+ *  xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
+ *
+ *  Copyright (C)  2001 Russell King
+ *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>.
+ *                      Jun Nakajima <jun.nakajima@xxxxxxxxx>
+ *             Feb 2008 Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *             Porting cpufreq_ondemand.c from Liunx 2.6.23 to Xen hypervisor 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <xen/types.h>
+#include <xen/percpu.h>
+#include <xen/cpumask.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <asm/config.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+#define DEF_FREQUENCY_UP_THRESHOLD              (80)
+
+#define MIN_DBS_INTERVAL                        (MICROSECS(100))
+#define MIN_SAMPLING_MILLISECS                  (20)
+#define MIN_STAT_SAMPLING_RATE                   \
+    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
+#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
+#define TRANSITION_LATENCY_LIMIT                (10 * 1000 )
+
+static uint64_t def_sampling_rate;
+
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
+
+static unsigned int dbs_enable;    /* number of CPUs using this policy */
+
+static struct dbs_tuners {
+    uint64_t     sampling_rate;
+    unsigned int up_threshold;
+    unsigned int ignore_nice;
+    unsigned int powersave_bias;
+} dbs_tuners_ins = {
+    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
+    .ignore_nice = 0,
+    .powersave_bias = 0,
+};
+
+static struct timer dbs_timer[NR_CPUS];
+
+uint64_t get_cpu_idle_time(unsigned int cpu)
+{
+    uint64_t idle_ns;
+    struct vcpu *v;
+
+    if ((v = idle_vcpu[cpu]) == NULL)
+        return 0;
+
+    idle_ns = v->runstate.time[RUNSTATE_running];
+    if (v->is_running)
+        idle_ns += NOW() - v->runstate.state_entry_time;
+
+    return idle_ns;
+}
+
+static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
+{
+    unsigned int load = 0;
+    uint64_t cur_ns, idle_ns, total_ns;
+
+    struct cpufreq_policy *policy;
+    unsigned int j;
+
+    if (!this_dbs_info->enable)
+        return;
+
+    policy = this_dbs_info->cur_policy;
+
+    if (unlikely(policy->resume)) {
+        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
+        return;
+    }
+
+    cur_ns = NOW();
+    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
+    this_dbs_info->prev_cpu_wall = NOW();
+
+    if (total_ns < MIN_DBS_INTERVAL)
+        return;
+
+    /* Get Idle Time */
+    idle_ns = UINT_MAX;
+    for_each_cpu_mask(j, policy->cpus) {
+        uint64_t total_idle_ns;
+        unsigned int tmp_idle_ns;
+        struct cpu_dbs_info_s *j_dbs_info;
+
+        j_dbs_info = &per_cpu(cpu_dbs_info, j);
+        total_idle_ns = get_cpu_idle_time(j);
+        tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
+        j_dbs_info->prev_cpu_idle = total_idle_ns;
+
+        if (tmp_idle_ns < idle_ns)
+            idle_ns = tmp_idle_ns;
+    }
+
+    if (likely(total_ns > idle_ns))
+        load = (100 * (total_ns - idle_ns)) / total_ns;
+
+    /* Check for frequency increase */
+    if (load > dbs_tuners_ins.up_threshold) {
+        /* if we are already at full speed then break out early */
+        if (policy->cur == policy->max)
+            return;
+        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
+        return;
+    }
+
+    /* Check for frequency decrease */
+    /* if we cannot reduce the frequency anymore, break out early */
+    if (policy->cur == policy->min)
+        return;
+
+    /*
+     * The optimal frequency is the frequency that is the lowest that
+     * can support the current CPU usage without triggering the up
+     * policy. To be safe, we focus 10 points under the threshold.
+     */
+    if (load < (dbs_tuners_ins.up_threshold - 10)) {
+        unsigned int freq_next, freq_cur;
+
+        freq_cur = __cpufreq_driver_getavg(policy);
+        if (!freq_cur)
+            freq_cur = policy->cur;
+
+        freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
+
+        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
+    }
+}
+
+static void do_dbs_timer(void *dbs)
+{
+    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;
+
+    if (!dbs_info->enable)
+        return;
+
+    dbs_check_cpu(dbs_info);
+
+    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+}
+
+static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
+{
+    dbs_info->enable = 1;
+
+    init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, 
+        (void *)dbs_info, dbs_info->cpu);
+
+    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+}
+
+static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
+{
+    dbs_info->enable = 0;
+    stop_timer(&dbs_timer[dbs_info->cpu]);
+}
+
+int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
+{
+    unsigned int cpu = policy->cpu;
+    struct cpu_dbs_info_s *this_dbs_info;
+    unsigned int j;
+
+    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
+
+    switch (event) {
+    case CPUFREQ_GOV_START:
+        if ((!cpu_online(cpu)) || (!policy->cur))
+            return -EINVAL;
+
+        if (policy->cpuinfo.transition_latency >
+            (TRANSITION_LATENCY_LIMIT * 1000)) {
+            printk(KERN_WARNING "ondemand governor failed to load "
+                "due to too long transition latency\n");
+            return -EINVAL;
+        }
+        if (this_dbs_info->enable)
+            /* Already enabled */
+            break;
+
+        dbs_enable++;
+
+        for_each_cpu_mask(j, policy->cpus) {
+            struct cpu_dbs_info_s *j_dbs_info;
+            j_dbs_info = &per_cpu(cpu_dbs_info, j);
+            j_dbs_info->cur_policy = policy;
+
+            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
+            j_dbs_info->prev_cpu_wall = NOW();
+        }
+        this_dbs_info->cpu = cpu;
+        /*
+         * Start the timerschedule work, when this governor
+         * is used for first time
+         */
+        if (dbs_enable == 1) {
+            def_sampling_rate = policy->cpuinfo.transition_latency *
+                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
+
+            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
+                def_sampling_rate = MIN_STAT_SAMPLING_RATE;
+
+            dbs_tuners_ins.sampling_rate = def_sampling_rate;
+        }
+        dbs_timer_init(this_dbs_info);
+
+        break;
+
+    case CPUFREQ_GOV_STOP:
+        dbs_timer_exit(this_dbs_info);
+        dbs_enable--;
+
+        break;
+
+    case CPUFREQ_GOV_LIMITS:
+        if (policy->max < this_dbs_info->cur_policy->cur)
+            __cpufreq_driver_target(this_dbs_info->cur_policy,
+                policy->max, CPUFREQ_RELATION_H);
+        else if (policy->min > this_dbs_info->cur_policy->cur)
+            __cpufreq_driver_target(this_dbs_info->cur_policy,
+                policy->min, CPUFREQ_RELATION_L);
+        break;
+    }
+    return 0;
+}
+
+struct cpufreq_governor cpufreq_gov_dbs = {
+    .name = "ondemand",
+    .governor = cpufreq_governor_dbs,
+};
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/utility.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/utility.c     Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,368 @@
+/*
+ *  utility.c - misc functions for cpufreq driver and Px statistic
+ *
+ *  Copyright (C) 2001 Russell King
+ *            (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx>
+ *
+ *  Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx>
+ *    Added handling for CPU hotplug
+ *  Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx>
+ *    Fix handling for CPU hotplug -- affected CPUs
+ *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *    1. Merge cpufreq.c and freq_table.c of linux 2.6.23
+ *    And poring to Xen hypervisor
+ *    2. some Px statistic interface funcdtions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <xen/errno.h>
+#include <xen/cpumask.h>
+#include <xen/types.h>
+#include <xen/spinlock.h>
+#include <xen/percpu.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <asm/config.h>
+#include <acpi/cpufreq/cpufreq.h>
+#include <public/sysctl.h>
+
+struct cpufreq_driver   *cpufreq_driver;
+struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
+struct cpufreq_policy   *__read_mostly cpufreq_cpu_policy[NR_CPUS];
+
+/*********************************************************************
+ *                    Px STATISTIC INFO                              *
+ *********************************************************************/
+
+void cpufreq_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
+{
+    uint32_t i;
+    uint64_t now;
+
+    now = NOW();
+
+    for_each_cpu_mask(i, cpumask) {
+        struct pm_px *pxpt = cpufreq_statistic_data[i];
+        struct processor_pminfo *pmpt = processor_pminfo[i];
+        uint64_t total_idle_ns;
+        uint64_t tmp_idle_ns;
+
+        if ( !pxpt || !pmpt )
+            continue;
+
+        total_idle_ns = get_cpu_idle_time(i);
+        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
+
+        pxpt->u.last = from;
+        pxpt->u.cur = to;
+        pxpt->u.pt[to].count++;
+        pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
+        pxpt->u.pt[from].residency -= tmp_idle_ns;
+
+        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
+
+        pxpt->prev_state_wall = now;
+        pxpt->prev_idle_wall = total_idle_ns;
+    }
+}
+
+int cpufreq_statistic_init(unsigned int cpuid)
+{
+    uint32_t i, count;
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+
+    count = pmpt->perf.state_count;
+
+    if ( !pmpt )
+        return -EINVAL;
+
+    if ( !pxpt )
+    {
+        pxpt = xmalloc(struct pm_px);
+        if ( !pxpt )
+            return -ENOMEM;
+        memset(pxpt, 0, sizeof(*pxpt));
+        cpufreq_statistic_data[cpuid] = pxpt;
+    }
+
+    pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
+    if (!pxpt->u.trans_pt)
+        return -ENOMEM;
+
+    pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
+    if (!pxpt->u.pt) {
+        xfree(pxpt->u.trans_pt);
+        return -ENOMEM;
+    }
+
+    memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t)));
+    memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
+
+    pxpt->u.total = pmpt->perf.state_count;
+    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
+
+    for (i=0; i < pmpt->perf.state_count; i++)
+        pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
+
+    pxpt->prev_state_wall = NOW();
+    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+
+    return 0;
+}
+
+void cpufreq_statistic_exit(unsigned int cpuid)
+{
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+
+    if (!pxpt)
+        return;
+    xfree(pxpt->u.trans_pt);
+    xfree(pxpt->u.pt);
+    memset(pxpt, 0, sizeof(struct pm_px));
+}
+
+void cpufreq_statistic_reset(unsigned int cpuid)
+{
+    uint32_t i, j, count;
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+
+    if ( !pxpt || !pmpt )
+        return;
+
+    count = pmpt->perf.state_count;
+
+    for (i=0; i < count; i++) {
+        pxpt->u.pt[i].residency = 0;
+        pxpt->u.pt[i].count = 0;
+
+        for (j=0; j < count; j++)
+            *(pxpt->u.trans_pt + i*count + j) = 0;
+    }
+
+    pxpt->prev_state_wall = NOW();
+    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+}
+
+
+/*********************************************************************
+ *                   FREQUENCY TABLE HELPERS                         *
+ *********************************************************************/
+
+int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
+                                    struct cpufreq_frequency_table *table)
+{
+    unsigned int min_freq = ~0;
+    unsigned int max_freq = 0;
+    unsigned int i;
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if (freq < min_freq)
+            min_freq = freq;
+        if (freq > max_freq)
+            max_freq = freq;
+    }
+
+    policy->min = policy->cpuinfo.min_freq = min_freq;
+    policy->max = policy->cpuinfo.max_freq = max_freq;
+
+    if (policy->min == ~0)
+        return -EINVAL;
+    else
+        return 0;
+}
+
+int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
+                                   struct cpufreq_frequency_table *table)
+{
+    unsigned int next_larger = ~0;
+    unsigned int i;
+    unsigned int count = 0;
+
+    if (!cpu_online(policy->cpu))
+        return -EINVAL;
+
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                 policy->cpuinfo.max_freq);
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if ((freq >= policy->min) && (freq <= policy->max))
+            count++;
+        else if ((next_larger > freq) && (freq > policy->max))
+            next_larger = freq;
+    }
+
+    if (!count)
+        policy->max = next_larger;
+
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                 policy->cpuinfo.max_freq);
+
+    return 0;
+}
+
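+/*
+ * Map target_freq to a table index: CPUFREQ_RELATION_H picks the
+ * highest frequency at or below the target, CPUFREQ_RELATION_L the
+ * lowest at or above it; "suboptimal" keeps the nearest fallback on
+ * the other side of the target.
+ */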
+int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
+                                   struct cpufreq_frequency_table *table,
+                                   unsigned int target_freq,
+                                   unsigned int relation,
+                                   unsigned int *index)
+{
+    struct cpufreq_frequency_table optimal = {
+        .index = ~0,
+        .frequency = 0,
+    };
+    struct cpufreq_frequency_table suboptimal = {
+        .index = ~0,
+        .frequency = 0,
+    };
+    unsigned int i;
+
+    switch (relation) {
+    case CPUFREQ_RELATION_H:
+        suboptimal.frequency = ~0;
+        break;
+    case CPUFREQ_RELATION_L:
+        optimal.frequency = ~0;
+        break;
+    }
+
+    if (!cpu_online(policy->cpu))
+        return -EINVAL;
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if ((freq < policy->min) || (freq > policy->max))
+            continue;
+        switch(relation) {
+        case CPUFREQ_RELATION_H:
+            if (freq <= target_freq) {
+                if (freq >= optimal.frequency) {
+                    optimal.frequency = freq;
+                    optimal.index = i;
+                }
+            } else {
+                if (freq <= suboptimal.frequency) {
+                    suboptimal.frequency = freq;
+                    suboptimal.index = i;
+                }
+            }
+            break;
+        case CPUFREQ_RELATION_L:
+            if (freq >= target_freq) {
+                if (freq <= optimal.frequency) {
+                    optimal.frequency = freq;
+                    optimal.index = i;
+                }
+            } else {
+                if (freq >= suboptimal.frequency) {
+                    suboptimal.frequency = freq;
+                    suboptimal.index = i;
+                }
+            }
+            break;
+        }
+    }
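+    /* an index still at ~0 means no entry satisfied the relation */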
+    if (optimal.index > i) {
+        if (suboptimal.index > i)
+            return -EINVAL;
+        *index = suboptimal.index;
+    } else
+        *index = optimal.index;
+
+    return 0;
+}
+
+
+/*********************************************************************
+ *               GOVERNORS                                           *
+ *********************************************************************/
+
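+/* ask the registered cpufreq driver to switch policy->cpu to target_freq */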
+int __cpufreq_driver_target(struct cpufreq_policy *policy,
+                            unsigned int target_freq,
+                            unsigned int relation)
+{
+    int retval = -EINVAL;
+
+    if (cpu_online(policy->cpu) && cpufreq_driver->target)
+        retval = cpufreq_driver->target(policy, target_freq, relation);
+
+    return retval;
+}
+
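+/* driver-reported average frequency for policy->cpu; 0 if no getavg hook */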
+int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
+{
+    int ret = 0;
+
+    if (!policy)
+        return -EINVAL;
+
+    if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
+        ret = cpufreq_driver->getavg(policy->cpu);
+
+    return ret;
+}
+
+
+/*********************************************************************
+ *                 POLICY                                            *
+ *********************************************************************/
+
+/*
+ * data   : current policy.
+ * policy : policy to be set.
+ */
+int __cpufreq_set_policy(struct cpufreq_policy *data,
+                                struct cpufreq_policy *policy)
+{
+    int ret = 0;
+
+    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
+
+    if (policy->min > policy->max)
+        return -EINVAL;
+
+    /* verify the cpu speed can be set within this limit */
+    ret = cpufreq_driver->verify(policy);
+    if (ret)
+        return ret;
+
+    data->min = policy->min;
+    data->max = policy->max;
+
+    if (policy->governor != data->governor) {
+        /* save old, working values */
+        struct cpufreq_governor *old_gov = data->governor;
+
+        /* end old governor */
+        if (data->governor)
+            __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+
+        /* start new governor */
+        data->governor = policy->governor;
+        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
+            /* new governor failed, so re-start old one */
+            if (old_gov) {
+                data->governor = old_gov;
+                __cpufreq_governor(data, CPUFREQ_GOV_START);
+            }
+            return -EINVAL;
+        }
+        /* might be a policy change, too, so fall through */
+    }
+
+    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+}
diff -r 5274aa966231 -r 08374be21318 xen/include/acpi/cpufreq/cpufreq.h
--- a/xen/include/acpi/cpufreq/cpufreq.h        Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/include/acpi/cpufreq/cpufreq.h        Fri Sep 26 14:04:38 2008 +0100
@@ -20,6 +20,13 @@
 #define CPUFREQ_NAME_LEN 16
 
 struct cpufreq_governor;
+
+struct acpi_cpufreq_data {
+    struct processor_performance *acpi_data;
+    struct cpufreq_frequency_table *freq_table;
+    unsigned int max_freq;
+    unsigned int cpu_feature;
+};
 
 struct cpufreq_cpuinfo {
     unsigned int        max_freq;
diff -r 5274aa966231 -r 08374be21318 xen/include/acpi/cpufreq/processor_perf.h
--- a/xen/include/acpi/cpufreq/processor_perf.h Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/include/acpi/cpufreq/processor_perf.h Fri Sep 26 14:04:38 2008 +0100
@@ -9,10 +9,10 @@ int get_cpu_id(u8);
 int get_cpu_id(u8);
 int powernow_cpufreq_init(void);
 
-void px_statistic_update(cpumask_t, uint8_t, uint8_t);
-int  px_statistic_init(unsigned int);
-void px_statistic_exit(unsigned int);
-void px_statistic_reset(unsigned int);
+void cpufreq_statistic_update(cpumask_t, uint8_t, uint8_t);
+int  cpufreq_statistic_init(unsigned int);
+void cpufreq_statistic_exit(unsigned int);
+void cpufreq_statistic_reset(unsigned int);
 
 int  cpufreq_limit_change(unsigned int);
 
@@ -58,6 +58,6 @@ struct pm_px {
     uint64_t prev_idle_wall;
 };
 
-extern struct pm_px *px_statistic_data[NR_CPUS];
+extern struct pm_px *cpufreq_statistic_data[NR_CPUS];
 
 #endif /* __XEN_PROCESSOR_PM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog