
[Xen-changelog] [xen-unstable] x86: Clean up cpufreq core logic



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1221212090 -3600
# Node ID f125e481d8b65b81dd794d60a99fb0b823eaee2c
# Parent  346c073ed6a4f0debca36588039d649e2efd93c3
x86: Clean up cpufreq core logic

Clean up the cpufreq core logic so that it can now cope with CPU
online/offline events as well as dynamic platform limitation
(_PPC) events.

Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
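
The heart of the new add/del path is per-domain bookkeeping: every _PSD
coordination domain owns a single shared cpufreq policy, which the first
CPU of the domain allocates, later CPUs of the domain attach to, and the
last CPU to leave frees; the governor is started only once the whole
domain is online.  A minimal standalone sketch of that bookkeeping
follows (simplified types; add_cpu/del_cpu, dom_policy[] and dom_weight[]
are illustrative stand-ins for cpufreq_add_cpu(), cpufreq_del_cpu(),
cpufreq_cpu_policy[] and cpufreq_dom_map[], not the hypervisor code
itself):

    /* Toy model of the per-domain policy bookkeeping; not Xen code. */
    #include <stdio.h>
    #include <stdlib.h>

    #define NR_CPUS  4
    #define DOM_SIZE 2   /* num_processors of each coordination domain */

    struct policy { unsigned int nr_cpus; };

    static struct policy *cpu_policy[NR_CPUS]; /* like cpufreq_cpu_policy[] */
    static struct policy *dom_policy[NR_CPUS]; /* one shared policy per domain */
    static unsigned int dom_weight[NR_CPUS];   /* cpus currently in each domain */
    static unsigned int cpu_domain[NR_CPUS] = { 0, 0, 1, 1 }; /* _PSD domain */

    static int add_cpu(unsigned int cpu)
    {
        unsigned int dom = cpu_domain[cpu];

        if (dom_weight[dom] == 0) {
            /* First cpu of the domain: allocate the shared policy. */
            dom_policy[dom] = calloc(1, sizeof(struct policy));
            if (!dom_policy[dom])
                return -1;
            printf("CPU %u: policy created for domain %u\n", cpu, dom);
        } else {
            /* Later cpus simply attach to the existing policy. */
            printf("CPU %u: joined policy of domain %u\n", cpu, dom);
        }

        cpu_policy[cpu] = dom_policy[dom];
        cpu_policy[cpu]->nr_cpus++;
        dom_weight[dom]++;

        /* Start the governor only when the whole domain is online. */
        if (dom_weight[dom] == DOM_SIZE)
            printf("domain %u complete: governor started\n", dom);

        return 0;
    }

    static void del_cpu(unsigned int cpu)
    {
        unsigned int dom = cpu_domain[cpu];

        /* First cpu of a complete domain going down: stop the governor. */
        if (dom_weight[dom] == DOM_SIZE)
            printf("domain %u breaking up: governor stopped\n", dom);

        cpu_policy[cpu]->nr_cpus--;
        cpu_policy[cpu] = NULL;

        if (--dom_weight[dom] == 0) {
            /* Last cpu of the domain: free the shared policy. */
            free(dom_policy[dom]);
            dom_policy[dom] = NULL;
            printf("CPU %u: policy freed for domain %u\n", cpu, dom);
        }
    }

    int main(void)
    {
        for (unsigned int c = 0; c < NR_CPUS; c++)
            add_cpu(c);
        for (unsigned int c = 0; c < NR_CPUS; c++)
            del_cpu(c);
        return 0;
    }

Running it prints the create/join/start sequence for two 2-CPU domains on
the way up and the stop/leave/free sequence on the way down.
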
---
 xen/arch/x86/acpi/cpufreq/cpufreq.c          |  287 ++++++++++++++++-----------
 xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c |    7 
 xen/arch/x86/acpi/cpufreq/powernow.c         |   10 
 xen/arch/x86/acpi/cpufreq/utility.c          |  186 ++++++++---------
 xen/arch/x86/acpi/pmstat.c                   |    2 
 xen/arch/x86/acpi/power.c                    |    6 
 xen/arch/x86/platform_hypercall.c            |   39 ++-
 xen/arch/x86/smpboot.c                       |    5 
 xen/include/acpi/cpufreq/cpufreq.h           |   74 ++++++
 xen/include/acpi/cpufreq/processor_perf.h    |   19 -
 xen/include/public/platform.h                |    2 
 11 files changed, 391 insertions(+), 246 deletions(-)

diff -r 346c073ed6a4 -r f125e481d8b6 xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c       Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Sep 12 10:34:50 2008 +0100
@@ -32,6 +32,7 @@
 #include <xen/errno.h>
 #include <xen/delay.h>
 #include <xen/cpumask.h>
+#include <xen/sched.h>
 #include <xen/timer.h>
 #include <xen/xmalloc.h>
 #include <asm/bug.h>
@@ -44,12 +45,8 @@
 #include <acpi/acpi.h>
 #include <acpi/cpufreq/cpufreq.h>
 
-struct processor_pminfo processor_pminfo[NR_CPUS];
-struct cpufreq_policy xen_px_policy[NR_CPUS];
-
-static cpumask_t *cpufreq_dom_pt;
-static unsigned long *cpufreq_dom_mask;
-static unsigned int cpufreq_dom_max;
+/* TODO: change to a linked list later, as domain numbers may be sparse */
+static cpumask_t cpufreq_dom_map[NR_CPUS];
 
 enum {
     UNDEFINED_CAPABLE = 0,
@@ -335,7 +332,7 @@ static int acpi_cpufreq_target(struct cp
     if (unlikely(result))
         return -ENODEV;
 
-    online_policy_cpus = policy->cpus;
+    cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
 
     next_perf_state = data->freq_table[next_state].index;
     if (perf->state == next_perf_state) {
@@ -388,6 +385,20 @@ static int acpi_cpufreq_target(struct cp
     policy->cur = freqs.new;
 
     return result;
+}
+
+static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
+{
+    struct acpi_cpufreq_data *data;
+    struct processor_performance *perf;
+
+    if (!policy || !(data = drv_data[policy->cpu]))
+        return -EINVAL;
+
+    perf = &processor_pminfo[policy->cpu].perf;
+    cpufreq_verify_within_limits(policy, 0,
+        perf->states[perf->platform_limit].core_frequency * 1000);
+    return cpufreq_frequency_table_verify(policy, data->freq_table);
 }
 
 static unsigned long
@@ -441,14 +452,6 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol
     perf = data->acpi_data;
     policy->shared_type = perf->shared_type;
 
-    /* 
-     * Currently the latest linux (kernel version 2.6.26) 
-     * still has issue when handle the situation _psd HW_ALL coordination.
-     * In Xen hypervisor, we handle _psd HW_ALL coordination in same way as
-     * _psd SW_ALL coordination for the seek of safety.
-     */
-    policy->cpus = perf->shared_cpu_map;
-
     /* capability check */
     if (perf->state_count <= 1) {
         printk("No P-States\n");
@@ -496,6 +499,7 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol
             policy->cpuinfo.transition_latency =
                 perf->states[i].transition_latency * 1000;
     }
+    policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
 
     data->max_freq = perf->states[0].core_frequency * 1000;
     /* table init */
@@ -554,114 +558,173 @@ err_unreg:
     return result;
 }
 
+static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+    struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+
+    if (data) {
+        drv_data[policy->cpu] = NULL;
+        xfree(data->freq_table);
+        xfree(data);
+    }
+
+    return 0;
+}
+
 static struct cpufreq_driver acpi_cpufreq_driver = {
+    .verify = acpi_cpufreq_verify,
     .target = acpi_cpufreq_target,
     .init   = acpi_cpufreq_cpu_init,
+    .exit   = acpi_cpufreq_cpu_exit,
 };
 
-void cpufreq_dom_exit(void)
-{
-    cpufreq_dom_max = 0;
-    if (cpufreq_dom_mask)
-        xfree(cpufreq_dom_mask);
-    if (cpufreq_dom_pt)
-        xfree(cpufreq_dom_pt);
-}
-
-int cpufreq_dom_init(void)
-{
-    unsigned int i;
-
-    cpufreq_dom_max = 0;
-
-    for_each_online_cpu(i) {
-        if (cpufreq_dom_max < processor_pminfo[i].perf.domain_info.domain)
-            cpufreq_dom_max = processor_pminfo[i].perf.domain_info.domain;
-    }
-    cpufreq_dom_max++;
-
-    cpufreq_dom_mask = xmalloc_array(unsigned long,
-                                     BITS_TO_LONGS(cpufreq_dom_max));
-    if (!cpufreq_dom_mask)
-        return -ENOMEM;
-    bitmap_zero(cpufreq_dom_mask, cpufreq_dom_max);
-
-    cpufreq_dom_pt = xmalloc_array(cpumask_t, cpufreq_dom_max);
-    if (!cpufreq_dom_pt)
-        return -ENOMEM;
-    memset(cpufreq_dom_pt, 0, cpufreq_dom_max * sizeof(cpumask_t));
-
-    for_each_online_cpu(i) {
-        __set_bit(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask);
-        cpu_set(i, cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]);
-    }
-
-    for_each_online_cpu(i)
-        processor_pminfo[i].perf.shared_cpu_map =
-            cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain];
+int cpufreq_limit_change(unsigned int cpu)
+{
+    struct processor_performance *perf = &processor_pminfo[cpu].perf;
+    struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
+    struct cpufreq_policy policy;
+
+    if (!cpu_online(cpu) || !data)
+        return -ENODEV;
+
+    if ((perf->platform_limit < 0) || 
+        (perf->platform_limit >= perf->state_count))
+        return -EINVAL;
+
+    memcpy(&policy, data, sizeof(struct cpufreq_policy)); 
+
+    policy.max =
+        perf->states[perf->platform_limit].core_frequency * 1000;
+
+    return __cpufreq_set_policy(data, &policy);
+}
+
+int cpufreq_add_cpu(unsigned int cpu)
+{
+    int ret = 0;
+    unsigned int firstcpu;
+    unsigned int dom;
+    unsigned int j;
+    struct cpufreq_policy new_policy;
+    struct cpufreq_policy *policy;
+    struct processor_performance *perf = &processor_pminfo[cpu].perf;
+
+    /* do nothing if Px is not controlled by Xen */
+    if (!(perf->init & XEN_PX_INIT))
+        return 0;
+
+    if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
+        return -EINVAL;
+
+    ret = px_statistic_init(cpu);
+    if (ret)
+        return ret;
+
+    dom = perf->domain_info.domain;
+    if (cpus_weight(cpufreq_dom_map[dom])) {
+        /* share the policy of the first cpu in the same coordination domain */
+        firstcpu = first_cpu(cpufreq_dom_map[dom]);
+        policy = cpufreq_cpu_policy[firstcpu];
+
+        cpufreq_cpu_policy[cpu] = policy;
+        cpu_set(cpu, cpufreq_dom_map[dom]);
+        cpu_set(cpu, policy->cpus);
+
+        printk(KERN_EMERG"adding CPU %u\n", cpu);
+    } else {
+        /* for the first cpu, setup policy and do init work */
+        policy = xmalloc(struct cpufreq_policy);
+        if (!policy) {
+            px_statistic_exit(cpu);
+            return -ENOMEM;
+        }
+        memset(policy, 0, sizeof(struct cpufreq_policy));
+
+        cpufreq_cpu_policy[cpu] = policy;
+        cpu_set(cpu, cpufreq_dom_map[dom]);
+        cpu_set(cpu, policy->cpus);
+
+        policy->cpu = cpu;
+        ret = cpufreq_driver->init(policy);
+        if (ret)
+            goto err1;
+        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
+    }
+
+    /*
+     * Once the full cpumap of the coordination domain has been
+     * gathered, it is safe to start the governor here.
+     */
+    if (cpus_weight(cpufreq_dom_map[dom]) ==
+        perf->domain_info.num_processors) {
+        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+        policy->governor = NULL;
+        ret = __cpufreq_set_policy(policy, &new_policy);
+        if (ret)
+            goto err2;
+    }
 
     return 0;
-}
-
-static int cpufreq_cpu_init(void)
-{
-    int i, ret = 0;
-
-    for_each_online_cpu(i) {
-        xen_px_policy[i].cpu = i;
-
-        ret = px_statistic_init(i);
-        if (ret)
-            return ret;
-
-        ret = acpi_cpufreq_cpu_init(&xen_px_policy[i]);
-        if (ret)
-            return ret;
-    }
+
+err2:
+    cpufreq_driver->exit(policy);
+err1:
+    for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
+        cpufreq_cpu_policy[j] = NULL;
+        px_statistic_exit(j);
+    }
+
+    cpus_clear(cpufreq_dom_map[dom]);
+    xfree(policy);
     return ret;
 }
 
-int cpufreq_dom_dbs(unsigned int event)
-{
-    unsigned int cpu, dom;
+int cpufreq_del_cpu(unsigned int cpu)
+{
+    unsigned int dom;
+    struct cpufreq_policy *policy;
+    struct processor_performance *perf = &processor_pminfo[cpu].perf;
+
+    /* do nothing if Px is not controlled by Xen */
+    if (!(perf->init & XEN_PX_INIT))
+        return 0;
+
+    if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
+        return -EINVAL;
+
+    dom = perf->domain_info.domain;
+    policy = cpufreq_cpu_policy[cpu];
+
+    printk(KERN_EMERG"deleting CPU %u\n", cpu);
+
+    /* stop the governor when the first cpu of a complete domain goes down */
+    if (cpus_weight(cpufreq_dom_map[dom]) ==
+        perf->domain_info.num_processors)
+        __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+
+    cpufreq_cpu_policy[cpu] = NULL;
+    cpu_clear(cpu, policy->cpus);
+    cpu_clear(cpu, cpufreq_dom_map[dom]);
+    px_statistic_exit(cpu);
+
+    /* For the last cpu of the domain, clean up: it is now safe */
+    /* to free freq_table, drv_data and the policy itself. */
+    if (!cpus_weight(cpufreq_dom_map[dom])) {
+        cpufreq_driver->exit(policy);
+        xfree(policy);
+    }
+
+    return 0;
+}
+
+static int __init cpufreq_driver_init(void)
+{
     int ret = 0;
 
-    for (dom = 0; dom < cpufreq_dom_max; dom++) {
-        if (!test_bit(dom, cpufreq_dom_mask))
-            continue;
-        cpu = first_cpu(cpufreq_dom_pt[dom]);
-        ret = cpufreq_governor_dbs(&xen_px_policy[cpu], event);
-        if (ret)
-            return ret;
-    }
+    if ((cpufreq_controller == FREQCTL_xen) &&
+        (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
+        ret = cpufreq_register_driver(&acpi_cpufreq_driver);
+
     return ret;
 }
-
-int acpi_cpufreq_init(void)
-{
-    int ret = 0;
-    
-    /* setup cpumask of psd dom and shared cpu map of cpu */
-    ret = cpufreq_dom_init();
-    if (ret)
-        goto err;
-
-    /* setup cpufreq driver */
-    cpufreq_driver = &acpi_cpufreq_driver;
-
-    /* setup cpufreq infrastructure */
-    ret = cpufreq_cpu_init();
-    if (ret)
-        goto err;
-
-    /* setup cpufreq dbs according to dom coordiation */
-    ret = cpufreq_dom_dbs(CPUFREQ_GOV_START);
-    if (ret)
-        goto err;
-
-    return ret;
-
-err:
-    cpufreq_dom_exit();
-    return ret;
-}
+__initcall(cpufreq_driver_init);
diff -r 346c073ed6a4 -r f125e481d8b6 xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c      Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c      Fri Sep 12 10:34:50 2008 +0100
@@ -238,4 +238,9 @@ int cpufreq_governor_dbs(struct cpufreq_
         break;
     }
     return 0;
-} 
+}
+
+struct cpufreq_governor cpufreq_gov_dbs = {
+    .name = "ondemand",
+    .governor = cpufreq_governor_dbs,
+};
diff -r 346c073ed6a4 -r f125e481d8b6 xen/arch/x86/acpi/cpufreq/powernow.c
--- a/xen/arch/x86/acpi/cpufreq/powernow.c      Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c      Fri Sep 12 10:34:50 2008 +0100
@@ -50,7 +50,7 @@
 #define MSR_PSTATE_CUR_LIMIT    0xc0010061 /* pstate current limit MSR */
 
 extern struct processor_pminfo processor_pminfo[NR_CPUS];
-extern struct cpufreq_policy xen_px_policy[NR_CPUS];
+extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS];
 
 struct powernow_cpufreq_data {
     struct processor_performance *acpi_data;
@@ -281,9 +281,9 @@ int powernow_cpufreq_init(void)
 
     /* setup cpufreq infrastructure */
     for_each_online_cpu(i) {
-        xen_px_policy[i].cpu = i;
-
-        ret = powernow_cpufreq_cpu_init(&xen_px_policy[i]);
+        cpufreq_cpu_policy[i]->cpu = i;
+
+        ret = powernow_cpufreq_cpu_init(cpufreq_cpu_policy[i]);
         if (ret)
             goto cpufreq_init_out;
     }
@@ -293,7 +293,7 @@ int powernow_cpufreq_init(void)
         if (!cpu_isset(dom, dom_mask))
             continue;
         i = first_cpu(pt[dom]);
-        ret = cpufreq_governor_dbs(&xen_px_policy[i], CPUFREQ_GOV_START);
+        ret = cpufreq_governor_dbs(cpufreq_cpu_policy[i], CPUFREQ_GOV_START);
         if (ret)
             goto cpufreq_init_out;
     }
diff -r 346c073ed6a4 -r f125e481d8b6 xen/arch/x86/acpi/cpufreq/utility.c
--- a/xen/arch/x86/acpi/cpufreq/utility.c       Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/utility.c       Fri Sep 12 10:34:50 2008 +0100
@@ -31,46 +31,13 @@
 #include <acpi/cpufreq/cpufreq.h>
 #include <public/sysctl.h>
 
-struct cpufreq_driver *cpufreq_driver;
+struct cpufreq_driver   *cpufreq_driver;
+struct processor_pminfo processor_pminfo[NR_CPUS];
+struct cpufreq_policy   *cpufreq_cpu_policy[NR_CPUS];
 
 /*********************************************************************
  *                    Px STATISTIC INFO                              *
  *********************************************************************/
-
-void px_statistic_suspend(void)
-{
-    int cpu;
-    uint64_t now;
-
-    now = NOW();
-
-    for_each_online_cpu(cpu) {
-        struct pm_px *pxpt = &px_statistic_data[cpu];
-        uint64_t total_idle_ns;
-        uint64_t tmp_idle_ns;
-
-        total_idle_ns = get_cpu_idle_time(cpu);
-        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
-
-        pxpt->u.pt[pxpt->u.cur].residency +=
-                    now - pxpt->prev_state_wall;
-        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
-    }
-}
-
-void px_statistic_resume(void)
-{
-    int cpu;
-    uint64_t now;
-
-    now = NOW();
-
-    for_each_online_cpu(cpu) {
-        struct pm_px *pxpt = &px_statistic_data[cpu];
-        pxpt->prev_state_wall = now;
-        pxpt->prev_idle_wall = get_cpu_idle_time(cpu);
-    }
-}
 
 void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
 {
@@ -101,7 +68,7 @@ void px_statistic_update(cpumask_t cpuma
     }
 }
 
-int px_statistic_init(int cpuid)
+int px_statistic_init(unsigned int cpuid)
 {
     uint32_t i, count;
     struct pm_px *pxpt = &px_statistic_data[cpuid];
@@ -123,7 +90,7 @@ int px_statistic_init(int cpuid)
     memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
 
     pxpt->u.total = pmpt->perf.state_count;
-    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc;
+    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
 
     for (i=0; i < pmpt->perf.state_count; i++)
         pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
@@ -134,7 +101,16 @@ int px_statistic_init(int cpuid)
     return 0;
 }
 
-void px_statistic_reset(int cpuid)
+void px_statistic_exit(unsigned int cpuid)
+{
+    struct pm_px *pxpt = &px_statistic_data[cpuid];
+
+    xfree(pxpt->u.trans_pt);
+    xfree(pxpt->u.pt);
+    memset(pxpt, 0, sizeof(struct pm_px));
+}
+
+void px_statistic_reset(unsigned int cpuid)
 {
     uint32_t i, j, count;
     struct pm_px *pxpt = &px_statistic_data[cpuid];
@@ -182,6 +158,38 @@ int cpufreq_frequency_table_cpuinfo(stru
         return -EINVAL;
     else
         return 0;
+}
+
+int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
+                                   struct cpufreq_frequency_table *table)
+{
+    unsigned int next_larger = ~0;
+    unsigned int i;
+    unsigned int count = 0;
+
+    if (!cpu_online(policy->cpu))
+        return -EINVAL;
+
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                 policy->cpuinfo.max_freq);
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if ((freq >= policy->min) && (freq <= policy->max))
+            count++;
+        else if ((next_larger > freq) && (freq > policy->max))
+            next_larger = freq;
+    }
+
+    if (!count)
+        policy->max = next_larger;
+
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                 policy->cpuinfo.max_freq);
+
+    return 0;
 }
 
 int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
@@ -289,57 +297,51 @@ int __cpufreq_driver_getavg(struct cpufr
 
 
 /*********************************************************************
- *               CPUFREQ SUSPEND/RESUME                              *
- *********************************************************************/
-
-void cpufreq_suspend(void)
-{
-    int cpu;
-
-    /* to protect the case when Px was not controlled by xen */
-    for_each_online_cpu(cpu) {
-        struct processor_performance *perf = &processor_pminfo[cpu].perf;
-
-        if (!(perf->init & XEN_PX_INIT))
-            return;
-    }
-
-    cpufreq_dom_dbs(CPUFREQ_GOV_STOP);
-
-    cpufreq_dom_exit();
-
-    px_statistic_suspend();
-}
-
-int cpufreq_resume(void)
-{
-    int cpu, ret = 0;
-
-    /* 1. to protect the case when Px was not controlled by xen */
-    /* 2. set state and resume flag to sync cpu to right state and freq */
-    for_each_online_cpu(cpu) {
-        struct processor_performance *perf = &processor_pminfo[cpu].perf;
-        struct cpufreq_policy *policy = &xen_px_policy[cpu];
-
-        if (!(perf->init & XEN_PX_INIT))
-            goto err;
-        perf->state = 0;
-        policy->resume = 1;
-    }
-
-    px_statistic_resume();
-
-    ret = cpufreq_dom_init();
+ *                 POLICY                                            *
+ *********************************************************************/
+
+/*
+ * data   : current policy.
+ * policy : policy to be set.
+ */
+int __cpufreq_set_policy(struct cpufreq_policy *data,
+                                struct cpufreq_policy *policy)
+{
+    int ret = 0;
+
+    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
+
+    if (policy->min > data->min && policy->min > policy->max)
+        return -EINVAL;
+
+    /* verify the cpu speed can be set within this limit */
+    ret = cpufreq_driver->verify(policy);
     if (ret)
-        goto err;
-
-    ret = cpufreq_dom_dbs(CPUFREQ_GOV_START);
-    if (ret)
-        goto err;
-
-    return ret;
-
-err:
-    cpufreq_dom_exit();
-    return ret;
-}
+        return ret;
+
+    data->min = policy->min;
+    data->max = policy->max;
+
+    if (policy->governor != data->governor) {
+        /* save old, working values */
+        struct cpufreq_governor *old_gov = data->governor;
+
+        /* end old governor */
+        if (data->governor)
+            __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+
+        /* start new governor */
+        data->governor = policy->governor;
+        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
+            /* new governor failed, so re-start old one */
+            if (old_gov) {
+                data->governor = old_gov;
+                __cpufreq_governor(data, CPUFREQ_GOV_START);
+            }
+            return -EINVAL;
+        }
+        /* might be a policy change, too, so fall through */
+    }
+
+    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+}
diff -r 346c073ed6a4 -r f125e481d8b6 xen/arch/x86/acpi/pmstat.c
--- a/xen/arch/x86/acpi/pmstat.c        Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/arch/x86/acpi/pmstat.c        Fri Sep 12 10:34:50 2008 +0100
@@ -78,7 +78,7 @@ int do_get_pm_info(struct xen_sysctl_get
         tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
 
         now = NOW();
-        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc;
+        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
         pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
         pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
         pxpt->prev_state_wall = now;
diff -r 346c073ed6a4 -r f125e481d8b6 xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/arch/x86/acpi/power.c Fri Sep 12 10:34:50 2008 +0100
@@ -133,14 +133,14 @@ static int enter_state(u32 state)
 
     freeze_domains();
 
-    cpufreq_suspend();
-
     disable_nonboot_cpus();
     if ( num_online_cpus() != 1 )
     {
         error = -EBUSY;
         goto enable_cpu;
     }
+
+    cpufreq_del_cpu(0);
 
     hvm_cpu_down();
 
@@ -189,8 +189,8 @@ static int enter_state(u32 state)
         BUG();
 
  enable_cpu:
+    cpufreq_add_cpu(0);
     enable_nonboot_cpus();
-    cpufreq_resume();
     thaw_domains();
     spin_unlock(&pm_lock);
     return error;
diff -r 346c073ed6a4 -r f125e481d8b6 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/arch/x86/platform_hypercall.c Fri Sep 12 10:34:50 2008 +0100
@@ -393,7 +393,6 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
                 memcpy ((void *)&pxpt->status_register,
                     (void *)&xenpxpt->status_register,
                     sizeof(struct xen_pct_register));
-                pxpt->init |= XEN_PX_PCT;
             }
             if ( xenpxpt->flags & XEN_PX_PSS ) 
             {
@@ -411,7 +410,6 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
                     break;
                 }
                 pxpt->state_count = xenpxpt->state_count;
-                pxpt->init |= XEN_PX_PSS;
             }
             if ( xenpxpt->flags & XEN_PX_PSD )
             {
@@ -419,27 +417,34 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
                 memcpy ((void *)&pxpt->domain_info,
                     (void *)&xenpxpt->domain_info,
                     sizeof(struct xen_psd_package));
-                pxpt->init |= XEN_PX_PSD;
             }
             if ( xenpxpt->flags & XEN_PX_PPC )
             {
-                pxpt->ppc = xenpxpt->ppc;
-                pxpt->init |= XEN_PX_PPC;
-            }
-
-            if ( pxpt->init == ( XEN_PX_PCT | XEN_PX_PSS |
-                                 XEN_PX_PSD | XEN_PX_PPC ) )
-            {
-                pxpt->init |= XEN_PX_INIT;
+                pxpt->platform_limit = xenpxpt->platform_limit;
+
+                if ( pxpt->init == XEN_PX_INIT )
+                {
+                    ret = cpufreq_limit_change(cpuid);
+                    break;
+                }
+            }
+
+            if ( xenpxpt->flags == ( XEN_PX_PCT | XEN_PX_PSS |
+                                     XEN_PX_PSD | XEN_PX_PPC ) )
+            {
+                pxpt->init = XEN_PX_INIT;
                 cpu_count++;
-            }
-            if ( cpu_count == num_online_cpus() )
-            {
-                if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+
+                /* Currently we only handle Intel and AMD processors */
+                if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+                    ret = cpufreq_add_cpu(cpuid);
+                else if ( (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
+                    (cpu_count == num_online_cpus()) )
                     ret = powernow_cpufreq_init();
                 else
-                    ret = acpi_cpufreq_init();
-            }
+                    break;
+            }
+
             break;
         }
  
diff -r 346c073ed6a4 -r f125e481d8b6 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/arch/x86/smpboot.c    Fri Sep 12 10:34:50 2008 +0100
@@ -55,6 +55,7 @@
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
 #include <xen/stop_machine.h>
+#include <acpi/cpufreq/processor_perf.h>
 
 #define set_kernel_exec(x, y) (0)
 #define setup_trampoline()    (bootsym_phys(trampoline_realmode_entry))
@@ -1232,6 +1233,8 @@ int __cpu_disable(void)
        mdelay(1);
        local_irq_disable();
 
+       cpufreq_del_cpu(cpu);
+
        time_suspend();
 
        remove_siblinginfo(cpu);
@@ -1421,6 +1424,8 @@ int __devinit __cpu_up(unsigned int cpu)
                mb();
                process_pending_timers();
        }
+
+       cpufreq_add_cpu(cpu);
        return 0;
 }
 
diff -r 346c073ed6a4 -r f125e481d8b6 xen/include/acpi/cpufreq/cpufreq.h
--- a/xen/include/acpi/cpufreq/cpufreq.h        Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/include/acpi/cpufreq/cpufreq.h        Fri Sep 12 10:34:50 2008 +0100
@@ -18,6 +18,8 @@
 #include "processor_perf.h"
 
 #define CPUFREQ_NAME_LEN 16
+
+struct cpufreq_governor;
 
 struct cpufreq_cpuinfo {
     unsigned int        max_freq;
@@ -30,16 +32,21 @@ struct cpufreq_policy {
     unsigned int        shared_type;   /* ANY or ALL affected CPUs
                                           should set cpufreq */
     unsigned int        cpu;           /* cpu nr of registered CPU */
-    struct cpufreq_cpuinfo    cpuinfo; /* see above */
+    struct cpufreq_cpuinfo    cpuinfo;
 
     unsigned int        min;    /* in kHz */
     unsigned int        max;    /* in kHz */
     unsigned int        cur;    /* in kHz, only needed if cpufreq
                                  * governors are used */
+    struct cpufreq_governor     *governor;
+
     unsigned int        resume; /* flag for cpufreq 1st run
                                  * S3 wakeup, hotplug cpu, etc */
 };
-extern struct cpufreq_policy xen_px_policy[NR_CPUS];
+extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS];
+
+extern int __cpufreq_set_policy(struct cpufreq_policy *data,
+                                struct cpufreq_policy *policy);
 
 #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
 #define CPUFREQ_SHARED_TYPE_HW   (1) /* HW does needed coordination */
@@ -64,11 +71,26 @@ struct cpufreq_freqs {
 #define CPUFREQ_GOV_STOP   2
 #define CPUFREQ_GOV_LIMITS 3
 
+struct cpufreq_governor {
+    char    name[CPUFREQ_NAME_LEN];
+    int     (*governor)(struct cpufreq_policy *policy,
+                        unsigned int event);
+};
+
+extern struct cpufreq_governor cpufreq_gov_dbs;
+#define CPUFREQ_DEFAULT_GOVERNOR &cpufreq_gov_dbs
+
 /* pass a target to the cpufreq driver */
 extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
                                    unsigned int target_freq,
                                    unsigned int relation);
 extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy);
+
+static __inline__ int 
+__cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
+{
+    return policy->governor->governor(policy, event);
+}
 
 
 /*********************************************************************
@@ -91,7 +113,50 @@ struct cpufreq_driver {
 
 extern struct cpufreq_driver *cpufreq_driver;
 
-void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state);
+static __inline__ 
+int cpufreq_register_driver(struct cpufreq_driver *driver_data)
+{
+    if (!driver_data         || 
+        !driver_data->init   || 
+        !driver_data->exit   || 
+        !driver_data->verify || 
+        !driver_data->target)
+        return -EINVAL;
+
+    if (cpufreq_driver)
+        return -EBUSY;
+
+    cpufreq_driver = driver_data;
+    return 0;
+}
+
+static __inline__ 
+int cpufreq_unregister_driver(struct cpufreq_driver *driver)
+{
+    if (!cpufreq_driver || (driver != cpufreq_driver))
+        return -EINVAL;
+
+    cpufreq_driver = NULL;
+    return 0;
+}
+
+static __inline__
+void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
+                                  unsigned int min, unsigned int max)
+{
+    if (policy->min < min)
+        policy->min = min;
+    if (policy->max < min)
+        policy->max = min;
+    if (policy->min > max)
+        policy->min = max;
+    if (policy->max > max)
+        policy->max = max;
+    if (policy->min > policy->max)
+        policy->min = policy->max;
+    return;
+}
+
 
 /*********************************************************************
  *                     FREQUENCY TABLE HELPERS                       *
@@ -107,6 +172,9 @@ struct cpufreq_frequency_table {
 };
 
 int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
+                   struct cpufreq_frequency_table *table);
+
+int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
                    struct cpufreq_frequency_table *table);
 
 int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
diff -r 346c073ed6a4 -r f125e481d8b6 xen/include/acpi/cpufreq/processor_perf.h
--- a/xen/include/acpi/cpufreq/processor_perf.h Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/include/acpi/cpufreq/processor_perf.h Fri Sep 12 10:34:50 2008 +0100
@@ -7,26 +7,23 @@
 #define XEN_PX_INIT 0x80000000
 
 int get_cpu_id(u8);
-int acpi_cpufreq_init(void);
 int powernow_cpufreq_init(void);
 
 void px_statistic_update(cpumask_t, uint8_t, uint8_t);
-int  px_statistic_init(int);
-void px_statistic_reset(int);
-void px_statistic_suspend(void);
-void px_statistic_resume(void);
+int  px_statistic_init(unsigned int);
+void px_statistic_exit(unsigned int);
+void px_statistic_reset(unsigned int);
 
-void cpufreq_dom_exit(void);
-int  cpufreq_dom_init(void);
-int  cpufreq_dom_dbs(unsigned int);
-void cpufreq_suspend(void);
-int  cpufreq_resume(void);
+int  cpufreq_limit_change(unsigned int);
+
+int  cpufreq_add_cpu(unsigned int);
+int  cpufreq_del_cpu(unsigned int);
 
 uint64_t get_cpu_idle_time(unsigned int);
 
 struct processor_performance {
     uint32_t state;
-    uint32_t ppc;
+    uint32_t platform_limit;
     struct xen_pct_register control_register;
     struct xen_pct_register status_register;
     uint32_t state_count;
diff -r 346c073ed6a4 -r f125e481d8b6 xen/include/public/platform.h
--- a/xen/include/public/platform.h     Thu Sep 11 18:00:06 2008 +0100
+++ b/xen/include/public/platform.h     Fri Sep 12 10:34:50 2008 +0100
@@ -289,7 +289,7 @@ struct xen_psd_package {
 
 struct xen_processor_performance {
     uint32_t flags;     /* flag for Px sub info type */
-    uint32_t ppc;       /* Platform limitation on freq usage */
+    uint32_t platform_limit;  /* Platform limitation on freq usage */
     struct xen_pct_register control_register;
     struct xen_pct_register status_register;
     uint32_t state_count;     /* total available performance states */
