[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Fix cpufreq HW-ALL coordination handle



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1239704455 -3600
# Node ID 0108af6efdaeec041676f2dc00ad8327e95f1267
# Parent  e15d30dfb6003e10b1cc4189d7c25fb3a53ac4d1
Fix cpufreq HW-ALL coordination handle

Currently, cpufreq HW-ALL coordination is handled the same way as SW-ALL.
However, the SW-ALL approach generates more IPIs, which is bad for cpuidle.
This patch implements HW-ALL coordination differently from SW-ALL, for
the sake of performance and to reduce IPIs. We also suspend/resume the
HW-ALL dbs timer around idle.

Signed-off-by: Yu, Ke <ke.yu@xxxxxxxxx>
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Signed-off-by: Tian, Kevin <kevin.tian@xxxxxxxxx>
---
 xen/arch/x86/acpi/cpu_idle.c           |    7 +
 xen/arch/x86/acpi/cpufreq/cpufreq.c    |    6 +
 xen/drivers/cpufreq/cpufreq.c          |  149 +++++++++++++++++----------------
 xen/drivers/cpufreq/cpufreq_ondemand.c |   41 +++++++++
 xen/include/acpi/cpufreq/cpufreq.h     |    4 
 5 files changed, 136 insertions(+), 71 deletions(-)

diff -r e15d30dfb600 -r 0108af6efdae xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Tue Apr 14 11:20:02 2009 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c      Tue Apr 14 11:20:55 2009 +0100
@@ -47,6 +47,7 @@
 #include <asm/processor.h>
 #include <public/platform.h>
 #include <public/sysctl.h>
+#include <acpi/cpufreq/cpufreq.h>
 
 /*#define DEBUG_PM_CX*/
 
@@ -195,6 +196,8 @@ static void acpi_processor_idle(void)
     int sleep_ticks = 0;
     u32 t1, t2 = 0;
 
+    cpufreq_dbs_timer_suspend();
+
     sched_tick_suspend();
     /*
      * sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer,
@@ -214,6 +217,7 @@ static void acpi_processor_idle(void)
     {
         local_irq_enable();
         sched_tick_resume();
+        cpufreq_dbs_timer_resume();
         return;
     }
 
@@ -234,6 +238,7 @@ static void acpi_processor_idle(void)
         else
             acpi_safe_halt();
         sched_tick_resume();
+        cpufreq_dbs_timer_resume();
         return;
     }
 
@@ -341,6 +346,7 @@ static void acpi_processor_idle(void)
     default:
         local_irq_enable();
         sched_tick_resume();
+        cpufreq_dbs_timer_resume();
         return;
     }
 
@@ -352,6 +358,7 @@ static void acpi_processor_idle(void)
     }
 
     sched_tick_resume();
+    cpufreq_dbs_timer_resume();
 
     if ( cpuidle_current_governor->reflect )
         cpuidle_current_governor->reflect(power);
diff -r e15d30dfb600 -r 0108af6efdae xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c       Tue Apr 14 11:20:02 2009 +0100
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c       Tue Apr 14 11:20:55 2009 +0100
@@ -191,7 +191,11 @@ static void drv_read(struct drv_cmd *cmd
 
 static void drv_write(struct drv_cmd *cmd)
 {
-    on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0);
+    if ((cpus_weight(cmd->mask) ==  1) &&
+        cpu_isset(smp_processor_id(), cmd->mask))
+        do_drv_write((void *)cmd);
+    else
+        on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0);
 }
 
 static u32 get_cur_val(cpumask_t mask)
diff -r e15d30dfb600 -r 0108af6efdae xen/drivers/cpufreq/cpufreq.c
--- a/xen/drivers/cpufreq/cpufreq.c     Tue Apr 14 11:20:02 2009 +0100
+++ b/xen/drivers/cpufreq/cpufreq.c     Tue Apr 14 11:20:55 2009 +0100
@@ -130,7 +130,7 @@ int cpufreq_add_cpu(unsigned int cpu)
     int ret = 0;
     unsigned int firstcpu;
     unsigned int dom, domexist = 0;
-    unsigned int j;
+    unsigned int hw_all = 0;
     struct list_head *pos;
     struct cpufreq_dom *cpufreq_dom = NULL;
     struct cpufreq_policy new_policy;
@@ -146,9 +146,8 @@ int cpufreq_add_cpu(unsigned int cpu)
     if (cpufreq_cpu_policy[cpu])
         return 0;
 
-    ret = cpufreq_statistic_init(cpu);
-    if (ret)
-        return ret;
+    if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
+        hw_all = 1;
 
     dom = perf->domain_info.domain;
 
@@ -160,61 +159,57 @@ int cpufreq_add_cpu(unsigned int cpu)
         }
     }
 
-    if (domexist) {
-        /* share policy with the first cpu since on same boat */
+    if (!domexist) {
+        cpufreq_dom = xmalloc(struct cpufreq_dom);
+        if (!cpufreq_dom)
+            return -ENOMEM;
+
+        memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom));
+        cpufreq_dom->dom = dom;
+        list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
+    } else {
+        /* domain sanity check under whatever coordination type */
+        firstcpu = first_cpu(cpufreq_dom->map);
+        if ((perf->domain_info.coord_type !=
+            processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
+            (perf->domain_info.num_processors !=
+            processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
+            return -EINVAL;
+        }
+    }
+
+    if (!domexist || hw_all) {
+        policy = xmalloc(struct cpufreq_policy);
+        if (!policy)
+            ret = -ENOMEM;
+
+        memset(policy, 0, sizeof(struct cpufreq_policy));
+        policy->cpu = cpu;
+        cpufreq_cpu_policy[cpu] = policy;
+
+        ret = cpufreq_driver->init(policy);
+        if (ret) {
+            xfree(policy);
+            return ret;
+        }
+        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
+    } else {
         firstcpu = first_cpu(cpufreq_dom->map);
         policy = cpufreq_cpu_policy[firstcpu];
 
         cpufreq_cpu_policy[cpu] = policy;
-        cpu_set(cpu, cpufreq_dom->map);
-        cpu_set(cpu, policy->cpus);
-
-        /* domain coordination sanity check */
-        if ((perf->domain_info.coord_type !=
-             processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
-            (perf->domain_info.num_processors !=
-             processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
-            ret = -EINVAL;
-            goto err2;
-        }
-
         printk(KERN_EMERG"adding CPU %u\n", cpu);
-    } else {
-        cpufreq_dom = xmalloc(struct cpufreq_dom);
-        if (!cpufreq_dom) {
-            cpufreq_statistic_exit(cpu);
-            return -ENOMEM;
-        }
-        memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom));
-        cpufreq_dom->dom = dom;
-        cpu_set(cpu, cpufreq_dom->map);
-        list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
-
-        /* for the first cpu, setup policy and do init work */
-        policy = xmalloc(struct cpufreq_policy);
-        if (!policy) {
-            list_del(&cpufreq_dom->node);
-            xfree(cpufreq_dom);
-            cpufreq_statistic_exit(cpu);
-            return -ENOMEM;
-        }
-        memset(policy, 0, sizeof(struct cpufreq_policy));
-        policy->cpu = cpu;
-        cpu_set(cpu, policy->cpus);
-        cpufreq_cpu_policy[cpu] = policy;
-
-        ret = cpufreq_driver->init(policy);
-        if (ret)
-            goto err1;
-        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
-    }
-
-    /*
-     * After get full cpumap of the coordination domain,
-     * we can safely start gov here.
-     */
-    if (cpus_weight(cpufreq_dom->map) ==
-        perf->domain_info.num_processors) {
+    }
+
+    cpu_set(cpu, policy->cpus);
+    cpu_set(cpu, cpufreq_dom->map);
+
+    ret = cpufreq_statistic_init(cpu);
+    if (ret)
+        goto err1;
+
+    if (hw_all ||
+        (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) {
         memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
         policy->governor = NULL;
 
@@ -240,22 +235,29 @@ int cpufreq_add_cpu(unsigned int cpu)
     return 0;
 
 err2:
-    cpufreq_driver->exit(policy);
+    cpufreq_statistic_exit(cpu);
 err1:
-    for_each_cpu_mask(j, cpufreq_dom->map) {
-        cpufreq_cpu_policy[j] = NULL;
-        cpufreq_statistic_exit(j);
-    }
-
-    list_del(&cpufreq_dom->node);
-    xfree(cpufreq_dom);
-    xfree(policy);
+    cpufreq_cpu_policy[cpu] = NULL;
+    cpu_clear(cpu, policy->cpus);
+    cpu_clear(cpu, cpufreq_dom->map);
+
+    if (cpus_empty(policy->cpus)) {
+        cpufreq_driver->exit(policy);
+        xfree(policy);
+    }
+
+    if (cpus_empty(cpufreq_dom->map)) {
+        list_del(&cpufreq_dom->node);
+        xfree(cpufreq_dom);
+    }
+
     return ret;
 }
 
 int cpufreq_del_cpu(unsigned int cpu)
 {
     unsigned int dom, domexist = 0;
+    unsigned int hw_all = 0;
     struct list_head *pos;
     struct cpufreq_dom *cpufreq_dom = NULL;
     struct cpufreq_policy *policy;
@@ -269,6 +271,9 @@ int cpufreq_del_cpu(unsigned int cpu)
 
     if (!cpufreq_cpu_policy[cpu])
         return 0;
+
+    if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
+        hw_all = 1;
 
     dom = perf->domain_info.domain;
     policy = cpufreq_cpu_policy[cpu];
@@ -284,23 +289,27 @@ int cpufreq_del_cpu(unsigned int cpu)
     if (!domexist)
         return -EINVAL;
 
-    /* for the first cpu of the domain, stop gov */
-    if (cpus_weight(cpufreq_dom->map) ==
-        perf->domain_info.num_processors)
+    /* for HW_ALL, stop gov for each core of the _PSD domain */
+    /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */
+    if (hw_all ||
+        (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors))
         __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 
+    cpufreq_statistic_exit(cpu);
     cpufreq_cpu_policy[cpu] = NULL;
     cpu_clear(cpu, policy->cpus);
     cpu_clear(cpu, cpufreq_dom->map);
-    cpufreq_statistic_exit(cpu);
+
+    if (cpus_empty(policy->cpus)) {
+        cpufreq_driver->exit(policy);
+        xfree(policy);
+    }
 
     /* for the last cpu of the domain, clean room */
     /* It's safe here to free freq_table, drv_data and policy */
-    if (!cpus_weight(cpufreq_dom->map)) {
-        cpufreq_driver->exit(policy);
+    if (cpus_empty(cpufreq_dom->map)) {
         list_del(&cpufreq_dom->node);
         xfree(cpufreq_dom);
-        xfree(policy);
     }
 
     printk(KERN_EMERG"deleting CPU %u\n", cpu);
diff -r e15d30dfb600 -r 0108af6efdae xen/drivers/cpufreq/cpufreq_ondemand.c
--- a/xen/drivers/cpufreq/cpufreq_ondemand.c    Tue Apr 14 11:20:02 2009 +0100
+++ b/xen/drivers/cpufreq/cpufreq_ondemand.c    Tue Apr 14 11:20:55 2009 +0100
@@ -190,6 +190,12 @@ static void dbs_timer_init(struct cpu_db
         (void *)dbs_info, dbs_info->cpu);
 
     set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+
+    if ( processor_pminfo[dbs_info->cpu]->perf.shared_type
+            == CPUFREQ_SHARED_TYPE_HW )
+    {
+        dbs_info->stoppable = 1;
+    }
 }
 
 static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
@@ -337,3 +343,38 @@ static void __exit cpufreq_gov_dbs_exit(
     cpufreq_unregister_governor(&cpufreq_gov_dbs);
 }
 __exitcall(cpufreq_gov_dbs_exit);
+
+void cpufreq_dbs_timer_suspend(void)
+{
+    int cpu;
+
+    cpu = smp_processor_id();
+
+    if ( per_cpu(cpu_dbs_info,cpu).stoppable )
+    {
+        stop_timer( &dbs_timer[cpu] );
+    }
+}
+
+void cpufreq_dbs_timer_resume(void)
+{
+    int cpu;
+    struct timer* t;
+    s_time_t now;
+
+    cpu = smp_processor_id();
+
+    if ( per_cpu(cpu_dbs_info,cpu).stoppable )
+    {
+        now = NOW();
+        t = &dbs_timer[cpu];
+        if (t->expires <= now)
+        {
+            t->function(t->data);
+        }
+        else
+        {
+            set_timer(t, align_timer(now , dbs_tuners_ins.sampling_rate));
+        }
+    }
+}
diff -r e15d30dfb600 -r 0108af6efdae xen/include/acpi/cpufreq/cpufreq.h
--- a/xen/include/acpi/cpufreq/cpufreq.h        Tue Apr 14 11:20:02 2009 +0100
+++ b/xen/include/acpi/cpufreq/cpufreq.h        Tue Apr 14 11:20:55 2009 +0100
@@ -221,6 +221,7 @@ struct cpu_dbs_info_s {
     struct cpufreq_frequency_table *freq_table;
     int cpu;
     unsigned int enable:1;
+    unsigned int stoppable:1;
 };
 
 int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event);
@@ -232,4 +233,7 @@ int write_ondemand_up_threshold(unsigned
 int write_ondemand_up_threshold(unsigned int up_threshold);
 
 int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq);
+
+void cpufreq_dbs_timer_suspend(void);
+void cpufreq_dbs_timer_resume(void);
 #endif /* __XEN_CPUFREQ_PM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.