
[Xen-changelog] [xen-unstable] x86: Implement cpufreq ondemand policy



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1210769726 -3600
# Node ID c9ec94410137d9509a6ea8329e9c0a1f22a4378b
# Parent  50fb7620d05ad869f354619e9fdf72e316c0d2b5
x86: Implement cpufreq ondemand policy

Based on the initialized cpufreq infrastructure, set up a timer on
each PSD domain, periodically sample the cpu load, and drive the cpu
to a suitable Px state according to cpu load and cpu coordination.

Signed-off-by: Liu Jinsong <jinsong.liu@xxxxxxxxx>
---
 xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c |  231 +++++++++++++++++++++++++++
 1 files changed, 231 insertions(+)

diff -r 50fb7620d05a -r c9ec94410137 xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c      Wed May 14 13:55:11 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c      Wed May 14 13:55:26 2008 +0100
@@ -1,6 +1,237 @@
+/*
+ *  xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
+ *
+ *  Copyright (C)  2001 Russell King
+ *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>.
+ *                      Jun Nakajima <jun.nakajima@xxxxxxxxx>
+ *             Feb 2008 Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *             Ported cpufreq_ondemand.c from Linux 2.6.23 to the Xen hypervisor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <xen/types.h>
+#include <xen/percpu.h>
+#include <xen/cpumask.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <asm/config.h>
 #include <acpi/cpufreq/cpufreq.h>
 
+#define DEF_FREQUENCY_UP_THRESHOLD              (80)
+
+#define MIN_DBS_INTERVAL                        (MICROSECS(100))
+#define MIN_SAMPLING_MILLISECS                  (20)
+#define MIN_STAT_SAMPLING_RATE                   \
+    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
+#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
+#define TRANSITION_LATENCY_LIMIT                (10 * 1000)
+
+static uint64_t def_sampling_rate;
+
+/* Sampling types (only DBS_NORMAL_SAMPLE is used in this file) */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
+
+static unsigned int dbs_enable;    /* number of CPUs using this policy */
+
+static struct dbs_tuners {
+    uint64_t     sampling_rate;
+    unsigned int up_threshold;
+    unsigned int ignore_nice;
+    unsigned int powersave_bias;
+} dbs_tuners_ins = {
+    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
+    .ignore_nice = 0,
+    .powersave_bias = 0,
+};
+
+static struct timer dbs_timer[NR_CPUS];
+
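+/*
+ * On Xen, the idle vcpu's RUNSTATE_running time is exactly the time
+ * the physical cpu has spent idle, so the idle counter is that
+ * accumulated time plus the in-progress interval if the idle vcpu is
+ * running right now.
+ */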
+static inline uint64_t get_cpu_idle_time(unsigned int cpu)
+{
+    uint64_t idle_ns;
+    struct vcpu *v;
+
+    if ((v = idle_vcpu[cpu]) == NULL)
+        return 0;
+
+    idle_ns = v->runstate.time[RUNSTATE_running];
+    if (v->is_running)
+        idle_ns += NOW() - v->runstate.state_entry_time;
+
+    return idle_ns;
+}
+
+static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
+{
+    unsigned int load = 0;
+    uint64_t cur_ns, idle_ns, total_ns;
+
+    struct cpufreq_policy *policy;
+    unsigned int j;
+
+    if (!this_dbs_info->enable)
+        return;
+
+    policy = this_dbs_info->cur_policy;
+    cur_ns = NOW();
+    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
+    this_dbs_info->prev_cpu_wall = cur_ns;
+
+    if (total_ns < MIN_DBS_INTERVAL)
+        return;
+
+    /*
+     * Take the minimum idle time over all cpus sharing this policy
+     * (the PSD coordination domain), so load reflects the busiest cpu.
+     */
+    idle_ns = ~0ULL;
+    for_each_cpu_mask(j, policy->cpus) {
+        uint64_t total_idle_ns;
+        uint64_t tmp_idle_ns;
+        struct cpu_dbs_info_s *j_dbs_info;
+
+        j_dbs_info = &per_cpu(cpu_dbs_info, j);
+        total_idle_ns = get_cpu_idle_time(j);
+        tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
+        j_dbs_info->prev_cpu_idle = total_idle_ns;
+
+        if (tmp_idle_ns < idle_ns)
+            idle_ns = tmp_idle_ns;
+    }
+
+    if (likely(total_ns > idle_ns))
+        load = (100 * (total_ns - idle_ns)) / total_ns;
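+    /* e.g. total_ns = 20ms, idle_ns = 6ms  =>  load = 70 */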
+
+    /* Check for frequency increase */
+    if (load > dbs_tuners_ins.up_threshold) {
+        /* if we are already at full speed then break out early */
+        if (policy->cur == policy->max)
+            return;
+        __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
+        return;
+    }
+
+    /* Check for frequency decrease */
+    /* if we cannot reduce the frequency anymore, break out early */
+    if (policy->cur == policy->min)
+        return;
+
+    /*
+     * The optimal frequency is the lowest frequency that can support
+     * the current CPU usage without triggering the up policy. To be
+     * safe, we aim 10 percentage points under the threshold.
+     */
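+    /*
+     * Worked example: with up_threshold = 80 the target band is
+     * load <= 70; at load = 35 and freq_cur = 2000MHz this picks
+     * freq_next = 2000 * 35 / 70 = 1000MHz, at which the same work
+     * would run at roughly 70% load.
+     */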
+    if (load < (dbs_tuners_ins.up_threshold - 10)) {
+        unsigned int freq_next, freq_cur;
+
+        freq_cur = __cpufreq_driver_getavg(policy);
+        if (!freq_cur)
+            freq_cur = policy->cur;
+
+        freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
+
+        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
+    }
+}
+
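+/*
+ * Xen timers are one-shot: each expiry samples the load and then
+ * re-arms the timer for the next sampling interval.
+ */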
+static void do_dbs_timer(void *dbs)
+{
+    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;
+
+    if (!dbs_info->enable)
+        return;
+
+    dbs_check_cpu(dbs_info);
+
+    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+}
+
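+/*
+ * The sampling timer is bound to the governed cpu, so the load check
+ * and any resulting frequency transition run on that cpu.
+ */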
+static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
+{
+    dbs_info->enable = 1;
+
+    init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, 
+        (void *)dbs_info, dbs_info->cpu);
+
+    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+}
+
+static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
+{
+    dbs_info->enable = 0;
+    stop_timer(&dbs_timer[dbs_info->cpu]);
+}
+
 int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
 {
+    unsigned int cpu = policy->cpu;
+    struct cpu_dbs_info_s *this_dbs_info;
+    unsigned int j;
+
+    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
+
+    switch (event) {
+    case CPUFREQ_GOV_START:
+        if ((!cpu_online(cpu)) || (!policy->cur))
+            return -EINVAL;
+
+        if (policy->cpuinfo.transition_latency >
+            (TRANSITION_LATENCY_LIMIT * 1000)) {
+            printk(KERN_WARNING "ondemand governor failed to load "
+                "due to excessively long transition latency\n");
+            return -EINVAL;
+        }
+        if (this_dbs_info->enable)
+            /* Already enabled */
+            break;
+
+        dbs_enable++;
+
+        for_each_cpu_mask(j, policy->cpus) {
+            struct cpu_dbs_info_s *j_dbs_info;
+            j_dbs_info = &per_cpu(cpu_dbs_info, j);
+            j_dbs_info->cur_policy = policy;
+
+            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
+            j_dbs_info->prev_cpu_wall = NOW();
+        }
+        this_dbs_info->cpu = cpu;
+        /*
+         * Choose the default sampling rate the first time this
+         * governor is started on any cpu.
+         */
+        if (dbs_enable == 1) {
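+            /*
+             * Sampling at 1000x the transition latency keeps switching
+             * overhead near 0.1% of cpu time; e.g. a 20us latency
+             * yields a 20ms sampling rate, which is also the floor
+             * enforced below.
+             */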
+            def_sampling_rate = policy->cpuinfo.transition_latency *
+                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
+
+            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
+                def_sampling_rate = MIN_STAT_SAMPLING_RATE;
+
+            dbs_tuners_ins.sampling_rate = def_sampling_rate;
+        }
+        dbs_timer_init(this_dbs_info);
+
+        break;
+
+    case CPUFREQ_GOV_STOP:
+        if (this_dbs_info->enable) {
+            dbs_timer_exit(this_dbs_info);
+            dbs_enable--;
+        }
+
+        break;
+
+    case CPUFREQ_GOV_LIMITS:
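+        /* Clamp the running frequency into the new [min, max] range. */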
+        if (policy->max < this_dbs_info->cur_policy->cur)
+            __cpufreq_driver_target(this_dbs_info->cur_policy,
+                policy->max, CPUFREQ_RELATION_H);
+        else if (policy->min > this_dbs_info->cur_policy->cur)
+            __cpufreq_driver_target(this_dbs_info->cur_policy,
+                policy->min, CPUFREQ_RELATION_L);
+        break;
+    }
     return 0;
 }

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

