[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] x86/cpufreq: don't use static array for large per-CPU data structures



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1222093443 -3600
# Node ID d8a2d117225c316438f327ae09e6d365c336e8eb
# Parent  1c09b810f97757288a0908b22d9d2b11c5d13501
x86/cpufreq: don't use static array for large per-CPU data structures

... as this is rather wasteful when Xen is configured to support many
CPUs but is running on systems having only a few.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/x86/acpi/cpu_idle.c              |   39 ++++++++++++++++++--------
 xen/arch/x86/acpi/cpufreq/cpufreq.c       |   25 +++++++++-------
 xen/arch/x86/acpi/cpufreq/powernow.c      |   17 ++++-------
 xen/arch/x86/acpi/cpufreq/utility.c       |   45 ++++++++++++++++++++++--------
 xen/arch/x86/acpi/pmstat.c                |   13 +++++---
 xen/arch/x86/platform_hypercall.c         |   15 ++++++++--
 xen/include/acpi/cpufreq/processor_perf.h |    4 +-
 7 files changed, 105 insertions(+), 53 deletions(-)

diff -r 1c09b810f977 -r d8a2d117225c xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Mon Sep 22 15:20:25 2008 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c      Mon Sep 22 15:24:03 2008 +0100
@@ -66,7 +66,7 @@ static int local_apic_timer_c2_ok __read
 static int local_apic_timer_c2_ok __read_mostly = 0;
 boolean_param("lapic_timer_c2_ok", local_apic_timer_c2_ok);
 
-static struct acpi_processor_power processor_powers[NR_CPUS];
+static struct acpi_processor_power *__read_mostly processor_powers[NR_CPUS];
 
 static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
 {
@@ -91,8 +91,11 @@ static void print_acpi_power(uint32_t cp
 
 static void dump_cx(unsigned char key)
 {
-    for( int i = 0; i < num_online_cpus(); i++ )
-        print_acpi_power(i, &processor_powers[i]);
+    unsigned int cpu;
+
+    for_each_online_cpu ( cpu )
+        if (processor_powers[cpu])
+            print_acpi_power(cpu, processor_powers[cpu]);
 }
 
 static int __init cpu_idle_key_init(void)
@@ -193,13 +196,11 @@ static struct {
 
 static void acpi_processor_idle(void)
 {
-    struct acpi_processor_power *power = NULL;
+    struct acpi_processor_power *power = processor_powers[smp_processor_id()];
     struct acpi_processor_cx *cx = NULL;
     int next_state;
     int sleep_ticks = 0;
     u32 t1, t2 = 0;
-
-    power = &processor_powers[smp_processor_id()];
 
     /*
      * Interrupts must be disabled during bus mastering calculations and
@@ -213,7 +214,7 @@ static void acpi_processor_idle(void)
         return;
     }
 
-    next_state = cpuidle_current_governor->select(power);
+    next_state = power ? cpuidle_current_governor->select(power) : -1;
     if ( next_state > 0 )
     {
         cx = &power->states[next_state];
@@ -675,7 +676,15 @@ long set_cx_pminfo(uint32_t cpu, struct 
         return -EFAULT;
     }
 
-    acpi_power = &processor_powers[cpu_id];
+    acpi_power = processor_powers[cpu_id];
+    if ( !acpi_power )
+    {
+        acpi_power = xmalloc(struct acpi_processor_power);
+        if ( !acpi_power )
+            return -ENOMEM;
+        memset(acpi_power, 0, sizeof(*acpi_power));
+        processor_powers[cpu_id] = acpi_power;
+    }
 
     init_cx_pminfo(acpi_power);
 
@@ -713,19 +722,27 @@ long set_cx_pminfo(uint32_t cpu, struct 
 
 uint32_t pmstat_get_cx_nr(uint32_t cpuid)
 {
-    return processor_powers[cpuid].count;
+    return processor_powers[cpuid] ? processor_powers[cpuid]->count : 0;
 }
 
 int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
 {
-    struct acpi_processor_power *power = &processor_powers[cpuid];
+    const struct acpi_processor_power *power = processor_powers[cpuid];
     struct vcpu *v = idle_vcpu[cpuid];
     uint64_t usage;
     int i;
 
+    if ( power == NULL )
+    {
+        stat->last = 0;
+        stat->nr = 0;
+        stat->idle_time = 0;
+        return 0;
+    }
+
     stat->last = (power->last_state) ?
         (int)(power->last_state - &power->states[0]) : 0;
-    stat->nr = processor_powers[cpuid].count;
+    stat->nr = power->count;
     stat->idle_time = v->runstate.time[RUNSTATE_running];
     if ( v->is_running )
         stat->idle_time += NOW() - v->runstate.state_entry_time;
diff -r 1c09b810f977 -r d8a2d117225c xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c       Mon Sep 22 15:20:25 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c       Mon Sep 22 15:24:03 2008 +0100
@@ -389,11 +389,14 @@ static int acpi_cpufreq_target(struct cp
 
 static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
 {
-    struct acpi_cpufreq_data *data = drv_data[policy->cpu];
-    struct processor_performance *perf = &processor_pminfo[policy->cpu].perf;
-
-    if (!policy || !data)
+    struct acpi_cpufreq_data *data;
+    struct processor_performance *perf;
+
+    if (!policy || !(data = drv_data[policy->cpu]) ||
+        !processor_pminfo[policy->cpu])
         return -EINVAL;
+
+    perf = &processor_pminfo[policy->cpu]->perf;
 
     cpufreq_verify_within_limits(policy, 0, 
         perf->states[perf->platform_limit].core_frequency * 1000);
@@ -447,7 +450,7 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol
 
     drv_data[cpu] = data;
 
-    data->acpi_data = &processor_pminfo[cpu].perf;
+    data->acpi_data = &processor_pminfo[cpu]->perf;
 
     perf = data->acpi_data;
     policy->shared_type = perf->shared_type;
@@ -580,11 +583,11 @@ static struct cpufreq_driver acpi_cpufre
 
 int cpufreq_limit_change(unsigned int cpu)
 {
-    struct processor_performance *perf = &processor_pminfo[cpu].perf;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
     struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
     struct cpufreq_policy policy;
 
-    if (!cpu_online(cpu) || !data)
+    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
         return -ENODEV;
 
     if ((perf->platform_limit < 0) || 
@@ -607,10 +610,10 @@ int cpufreq_add_cpu(unsigned int cpu)
     unsigned int j;
     struct cpufreq_policy new_policy;
     struct cpufreq_policy *policy;
-    struct processor_performance *perf = &processor_pminfo[cpu].perf;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
 
     /* to protect the case when Px was not controlled by xen */
-    if (!(perf->init & XEN_PX_INIT))
+    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
         return 0;
 
     if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
@@ -683,10 +686,10 @@ int cpufreq_del_cpu(unsigned int cpu)
 {
     unsigned int dom;
     struct cpufreq_policy *policy;
-    struct processor_performance *perf = &processor_pminfo[cpu].perf;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
 
     /* to protect the case when Px was not controlled by xen */
-    if (!(perf->init & XEN_PX_INIT))
+    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
         return 0;
 
     if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
diff -r 1c09b810f977 -r d8a2d117225c xen/arch/x86/acpi/cpufreq/powernow.c
--- a/xen/arch/x86/acpi/cpufreq/powernow.c      Mon Sep 22 15:20:25 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c      Mon Sep 22 15:24:03 2008 +0100
@@ -49,9 +49,6 @@
 #define MSR_PSTATE_CTRL         0xc0010062 /* Pstate control MSR */
 #define MSR_PSTATE_CUR_LIMIT    0xc0010061 /* pstate current limit MSR */
 
-extern struct processor_pminfo processor_pminfo[NR_CPUS];
-extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS];
-
 struct powernow_cpufreq_data {
     struct processor_performance *acpi_data;
     struct cpufreq_frequency_table *freq_table;
@@ -149,7 +146,7 @@ static int powernow_cpufreq_cpu_init(str
 
     drv_data[cpu] = data;
 
-    data->acpi_data = &processor_pminfo[cpu].perf;
+    data->acpi_data = &processor_pminfo[cpu]->perf;
 
     perf = data->acpi_data;
     policy->shared_type = perf->shared_type;
@@ -257,8 +254,8 @@ int powernow_cpufreq_init(void)
        }
         if (ret)
             return ret;
-        if (max_dom < processor_pminfo[i].perf.domain_info.domain)
-            max_dom = processor_pminfo[i].perf.domain_info.domain;
+        if (max_dom < processor_pminfo[i]->perf.domain_info.domain)
+            max_dom = processor_pminfo[i]->perf.domain_info.domain;
     }
     max_dom++;
 
@@ -274,13 +271,13 @@ int powernow_cpufreq_init(void)
 
     /* get cpumask of each psd domain */
     for_each_online_cpu(i) {
-        __set_bit(processor_pminfo[i].perf.domain_info.domain, dom_mask);
-        cpu_set(i, pt[processor_pminfo[i].perf.domain_info.domain]);
+        __set_bit(processor_pminfo[i]->perf.domain_info.domain, dom_mask);
+        cpu_set(i, pt[processor_pminfo[i]->perf.domain_info.domain]);
     }
 
     for_each_online_cpu(i)
-        processor_pminfo[i].perf.shared_cpu_map = 
-            pt[processor_pminfo[i].perf.domain_info.domain];
+        processor_pminfo[i]->perf.shared_cpu_map =
+            pt[processor_pminfo[i]->perf.domain_info.domain];
 
     cpufreq_driver = &powernow_cpufreq_driver;
 
diff -r 1c09b810f977 -r d8a2d117225c xen/arch/x86/acpi/cpufreq/utility.c
--- a/xen/arch/x86/acpi/cpufreq/utility.c       Mon Sep 22 15:20:25 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/utility.c       Mon Sep 22 15:24:03 2008 +0100
@@ -32,8 +32,8 @@
 #include <public/sysctl.h>
 
 struct cpufreq_driver   *cpufreq_driver;
-struct processor_pminfo processor_pminfo[NR_CPUS];
-struct cpufreq_policy   *cpufreq_cpu_policy[NR_CPUS];
+struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
+struct cpufreq_policy   *__read_mostly cpufreq_cpu_policy[NR_CPUS];
 
 /*********************************************************************
  *                    Px STATISTIC INFO                              *
@@ -47,10 +47,13 @@ void px_statistic_update(cpumask_t cpuma
     now = NOW();
 
     for_each_cpu_mask(i, cpumask) {
-        struct pm_px *pxpt = &px_statistic_data[i];
-        uint32_t statnum = processor_pminfo[i].perf.state_count;
+        struct pm_px *pxpt = px_statistic_data[i];
+        struct processor_pminfo *pmpt = processor_pminfo[i];
         uint64_t total_idle_ns;
         uint64_t tmp_idle_ns;
+
+        if ( !pxpt || !pmpt )
+            continue;
 
         total_idle_ns = get_cpu_idle_time(i);
         tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
@@ -61,7 +64,7 @@ void px_statistic_update(cpumask_t cpuma
         pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
         pxpt->u.pt[from].residency -= tmp_idle_ns;
 
-        (*(pxpt->u.trans_pt + from*statnum + to))++;
+        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
 
         pxpt->prev_state_wall = now;
         pxpt->prev_idle_wall = total_idle_ns;
@@ -71,10 +74,22 @@ int px_statistic_init(unsigned int cpuid
 int px_statistic_init(unsigned int cpuid)
 {
     uint32_t i, count;
-    struct pm_px *pxpt = &px_statistic_data[cpuid];
-    struct processor_pminfo *pmpt = &processor_pminfo[cpuid];
+    struct pm_px *pxpt = px_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
 
     count = pmpt->perf.state_count;
+
+    if ( !pmpt )
+        return -EINVAL;
+
+    if ( !pxpt )
+    {
+        pxpt = xmalloc(struct pm_px);
+        if ( !pxpt )
+            return -ENOMEM;
+        memset(pxpt, 0, sizeof(*pxpt));
+        px_statistic_data[cpuid] = pxpt;
+    }
 
     pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
     if (!pxpt->u.trans_pt)
@@ -103,8 +118,10 @@ int px_statistic_init(unsigned int cpuid
 
 void px_statistic_exit(unsigned int cpuid)
 {
-    struct pm_px *pxpt = &px_statistic_data[cpuid];
-
+    struct pm_px *pxpt = px_statistic_data[cpuid];
+
+    if (!pxpt)
+        return;
     xfree(pxpt->u.trans_pt);
     xfree(pxpt->u.pt);
     memset(pxpt, 0, sizeof(struct pm_px));
@@ -113,9 +130,13 @@ void px_statistic_reset(unsigned int cpu
 void px_statistic_reset(unsigned int cpuid)
 {
     uint32_t i, j, count;
-    struct pm_px *pxpt = &px_statistic_data[cpuid];
-
-    count = processor_pminfo[cpuid].perf.state_count;
+    struct pm_px *pxpt = px_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+
+    if ( !pxpt || !pmpt )
+        return;
+
+    count = pmpt->perf.state_count;
 
     for (i=0; i < count; i++) {
         pxpt->u.pt[i].residency = 0;
diff -r 1c09b810f977 -r d8a2d117225c xen/arch/x86/acpi/pmstat.c
--- a/xen/arch/x86/acpi/pmstat.c        Mon Sep 22 15:20:25 2008 +0100
+++ b/xen/arch/x86/acpi/pmstat.c        Mon Sep 22 15:24:03 2008 +0100
@@ -40,7 +40,7 @@
 #include <public/sysctl.h>
 #include <acpi/cpufreq/cpufreq.h>
 
-struct pm_px px_statistic_data[NR_CPUS];
+struct pm_px *__read_mostly px_statistic_data[NR_CPUS];
 
 extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
 extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
@@ -49,15 +49,14 @@ int do_get_pm_info(struct xen_sysctl_get
 int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
 {
     int ret = 0;
-    struct pm_px *pxpt = &px_statistic_data[op->cpuid];
-    struct processor_pminfo *pmpt = &processor_pminfo[op->cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
 
     /* to protect the case when Px was not controlled by xen */
-    if ( (!(pmpt->perf.init & XEN_PX_INIT)) && 
+    if ( (!pmpt || !(pmpt->perf.init & XEN_PX_INIT)) &&
         (op->type & PMSTAT_CATEGORY_MASK) == PMSTAT_PX )
         return -EINVAL;
 
-    if ( !cpu_online(op->cpuid) )
+    if ( op->cpuid >= NR_CPUS || !cpu_online(op->cpuid) )
         return -EINVAL;
 
     switch( op->type )
@@ -73,6 +72,10 @@ int do_get_pm_info(struct xen_sysctl_get
         uint64_t now, ct;
         uint64_t total_idle_ns;
         uint64_t tmp_idle_ns;
+        struct pm_px *pxpt = px_statistic_data[op->cpuid];
+
+        if ( !pxpt )
+            return -ENODATA;
 
         total_idle_ns = get_cpu_idle_time(op->cpuid);
         tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
diff -r 1c09b810f977 -r d8a2d117225c xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Mon Sep 22 15:20:25 2008 +0100
+++ b/xen/arch/x86/platform_hypercall.c Mon Sep 22 15:24:03 2008 +0100
@@ -380,8 +380,19 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
                 ret = -EINVAL;
                 break;
             }
-            pmpt = &processor_pminfo[cpuid];
-            pxpt = &processor_pminfo[cpuid].perf;
+            pmpt = processor_pminfo[cpuid];
+            if ( !pmpt )
+            {
+                pmpt = xmalloc(struct processor_pminfo);
+                if ( !pmpt )
+                {
+                    ret = -ENOMEM;
+                    break;
+                }
+                memset(pmpt, 0, sizeof(*pmpt));
+                processor_pminfo[cpuid] = pmpt;
+            }
+            pxpt = &pmpt->perf;
             pmpt->acpi_id = xenpmpt->id;
             pmpt->id = cpuid;
 
diff -r 1c09b810f977 -r d8a2d117225c xen/include/acpi/cpufreq/processor_perf.h
--- a/xen/include/acpi/cpufreq/processor_perf.h Mon Sep 22 15:20:25 2008 +0100
+++ b/xen/include/acpi/cpufreq/processor_perf.h Mon Sep 22 15:24:03 2008 +0100
@@ -41,7 +41,7 @@ struct processor_pminfo {
     struct processor_performance    perf;
 };
 
-extern struct processor_pminfo processor_pminfo[NR_CPUS];
+extern struct processor_pminfo *processor_pminfo[NR_CPUS];
 
 struct px_stat {
     uint8_t total;        /* total Px states */
@@ -58,6 +58,6 @@ struct pm_px {
     uint64_t prev_idle_wall;
 };
 
-extern struct pm_px px_statistic_data[NR_CPUS];
+extern struct pm_px *px_statistic_data[NR_CPUS];
 
 #endif /* __XEN_PROCESSOR_PM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.