[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Add basic acpi C-states based cpu idle power mgmt in xen for x86.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1209634801 -3600
# Node ID 5bb9093eb0e9abfcf8537148ba088fd2604e5737
# Parent  ad55c06c9bbc31e4e3db2944f3a1fcbf842bd4aa
Add basic acpi C-states based cpu idle power mgmt in xen for x86.

It includes:
 1. hypercall definition for passing ACPI info.
 2. C1/C2 support.
 3. Mwait support, as well as legacy ioport.
 4. Ladder policy from Linux kernel.

A lot of code & ideas came from Linux.

Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>
---
 xen/arch/x86/acpi/Makefile               |    2 
 xen/arch/x86/acpi/cpu_idle.c             |  690 +++++++++++++++++++++++++++++++
 xen/arch/x86/domain.c                    |    5 
 xen/arch/x86/platform_hypercall.c        |   23 +
 xen/arch/x86/x86_64/Makefile             |    2 
 xen/arch/x86/x86_64/cpu_idle.c           |  128 +++++
 xen/arch/x86/x86_64/platform_hypercall.c |    4 
 xen/include/public/platform.h            |   65 ++
 xen/include/xlat.lst                     |    5 
 9 files changed, 922 insertions(+), 2 deletions(-)

diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile        Thu May 01 10:33:03 2008 +0100
+++ b/xen/arch/x86/acpi/Makefile        Thu May 01 10:40:01 2008 +0100
@@ -1,2 +1,2 @@ obj-y += boot.o
 obj-y += boot.o
-obj-y += power.o suspend.o wakeup_prot.o
+obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/acpi/cpu_idle.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/acpi/cpu_idle.c      Thu May 01 10:40:01 2008 +0100
@@ -0,0 +1,690 @@
+/*
+ * cpu_idle - xen idle state module derived from Linux 
+ *            drivers/acpi/processor_idle.c & 
+ *            arch/x86/kernel/acpi/cstate.c
+ *
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@xxxxxxxx>
+ *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@xxxxxxxxx>
+ *                      - Added processor hotplug support
+ *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
+ *                      - Added support for C3 on SMP
+ *  Copyright (C) 2007, 2008 Intel Corporation
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/lib.h>
+#include <xen/types.h>
+#include <xen/acpi.h>
+#include <xen/smp.h>
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <xen/guest_access.h>
+#include <public/platform.h>
+#include <asm/processor.h>
+#include <xen/keyhandler.h>
+
+#define DEBUG_PM_CX
+
+#define US_TO_PM_TIMER_TICKS(t)     ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
+#define C2_OVERHEAD         4   /* 1us (3.579 ticks per us) */
+#define C3_OVERHEAD         4   /* 1us (3.579 ticks per us) */
+
+#define ACPI_PROCESSOR_MAX_POWER        8
+#define ACPI_PROCESSOR_MAX_C2_LATENCY   100
+#define ACPI_PROCESSOR_MAX_C3_LATENCY   1000
+
+extern u32 pmtmr_ioport;
+extern void (*pm_idle) (void);
+
+static void (*pm_idle_save) (void) __read_mostly;
+unsigned int max_cstate __read_mostly = 2;
+integer_param("max_cstate", max_cstate);
+
+struct acpi_processor_cx;
+
+struct acpi_processor_cx_policy
+{
+    u32 count;
+    struct acpi_processor_cx *state;
+    struct
+    {
+        u32 time;
+        u32 ticks;
+        u32 count;
+        u32 bm;
+    } threshold;
+};
+
+struct acpi_processor_cx
+{
+    u8 valid;
+    u8 type;
+    u32 address;
+    u8 space_id;
+    u32 latency;
+    u32 latency_ticks;
+    u32 power;
+    u32 usage;
+    u64 time;
+    struct acpi_processor_cx_policy promotion;
+    struct acpi_processor_cx_policy demotion;
+};
+
+struct acpi_processor_power
+{
+    struct acpi_processor_cx *state;
+    u64 bm_check_timestamp;
+    u32 default_state;
+    u32 bm_activity;
+    u32 count;
+    struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
+};
+
+static struct acpi_processor_power processor_powers[NR_CPUS];
+
+static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
+{
+    uint32_t i;
+
+    printk("saved cpu%d cx acpi info:\n", cpu);
+    printk("\tcurrent state is C%d\n", (power->state)?power->state->type:-1);
+    printk("\tbm_check_timestamp = %"PRId64"\n", power->bm_check_timestamp);
+    printk("\tdefault_state = %d\n", power->default_state);
+    printk("\tbm_activity = 0x%08x\n", power->bm_activity);
+    printk("\tcount = %d\n", power->count);
+    
+    for ( i = 0; i < power->count; i++ )
+    {
+        printk("\tstates[%d]:\n", i);
+        printk("\t\tvalid   = %d\n", power->states[i].valid);
+        printk("\t\ttype    = %d\n", power->states[i].type);
+        printk("\t\taddress = 0x%x\n", power->states[i].address);
+        printk("\t\tspace_id = 0x%x\n", power->states[i].space_id);
+        printk("\t\tlatency = %d\n", power->states[i].latency);
+        printk("\t\tpower   = %d\n", power->states[i].power);
+        printk("\t\tlatency_ticks = %d\n", power->states[i].latency_ticks);
+        printk("\t\tusage   = %d\n", power->states[i].usage);
+        printk("\t\ttime    = %"PRId64"\n", power->states[i].time);
+
+        printk("\t\tpromotion policy:\n");
+        printk("\t\t\tcount    = %d\n", power->states[i].promotion.count);
+        printk("\t\t\tstate    = C%d\n",
+               (power->states[i].promotion.state) ? 
+               power->states[i].promotion.state->type : -1);
+        printk("\t\t\tthreshold.time = %d\n", power->states[i].promotion.threshold.time);
+        printk("\t\t\tthreshold.ticks = %d\n", power->states[i].promotion.threshold.ticks);
+        printk("\t\t\tthreshold.count = %d\n", power->states[i].promotion.threshold.count);
+        printk("\t\t\tthreshold.bm = %d\n", power->states[i].promotion.threshold.bm);
+
+        printk("\t\tdemotion policy:\n");
+        printk("\t\t\tcount    = %d\n", power->states[i].demotion.count);
+        printk("\t\t\tstate    = C%d\n",
+               (power->states[i].demotion.state) ? 
+               power->states[i].demotion.state->type : -1);
+        printk("\t\t\tthreshold.time = %d\n", power->states[i].demotion.threshold.time);
+        printk("\t\t\tthreshold.ticks = %d\n", power->states[i].demotion.threshold.ticks);
+        printk("\t\t\tthreshold.count = %d\n", power->states[i].demotion.threshold.count);
+        printk("\t\t\tthreshold.bm = %d\n", power->states[i].demotion.threshold.bm);
+    }
+}
+
+static void dump_cx(unsigned char key)
+{
+    for( int i = 0; i < num_online_cpus(); i++ )
+        print_acpi_power(i, &processor_powers[i]);
+}
+
+static int __init cpu_idle_key_init(void)
+{
+    register_keyhandler(
+        'c', dump_cx,        "dump cx structures");
+    return 0;
+}
+__initcall(cpu_idle_key_init);
+
+static inline u32 ticks_elapsed(u32 t1, u32 t2)
+{
+    if ( t2 >= t1 )
+        return (t2 - t1);
+    else
+        return ((0xFFFFFFFF - t1) + t2);
+}
+
+static void acpi_processor_power_activate(struct acpi_processor_power *power,
+                                          struct acpi_processor_cx *new)
+{
+    struct acpi_processor_cx *old;
+
+    if ( !power || !new )
+        return;
+
+    old = power->state;
+
+    if ( old )
+        old->promotion.count = 0;
+    new->demotion.count = 0;
+
+    power->state = new;
+
+    return;
+}
+
+static void acpi_safe_halt(void)
+{
+    smp_mb__after_clear_bit();
+    safe_halt();
+}
+
+#define MWAIT_ECX_INTERRUPT_BREAK      (0x1)
+
+static void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+{
+    __monitor((void *)current, 0, 0);
+    smp_mb();
+    __mwait(eax, ecx);
+}
+
+static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
+{
+    mwait_idle_with_hints(cx->address, MWAIT_ECX_INTERRUPT_BREAK);
+}
+
+static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
+{
+    if ( cx->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE )
+    {
+        /* Call into architectural FFH based C-state */
+        acpi_processor_ffh_cstate_enter(cx);
+    }
+    else
+    {
+        int unused;
+        /* IO port based C-state */
+        inb(cx->address);
+        /* Dummy wait op - must do something useless after P_LVL2 read
+           because chipsets cannot guarantee that STPCLK# signal
+           gets asserted in time to freeze execution properly. */
+        unused = inl(pmtmr_ioport);
+    }
+}
+
+static void acpi_processor_idle(void)
+{
+    struct acpi_processor_power *power = NULL;
+    struct acpi_processor_cx *cx = NULL;
+    struct acpi_processor_cx *next_state = NULL;
+    int sleep_ticks = 0;
+    u32 t1, t2 = 0;
+
+    power = &processor_powers[smp_processor_id()];
+
+    /*
+     * Interrupts must be disabled during bus mastering calculations and
+     * for C2/C3 transitions.
+     */
+    local_irq_disable();
+    cx = power->state;
+    if ( !cx )
+    {
+        if ( pm_idle_save )
+        {
+            printk(XENLOG_DEBUG "call pm_idle_save()\n");
+            pm_idle_save();
+        }
+        else
+        {
+            printk(XENLOG_DEBUG "call acpi_safe_halt()\n");
+            acpi_safe_halt();
+        }
+        return;
+    }
+
+    /*
+     * Sleep:
+     * ------
+     * Invoke the current Cx state to put the processor to sleep.
+     */
+    if ( cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3 )
+        smp_mb__after_clear_bit();
+
+    switch ( cx->type )
+    {
+    case ACPI_STATE_C1:
+        /*
+         * Invoke C1.
+         * Use the appropriate idle routine, the one that would
+         * be used without acpi C-states.
+         */
+        if ( pm_idle_save )
+            pm_idle_save();
+        else 
+            acpi_safe_halt();
+
+        /*
+         * TBD: Can't get time duration while in C1, as resumes
+         *      go to an ISR rather than here.  Need to instrument
+         *      base interrupt handler.
+         */
+        sleep_ticks = 0xFFFFFFFF;
+        break;
+
+    case ACPI_STATE_C2:
+        /* Get start time (ticks) */
+        t1 = inl(pmtmr_ioport);
+        /* Invoke C2 */
+        acpi_idle_do_entry(cx);
+        /* Get end time (ticks) */
+        t2 = inl(pmtmr_ioport);
+
+        /* Re-enable interrupts */
+        local_irq_enable();
+        /* Compute time (ticks) that we were actually asleep */
+        sleep_ticks =
+            ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
+        break;
+    default:
+        local_irq_enable();
+        return;
+    }
+
+    cx->usage++;
+    if ( (cx->type != ACPI_STATE_C1) && (sleep_ticks > 0) )
+        cx->time += sleep_ticks;
+
+    next_state = power->state;
+
+    /*
+     * Promotion?
+     * ----------
+     * Track the number of longs (time asleep is greater than threshold)
+     * and promote when the count threshold is reached.  Note that bus
+     * mastering activity may prevent promotions.
+     * Do not promote above max_cstate.
+     */
+    if ( cx->promotion.state &&
+         ((cx->promotion.state - power->states) <= max_cstate) )
+    {
+        if ( sleep_ticks > cx->promotion.threshold.ticks )
+        {
+            cx->promotion.count++;
+            cx->demotion.count = 0;
+            if ( cx->promotion.count >= cx->promotion.threshold.count )
+            {
+                next_state = cx->promotion.state;
+                goto end;
+            }
+        }
+    }
+
+    /*
+     * Demotion?
+     * ---------
+     * Track the number of shorts (time asleep is less than time threshold)
+     * and demote when the usage threshold is reached.
+     */
+    if ( cx->demotion.state )
+    {
+        if ( sleep_ticks < cx->demotion.threshold.ticks )
+        {
+            cx->demotion.count++;
+            cx->promotion.count = 0;
+            if ( cx->demotion.count >= cx->demotion.threshold.count )
+            {
+                next_state = cx->demotion.state;
+                goto end;
+            }
+        }
+    }
+
+end:
+    /*
+     * Demote if current state exceeds max_cstate
+     */
+    if ( (power->state - power->states) > max_cstate )
+    {
+        if ( cx->demotion.state )
+            next_state = cx->demotion.state;
+    }
+
+    /*
+     * New Cx State?
+     * -------------
+     * If we're going to start using a new Cx state we must clean up
+     * from the previous and prepare to use the new.
+     */
+    if ( next_state != power->state )
+        acpi_processor_power_activate(power, next_state);
+}
+
+static int acpi_processor_set_power_policy(struct acpi_processor_power *power)
+{
+    unsigned int i;
+    unsigned int state_is_set = 0;
+    struct acpi_processor_cx *lower = NULL;
+    struct acpi_processor_cx *higher = NULL;
+    struct acpi_processor_cx *cx;
+
+    if ( !power )
+        return -EINVAL;
+
+    /*
+     * This function sets the default Cx state policy (OS idle handler).
+     * Our scheme is to promote quickly to C2 but more conservatively
+     * to C3.  We're favoring C2  for its characteristics of low latency
+     * (quick response), good power savings, and ability to allow bus
+     * mastering activity.  Note that the Cx state policy is completely
+     * customizable and can be altered dynamically.
+     */
+
+    /* startup state */
+    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
+    {
+        cx = &power->states[i];
+        if ( !cx->valid )
+            continue;
+
+        if ( !state_is_set )
+            power->state = cx;
+        state_is_set++;
+        break;
+    }
+
+    if ( !state_is_set )
+        return -ENODEV;
+
+    /* demotion */
+    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
+    {
+        cx = &power->states[i];
+        if ( !cx->valid )
+            continue;
+
+        if ( lower )
+        {
+            cx->demotion.state = lower;
+            cx->demotion.threshold.ticks = cx->latency_ticks;
+            cx->demotion.threshold.count = 1;
+        }
+
+        lower = cx;
+    }
+
+    /* promotion */
+    for ( i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i-- )
+    {
+        cx = &power->states[i];
+        if ( !cx->valid )
+            continue;
+
+        if ( higher )
+        {
+            cx->promotion.state = higher;
+            cx->promotion.threshold.ticks = cx->latency_ticks;
+            if ( cx->type >= ACPI_STATE_C2 )
+                cx->promotion.threshold.count = 4;
+            else
+                cx->promotion.threshold.count = 10;
+        }
+
+        higher = cx;
+    }
+
+    return 0;
+}
+
+static int init_cx_pminfo(struct acpi_processor_power *acpi_power)
+{
+    memset(acpi_power, 0, sizeof(*acpi_power));
+
+    acpi_power->states[ACPI_STATE_C1].type = ACPI_STATE_C1;
+
+    acpi_power->states[ACPI_STATE_C0].valid = 1;
+    acpi_power->states[ACPI_STATE_C1].valid = 1;
+
+    acpi_power->count = 2;
+
+    return 0;
+}
+
+#define CPUID_MWAIT_LEAF (5)
+#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
+#define CPUID5_ECX_INTERRUPT_BREAK      (0x2)
+
+#define MWAIT_ECX_INTERRUPT_BREAK       (0x1)
+
+#define MWAIT_SUBSTATE_MASK (0xf)
+#define MWAIT_SUBSTATE_SIZE (4)
+
+static int acpi_processor_ffh_cstate_probe(xen_processor_cx_t *cx)
+{
+    struct cpuinfo_x86 *c = &current_cpu_data;
+    unsigned int eax, ebx, ecx, edx;
+    unsigned int edx_part;
+    unsigned int cstate_type; /* C-state type and not ACPI C-state type */
+    unsigned int num_cstate_subtype;
+
+    if ( c->cpuid_level < CPUID_MWAIT_LEAF )
+    {
+        printk(XENLOG_INFO "MWAIT leaf not supported by cpuid\n");
+        return -EFAULT;
+    }
+
+    cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+    printk(XENLOG_DEBUG "cpuid.MWAIT[.eax=%x, .ebx=%x, .ecx=%x, .edx=%x]\n",
+           eax, ebx, ecx, edx);
+
+    /* Check whether this particular cx_type (in CST) is supported or not */
+    cstate_type = (cx->reg.address >> MWAIT_SUBSTATE_SIZE) + 1;
+    edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
+    num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
+
+    if ( num_cstate_subtype < (cx->reg.address & MWAIT_SUBSTATE_MASK) )
+        return -EFAULT;
+
+    /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
+    if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+         !(ecx & CPUID5_ECX_INTERRUPT_BREAK) )
+        return -EFAULT;
+
+    printk(XENLOG_INFO "Monitor-Mwait will be used to enter C-%d state\n", cx->type);
+    return 0;
+}
+
+#define VENDOR_INTEL                   (1)
+#define NATIVE_CSTATE_BEYOND_HALT      (2)
+
+static int check_cx(xen_processor_cx_t *cx)
+{
+    if ( cx == NULL )
+        return -EINVAL;
+
+    switch ( cx->reg.space_id )
+    {
+    case ACPI_ADR_SPACE_SYSTEM_IO:
+        if ( cx->reg.address == 0 )
+            return -EINVAL;
+        break;
+
+    case ACPI_ADR_SPACE_FIXED_HARDWARE:
+        if ( cx->type > ACPI_STATE_C1 )
+        {
+            if ( cx->reg.bit_width != VENDOR_INTEL || 
+                 cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT )
+                return -EINVAL;
+
+            /* assume all logical cpu has the same support for mwait */
+            if ( acpi_processor_ffh_cstate_probe(cx) )
+                return -EFAULT;
+        }
+        break;
+
+    default:
+        return -ENODEV;
+    }
+
+    return 0;
+}
+
+static int set_cx(struct acpi_processor_power *acpi_power,
+                  xen_processor_cx_t *xen_cx)
+{
+    struct acpi_processor_cx *cx;
+
+    /* skip unsupported acpi cstate */
+    if ( check_cx(xen_cx) )
+        return -EFAULT;
+
+    cx = &acpi_power->states[xen_cx->type];
+    if ( !cx->valid )
+        acpi_power->count++;
+
+    cx->valid    = 1;
+    cx->type     = xen_cx->type;
+    cx->address  = xen_cx->reg.address;
+    cx->space_id = xen_cx->reg.space_id;
+    cx->latency  = xen_cx->latency;
+    cx->power    = xen_cx->power;
+    
+    cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+
+    return 0;   
+}
+
+static int get_cpu_id(u8 acpi_id)
+{
+    int i;
+    u8 apic_id;
+
+    apic_id = x86_acpiid_to_apicid[acpi_id];
+    if ( apic_id == 0xff )
+        return -1;
+
+    for ( i = 0; i < NR_CPUS; i++ )
+    {
+        if ( apic_id == x86_cpu_to_apicid[i] )
+            return i;
+    }
+
+    return -1;
+}
+
+#ifdef DEBUG_PM_CX
+static void print_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
+{
+    XEN_GUEST_HANDLE(xen_processor_cx_t) states;
+    xen_processor_cx_t  state;
+    XEN_GUEST_HANDLE(xen_processor_csd_t) csd;
+    xen_processor_csd_t dp;
+    uint32_t i;
+
+    printk("cpu%d cx acpi info:\n", cpu);
+    printk("\tcount = %d\n", power->count);
+    printk("\tflags: bm_cntl[%d], bm_chk[%d], has_cst[%d],\n"
+           "\t       pwr_setup_done[%d], bm_rld_set[%d]\n",
+           power->flags.bm_control, power->flags.bm_check, power->flags.has_cst,
+           power->flags.power_setup_done, power->flags.bm_rld_set);
+    
+    states = power->states;
+    
+    for ( i = 0; i < power->count; i++ )
+    {
+        if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) )
+            return;
+        
+        printk("\tstates[%d]:\n", i);
+        printk("\t\treg.space_id = 0x%x\n", state.reg.space_id);
+        printk("\t\treg.bit_width = 0x%x\n", state.reg.bit_width);
+        printk("\t\treg.bit_offset = 0x%x\n", state.reg.bit_offset);
+        printk("\t\treg.access_size = 0x%x\n", state.reg.access_size);
+        printk("\t\treg.address = 0x%"PRIx64"\n", state.reg.address);
+        printk("\t\ttype    = %d\n", state.type);
+        printk("\t\tlatency = %d\n", state.latency);
+        printk("\t\tpower   = %d\n", state.power);
+
+        csd = state.dp;
+        printk("\t\tdp(@0x%p)\n", csd.p);
+        
+        if ( csd.p != NULL )
+        {
+            if ( unlikely(copy_from_guest(&dp, csd, 1)) )
+                return;
+            printk("\t\t\tdomain = %d\n", dp.domain);
+            printk("\t\t\tcoord_type   = %d\n", dp.coord_type);
+            printk("\t\t\tnum = %d\n", dp.num);
+        }
+    }
+}
+#else
+#define print_cx_pminfo(c, p)
+#endif
+
+long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
+{
+    XEN_GUEST_HANDLE(xen_processor_cx_t) states;
+    xen_processor_cx_t xen_cx;
+    struct acpi_processor_power *acpi_power;
+    int cpu_id, i;
+
+    if ( unlikely(!guest_handle_okay(power->states, power->count)) )
+        return -EFAULT;
+
+    print_cx_pminfo(cpu, power);
+
+    /* map from acpi_id to cpu_id */
+    cpu_id = get_cpu_id((u8)cpu);
+    if ( cpu_id == -1 )
+    {
+        printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu);
+        return -EFAULT;
+    }
+
+    acpi_power = &processor_powers[cpu_id];
+
+    init_cx_pminfo(acpi_power);
+
+    states = power->states;
+
+    for ( i = 0; i < power->count; i++ )
+    {
+        if ( unlikely(copy_from_guest_offset(&xen_cx, states, i, 1)) )
+            return -EFAULT;
+
+        set_cx(acpi_power, &xen_cx);
+    }
+
+    /* FIXME: C-state dependency is not supported by far */
+    
+    /* initialize default policy */
+    acpi_processor_set_power_policy(acpi_power);
+
+    print_acpi_power(cpu_id, acpi_power);
+
+    if ( cpu_id == 0 && pm_idle_save == NULL )
+    {
+        pm_idle_save = pm_idle;
+        pm_idle = acpi_processor_idle;
+    }
+        
+    return 0;
+}
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu May 01 10:33:03 2008 +0100
+++ b/xen/arch/x86/domain.c     Thu May 01 10:40:01 2008 +0100
@@ -56,6 +56,9 @@ DEFINE_PER_CPU(u64, efer);
 DEFINE_PER_CPU(u64, efer);
 DEFINE_PER_CPU(unsigned long, cr4);
 
+static void default_idle(void);
+void (*pm_idle) (void) = default_idle;
+
 static void unmap_vcpu_info(struct vcpu *v);
 
 static void paravirt_ctxt_switch_from(struct vcpu *v);
@@ -105,7 +108,7 @@ void idle_loop(void)
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
         page_scrub_schedule_work();
-        default_idle();
+        (*pm_idle)();
         do_softirq();
     }
 }
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Thu May 01 10:33:03 2008 +0100
+++ b/xen/arch/x86/platform_hypercall.c Thu May 01 10:40:01 2008 +0100
@@ -44,6 +44,8 @@ extern spinlock_t xenpf_lock;
 
 static DEFINE_PER_CPU(uint64_t, freq);
 
+extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power);
+
 static long cpu_frequency_change_helper(void *data)
 {
     return cpu_frequency_change(this_cpu(freq));
@@ -340,6 +342,27 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
     }
     break;
 
+    case XENPF_set_processor_pminfo:
+        switch ( op->u.set_pminfo.type )
+        {
+        case XEN_PM_PX:
+            ret = -EINVAL;
+            break;
+            
+        case XEN_PM_CX:
+            ret = set_cx_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.power);
+            break;
+
+        case XEN_PM_TX:
+            ret = -EINVAL;
+            break;
+
+        default:
+            ret = -EINVAL;
+            break;
+        }
+        break;
+ 
     default:
         ret = -ENOSYS;
         break;
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile      Thu May 01 10:33:03 2008 +0100
+++ b/xen/arch/x86/x86_64/Makefile      Thu May 01 10:40:01 2008 +0100
@@ -12,6 +12,7 @@ obj-$(CONFIG_COMPAT) += domain.o
 obj-$(CONFIG_COMPAT) += domain.o
 obj-$(CONFIG_COMPAT) += physdev.o
 obj-$(CONFIG_COMPAT) += platform_hypercall.o
+obj-$(CONFIG_COMPAT) += cpu_idle.o
 
 ifeq ($(CONFIG_COMPAT),y)
 # extra dependencies
@@ -22,4 +23,5 @@ platform_hypercall.o: ../platform_hyperc
 platform_hypercall.o: ../platform_hypercall.c
 sysctl.o:      ../sysctl.c
 traps.o:       compat/traps.c
+cpu_idle.o:    ../acpi/cpu_idle.c
 endif
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/x86_64/cpu_idle.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/cpu_idle.c    Thu May 01 10:40:01 2008 +0100
@@ -0,0 +1,128 @@
+/******************************************************************************
+ * cpu_idle.c -- adapt x86/acpi/cpu_idle.c to compat guest.
+ *
+ *  Copyright (C) 2007, 2008 Intel Corporation
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#define __XEN_TOOLS__ /* for using get_xen_guest_handle macro */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/xmalloc.h>
+#include <xen/guest_access.h>
+#include <compat/platform.h>
+
+CHECK_processor_csd;
+
+DEFINE_XEN_GUEST_HANDLE(compat_processor_csd_t);
+DEFINE_XEN_GUEST_HANDLE(compat_processor_cx_t);
+
+#define xlat_page_start COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id)
+#define xlat_page_size  COMPAT_ARG_XLAT_SIZE
+#define xlat_page_left_size(xlat_page_current) \
+    (xlat_page_start + xlat_page_size - xlat_page_current)
+
+#define xlat_malloc_init(xlat_page_current)    do { \
+    xlat_page_current = xlat_page_start; \
+} while (0)
+
+static void *xlat_malloc(unsigned long *xlat_page_current, size_t size)
+{
+    void *ret;
+
+    /* normalize size to be 64 * n */
+    size = (size + 0x3fUL) & ~0x3fUL;
+
+    if ( unlikely(size > xlat_page_left_size(*xlat_page_current)) )
+        return NULL;
+
+    ret = (void *) *xlat_page_current;
+    *xlat_page_current += size;
+
+    return ret;
+}
+
+#define xlat_malloc_array(_p, _t, _c) ((_t *) xlat_malloc(&_p, sizeof(_t) * _c))
+
+static int copy_from_compat_state(xen_processor_cx_t *xen_state,
+                                  compat_processor_cx_t *state)
+{
+#define XLAT_processor_cx_HNDL_dp(_d_, _s_) do { \
+    XEN_GUEST_HANDLE(compat_processor_csd_t) dps; \
+    if ( unlikely(!compat_handle_okay((_s_)->dp, (_s_)->dpcnt)) ) \
+            return -EFAULT; \
+    guest_from_compat_handle(dps, (_s_)->dp); \
+    (_d_)->dp = guest_handle_cast(dps, xen_processor_csd_t); \
+} while (0)
+    XLAT_processor_cx(xen_state, state);
+#undef XLAT_processor_cx_HNDL_dp
+
+    return 0;
+}
+
+extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power);
+
+long compat_set_cx_pminfo(uint32_t cpu, struct compat_processor_power *power)
+{
+    struct xen_processor_power *xen_power;
+    unsigned long xlat_page_current;
+
+    xlat_malloc_init(xlat_page_current);
+
+    xen_power = xlat_malloc_array(xlat_page_current,
+                                  struct xen_processor_power, 1);
+    if ( unlikely(xen_power == NULL) )
+       return -EFAULT;
+
+#define XLAT_processor_power_HNDL_states(_d_, _s_) do { \
+    xen_processor_cx_t *xen_states = NULL; \
+\
+    if ( likely((_s_)->count > 0) ) \
+    { \
+        XEN_GUEST_HANDLE(compat_processor_cx_t) states; \
+        compat_processor_cx_t state; \
+        int i; \
+\
+        xen_states = xlat_malloc_array(xlat_page_current, \
+                                       xen_processor_cx_t, (_s_)->count); \
+        if ( unlikely(xen_states == NULL) ) \
+            return -EFAULT; \
+\
+        if ( unlikely(!compat_handle_okay((_s_)->states, (_s_)->count)) ) \
+            return -EFAULT; \
+        guest_from_compat_handle(states, (_s_)->states); \
+\
+        for ( i = 0; i < _s_->count; i++ ) \
+        { \
+           if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) ) \
+               return -EFAULT; \
+           if ( unlikely(copy_from_compat_state(&xen_states[i], &state)) ) \
+               return -EFAULT; \
+        } \
+    } \
+\
+    set_xen_guest_handle((_d_)->states, xen_states); \
+} while (0)
+    XLAT_processor_power(xen_power, power);
+#undef XLAT_processor_power_HNDL_states
+
+    return set_cx_pminfo(cpu, xen_power);
+}
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/x86_64/platform_hypercall.c
--- a/xen/arch/x86/x86_64/platform_hypercall.c  Thu May 01 10:33:03 2008 +0100
+++ b/xen/arch/x86/x86_64/platform_hypercall.c  Thu May 01 10:40:01 2008 +0100
@@ -10,6 +10,10 @@ DEFINE_XEN_GUEST_HANDLE(compat_platform_
 #define xen_platform_op     compat_platform_op
 #define xen_platform_op_t   compat_platform_op_t
 #define do_platform_op(x)   compat_platform_op(_##x)
+
+#define xen_processor_power     compat_processor_power
+#define xen_processor_power_t   compat_processor_power_t
+#define set_cx_pminfo           compat_set_cx_pminfo
 
 #define xenpf_enter_acpi_sleep compat_pf_enter_acpi_sleep
 
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/include/public/platform.h
--- a/xen/include/public/platform.h     Thu May 01 10:33:03 2008 +0100
+++ b/xen/include/public/platform.h     Thu May 01 10:40:01 2008 +0100
@@ -199,6 +199,70 @@ typedef struct xenpf_getidletime xenpf_g
 typedef struct xenpf_getidletime xenpf_getidletime_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
 
+#define XENPF_set_processor_pminfo      54
+
+/* ability bits */
+#define XEN_PROCESSOR_PM_CX    1
+#define XEN_PROCESSOR_PM_PX    2
+#define XEN_PROCESSOR_PM_TX    4
+
+/* cmd type */
+#define XEN_PM_CX   0
+#define XEN_PM_PX   1
+#define XEN_PM_TX   2
+
+struct xen_power_register {
+    uint32_t     space_id;
+    uint32_t     bit_width;
+    uint32_t     bit_offset;
+    uint32_t     access_size;
+    uint64_t     address;
+};
+
+struct xen_processor_csd {
+    uint32_t    domain;      /* domain number of one dependent group */
+    uint32_t    coord_type;  /* coordination type */
+    uint32_t    num;         /* number of processors in same domain */
+};
+typedef struct xen_processor_csd xen_processor_csd_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t);
+
+struct xen_processor_cx {
+    struct xen_power_register  reg; /* GAS for Cx trigger register */
+    uint8_t     type;     /* cstate value, c0: 0, c1: 1, ... */
+    uint32_t    latency;  /* worst latency (ms) to enter/exit this cstate */
+    uint32_t    power;    /* average power consumption(mW) */
+    uint32_t    dpcnt;    /* number of dependency entries */
+    XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */
+};
+typedef struct xen_processor_cx xen_processor_cx_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t);
+
+struct xen_processor_flags {
+    uint32_t bm_control:1;
+    uint32_t bm_check:1;
+    uint32_t has_cst:1;
+    uint32_t power_setup_done:1;
+    uint32_t bm_rld_set:1;
+};
+
+struct xen_processor_power {
+    uint32_t count;  /* number of C state entries in array below */
+    struct xen_processor_flags flags;  /* global flags of this processor */
+    XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */
+};
+
+struct xenpf_set_processor_pminfo {
+    /* IN variables */
+    uint32_t id;    /* ACPI CPU ID */
+    uint32_t type;  /* {XEN_PM_CX, ...} */
+    union {
+        struct xen_processor_power          power;/* Cx: _CST/_CSD */
+    };
+};
+typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t);
+
 struct xen_platform_op {
     uint32_t cmd;
     uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -213,6 +277,7 @@ struct xen_platform_op {
         struct xenpf_enter_acpi_sleep  enter_acpi_sleep;
         struct xenpf_change_freq       change_freq;
         struct xenpf_getidletime       getidletime;
+        struct xenpf_set_processor_pminfo set_pminfo;
         uint8_t                        pad[128];
     } u;
 };
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/include/xlat.lst
--- a/xen/include/xlat.lst      Thu May 01 10:33:03 2008 +0100
+++ b/xen/include/xlat.lst      Thu May 01 10:40:01 2008 +0100
@@ -44,3 +44,8 @@
 !      vcpu_runstate_info              vcpu.h
 ?      xenoprof_init                   xenoprof.h
 ?      xenoprof_passive                xenoprof.h
+!      power_register                  platform.h
+?      processor_csd                   platform.h
+!      processor_cx                    platform.h
+!      processor_flags                 platform.h
+!      processor_power                 platform.h

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.