
[Xen-changelog] [xen-unstable] Add acpi C3 support for x86.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1209635219 -3600
# Node ID 520519f5e346034585dfe65b2e6ca5ec3fc49687
# Parent  93bc6d9b5f3186ad5f0221a8e23e524284c3197e
Add acpi C3 support for x86.

C3 and deeper C-states need either ARB_DIS (bus-master arbitration
disable) or a cache flush. ARB_DIS must be performed by the last CPU
that is ready to enter C3. Bus-master activity is checked and recorded
for C-state promotion/demotion decisions.

C3 is disabled by default until the TSC/APIC stop issues are resolved.
Adding the command-line option "max_cstate=3" enables C3.

Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>
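
For readers skimming the diff, the "ARB_DIS within the last CPU" handshake
described above condenses to the sketch below. The identifiers
(c3_cpu_count, acpi_set_register(), ACPI_FLUSH_CPU_CACHE(),
acpi_idle_do_entry(), num_online_cpus()) are the ones the patch itself
uses; the wrapper function name is only for illustration.

/*
 * Condensed sketch (not the literal hunk) of how the patch enters C3:
 * the last CPU to arrive disables bus-master arbitration, and every
 * CPU re-enables it on the way out of C3.
 */
static void enter_c3_arb_dis(struct acpi_processor_power *power,
                             struct acpi_processor_cx *cx)
{
    if ( power->flags.bm_check && power->flags.bm_control )
    {
        atomic_inc(&c3_cpu_count);
        /* Only the last CPU ready for C3 may disable arbitration. */
        if ( atomic_read(&c3_cpu_count) == num_online_cpus() )
            acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
    }
    else if ( !power->flags.bm_check )
    {
        /* No bus-master tracking: flush caches before entering C3. */
        ACPI_FLUSH_CPU_CACHE();
    }

    acpi_idle_do_entry(cx);    /* actually enter C3 */

    if ( power->flags.bm_check && power->flags.bm_control )
    {
        /* Re-enable bus-master arbitration once any CPU is awake again. */
        atomic_dec(&c3_cpu_count);
        acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
    }
}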
---
 xen/arch/x86/acpi/cpu_idle.c |  280 +++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 273 insertions(+), 7 deletions(-)
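
The first hunk below also adds a "bm_history" boot parameter: a bit-mask
holding one bit per jiffy of recent bus-master activity, with a default
that depends on HZ. A quick stand-alone check of that formula (HZ=100 is
an assumption chosen for the example; the hypervisor obviously does not
call printf):

#include <stdio.h>

#define HZ 100  /* example value only; Xen defines its own HZ */

int main(void)
{
    unsigned int jiffies = HZ / 25;
    /* For HZ >= 800 the mask saturates at 32 bits (32 jiffies). */
    unsigned int bm_history =
        (HZ >= 800) ? 0xFFFFFFFFu : ((1u << jiffies) - 1);

    /* For HZ=100 this prints: bm_history = 0xf (4 jiffies = 40 ms) */
    printf("bm_history = %#x (%u jiffies = %u ms)\n",
           bm_history, jiffies, jiffies * 1000 / HZ);
    return 0;
}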

diff -r 93bc6d9b5f31 -r 520519f5e346 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Thu May 01 10:41:51 2008 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c      Thu May 01 10:46:59 2008 +0100
@@ -60,6 +60,16 @@ static void (*pm_idle_save) (void) __rea
 static void (*pm_idle_save) (void) __read_mostly;
 unsigned int max_cstate __read_mostly = 2;
 integer_param("max_cstate", max_cstate);
+/*
+ * bm_history -- bit-mask with a bit per jiffy of bus-master activity
+ * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
+ * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
+ * 100 HZ: 0x0000000F: 4 jiffies = 40ms
+ * reduce history for more aggressive entry into C3
+ */
+unsigned int bm_history __read_mostly =
+    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
+integer_param("bm_history", bm_history);
 
 struct acpi_processor_cx;
 
@@ -91,10 +101,20 @@ struct acpi_processor_cx
     struct acpi_processor_cx_policy demotion;
 };
 
+struct acpi_processor_flags
+{
+    u8 bm_control:1;
+    u8 bm_check:1;
+    u8 has_cst:1;
+    u8 power_setup_done:1;
+    u8 bm_rld_set:1;
+};
+
 struct acpi_processor_power
 {
+    struct acpi_processor_flags flags;
     struct acpi_processor_cx *state;
-    u64 bm_check_timestamp;
+    s_time_t bm_check_timestamp;
     u32 default_state;
     u32 bm_activity;
     u32 count;
@@ -185,6 +205,29 @@ static void acpi_processor_power_activat
         old->promotion.count = 0;
     new->demotion.count = 0;
 
+    /* Cleanup from old state. */
+    if ( old )
+    {
+        switch ( old->type )
+        {
+        case ACPI_STATE_C3:
+            /* Disable bus master reload */
+            if ( new->type != ACPI_STATE_C3 && power->flags.bm_check )
+                acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+            break;
+        }
+    }
+
+    /* Prepare to use new state. */
+    switch ( new->type )
+    {
+    case ACPI_STATE_C3:
+        /* Enable bus master reload */
+        if ( old->type != ACPI_STATE_C3 && power->flags.bm_check )
+            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
+        break;
+    }
+
     power->state = new;
 
     return;
@@ -196,7 +239,7 @@ static void acpi_safe_halt(void)
     safe_halt();
 }
 
-#define MWAIT_ECX_INTERRUPT_BREAK      (0x1)
+#define MWAIT_ECX_INTERRUPT_BREAK   (0x1)
 
 static void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
@@ -229,6 +272,8 @@ static void acpi_idle_do_entry(struct ac
     }
 }
 
+static atomic_t c3_cpu_count;
+
 static void acpi_processor_idle(void)
 {
     struct acpi_processor_power *power = NULL;
@@ -258,6 +303,62 @@ static void acpi_processor_idle(void)
             acpi_safe_halt();
         }
         return;
+    }
+
+    /*
+     * Check BM Activity
+     * -----------------
+     * Check for bus mastering activity (if required), record, and check
+     * for demotion.
+     */
+    if ( power->flags.bm_check )
+    {
+        u32 bm_status = 0;
+        unsigned long diff = (NOW() - power->bm_check_timestamp) >> 23;
+
+        if ( diff > 31 )
+            diff = 31;
+
+        power->bm_activity <<= diff;
+
+        acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
+        if ( bm_status )
+        {
+            power->bm_activity |= 0x1;
+            acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
+        }
+        /*
+         * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
+         * the true state of bus mastering activity; forcing us to
+         * manually check the BMIDEA bit of each IDE channel.
+         */
+        /*else if ( errata.piix4.bmisx )
+        {
+            if ( (inb_p(errata.piix4.bmisx + 0x02) & 0x01)
+                || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01) )
+                pr->power.bm_activity |= 0x1;
+        }*/
+
+        power->bm_check_timestamp = NOW();
+
+        /*
+         * If bus mastering is or was active this jiffy, demote
+         * to avoid a faulty transition.  Note that the processor
+         * won't enter a low-power state during this call (to this
+         * function) but should upon the next.
+         *
+         * TBD: A better policy might be to fall back to the demotion
+         *      state (use it for this quantum only) instead of
+         *      demoting -- and rely on duration as our sole demotion
+         *      qualification.  This may, however, introduce DMA
+         *      issues (e.g. floppy DMA transfer overrun/underrun).
+         */
+        if ( (power->bm_activity & 0x1) && cx->demotion.threshold.bm )
+        {
+            local_irq_enable();
+            next_state = cx->demotion.state;
+            goto end;
+        }
     }
 
     /*
@@ -303,6 +404,73 @@ static void acpi_processor_idle(void)
         sleep_ticks =
             ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
         break;
+
+    case ACPI_STATE_C3:
+        /*
+         * disable bus master
+         * bm_check implies we need ARB_DIS
+         * !bm_check implies we need cache flush
+         * bm_control implies whether we can do ARB_DIS
+         *
+         * That leaves a case where bm_check is set and bm_control is
+         * not set. In that case we cannot do much, we enter C3
+         * without doing anything.
+         */
+        if ( power->flags.bm_check && power->flags.bm_control )
+        {
+            atomic_inc(&c3_cpu_count);
+            if ( atomic_read(&c3_cpu_count) == num_online_cpus() )
+            {
+                /*
+                 * All CPUs are trying to go to C3
+                 * Disable bus master arbitration
+                 */
+                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
+            }
+        }
+        else if ( !power->flags.bm_check )
+        {
+            /* SMP with no shared cache... Invalidate cache  */
+            ACPI_FLUSH_CPU_CACHE();
+        }
+
+        /* Get start time (ticks) */
+        t1 = inl(pmtmr_ioport);
+
+        /*
+         * FIXME: Before invoking C3, be aware that TSC/APIC timer may be 
+         * stopped by H/W. Without careful handling of TSC/APIC stop issues,
+         * deep C state can't work correctly.
+         */
+        /* placeholder for preparing TSC stop */
+
+        /* placeholder for preparing APIC stop */
+
+        /* Invoke C3 */
+        acpi_idle_do_entry(cx);
+
+        /* placeholder for recovering APIC */
+
+        /* placeholder for recovering TSC */
+
+        /* Get end time (ticks) */
+        t2 = inl(pmtmr_ioport);
+        if ( power->flags.bm_check && power->flags.bm_control )
+        {
+            /* Enable bus master arbitration */
+            atomic_dec(&c3_cpu_count);
+            acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+        }
+
+        /* Compute time (ticks) that we were actually asleep */
+        sleep_ticks = ticks_elapsed(t1, t2);
+        /* Re-enable interrupts */
+        local_irq_enable();
+        /* Do not account our idle-switching overhead: */
+        sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
+
+        break;
+
     default:
         local_irq_enable();
         return;
@@ -331,8 +499,19 @@ static void acpi_processor_idle(void)
             cx->demotion.count = 0;
             if ( cx->promotion.count >= cx->promotion.threshold.count )
             {
-                next_state = cx->promotion.state;
-                goto end;
+                if ( power->flags.bm_check )
+                {
+                    if ( !(power->bm_activity & cx->promotion.threshold.bm) )
+                    {
+                        next_state = cx->promotion.state;
+                        goto end;
+                    }
+                }
+                else
+                {
+                    next_state = cx->promotion.state;
+                    goto end;
+                }
             }
         }
     }
@@ -425,6 +604,8 @@ static int acpi_processor_set_power_poli
             cx->demotion.state = lower;
             cx->demotion.threshold.ticks = cx->latency_ticks;
             cx->demotion.threshold.count = 1;
+            if ( cx->type == ACPI_STATE_C3 )
+                cx->demotion.threshold.bm = bm_history;
         }
 
         lower = cx;
@@ -445,6 +626,8 @@ static int acpi_processor_set_power_poli
                 cx->promotion.threshold.count = 4;
             else
                 cx->promotion.threshold.count = 10;
+            if ( higher->type == ACPI_STATE_C3 )
+                cx->promotion.threshold.bm = bm_history;
         }
 
         higher = cx;
@@ -511,11 +694,40 @@ static int acpi_processor_ffh_cstate_pro
     return 0;
 }
 
+/*
+ * Initialize bm_flags based on the CPU cache properties
+ * On SMP it depends on cache configuration
+ * - When cache is not shared among all CPUs, we flush cache
+ *   before entering C3.
+ * - When cache is shared among all CPUs, we use bm_check
+ *   mechanism as in UP case
+ *
+ * This routine is called only after all the CPUs are online
+ */
+static void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags)
+{
+    struct cpuinfo_x86 *c = &current_cpu_data;
+
+    flags->bm_check = 0;
+    if ( num_online_cpus() == 1 )
+        flags->bm_check = 1;
+    else if ( c->x86_vendor == X86_VENDOR_INTEL )
+    {
+        /*
+         * Today all CPUs that support C3 share cache.
+         * TBD: This needs to look at cache shared map, once
+         * multi-core detection patch makes it to the base.
+         */
+        flags->bm_check = 1;
+    }
+}
+
 #define VENDOR_INTEL                   (1)
 #define NATIVE_CSTATE_BEYOND_HALT      (2)
 
-static int check_cx(xen_processor_cx_t *cx)
-{
+static int check_cx(struct acpi_processor_power *power, xen_processor_cx_t *cx)
+{
+    static int bm_check_flag;
     if ( cx == NULL )
         return -EINVAL;
 
@@ -543,6 +755,56 @@ static int check_cx(xen_processor_cx_t *
         return -ENODEV;
     }
 
+    if ( cx->type == ACPI_STATE_C3 )
+    {
+        /* All the logic here assumes flags.bm_check is the same across all CPUs */
+        if ( !bm_check_flag )
+        {
+            /* Determine whether bm_check is needed based on CPU  */
+            acpi_processor_power_init_bm_check(&(power->flags));
+            bm_check_flag = power->flags.bm_check;
+        }
+        else
+        {
+            power->flags.bm_check = bm_check_flag;
+        }
+
+        if ( power->flags.bm_check )
+        {
+            if ( !power->flags.bm_control )
+            {
+                if ( power->flags.has_cst != 1 )
+                {
+                    /* bus mastering control is necessary */
+                    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+                        "C3 support requires BM control\n"));
+                    return -1;
+                }
+                else
+                {
+                    /* Here we enter C3 without bus mastering */
+                    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+                        "C3 support without BM control\n"));
+                }
+            }
+        }
+        else
+        {
+            /*
+             * WBINVD should be set in the FADT for the C3 state to be
+             * supported when bm_check is not required.
+             */
+            if ( !(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD) )
+            {
+                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+                          "Cache invalidation should work properly"
+                          " for C3 to be enabled on SMP systems\n"));
+                return -1;
+            }
+            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+        }
+    }
+
     return 0;
 }
 
@@ -552,7 +814,7 @@ static int set_cx(struct acpi_processor_
     struct acpi_processor_cx *cx;
 
     /* skip unsupported acpi cstate */
-    if ( check_cx(xen_cx) )
+    if ( check_cx(acpi_power, xen_cx) )
         return -EFAULT;
 
     cx = &acpi_power->states[xen_cx->type];
@@ -663,6 +925,10 @@ long set_cx_pminfo(uint32_t cpu, struct 
 
     init_cx_pminfo(acpi_power);
 
+    acpi_power->flags.bm_check = power->flags.bm_check;
+    acpi_power->flags.bm_control = power->flags.bm_control;
+    acpi_power->flags.has_cst = power->flags.has_cst;
+
     states = power->states;
 
     for ( i = 0; i < power->count; i++ )
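
As the description notes, nothing changes by default: C3 is only used
once the administrator opts in on the Xen command line. A hypothetical
GRUB (legacy) stanza doing so might look like the following; the paths
and dom0 kernel name are purely illustrative, only the max_cstate=
option comes from this changeset:

title Xen (C3 enabled)
    kernel /boot/xen.gz max_cstate=3
    module /boot/vmlinuz-xen console=tty0

The new "bm_history" parameter can be appended to the same xen.gz line
to shorten or extend the bus-master activity window used for C3
promotion/demotion decisions.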
