
[Xen-changelog] [xen-unstable] x86: Fix lapic timer stop issue in deep C state



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1211362737 -3600
# Node ID d795e15b85a788d0389d24963897cf480dcab0e9
# Parent  672c09aad49df0b8056c795bd6c351746d037975
x86: Fix lapic timer stop issue in deep C state

The local APIC timer may stop during deep C state (C3/C4/...) entry/exit.
This patch adds logic that uses the platform timer (HPET) to re-enable the
local APIC timer at C state entry/exit.

Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>
Signed-off-by: Yu Ke <ke.yu@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
 xen/arch/x86/Makefile        |    1 
 xen/arch/x86/acpi/cpu_idle.c |   11 -
 xen/arch/x86/hpet.c          |  291 +++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/time.c          |   59 ++------
 xen/common/timer.c           |    8 -
 xen/include/asm-x86/hpet.h   |   20 ++
 xen/include/xen/timer.h      |    9 +
 7 files changed, 348 insertions(+), 51 deletions(-)
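
For readers skimming the diff, the flow the patch introduces is small: before
entering C3 the CPU joins the HPET broadcast channel (reprogramming HPET
timer 0 if this CPU's deadline is the nearest), the HPET legacy interrupt then
wakes any CPU whose per-CPU timer_deadline has expired by raising
TIMER_SOFTIRQ, and on C3 exit the CPU leaves the channel and reprograms its
local timer from that saved deadline. A minimal, self-contained sketch of the
call ordering follows; the stub bodies are illustrative stand-ins, not the Xen
implementations in this patch.

/* Illustrative stand-ins that only trace the call order around a deep
 * C-state entry/exit, mirroring the acpi_processor_idle() hunk below. */
#include <stdio.h>

static void hpet_broadcast_enter(void)
{
    /* Real code: add this CPU to hpet_event.cpumask and, if its deadline is
     * nearer than the channel's next_event, reprogram HPET timer 0. */
    printf("enter: rely on HPET broadcast while the LAPIC timer may stop\n");
}

static void acpi_idle_do_entry(void)
{
    /* Real code: enter C3; the local APIC timer may stop here. */
    printf("C3: deep C state entered and exited\n");
}

static void hpet_broadcast_exit(void)
{
    /* Real code: leave the broadcast mask and reprogram the local timer
     * from per_cpu(timer_deadline). */
    printf("exit: local timer reprogrammed from saved deadline\n");
}

int main(void)
{
    hpet_broadcast_enter();   /* preparing APIC stop */
    acpi_idle_do_entry();     /* invoke C3 */
    hpet_broadcast_exit();    /* recovering APIC */
    return 0;
}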

diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Tue May 20 14:50:45 2008 +0100
+++ b/xen/arch/x86/Makefile     Wed May 21 10:38:57 2008 +0100
@@ -50,6 +50,7 @@ obj-y += machine_kexec.o
 obj-y += machine_kexec.o
 obj-y += crash.o
 obj-y += tboot.o
+obj-y += hpet.o
 
 obj-$(crash_debug) += gdbstub.o
 
diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Tue May 20 14:50:45 2008 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c      Wed May 21 10:38:57 2008 +0100
@@ -39,6 +39,7 @@
 #include <xen/smp.h>
 #include <asm/cache.h>
 #include <asm/io.h>
+#include <asm/hpet.h>
 #include <xen/guest_access.h>
 #include <public/platform.h>
 #include <asm/processor.h>
@@ -438,19 +439,19 @@ static void acpi_processor_idle(void)
         t1 = inl(pmtmr_ioport);
 
         /*
-         * FIXME: Before invoking C3, be aware that TSC/APIC timer may be 
+         * Before invoking C3, be aware that TSC/APIC timer may be 
          * stopped by H/W. Without carefully handling of TSC/APIC stop issues,
          * deep C state can't work correctly.
          */
         /* preparing TSC stop */
         cstate_save_tsc();
-        /* placeholder for preparing APIC stop */
-
+        /* preparing APIC stop */
+        hpet_broadcast_enter();
         /* Invoke C3 */
         acpi_idle_do_entry(cx);
 
-        /* placeholder for recovering APIC */
-
+        /* recovering APIC */
+        hpet_broadcast_exit();
         /* recovering TSC */
         cstate_restore_tsc();
 
diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/hpet.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hpet.c       Wed May 21 10:38:57 2008 +0100
@@ -0,0 +1,291 @@
+/******************************************************************************
+ * arch/x86/hpet.c
+ * 
+ * HPET management.
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/time.h>
+#include <xen/timer.h>
+#include <xen/smp.h>
+#include <xen/softirq.h>
+#include <asm/fixmap.h>
+#include <asm/div64.h>
+#include <asm/hpet.h>
+
+#define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1))
+
+#define MAX_DELTA_NS MILLISECS(10*1000)
+#define MIN_DELTA_NS MICROSECS(1)
+
+struct hpet_event_channel
+{
+    unsigned long mult;
+    int           shift;
+    s_time_t      next_event;
+    cpumask_t     cpumask;
+    spinlock_t    lock;
+    void          (*event_handler)(struct hpet_event_channel *);
+};
+static struct hpet_event_channel hpet_event;
+
+unsigned long hpet_address;
+
+/*
+ * Calculate a multiplication factor for scaled math, which is used to convert
+ * nanoseconds based values to clock ticks:
+ *
+ * clock_ticks = (nanoseconds * factor) >> shift.
+ *
+ * div_sc is the rearranged equation to calculate a factor from a given clock
+ * ticks / nanoseconds ratio:
+ *
+ * factor = (clock_ticks << shift) / nanoseconds
+ */
+static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec,
+                                   int shift)
+{
+    uint64_t tmp = ((uint64_t)ticks) << shift;
+
+    do_div(tmp, nsec);
+    return (unsigned long) tmp;
+}
+
+/*
+ * Convert nanoseconds based values to clock ticks:
+ *
+ * clock_ticks = (nanoseconds * factor) >> shift.
+ */
+static inline unsigned long ns2ticks(unsigned long nsec, int shift,
+                                     unsigned long factor)
+{
+    uint64_t tmp = ((uint64_t)nsec * factor) >> shift;
+
+    return (unsigned long) tmp;
+}
+
+static int hpet_legacy_next_event(unsigned long delta)
+{
+    unsigned long cnt;
+
+    cnt = hpet_read32(HPET_COUNTER);
+    cnt += delta;
+    hpet_write32(cnt, HPET_T0_CMP);
+
+    return ((long)(hpet_read32(HPET_COUNTER) - cnt) > 0) ? -ETIME : 0;
+}
+
+static int reprogram_hpet_evt_channel(
+    struct hpet_event_channel *ch,
+    s_time_t expire, s_time_t now, int force)
+{
+    int64_t delta;
+    int ret;
+
+    if ( unlikely(expire < 0) )
+    {
+        printk(KERN_DEBUG "reprogram: expire < 0\n");
+        return -ETIME;
+    }
+
+    delta = expire - now;
+    if ( delta <= 0 )
+    {
+        printk(KERN_DEBUG "reprogram: expire(%"PRIx64") < "
+               "now(%"PRIx64")\n", expire, now);
+        if ( !force )
+            return -ETIME;
+    }
+
+    ch->next_event = expire;
+
+    delta = min_t(int64_t, delta, MAX_DELTA_NS);
+    delta = max_t(int64_t, delta, MIN_DELTA_NS);
+    delta = ns2ticks(delta, ch->shift, ch->mult);
+
+    ret = hpet_legacy_next_event(delta);
+    while ( ret && force )
+    {
+        delta += delta;
+        ret = hpet_legacy_next_event(delta);
+    }
+
+    return ret;
+}
+
+static int evt_do_broadcast(cpumask_t mask)
+{
+    int ret = 0, cpu = smp_processor_id();
+
+    if ( cpu_isset(cpu, mask) )
+    {
+        cpu_clear(cpu, mask);
+        raise_softirq(TIMER_SOFTIRQ);
+        ret = 1;
+    }
+
+    if ( !cpus_empty(mask) )
+    {
+       cpumask_raise_softirq(mask, TIMER_SOFTIRQ);
+       ret = 1;
+    }
+    return ret;
+}
+
+static void handle_hpet_broadcast(struct hpet_event_channel *ch)
+{
+    cpumask_t mask;
+    s_time_t now, next_event;
+    int cpu, current_cpu = smp_processor_id();
+
+    spin_lock(&ch->lock);
+
+    if ( cpu_isset(current_cpu, ch->cpumask) )
+        printk(KERN_DEBUG "WARNING: current cpu%d in bc_mask\n", current_cpu);
+again:
+    ch->next_event = STIME_MAX;
+    next_event = STIME_MAX;
+    mask = (cpumask_t)CPU_MASK_NONE;
+    now = NOW();
+
+    /* find all expired events */
+    for_each_cpu_mask(cpu, ch->cpumask)
+    {
+        if ( per_cpu(timer_deadline, cpu) <= now )
+            cpu_set(cpu, mask);
+        else if ( per_cpu(timer_deadline, cpu) < next_event )
+            next_event = per_cpu(timer_deadline, cpu);
+    }
+    if ( per_cpu(timer_deadline, current_cpu) <= now )
+        cpu_set(current_cpu, mask);
+
+    /* wakeup the cpus which have an expired event. */
+    evt_do_broadcast(mask);
+
+    if ( next_event != STIME_MAX )
+    {
+        if ( reprogram_hpet_evt_channel(ch, next_event, now, 0) )
+            goto again;
+    }
+    spin_unlock(&ch->lock);
+}
+
+void hpet_broadcast_init(void)
+{
+    u64 hpet_rate;
+    u32 hpet_id, cfg;
+
+    hpet_rate = hpet_setup();
+    if ( hpet_rate == 0 )
+        return;
+
+    hpet_id = hpet_read32(HPET_ID);
+    if ( !(hpet_id & HPET_ID_LEGSUP) )
+        return;
+
+    /* Start HPET legacy interrupts */
+    cfg = hpet_read32(HPET_CFG);
+    cfg |= HPET_CFG_LEGACY;
+    hpet_write32(cfg, HPET_CFG);
+
+    /* set HPET T0 as oneshot */
+    cfg = hpet_read32(HPET_T0_CFG);
+    cfg &= ~HPET_TN_PERIODIC;
+    cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+    hpet_write32(cfg, HPET_T0_CFG);
+
+    /*
+     * The period is a femto seconds value. We need to calculate the scaled
+     * math multiplication factor for nanosecond to hpet tick conversion.
+     */
+    hpet_event.mult = div_sc((unsigned long)hpet_rate, 1000000000ul, 32);
+    hpet_event.shift = 32;
+    hpet_event.next_event = STIME_MAX;
+    hpet_event.event_handler = handle_hpet_broadcast;
+    spin_lock_init(&hpet_event.lock);
+}
+
+void hpet_broadcast_enter(void)
+{
+    struct hpet_event_channel *ch = &hpet_event;
+
+    cpu_set(smp_processor_id(), ch->cpumask);
+
+    spin_lock(&ch->lock);
+
+    /* reprogram if current cpu expire time is nearer */
+    if ( this_cpu(timer_deadline) < ch->next_event )
+        reprogram_hpet_evt_channel(ch, this_cpu(timer_deadline), NOW(), 1);
+
+    spin_unlock(&ch->lock);
+}
+
+void hpet_broadcast_exit(void)
+{
+    struct hpet_event_channel *ch = &hpet_event;
+    int cpu = smp_processor_id();
+
+    if ( cpu_test_and_clear(cpu, ch->cpumask) )
+        reprogram_timer(per_cpu(timer_deadline, cpu));
+}
+
+int hpet_legacy_irq_tick(void)
+{
+    if ( !hpet_event.event_handler )
+        return 0;
+    hpet_event.event_handler(&hpet_event);
+    return 1;
+}
+
+u64 hpet_setup(void)
+{
+    static u64 hpet_rate;
+    static int initialised;
+    u32 hpet_id, hpet_period, cfg;
+    int i;
+
+    if ( initialised )
+        return hpet_rate;
+    initialised = 1;
+
+    if ( hpet_address == 0 )
+        return 0;
+
+    set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
+
+    hpet_id = hpet_read32(HPET_ID);
+    if ( hpet_id == 0 )
+    {
+        printk("BAD HPET vendor id.\n");
+        return 0;
+    }
+
+    /* Check for sane period (100ps <= period <= 100ns). */
+    hpet_period = hpet_read32(HPET_PERIOD);
+    if ( (hpet_period > 100000000) || (hpet_period < 100000) )
+    {
+        printk("BAD HPET period %u.\n", hpet_period);
+        return 0;
+    }
+
+    cfg = hpet_read32(HPET_CFG);
+    cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+    hpet_write32(cfg, HPET_CFG);
+
+    for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ )
+    {
+        cfg = hpet_read32(HPET_T0_CFG + i*0x20);
+        cfg &= ~HPET_TN_ENABLE;
+        hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG);
+    }
+
+    cfg = hpet_read32(HPET_CFG);
+    cfg |= HPET_CFG_ENABLE;
+    hpet_write32(cfg, HPET_CFG);
+
+    hpet_rate = 1000000000000000ULL; /* 10^15 */
+    (void)do_div(hpet_rate, hpet_period);
+
+    return hpet_rate;
+}
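
As a sanity check on the mult/shift scaled math used above, here is a
standalone example, assuming a 14.31818 MHz HPET (a common rate, purely
illustrative) and using plain 64-bit division in place of Xen's do_div():

#include <stdio.h>
#include <stdint.h>

/* factor = (ticks << shift) / nsec -- same formula as div_sc() above. */
static uint64_t factor_for(uint64_t ticks_per_sec, uint64_t nsec, int shift)
{
    return (ticks_per_sec << shift) / nsec;
}

/* ticks = (nsec * factor) >> shift -- same formula as ns2ticks() above. */
static uint64_t to_ticks(uint64_t nsec, int shift, uint64_t factor)
{
    return (nsec * factor) >> shift;
}

int main(void)
{
    uint64_t hpet_rate = 14318180ULL;  /* HPET ticks per second */
    uint64_t mult = factor_for(hpet_rate, 1000000000ULL, 32);

    /* 1 ms should convert to roughly hpet_rate / 1000 ticks (~14318). */
    printf("mult = %llu\n", (unsigned long long)mult);
    printf("1 ms = %llu ticks\n",
           (unsigned long long)to_ticks(1000000ULL, 32, mult));
    return 0;
}
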
diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Tue May 20 14:50:45 2008 +0100
+++ b/xen/arch/x86/time.c       Wed May 21 10:38:57 2008 +0100
@@ -38,7 +38,6 @@ string_param("clocksource", opt_clocksou
 #define EPOCH MILLISECS(1000)
 
 unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
-unsigned long hpet_address;
 DEFINE_SPINLOCK(rtc_lock);
 unsigned long pit0_ticks;
 static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
@@ -68,7 +67,8 @@ struct platform_timesource {
 
 static DEFINE_PER_CPU(struct cpu_time, cpu_time);
 
-static u8 tsc_invariant=0;  /* TSC is invariant upon C state entry */
+/* TSC is invariant on C state entry? */
+static bool_t tsc_invariant;
 
 /*
  * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
@@ -151,6 +151,9 @@ static void timer_interrupt(int irq, voi
 {
     ASSERT(local_irq_is_enabled());
 
+    if ( hpet_legacy_irq_tick() )
+        return;
+
     /* Only for start-of-day interrupt tests in io_apic.c. */
     (*(volatile unsigned long *)&pit0_ticks)++;
 
@@ -347,47 +350,10 @@ static u32 read_hpet_count(void)
 
 static int init_hpet(struct platform_timesource *pts)
 {
-    u64 hpet_rate;
-    u32 hpet_id, hpet_period, cfg;
-    int i;
-
-    if ( hpet_address == 0 )
+    u64 hpet_rate = hpet_setup();
+
+    if ( hpet_rate == 0 )
         return 0;
-
-    set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
-
-    hpet_id = hpet_read32(HPET_ID);
-    if ( hpet_id == 0 )
-    {
-        printk("BAD HPET vendor id.\n");
-        return 0;
-    }
-
-    /* Check for sane period (100ps <= period <= 100ns). */
-    hpet_period = hpet_read32(HPET_PERIOD);
-    if ( (hpet_period > 100000000) || (hpet_period < 100000) )
-    {
-        printk("BAD HPET period %u.\n", hpet_period);
-        return 0;
-    }
-
-    cfg = hpet_read32(HPET_CFG);
-    cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
-    hpet_write32(cfg, HPET_CFG);
-
-    for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ )
-    {
-        cfg = hpet_read32(HPET_T0_CFG + i*0x20);
-        cfg &= ~HPET_TN_ENABLE;
-        hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG);
-    }
-
-    cfg = hpet_read32(HPET_CFG);
-    cfg |= HPET_CFG_ENABLE;
-    hpet_write32(cfg, HPET_CFG);
-
-    hpet_rate = 1000000000000000ULL; /* 10^15 */
-    (void)do_div(hpet_rate, hpet_period);
 
     pts->name = "HPET";
     pts->frequency = hpet_rate;
@@ -1041,7 +1007,14 @@ static int __init disable_pit_irq(void)
         outb_p(0x30, PIT_MODE);
         outb_p(0, PIT_CH0);
         outb_p(0, PIT_CH0);
-    }
+
+        /*
+         * If we do not rely on PIT CH0 then we can use HPET for one-shot
+         * timer emulation when entering deep C states.
+         */
+        hpet_broadcast_init();
+    }
+
     return 0;
 }
 __initcall(disable_pit_irq);
diff -r 672c09aad49d -r d795e15b85a7 xen/common/timer.c
--- a/xen/common/timer.c        Tue May 20 14:50:45 2008 +0100
+++ b/xen/common/timer.c        Wed May 21 10:38:57 2008 +0100
@@ -35,7 +35,7 @@ struct timers {
 
 static DEFINE_PER_CPU(struct timers, timers);
 
-extern int reprogram_timer(s_time_t timeout);
+DEFINE_PER_CPU(s_time_t, timer_deadline);
 
 /****************************************************************************
  * HEAP OPERATIONS.
@@ -323,8 +323,10 @@ static void timer_softirq_action(void)
         }
 
         ts->running = NULL;
-    }
-    while ( !reprogram_timer(GET_HEAP_SIZE(heap) ? heap[1]->expires : 0) );
+
+        this_cpu(timer_deadline) = GET_HEAP_SIZE(heap) ? heap[1]->expires : 0;
+    }
+    while ( !reprogram_timer(this_cpu(timer_deadline)) );
 
     spin_unlock_irq(&ts->lock);
 }
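
The per-CPU timer_deadline recorded above is what handle_hpet_broadcast()
consults to decide which sleeping CPUs to wake. A small standalone
illustration of that selection loop, with a plain array standing in for
Xen's per-CPU variables and cpumasks, and invented deadline values:

#include <stdio.h>
#include <stdint.h>

#define NR_CPUS   4
#define STIME_MAX INT64_MAX

typedef int64_t s_time_t;

int main(void)
{
    /* Invented deadlines (ns) for CPUs currently in the broadcast mask. */
    s_time_t timer_deadline[NR_CPUS] = { 100, 400, 250, 900 };
    s_time_t now = 300, next_event = STIME_MAX;
    int cpu;

    for (cpu = 0; cpu < NR_CPUS; cpu++)
    {
        if (timer_deadline[cpu] <= now)
            printf("cpu%d expired: would raise TIMER_SOFTIRQ\n", cpu);
        else if (timer_deadline[cpu] < next_event)
            next_event = timer_deadline[cpu];
    }

    /* HPET timer 0 is then reprogrammed for the nearest pending deadline. */
    printf("reprogram HPET for next_event = %lld\n", (long long)next_event);
    return 0;
}
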
diff -r 672c09aad49d -r d795e15b85a7 xen/include/asm-x86/hpet.h
--- a/xen/include/asm-x86/hpet.h        Tue May 20 14:50:45 2008 +0100
+++ b/xen/include/asm-x86/hpet.h        Wed May 21 10:38:57 2008 +0100
@@ -49,4 +49,24 @@
 #define hpet_write32(y,x) \
     (*(volatile u32 *)(fix_to_virt(FIX_HPET_BASE) + (x)) = (y))
 
+/*
+ * Detect and initialise HPET hardware: return counter update frequency.
+ * Return value is zero if HPET is unavailable.
+ */
+u64 hpet_setup(void);
+
+/*
+ * Callback from legacy timer (PIT channel 0) IRQ handler.
+ * Returns 1 if tick originated from HPET; else 0.
+ */
+int hpet_legacy_irq_tick(void);
+
+/*
+ * Temporarily use an HPET event counter for timer interrupt handling,
+ * rather than using the LAPIC timer. Used for Cx state entry.
+ */
+void hpet_broadcast_init(void);
+void hpet_broadcast_enter(void);
+void hpet_broadcast_exit(void);
+
 #endif /* __X86_HPET_H__ */
diff -r 672c09aad49d -r d795e15b85a7 xen/include/xen/timer.h
--- a/xen/include/xen/timer.h   Tue May 20 14:50:45 2008 +0100
+++ b/xen/include/xen/timer.h   Wed May 21 10:38:57 2008 +0100
@@ -99,6 +99,15 @@ extern void process_pending_timers(void)
  */
 extern void timer_init(void);
 
+/*
+ * Next timer deadline for each CPU.
+ * Modified only by the local CPU and never in interrupt context.
+ */
+DECLARE_PER_CPU(s_time_t, timer_deadline);
+
+/* Arch-defined function to reprogram timer hardware for new deadline. */
+extern int reprogram_timer(s_time_t timeout);
+
 #endif /* _TIMER_H_ */
 
 /*

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
