[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86: Fix lapic timer stop issue in deep C state
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1211362737 -3600 # Node ID d795e15b85a788d0389d24963897cf480dcab0e9 # Parent 672c09aad49df0b8056c795bd6c351746d037975 x86: Fix lapic timer stop issue in deep C state Local APIC timer may stop at deep C state (C3/C4...) entry/exit. This patch adds the logic that uses the platform timer (HPET) to re-enable the local APIC timer at C state entry/exit. Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx> Signed-off-by: Yu Ke <ke.yu@xxxxxxxxx> Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx> --- xen/arch/x86/Makefile | 1 xen/arch/x86/acpi/cpu_idle.c | 11 - xen/arch/x86/hpet.c | 291 +++++++++++++++++++++++++++++++++++++++++++ xen/arch/x86/time.c | 59 ++------ xen/common/timer.c | 8 - xen/include/asm-x86/hpet.h | 20 ++ xen/include/xen/timer.h | 9 + 7 files changed, 348 insertions(+), 51 deletions(-) diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Tue May 20 14:50:45 2008 +0100 +++ b/xen/arch/x86/Makefile Wed May 21 10:38:57 2008 +0100 @@ -50,6 +50,7 @@ obj-y += machine_kexec.o obj-y += machine_kexec.o obj-y += crash.o obj-y += tboot.o +obj-y += hpet.o obj-$(crash_debug) += gdbstub.o diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/acpi/cpu_idle.c --- a/xen/arch/x86/acpi/cpu_idle.c Tue May 20 14:50:45 2008 +0100 +++ b/xen/arch/x86/acpi/cpu_idle.c Wed May 21 10:38:57 2008 +0100 @@ -39,6 +39,7 @@ #include <xen/smp.h> #include <asm/cache.h> #include <asm/io.h> +#include <asm/hpet.h> #include <xen/guest_access.h> #include <public/platform.h> #include <asm/processor.h> @@ -438,19 +439,19 @@ static void acpi_processor_idle(void) t1 = inl(pmtmr_ioport); /* - * FIXME: Before invoking C3, be aware that TSC/APIC timer may be + * Before invoking C3, be aware that TSC/APIC timer may be * stopped by H/W. Without carefully handling of TSC/APIC stop issues, * deep C state can't work correctly. 
*/ /* preparing TSC stop */ cstate_save_tsc(); - /* placeholder for preparing APIC stop */ - + /* preparing APIC stop */ + hpet_broadcast_enter(); /* Invoke C3 */ acpi_idle_do_entry(cx); - /* placeholder for recovering APIC */ - + /* recovering APIC */ + hpet_broadcast_exit(); /* recovering TSC */ cstate_restore_tsc(); diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/hpet.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hpet.c Wed May 21 10:38:57 2008 +0100 @@ -0,0 +1,291 @@ +/****************************************************************************** + * arch/x86/hpet.c + * + * HPET management. + */ + +#include <xen/config.h> +#include <xen/errno.h> +#include <xen/time.h> +#include <xen/timer.h> +#include <xen/smp.h> +#include <xen/softirq.h> +#include <asm/fixmap.h> +#include <asm/div64.h> +#include <asm/hpet.h> + +#define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1)) + +#define MAX_DELTA_NS MILLISECS(10*1000) +#define MIN_DELTA_NS MICROSECS(1) + +struct hpet_event_channel +{ + unsigned long mult; + int shift; + s_time_t next_event; + cpumask_t cpumask; + spinlock_t lock; + void (*event_handler)(struct hpet_event_channel *); +}; +static struct hpet_event_channel hpet_event; + +unsigned long hpet_address; + +/* + * Calculate a multiplication factor for scaled math, which is used to convert + * nanoseconds based values to clock ticks: + * + * clock_ticks = (nanoseconds * factor) >> shift. + * + * div_sc is the rearranged equation to calculate a factor from a given clock + * ticks / nanoseconds ratio: + * + * factor = (clock_ticks << shift) / nanoseconds + */ +static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, + int shift) +{ + uint64_t tmp = ((uint64_t)ticks) << shift; + + do_div(tmp, nsec); + return (unsigned long) tmp; +} + +/* + * Convert nanoseconds based values to clock ticks: + * + * clock_ticks = (nanoseconds * factor) >> shift. 
+ */ +static inline unsigned long ns2ticks(unsigned long nsec, int shift, + unsigned long factor) +{ + uint64_t tmp = ((uint64_t)nsec * factor) >> shift; + + return (unsigned long) tmp; +} + +static int hpet_legacy_next_event(unsigned long delta) +{ + unsigned long cnt; + + cnt = hpet_read32(HPET_COUNTER); + cnt += delta; + hpet_write32(cnt, HPET_T0_CMP); + + return ((long)(hpet_read32(HPET_COUNTER) - cnt) > 0) ? -ETIME : 0; +} + +static int reprogram_hpet_evt_channel( + struct hpet_event_channel *ch, + s_time_t expire, s_time_t now, int force) +{ + int64_t delta; + int ret; + + if ( unlikely(expire < 0) ) + { + printk(KERN_DEBUG "reprogram: expire < 0\n"); + return -ETIME; + } + + delta = expire - now; + if ( delta <= 0 ) + { + printk(KERN_DEBUG "reprogram: expire(%"PRIx64") < " + "now(%"PRIx64")\n", expire, now); + if ( !force ) + return -ETIME; + } + + ch->next_event = expire; + + delta = min_t(int64_t, delta, MAX_DELTA_NS); + delta = max_t(int64_t, delta, MIN_DELTA_NS); + delta = ns2ticks(delta, ch->shift, ch->mult); + + ret = hpet_legacy_next_event(delta); + while ( ret && force ) + { + delta += delta; + ret = hpet_legacy_next_event(delta); + } + + return ret; +} + +static int evt_do_broadcast(cpumask_t mask) +{ + int ret = 0, cpu = smp_processor_id(); + + if ( cpu_isset(cpu, mask) ) + { + cpu_clear(cpu, mask); + raise_softirq(TIMER_SOFTIRQ); + ret = 1; + } + + if ( !cpus_empty(mask) ) + { + cpumask_raise_softirq(mask, TIMER_SOFTIRQ); + ret = 1; + } + return ret; +} + +static void handle_hpet_broadcast(struct hpet_event_channel *ch) +{ + cpumask_t mask; + s_time_t now, next_event; + int cpu, current_cpu = smp_processor_id(); + + spin_lock(&ch->lock); + + if ( cpu_isset(current_cpu, ch->cpumask) ) + printk(KERN_DEBUG "WARNING: current cpu%d in bc_mask\n", current_cpu); +again: + ch->next_event = STIME_MAX; + next_event = STIME_MAX; + mask = (cpumask_t)CPU_MASK_NONE; + now = NOW(); + + /* find all expired events */ + for_each_cpu_mask(cpu, ch->cpumask) + { + if 
( per_cpu(timer_deadline, cpu) <= now ) + cpu_set(cpu, mask); + else if ( per_cpu(timer_deadline, cpu) < next_event ) + next_event = per_cpu(timer_deadline, cpu); + } + if ( per_cpu(timer_deadline, current_cpu) <= now ) + cpu_set(current_cpu, mask); + + /* wakeup the cpus which have an expired event. */ + evt_do_broadcast(mask); + + if ( next_event != STIME_MAX ) + { + if ( reprogram_hpet_evt_channel(ch, next_event, now, 0) ) + goto again; + } + spin_unlock(&ch->lock); +} + +void hpet_broadcast_init(void) +{ + u64 hpet_rate; + u32 hpet_id, cfg; + + hpet_rate = hpet_setup(); + if ( hpet_rate == 0 ) + return; + + hpet_id = hpet_read32(HPET_ID); + if ( !(hpet_id & HPET_ID_LEGSUP) ) + return; + + /* Start HPET legacy interrupts */ + cfg = hpet_read32(HPET_CFG); + cfg |= HPET_CFG_LEGACY; + hpet_write32(cfg, HPET_CFG); + + /* set HPET T0 as oneshot */ + cfg = hpet_read32(HPET_T0_CFG); + cfg &= ~HPET_TN_PERIODIC; + cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; + hpet_write32(cfg, HPET_T0_CFG); + + /* + * The period is a femto seconds value. We need to calculate the scaled + * math multiplication factor for nanosecond to hpet tick conversion. 
+ */ + hpet_event.mult = div_sc((unsigned long)hpet_rate, 1000000000ul, 32); + hpet_event.shift = 32; + hpet_event.next_event = STIME_MAX; + hpet_event.event_handler = handle_hpet_broadcast; + spin_lock_init(&hpet_event.lock); +} + +void hpet_broadcast_enter(void) +{ + struct hpet_event_channel *ch = &hpet_event; + + cpu_set(smp_processor_id(), ch->cpumask); + + spin_lock(&ch->lock); + + /* reprogram if current cpu expire time is nearer */ + if ( this_cpu(timer_deadline) < ch->next_event ) + reprogram_hpet_evt_channel(ch, this_cpu(timer_deadline), NOW(), 1); + + spin_unlock(&ch->lock); +} + +void hpet_broadcast_exit(void) +{ + struct hpet_event_channel *ch = &hpet_event; + int cpu = smp_processor_id(); + + if ( cpu_test_and_clear(cpu, ch->cpumask) ) + reprogram_timer(per_cpu(timer_deadline, cpu)); +} + +int hpet_legacy_irq_tick(void) +{ + if ( !hpet_event.event_handler ) + return 0; + hpet_event.event_handler(&hpet_event); + return 1; +} + +u64 hpet_setup(void) +{ + static u64 hpet_rate; + static int initialised; + u32 hpet_id, hpet_period, cfg; + int i; + + if ( initialised ) + return hpet_rate; + initialised = 1; + + if ( hpet_address == 0 ) + return 0; + + set_fixmap_nocache(FIX_HPET_BASE, hpet_address); + + hpet_id = hpet_read32(HPET_ID); + if ( hpet_id == 0 ) + { + printk("BAD HPET vendor id.\n"); + return 0; + } + + /* Check for sane period (100ps <= period <= 100ns). 
*/ + hpet_period = hpet_read32(HPET_PERIOD); + if ( (hpet_period > 100000000) || (hpet_period < 100000) ) + { + printk("BAD HPET period %u.\n", hpet_period); + return 0; + } + + cfg = hpet_read32(HPET_CFG); + cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); + hpet_write32(cfg, HPET_CFG); + + for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ ) + { + cfg = hpet_read32(HPET_T0_CFG + i*0x20); + cfg &= ~HPET_TN_ENABLE; + hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG); + } + + cfg = hpet_read32(HPET_CFG); + cfg |= HPET_CFG_ENABLE; + hpet_write32(cfg, HPET_CFG); + + hpet_rate = 1000000000000000ULL; /* 10^15 */ + (void)do_div(hpet_rate, hpet_period); + + return hpet_rate; +} diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Tue May 20 14:50:45 2008 +0100 +++ b/xen/arch/x86/time.c Wed May 21 10:38:57 2008 +0100 @@ -38,7 +38,6 @@ string_param("clocksource", opt_clocksou #define EPOCH MILLISECS(1000) unsigned long cpu_khz; /* CPU clock frequency in kHz. */ -unsigned long hpet_address; DEFINE_SPINLOCK(rtc_lock); unsigned long pit0_ticks; static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */ @@ -68,7 +67,8 @@ struct platform_timesource { static DEFINE_PER_CPU(struct cpu_time, cpu_time); -static u8 tsc_invariant=0; /* TSC is invariant upon C state entry */ +/* TSC is invariant on C state entry? */ +static bool_t tsc_invariant; /* * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter. @@ -151,6 +151,9 @@ static void timer_interrupt(int irq, voi { ASSERT(local_irq_is_enabled()); + if ( hpet_legacy_irq_tick() ) + return; + /* Only for start-of-day interruopt tests in io_apic.c. 
*/ (*(volatile unsigned long *)&pit0_ticks)++; @@ -347,47 +350,10 @@ static u32 read_hpet_count(void) static int init_hpet(struct platform_timesource *pts) { - u64 hpet_rate; - u32 hpet_id, hpet_period, cfg; - int i; - - if ( hpet_address == 0 ) + u64 hpet_rate = hpet_setup(); + + if ( hpet_rate == 0 ) return 0; - - set_fixmap_nocache(FIX_HPET_BASE, hpet_address); - - hpet_id = hpet_read32(HPET_ID); - if ( hpet_id == 0 ) - { - printk("BAD HPET vendor id.\n"); - return 0; - } - - /* Check for sane period (100ps <= period <= 100ns). */ - hpet_period = hpet_read32(HPET_PERIOD); - if ( (hpet_period > 100000000) || (hpet_period < 100000) ) - { - printk("BAD HPET period %u.\n", hpet_period); - return 0; - } - - cfg = hpet_read32(HPET_CFG); - cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); - hpet_write32(cfg, HPET_CFG); - - for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ ) - { - cfg = hpet_read32(HPET_T0_CFG + i*0x20); - cfg &= ~HPET_TN_ENABLE; - hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG); - } - - cfg = hpet_read32(HPET_CFG); - cfg |= HPET_CFG_ENABLE; - hpet_write32(cfg, HPET_CFG); - - hpet_rate = 1000000000000000ULL; /* 10^15 */ - (void)do_div(hpet_rate, hpet_period); pts->name = "HPET"; pts->frequency = hpet_rate; @@ -1041,7 +1007,14 @@ static int __init disable_pit_irq(void) outb_p(0x30, PIT_MODE); outb_p(0, PIT_CH0); outb_p(0, PIT_CH0); - } + + /* + * If we do not rely on PIT CH0 then we can use HPET for one-shot + * timer emulation when entering deep C states. + */ + hpet_broadcast_init(); + } + return 0; } __initcall(disable_pit_irq); diff -r 672c09aad49d -r d795e15b85a7 xen/common/timer.c --- a/xen/common/timer.c Tue May 20 14:50:45 2008 +0100 +++ b/xen/common/timer.c Wed May 21 10:38:57 2008 +0100 @@ -35,7 +35,7 @@ struct timers { static DEFINE_PER_CPU(struct timers, timers); -extern int reprogram_timer(s_time_t timeout); +DEFINE_PER_CPU(s_time_t, timer_deadline); /**************************************************************************** * HEAP OPERATIONS. 
@@ -323,8 +323,10 @@ static void timer_softirq_action(void) } ts->running = NULL; - } - while ( !reprogram_timer(GET_HEAP_SIZE(heap) ? heap[1]->expires : 0) ); + + this_cpu(timer_deadline) = GET_HEAP_SIZE(heap) ? heap[1]->expires : 0; + } + while ( !reprogram_timer(this_cpu(timer_deadline)) ); spin_unlock_irq(&ts->lock); } diff -r 672c09aad49d -r d795e15b85a7 xen/include/asm-x86/hpet.h --- a/xen/include/asm-x86/hpet.h Tue May 20 14:50:45 2008 +0100 +++ b/xen/include/asm-x86/hpet.h Wed May 21 10:38:57 2008 +0100 @@ -49,4 +49,24 @@ #define hpet_write32(y,x) \ (*(volatile u32 *)(fix_to_virt(FIX_HPET_BASE) + (x)) = (y)) +/* + * Detect and initialise HPET hardware: return counter update frequency. + * Return value is zero if HPET is unavailable. + */ +u64 hpet_setup(void); + +/* + * Callback from legacy timer (PIT channel 0) IRQ handler. + * Returns 1 if tick originated from HPET; else 0. + */ +int hpet_legacy_irq_tick(void); + +/* + * Temporarily use an HPET event counter for timer interrupt handling, + * rather than using the LAPIC timer. Used for Cx state entry. + */ +void hpet_broadcast_init(void); +void hpet_broadcast_enter(void); +void hpet_broadcast_exit(void); + #endif /* __X86_HPET_H__ */ diff -r 672c09aad49d -r d795e15b85a7 xen/include/xen/timer.h --- a/xen/include/xen/timer.h Tue May 20 14:50:45 2008 +0100 +++ b/xen/include/xen/timer.h Wed May 21 10:38:57 2008 +0100 @@ -99,6 +99,15 @@ extern void process_pending_timers(void) */ extern void timer_init(void); +/* + * Next timer deadline for each CPU. + * Modified only by the local CPU and never in interrupt context. + */ +DECLARE_PER_CPU(s_time_t, timer_deadline); + +/* Arch-defined function to reprogram timer hardware for new deadline. */ +extern int reprogram_timer(s_time_t timeout); + #endif /* _TIMER_H_ */ /* _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |