[Xen-changelog] First cut of new time interfaces and synchronisation mechanisms.
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 43564304cf9448ad8978df6d2d0d6721b4615143
# Parent  9697bc63d4039196b15378f3b3fe406c6a445ea2
First cut of new time interfaces and synchronisation mechanisms.
Based on an initial patch from Don Fry at IBM.

Still TODO:
 1. Testing
 2. NTP synchronisation
 3. Fix wallclock interface a bit
 4. Support for platform timers other than PIT (e.g., HPET, IBM Cyclone)
 5. Scale 64-bit TSC diffs instead of 32-bit, just for sanity
 6. Error-correcting scale factor is still slightly wrong
 7. More testing

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>

diff -r 9697bc63d403 -r 43564304cf94 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Sun Jul 17 14:16:21 2005
+++ b/xen/arch/x86/apic.c       Mon Jul 18 20:22:11 2005
@@ -723,16 +723,8 @@
 static void __init setup_APIC_timer(unsigned int clocks)
 {
     unsigned long flags;
-
     local_irq_save(flags);
-
-    /*
-     * Wait for IRQ0's slice:
-     */
-    wait_timer_tick();
-
     __setup_APIC_LVTT(clocks);
-
     local_irq_restore(flags);
 }

diff -r 9697bc63d403 -r 43564304cf94 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Sun Jul 17 14:16:21 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Mon Jul 18 20:22:11 2005
@@ -19,7 +19,7 @@
 s-obj-y :=
 
 obj-y   += cpu/
-obj-y   += timers/
+#obj-y  += timers/
 obj-$(CONFIG_ACPI_BOOT)         += acpi/
 #c-obj-$(CONFIG_X86_BIOS_REBOOT)        += reboot.o
 c-obj-$(CONFIG_MCA)             += mca.o

diff -r 9697bc63d403 -r 43564304cf94 xen/common/domain.c
--- a/xen/common/domain.c       Sun Jul 17 14:16:21 2005
+++ b/xen/common/domain.c       Mon Jul 18 20:22:11 2005
@@ -42,8 +42,6 @@
     d->domain_id = dom_id;
     v->processor = cpu;
 
-    spin_lock_init(&d->time_lock);
-
     spin_lock_init(&d->big_lock);
 
     spin_lock_init(&d->page_alloc_lock);

diff -r 9697bc63d403 -r 43564304cf94 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c      Sun Jul 17 14:16:21 2005
+++ b/xen/arch/x86/vmx_intercept.c      Mon Jul 18 20:22:11 2005
@@ -24,10 +24,10 @@
 #include <asm/vmx_virpit.h>
 #include <asm/vmx_intercept.h>
 #include <public/io/ioreq.h>
-
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <asm/current.h>
+#include <io_ports.h>
 
 #ifdef CONFIG_VMX
 
@@ -175,7 +175,7 @@
         p->port_mm)
         return 0;
 
-    if (p->addr == 0x43 &&
+    if (p->addr == PIT_MODE &&
         p->dir == 0 &&                          /* write */
         ((p->u.data >> 4) & 0x3) == 0 &&        /* latch command */
         ((p->u.data >> 6) & 0x3) == (vpit->channel)) {/* right channel */
@@ -183,7 +183,7 @@
         pit_latch_io(vpit);
         return 1;
     }
-    if (p->addr == (0x40 + vpit->channel) &&
+    if (p->addr == (PIT_CH0 + vpit->channel) &&
         p->dir == 1) { /* read */
         p->u.data = pit_read_io(vpit);
         resume_pit_io(p);

diff -r 9697bc63d403 -r 43564304cf94 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Sun Jul 17 14:16:21 2005
+++ b/xen/arch/x86/i8259.c      Mon Jul 18 20:22:11 2005
@@ -19,7 +19,7 @@
 #include <asm/bitops.h>
 #include <xen/delay.h>
 #include <asm/apic.h>
-
+#include <io_ports.h>
 
 /*
  * Common place to define all x86 IRQ vectors
@@ -395,9 +395,9 @@
     /* Set the clock to HZ Hz */
 #define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
 #define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
-    outb_p(0x34,0x43);           /* binary, mode 2, LSB/MSB, ch 0 */
-    outb_p(LATCH & 0xff , 0x40); /* LSB */
-    outb(LATCH >> 8 , 0x40);     /* MSB */
+    outb_p(0x34, PIT_MODE);        /* binary, mode 2, LSB/MSB, ch 0 */
+    outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
+    outb(LATCH >> 8, PIT_CH0);     /* MSB */
 
     setup_irq(2, &cascade);
 }
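The LATCH macro used in the i8259.c hunk above rounds CLOCK_TICK_RATE/HZ to the
nearest integer divisor, so the PIT actually ticks at CLOCK_TICK_RATE/LATCH
rather than exactly HZ. A small standalone C sketch of that arithmetic (not
part of the patch; HZ=100 is an assumed value for illustration):

    #include <stdio.h>

    #define CLOCK_TICK_RATE 1193180                /* PIT crystal freq (Hz) */
    #define HZ 100                                 /* assumed tick rate     */
    #define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)  /* round to nearest      */

    int main(void)
    {
        /* The PIT divides its input clock by LATCH, so the real rate is: */
        printf("LATCH=%d actual rate=%.6f Hz\n",
               LATCH, (double)CLOCK_TICK_RATE / LATCH);
        /* With HZ=100: LATCH=11932, actual rate ~= 99.998324 Hz. */
        return 0;
    }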
diff -r 9697bc63d403 -r 43564304cf94 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Sun Jul 17 14:16:21 2005
+++ b/xen/common/page_alloc.c   Mon Jul 18 20:22:11 2005
@@ -351,10 +351,10 @@
 void scrub_heap_pages(void)
 {
     void *p;
-    unsigned long pfn, flags;
+    unsigned long pfn;
+    int cpu = smp_processor_id();
 
     printk("Scrubbing Free RAM: ");
-    watchdog_disable();
 
     for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
     {
@@ -362,12 +362,15 @@
         if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
             printk(".");
 
+        if ( unlikely(softirq_pending(cpu)) )
+            do_softirq();
+
         /* Quick lock-free check. */
         if ( allocated_in_map(pfn) )
             continue;
-        
-        spin_lock_irqsave(&heap_lock, flags);
-        
+
+        spin_lock_irq(&heap_lock);
+
         /* Re-check page status with lock held. */
         if ( !allocated_in_map(pfn) )
         {
@@ -385,11 +388,10 @@
                 unmap_domain_page(p);
             }
         }
-        
-        spin_unlock_irqrestore(&heap_lock, flags);
-    }
-
-    watchdog_enable();
+
+        spin_unlock_irq(&heap_lock);
+    }
+
     printk("done.\n");
 }

diff -r 9697bc63d403 -r 43564304cf94 xen/common/ac_timer.c
--- a/xen/common/ac_timer.c     Sun Jul 17 14:16:21 2005
+++ b/xen/common/ac_timer.c     Mon Jul 18 20:22:11 2005
@@ -202,7 +202,7 @@
     do {
         heap = ac_timers[cpu].heap;
         now  = NOW();
-        
+
         while ( (GET_HEAP_SIZE(heap) != 0) &&
                 ((t = heap[1])->expires < (now + TIMER_SLOP)) )
         {

diff -r 9697bc63d403 -r 43564304cf94 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Sun Jul 17 14:16:21 2005
+++ b/xen/arch/x86/smpboot.c    Mon Jul 18 20:22:11 2005
@@ -40,6 +40,7 @@
 #include <xen/sched.h>
 #include <xen/irq.h>
 #include <xen/delay.h>
+#include <xen/softirq.h>
 #include <asm/current.h>
 #include <asm/mc146818rtc.h>
 #include <asm/desc.h>
@@ -406,6 +407,7 @@
         */
        if (cpu_has_tsc && cpu_khz)
                synchronize_tsc_ap();
+       calibrate_tsc_ap();
 }
 
 int cpucount;
@@ -464,6 +466,8 @@
        /* We can take interrupts now: we're officially "up". */
        local_irq_enable();
+
+       init_percpu_time();
 
        wmb();
        startup_cpu_idle_loop();
@@ -1149,6 +1153,7 @@
         */
        if (cpu_has_tsc && cpucount && cpu_khz)
                synchronize_tsc_bp();
+       calibrate_tsc_bp();
 }
 
 /* These are wrappers to interface to the new boot process. Someone
@@ -1167,22 +1172,21 @@
 int __devinit __cpu_up(unsigned int cpu)
 {
        /* This only works at boot for x86. See "rewrite" above. */
-       if (cpu_isset(cpu, smp_commenced_mask)) {
-               local_irq_enable();
+       if (cpu_isset(cpu, smp_commenced_mask))
                return -ENOSYS;
-       }
 
        /* In case one didn't come up */
-       if (!cpu_isset(cpu, cpu_callin_map)) {
-               local_irq_enable();
+       if (!cpu_isset(cpu, cpu_callin_map))
                return -EIO;
-       }
-
-       local_irq_enable();
+
        /* Unleash the CPU! */
        cpu_set(cpu, smp_commenced_mask);
-       while (!cpu_isset(cpu, cpu_online_map))
+       while (!cpu_isset(cpu, cpu_online_map)) {
                mb();
+               if (softirq_pending(0))
+                       do_softirq();
+       }
+
        return 0;
 }
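Both the scrub_heap_pages() and __cpu_up() hunks above replace blanket
watchdog/interrupt games with explicit polling of pending softirqs inside
long-running loops. A minimal sketch of that pattern, using the same Xen
helpers as the hunks; process_item() is a hypothetical stand-in for the
per-iteration work:

    /*
     * Long-running loop that stays responsive to softirq work, in the
     * style of the hunks above. smp_processor_id(), softirq_pending()
     * and do_softirq() are the Xen primitives used by the patch.
     */
    static void long_running_scan(unsigned long nr_items)
    {
        int cpu = smp_processor_id();
        unsigned long i;

        for ( i = 0; i < nr_items; i++ )
        {
            if ( unlikely(softirq_pending(cpu)) )
                do_softirq();      /* drain timer and other softirq work */
            process_item(i);       /* hypothetical per-item work */
        }
    }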
diff -r 9697bc63d403 -r 43564304cf94 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Sun Jul 17 14:16:21 2005
+++ b/xen/include/xen/sched.h   Mon Jul 18 20:22:11 2005
@@ -92,7 +92,6 @@
     domid_t          domain_id;
 
     shared_info_t   *shared_info;     /* shared data area */
-    spinlock_t       time_lock;
 
     spinlock_t       big_lock;

diff -r 9697bc63d403 -r 43564304cf94 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Sun Jul 17 14:16:21 2005
+++ b/xen/drivers/char/console.c        Mon Jul 18 20:22:11 2005
@@ -635,8 +635,6 @@
 
     debugtrace_bytes = bytes;
 
-    memset(debugtrace_buf, '\0', debugtrace_bytes);
-
     return 0;
 }
 __initcall(debugtrace_init);

diff -r 9697bc63d403 -r 43564304cf94 xen/include/xen/time.h
--- a/xen/include/xen/time.h    Sun Jul 17 14:16:21 2005
+++ b/xen/include/xen/time.h    Mon Jul 18 20:22:11 2005
@@ -30,7 +30,8 @@
 #include <public/xen.h>
 #include <asm/time.h>
 
-extern int init_xen_time();
+extern int init_xen_time(void);
+extern void init_percpu_time(void);
 
 extern unsigned long cpu_khz;

diff -r 9697bc63d403 -r 43564304cf94 xen/include/public/xen.h
--- a/xen/include/public/xen.h  Sun Jul 17 14:16:21 2005
+++ b/xen/include/public/xen.h  Mon Jul 18 20:22:11 2005
@@ -329,12 +329,36 @@
 #endif
 } vcpu_info_t;
 
+typedef struct vcpu_time_info {
+    /*
+     * The following values are updated periodically (and not necessarily
+     * atomically!). The guest OS detects this because 'time_version1' is
+     * incremented just before updating these values, and 'time_version2' is
+     * incremented immediately after. See the Xen-specific Linux code for an
+     * example of how to read these values safely (arch/xen/kernel/time.c).
+     */
+    u32 time_version1;
+    u32 time_version2;
+    u64 tsc_timestamp;   /* TSC at last update of time vals.  */
+    u64 system_time;     /* Time, in nanosecs, since boot.    */
+    /*
+     * Current system time:
+     *   system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul
+     * CPU frequency (Hz):
+     *   ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
+     */
+    u32 tsc_to_system_mul;
+    s8  tsc_shift;
+} vcpu_time_info_t;
+
 /*
  * Xen/kernel shared data -- pointer provided in start_info.
  * NB. We expect that this struct is smaller than a page.
  */
 typedef struct shared_info {
     vcpu_info_t vcpu_data[MAX_VIRT_CPUS];
+
+    vcpu_time_info_t vcpu_time[MAX_VIRT_CPUS];
 
     u32 n_vcpu;
 
@@ -373,33 +397,11 @@
     u32 evtchn_mask[32];
 
     /*
-     * Time: The following abstractions are exposed: System Time, Clock Time,
-     * Domain Virtual Time. Domains can access Cycle counter time directly.
+     * Wallclock time: updated only by control software. Guests should base
+     * their gettimeofday() syscall on this wallclock-base value.
      */
-    u64                cpu_freq;        /* CPU frequency (Hz).               */
-
-    /*
-     * The following values are updated periodically (and not necessarily
-     * atomically!). The guest OS detects this because 'time_version1' is
-     * incremented just before updating these values, and 'time_version2' is
-     * incremented immediately after. See the Xen-specific Linux code for an
-     * example of how to read these values safely (arch/xen/kernel/time.c).
-     */
-    u32                time_version1;
-    u32                time_version2;
-    tsc_timestamp_t    tsc_timestamp;   /* TSC at last update of time vals.  */
-    u64                system_time;     /* Time, in nanosecs, since boot.    */
     u32                wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
     u32                wc_usec;         /* Usecs 00:00:00 UTC, Jan 1, 1970.  */
-    u64                domain_time;     /* Domain virtual time, in nanosecs. */
-
-    /*
-     * Timeout values:
-     * Allow a domain to specify a timeout value in system time and
-     * domain virtual time.
-     */
-    u64                wall_timeout;
-    u64                domain_timeout;
 
     arch_shared_info_t arch;
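The vcpu_time_info comment above describes the version-counter protocol and
the extrapolation formula, and points at the Linux code for a reader. A
minimal guest-side sketch of that protocol, under stated assumptions: the
rdtsc() and rmb() helpers are assumed for illustration, and the real reader
appears in the arch/xen/i386/kernel/time.c hunks later in this patch:

    /*
     * Lock-free read of a vcpu_time_info record. time_version1 is bumped
     * just before an update and time_version2 just after it, so matching
     * snapshots bracket a consistent copy (same loop shape as the patch's
     * __get_time_values_from_xen()). rdtsc()/rmb() are assumed helpers.
     */
    static u64 guest_system_time_ns(volatile vcpu_time_info_t *t)
    {
        u32 version, mul;
        u64 tsc_stamp, system_time, delta;
        s8  shift;

        do {
            version     = t->time_version2;
            rmb();
            tsc_stamp   = t->tsc_timestamp;
            system_time = t->system_time;
            mul         = t->tsc_to_system_mul;
            shift       = t->tsc_shift;
            rmb();
        } while ( version != t->time_version1 );

        /* system_time + ((tsc - tsc_timestamp) << tsc_shift) * mul / 2^32 */
        delta = rdtsc() - tsc_stamp;
        delta = (shift < 0) ? (delta >> -shift) : (delta << shift);
        return system_time + ((delta * (u64)mul) >> 32);
    }

The delta stays small in practice because the hypervisor refreshes the record
about once a second, so the 64-bit multiply does not overflow.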
diff -r 9697bc63d403 -r 43564304cf94 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile      Sun Jul 17 14:16:21 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile      Mon Jul 18 20:22:11 2005
@@ -15,7 +15,7 @@
                ptrace.o quirks.o syscall.o bootflag.o
 
 i386-obj-y := time.o
-obj-y += ../../i386/kernel/timers/
+#obj-y += ../../i386/kernel/timers/
 
 s-obj-y :=

diff -r 9697bc63d403 -r 43564304cf94 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Sun Jul 17 14:16:21 2005
+++ b/xen/arch/x86/time.c       Mon Jul 18 20:22:11 2005
@@ -1,16 +1,12 @@
-/****************************************************************************
- * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
- * (C) 2002-2003 University of Cambridge
- ****************************************************************************
- *
- *        File: i386/time.c
- *      Author: Rolf Neugebar & Keir Fraser
- */
-
-/*
- *  linux/arch/i386/kernel/time.c
- *
- *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+/******************************************************************************
+ * arch/x86/time.c
+ *
+ * Per-CPU time calibration and management.
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ *
+ * Portions from Linux are:
+ * Copyright (c) 1991, 1992, 1995  Linus Torvalds
  */
 
 #include <xen/config.h>
@@ -31,29 +27,74 @@
 #include <asm/processor.h>
 #include <asm/fixmap.h>
 #include <asm/mc146818rtc.h>
-
-/* GLOBAL */
+#include <asm/div64.h>
+#include <io_ports.h>
+
 unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
 int timer_ack = 0;
 unsigned long volatile jiffies;
-
-/* PRIVATE */
-static unsigned int rdtsc_bitshift;   /* Which 32 bits of TSC do we use?   */
-static u64 cpu_freq;                  /* CPU frequency (Hz)                */
-static u32 st_scale_f;                /* Cycles -> ns, fractional part     */
-static u32 st_scale_i;                /* Cycles -> ns, integer part        */
-static u32 shifted_tsc_irq;           /* CPU0's TSC at last 'time update'  */
-static u64 full_tsc_irq;              /* ...ditto, but all 64 bits         */
-static s_time_t stime_irq;            /* System time at last 'time update' */
-static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'.   */
-static rwlock_t time_lock = RW_LOCK_UNLOCKED;
+static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'.   */
+
+struct time_scale {
+    int shift;
+    u32 mul_frac;
+};
+
+struct cpu_time {
+    u64 local_tsc_stamp;
+    s_time_t stime_local_stamp;
+    s_time_t stime_master_stamp;
+    struct time_scale tsc_scale;
+    struct ac_timer calibration_timer;
+} __cacheline_aligned;
+
+static struct cpu_time cpu_time[NR_CPUS];
+
+/* Protected by platform_timer_lock. */
+static s_time_t stime_platform_stamp;
+static u64 platform_timer_stamp;
+static struct time_scale platform_timer_scale;
+static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED;
+
+static inline u32 down_shift(u64 time, int shift)
+{
+    if ( shift < 0 )
+        return (u32)(time >> -shift);
+    return (u32)((u32)time << shift);
+}
+
+/*
+ * 32-bit division of integer dividend and integer divisor yielding
+ * 32-bit fractional quotient.
+ */
+static inline u32 div_frac(u32 dividend, u32 divisor)
+{
+    u32 quotient, remainder;
+    ASSERT(dividend < divisor);
+    __asm__ (
+        "div %4"
+        : "=a" (quotient), "=d" (remainder)
+        : "0" (0), "1" (dividend), "r" (divisor) );
+    return quotient;
+}
+
+/*
+ * 32-bit multiplication of integer multiplicand and fractional multiplier
+ * yielding 32-bit integer product.
+ */
+static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
+{
+    u32 product_int, product_frac;
+    __asm__ (
+        "mul %3"
+        : "=a" (product_frac), "=d" (product_int)
+        : "0" (multiplicand), "r" (multiplier) );
+    return product_int;
+}
 
 void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
 {
-    write_lock_irq(&time_lock);
-
-#ifdef CONFIG_X86_IO_APIC
     if ( timer_ack )
     {
         extern spinlock_t i8259A_lock;
@@ -63,30 +104,9 @@
         inb(0x20);
         spin_unlock(&i8259A_lock);
     }
-#endif
-
-    /*
-     * Updates TSC timestamp (used to interpolate passage of time between
-     * interrupts).
-     */
-    rdtscll(full_tsc_irq);
-    shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift);
 
     /* Update jiffies counter. */
     (*(unsigned long *)&jiffies)++;
-
-    /* Update wall time. */
-    wc_usec += 1000000/HZ;
-    if ( wc_usec >= 1000000 )
-    {
-        wc_usec -= 1000000;
-        wc_sec++;
-    }
-
-    /* Updates system time (nanoseconds since boot). */
-    stime_irq += MILLISECS(1000/HZ);
-
-    write_unlock_irq(&time_lock);
 
     /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
     if ( !cpu_has_apic )
@@ -103,9 +123,9 @@
 #define CALIBRATE_FRAC  20      /* calibrate over 50ms */
 #define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
 
-static unsigned long __init calibrate_tsc(void)
-{
-    u64 start, end, diff;
+static u64 calibrate_boot_tsc(void)
+{
+    u64 start, end;
     unsigned long count;
 
     /* Set the Gate high, disable speaker */
@@ -118,9 +138,9 @@
      * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
      * to begin countdown.
      */
-    outb(0xb0, 0x43);                   /* binary, mode 0, LSB/MSB, Ch 2 */
-    outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
-    outb(CALIBRATE_LATCH >> 8, 0x42);   /* MSB of count */
+    outb(0xb0, PIT_MODE);                  /* binary, mode 0, LSB/MSB, Ch 2 */
+    outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
+    outb(CALIBRATE_LATCH >> 8, PIT_CH2);   /* MSB of count */
 
     rdtscll(start);
     for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
@@ -131,15 +151,147 @@
     if ( count == 0 )
         return 0;
 
-    diff = end - start;
-
-#if defined(__i386__)
-    /* If quotient doesn't fit in 32 bits then we return error (zero). */
-    if ( diff & ~0xffffffffULL )
-        return 0;
-#endif
-
-    return (unsigned long)diff;
+    return ((end - start) * (u64)CALIBRATE_FRAC);
+}
+
+static void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
+{
+    u64 tps64 = ticks_per_sec;
+    u32 tps32;
+    int shift = 0;
+
+    while ( tps64 > (MILLISECS(1000)*2) )
+    {
+        tps64 >>= 1;
+        shift--;
+    }
+
+    tps32 = (u32)tps64;
+    while ( tps32 < (u32)MILLISECS(1000) )
+    {
+        tps32 <<= 1;
+        shift++;
+    }
+
+    ts->mul_frac = div_frac(MILLISECS(1000), tps32);
+    ts->shift = shift;
+}
+
+static atomic_t tsc_calibrate_gang = ATOMIC_INIT(0);
+static unsigned int tsc_calibrate_status = 0;
+
+void calibrate_tsc_bp(void)
+{
+    while ( atomic_read(&tsc_calibrate_gang) != (num_booting_cpus() - 1) )
+        mb();
+
+    outb(CALIBRATE_LATCH & 0xff, PIT_CH2);
+    outb(CALIBRATE_LATCH >> 8, PIT_CH2);
+
+    tsc_calibrate_status = 1;
+    wmb();
+
+    while ( (inb(0x61) & 0x20) == 0 )
+        continue;
+
+    tsc_calibrate_status = 2;
+    wmb();
+
+    while ( atomic_read(&tsc_calibrate_gang) != 0 )
+        mb();
+}
+
+void calibrate_tsc_ap(void)
+{
+    u64 t1, t2, ticks_per_sec;
+
+    atomic_inc(&tsc_calibrate_gang);
+
+    while ( tsc_calibrate_status < 1 )
+        mb();
+
+    rdtscll(t1);
+
+    while ( tsc_calibrate_status < 2 )
+        mb();
+
+    rdtscll(t2);
+
+    ticks_per_sec = (t2 - t1) * (u64)CALIBRATE_FRAC;
+    set_time_scale(&cpu_time[smp_processor_id()].tsc_scale, ticks_per_sec);
+
+    atomic_dec(&tsc_calibrate_gang);
+}
+
+/* Protected by platform_timer_lock. */
+static u64 platform_pit_counter;
+static u16 pit_stamp;
+static struct ac_timer pit_overflow_timer;
+
+static u16 pit_read_counter(void)
+{
+    u16 count;
+    ASSERT(spin_is_locked(&platform_timer_lock));
+    outb(0x80, PIT_MODE);
+    count  = inb(PIT_CH2);
+    count |= inb(PIT_CH2) << 8;
+    return count;
+}
+
+static void pit_overflow(void *unused)
+{
+    u16 counter;
+
+    spin_lock(&platform_timer_lock);
+    counter = pit_read_counter();
+    platform_pit_counter += (u16)(pit_stamp - counter);
+    pit_stamp = counter;
+    spin_unlock(&platform_timer_lock);
+
+    set_ac_timer(&pit_overflow_timer, NOW() + MILLISECS(20));
+}
+
+static void init_platform_timer(void)
+{
+    init_ac_timer(&pit_overflow_timer, pit_overflow, NULL, 0);
+    pit_overflow(NULL);
+    platform_timer_stamp = platform_pit_counter;
+    set_time_scale(&platform_timer_scale, CLOCK_TICK_RATE);
+}
+
+static s_time_t __read_platform_stime(u64 platform_time)
+{
+    u64 diff64 = platform_time - platform_timer_stamp;
+    u32 diff = down_shift(diff64, platform_timer_scale.shift);
+    ASSERT(spin_is_locked(&platform_timer_lock));
+    return (stime_platform_stamp +
+            (u64)mul_frac(diff, platform_timer_scale.mul_frac));
+}
+
+static s_time_t read_platform_stime(void)
+{
+    u64 counter;
+    s_time_t stime;
+
+    spin_lock(&platform_timer_lock);
+    counter = platform_pit_counter + (u16)(pit_stamp - pit_read_counter());
+    stime = __read_platform_stime(counter);
+    spin_unlock(&platform_timer_lock);
+
+    return stime;
+}
+
+static void platform_time_calibration(void)
+{
+    u64 counter;
+    s_time_t stamp;
+
+    spin_lock(&platform_timer_lock);
+    counter = platform_pit_counter + (u16)(pit_stamp - pit_read_counter());
+    stamp = __read_platform_stime(counter);
+    stime_platform_stamp = stamp;
+    platform_timer_stamp = counter;
+    spin_unlock(&platform_timer_lock);
 }
 
 
@@ -233,140 +385,214 @@
  * System Time
  ***************************************************************************/
 
-static inline u64 get_time_delta(void)
-{
-    s32 delta_tsc;
-    u32 low;
-    u64 delta, tsc;
-
-    ASSERT(st_scale_f || st_scale_i);
+s_time_t get_s_time(void)
+{
+    struct cpu_time *t =
        &cpu_time[smp_processor_id()];
+    u64 tsc;
+    u32 delta;
+    s_time_t now;
 
     rdtscll(tsc);
-    low = (u32)(tsc >> rdtsc_bitshift);
-    delta_tsc = (s32)(low - shifted_tsc_irq);
-    if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
-    delta = ((u64)delta_tsc * st_scale_f);
-    delta >>= 32;
-    delta += ((u64)delta_tsc * st_scale_i);
-
-    return delta;
-}
-
-s_time_t get_s_time(void)
-{
-    s_time_t now;
-    unsigned long flags;
-
-    read_lock_irqsave(&time_lock, flags);
-
-    now = stime_irq + get_time_delta();
-
-    /* Ensure that the returned system time is monotonically increasing. */
-    {
-        static s_time_t prev_now = 0;
-        if ( unlikely(now < prev_now) )
-            now = prev_now;
-        prev_now = now;
-    }
-
-    read_unlock_irqrestore(&time_lock, flags);
-
-    return now;
+    delta = down_shift(tsc - t->local_tsc_stamp, t->tsc_scale.shift);
+    now = t->stime_local_stamp + (u64)mul_frac(delta, t->tsc_scale.mul_frac);
+
+    return now;
 }
 
 static inline void __update_dom_time(struct vcpu *v)
 {
-    struct domain *d  = v->domain;
-    shared_info_t *si = d->shared_info;
-
-    spin_lock(&d->time_lock);
-
-    si->time_version1++;
+    struct cpu_time *t = &cpu_time[smp_processor_id()];
+    struct vcpu_time_info *u = &v->domain->shared_info->vcpu_time[v->vcpu_id];
+
+    u->time_version1++;
     wmb();
 
-    si->cpu_freq      = cpu_freq;
-    si->tsc_timestamp = full_tsc_irq;
-    si->system_time   = stime_irq;
-    si->wc_sec        = wc_sec;
-    si->wc_usec       = wc_usec;
+    u->tsc_timestamp     = t->local_tsc_stamp;
+    u->system_time       = t->stime_local_stamp;
+    u->tsc_to_system_mul = t->tsc_scale.mul_frac;
+    u->tsc_shift         = (s8)t->tsc_scale.shift;
 
     wmb();
-    si->time_version2++;
-
-    spin_unlock(&d->time_lock);
+    u->time_version2++;
+
+    /* Should only do this during do_settime(). */
+    v->domain->shared_info->wc_sec  = wc_sec;
+    v->domain->shared_info->wc_usec = wc_usec;
 }
 
 void update_dom_time(struct vcpu *v)
 {
-    unsigned long flags;
-
-    if ( v->domain->shared_info->tsc_timestamp != full_tsc_irq )
-    {
-        read_lock_irqsave(&time_lock, flags);
+    if ( v->domain->shared_info->vcpu_time[v->vcpu_id].tsc_timestamp !=
+         cpu_time[smp_processor_id()].local_tsc_stamp )
        __update_dom_time(v);
-        read_unlock_irqrestore(&time_lock, flags);
-    }
 }
 
 /* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
 void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
 {
-    s64 delta;
-    long _usecs = (long)usecs;
-
-    write_lock_irq(&time_lock);
-
-    delta = (s64)(stime_irq - system_time_base);
-
-    _usecs += (long)(delta/1000);
-    while ( _usecs >= 1000000 )
-    {
-        _usecs -= 1000000;
-        secs++;
-    }
-
-    wc_sec  = secs;
-    wc_usec = _usecs;
-
-    /* Others will pick up the change at the next tick. */
+    u64 x, base_usecs;
+    u32 y;
+
+    base_usecs = system_time_base;
+    do_div(base_usecs, 1000);
+
+    x = (secs * 1000000ULL) + (u64)usecs + base_usecs;
+    y = do_div(x, 1000000);
+
+    wc_sec  = (unsigned long)x;
+    wc_usec = (unsigned long)y;
+
     __update_dom_time(current);
-    send_guest_virq(current, VIRQ_TIMER);
-
-    write_unlock_irq(&time_lock);
-}
-
+}
+
+static void local_time_calibration(void *unused)
+{
+    unsigned int cpu = smp_processor_id();
+
+    /*
+     * System timestamps, extrapolated from local and master oscillators,
+     * taken during this calibration and the previous calibration.
+     */
+    s_time_t prev_local_stime, curr_local_stime;
+    s_time_t prev_master_stime, curr_master_stime;
+
+    /* TSC timestamps taken during this calibration and prev calibration. */
+    u64 prev_tsc, curr_tsc;
+
+    /*
+     * System time and TSC ticks elapsed during the previous calibration
+     * 'epoch'. Also the accumulated error in the local estimate. All these
+     * values end up down-shifted to fit in 32 bits.
+     */
+    u64 stime_elapsed64, tsc_elapsed64, local_stime_error64;
+    u32 stime_elapsed32, tsc_elapsed32, local_stime_error32;
+
+    /* Calculated TSC shift to ensure 32-bit scale multiplier. */
+    int tsc_shift = 0;
+
+    prev_tsc          = cpu_time[cpu].local_tsc_stamp;
+    prev_local_stime  = cpu_time[cpu].stime_local_stamp;
+    prev_master_stime = cpu_time[cpu].stime_master_stamp;
+
+    /* Disable IRQs to get 'instantaneous' current timestamps. */
+    local_irq_disable();
+    rdtscll(curr_tsc);
+    curr_local_stime  = get_s_time();
+    curr_master_stime = read_platform_stime();
+    local_irq_enable();
+
+#if 0
+    printk("PRE%d: tsc=%lld stime=%lld master=%lld\n",
+           cpu, prev_tsc, prev_local_stime, prev_master_stime);
+    printk("CUR%d: tsc=%lld stime=%lld master=%lld %lld\n",
+           cpu, curr_tsc, curr_local_stime, curr_master_stime,
+           platform_pit_counter);
+#endif
+
+    /* Local time warps forward if it lags behind master time. */
+    if ( curr_local_stime < curr_master_stime )
+        curr_local_stime = curr_master_stime;
+
+    stime_elapsed64 = curr_master_stime - prev_master_stime;
+    tsc_elapsed64   = curr_tsc - prev_tsc;
+
+    /*
+     * Error in the local system time estimate. Clamp to epoch time period, or
+     * we could end up with a negative scale factor (time going backwards!).
+     * This effectively clamps the scale factor to >= 0.
+     */
+    local_stime_error64 = curr_local_stime - curr_master_stime;
+    if ( local_stime_error64 > stime_elapsed64 )
+        local_stime_error64 = stime_elapsed64;
+
+    /*
+     * We require 0 < stime_elapsed < 2^31.
+     * This allows us to binary shift a 32-bit tsc_elapsed such that:
+     * stime_elapsed < tsc_elapsed <= 2*stime_elapsed
+     */
+    while ( ((u32)stime_elapsed64 != stime_elapsed64) ||
+            ((s32)stime_elapsed64 < 0) )
+    {
+        stime_elapsed64     >>= 1;
+        tsc_elapsed64       >>= 1;
+        local_stime_error64 >>= 1;
+    }
+
+    /* stime_master_diff (and hence stime_error) now fit in a 32-bit word. */
+    stime_elapsed32     = (u32)stime_elapsed64;
+    local_stime_error32 = (u32)local_stime_error64;
+
+    /* tsc_elapsed <= 2*stime_elapsed */
+    while ( tsc_elapsed64 > (stime_elapsed32 * 2) )
+    {
+        tsc_elapsed64 >>= 1;
+        tsc_shift--;
+    }
+
+    /* Local difference must now fit in 32 bits. */
+    ASSERT((u32)tsc_elapsed64 == tsc_elapsed64);
+    tsc_elapsed32 = (u32)tsc_elapsed64;
+
+    /* tsc_elapsed > stime_elapsed */
+    ASSERT(tsc_elapsed32 != 0);
+    while ( tsc_elapsed32 <= stime_elapsed32 )
+    {
+        tsc_elapsed32 <<= 1;
+        tsc_shift++;
+    }
+
+#if 0
+    printk("---%d: %08x %d\n", cpu,
+           div_frac(stime_elapsed32 - local_stime_error32, tsc_elapsed32),
+           tsc_shift);
+#endif
+
+    /* Record new timestamp information. */
+    cpu_time[cpu].tsc_scale.mul_frac =
+        div_frac(stime_elapsed32 - local_stime_error32, tsc_elapsed32);
+    cpu_time[cpu].tsc_scale.shift    = tsc_shift;
+    cpu_time[cpu].local_tsc_stamp    = curr_tsc;
+    cpu_time[cpu].stime_local_stamp  = curr_local_stime;
+    cpu_time[cpu].stime_master_stamp = curr_master_stime;
+
+    set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + MILLISECS(1000));
+
+    if ( cpu == 0 )
+        platform_time_calibration();
+}
+
+void init_percpu_time(void)
+{
+    unsigned int cpu = smp_processor_id();
+    unsigned long flags;
+    s_time_t now;
+
+    local_irq_save(flags);
+    rdtscll(cpu_time[cpu].local_tsc_stamp);
+    now = (cpu == 0) ? 0 : read_platform_stime();
+    local_irq_restore(flags);
+
+    cpu_time[cpu].stime_master_stamp = now;
+    cpu_time[cpu].stime_local_stamp  = now;
+
+    init_ac_timer(&cpu_time[cpu].calibration_timer,
+                  local_time_calibration, NULL, cpu);
+    set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + MILLISECS(1000));
+}
 
 /* Late init function (after all CPUs are booted). */
-int __init init_xen_time()
-{
-    u64 scale;
-    unsigned int cpu_ghz;
-
-    cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
-    for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
-        continue;
-
-    scale  = 1000000000LL << (32 + rdtsc_bitshift);
-    scale /= cpu_freq;
-    st_scale_f = scale & 0xffffffff;
-    st_scale_i = scale >> 32;
+int __init init_xen_time(void)
+{
+    wc_sec = get_cmos_time();
 
     local_irq_disable();
 
-    /* System time ticks from zero. */
-    rdtscll(full_tsc_irq);
-    stime_irq = (s_time_t)0;
-    shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift);
-
-    /* Wallclock time starts as the initial RTC time. */
-    wc_sec = get_cmos_time();
+    init_percpu_time();
+
+    stime_platform_stamp = 0;
+    init_platform_timer();
 
     local_irq_enable();
-
-    printk("Time init:\n");
-    printk(".... cpu_freq: %08X:%08X\n", (u32)(cpu_freq>>32),(u32)cpu_freq);
-    printk(".... scale: %08X:%08X\n", (u32)(scale>>32),(u32)scale);
-    printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_usec);
 
     return 0;
 }
 
@@ -375,15 +601,12 @@
 /* Early init function. */
 void __init early_time_init(void)
 {
-    unsigned long ticks_per_frac = calibrate_tsc();
-
-    if ( !ticks_per_frac )
-        panic("Error calibrating TSC\n");
-
-    cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
-
-    cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC;
-
+    u64 tmp = calibrate_boot_tsc();
+
+    set_time_scale(&cpu_time[0].tsc_scale, tmp);
+
+    do_div(tmp, 1000);
+    cpu_khz = (unsigned long)tmp;
     printk("Detected %lu.%03lu MHz processor.\n",
            cpu_khz / 1000, cpu_khz % 1000);
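set_time_scale() above folds a ticks-per-second rate into a (shift, 32-bit
fractional multiplier) pair, which get_s_time() then applies exactly as the
vcpu_time_info comment earlier describes. A portable sketch of the same
fixed-point arithmetic, with plain-C stand-ins for the inline-asm
div_frac()/mul_frac() helpers (the 2.4 GHz rate is an assumed example):

    #include <stdint.h>
    #include <stdio.h>

    /* (dividend << 32) / divisor, valid for dividend < divisor. */
    static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
    {
        return (uint32_t)(((uint64_t)dividend << 32) / divisor);
    }

    /* High word of the 32x32->64 product, i.e. x * (mul / 2^32). */
    static uint32_t mul_frac(uint32_t x, uint32_t mul)
    {
        return (uint32_t)(((uint64_t)x * mul) >> 32);
    }

    int main(void)
    {
        uint64_t tps = 2400000000ULL;   /* assumed 2.4 GHz TSC */
        int shift = 0;

        /* Normalise into [10^9, 2*10^9), as set_time_scale() does. */
        while ( tps > 2000000000ULL ) { tps >>= 1; shift--; }
        while ( tps < 1000000000ULL ) { tps <<= 1; shift++; }

        /* mul/2^32 = ns per (shifted) tick. */
        uint32_t mul = div_frac(1000000000u, (uint32_t)tps);

        /* 2400 ticks = 1us at 2.4 GHz; expect ~1000ns (999 truncated). */
        uint32_t ticks  = 2400;
        uint32_t scaled = (shift < 0) ? (ticks >> -shift) : (ticks << shift);
        printf("shift=%d mul_frac=%u -> %u ns\n",
               shift, mul, mul_frac(scaled, mul));
        return 0;
    }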
diff -r 9697bc63d403 -r 43564304cf94 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h        Sun Jul 17 14:16:21 2005
+++ b/xen/include/asm-x86/time.h        Mon Jul 18 20:22:11 2005
@@ -4,4 +4,7 @@
 
 extern int timer_ack;
 
+extern void calibrate_tsc_bp(void);
+extern void calibrate_tsc_ap(void);
+
 #endif /* __X86_TIME_H__ */
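Back in the xen/arch/x86/time.c hunks, pit_overflow() extends the 16-bit
down-counting PIT into the 64-bit platform_pit_counter by sampling at least
once per wrap (every 20ms there, against a ~55ms full period at 1193180 Hz)
and accumulating (u16)(pit_stamp - counter). A small sketch of why the 16-bit
modular subtraction absorbs the wraparound:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t accumulated = 0;

        /* Down-counter previously read 100, then wrapped up to 0xFF00. */
        uint16_t pit_stamp = 100;
        uint16_t counter   = 0xFF00;

        /* (100 - 0xFF00) mod 2^16 = 356 ticks actually elapsed. */
        accumulated += (uint16_t)(pit_stamp - counter);

        printf("elapsed ticks = %llu\n", (unsigned long long)accumulated);
        return 0;
    }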
diff -r 9697bc63d403 -r 43564304cf94 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Sun Jul 17 14:16:21 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Mon Jul 18 20:22:11 2005
@@ -104,24 +104,16 @@
 struct timer_opts *cur_timer = &timer_tsc;
 
 /* These are peridically updated in shared_info, and then copied here. */
-u32 shadow_tsc_stamp;
-u64 shadow_system_time;
-static u32 shadow_time_version;
+struct shadow_time_info {
+       u64 tsc_timestamp;     /* TSC at last update of time vals.  */
+       u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
+       u32 tsc_to_nsec_mul;
+       u32 tsc_to_usec_mul;
+       int tsc_shift;
+       u32 version;
+};
+static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
 static struct timeval shadow_tv;
-
-/*
- * We use this to ensure that gettimeofday() is monotonically increasing. We
- * only break this guarantee if the wall clock jumps backwards "a long way".
- */
-static struct timeval last_seen_tv = {0,0};
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-/* Periodically propagate synchronised time base to the RTC and to Xen. */
-static long last_rtc_update, last_update_to_xen;
-#endif
-
-/* Periodically take synchronised time base from Xen, if we need it. */
-static long last_update_from_xen;   /* UTC seconds when last read Xen clock. */
 
 /* Keep track of last time we did processing/updating of jiffies and xtime. */
 static u64 processed_system_time;   /* System time (ns) at last processing. */
@@ -164,26 +156,147 @@
 #define INDEPENDENT_WALLCLOCK() \
     (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
 
+int tsc_disable __initdata = 0;
+
+static void delay_tsc(unsigned long loops)
+{
+       unsigned long bclock, now;
+
+       rdtscl(bclock);
+       do
+       {
+               rep_nop();
+               rdtscl(now);
+       } while ((now-bclock) < loops);
+}
+
+struct timer_opts timer_tsc = {
+       .name = "tsc",
+       .delay = delay_tsc,
+};
+
+static inline u32 down_shift(u64 time, int shift)
+{
+       if ( shift < 0 )
+               return (u32)(time >> -shift);
+       return (u32)((u32)time << shift);
+}
+
+/*
+ * 32-bit multiplication of integer multiplicand and fractional multiplier
+ * yielding 32-bit integer product.
+ */
+static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
+{
+       u32 product_int, product_frac;
+       __asm__ (
+               "mul %3"
+               : "=a" (product_frac), "=d" (product_int)
+               : "0" (multiplicand), "r" (multiplier) );
+       return product_int;
+}
+
+void init_cpu_khz(void)
+{
+       u64 __cpu_khz = 1000000ULL << 32;
+       struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0];
+       do_div(__cpu_khz, info->tsc_to_system_mul);
+       cpu_khz = down_shift(__cpu_khz, -info->tsc_shift);
+       printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
+              cpu_khz / 1000, cpu_khz % 1000);
+}
+
+static u64 get_nsec_offset(struct shadow_time_info *shadow)
+{
+       u64 now;
+       u32 delta;
+       rdtscll(now);
+       delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
+       return mul_frac(delta, shadow->tsc_to_nsec_mul);
+}
+
+static unsigned long get_usec_offset(struct shadow_time_info *shadow)
+{
+       u64 now;
+       u32 delta;
+       rdtscll(now);
+       delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
+       return mul_frac(delta, shadow->tsc_to_usec_mul);
+}
+
+static void update_wallclock(void)
+{
+       shared_info_t *s = HYPERVISOR_shared_info;
+       long wtm_nsec;
+       time_t wtm_sec, sec;
+       s64 nsec;
+
+       shadow_tv.tv_sec  = s->wc_sec;
+       shadow_tv.tv_usec = s->wc_usec;
+
+       if (INDEPENDENT_WALLCLOCK())
+               return;
+
+       if ((time_status & STA_UNSYNC) != 0)
+               return;
+
+       /* Adjust shadow for jiffies that haven't updated xtime yet. */
+       shadow_tv.tv_usec -=
+               (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
+       HANDLE_USEC_UNDERFLOW(shadow_tv);
+
+       /* Update our unsynchronised xtime appropriately. */
+       sec = shadow_tv.tv_sec;
+       nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
+
+       __normalize_time(&sec, &nsec);
+       wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
+       wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+
+       set_normalized_timespec(&xtime, sec, nsec);
+       set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+}
+
 /*
  * Reads a consistent set of time-base values from Xen, into a shadow data
  * area. Must be called with the xtime_lock held for writing.
  */
 static void __get_time_values_from_xen(void)
 {
-       shared_info_t *s = HYPERVISOR_shared_info;
+       shared_info_t           *s = HYPERVISOR_shared_info;
+       struct vcpu_time_info   *src;
+       struct shadow_time_info *dst;
+
+       src = &s->vcpu_time[smp_processor_id()];
+       dst = &per_cpu(shadow_time, smp_processor_id());
 
        do {
-               shadow_time_version = s->time_version2;
+               dst->version = src->time_version2;
                rmb();
-               shadow_tv.tv_sec    = s->wc_sec;
-               shadow_tv.tv_usec   = s->wc_usec;
-               shadow_tsc_stamp    = (u32)s->tsc_timestamp;
-               shadow_system_time  = s->system_time;
+               dst->tsc_timestamp    = src->tsc_timestamp;
+               dst->system_timestamp = src->system_time;
+               dst->tsc_to_nsec_mul  = src->tsc_to_system_mul;
+               dst->tsc_shift        = src->tsc_shift;
                rmb();
        }
-       while (shadow_time_version != s->time_version1);
-
-       cur_timer->mark_offset();
+       while (dst->version != src->time_version1);
+
+       dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
+
+       if ((shadow_tv.tv_sec != s->wc_sec) ||
+           (shadow_tv.tv_usec != s->wc_usec))
+               update_wallclock();
+}
+
+static inline int time_values_up_to_date(int cpu)
+{
+       struct vcpu_time_info   *src;
+       struct shadow_time_info *dst;
+
+       src = &HYPERVISOR_shared_info->vcpu_time[smp_processor_id()];
+       dst = &per_cpu(shadow_time, smp_processor_id());
+
+       return (dst->version == src->time_version2);
 }
 
 #define TIME_VALUES_UP_TO_DATE \
@@ -229,13 +342,18 @@
        unsigned long max_ntp_tick;
        unsigned long flags;
        s64 nsec;
+       unsigned int cpu;
+       struct shadow_time_info *shadow;
+
+       cpu = get_cpu();
+       shadow = &per_cpu(shadow_time, cpu);
 
        do {
                unsigned long lost;
 
                seq = read_seqbegin(&xtime_lock);
 
-               usec = cur_timer->get_offset();
+               usec = get_usec_offset(shadow);
                lost = jiffies - wall_jiffies;
 
                /*
@@ -256,11 +374,11 @@
                sec = xtime.tv_sec;
                usec += (xtime.tv_nsec / NSEC_PER_USEC);
 
-               nsec = shadow_system_time - processed_system_time;
+               nsec = shadow->system_timestamp - processed_system_time;
                __normalize_time(&sec, &nsec);
                usec += (long)nsec / NSEC_PER_USEC;
 
-               if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
+               if (unlikely(!time_values_up_to_date(cpu))) {
                        /*
                         * We may have blocked for a long time,
                         * rendering our calculations invalid
@@ -275,19 +393,11 @@
                }
        } while (read_seqretry(&xtime_lock, seq));
 
+       put_cpu();
+
        while (usec >= USEC_PER_SEC) {
                usec -= USEC_PER_SEC;
                sec++;
-       }
-
-       /* Ensure that time-of-day is monotonically increasing. */
-       if ((sec < last_seen_tv.tv_sec) ||
-           ((sec == last_seen_tv.tv_sec) && (usec < last_seen_tv.tv_usec))) {
-               sec = last_seen_tv.tv_sec;
-               usec = last_seen_tv.tv_usec;
-       } else {
-               last_seen_tv.tv_sec = sec;
-               last_seen_tv.tv_usec = usec;
        }
 
        tv->tv_sec = sec;
@@ -302,12 +412,17 @@
        long wtm_nsec;
        s64 nsec;
        struct timespec xentime;
+       unsigned int cpu;
+       struct shadow_time_info *shadow;
 
        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;
 
        if (!INDEPENDENT_WALLCLOCK())
                return 0; /* Silent failure? */
+
+       cpu = get_cpu();
+       shadow = &per_cpu(shadow_time, cpu);
 
        write_seqlock_irq(&xtime_lock);
 
@@ -317,9 +432,8 @@
         * be stale, so we can retry with fresh ones.
         */
 again:
-       nsec = (s64)tv->tv_nsec -
-              ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
-       if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
+       nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
+       if (unlikely(!time_values_up_to_date(cpu))) {
                __get_time_values_from_xen();
                goto again;
        }
@@ -335,7 +449,7 @@
         */
        nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
 
-       nsec -= (shadow_system_time - processed_system_time);
+       nsec -= (shadow->system_timestamp - processed_system_time);
 
        __normalize_time(&sec, &nsec);
        wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
@@ -349,23 +463,20 @@
        time_maxerror = NTP_PHASE_LIMIT;
        time_esterror = NTP_PHASE_LIMIT;
 
-       /* Reset all our running time counts. They make no sense now. */
-       last_seen_tv.tv_sec = 0;
-       last_update_from_xen = 0;
-
 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
        if (xen_start_info.flags & SIF_INITDOMAIN) {
                dom0_op_t op;
-               last_rtc_update = last_update_to_xen = 0;
                op.cmd = DOM0_SETTIME;
                op.u.settime.secs        = xentime.tv_sec;
                op.u.settime.usecs       = xentime.tv_nsec / NSEC_PER_USEC;
-               op.u.settime.system_time = shadow_system_time;
+               op.u.settime.system_time = shadow->system_timestamp;
                write_sequnlock_irq(&xtime_lock);
                HYPERVISOR_dom0_op(&op);
        } else
 #endif
                write_sequnlock_irq(&xtime_lock);
+
+       put_cpu();
 
        clock_was_set();
        return 0;
@@ -403,9 +514,30 @@
  */
 unsigned long long monotonic_clock(void)
 {
-       return cur_timer->monotonic_clock();
+       int cpu = get_cpu();
+       struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
+       s64 off;
+       unsigned long flags;
+
+       for ( ; ; ) {
+               off = get_nsec_offset(shadow);
+               if (time_values_up_to_date(cpu))
+                       break;
+               write_seqlock_irqsave(&xtime_lock, flags);
+               __get_time_values_from_xen();
+               write_sequnlock_irqrestore(&xtime_lock, flags);
+       }
+
+       put_cpu();
+
+       return shadow->system_timestamp + off;
 }
 EXPORT_SYMBOL(monotonic_clock);
+
+unsigned long long sched_clock(void)
+{
+       return monotonic_clock();
+}
 
 #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
 unsigned long profile_pc(struct pt_regs *regs)
@@ -427,27 +559,26 @@
 static inline void do_timer_interrupt(int irq, void *dev_id,
                                      struct pt_regs *regs)
 {
-       time_t wtm_sec, sec;
-       s64 delta, delta_cpu, nsec;
-       long sec_diff, wtm_nsec;
+       s64 delta, delta_cpu;
        int cpu = smp_processor_id();
+       struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
 
        do {
                __get_time_values_from_xen();
 
-               delta = delta_cpu = (s64)shadow_system_time +
-                       ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
+               delta = delta_cpu =
+                       shadow->system_timestamp + get_nsec_offset(shadow);
                delta     -= processed_system_time;
                delta_cpu -= per_cpu(processed_system_time, cpu);
        }
-       while (!TIME_VALUES_UP_TO_DATE);
+       while (!time_values_up_to_date(cpu));
 
        if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
                printk("Timer ISR/%d: Time went backwards: "
                       "delta=%lld cpu_delta=%lld shadow=%lld "
                       "off=%lld processed=%lld cpu_processed=%lld\n",
-                      cpu, delta, delta_cpu, shadow_system_time,
-                      ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC),
+                      cpu, delta, delta_cpu, shadow->system_timestamp,
+                      (s64)get_nsec_offset(shadow),
                       processed_system_time,
                       per_cpu(processed_system_time, cpu));
                for (cpu = 0; cpu < num_online_cpus(); cpu++)
@@ -470,76 +601,6 @@
                update_process_times(user_mode(regs));
                profile_tick(CPU_PROFILING, regs);
        }
-
-       if (cpu != 0)
-               return;
-
-       /*
-        * Take synchronised time from Xen once a minute if we're not
-        * synchronised ourselves, and we haven't chosen to keep an independent
-        * time base.
-        */
-       if (!INDEPENDENT_WALLCLOCK() &&
-           ((time_status & STA_UNSYNC) != 0) &&
-           (xtime.tv_sec > (last_update_from_xen + 60))) {
-               /* Adjust shadow for jiffies that haven't updated xtime yet. */
-               shadow_tv.tv_usec -=
-                       (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
-               HANDLE_USEC_UNDERFLOW(shadow_tv);
-
-               /*
-                * Reset our running time counts if they are invalidated by
-                * a warp backwards of more than 500ms.
-                */
-               sec_diff = xtime.tv_sec - shadow_tv.tv_sec;
-               if (unlikely(abs(sec_diff) > 1) ||
-                   unlikely(((sec_diff * USEC_PER_SEC) +
-                             (xtime.tv_nsec / NSEC_PER_USEC) -
-                             shadow_tv.tv_usec) > 500000)) {
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-                       last_rtc_update = last_update_to_xen = 0;
-#endif
-                       last_seen_tv.tv_sec = 0;
-               }
-
-               /* Update our unsynchronised xtime appropriately. */
-               sec = shadow_tv.tv_sec;
-               nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
-
-               __normalize_time(&sec, &nsec);
-               wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-               wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-               set_normalized_timespec(&xtime, sec, nsec);
-               set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-               last_update_from_xen = sec;
-       }
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       if (!(xen_start_info.flags & SIF_INITDOMAIN))
-               return;
-
-       /* Send synchronised time to Xen approximately every minute. */
-       if (((time_status & STA_UNSYNC) == 0) &&
-           (xtime.tv_sec > (last_update_to_xen + 60))) {
-               dom0_op_t op;
-               struct timeval tv;
-
-               tv.tv_sec   = xtime.tv_sec;
-               tv.tv_usec  = xtime.tv_nsec / NSEC_PER_USEC;
-               tv.tv_usec += (jiffies - wall_jiffies) * (USEC_PER_SEC/HZ);
-               HANDLE_USEC_OVERFLOW(tv);
-
-               op.cmd = DOM0_SETTIME;
-               op.u.settime.secs        = tv.tv_sec;
-               op.u.settime.usecs       = tv.tv_usec;
-               op.u.settime.system_time = shadow_system_time;
-               HYPERVISOR_dom0_op(&op);
-
-               last_update_to_xen = xtime.tv_sec;
-       }
-#endif
 }
 
 /*
@@ -731,12 +792,10 @@
        xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
        set_normalized_timespec(&wall_to_monotonic,
                -xtime.tv_sec, -xtime.tv_nsec);
-       processed_system_time = shadow_system_time;
+       processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
        per_cpu(processed_system_time, 0) = processed_system_time;
 
-       if (timer_tsc_init.init(NULL) != 0)
-               BUG();
-       printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
+       init_cpu_khz();
 
 #if defined(__x86_64__)
        vxtime.mode = VXTIME_TSC;
@@ -807,21 +866,15 @@
 /* No locking required. We are only CPU running, and interrupts are off. */
 void time_resume(void)
 {
-       if (timer_tsc_init.init(NULL) != 0)
-               BUG();
+       init_cpu_khz();
 
        /* Get timebases for new environment. */
        __get_time_values_from_xen();
 
        /* Reset our own concept of passage of system time. */
-       processed_system_time = shadow_system_time;
+       processed_system_time =
+               per_cpu(shadow_time, smp_processor_id()).system_timestamp;
        per_cpu(processed_system_time, 0) = processed_system_time;
-
-       /* Accept a warp in UTC (wall-clock) time. */
-       last_seen_tv.tv_sec = 0;
-
-       /* Make sure we resync UTC time with Xen on next timer interrupt. */
-       last_update_from_xen = 0;
 }
 
 #ifdef CONFIG_SMP
@@ -832,7 +885,8 @@
        do {
                seq = read_seqbegin(&xtime_lock);
-               per_cpu(processed_system_time, cpu) = shadow_system_time;
+               per_cpu(processed_system_time, cpu) =
+                       per_cpu(shadow_time, cpu).system_timestamp;
        } while (read_seqretry(&xtime_lock, seq));
 
        per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
@@ -861,3 +915,13 @@
        return 0;
 }
 __initcall(xen_sysctl_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog