x86/time: adjust local system time initialization Using the bare return value from read_platform_stime() is not suitable when local_time_calibration() is going to use its fat path: Divergence of several dozen microseconds between NOW() return values on different CPUs results when platform and local time don't stay in close sync. Latch local and platform time on the CPU initiating AP bringup, such that the AP can use these values to seed its stime_local_stamp with as little of an error as possible. The boot CPU, otoh, can simply calculate the correct initial value (other CPUs could do so too with even greater accuracy than the approach being introduced, but that can work only if all CPUs' TSCs start ticking at the same time, which generally can't be assumed to be the case on multi-socket systems). This slightly defers init_percpu_time() (moved ahead by commit dd2658f966 ["x86/time: initialise time earlier during start_secondary()"]) in order to reduce as much as possible the gap between populating the stamps and consuming them. Signed-off-by: Jan Beulich --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -328,12 +328,12 @@ void start_secondary(void *unused) percpu_traps_init(); - init_percpu_time(); - cpu_init(); smp_callin(); + init_percpu_time(); + setup_secondary_APIC_clock(); /* @@ -996,6 +996,8 @@ int __cpu_up(unsigned int cpu) if ( (ret = do_boot_cpu(apicid, cpu)) != 0 ) return ret; + time_latch_stamps(); + set_cpu_state(CPU_STATE_ONLINE); while ( !cpu_online(cpu) ) { --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -1328,21 +1328,51 @@ static void time_calibration(void *unuse &r, 1); } +static struct { + s_time_t local_stime, master_stime; +} ap_bringup_ref; + +void time_latch_stamps(void) { + unsigned long flags; + u64 tsc; + + local_irq_save(flags); + ap_bringup_ref.master_stime = read_platform_stime(); + tsc = rdtsc(); + local_irq_restore(flags); + + ap_bringup_ref.local_stime = get_s_time_fixed(tsc); +} + void init_percpu_time(void) { struct cpu_time *t = &this_cpu(cpu_time); unsigned long flags; + u64 tsc; s_time_t now; /* Initial estimate for TSC rate. */ t->tsc_scale = per_cpu(cpu_time, 0).tsc_scale; local_irq_save(flags); - t->local_tsc_stamp = rdtsc(); now = read_platform_stime(); + tsc = rdtsc(); local_irq_restore(flags); t->stime_master_stamp = now; + /* + * To avoid a discontinuity (TSC and platform clock can't be expected + * to be in perfect sync), initialization here needs to match up with + * local_time_calibration()'s decision whether to use its fast path. + */ + if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) + { + if ( system_state < SYS_STATE_smp_boot ) + now = get_s_time_fixed(tsc); + else + now += ap_bringup_ref.local_stime - ap_bringup_ref.master_stime; + } + t->local_tsc_stamp = tsc; t->stime_local_stamp = now; } --- a/xen/include/asm-x86/time.h +++ b/xen/include/asm-x86/time.h @@ -40,6 +40,7 @@ int time_suspend(void); int time_resume(void); void init_percpu_time(void); +void time_latch_stamps(void); struct ioreq; int hwdom_pit_access(struct ioreq *ioreq);