|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH 1/5] x86/time: deal with negative deltas in get_s_time_fixed()
```
(XEN) update stime on time calibrate 0, 8564145820102 -> 8565145861597 (8565145862216, 0) (XEN) update stime on time calibrate 1, 8564145820129 -> 8565145861609 (8565145863957, 0) (XEN) update stime on time calibrate 3, 8564145819996 -> 8565145861491 (8565145864800, 0) (XEN) update stime on time calibrate 2, 8564145820099 -> 8565145861609 (8565145865372, 0)
8565145861609 - 8565145861491 = 115 * 3 (3.00 GHz) = 345 lag ```
6 ``` (XEN) update stime on time calibrate 0, 22914730829200 -> 22915730869993 (22915730870665, 0) (XEN) update stime on time calibrate 1, 22914730829073 -> 22915730869889 (22915730870693, 0) (XEN) update stime on time calibrate 2, 22914730829052 -> 22915730869841 (22915730872231, 0) (XEN) update stime on time calibrate 3, 22914730828892 -> 22915730869696 (22915730872096, 0)
22915730869993 - 22915730869696 = 297 * 3 (3.00 GHz) = 891 lag ```
ucode=scan dom0_mem=53923M,max:53923M dom0_max_vcpus=4-96 dom0_vcpus_pin=0 force-ept=1 ept=no-ad,no-pml hap_1gb=0 hap_2mb=0 altp2m=1 hpet=legacy-replacement smt=1 spec-ctrl=no gnttab_max_frames=512 cpufreq=xen:performance max_cstate=1 sched=credit sched-gran=cpu apicv=0 sched_credit_tslice_ms=5 sched_ratelimit_us=500 ```
Processors I tested on:
``` Intel(R) Core(TM) i5-3330 CPU @ 3.00GHz
Flags: fpu de tsc msr pae mce cx8 apic sep mca cmov pat clflush acpi mmx fxsr sse sse2 ss ht syscall nx rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid tsc_known_freq pni pclmulqdq monitor est ssse3 cx16 sse4_1 sse4_2 popcnt aes xsave avx f16c hypervisor lahf_lm cpuid_fault fsgsbase erms xsaveopt arch_capabilities ``` ``` Intel(R) Xeon(R) Gold 5318Y CPU @ 2.10GHz x2 (NUMA)
Flags: fpu de tsc msr pae mce cx8 apic sep mca cmov pat clflush acpi mmx fxsr sse sse2 ss ht syscall nx rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid tsc_known_freq pni pclmulqdq monitor est ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch cpuid_fault ibpb fsgsbase bmi1 avx2 bmi2 erms rtm avx512f avx512dq rdseed adx avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 avx512vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq rdpid fsrm md_clear arch_capabilities ```
Next I moved the code to ring 3 (userspace) to speed up playback:
``` #include <stdint.h> #include <stdio.h>
/* Read the CPU timestamp counter: RDTSC returns it split across EDX:EAX. */
static __inline__ unsigned long long rdtsc(void)
{
    unsigned high, low;

    __asm__ __volatile__ ( "rdtsc" : "=a" (low), "=d" (high) );
    return ((unsigned long long)high << 32) | (unsigned long long)low;
}
/* Milliseconds expressed in nanoseconds (system time unit). */
#define MILLISECS(_ms) ((int64_t)((_ms) * 1000000ULL))

/* Snapshot of one CPU's clocks: raw TSC plus the local and master
 * system times recorded at the same instant. */
struct cpu_time_stamp {
    uint64_t local_tsc;
    int64_t local_stime;
    int64_t master_stime;
};

/* 32.32 fixed-point conversion factor: apply 'shift' first (negative
 * means shift right), then multiply by mul_frac / 2^32. */
struct time_scale {
    int shift;
    uint32_t mul_frac;
};

/* Return (dividend << 32) / divisor.
 * NOTE(review): requires dividend < divisor, otherwise the 32-bit
 * quotient overflows and DIVL faults. */
static inline uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
    uint32_t quotient, remainder;

    __asm__ ( "divl %4"
              : "=a" (quotient), "=d" (remainder)
              : "0" (0), "1" (dividend), "r" (divisor) );
    return quotient;
}

/* Compute the time_scale that converts ticks (at ticks_per_sec) to ns. */
void set_time_scale(struct time_scale *ts, uint64_t ticks_per_sec)
{
    uint64_t tps64 = ticks_per_sec;
    uint32_t tps32;
    int shift = 0;

    /* Halve the rate until it is below 2e9 so it fits in 32 bits. */
    while ( tps64 > (MILLISECS(1000) * 2) )
    {
        tps64 >>= 1;
        shift--;
    }

    tps32 = (uint32_t)tps64;

    /* Double the rate until it exceeds 1e9, keeping mul_frac precise. */
    while ( tps32 <= (uint32_t)MILLISECS(1000) )
    {
        tps32 <<= 1;
        shift++;
    }

    ts->mul_frac = div_frac(MILLISECS(1000), tps32);
    ts->shift = shift;
}
/* Convert a tick delta to ns: ((delta << shift) * mul_frac) >> 32.
 * NOTE(review): a negative shift discards low-order bits of delta before
 * the multiply -- this truncation is the error source the post describes. */
uint64_t scale_delta(uint64_t delta, const struct time_scale *scale)
{
    uint64_t product;

    if ( scale->shift < 0 )
        delta >>= -scale->shift;
    else
        delta <<= scale->shift;

    /* Full 64x64->128 multiply, then take bits 32..95 of the product. */
    __asm__ ( "mulq %2 ; shrd $32,%1,%0"
              : "=a" (product), "=d" (delta)
              : "rm" (delta), "0" ((uint64_t)scale->mul_frac) );

    return product;
}
#define _TS_MUL_FRAC_IDENTITY 0x80000000UL

/* Build the inverse of 'scale', so applying both in sequence is
 * (approximately) the identity; used to map ns back to TSC ticks. */
static inline struct time_scale scale_reciprocal(struct time_scale scale)
{
    struct time_scale reciprocal;
    uint32_t dividend = _TS_MUL_FRAC_IDENTITY;

    reciprocal.shift = 1 - scale.shift;

    /* Normalise the dividend below mul_frac so the 32-bit DIVL cannot fault. */
    while ( dividend >= scale.mul_frac )
    {
        dividend >>= 1;
        reciprocal.shift++;
    }

    __asm__ ( "divl %4"
              : "=a" (reciprocal.mul_frac), "=d" (dividend)
              : "0" (0), "1" (dividend), "r" (scale.mul_frac) );

    return reciprocal;
}
/* System time (ns) at TSC value at_tsc (0 means "sample the TSC now"),
 * extrapolated from calibration stamp t via scale ts.
 * NOTE(review): delta is unsigned, so a TSC older than t->local_tsc wraps
 * to a huge value -- the "negative delta" case the patch series targets. */
int64_t get_s_time_fixed(const struct cpu_time_stamp *t, const struct time_scale *ts, uint64_t at_tsc)
{
    uint64_t tsc = at_tsc ? at_tsc : rdtsc();
    uint64_t delta = tsc - t->local_tsc;

    return t->local_stime + scale_delta(delta, ts);
}
int main(void)
{
    struct cpu_time_stamp ct;
    struct time_scale ts, back;
    uint64_t start = rdtsc();

    set_time_scale(&ts, 3000000000);   /* assume a 3.00 GHz TSC */
    back = scale_reciprocal(ts);

    /* Seed the stamp as if the first calibration happened at 'start'. */
    ct.local_tsc = start;
    ct.local_stime = scale_delta(start, &ts);

    /* Endlessly "re-calibrate" and print the raw TSC next to the value
     * obtained by the round trip TSC -> stime -> TSC; divergence between
     * the two columns shows the accumulating truncation error. */
    for ( ; ; )
    {
        uint64_t new_tsc = rdtsc();

        ct.local_stime = get_s_time_fixed(&ct, &ts, new_tsc);
        ct.local_tsc = new_tsc;

        uint64_t tmp_tsc = rdtsc();
        printf("%lu %lu\n", tmp_tsc,
               scale_delta(get_s_time_fixed(&ct, &ts, tmp_tsc), &back));
    }

    return 0;
}
```
It's enough to run the loop for 10-20 seconds to see the problem. scale_delta truncates the least significant bits during calculation, which causes the error to accumulate, at different rates on different cores, depending on the least significant bits at the time of calibration.
Now let's talk about why dwm reacts this way. When a snapshot is reverted, last_guest_time in hvm_get_guest_time_fixed is set to 0, which doesn't prevent time from flowing backwards. This means that cache_tsc_offset in hvm_get_guest_tsc_fixed may be configured correctly on one physical core, but due to scheduling onto a physical core with a lagging TSC, the guest may occasionally see a TSC value lower than the one it saw before the snapshot was reverted. If this happens to the dwm code, it terminates with an error.
A quick solution to this problem might be to save the last_seen_tsc parameter in a snapshot for each core, followed by validation.
The correct solution is to remove the rounding of the least significant bits from the sequence. On Wed, Jan 7, 2026 at 11:02 AM Jan Beulich <jbeulich@xxxxxxxx> wrote: On 06.01.2026 21:10, Антон Марков wrote:
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |