[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen master] x86/time: don't move TSC backwards in time_calibration_tsc_rendezvous()



commit 76aff7f6336b0ce19559700717537449972531be
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Fri Feb 19 17:21:41 2021 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri Feb 19 17:21:41 2021 +0100

    x86/time: don't move TSC backwards in time_calibration_tsc_rendezvous()
    
    While doing this for small amounts may be okay, the unconditional use
    of CPU0's value here has been found to be a problem when the boot time
    TSC of the BSP was behind that of all APs by more than a second. In
    particular because of get_s_time_fixed() producing insane output when
    the calculated delta is negative, we can't allow this to happen.
    
    On the first iteration have all other CPUs sort out the highest TSC
    value any one of them has read. On the second iteration, if that
    maximum is higher than CPU0's, update its recorded value from that
    taken in the first iteration. Use the resulting value on the last
    iteration to write everyone's TSCs.
    
    To account for the possible discontinuity, have
    time_calibration_rendezvous_tail() record the newly written value, but
    extrapolate local stime using the value read.
    
    Reported-by: Claudemir Todo Bom <claudemir@xxxxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Release-Acked-by: Ian Jackson <iwj@xxxxxxxxxxxxxx>
---
 xen/arch/x86/time.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index a894f74b88..6bc1fd11d6 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1658,17 +1658,17 @@ struct calibration_rendezvous {
     cpumask_t cpu_calibration_map;
     atomic_t semaphore;
     s_time_t master_stime;
-    u64 master_tsc_stamp;
+    uint64_t master_tsc_stamp, max_tsc_stamp;
 };
 
 static void
 time_calibration_rendezvous_tail(const struct calibration_rendezvous *r,
-                                 uint64_t tsc)
+                                 uint64_t old_tsc, uint64_t new_tsc)
 {
     struct cpu_time_stamp *c = &this_cpu(cpu_calibration);
 
-    c->local_tsc    = tsc;
-    c->local_stime  = get_s_time_fixed(c->local_tsc);
+    c->local_tsc    = new_tsc;
+    c->local_stime  = get_s_time_fixed(old_tsc ?: new_tsc);
     c->master_stime = r->master_stime;
 
     raise_softirq(TIME_CALIBRATE_SOFTIRQ);
@@ -1683,6 +1683,7 @@ static void time_calibration_tsc_rendezvous(void *_r)
     int i;
     struct calibration_rendezvous *r = _r;
     unsigned int total_cpus = cpumask_weight(&r->cpu_calibration_map);
+    uint64_t tsc = 0;
 
     /* Loop to get rid of cache effects on TSC skew. */
     for ( i = 4; i >= 0; i-- )
@@ -1692,8 +1693,15 @@ static void time_calibration_tsc_rendezvous(void *_r)
             while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
                 cpu_relax();
 
-            if ( r->master_tsc_stamp == 0 )
-                r->master_tsc_stamp = rdtsc_ordered();
+            if ( tsc == 0 )
+                r->master_tsc_stamp = tsc = rdtsc_ordered();
+            else if ( r->master_tsc_stamp < r->max_tsc_stamp )
+                /*
+                 * We want to avoid moving the TSC backwards for any CPU.
+                 * Use the largest value observed anywhere on the first
+                 * iteration.
+                 */
+                r->master_tsc_stamp = r->max_tsc_stamp;
             else if ( i == 0 )
                 r->master_stime = read_platform_stime(NULL);
 
@@ -1712,6 +1720,15 @@ static void time_calibration_tsc_rendezvous(void *_r)
             while ( atomic_read(&r->semaphore) < total_cpus )
                 cpu_relax();
 
+            if ( tsc == 0 )
+            {
+                uint64_t cur = ACCESS_ONCE(r->max_tsc_stamp);
+
+                tsc = rdtsc_ordered();
+                while ( tsc > cur )
+                    cur = cmpxchg(&r->max_tsc_stamp, cur, tsc);
+            }
+
             if ( i == 0 )
                 write_tsc(r->master_tsc_stamp);
 
@@ -1719,9 +1736,12 @@ static void time_calibration_tsc_rendezvous(void *_r)
             while ( atomic_read(&r->semaphore) > total_cpus )
                 cpu_relax();
         }
+
+        /* Just in case a read above ended up reading zero. */
+        tsc += !tsc;
     }
 
-    time_calibration_rendezvous_tail(r, r->master_tsc_stamp);
+    time_calibration_rendezvous_tail(r, tsc, r->master_tsc_stamp);
 }
 
 /* Ordinary rendezvous function which does not modify TSC values. */
@@ -1746,7 +1766,7 @@ static void time_calibration_std_rendezvous(void *_r)
         smp_rmb(); /* receive signal /then/ read r->master_stime */
     }
 
-    time_calibration_rendezvous_tail(r, rdtsc_ordered());
+    time_calibration_rendezvous_tail(r, 0, rdtsc_ordered());
 }
 
 /*
--
generated by git-patchbot for /home/xen/git/xen.git#master



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.