[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [Xen-devel] Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus


  • To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: "Wei, Gang" <gang.wei@xxxxxxxxx>
  • Date: Mon, 15 Dec 2008 11:06:52 +0800
  • Accept-language: en-US
  • Acceptlanguage: en-US
  • Cc: "Tian, Kevin" <kevin.tian@xxxxxxxxx>
  • Delivery-date: Sun, 14 Dec 2008 19:07:48 -0800
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>
  • Thread-index: AclWoeM2xc+X6mj6QOaQHsyxDmpzagAFRoNpAAEP6tAAAWmCgAAATdDsAAIJ5qAAAvL7GQFMKVOwAA1xISwAJTEgYAANxIuJAAtYh4AAAOh3kwBJ7/4Q
  • Thread-topic: [Xen-devel] Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus

Here is the updated patch for the constant-tsc case. -Jimmy

CPUIDLE: revise tsc-restore to avoid increasing tsc skew between cpus

Originally, the sequence for each cpu was [tsc-save, enter deepC, break-evt, 
exit deepC, tsc-restore], so the system error accumulated quite easily. 
Once the workloads between cpus are not balanced, the tsc skew between cpus 
will eventually become bigger & bigger - more than 10 seconds can be observed.

Then we removed the tsc-save step, and did the tsc-restore after exit from 
deepC based only on the percpu t->stime_master_stamp, t->tsc_scale, & 
t->local_tsc_stamp. This makes the accumulation slower, but can't remove it.

Now, for the constant-tsc case, we just keep an initial stamp via 
cstate_init_stamp during booting/s3 resuming, which is based on the platform 
stime. All cpus need only do the tsc-restore relative to the initial stamp 
after exiting deepC. The base and the tsc->ns scale are fixed and the same for 
all cpus, so this avoids accumulated tsc-skew. BTW, the percpu tsc scale 
calibration is bypassed for the constant-tsc case.

Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>

diff -r 045f70d1acdb xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/time.c       Mon Dec 15 10:35:11 2008 +0800
@@ -69,8 +69,11 @@ static DEFINE_PER_CPU(struct cpu_time, c
 #define EPOCH MILLISECS(1000)
 static struct timer calibration_timer;
 
-/* TSC is invariant on C state entry? */
-static bool_t tsc_invariant;
+/* TSC will not stop during deep C state? */
+static bool_t tsc_nostop;
+/* TSC will be constant rate, independent with P/T state? */
+static int constant_tsc = 0;
+boolean_param("const_tsc", constant_tsc);
 
 /*
  * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
@@ -551,6 +554,10 @@ static u64 plt_stamp;            /* hard
 static u64 plt_stamp;            /* hardware-width platform counter stamp   */
 static struct timer plt_overflow_timer;
 
+/* following 2 variables are for deep C state TSC restore usage */
+static u64 initial_tsc_stamp;    /* initial tsc stamp while plt starting */
+static s_time_t initial_stime_platform_stamp; /* initial stime stamp */
+
 static void plt_overflow(void *unused)
 {
     u64 count;
@@ -664,25 +671,41 @@ static void init_platform_timer(void)
            freq_string(pts->frequency), pts->name);
 }
 
-void cstate_restore_tsc(void)
+static void cstate_init_stamp(void)
+{
+    if ( tsc_nostop || !constant_tsc )
+        return;
+
+    initial_stime_platform_stamp = read_platform_stime();
+    rdtscll(initial_tsc_stamp);
+}
+
+static inline void __restore_tsc(s_time_t plt_stime)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
     struct time_scale sys_to_tsc = scale_reciprocal(t->tsc_scale);
     s_time_t stime_delta;
     u64 tsc_delta;
 
-    if ( tsc_invariant )
+    if ( tsc_nostop )
         return;
 
-    stime_delta = read_platform_stime() - t->stime_master_stamp;
+    stime_delta = plt_stime - 
+        (constant_tsc ? initial_stime_platform_stamp : t->stime_master_stamp);
+
     if ( stime_delta < 0 )
         stime_delta = 0;
 
     tsc_delta = scale_delta(stime_delta, &sys_to_tsc);
 
-    wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta);
+    wrmsrl(MSR_IA32_TSC, 
+        (constant_tsc ? initial_tsc_stamp : t->local_tsc_stamp) + tsc_delta);
 }
 
+void cstate_restore_tsc(void)
+{
+    __restore_tsc(read_platform_stime());
+}
 /***************************************************************************
  * CMOS Timer functions
  ***************************************************************************/
@@ -960,6 +983,18 @@ static void local_time_calibration(void)
            curr_master_stime - curr_local_stime);
 #endif
 
+    if ( constant_tsc )
+    {
+        local_irq_disable();
+        t->local_tsc_stamp    = curr_tsc;
+        t->stime_local_stamp  = curr_master_stime;
+        t->stime_master_stamp = curr_master_stime;
+        local_irq_enable();
+
+        update_vcpu_system_time(current);
+        goto out;
+    }
+
     /* Local time warps forward if it lags behind master time. */
     if ( curr_local_stime < curr_master_stime )
         curr_local_stime = curr_master_stime;
@@ -1082,6 +1117,8 @@ static void time_calibration_rendezvous(
         mb(); /* receive signal /then/ read r->master_stime */
     }
 
+    __restore_tsc(r->master_stime);
+
     rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
@@ -1125,9 +1162,23 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
-    /* Is TSC invariant during deep C state? */
+    /* for recent intel x86 model, the tsc increments at a constant rate */
+    if ( (current_cpu_data.x86 == 0xf && current_cpu_data.x86_model >= 0x03) ||
+         (current_cpu_data.x86 == 0x6 && current_cpu_data.x86_model >= 0x0e) )
+    {
+        int cpu;
+
+        constant_tsc = 1;
+
+        for_each_cpu(cpu)
+        {
+            per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+        }
+    }
+
+    /* Is TSC not stop during deep C state ? */
     if ( cpuid_edx(0x80000007) & (1u<<8) )
-        tsc_invariant = 1;
+        tsc_nostop = 1;
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
@@ -1139,6 +1190,8 @@ int __init init_xen_time(void)
 
     stime_platform_stamp = NOW();
     init_platform_timer();
+
+    cstate_init_stamp();
 
     init_percpu_time();
 
@@ -1260,6 +1313,8 @@ int time_resume(void)
     disable_pit_irq();
 
     init_percpu_time();
+
+    cstate_init_stamp();
 
     do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());
 

Attachment: tsc-skew-20081213-1.patch
Description: tsc-skew-20081213-1.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.