[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] Fix performance issue brought by TSC-sync logic



Keir Fraser wrote:
On 23/02/2009 22:33, "Yang, Xiaowei" <xiaowei.yang@xxxxxxxxx> wrote:

For further improvement to reach the effect of 3), e.g. by taking care
of cache consistance amongs CPUs, there will be more overhead. And
considering the function is called per second, we are hesitating to do
this. What's your idea?:)

Maybe we should see what that overhead is... Also we may not really need to
run that function every second.

 -- Keir



I measured time_calibration_rendezvous()'s overhead on my machine. It's around 5-6k TSC. And I made anther patch to add loop. The cycle skew introduced by TSC-sync is gone with it. And a bit surprisingly, the expected extra overhead is not even noticeable:)

The new patch is attached.

Thanks,
Xiaowei
diff -r 0b0e7c2b4eef xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Tue Jan 20 21:21:16 2009 +0800
+++ b/xen/arch/x86/time.c       Wed Feb 25 03:11:50 2009 +0800
@@ -1079,38 +1079,69 @@ static void local_time_calibration(void)
  */
 struct calibration_rendezvous {
     cpumask_t cpu_calibration_map;
-    atomic_t nr_cpus;
+    atomic_t count_start;
+    atomic_t count_end;
     s_time_t master_stime;
     u64 master_tsc_stamp;
 };
 
+#define NR_LOOPS 5
+
 static void time_calibration_rendezvous(void *_r)
 {
+    int i;
     struct cpu_calibration *c = &this_cpu(cpu_calibration);
     struct calibration_rendezvous *r = _r;
     unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
 
-    if ( smp_processor_id() == 0 )
-    {
-        while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
-            cpu_relax();
-        r->master_stime = read_platform_stime();
-        rdtscll(r->master_tsc_stamp);
-        mb(); /* write r->master_* /then/ signal */
-        atomic_inc(&r->nr_cpus);
-        c->local_tsc_stamp = r->master_tsc_stamp;
-    }
-    else
-    {
-        atomic_inc(&r->nr_cpus);
-        while ( atomic_read(&r->nr_cpus) != total_cpus )
-            cpu_relax();
-        mb(); /* receive signal /then/ read r->master_* */
-        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
-            wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
-        rdtscll(c->local_tsc_stamp);
-    }
-
+    /* 
+     * Loop is used here to get rid of the cache's side effect to enlarge
+     * the TSC difference among CPUs.
+     */
+    for ( i = 0; i < NR_LOOPS; i++ )
+    {
+        if ( smp_processor_id() == 0 )
+        {
+            while ( atomic_read(&r->count_start) != (total_cpus - 1) )
+                mb();
+   
+            if ( r->master_stime == 0 )
+            {
+                r->master_stime = read_platform_stime();
+                if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+                    rdtscll(r->master_tsc_stamp);
+            }
+            atomic_set(&r->count_end, 0);
+            wmb();
+            atomic_inc(&r->count_start);
+    
+            if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && 
+                 i == NR_LOOPS - 1 )
+                write_tsc((u32)r->master_tsc_stamp, (u32)(r->master_tsc_stamp 
>> 32));
+    
+            while (atomic_read(&r->count_end) != total_cpus - 1)
+                mb();
+            atomic_set(&r->count_start, 0);
+            wmb();
+            atomic_inc(&r->count_end);
+        }
+        else
+        {
+            atomic_inc(&r->count_start);
+            while ( atomic_read(&r->count_start) != total_cpus )
+                mb();
+    
+            if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && 
+                 i == NR_LOOPS - 1 )
+                write_tsc((u32)r->master_tsc_stamp, (u32)(r->master_tsc_stamp 
>> 32));
+    
+            atomic_inc(&r->count_end);
+            while (atomic_read(&r->count_end) != total_cpus)
+                mb();
+        }
+    }
+
+    rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
 
@@ -1121,7 +1152,9 @@ static void time_calibration(void *unuse
 {
     struct calibration_rendezvous r = {
         .cpu_calibration_map = cpu_online_map,
-        .nr_cpus = ATOMIC_INIT(0)
+        .count_start = ATOMIC_INIT(0),
+        .count_end = ATOMIC_INIT(0),
+        .master_stime = 0
     };
 
     /* @wait=1 because we must wait for all cpus before freeing @r. */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.