[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] x86 vtsc: use debug-key to check/test reliable tsc



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1254897830 -3600
# Node ID 006686ed2088c92ad722f7982d19b2b34413f1ed
# Parent  b7d4d51b5cf1a1abea30416bffaaa54483b21c45
x86 vtsc: use debug-key to check/test reliable tsc

Previous attempt was rejected as too intrusive, but
further app rdtsc optimization work is very dependent
on Xen being able to determine if TSC is reliable
or not.

This patch starts to introduce the concept of
X86_FEATURE_TSC_RELIABLE as it is defined and
used by Linux, but uses it and tests it only in
a debug-key for now, so that a wide variety of
hardware can be measured by the broader Xen
community to confirm/deny TSC assumptions.
The eventual goal is for the evaluation of
TSC reliability to be exported to userland
so that apps can use rdtsc natively if and when
it is safe to do so.

(See http://lists.xensource.com/archives/html/xen-devel/2009-10/msg00056.html)

Note that the original Linux code for tsc_sync.c
uses a raw spinlock to ensure the "fastest, inlined,
non-debug version of a critical section".  Xen
doesn't provide a _raw_spin_lock() so I used
regular spinlocks, but I would prefer the code
to use something more strict as Linux does.

(Also includes a minor nit: "NOSTOP" was used in
an early version of a Linux patch, but mainline
now uses "NONSTOP"... correct this for consistency.)

Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
---
 xen/arch/x86/cpu/amd.c           |    2 
 xen/arch/x86/cpu/intel.c         |    3 -
 xen/arch/x86/time.c              |  108 ++++++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/cpufeature.h |    3 -
 4 files changed, 112 insertions(+), 4 deletions(-)

diff -r b7d4d51b5cf1 -r 006686ed2088 xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Wed Oct 07 07:35:06 2009 +0100
+++ b/xen/arch/x86/cpu/amd.c    Wed Oct 07 07:43:50 2009 +0100
@@ -463,7 +463,7 @@ static void __devinit init_amd(struct cp
                c->x86_power = cpuid_edx(0x80000007);
                if (c->x86_power & (1<<8)) {
                        set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-                       set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+                       set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
                }
        }
 
diff -r b7d4d51b5cf1 -r 006686ed2088 xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c  Wed Oct 07 07:35:06 2009 +0100
+++ b/xen/arch/x86/cpu/intel.c  Wed Oct 07 07:43:50 2009 +0100
@@ -210,7 +210,8 @@ static void __devinit init_intel(struct 
                set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
        if (cpuid_edx(0x80000007) & (1u<<8)) {
                set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-               set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+               set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
+               set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
        }
        if ((c->cpuid_level >= 0x00000006) &&
            (cpuid_eax(0x00000006) & (1u<<2)))
diff -r b7d4d51b5cf1 -r 006686ed2088 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Wed Oct 07 07:35:06 2009 +0100
+++ b/xen/arch/x86/time.c       Wed Oct 07 07:43:50 2009 +0100
@@ -698,7 +698,7 @@ void cstate_restore_tsc(void)
     s_time_t stime_delta;
     u64 new_tsc;
 
-    if ( boot_cpu_has(X86_FEATURE_NOSTOP_TSC) )
+    if ( boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
         return;
 
     stime_delta = read_platform_stime() - t->stime_master_stamp;
@@ -1437,6 +1437,102 @@ struct tm wallclock_time(void)
     return gmtime(seconds);
 }
 
+/*
+ * TSC Reliability check
+ */
+
+/*
+ * check_tsc_warp(): detect the TSC appearing to go backwards when read
+ * alternately from multiple CPUs (derived from Linux
+ * arch/x86/kernel/tsc_sync.c, as noted in the changeset description).
+ *
+ * tsc_khz:  TSC frequency in kHz, used to size the ~20ms sample window.
+ * max_warp: in/out; updated (under sync_lock) with the largest
+ *           backwards step observed, in TSC cycles.
+ *
+ * All participating CPUs run this concurrently; each serializes on
+ * sync_lock to publish its latest TSC reading in last_tsc so the next
+ * reader (possibly on another CPU) can compare against it.
+ */
+void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
+{
+#define rdtsc_barrier() mb()
+    static DEFINE_SPINLOCK(sync_lock);
+    static cycles_t last_tsc;
+
+    cycles_t start, now, prev, end;
+    int i;
+
+    rdtsc_barrier();
+    start = get_cycles();
+    rdtsc_barrier();
+
+    /* The measurement runs for 20 msecs: */
+    end = start + tsc_khz * 20ULL;
+    now = start;
+
+    for ( i = 0; ; i++ )
+    {
+        /*
+         * We take the global lock, measure TSC, save the
+         * previous TSC that was measured (possibly on
+         * another CPU) and update the previous TSC timestamp.
+         */
+        spin_lock(&sync_lock);
+        prev = last_tsc;
+        rdtsc_barrier();
+        now = get_cycles();
+        rdtsc_barrier();
+        last_tsc = now;
+        spin_unlock(&sync_lock);
+
+        /*
+         * Be nice every now and then (and also check whether measurement is
+         * done [we also insert a 10 million loops safety exit, so we don't
+         * lock up in case the TSC readout is totally broken]):
+         */
+        if ( unlikely(!(i & 7)) )
+        {
+            if ( (now > end) || (i > 10000000) )
+                break;
+            cpu_relax();
+            /*touch_nmi_watchdog();*/
+        }
+
+        /*
+         * Outside the critical section we can now see whether we saw a
+         * time-warp of the TSC going backwards:
+         */
+        if ( unlikely(prev > now) )
+        {
+            spin_lock(&sync_lock);
+            /*
+             * Record the worst (largest) backwards step seen so far.
+             * The comparison must be '<': with '>' (and *max_warp
+             * starting at zero) no warp would ever be recorded, and it
+             * would track the minimum rather than the maximum.  This
+             * matches the Linux tsc_sync.c original.
+             */
+            if ( *max_warp < prev - now )
+                *max_warp = prev - now;
+            spin_unlock(&sync_lock);
+        }
+    }
+}
+
+/*
+ * Worst backwards TSC step seen by any check (cycles), and the number
+ * of times the reliability check has been invoked.  tsc_max_warp is
+ * updated inside check_tsc_warp() under its sync_lock; tsc_check_count
+ * is bumped under the lock in tsc_check_reliability().
+ */
+static unsigned long tsc_max_warp, tsc_check_count;
+/* CPUs still to start/finish the current warp check; empty when idle. */
+static cpumask_t tsc_check_cpumask = CPU_MASK_NONE;
+
+/*
+ * IPI handler run on each non-initiating online CPU: wait until the
+ * initiator flags this CPU in tsc_check_cpumask (the "start" signal),
+ * run the warp check, then clear our bit to signal completion.
+ */
+static void tsc_check_slave(void *unused)
+{
+    unsigned int cpu = smp_processor_id();
+    /* IRQs off so the measurement loop isn't perturbed by interrupts. */
+    local_irq_disable();
+    /* Spin for the start signal; mb() forces a fresh read of the mask. */
+    while ( !cpu_isset(cpu, tsc_check_cpumask) )
+        mb();
+    check_tsc_warp(cpu_khz, &tsc_max_warp);
+    /* Report completion back to the initiator. */
+    cpu_clear(cpu, tsc_check_cpumask);
+    local_irq_enable();
+}
+
+/*
+ * Run check_tsc_warp() simultaneously on all online CPUs (triggered
+ * from the softtsc debug-key handler) and accumulate the worst warp
+ * seen in tsc_max_warp.  A static lock serializes whole invocations.
+ */
+static void tsc_check_reliability(void)
+{
+    unsigned int cpu = smp_processor_id();
+    static DEFINE_SPINLOCK(lock);
+
+    spin_lock(&lock);
+
+    tsc_check_count++;
+    /* Send the other CPUs into tsc_check_slave() (wait=0: don't block)... */
+    smp_call_function(tsc_check_slave, NULL, 0);
+    /* ...then release them all at once and join the measurement ourselves. */
+    tsc_check_cpumask = cpu_online_map;
+    local_irq_disable();
+    check_tsc_warp(cpu_khz, &tsc_max_warp);
+    cpu_clear(cpu, tsc_check_cpumask);
+    local_irq_enable();
+    /* Wait for every slave to clear its bit, i.e. finish its check. */
+    while ( !cpus_empty(tsc_check_cpumask) )
+        cpu_relax();
+
+    spin_unlock(&lock);
+}
 
 /*
  * PV SoftTSC Emulation.
@@ -1470,6 +1566,16 @@ static void dump_softtsc(unsigned char k
     struct domain *d;
     int domcnt = 0;
 
+    tsc_check_reliability();
+    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
+        printk("TSC marked as reliable, "
+               "warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
+    else if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC ) )
+        printk("TSC marked as constant but not reliable, "
+               "warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
+    else
+        printk("TSC not marked as either constant or reliable, "
+               "warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
     for_each_domain ( d )
     {
         if ( !d->arch.vtsc )
diff -r b7d4d51b5cf1 -r 006686ed2088 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h  Wed Oct 07 07:35:06 2009 +0100
+++ b/xen/include/asm-x86/cpufeature.h  Wed Oct 07 07:43:50 2009 +0100
@@ -74,9 +74,10 @@
 #define X86_FEATURE_P3         (3*32+ 6) /* P3 */
 #define X86_FEATURE_P4         (3*32+ 7) /* P4 */
 #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_NOSTOP_TSC (3*32+ 9) /* TSC does not stop in C states */
+#define X86_FEATURE_NONSTOP_TSC        (3*32+ 9) /* TSC does not stop in C states */
 #define X86_FEATURE_ARAT       (3*32+ 10) /* Always running APIC timer */
 #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_TSC_RELIABLE (3*32+12) /* TSC is known to be reliable */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3       (4*32+ 0) /* Streaming SIMD Extensions-3 */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.