[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v4 1/4] x86/APIC: calibrate against platform timer when possible


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Thu, 31 Mar 2022 11:29:44 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=izkM3/z4sPkt4zpxlsHGtY1fc5KRMyoYNoGbHZjH4Q0=; b=TS4TuKTtl+T7D+tlbqCyAA/F1RxlZTGihpOyzIzLxJdw7XKrMOZtj9TZU2hKh9fuiivrCV3Vh+0JK1M9Kj9IcDcQSlxy0RrtRqWfiNnZ0TFlP74tjBHF4NQTdyCaaFnjpkgzqfEucKWFTnzoQVeIdQodRXfDjuX6xN3DS+avOzaoCatTTRX5pz3K79Sw5HK2avZm7llaZ1LJFNFJX85Goj5M11Ti/6BcgicFqq9WjbKf+RbeDUbXzZHuQJ0ELjN+Gd8pDgheAvU/gUHXjpcxsiYVJJLRro6phoIjtuPTCzS90zwv0dMTUl+P4sqi2UoyM82uBhzn386pTYyHcQ3mLQ==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=TCRIT5/BMJ3RHYIz3kjxkjPL8Nw8rd20IpoCeFWWKn2oIgMIe6FoIFmpYiNIjZVFp/1f/C33YROc6wsHcWYzubMEdBedVtZ/XsYsVeevq9/IOorDjwZuFjs5sDixuKw9bMcmIa4s9DV7N+3MxBtaAH71TLebn5MnNuxEQabydnjUbbKtNmD30QCFzIHuW7zTDMOssWsVLELiQIFKLGpDwYNB5o+uEYoNHd2KCuXVtVMVNNA+e1XxBS4zvQk9QLh8Y3pwaZd59BJecTqa+m+nYsz1buAM7iiqXuc/8kJjLZG/3VyK/gRLg7J+1k56QoGBnYL8lx5KDb9Q8yd3APWHAg==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Thu, 31 Mar 2022 09:29:53 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

Use the original calibration against PIT only when the platform timer
is PIT. This implicitly excludes the "xen_guest" case from using the PIT
logic (init_pit() fails there, and as of 5e73b2594c54 ["x86/time: minor
adjustments to init_pit()"] using_pit also isn't being set too early
anymore), so the respective hack there can be dropped at the same time.
This also reduces calibration time from 100ms to 50ms, albeit this step
is being skipped as of 0731a56c7c72 ("x86/APIC: no need for timer
calibration when using TDT") anyway.

While re-indenting the PIT logic in calibrate_APIC_clock(), besides
adjusting style also switch around the 2nd TSC/TMCCT read pair, to match
the order of the 1st one, yielding more consistent deltas.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Open-coding apic_read() in apic_tmcct_read() isn't overly nice, but I
wanted to avoid x2apic_enabled being evaluated twice in close
succession. And I also wouldn't want to have the barrier there even for
the (uncached) MMIO read.

Unlike the CPU frequencies enumerated in CPUID leaf 0x16 (which aren't
precise), using CPUID[0x15].ECX - if populated - may be an option to
skip calibration altogether. Aiui the value there is precise, but using
the systems I have easy access to I cannot verify this: In the sample
of three I have, none have ECX populated.

I wonder whether the secondary CPU freq measurement (used for display
purposes only) wouldn't better be dropped at this occasion.
---
v3: Move (and rename) read_tmcct() as previously suggested.
v2: New.

--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1183,18 +1183,20 @@ static void __init check_deadline_errata
            "please update microcode to version %#x (or later)\n", rev);
 }
 
-static void __init wait_tick_pvh(void)
+uint32_t __init apic_tmcct_read(void)
 {
-    u64 lapse_ns = 1000000000ULL / HZ;
-    s_time_t start, curr_time;
-
-    start = NOW();
+    if ( x2apic_enabled )
+    {
+        /*
+         * Have a barrier here just like in rdtsc_ordered() as it's
+         * unclear whether this non-serializing RDMSR also can be
+         * executed speculatively (like RDTSC can).
+         */
+        alternative("lfence", "mfence", X86_FEATURE_MFENCE_RDTSC);
+        return apic_rdmsr(APIC_TMCCT);
+    }
 
-    /* Won't wrap around */
-    do {
-        cpu_relax();
-        curr_time = NOW();
-    } while ( curr_time - start < lapse_ns );
+    return apic_mem_read(APIC_TMCCT);
 }
 
 /*
@@ -1212,9 +1214,6 @@ static void __init wait_tick_pvh(void)
 
 static void __init calibrate_APIC_clock(void)
 {
-    unsigned long long t1, t2;
-    unsigned long tt1, tt2;
-    unsigned int i;
     unsigned long bus_freq; /* KAF: pointer-size avoids compile warns. */
     unsigned int bus_cycle; /* length of one bus cycle in pico-seconds */
 #define LOOPS_FRAC 10U      /* measure for one tenth of a second */
@@ -1227,39 +1226,38 @@ static void __init calibrate_APIC_clock(
      */
     __setup_APIC_LVTT(0xffffffff);
 
-    if ( !xen_guest )
+    bus_freq = calibrate_apic_timer();
+    if ( !bus_freq )
+    {
+        unsigned int i, tt1, tt2;
+        unsigned long t1, t2;
+
+        ASSERT(!xen_guest);
+
         /*
-         * The timer chip counts down to zero. Let's wait
-         * for a wraparound to start exact measurement:
-         * (the current tick might have been already half done)
+         * The timer chip counts down to zero. Let's wait for a wraparound to
+         * start exact measurement (the current tick might have been already
+         * half done):
          */
         wait_8254_wraparound();
-    else
-        wait_tick_pvh();
 
-    /*
-     * We wrapped around just now. Let's start:
-     */
-    t1 = rdtsc_ordered();
-    tt1 = apic_read(APIC_TMCCT);
+        /* We wrapped around just now. Let's start: */
+        t1 = rdtsc_ordered();
+        tt1 = apic_read(APIC_TMCCT);
 
-    /*
-     * Let's wait HZ / LOOPS_FRAC ticks:
-     */
-    for (i = 0; i < HZ / LOOPS_FRAC; i++)
-        if ( !xen_guest )
+        /* Let's wait HZ / LOOPS_FRAC ticks: */
+        for ( i = 0; i < HZ / LOOPS_FRAC; ++i )
             wait_8254_wraparound();
-        else
-            wait_tick_pvh();
 
-    tt2 = apic_read(APIC_TMCCT);
-    t2 = rdtsc_ordered();
+        t2 = rdtsc_ordered();
+        tt2 = apic_read(APIC_TMCCT);
 
-    bus_freq = (tt1 - tt2) * APIC_DIVISOR * LOOPS_FRAC;
+        bus_freq = (tt1 - tt2) * APIC_DIVISOR * LOOPS_FRAC;
 
-    apic_printk(APIC_VERBOSE, "..... CPU clock speed is %lu.%04lu MHz.\n",
-                ((unsigned long)(t2 - t1) * LOOPS_FRAC) / 1000000,
-                (((unsigned long)(t2 - t1) * LOOPS_FRAC) / 100) % 10000);
+        apic_printk(APIC_VERBOSE, "..... CPU clock speed is %lu.%04lu MHz.\n",
+                    ((t2 - t1) * LOOPS_FRAC) / 1000000,
+                    (((t2 - t1) * LOOPS_FRAC) / 100) % 10000);
+    }
 
     apic_printk(APIC_VERBOSE, "..... host bus clock speed is %ld.%04ld MHz.\n",
                 bus_freq / 1000000, (bus_freq / 100) % 10000);
--- a/xen/arch/x86/include/asm/apic.h
+++ b/xen/arch/x86/include/asm/apic.h
@@ -192,6 +192,9 @@ extern void record_boot_APIC_mode(void);
 extern enum apic_mode current_local_apic_mode(void);
 extern void check_for_unexpected_msi(unsigned int vector);
 
+uint64_t calibrate_apic_timer(void);
+uint32_t apic_tmcct_read(void);
+
 extern void check_nmi_watchdog(void);
 
 extern unsigned int nmi_watchdog;
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -26,6 +26,7 @@
 #include <xen/symbols.h>
 #include <xen/keyhandler.h>
 #include <xen/guest_access.h>
+#include <asm/apic.h>
 #include <asm/io.h>
 #include <asm/iocap.h>
 #include <asm/msr.h>
@@ -1018,6 +1019,95 @@ static u64 __init init_platform_timer(vo
     return rc;
 }
 
+/*
+ * Read the platform timer counter together with the local APIC timer's
+ * current count register (TMCCT), as close together as possible.
+ *
+ * Each pass reads the platform counter, then TMCCT, and computes how far
+ * TMCCT moved since the previous TMCCT read (previous minus current).  The
+ * pair with the smallest such delta is kept.  At least four passes are
+ * made; the loop ends on the first later pass not yielding a new minimum.
+ *
+ * @tmcct receives the TMCCT value belonging to the returned platform
+ * counter value.
+ */
+static uint64_t __init read_pt_and_tmcct(uint32_t *tmcct)
+{
+    uint32_t tmcct_prev = *tmcct = apic_tmcct_read(), tmcct_min = ~0;
+    /* Self-init: presumably silences a "may be uninitialized" warning. */
+    uint64_t best = best;
+    unsigned int i;
+
+    for ( i = 0; ; ++i )
+    {
+        uint64_t pt = plt_src.read_counter();
+        uint32_t tmcct_cur = apic_tmcct_read();
+        uint32_t tmcct_delta = tmcct_prev - tmcct_cur;
+
+        if ( tmcct_delta < tmcct_min )
+        {
+            tmcct_min = tmcct_delta;
+            *tmcct = tmcct_cur;
+            best = pt;
+        }
+        else if ( i > 2 )
+            break;
+
+        tmcct_prev = tmcct_cur;
+    }
+
+    return best;
+}
+
+/*
+ * Calibrate the local APIC timer against the platform timer.
+ *
+ * TMCCT is sampled at both ends of an interval of
+ * CALIBRATE_VALUE(plt_src.frequency) platform timer ticks, with a busy wait
+ * in between.  If the interval turned out longer than requested, the TMCCT
+ * delta is scaled by target / actual via muldiv64().
+ *
+ * Returns the TMCCT delta multiplied by CALIBRATE_FRAC, or 0 if the platform
+ * timer is the PIT (which cannot be used here, see below).
+ */
+uint64_t __init calibrate_apic_timer(void)
+{
+    uint32_t start, end;
+    uint64_t count = read_pt_and_tmcct(&start), elapsed;
+    uint64_t target = CALIBRATE_VALUE(plt_src.frequency), actual;
+    uint64_t mask = (uint64_t)~0 >> (64 - plt_src.counter_bits);
+
+    /*
+     * PIT cannot be used here as it requires the timer interrupt to maintain
+     * its 32-bit software counter, yet here we run with IRQs disabled.
+     */
+    if ( using_pit )
+        return 0;
+
+    while ( ((plt_src.read_counter() - count) & mask) < target )
+        continue;
+
+    /*
+     * Mask the difference to the counter's width, as done in the loop above:
+     * the counter may have wrapped since "count" was sampled.
+     */
+    actual = (read_pt_and_tmcct(&end) - count) & mask;
+    elapsed = start - end;
+
+    if ( likely(actual > target) )
+    {
+        /* See the comment in calibrate_tsc(). */
+        while ( unlikely(actual > (uint32_t)actual) )
+        {
+            actual >>= 1;
+            target >>= 1;
+        }
+        elapsed = muldiv64(elapsed, target, actual);
+    }
+
+    return elapsed * CALIBRATE_FRAC;
+}
+
 u64 stime2tsc(s_time_t stime)
 {
     struct cpu_time *t;




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.