[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] x86: increase NMI timer frequency if necessary


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Thu, 25 Jan 2024 17:55:39 +0100
  • Autocrypt: addr=jbeulich@xxxxxxxx; keydata= xsDiBFk3nEQRBADAEaSw6zC/EJkiwGPXbWtPxl2xCdSoeepS07jW8UgcHNurfHvUzogEq5xk hu507c3BarVjyWCJOylMNR98Yd8VqD9UfmX0Hb8/BrA+Hl6/DB/eqGptrf4BSRwcZQM32aZK 7Pj2XbGWIUrZrd70x1eAP9QE3P79Y2oLrsCgbZJfEwCgvz9JjGmQqQkRiTVzlZVCJYcyGGsD /0tbFCzD2h20ahe8rC1gbb3K3qk+LpBtvjBu1RY9drYk0NymiGbJWZgab6t1jM7sk2vuf0Py O9Hf9XBmK0uE9IgMaiCpc32XV9oASz6UJebwkX+zF2jG5I1BfnO9g7KlotcA/v5ClMjgo6Gl MDY4HxoSRu3i1cqqSDtVlt+AOVBJBACrZcnHAUSuCXBPy0jOlBhxPqRWv6ND4c9PH1xjQ3NP nxJuMBS8rnNg22uyfAgmBKNLpLgAGVRMZGaGoJObGf72s6TeIqKJo/LtggAS9qAUiuKVnygo 3wjfkS9A3DRO+SpU7JqWdsveeIQyeyEJ/8PTowmSQLakF+3fote9ybzd880fSmFuIEJldWxp Y2ggPGpiZXVsaWNoQHN1c2UuY29tPsJgBBMRAgAgBQJZN5xEAhsDBgsJCAcDAgQVAggDBBYC AwECHgECF4AACgkQoDSui/t3IH4J+wCfQ5jHdEjCRHj23O/5ttg9r9OIruwAn3103WUITZee e7Sbg12UgcQ5lv7SzsFNBFk3nEQQCACCuTjCjFOUdi5Nm244F+78kLghRcin/awv+IrTcIWF hUpSs1Y91iQQ7KItirz5uwCPlwejSJDQJLIS+QtJHaXDXeV6NI0Uef1hP20+y8qydDiVkv6l IreXjTb7DvksRgJNvCkWtYnlS3mYvQ9NzS9PhyALWbXnH6sIJd2O9lKS1Mrfq+y0IXCP10eS FFGg+Av3IQeFatkJAyju0PPthyTqxSI4lZYuJVPknzgaeuJv/2NccrPvmeDg6Coe7ZIeQ8Yj t0ARxu2xytAkkLCel1Lz1WLmwLstV30g80nkgZf/wr+/BXJW/oIvRlonUkxv+IbBM3dX2OV8 AmRv1ySWPTP7AAMFB/9PQK/VtlNUJvg8GXj9ootzrteGfVZVVT4XBJkfwBcpC/XcPzldjv+3 HYudvpdNK3lLujXeA5fLOH+Z/G9WBc5pFVSMocI71I8bT8lIAzreg0WvkWg5V2WZsUMlnDL9 mpwIGFhlbM3gfDMs7MPMu8YQRFVdUvtSpaAs8OFfGQ0ia3LGZcjA6Ik2+xcqscEJzNH+qh8V m5jjp28yZgaqTaRbg3M/+MTbMpicpZuqF4rnB0AQD12/3BNWDR6bmh+EkYSMcEIpQmBM51qM EKYTQGybRCjpnKHGOxG0rfFY1085mBDZCH5Kx0cl0HVJuQKC+dV2ZY5AqjcKwAxpE75MLFkr wkkEGBECAAkFAlk3nEQCGwwACgkQoDSui/t3IH7nnwCfcJWUDUFKdCsBH/E5d+0ZnMQi+G0A nAuWpQkjM1ASeQwSHEeAWPgskBQL
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Thu, 25 Jan 2024 16:55:51 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

Since the performance counters used for the NMI watchdog count non-
halted cycles, they may count at a rate higher than cpu_khz. Thus the
watchdog tick may occur more frequently than invocations of the timer
if we don't account for the ratio between nominal and maximum CPU clock
speeds, which would be a problem in particular when "watchdog_timeout=1"
is in effect (for high enough ratios even larger timeout values may pose
a problem).

Leverage the so far display-only data we collect on newer Intel and AMD
CPUs. On older CPUs we just have to (continue to) hope that the default
frequency of 1 Hz is okay(-ish) to use.

While adding the new variable, also move the (now adjacent) cpu_khz to
.data.ro_after_init.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
This renders the "log" in the function names somewhat stale, but I don't
think this strictly warrants renaming the functions right away.

--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -657,12 +657,18 @@ void amd_log_freq(const struct cpuinfo_x
                                     : (((v) & 0xff) * 25 * 8) / (((v) >> 8) & 0x3f))
        if (idx && idx < h &&
            !rdmsr_safe(0xC0010064 + idx, val) && (val >> 63) &&
-           !rdmsr_safe(0xC0010064, hi) && (hi >> 63))
+           !rdmsr_safe(0xC0010064, hi) && (hi >> 63)) {
+               if (c == &boot_cpu_data)
+                       cpu_max_mhz = FREQ(hi);
                printk("CPU%u: %lu (%lu ... %lu) MHz\n",
                       smp_processor_id(), FREQ(val), FREQ(lo), FREQ(hi));
-       else if (h && !rdmsr_safe(0xC0010064, hi) && (hi >> 63))
+       }
+       else if (h && !rdmsr_safe(0xC0010064, hi) && (hi >> 63)) {
+               if (c == &boot_cpu_data)
+                       cpu_max_mhz = FREQ(hi);
                printk("CPU%u: %lu ... %lu MHz\n",
                       smp_processor_id(), FREQ(lo), FREQ(hi));
+       }
        else
                printk("CPU%u: %lu MHz\n", smp_processor_id(), FREQ(lo));
 #undef FREQ
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -456,7 +456,11 @@ static void intel_log_freq(const struct
             if ( eax )
                 printk(" base: %u MHz", eax);
             if ( ebx )
+            {
+                if ( c == &boot_cpu_data )
+                    cpu_max_mhz = ebx;
                 printk(" max: %u MHz", ebx);
+            }
             printk("\n");
         }
     }
@@ -522,6 +526,8 @@ static void intel_log_freq(const struct
     printk("CPU%u: ", smp_processor_id());
     if ( min_ratio )
         printk("%u ... ", (factor * min_ratio + 50) / 100);
+    if ( c == &boot_cpu_data && !cpu_max_mhz )
+        cpu_max_mhz = (factor * max_ratio + 50) / 100;
     printk("%u MHz\n", (factor * max_ratio + 50) / 100);
 }
 
--- a/xen/arch/x86/include/asm/time.h
+++ b/xen/arch/x86/include/asm/time.h
@@ -8,6 +8,8 @@ typedef u64 cycles_t;
 
 extern bool disable_tsc_sync;
 
+extern unsigned int cpu_max_mhz;
+
 static inline cycles_t get_cycles(void)
 {
     return rdtsc_ordered();
--- a/xen/arch/x86/nmi.c
+++ b/xen/arch/x86/nmi.c
@@ -213,10 +213,12 @@ void __init check_nmi_watchdog(void)
     return;
 }
 
+static unsigned int __ro_after_init timer_gap = MILLISECS(1000);
+
 static void cf_check nmi_timer_fn(void *unused)
 {
     this_cpu(nmi_timer_ticks)++;
-    set_timer(&this_cpu(nmi_timer), NOW() + MILLISECS(1000));
+    set_timer(&this_cpu(nmi_timer), NOW() + timer_gap);
 }
 
 void disable_lapic_nmi_watchdog(void)
@@ -477,8 +479,17 @@ bool watchdog_enabled(void)
 
 int __init watchdog_setup(void)
 {
+    unsigned long cpu_mhz = cpu_khz / 1000;
     unsigned int cpu;
 
+    if ( cpu_max_mhz > cpu_mhz )
+    {
+        timer_gap = timer_gap * cpu_mhz / cpu_max_mhz;
+        /* To be on the safe side, bound to 1ms. */
+        if ( timer_gap < MILLISECS(1) )
+            timer_gap = MILLISECS(1);
+    }
+
     /*
      * Activate periodic heartbeats. We cannot do this earlier during 
      * setup because the timer infrastructure is not available.
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -47,7 +47,9 @@
 static char __initdata opt_clocksource[10];
 string_param("clocksource", opt_clocksource);
 
-unsigned long __read_mostly cpu_khz;  /* CPU clock frequency in kHz. */
+unsigned long __ro_after_init cpu_khz;    /* CPU clock frequency in kHz. */
+unsigned int __ro_after_init cpu_max_mhz; /* CPU max (known) clkfreq in MHz. */
+
 DEFINE_SPINLOCK(rtc_lock);
 unsigned long pit0_ticks;
 



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.