[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging] x86: retrieve and log CPU frequency information



commit f6b6517cd5dbd7b2cdb3cfc4cda9751d1db19a1a
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Fri May 15 16:16:29 2020 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri May 15 16:16:29 2020 +0200

    x86: retrieve and log CPU frequency information
    
    While from just a single Skylake system it is already clear that we
    can't base any of our logic on CPUID leaf 0x15 [1] (leaf 0x16 is
    documented to be used for display purposes only anyway), logging this
    information may still give us some reference in case of problems as well
    as for future work. Additionally on the AMD side it is unclear whether
    the deviation between reported and measured frequencies is because of us
    not doing well, or because of nominal and actual frequencies being quite
    far apart.
    
    The chosen variable naming in amd_log_freq() has pointed out a naming
    problem in rdmsr_safe(), which is being taken care of at the same time.
    Symmetrically wrmsr_safe(), being an inline function, also gets an
    unnecessary underscore dropped from one of its local variables.
    
    [1] With a core crystal clock of 24MHz and a ratio of 216/2, the
        reported frequency nevertheless is 2600MHz, rather than the
        expected (and calibrated by both us and Linux) 2592MHz.
    
    Suggested-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
 xen/arch/x86/cpu/amd.c    | 103 ++++++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/cpu/cpu.h    |   1 +
 xen/arch/x86/cpu/hygon.c  |   2 +
 xen/arch/x86/cpu/intel.c  |  74 +++++++++++++++++++++++++++++++++
 xen/include/asm-x86/msr.h |  16 +++----
 5 files changed, 188 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
index f95a8e0fd3..05cbcbad73 100644
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -532,6 +532,107 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
                                                           : c->cpu_core_id);
 }
 
+/*
+ * Log core frequencies decoded from the P-state MSRs (0xC0010064 + n): the
+ * nominal one (first non-boost state) if known, and the lowest..highest range.
+ */
+void amd_log_freq(const struct cpuinfo_x86 *c)
+{
+       unsigned int idx = 0, h;
+       uint64_t hi, lo, val;
+
+       if (c->x86 < 0x10 || c->x86 > 0x19 ||
+           (c != &boot_cpu_data &&
+            (!opt_cpu_info || (c->apicid & (c->x86_num_siblings - 1)))))
+               return; /* family out of range, or not a CPU to log for */
+
+       if (c->x86 < 0x17) {
+               unsigned int node = 0;
+               uint64_t nbcfg;
+
+               /*
+                * Make an attempt at determining the node ID, but assume
+                * symmetric setup (using node 0) if this fails.
+                */
+               if (c->extended_cpuid_level >= 0x8000001e &&
+                   cpu_has(c, X86_FEATURE_TOPOEXT)) {
+                       node = cpuid_ecx(0x8000001e) & 0xff;
+                       if (node > 7)
+                               node = 0;
+               } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
+                       rdmsrl(0xC001100C, val);
+                       node = val & 7;
+               }
+
+               /*
+                * Enable (and use) Extended Config Space accesses, as we
+                * can't be certain that MCFG is available here during boot.
+                */
+               rdmsrl(MSR_AMD64_NB_CFG, nbcfg);
+               wrmsrl(MSR_AMD64_NB_CFG,
+                      nbcfg | (1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT));
+#define PCI_ECS_ADDRESS(sbdf, reg) \
+    (0x80000000 | ((sbdf).bdf << 8) | ((reg) & 0xfc) | (((reg) & 0xf00) << 16))
+
+               for ( ; ; ) {
+                       pci_sbdf_t sbdf = PCI_SBDF(0, 0, 0x18 | node, 4);
+
+                       switch (pci_conf_read32(sbdf, PCI_VENDOR_ID)) {
+                       case 0x00000000:
+                       case 0xffffffff:
+                               /* No device at this SBDF. */
+                               if (!node)
+                                       break;
+                               node = 0; /* retry once, using node 0 */
+                               continue;
+
+                       default:
+                               /*
+                                * Core Performance Boost Control: a family
+                                * dependent field of up to 3 bits starting at
+                                * bit 2. Boost states run above the base
+                                * frequency, so they must be accounted for to
+                                * fetch the processor's nominal frequency.
+                                */
+                               switch (c->x86) {
+                               case 0x10: idx = 1; break;
+                               case 0x12: case 0x14:
+                               case 0x15: case 0x16: idx = 7; break;
+                               }
+                               idx &= pci_conf_read(PCI_ECS_ADDRESS(sbdf,
+                                                                    0x15c),
+                                                    0, 4) >> 2;
+                               break;
+                       }
+                       break;
+               }
+
+#undef PCI_ECS_ADDRESS
+               wrmsrl(MSR_AMD64_NB_CFG, nbcfg); /* restore original setting */
+       }
+
+       lo = 0; /* gcc may not recognize the loop having at least 5 iterations */
+       for (h = c->x86 == 0x10 ? 5 : 8; h--; )
+               if (!rdmsr_safe(0xC0010064 + h, lo) && (lo >> 63))
+                       break;
+       if (!(lo >> 63)) /* no readable P-state MSR with bit 63 set */
+               return;
+
+#define FREQ(v) (c->x86 < 0x17 ? ((((v) & 0x3f) + 0x10) * 100) >> (((v) >> 6) & 7) \
+                                    : (((v) & 0xff) * 25 * 8) / (((v) >> 8) & 0x3f))
+       if (idx && idx < h &&
+           !rdmsr_safe(0xC0010064 + idx, val) && (val >> 63) &&
+           !rdmsr_safe(0xC0010064, hi) && (hi >> 63))
+               printk("CPU%u: %lu (%lu..%lu) MHz\n",
+                      smp_processor_id(), FREQ(val), FREQ(lo), FREQ(hi));
+       else if (h && !rdmsr_safe(0xC0010064, hi) && (hi >> 63))
+               printk("CPU%u: %lu..%lu MHz\n",
+                      smp_processor_id(), FREQ(lo), FREQ(hi));
+       else
+               printk("CPU%u: %lu MHz\n", smp_processor_id(), FREQ(lo));
+#undef FREQ
+}
+
 void early_init_amd(struct cpuinfo_x86 *c)
 {
        if (c == &boot_cpu_data)
@@ -803,6 +904,8 @@ static void init_amd(struct cpuinfo_x86 *c)
                disable_c1_ramping();
 
        check_syscfg_dram_mod_en();
+
+       amd_log_freq(c);
 }
 
 const struct cpu_dev amd_cpu_dev = {
diff --git a/xen/arch/x86/cpu/cpu.h b/xen/arch/x86/cpu/cpu.h
index c2f4d9a06a..1992596d1b 100644
--- a/xen/arch/x86/cpu/cpu.h
+++ b/xen/arch/x86/cpu/cpu.h
@@ -19,3 +19,4 @@ extern void detect_ht(struct cpuinfo_x86 *c);
 extern bool detect_extended_topology(struct cpuinfo_x86 *c);
 
 void early_init_amd(struct cpuinfo_x86 *c);
+void amd_log_freq(const struct cpuinfo_x86 *c);
diff --git a/xen/arch/x86/cpu/hygon.c b/xen/arch/x86/cpu/hygon.c
index 9ab7aa8622..46293f1f36 100644
--- a/xen/arch/x86/cpu/hygon.c
+++ b/xen/arch/x86/cpu/hygon.c
@@ -99,6 +99,8 @@ static void init_hygon(struct cpuinfo_x86 *c)
                value |= (1 << 27); /* Enable read-only APERF/MPERF bit */
                wrmsrl(MSR_K7_HWCR, value);
        }
+
+       amd_log_freq(c);
 }
 
 const struct cpu_dev hygon_cpu_dev = {
diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
index 7966f4aa8a..b77c1a78ed 100644
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -344,6 +344,76 @@ static int num_cpu_cores(struct cpuinfo_x86 *c)
                return 1;
 }
 
+/* Log frequency info from CPUID leaves 0x15/0x16 and PLATFORM_INFO. */
+static void intel_log_freq(const struct cpuinfo_x86 *c)
+{
+    unsigned int eax, ebx, ecx, edx;
+    uint64_t msrval;
+    uint8_t max_ratio;
+
+    if ( c->cpuid_level >= 0x15 )
+    {
+        cpuid(0x15, &eax, &ebx, &ecx, &edx); /* ECX is in Hz, not MHz. */
+        if ( ecx && ebx && eax )
+        {
+            unsigned long long val = (unsigned long long)ecx * ebx;
+
+            do_div(val, eax);
+            printk("CPU%u: TSC: %u Hz * %u / %u = %Lu Hz\n",
+                   smp_processor_id(), ecx, ebx, eax, val);
+        }
+        else if ( ecx | eax | ebx )
+        {
+            printk("CPU%u: TSC:", smp_processor_id());
+            if ( ecx )
+                printk(" core: %u Hz", ecx);
+            if ( ebx && eax )
+                printk(" ratio: %u / %u", ebx, eax);
+            printk("\n");
+        }
+    }
+
+    if ( c->cpuid_level >= 0x16 )
+    {
+        cpuid(0x16, &eax, &ebx, &ecx, &edx); /* All values are in MHz. */
+        if ( ecx | eax | ebx )
+        {
+            printk("CPU%u:", smp_processor_id());
+            if ( ecx )
+                printk(" bus: %uMHz", ecx);
+            if ( eax )
+                printk(" base: %uMHz", eax);
+            if ( ebx )
+                printk(" max: %uMHz", ebx);
+            printk("\n");
+        }
+    }
+
+    if ( rdmsr_safe(MSR_INTEL_PLATFORM_INFO, msrval) )
+        return;
+    max_ratio = msrval >> 8; /* bits 15:8 */
+
+    if ( max_ratio )
+    {
+        unsigned int factor = 10000; /* per-ratio step, in 0.01 MHz units */
+        uint8_t min_ratio = msrval >> 40; /* bits 47:40 */
+
+        if ( c->x86 == 6 )
+            switch ( c->x86_model )
+            {
+            case 0x1a: case 0x1e: case 0x1f: case 0x2e: /* Nehalem */
+            case 0x25: case 0x2c: case 0x2f: /* Westmere */
+                factor = 13333;
+                break;
+            }
+
+        printk("CPU%u: ", smp_processor_id());
+        if ( min_ratio )
+            printk("%u..", (factor * min_ratio + 50) / 100);
+        printk("%u MHz\n", (factor * max_ratio + 50) / 100);
+    }
+}
+
 static void init_intel(struct cpuinfo_x86 *c)
 {
        /* Detect the extended topology information if available */
@@ -378,6 +448,10 @@ static void init_intel(struct cpuinfo_x86 *c)
             ( c->cpuid_level >= 0x00000006 ) &&
             ( cpuid_eax(0x00000006) & (1u<<2) ) )
                __set_bit(X86_FEATURE_ARAT, c->x86_capability);
+
+       if ((opt_cpu_info && !(c->apicid & (c->x86_num_siblings - 1))) ||
+           c == &boot_cpu_data )
+               intel_log_freq(c);
 }
 
 const struct cpu_dev intel_cpu_dev = {
diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index 41397e19cf..5c44c79600 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -40,8 +40,8 @@ static inline void wrmsrl(unsigned int msr, __u64 val)
 
 /* rdmsr with exception handling */
 #define rdmsr_safe(msr,val) ({\
-    int _rc; \
-    uint32_t lo, hi; \
+    int rc_; \
+    uint32_t lo_, hi_; \
     __asm__ __volatile__( \
         "1: rdmsr\n2:\n" \
         ".section .fixup,\"ax\"\n" \
@@ -49,15 +49,15 @@ static inline void wrmsrl(unsigned int msr, __u64 val)
         "   movl %5,%2\n; jmp 2b\n" \
         ".previous\n" \
         _ASM_EXTABLE(1b, 3b) \
-        : "=a" (lo), "=d" (hi), "=&r" (_rc) \
+        : "=a" (lo_), "=d" (hi_), "=&r" (rc_) \
         : "c" (msr), "2" (0), "i" (-EFAULT)); \
-    val = lo | ((uint64_t)hi << 32); \
-    _rc; })
+    val = lo_ | ((uint64_t)hi_ << 32); \
+    rc_; })
 
 /* wrmsr with exception handling */
 static inline int wrmsr_safe(unsigned int msr, uint64_t val)
 {
-    int _rc;
+    int rc;
     uint32_t lo, hi;
     lo = (uint32_t)val;
     hi = (uint32_t)(val >> 32);
@@ -68,9 +68,9 @@ static inline int wrmsr_safe(unsigned int msr, uint64_t val)
         "3: movl %5,%0\n; jmp 2b\n"
         ".previous\n"
         _ASM_EXTABLE(1b, 3b)
-        : "=&r" (_rc)
+        : "=&r" (rc)
         : "c" (msr), "a" (lo), "d" (hi), "0" (0), "i" (-EFAULT));
-    return _rc;
+    return rc;
 }
 
 static inline uint64_t msr_fold(const struct cpu_user_regs *regs)
--
generated by git-patchbot for /home/xen/git/xen.git#staging



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.