
[Xen-devel] [PATCH 1/2] x86: include the PPIN in MCE records when available



Quoting the respective Linux commit:

    Intel Xeons from Ivy Bridge onwards support a processor identification
    number set in the factory. To the user this is a handy unique number to
    identify a particular CPU. Intel can decode this to the fab/production
    run to track errors. On systems that have it, include it in the machine
    check record. I'm told that this would be helpful for users that run
    large data centers with multi-socket servers to keep track of which CPUs
    are seeing errors.

Newer AMD CPUs support this too, at different MSR numbers.

Take the opportunity to hide __MC_NMSRS from the public interface going
forward.

[Linux commit 3f5a7896a5096fd50030a04d4c3f28a7441e30a5]
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
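---
Note (illustrative, not part of the patch): with __MC_NMSRS hidden from
interface versions newer than 4.13, consumers of XEN_MC_physcpuinfo should
walk mc_msrvalues[] up to mc_nmsrvals instead of assuming a single fixed
entry. A minimal sketch of such a consumer follows, using the mcinfo_msr
fields visible in this patch; the mcinfo_logical_cpu record, the installed
header path, and the print helper are assumptions for illustration only:

    #include <inttypes.h>
    #include <stdio.h>

    #include <xen/arch-x86/xen-mca.h>   /* mcinfo_logical_cpu, mcinfo_msr */

    /* MSR numbers as added to msr-index.h by this patch. */
    #define MSR_PPIN        0x0000004fULL
    #define MSR_AMD_PPIN    0xc00102f1ULL

    /* Print the PPIN for one CPU's record, if the hypervisor reported it. */
    static void print_ppin(unsigned int cpu,
                           const struct mcinfo_logical_cpu *xcp)
    {
        unsigned int i;

        for ( i = 0; i < xcp->mc_nmsrvals && i < __MC_MSR_ARRAYSIZE; i++ )
        {
            const struct mcinfo_msr *msr = &xcp->mc_msrvalues[i];

            if ( msr->reg == MSR_PPIN || msr->reg == MSR_AMD_PPIN )
                printf("CPU%u PPIN %016" PRIx64 "\n", cpu, msr->value);
        }
    }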

--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -35,6 +35,7 @@ bool __read_mostly mce_broadcast;
 bool is_mc_panic;
 DEFINE_PER_CPU_READ_MOSTLY(unsigned int, nr_mce_banks);
 unsigned int __read_mostly firstbank;
+unsigned int __read_mostly ppin_msr;
 uint8_t __read_mostly cmci_apic_vector;
 
 DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, poll_bankmask);
@@ -999,10 +1000,17 @@ static void do_mc_get_cpu_info(void *v)
     /*
      * This part needs to run on the CPU itself.
      */
-    xcp->mc_nmsrvals = __MC_NMSRS;
+    xcp->mc_nmsrvals = 1;
     xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP;
     rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value);
 
+    if ( ppin_msr && xcp->mc_nmsrvals < ARRAY_SIZE(xcp->mc_msrvalues) )
+    {
+        xcp->mc_msrvalues[xcp->mc_nmsrvals].reg = ppin_msr;
+        rdmsrl(ppin_msr, xcp->mc_msrvalues[xcp->mc_nmsrvals].value);
+        ++xcp->mc_nmsrvals;
+    }
+
     if ( c->cpuid_level >= 1 )
     {
         cpuid(1, &junk, &ebx, &junk, &junk);
--- a/xen/arch/x86/cpu/mcheck/mce.h
+++ b/xen/arch/x86/cpu/mcheck/mce.h
@@ -49,6 +49,7 @@ enum mcheck_type intel_mcheck_init(struc
 void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
 
 extern unsigned int firstbank;
+extern unsigned int ppin_msr;
 
 struct mcinfo_extended *intel_get_extended_msrs(
     struct mcinfo_global *mig, struct mc_info *mi);
--- a/xen/arch/x86/cpu/mcheck/mce_amd.c
+++ b/xen/arch/x86/cpu/mcheck/mce_amd.c
@@ -315,6 +315,26 @@ amd_mcheck_init(struct cpuinfo_x86 *ci)
     if ( quirkflag == MCEQUIRK_F10_GART )
         mcequirk_amd_apply(quirkflag);
 
+    if ( cpu_has(ci, X86_FEATURE_AMD_PPIN) &&
+         (ci == &boot_cpu_data || ppin_msr) )
+    {
+        uint64_t val;
+
+        rdmsrl(MSR_AMD_PPIN_CTL, val);
+
+        /* If PPIN is disabled, but not locked, try to enable. */
+        if ( !(val & (PPIN_ENABLE | PPIN_LOCKOUT)) )
+        {
+            wrmsr_safe(MSR_AMD_PPIN_CTL, val | PPIN_ENABLE);
+            rdmsrl(MSR_AMD_PPIN_CTL, val);
+        }
+
+        if ( (val & (PPIN_ENABLE | PPIN_LOCKOUT)) != PPIN_ENABLE )
+            ppin_msr = 0;
+        else if ( ci == &boot_cpu_data )
+            ppin_msr = MSR_AMD_PPIN;
+    }
+
     x86_mce_callback_register(amd_f10_handler);
     mce_recoverable_register(mc_amd_recoverable_scan);
     mce_register_addrcheck(mc_amd_addrcheck);
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -853,6 +853,43 @@ static void intel_init_mce(void)
     mce_uhandler_num = ARRAY_SIZE(intel_mce_uhandlers);
 }
 
+static void intel_init_ppin(const struct cpuinfo_x86 *c)
+{
+    /*
+     * Even if testing the presence of the MSR would be enough, we don't
+     * want to risk the situation where other models reuse this MSR for
+     * other purposes.
+     */
+    switch ( c->x86_model )
+    {
+        uint64_t val;
+
+    case 0x3e: /* IvyBridge X */
+    case 0x3f: /* Haswell X */
+    case 0x4f: /* Broadwell X */
+    case 0x55: /* Skylake X */
+    case 0x56: /* Broadwell Xeon D */
+    case 0x57: /* Knights Landing */
+    case 0x85: /* Knights Mill */
+
+        if ( (c != &boot_cpu_data && !ppin_msr) ||
+             rdmsr_safe(MSR_PPIN_CTL, val) )
+            return;
+
+        /* If PPIN is disabled, but not locked, try to enable. */
+        if ( !(val & (PPIN_ENABLE | PPIN_LOCKOUT)) )
+        {
+            wrmsr_safe(MSR_PPIN_CTL, val | PPIN_ENABLE);
+            rdmsr_safe(MSR_PPIN_CTL, val);
+        }
+
+        if ( (val & (PPIN_ENABLE | PPIN_LOCKOUT)) != PPIN_ENABLE )
+            ppin_msr = 0;
+        else if ( c == &boot_cpu_data )
+            ppin_msr = MSR_PPIN;
+    }
+}
+
 static void cpu_mcabank_free(unsigned int cpu)
 {
     struct mca_banks *cmci = per_cpu(no_cmci_banks, cpu);
@@ -941,6 +978,8 @@ enum mcheck_type intel_mcheck_init(struc
 
     intel_init_thermal(c);
 
+    intel_init_ppin(c);
+
     return mcheck_intel;
 }
 
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -45,6 +45,13 @@
 #define MSR_PRED_CMD                   0x00000049
 #define PRED_CMD_IBPB                  (_AC(1, ULL) << 0)
 
+/* Intel Protected Processor Inventory Number */
+#define MSR_PPIN_CTL                   0x0000004e
+#define MSR_PPIN                       0x0000004f
+
+#define PPIN_LOCKOUT                   (_AC(1, ULL) << 0)
+#define PPIN_ENABLE                    (_AC(1, ULL) << 1)
+
 #define MSR_ARCH_CAPABILITIES          0x0000010a
 #define ARCH_CAPS_RDCL_NO              (_AC(1, ULL) << 0)
 #define ARCH_CAPS_IBRS_ALL             (_AC(1, ULL) << 1)
@@ -278,6 +285,10 @@
 #define MSR_AMD_OSVW_ID_LENGTH          0xc0010140
 #define MSR_AMD_OSVW_STATUS             0xc0010141
 
+/* AMD Protected Processor Inventory Number */
+#define MSR_AMD_PPIN_CTL                0xc00102f0
+#define MSR_AMD_PPIN                    0xc00102f1
+
 /* K6 MSRs */
 #define MSR_K6_EFER                    0xc0000080
 #define MSR_K6_STAR                    0xc0000081
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -247,6 +247,7 @@ XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2
 XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2) /*A  (F)X{SAVE,RSTOR} always saves/restores FPU Error pointers */
 XEN_CPUFEATURE(WBNOINVD,      8*32+ 9) /*   WBNOINVD instruction */
 XEN_CPUFEATURE(IBPB,          8*32+12) /*A  IBPB support only (no IBRS, used by AMD) */
+XEN_CPUFEATURE(AMD_PPIN,      8*32+23) /*   Protected Processor Inventory Number */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
 XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A  AVX512 Neural Network Instructions */
--- a/xen/include/public/arch-x86/xen-mca.h
+++ b/xen/include/public/arch-x86/xen-mca.h
@@ -246,7 +246,9 @@ typedef struct mc_info mc_info_t;
 DEFINE_XEN_GUEST_HANDLE(mc_info_t);
 
 #define __MC_MSR_ARRAYSIZE 8
+#if __XEN_INTERFACE_VERSION__ <= 0x00040d00
 #define __MC_NMSRS 1
+#endif
 #define MC_NCAPS       7       /* 7 CPU feature flag words */
 #define MC_CAPS_STD_EDX        0       /* cpuid level 0x00000001 (%edx) */
 #define MC_CAPS_AMD_EDX        1       /* cpuid level 0x80000001 (%edx) */
--- a/xen/include/public/xen-compat.h
+++ b/xen/include/public/xen-compat.h
@@ -27,7 +27,7 @@
 #ifndef __XEN_PUBLIC_XEN_COMPAT_H__
 #define __XEN_PUBLIC_XEN_COMPAT_H__
 
-#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040d00
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040e00
 
 #if defined(__XEN__) || defined(__XEN_TOOLS__)
 /* Xen is built with matching headers and implements the latest interface. */

