[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] Add more features to xenoprof



This patch set syncs xenoprof features with Linux upstream:

01_cpu_type.patch: add support for Core i7 and Atom.

02_arch_perfmon.patch: add architectural perfmon support. One benefit is that
more perfmon counters can be used on Nehalem.

03_force_arch_perfmon: force use of architectural perfmon instead of the
CPU-specific event set, which may not yet be supported by the oprofile
user-space tool.

Signed-off-by: Yang Zhang <yang.zhang@xxxxxxxxx>
Signed-off-by: Yang Xiaowei <xiaowei.yang@xxxxxxxxx>

Thanks,
xiaowei

diff -r e277fb77063c xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c   Sun Aug 23 16:51:46 2009 +0800
+++ b/xen/arch/x86/oprofile/nmi_int.c   Sun Aug 23 16:56:08 2009 +0800
@@ -357,22 +357,28 @@
                *cpu_type = "i386/pii";
                break;
        case 6 ... 8:
+       case 10 ... 11:
                *cpu_type = "i386/piii";
                break;
        case 9:
+       case 13:
                *cpu_type = "i386/p6_mobile";
-               break;
-       case 10 ... 13:
-               *cpu_type = "i386/p6";
                break;
        case 14:
                *cpu_type = "i386/core";
                break;
        case 15:
        case 23:
-       case 26:
        case 29:
                *cpu_type = "i386/core_2";
+               ppro_has_global_ctrl = 1;
+               break;
+       case 26:
+               *cpu_type = "i386/core_i7";
+               ppro_has_global_ctrl = 1;
+               break;
+       case 28:
+               *cpu_type = "i386/atom";
                ppro_has_global_ctrl = 1;
                break;
        default:
diff -r 629937ccc0e6 xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c  Sun Aug 23 16:56:09 2009 +0800
+++ b/xen/arch/x86/cpu/intel.c  Fri Sep 11 14:06:27 2009 +0800
@@ -148,6 +148,12 @@
 
        select_idle_routine(c);
        l2 = init_intel_cacheinfo(c);
+       if (c->cpuid_level > 9) {
+               unsigned eax = cpuid_eax(10);
+               /* Check for version and the number of counters */
+               if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+                       set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+       }
 
        /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until 
model 3 mask 3 */
        if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
diff -r 629937ccc0e6 xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c   Sun Aug 23 16:56:09 2009 +0800
+++ b/xen/arch/x86/oprofile/nmi_int.c   Fri Sep 11 14:06:27 2009 +0800
@@ -374,18 +374,15 @@
                ppro_has_global_ctrl = 1;
                break;
        case 26:
+               arch_perfmon_setup_counters();
                *cpu_type = "i386/core_i7";
                ppro_has_global_ctrl = 1;
                break;
        case 28:
                *cpu_type = "i386/atom";
-               ppro_has_global_ctrl = 1;
                break;
        default:
                /* Unknown */
-               printk("xenoprof: Initialization failed. "
-                      "Intel processor model %d for P6 class family is not "
-                      "supported\n", cpu_model);
                return 0;
        }
 
@@ -393,10 +390,21 @@
        return 1;
 }
 
+static int __init arch_perfmon_init(char **cpu_type)
+{
+       if (!cpu_has_arch_perfmon)
+               return 0;
+       *cpu_type = "i386/arch_perfmon";
+       model = &op_arch_perfmon_spec;
+       arch_perfmon_setup_counters();
+       return 1;
+}
+
 static int __init nmi_init(void)
 {
        __u8 vendor = current_cpu_data.x86_vendor;
        __u8 family = current_cpu_data.x86;
+       __u8 _model = current_cpu_data.x86_model;
  
        if (!cpu_has_apic) {
                printk("xenoprof: Initialization failed. No APIC\n");
@@ -438,21 +446,22 @@
                        switch (family) {
                                /* Pentium IV */
                                case 0xf:
-                                       if (!p4_init(&cpu_type))
-                                               return -ENODEV;
+                                       p4_init(&cpu_type);
                                        break;
 
                                /* A P6-class processor */
                                case 6:
-                                       if (!ppro_init(&cpu_type))
-                                               return -ENODEV;
+                                       ppro_init(&cpu_type);
                                        break;
 
                                default:
+                               break;
+                       }
+                       if (!cpu_type && !arch_perfmon_init(&cpu_type)) {
                                printk("xenoprof: Initialization failed. "
-                                      "Intel processor family %d is not "
-                                      "supported\n", family);
-                                       return -ENODEV;
+                                      "Intel processor family %d model %d"
+                                      "is not supported\n", family, _model);
+                               return -ENODEV;
                        }
                        break;
 
diff -r 629937ccc0e6 xen/arch/x86/oprofile/op_model_ppro.c
--- a/xen/arch/x86/oprofile/op_model_ppro.c     Sun Aug 23 16:56:09 2009 +0800
+++ b/xen/arch/x86/oprofile/op_model_ppro.c     Fri Sep 11 14:06:27 2009 +0800
@@ -24,12 +24,24 @@
 #include "op_x86_model.h"
 #include "op_counter.h"
 
-#define NUM_COUNTERS 2
-#define NUM_CONTROLS 2
+/*
+ * Intel "Architectural Performance Monitoring" CPUID
+ * detection/enumeration details:
+ */
+union cpuid10_eax {
+       struct {
+               unsigned int version_id:8;
+               unsigned int num_counters:8;
+               unsigned int bit_width:8;
+               unsigned int mask_length:8;
+       } split;
+       unsigned int full;
+};
 
-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} 
while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 
-1);} while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+static int num_counters = 2;
+static int counter_width = 32;
+
+#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) 
 
 #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), 
(h));} while (0)
 #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), 
(h));} while (0)
@@ -43,17 +55,18 @@
 #define CTRL_SET_EVENT(val, e) (val |= e)
 #define IS_ACTIVE(val) (val & (1 << 22) )  
 #define IS_ENABLE(val) (val & (1 << 20) )
-static unsigned long reset_value[NUM_COUNTERS];
+static unsigned long reset_value[OP_MAX_COUNTER];
 int ppro_has_global_ctrl = 0;
 extern int is_passive(struct domain *d);
  
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
-       msrs->counters[0].addr = MSR_P6_PERFCTR0;
-       msrs->counters[1].addr = MSR_P6_PERFCTR1;
-       
-       msrs->controls[0].addr = MSR_P6_EVNTSEL0;
-       msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+       int i;
+
+       for (i = 0; i < num_counters; i++)
+               msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
+       for (i = 0; i < num_counters; i++)
+               msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
 }
 
 
@@ -61,25 +74,41 @@
 {
        unsigned int low, high;
        int i;
+       
+       if (cpu_has_arch_perfmon) {
+               union cpuid10_eax eax;
+               eax.full = cpuid_eax(0xa);
+
+               /*
+                * For Core2 (family 6, model 15), don't reset the
+                * counter width:
+                */
+               if (!(eax.split.version_id == 0 &&
+                       current_cpu_data.x86 == 6 &&
+                               current_cpu_data.x86_model == 15)) {
+
+                       if (counter_width < eax.split.bit_width)
+                               counter_width = eax.split.bit_width;
+               }
+       }
 
        /* clear all counters */
-       for (i = 0 ; i < NUM_CONTROLS; ++i) {
+       for (i = 0 ; i < num_counters; ++i) {
                CTRL_READ(low, high, msrs, i);
                CTRL_CLEAR(low);
                CTRL_WRITE(low, high, msrs, i);
        }
        
        /* avoid a false detection of ctr overflows in NMI handler */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
-               CTR_WRITE(1, msrs, i);
-       }
+       for (i = 0; i < num_counters; ++i)
+               wrmsrl(msrs->counters[i].addr, -1LL);
 
        /* enable active counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (counter_config[i].enabled) {
                        reset_value[i] = counter_config[i].count;
 
-                       CTR_WRITE(counter_config[i].count, msrs, i);
+                       wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 
                        CTRL_READ(low, high, msrs, i);
                        CTRL_CLEAR(low);
@@ -89,6 +118,8 @@
                        CTRL_SET_UM(low, counter_config[i].unit_mask);
                        CTRL_SET_EVENT(low, counter_config[i].event);
                        CTRL_WRITE(low, high, msrs, i);
+               } else {
+                       reset_value[i] = 0;
                }
        }
 }
@@ -102,26 +133,26 @@
                            struct op_msrs const * const msrs,
                            struct cpu_user_regs * const regs)
 {
-       unsigned int low, high;
+       u64 val;
        int i;
        int ovf = 0;
        unsigned long eip = regs->eip;
        int mode = xenoprofile_get_mode(current, regs);
        struct arch_msr_pair *msrs_content = vcpu_vpmu(current)->context;
 
-       for (i = 0 ; i < NUM_COUNTERS; ++i) {
+       for (i = 0 ; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
-               CTR_READ(low, high, msrs, i);
-               if (CTR_OVERFLOWED(low)) {
+               rdmsrl(msrs->counters[i].addr, val);
+               if (CTR_OVERFLOWED(val)) {
                        xenoprof_log_event(current, regs, eip, mode, i);
-                       CTR_WRITE(reset_value[i], msrs, i);
+                       wrmsrl(msrs->counters[i].addr, -reset_value[i]);
                        if ( is_passive(current->domain) && (mode != 2) && 
                                (vcpu_vpmu(current)->flags & 
PASSIVE_DOMAIN_ALLOCATED) ) 
                        {
                                if ( IS_ACTIVE(msrs_content[i].control) )
                                {
-                                       msrs_content[i].counter = (low | 
(u64)high << 32);
+                                       msrs_content[i].counter = val;
                                        if ( IS_ENABLE(msrs_content[i].control) 
)
                                                ovf = 2;
                                }
@@ -144,7 +175,7 @@
        unsigned int low,high;
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (reset_value[i]) {
                        CTRL_READ(low, high, msrs, i);
                        CTRL_SET_ACTIVE(low);
@@ -155,7 +186,7 @@
      * However, this may not hold true when xenoprof starts to run.
      */
     if ( ppro_has_global_ctrl )
-        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, (1<<NUM_COUNTERS) - 1);
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, (1<<num_counters) - 1);
 }
 
 
@@ -164,7 +195,7 @@
        unsigned int low,high;
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                CTRL_READ(low, high, msrs, i);
@@ -178,14 +209,14 @@
 static int ppro_is_arch_pmu_msr(u64 msr_index, int *type, int *index)
 {
        if ( (msr_index >= MSR_IA32_PERFCTR0) &&
-            (msr_index < (MSR_IA32_PERFCTR0 + NUM_COUNTERS)) )
+            (msr_index < (MSR_IA32_PERFCTR0 + num_counters)) )
        {
                *type = MSR_TYPE_ARCH_COUNTER;
                *index = msr_index - MSR_IA32_PERFCTR0;
                return 1;
         }
         if ( (msr_index >= MSR_P6_EVNTSEL0) &&
-            (msr_index < (MSR_P6_EVNTSEL0 + NUM_CONTROLS)) )
+            (msr_index < (MSR_P6_EVNTSEL0 + num_counters)) )
         {
                *type = MSR_TYPE_ARCH_CTRL;
                *index = msr_index - MSR_P6_EVNTSEL0;
@@ -199,11 +230,11 @@
 {
        struct vpmu_struct *vpmu = vcpu_vpmu(v);
        struct arch_msr_pair *msr_content;
-       
-       msr_content = xmalloc_bytes( sizeof(struct arch_msr_pair) * 
NUM_COUNTERS );
+
+       msr_content = xmalloc_bytes( sizeof(struct arch_msr_pair) * 
num_counters );
        if ( !msr_content )
                goto out;
-       memset(msr_content, 0, sizeof(struct arch_msr_pair) * NUM_COUNTERS);
+       memset(msr_content, 0, sizeof(struct arch_msr_pair) * num_counters);
        vpmu->context = (void *)msr_content;
        vpmu->flags = 0;
        vpmu->flags |= PASSIVE_DOMAIN_ALLOCATED;
@@ -254,9 +285,39 @@
        }       
 }
 
-struct op_x86_model_spec const op_ppro_spec = {
-       .num_counters = NUM_COUNTERS,
-       .num_controls = NUM_CONTROLS,
+/*
+ * Architectural performance monitoring.
+ *
+ * Newer Intel CPUs (Core1+) have support for architectural
+ * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
+ * The advantage of this is that it can be done without knowing about
+ * the specific CPU.
+ */
+void arch_perfmon_setup_counters(void)
+{
+       union cpuid10_eax eax;
+
+       eax.full = cpuid_eax(0xa);
+
+       /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
+       if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
+           current_cpu_data.x86_model == 15) {
+               eax.split.version_id = 2;
+               eax.split.num_counters = 2;
+               eax.split.bit_width = 40;
+       }
+
+       num_counters = min_t(u8, eax.split.num_counters, OP_MAX_COUNTER);
+
+       op_arch_perfmon_spec.num_counters = num_counters;
+       op_arch_perfmon_spec.num_controls = num_counters;
+       op_ppro_spec.num_counters = num_counters;
+       op_ppro_spec.num_controls = num_counters;
+}
+
+struct op_x86_model_spec op_ppro_spec = {
+       .num_counters = 2,
+       .num_controls = 2,
        .fill_in_addresses = &ppro_fill_in_addresses,
        .setup_ctrs = &ppro_setup_ctrs,
        .check_ctrs = &ppro_check_ctrs,
@@ -268,3 +329,17 @@
        .load_msr = &ppro_load_msr,
        .save_msr = &ppro_save_msr
 };
+
+struct op_x86_model_spec op_arch_perfmon_spec = {
+       /* num_counters/num_controls filled in at runtime */
+       .fill_in_addresses = &ppro_fill_in_addresses,
+       .setup_ctrs = &ppro_setup_ctrs,
+       .check_ctrs = &ppro_check_ctrs,
+       .start = &ppro_start,
+       .stop = &ppro_stop,
+       .is_arch_pmu_msr = &ppro_is_arch_pmu_msr,
+       .allocated_msr = &ppro_allocate_msr,
+       .free_msr = &ppro_free_msr,
+       .load_msr = &ppro_load_msr,
+       .save_msr = &ppro_save_msr
+};
diff -r 629937ccc0e6 xen/arch/x86/oprofile/op_x86_model.h
--- a/xen/arch/x86/oprofile/op_x86_model.h      Sun Aug 23 16:56:09 2009 +0800
+++ b/xen/arch/x86/oprofile/op_x86_model.h      Fri Sep 11 14:06:27 2009 +0800
@@ -32,8 +32,8 @@
  * various x86 CPU model's perfctr support.
  */
 struct op_x86_model_spec {
-       unsigned int const num_counters;
-       unsigned int const num_controls;
+       unsigned int num_counters;
+       unsigned int num_controls;
        void (*fill_in_addresses)(struct op_msrs * const msrs);
        void (*setup_ctrs)(struct op_msrs const * const msrs);
        int (*check_ctrs)(unsigned int const cpu, 
@@ -48,9 +48,11 @@
         void (*save_msr)(struct vcpu * const v, int type, int index, u64 
msr_content);
 };
 
-extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec op_ppro_spec;
+extern struct op_x86_model_spec op_arch_perfmon_spec;
 extern struct op_x86_model_spec const op_p4_spec;
 extern struct op_x86_model_spec const op_p4_ht2_spec;
 extern struct op_x86_model_spec const op_athlon_spec;
 
+void arch_perfmon_setup_counters(void);
 #endif /* OP_X86_MODEL_H */
diff -r 629937ccc0e6 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h  Sun Aug 23 16:56:09 2009 +0800
+++ b/xen/include/asm-x86/cpufeature.h  Fri Sep 11 14:06:27 2009 +0800
@@ -76,6 +76,7 @@
 #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
 #define X86_FEATURE_NOSTOP_TSC (3*32+ 9) /* TSC does not stop in C states */
 #define X86_FEATURE_ARAT       (3*32+ 10) /* Always running APIC timer */
+#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3       (4*32+ 0) /* Streaming SIMD Extensions-3 */
@@ -188,6 +189,8 @@
                                  && boot_cpu_has(X86_FEATURE_FFXSR))
 
 #define cpu_has_x2apic          boot_cpu_has(X86_FEATURE_X2APIC)
+#define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
+
 #endif /* __ASM_I386_CPUFEATURE_H */
 
 /* 
diff -r 3557803e455d xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c   Fri Sep 11 13:56:58 2009 +0800
+++ b/xen/arch/x86/oprofile/nmi_int.c   Fri Sep 11 14:03:59 2009 +0800
@@ -344,10 +344,25 @@
 }
 
 
+static int force_arch_perfmon;
+static int force_cpu_type(const char *str)
+{
+        if (!strcmp(str, "arch_perfmon")) {
+                force_arch_perfmon = 1;
+                printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
+        }
+
+        return 0;
+}
+custom_param("cpu_type", force_cpu_type);
+
 extern int ppro_has_global_ctrl;
 static int __init ppro_init(char ** cpu_type)
 {
        __u8 cpu_model = current_cpu_data.x86_model;
+
+        if (force_arch_perfmon && cpu_has_arch_perfmon)
+                return 0;
 
        switch (cpu_model) {
        case 0 ... 2:
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.