[Xen-changelog] Add xenoprof support
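This changeset adds a new hypercall, __HYPERVISOR_xenoprof_op (number 31), and a new virtual interrupt, VIRQ_XENOPROF, through which a paravirtualized guest's OProfile driver programs the hardware performance counters via Xen and learns when samples are waiting in buffers shared with the hypervisor. As an orientation before the diff itself, the guest-side life cycle implemented below looks roughly like the following sketch; every operation name is taken from the patch, but error handling and the per-CPU VIRQ binding are trimmed, so read it as an outline rather than the literal driver code:

    /* Condensed guest-side xenoprof life cycle (see
     * linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c below).
     * counter_config/num_events are the driver globals from that file. */
    xenoprof_init_result_t result;

    /* Discover counters, primary-profiler status and the machine
     * address/size of the shared sample buffers. */
    HYPERVISOR_xenoprof_op(XENOPROF_init, 16 /* max_samples */,
                           (unsigned long)&result);

    /* Only the primary profiler (dom0 for now) owns the counters. */
    HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_setup_events,
                           (unsigned long)&counter_config,
                           (unsigned long)num_events);

    /* Each participating domain enables VIRQ_XENOPROF delivery. */
    HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_start, 0, 0);
    /* ... VIRQ_XENOPROF fires; xenoprof_ovf_interrupt() drains the
     * shared buffers into OProfile's event buffer ... */
    HYPERVISOR_xenoprof_op(XENOPROF_stop, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_release_counters, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_shutdown, 0, 0);
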
# HG changeset patch # User ack@xxxxxxxxxxxxxxxxxxxxxxx # Node ID e049baa9055dfa15bbf5ed0b3c3e56fabedbc386 # Parent 388c59fefaa6add89ca38622f2170cb7c98429ba Add xenoprof support Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx> diff -r 388c59fefaa6 -r e049baa9055d buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Thu Apr 6 16:49:21 2006 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Thu Apr 6 17:58:01 2006 @@ -1231,6 +1231,7 @@ # # Instrumentation Support # +# CONFIG_PROFILING is not set # CONFIG_KPROBES is not set # diff -r 388c59fefaa6 -r e049baa9055d buildconfigs/linux-defconfig_xenU_x86_32 --- a/buildconfigs/linux-defconfig_xenU_x86_32 Thu Apr 6 16:49:21 2006 +++ b/buildconfigs/linux-defconfig_xenU_x86_32 Thu Apr 6 17:58:01 2006 @@ -779,6 +779,7 @@ # # Instrumentation Support # +# CONFIG_PROFILING is not set # CONFIG_KPROBES is not set # diff -r 388c59fefaa6 -r e049baa9055d buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Thu Apr 6 16:49:21 2006 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Thu Apr 6 17:58:01 2006 @@ -2892,6 +2892,7 @@ # # Instrumentation Support # +# CONFIG_PROFILING is not set # CONFIG_KPROBES is not set # diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/Kconfig --- a/linux-2.6-xen-sparse/arch/i386/Kconfig Thu Apr 6 16:49:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Thu Apr 6 17:58:01 2006 @@ -1116,9 +1116,7 @@ menu "Instrumentation Support" depends on EXPERIMENTAL -if !X86_XEN source "arch/i386/oprofile/Kconfig" -endif config KPROBES bool "Kprobes (EXPERIMENTAL)" diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Thu Apr 6 16:49:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Thu Apr 6 17:58:01 2006 @@ -177,6 +177,32 @@ EXPORT_SYMBOL(touch_pte_range); +void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot) +{ + int error; + + struct vm_struct *vma; + vma = get_vm_area (vm_size, VM_IOREMAP); + + if (vma == NULL) { + printk ("ioremap.c,vm_map_xen_pages(): " + "Failed to get VMA area\n"); + return NULL; + } + + error = direct_kernel_remap_pfn_range((unsigned long) vma->addr, + maddr >> PAGE_SHIFT, vm_size, + prot, DOMID_SELF ); + if (error == 0) { + return vma->addr; + } else { + printk ("ioremap.c,vm_map_xen_pages(): " + "Failed to map xen shared pages into kernel space\n"); + return NULL; + } +} +EXPORT_SYMBOL(vm_map_xen_pages); + /* * Does @address reside within a non-highmem page that is local to this virtual * machine (i.e., not an I/O page, nor a memory page belonging to another VM). 
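For reference, the vm_map_xen_pages() helper added above has a single caller in this changeset: oprofile_arch_init() in the new xenoprof.c further down uses it to map the sample buffers Xen allocated into the guest kernel's address space. A condensed sketch of that call site (variable names follow the patch; the real code stashes the pointer in the shared_buffer global and jumps to an error label instead of returning):

    /* Map the result.nbuf per-VCPU sample buffers returned by
     * XENOPROF_init; result.buf_maddr is a machine address. */
    pgprot_t prot = __pgprot(_KERNPG_TABLE);
    int npages = (result.bufsize * result.nbuf - 1) / PAGE_SIZE + 1;
    char *shared_buffer = vm_map_xen_pages(result.buf_maddr,
                                           npages * PAGE_SIZE, prot);
    if (shared_buffer == NULL)
            return -ENOMEM;
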
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Thu Apr 6 16:49:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Thu Apr 6 17:58:01 2006 @@ -335,6 +335,14 @@ { return _hypercall2(int, callback_op, cmd, arg); } + +static inline int +HYPERVISOR_xenoprof_op( + int op, unsigned long arg1, unsigned long arg2) +{ + return _hypercall3(int, xenoprof_op, op, arg1, arg2); +} + #endif /* __HYPERCALL_H__ */ diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h Thu Apr 6 16:49:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h Thu Apr 6 17:58:01 2006 @@ -335,6 +335,13 @@ int cmd, void *arg) { return _hypercall2(int, callback_op, cmd, arg); +} + +static inline int +HYPERVISOR_xenoprof_op( + int op, unsigned long arg1, unsigned long arg2) +{ + return _hypercall3(int, xenoprof_op, op, arg1, arg2); } #endif /* __HYPERCALL_H__ */ diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/Makefile Thu Apr 6 17:58:01 2006 @@ -2,6 +2,7 @@ subdir-y += cpu subdir-y += genapic subdir-y += hvm +subdir-y += oprofile subdir-$(x86_32) += x86_32 subdir-$(x86_64) += x86_64 diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/domain.c Thu Apr 6 17:58:01 2006 @@ -915,6 +915,8 @@ spin_unlock_recursive(&d->page_alloc_lock); } +extern void free_xenoprof_pages(struct domain *d); + void domain_relinquish_resources(struct domain *d) { struct vcpu *v; @@ -961,6 +963,10 @@ /* Relinquish every page of memory. 
*/ relinquish_memory(d, &d->xenpage_list); relinquish_memory(d, &d->page_list); + + /* Free page used by xen oprofile buffer */ + free_xenoprof_pages(d); + } void arch_dump_domain_info(struct domain *d) diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/x86_32/entry.S Thu Apr 6 17:58:01 2006 @@ -645,6 +645,7 @@ .long do_nmi_op .long do_arch_sched_op .long do_callback_op /* 30 */ + .long do_xenoprof_op .rept NR_hypercalls-((.-hypercall_table)/4) .long do_ni_hypercall .endr @@ -681,6 +682,7 @@ .byte 2 /* do_nmi_op */ .byte 2 /* do_arch_sched_op */ .byte 2 /* do_callback_op */ /* 30 */ + .byte 3 /* do_xenoprof_op */ .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/x86_64/entry.S Thu Apr 6 17:58:01 2006 @@ -553,6 +553,7 @@ .quad do_nmi_op .quad do_arch_sched_op .quad do_callback_op /* 30 */ + .quad do_xenoprof_op .rept NR_hypercalls-((.-hypercall_table)/8) .quad do_ni_hypercall .endr @@ -589,6 +590,7 @@ .byte 2 /* do_nmi_op */ .byte 2 /* do_arch_sched_op */ .byte 2 /* do_callback_op */ /* 30 */ + .byte 3 /* do_xenoprof_op */ .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr diff -r 388c59fefaa6 -r e049baa9055d xen/include/public/xen.h --- a/xen/include/public/xen.h Thu Apr 6 16:49:21 2006 +++ b/xen/include/public/xen.h Thu Apr 6 17:58:01 2006 @@ -61,6 +61,7 @@ #define __HYPERVISOR_nmi_op 28 #define __HYPERVISOR_sched_op 29 #define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 /* * VIRTUAL INTERRUPTS @@ -77,7 +78,8 @@ #define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ #define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ #define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ -#define NR_VIRQS 8 +#define VIRQ_XENOPROF 7 /* XenOprofile interrupt: new sample available */ +#define NR_VIRQS 9 /* * MMU-UPDATE REQUESTS diff -r 388c59fefaa6 -r e049baa9055d xen/include/xen/sched.h --- a/xen/include/xen/sched.h Thu Apr 6 16:49:21 2006 +++ b/xen/include/xen/sched.h Thu Apr 6 17:58:01 2006 @@ -14,6 +14,7 @@ #include <xen/grant_table.h> #include <xen/rangeset.h> #include <asm/domain.h> +#include <xen/xenoprof.h> extern unsigned long volatile jiffies; extern rwlock_t domlist_lock; @@ -155,6 +156,9 @@ /* Control-plane tools handle for this domain. 
*/ xen_domain_handle_t handle; + + /* pointer to xenoprof data (oprofile support) */ + xenoprof_t *xenoprof; }; struct domain_setup_info diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/oprofile/Makefile --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile Thu Apr 6 17:58:01 2006 @@ -0,0 +1,16 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +ifdef CONFIG_X86_XEN +oprofile-y := $(DRIVER_OBJS) xenoprof.o +else +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ + op_model_ppro.o op_model_p4.o +oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o +endif diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c Thu Apr 6 17:58:01 2006 @@ -0,0 +1,395 @@ +/** + * @file xenoprof.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * + * Modified by Aravind Menon and Jose Renato Santos for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#include <linux/init.h> +#include <linux/notifier.h> +#include <linux/smp.h> +#include <linux/oprofile.h> +#include <linux/sysdev.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/vmalloc.h> +#include <asm/nmi.h> +#include <asm/msr.h> +#include <asm/apic.h> +#include <asm/pgtable.h> +#include <xen/evtchn.h> +#include "op_counter.h" + +#include <xen/interface/xen.h> +#include <xen/interface/xenoprof.h> + +static int xenoprof_start(void); +static void xenoprof_stop(void); + +void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot); + +static int xenoprof_enabled = 0; +static int num_events = 0; +static int is_primary = 0; + +/* sample buffers shared with Xen */ +xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS]; +/* Shared buffer area */ +char * shared_buffer; +/* Number of buffers in shared area (one per VCPU) */ +int nbuf; +/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */ +int ovf_irq[NR_CPUS]; +/* cpu model type string - copied from Xen memory space on XENOPROF_init command */ +char cpu_type[XENOPROF_CPU_TYPE_SIZE]; + +#ifdef CONFIG_PM + +static int xenoprof_suspend(struct sys_device * dev, pm_message_t state) +{ + if (xenoprof_enabled == 1) + xenoprof_stop(); + return 0; +} + + +static int xenoprof_resume(struct sys_device * dev) +{ + if (xenoprof_enabled == 1) + xenoprof_start(); + return 0; +} + + +static struct sysdev_class oprofile_sysclass = { + set_kset_name("oprofile"), + .resume = xenoprof_resume, + .suspend = xenoprof_suspend +}; + + +static struct sys_device device_oprofile = { + .id = 0, + .cls = &oprofile_sysclass, +}; + + +static int __init init_driverfs(void) +{ + int error; + if (!(error = sysdev_class_register(&oprofile_sysclass))) + error = sysdev_register(&device_oprofile); + return error; +} + + +static void __exit exit_driverfs(void) +{ + sysdev_unregister(&device_oprofile); + sysdev_class_unregister(&oprofile_sysclass); +} + +#else +#define init_driverfs() do { } while (0) +#define exit_driverfs() do { } while (0) +#endif /* CONFIG_PM */ + +unsigned long long oprofile_samples = 0; + +static irqreturn_t +xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs 
* regs) +{ + int head, tail, size; + xenoprof_buf_t * buf; + int cpu; + + cpu = smp_processor_id(); + buf = xenoprof_buf[cpu]; + + head = buf->event_head; + tail = buf->event_tail; + size = buf->event_size; + + if (tail > head) { + while (tail < size) { + oprofile_add_pc(buf->event_log[tail].eip, + buf->event_log[tail].mode, + buf->event_log[tail].event); + oprofile_samples++; + tail++; + } + tail = 0; + } + while (tail < head) { + oprofile_add_pc(buf->event_log[tail].eip, + buf->event_log[tail].mode, + buf->event_log[tail].event); + oprofile_samples++; + tail++; + } + + buf->event_tail = tail; + + return IRQ_HANDLED; +} + + +static void unbind_virq_cpu(void * info) +{ + int cpu = smp_processor_id(); + if (ovf_irq[cpu] >= 0) { + unbind_from_irqhandler(ovf_irq[cpu], NULL); + ovf_irq[cpu] = -1; + } +} + + +static void unbind_virq(void) +{ + on_each_cpu(unbind_virq_cpu, NULL, 0, 1); +} + + +int bind_virq_error; + +static void bind_virq_cpu(void * info) +{ + int result; + int cpu = smp_processor_id(); + + result = bind_virq_to_irqhandler(VIRQ_XENOPROF, + cpu, + xenoprof_ovf_interrupt, + SA_INTERRUPT, + "xenoprof", + NULL); + + if (result<0) { + bind_virq_error = result; + printk("xenoprof.c: binding VIRQ_XENOPROF to IRQ failed on CPU " + "%d\n", cpu); + } else { + ovf_irq[cpu] = result; + } +} + + +static int bind_virq(void) +{ + bind_virq_error = 0; + on_each_cpu(bind_virq_cpu, NULL, 0, 1); + if (bind_virq_error) { + unbind_virq(); + return bind_virq_error; + } else { + return 0; + } +} + + +static int xenoprof_setup(void) +{ + int ret; + + ret = bind_virq(); + if (ret) + return ret; + + if (is_primary) { + ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, + (unsigned long)NULL, + (unsigned long)NULL); + if (ret) + goto err; + + ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, + (unsigned long)&counter_config, + (unsigned long)num_events); + if (ret) + goto err; + } + + ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, + (unsigned long)NULL, + (unsigned long)NULL); + if (ret) + goto err; + + xenoprof_enabled = 1; + return 0; + err: + unbind_virq(); + return ret; +} + + +static void xenoprof_shutdown(void) +{ + xenoprof_enabled = 0; + + HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, + (unsigned long)NULL, + (unsigned long)NULL); + + if (is_primary) { + HYPERVISOR_xenoprof_op(XENOPROF_release_counters, + (unsigned long)NULL, + (unsigned long)NULL); + } + + unbind_virq(); +} + + +static int xenoprof_start(void) +{ + int ret = 0; + + if (is_primary) + ret = HYPERVISOR_xenoprof_op(XENOPROF_start, + (unsigned long)NULL, + (unsigned long)NULL); + return ret; +} + + +static void xenoprof_stop(void) +{ + if (is_primary) + HYPERVISOR_xenoprof_op(XENOPROF_stop, + (unsigned long)NULL, + (unsigned long)NULL); +} + + +static int xenoprof_set_active(int * active_domains, + unsigned int adomains) +{ + int ret = 0; + if (is_primary) + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, + (unsigned long)active_domains, + (unsigned long)adomains); + return ret; +} + + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +static int xenoprof_create_files(struct super_block * sb, struct dentry * root) +{ + unsigned int i; + + for (i = 0; i < num_events; ++i) { + struct dentry * dir; + char buf[2]; + + snprintf(buf, 2, "%d", i); + dir = oprofilefs_mkdir(sb, root, buf); + oprofilefs_create_ulong(sb, dir, "enabled", + &counter_config[i].enabled); + oprofilefs_create_ulong(sb, dir, "event", + &counter_config[i].event); + oprofilefs_create_ulong(sb, dir, "count", + &counter_config[i].count); + 
oprofilefs_create_ulong(sb, dir, "unit_mask", + &counter_config[i].unit_mask); + oprofilefs_create_ulong(sb, dir, "kernel", + &counter_config[i].kernel); + oprofilefs_create_ulong(sb, dir, "user", + &counter_config[i].user); + } + + return 0; +} + + +struct oprofile_operations xenoprof_ops = { + .create_files = xenoprof_create_files, + .set_active = xenoprof_set_active, + .setup = xenoprof_setup, + .shutdown = xenoprof_shutdown, + .start = xenoprof_start, + .stop = xenoprof_stop +}; + + +/* in order to get driverfs right */ +static int using_xenoprof; + +int __init oprofile_arch_init(struct oprofile_operations * ops) +{ + xenoprof_init_result_t result; + xenoprof_buf_t * buf; + int max_samples = 16; + int vm_size; + int npages; + int i; + + int ret = HYPERVISOR_xenoprof_op(XENOPROF_init, + (unsigned long)max_samples, + (unsigned long)&result); + + if (!ret) { + pgprot_t prot = __pgprot(_KERNPG_TABLE); + + num_events = result.num_events; + is_primary = result.is_primary; + nbuf = result.nbuf; + + npages = (result.bufsize * nbuf - 1) / PAGE_SIZE + 1; + vm_size = npages * PAGE_SIZE; + + shared_buffer = (char *) vm_map_xen_pages(result.buf_maddr, + vm_size, prot); + if (!shared_buffer) { + ret = -ENOMEM; + goto out; + } + + for (i=0; i< nbuf; i++) { + buf = (xenoprof_buf_t*) + &shared_buffer[i * result.bufsize]; + BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); + xenoprof_buf[buf->vcpu_id] = buf; + } + + /* cpu_type is detected by Xen */ + cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0; + strncpy(cpu_type, result.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1); + xenoprof_ops.cpu_type = cpu_type; + + init_driverfs(); + using_xenoprof = 1; + *ops = xenoprof_ops; + + for (i=0; i<NR_CPUS; i++) + ovf_irq[i] = -1; + } + out: + printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, " + "is_primary %d\n", ret, num_events, is_primary); + return ret; +} + + +void __exit oprofile_arch_exit(void) +{ + if (using_xenoprof) + exit_driverfs(); + + if (shared_buffer) { + vunmap(shared_buffer); + shared_buffer = NULL; + } + if (is_primary) + HYPERVISOR_xenoprof_op(XENOPROF_shutdown, + (unsigned long)NULL, + (unsigned long)NULL); +} diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile Thu Apr 6 17:58:01 2006 @@ -0,0 +1,22 @@ +# +# oprofile for x86-64. +# Just reuse the one from i386. 
+# + +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +ifdef CONFIG_X86_XEN +OPROFILE-y := xenoprof.o +else +OPROFILE-y := init.o backtrace.o +OPROFILE-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o op_model_p4.o \ + op_model_ppro.o +OPROFILE-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o +endif +oprofile-y = $(DRIVER_OBJS) $(addprefix ../../i386/oprofile/, $(OPROFILE-y)) diff -r 388c59fefaa6 -r e049baa9055d patches/linux-2.6.16/xenoprof-generic.patch --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/patches/linux-2.6.16/xenoprof-generic.patch Thu Apr 6 17:58:01 2006 @@ -0,0 +1,384 @@ +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c +--- ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/buffer_sync.c 2006-04-03 15:53:05.000000000 +0100 +@@ -6,6 +6,10 @@ + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * This is the core of the buffer management. Each + * CPU buffer is processed and entered into the + * global event buffer. Such processing is necessary +@@ -275,15 +279,24 @@ static void add_cpu_switch(int i) + last_cookie = INVALID_COOKIE; + } + +-static void add_kernel_ctx_switch(unsigned int in_kernel) ++static void add_cpu_mode_switch(unsigned int cpu_mode) + { + add_event_entry(ESCAPE_CODE); +- if (in_kernel) +- add_event_entry(KERNEL_ENTER_SWITCH_CODE); +- else +- add_event_entry(KERNEL_EXIT_SWITCH_CODE); ++ switch (cpu_mode) { ++ case CPU_MODE_USER: ++ add_event_entry(USER_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_KERNEL: ++ add_event_entry(KERNEL_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_XEN: ++ add_event_entry(XEN_ENTER_SWITCH_CODE); ++ break; ++ default: ++ break; ++ } + } +- ++ + static void + add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) + { +@@ -348,9 +361,9 @@ static int add_us_sample(struct mm_struc + * for later lookup from userspace.
+ */ + static int +-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) ++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode) + { +- if (in_kernel) { ++ if (cpu_mode >= CPU_MODE_KERNEL) { + add_sample_entry(s->eip, s->event); + return 1; + } else if (mm) { +@@ -496,7 +509,7 @@ void sync_buffer(int cpu) + struct mm_struct *mm = NULL; + struct task_struct * new; + unsigned long cookie = 0; +- int in_kernel = 1; ++ int cpu_mode = 1; + unsigned int i; + sync_buffer_state state = sb_buffer_start; + unsigned long available; +@@ -513,12 +526,12 @@ void sync_buffer(int cpu) + struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; + + if (is_code(s->eip)) { +- if (s->event <= CPU_IS_KERNEL) { ++ if (s->event <= CPU_MODE_XEN) { + /* kernel/userspace switch */ +- in_kernel = s->event; ++ cpu_mode = s->event; + if (state == sb_buffer_start) + state = sb_sample_start; +- add_kernel_ctx_switch(s->event); ++ add_cpu_mode_switch(s->event); + } else if (s->event == CPU_TRACE_BEGIN) { + state = sb_bt_start; + add_trace_begin(); +@@ -536,7 +549,7 @@ void sync_buffer(int cpu) + } + } else { + if (state >= sb_bt_start && +- !add_sample(mm, s, in_kernel)) { ++ !add_sample(mm, s, cpu_mode)) { + if (state == sb_bt_start) { + state = sb_bt_ignore; + atomic_inc(&oprofile_stats.bt_lost_no_mapping); +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c +--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/cpu_buffer.c 2006-04-03 15:53:05.000000000 +0100 +@@ -6,6 +6,10 @@ + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * Each CPU has a local buffer that stores PC value/event + * pairs. We also log context switches when we notice them. + * Eventually each CPU's buffer is processed into the global +@@ -58,7 +62,7 @@ int alloc_cpu_buffers(void) + goto fail; + + b->last_task = NULL; +- b->last_is_kernel = -1; ++ b->last_cpu_mode = -1; + b->tracing = 0; + b->buffer_size = buffer_size; + b->tail_pos = 0; +@@ -114,7 +118,7 @@ void cpu_buffer_reset(struct oprofile_cp + * collected will populate the buffer with proper + * values to initialize the buffer + */ +- cpu_buf->last_is_kernel = -1; ++ cpu_buf->last_cpu_mode = -1; + cpu_buf->last_task = NULL; + } + +@@ -164,13 +168,13 @@ add_code(struct oprofile_cpu_buffer * bu + * because of the head/tail separation of the writer and reader + * of the CPU buffer. + * +- * is_kernel is needed because on some architectures you cannot ++ * cpu_mode is needed because on some architectures you cannot + * tell if you are in kernel or user space simply by looking at +- * pc. We tag this in the buffer by generating kernel enter/exit +- * events whenever is_kernel changes ++ * pc. 
We tag this in the buffer by generating kernel/user (and xen) ++ * enter events whenever cpu_mode changes + */ + static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, +- int is_kernel, unsigned long event) ++ int cpu_mode, unsigned long event) + { + struct task_struct * task; + +@@ -181,16 +185,16 @@ static int log_sample(struct oprofile_cp + return 0; + } + +- is_kernel = !!is_kernel; ++ WARN_ON(cpu_mode > CPU_MODE_XEN); + + task = current; + + /* notice a switch from user->kernel or vice versa */ +- if (cpu_buf->last_is_kernel != is_kernel) { +- cpu_buf->last_is_kernel = is_kernel; +- add_code(cpu_buf, is_kernel); ++ if (cpu_buf->last_cpu_mode != cpu_mode) { ++ cpu_buf->last_cpu_mode = cpu_mode; ++ add_code(cpu_buf, cpu_mode); + } +- ++ + /* notice a task switch */ + if (cpu_buf->last_task != task) { + cpu_buf->last_task = task; +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h +--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/cpu_buffer.h 2006-04-03 15:53:05.000000000 +0100 +@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer { + volatile unsigned long tail_pos; + unsigned long buffer_size; + struct task_struct * last_task; +- int last_is_kernel; ++ int last_cpu_mode; + int tracing; + struct op_sample * buffer; + unsigned long sample_received; +@@ -51,7 +51,9 @@ extern struct oprofile_cpu_buffer cpu_bu + void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); + + /* transient events for the CPU buffer -> event buffer */ +-#define CPU_IS_KERNEL 1 +-#define CPU_TRACE_BEGIN 2 ++#define CPU_MODE_USER 0 ++#define CPU_MODE_KERNEL 1 ++#define CPU_MODE_XEN 2 ++#define CPU_TRACE_BEGIN 3 + + #endif /* OPROFILE_CPU_BUFFER_H */ +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h +--- ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/event_buffer.h 2006-04-03 15:53:05.000000000 +0100 +@@ -29,11 +29,12 @@ void wake_up_buffer_waiter(void); + #define CPU_SWITCH_CODE 2 + #define COOKIE_SWITCH_CODE 3 + #define KERNEL_ENTER_SWITCH_CODE 4 +-#define KERNEL_EXIT_SWITCH_CODE 5 ++#define USER_ENTER_SWITCH_CODE 5 + #define MODULE_LOADED_CODE 6 + #define CTX_TGID_CODE 7 + #define TRACE_BEGIN_CODE 8 + #define TRACE_END_CODE 9 ++#define XEN_ENTER_SWITCH_CODE 10 + + #define INVALID_COOKIE ~0UL + #define NO_COOKIE 0UL +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c +--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.c 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/oprof.c 2006-04-03 15:53:05.000000000 +0100 +@@ -5,6 +5,10 @@ + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + + #include <linux/kernel.h> +@@ -19,7 +23,7 @@ + #include "cpu_buffer.h" + #include "buffer_sync.h" + #include "oprofile_stats.h" +- ++ + struct oprofile_operations oprofile_ops; + + unsigned long oprofile_started; +@@ -33,6 +37,17 @@ static DECLARE_MUTEX(start_sem); + */ + static int timer = 0; + ++extern unsigned int adomains; ++extern int active_domains[MAX_OPROF_DOMAINS]; ++ ++int oprofile_set_active(void) ++{ ++ if (oprofile_ops.set_active) ++ return oprofile_ops.set_active(active_domains, adomains); ++ ++ return -EINVAL; ++} ++ + int oprofile_setup(void) + { + int err; +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h +--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.h 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/oprof.h 2006-04-03 15:53:05.000000000 +0100 +@@ -35,5 +35,7 @@ void oprofile_create_files(struct super_ + void oprofile_timer_init(struct oprofile_operations * ops); + + int oprofile_set_backtrace(unsigned long depth); ++ ++int oprofile_set_active(void); + + #endif /* OPROF_H */ +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c +--- ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/oprofile_files.c 2006-04-03 15:53:05.000000000 +0100 +@@ -5,15 +5,21 @@ + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. + */ + + #include <linux/fs.h> + #include <linux/oprofile.h> ++#include <asm/uaccess.h> ++#include <linux/ctype.h> + + #include "event_buffer.h" + #include "oprofile_stats.h" + #include "oprof.h" +- ++ + unsigned long fs_buffer_size = 131072; + unsigned long fs_cpu_buffer_size = 8192; + unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +@@ -117,11 +123,79 @@ static ssize_t dump_write(struct file * + static struct file_operations dump_fops = { + .write = dump_write, + }; +- ++ ++#define TMPBUFSIZE 512 ++ ++unsigned int adomains = 0; ++long active_domains[MAX_OPROF_DOMAINS]; ++ ++static ssize_t adomain_write(struct file * file, char const __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char tmpbuf[TMPBUFSIZE]; ++ char * startp = tmpbuf; ++ char * endp = tmpbuf; ++ int i; ++ unsigned long val; ++ ++ if (*offset) ++ return -EINVAL; ++ if (!count) ++ return 0; ++ if (count > TMPBUFSIZE - 1) ++ return -EINVAL; ++ ++ memset(tmpbuf, 0x0, TMPBUFSIZE); ++ ++ if (copy_from_user(tmpbuf, buf, count)) ++ return -EFAULT; ++ ++ for (i = 0; i < MAX_OPROF_DOMAINS; i++) ++ active_domains[i] = -1; ++ adomains = 0; ++ ++ while (1) { ++ val = simple_strtol(startp, &endp, 0); ++ if (endp == startp) ++ break; ++ while (ispunct(*endp)) ++ endp++; ++ active_domains[adomains++] = val; ++ if (adomains >= MAX_OPROF_DOMAINS) ++ break; ++ startp = endp; ++ } ++ if (oprofile_set_active()) ++ return -EINVAL; ++ return count; ++} ++ ++static ssize_t adomain_read(struct file * file, char __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char tmpbuf[TMPBUFSIZE]; ++ size_t len = 0; ++ int i; ++ /* This is all screwed up if we run out of space */ ++ for (i = 0; i < adomains; i++) ++ len += snprintf(tmpbuf + len, TMPBUFSIZE - len, ++ "%u ", (unsigned int)active_domains[i]); ++ len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "\n"); ++ return simple_read_from_buffer((void __user *)buf, count, ++ offset, tmpbuf, len); ++} ++ ++ ++static struct 
file_operations active_domain_ops = { ++ .read = adomain_read, ++ .write = adomain_write, ++}; ++ + void oprofile_create_files(struct super_block * sb, struct dentry * root) + { + oprofilefs_create_file(sb, root, "enable", &enable_fops); + oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); ++ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); + oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); + oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); + oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); +diff -pruN ../pristine-linux-2.6.16/include/linux/oprofile.h ./include/linux/oprofile.h +--- ../pristine-linux-2.6.16/include/linux/oprofile.h 2006-03-20 05:53:29.000000000 +0000 ++++ ./include/linux/oprofile.h 2006-04-03 15:53:05.000000000 +0100 +@@ -16,6 +16,8 @@ + #include <linux/types.h> + #include <linux/spinlock.h> + #include <asm/atomic.h> ++ ++#include <xen/interface/xenoprof.h> + + struct super_block; + struct dentry; +@@ -27,6 +29,8 @@ struct oprofile_operations { + /* create any necessary configuration files in the oprofile fs. + * Optional. */ + int (*create_files)(struct super_block * sb, struct dentry * root); ++ /* setup active domains with Xen */ ++ int (*set_active)(int *active_domains, unsigned int adomains); + /* Do any necessary interrupt setup. Optional. */ + int (*setup)(void); + /* Do any necessary interrupt shutdown. Optional. */ diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/Makefile --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/oprofile/Makefile Thu Apr 6 17:58:01 2006 @@ -0,0 +1,5 @@ +obj-y += xenoprof.o +obj-y += nmi_int.o +obj-y += op_model_p4.o +obj-y += op_model_ppro.o +obj-y += op_model_athlon.o diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/nmi_int.c --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/oprofile/nmi_int.c Thu Apr 6 17:58:01 2006 @@ -0,0 +1,399 @@ +/** + * @file nmi_int.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * + * Modified for Xen: by Aravind Menon & Jose Renato Santos + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + +#include <xen/event.h> +#include <xen/types.h> +#include <xen/errno.h> +#include <xen/init.h> +#include <public/xen.h> +#include <asm/nmi.h> +#include <asm/msr.h> +#include <asm/apic.h> +#include <asm/regs.h> +#include <asm/current.h> +#include <xen/delay.h> + +#include "op_counter.h" +#include "op_x86_model.h" + +static struct op_x86_model_spec const * model; +static struct op_msrs cpu_msrs[NR_CPUS]; +static unsigned long saved_lvtpc[NR_CPUS]; + +#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1) +extern int active_domains[MAX_OPROF_DOMAINS]; +extern unsigned int adomains; +extern struct domain *primary_profiler; +extern struct domain *adomain_ptrs[MAX_OPROF_DOMAINS]; +extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE]; +extern int is_active(struct domain *d); +extern int active_id(struct domain *d); +extern int is_profiled(struct domain *d); + +extern size_t strlcpy(char *dest, const char *src, size_t size); + + +int nmi_callback(struct cpu_user_regs *regs, int cpu) +{ + int xen_mode = 0; + int ovf; + + ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs); + xen_mode = ring_0(regs); + if ( ovf ) + { + if ( is_active(current->domain) ) + { + if ( !xen_mode ) + { + send_guest_vcpu_virq(current, VIRQ_XENOPROF); + } + } + } + return 1; +} + + +static void nmi_cpu_save_registers(struct op_msrs * msrs) +{ + unsigned int const nr_ctrs = model->num_counters; + unsigned int const nr_ctrls = model->num_controls; + struct op_msr * counters = msrs->counters; + struct op_msr * controls = msrs->controls; + unsigned int i; + + for (i = 0; i < nr_ctrs; ++i) { + rdmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } + + for (i = 0; i < nr_ctrls; ++i) { + rdmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } +} + + +static void nmi_save_registers(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + model->fill_in_addresses(msrs); + nmi_cpu_save_registers(msrs); +} + + +static void free_msrs(void) +{ + int i; + for (i = 0; i < NR_CPUS; ++i) { + xfree(cpu_msrs[i].counters); + cpu_msrs[i].counters = NULL; + xfree(cpu_msrs[i].controls); + cpu_msrs[i].controls = NULL; + } +} + + +static int allocate_msrs(void) +{ + int success = 1; + size_t controls_size = sizeof(struct op_msr) * model->num_controls; + size_t counters_size = sizeof(struct op_msr) * model->num_counters; + + int i; + for (i = 0; i < NR_CPUS; ++i) { + if (!test_bit(i, &cpu_online_map)) + continue; + + cpu_msrs[i].counters = xmalloc_bytes(counters_size); + if (!cpu_msrs[i].counters) { + success = 0; + break; + } + cpu_msrs[i].controls = xmalloc_bytes(controls_size); + if (!cpu_msrs[i].controls) { + success = 0; + break; + } + } + + if (!success) + free_msrs(); + + return success; +} + + +static void nmi_cpu_setup(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + model->setup_ctrs(msrs); +} + + +int nmi_setup_events(void) +{ + on_each_cpu(nmi_cpu_setup, NULL, 0, 1); + return 0; +} + +int nmi_reserve_counters(void) +{ + if (!allocate_msrs()) + return -ENOMEM; + + /* We walk a thin line between law and rape here. + * We need to be careful to install our NMI handler + * without actually triggering any NMIs as this will + * break the core code horrifically. 
+ */ + if (reserve_lapic_nmi() < 0) { + free_msrs(); + return -EBUSY; + } + /* We need to serialize save and setup for HT because the subset + * of msrs are distinct for save and setup operations + */ + on_each_cpu(nmi_save_registers, NULL, 0, 1); + return 0; +} + +int nmi_enable_virq(void) +{ + set_nmi_callback(nmi_callback); + return 0; +} + + +void nmi_disable_virq(void) +{ + unset_nmi_callback(); +} + + +static void nmi_restore_registers(struct op_msrs * msrs) +{ + unsigned int const nr_ctrs = model->num_counters; + unsigned int const nr_ctrls = model->num_controls; + struct op_msr * counters = msrs->counters; + struct op_msr * controls = msrs->controls; + unsigned int i; + + for (i = 0; i < nr_ctrls; ++i) { + wrmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } + + for (i = 0; i < nr_ctrs; ++i) { + wrmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } +} + + +static void nmi_cpu_shutdown(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + nmi_restore_registers(msrs); +} + + +void nmi_release_counters(void) +{ + on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); + release_lapic_nmi(); + free_msrs(); +} + + +static void nmi_cpu_start(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs const * msrs = &cpu_msrs[cpu]; + saved_lvtpc[cpu] = apic_read(APIC_LVTPC); + apic_write(APIC_LVTPC, APIC_DM_NMI); + model->start(msrs); +} + + +int nmi_start(void) +{ + on_each_cpu(nmi_cpu_start, NULL, 0, 1); + return 0; +} + + +static void nmi_cpu_stop(void * dummy) +{ + unsigned int v; + int cpu = smp_processor_id(); + struct op_msrs const * msrs = &cpu_msrs[cpu]; + model->stop(msrs); + + /* restoring APIC_LVTPC can trigger an apic error because the delivery + * mode and vector nr combination can be illegal. That's by design: on + * power on apic lvt contain a zero vector nr which are legal only for + * NMI delivery mode. 
So inhibit apic err before restoring lvtpc + */ + if ( !(apic_read(APIC_LVTPC) & APIC_DM_NMI) + || (apic_read(APIC_LVTPC) & APIC_LVT_MASKED) ) + { + printk("nmi_stop: APIC not good %ul\n", apic_read(APIC_LVTPC)); + mdelay(5000); + } + v = apic_read(APIC_LVTERR); + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); + apic_write(APIC_LVTPC, saved_lvtpc[cpu]); + apic_write(APIC_LVTERR, v); +} + + +void nmi_stop(void) +{ + on_each_cpu(nmi_cpu_stop, NULL, 0, 1); +} + + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +static int __init p4_init(char * cpu_type) +{ + __u8 cpu_model = current_cpu_data.x86_model; + + if (cpu_model > 4) + return 0; + +#ifndef CONFIG_SMP + strncpy (cpu_type, "i386/p4", XENOPROF_CPU_TYPE_SIZE - 1); + model = &op_p4_spec; + return 1; +#else + switch (smp_num_siblings) { + case 1: + strncpy (cpu_type, "i386/p4", + XENOPROF_CPU_TYPE_SIZE - 1); + model = &op_p4_spec; + return 1; + + case 2: + strncpy (cpu_type, "i386/p4-ht", + XENOPROF_CPU_TYPE_SIZE - 1); + model = &op_p4_ht2_spec; + return 1; + } +#endif + printk("Xenoprof ERROR: P4 HyperThreading detected with > 2 threads\n"); + + return 0; +} + + +static int __init ppro_init(char *cpu_type) +{ + __u8 cpu_model = current_cpu_data.x86_model; + + if (cpu_model > 0xd) + return 0; + + if (cpu_model == 9) { + strncpy (cpu_type, "i386/p6_mobile", XENOPROF_CPU_TYPE_SIZE - 1); + } else if (cpu_model > 5) { + strncpy (cpu_type, "i386/piii", XENOPROF_CPU_TYPE_SIZE - 1); + } else if (cpu_model > 2) { + strncpy (cpu_type, "i386/pii", XENOPROF_CPU_TYPE_SIZE - 1); + } else { + strncpy (cpu_type, "i386/ppro", XENOPROF_CPU_TYPE_SIZE - 1); + } + + model = &op_ppro_spec; + return 1; +} + +int nmi_init(int *num_events, int *is_primary, char *cpu_type) +{ + __u8 vendor = current_cpu_data.x86_vendor; + __u8 family = current_cpu_data.x86; + int prim = 0; + + if (!cpu_has_apic) + return -ENODEV; + + if (primary_profiler == NULL) { + /* For now, only dom0 can be the primary profiler */ + if (current->domain->domain_id == 0) { + primary_profiler = current->domain; + prim = 1; + } + } + + /* Make sure string is NULL terminated */ + cpu_type[XENOPROF_CPU_TYPE_SIZE - 1] = 0; + + switch (vendor) { + case X86_VENDOR_AMD: + /* Needs to be at least an Athlon (or hammer in 32bit mode) */ + + switch (family) { + default: + return -ENODEV; + case 6: + model = &op_athlon_spec; + strncpy (cpu_type, "i386/athlon", + XENOPROF_CPU_TYPE_SIZE - 1); + break; + case 0xf: + model = &op_athlon_spec; + /* Actually it could be i386/hammer too, but give + user space an consistent name. */ + strncpy (cpu_type, "x86-64/hammer", + XENOPROF_CPU_TYPE_SIZE - 1); + break; + } + break; + + case X86_VENDOR_INTEL: + switch (family) { + /* Pentium IV */ + case 0xf: + if (!p4_init(cpu_type)) + return -ENODEV; + break; + + /* A P6-class processor */ + case 6: + if (!ppro_init(cpu_type)) + return -ENODEV; + break; + + default: + return -ENODEV; + } + break; + + default: + return -ENODEV; + } + + *num_events = model->num_counters; + *is_primary = prim; + + return 0; +} + diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_counter.h --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/oprofile/op_counter.h Thu Apr 6 17:58:01 2006 @@ -0,0 +1,29 @@ +/** + * @file op_counter.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +#define OP_MAX_COUNTER 8 + +/* Per-perfctr configuration as set via + * oprofilefs. 
+ */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +extern struct op_counter_config counter_config[]; + +#endif /* OP_COUNTER_H */ diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_model_athlon.c --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/oprofile/op_model_athlon.c Thu Apr 6 17:58:01 2006 @@ -0,0 +1,168 @@ +/** + * @file op_model_athlon.h + * athlon / K7 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + */ + +#include <xen/types.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/processor.h> +#include <xen/sched.h> +#include <asm/regs.h> +#include <asm/current.h> + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_COUNTERS 4 +#define NUM_CONTROLS 4 + +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0) +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0) +#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) +#define CTRL_CLEAR(x) (x &= (1<<21)) +#define CTRL_SET_ENABLE(val) (val |= 1<<20) +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) +#define CTRL_SET_UM(val, m) (val |= (m << 8)) +#define CTRL_SET_EVENT(val, e) (val |= e) + +static unsigned long reset_value[NUM_COUNTERS]; + +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip, + int mode, int event); + +static void athlon_fill_in_addresses(struct op_msrs * const msrs) +{ + msrs->counters[0].addr = MSR_K7_PERFCTR0; + msrs->counters[1].addr = MSR_K7_PERFCTR1; + msrs->counters[2].addr = MSR_K7_PERFCTR2; + msrs->counters[3].addr = MSR_K7_PERFCTR3; + + msrs->controls[0].addr = MSR_K7_EVNTSEL0; + msrs->controls[1].addr = MSR_K7_EVNTSEL1; + msrs->controls[2].addr = MSR_K7_EVNTSEL2; + msrs->controls[3].addr = MSR_K7_EVNTSEL3; +} + + +static void athlon_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < NUM_CONTROLS; ++i) { + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_WRITE(low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < NUM_COUNTERS; ++i) { + CTR_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + + CTR_WRITE(counter_config[i].count, msrs, i); + + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[i].user); + CTRL_SET_KERN(low, counter_config[i].kernel); + CTRL_SET_UM(low, counter_config[i].unit_mask); + CTRL_SET_EVENT(low, counter_config[i].event); + CTRL_WRITE(low, high, msrs, i); + } else { + reset_value[i] = 0; + } + } +} + + +static int athlon_check_ctrs(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs) + +{ + unsigned int low, high; + int i; + int ovf = 0; + unsigned long eip = regs->eip; + int mode = 0; 
+ + if (guest_kernel_mode(current, regs)) + mode = 1; + else if (ring_0(regs)) + mode = 2; + + for (i = 0 ; i < NUM_COUNTERS; ++i) { + CTR_READ(low, high, msrs, i); + if (CTR_OVERFLOWED(low)) { + xenoprof_log_event(current, eip, mode, i); + CTR_WRITE(reset_value[i], msrs, i); + ovf = 1; + } + } + + /* See op_model_ppro.c */ + return ovf; +} + + +static void athlon_start(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (reset_value[i]) { + CTRL_READ(low, high, msrs, i); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + } +} + + +static void athlon_stop(struct op_msrs const * const msrs) +{ + unsigned int low,high; + int i; + + /* Subtle: stop on all counters to avoid race with + * setting our pm callback */ + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + CTRL_READ(low, high, msrs, i); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } +} + + +struct op_x86_model_spec const op_athlon_spec = { + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &athlon_fill_in_addresses, + .setup_ctrs = &athlon_setup_ctrs, + .check_ctrs = &athlon_check_ctrs, + .start = &athlon_start, + .stop = &athlon_stop +}; diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_model_p4.c --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/oprofile/op_model_p4.c Thu Apr 6 17:58:01 2006 @@ -0,0 +1,739 @@ +/** + * @file op_model_p4.c + * P4 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + */ + +#include <xen/types.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/processor.h> +#include <xen/sched.h> +#include <asm/regs.h> +#include <asm/current.h> + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_EVENTS 39 + +#define NUM_COUNTERS_NON_HT 8 +#define NUM_ESCRS_NON_HT 45 +#define NUM_CCCRS_NON_HT 18 +#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT) + +#define NUM_COUNTERS_HT2 4 +#define NUM_ESCRS_HT2 23 +#define NUM_CCCRS_HT2 9 +#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) + +static unsigned int num_counters = NUM_COUNTERS_NON_HT; + + +/* this has to be checked dynamically since the + hyper-threadedness of a chip is discovered at + kernel boot-time. */ +static inline void setup_num_counters(void) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings == 2) + num_counters = NUM_COUNTERS_HT2; +#endif +} + +static int inline addr_increment(void) +{ +#ifdef CONFIG_SMP + return smp_num_siblings == 2 ? 2 : 1; +#else + return 1; +#endif +} + + +/* tables to simulate simplified hardware view of p4 registers */ +struct p4_counter_binding { + int virt_counter; + int counter_address; + int cccr_address; +}; + +struct p4_event_binding { + int escr_select; /* value to put in CCCR */ + int event_select; /* value to put in ESCR */ + struct { + int virt_counter; /* for this counter... */ + int escr_address; /* use this ESCR */ + } bindings[2]; +}; + +/* nb: these CTR_* defines are a duplicate of defines in + event/i386.p4*events. 
*/ + + +#define CTR_BPU_0 (1 << 0) +#define CTR_MS_0 (1 << 1) +#define CTR_FLAME_0 (1 << 2) +#define CTR_IQ_4 (1 << 3) +#define CTR_BPU_2 (1 << 4) +#define CTR_MS_2 (1 << 5) +#define CTR_FLAME_2 (1 << 6) +#define CTR_IQ_5 (1 << 7) + +static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { + { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, + { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, + { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, + { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, + { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, + { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, + { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, + { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } +}; + +#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT + +/* All cccr we don't use. */ +static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { + MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, + MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, + MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, + MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, + MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 +}; + +/* p4 event codes in libop/op_event.h are indices into this table. */ + +static struct p4_event_binding p4_events[NUM_EVENTS] = { + + { /* BRANCH_RETIRED */ + 0x05, 0x06, + { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, + {CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* MISPRED_BRANCH_RETIRED */ + 0x04, 0x03, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* TC_DELIVER_MODE */ + 0x01, 0x01, + { { CTR_MS_0, MSR_P4_TC_ESCR0}, + { CTR_MS_2, MSR_P4_TC_ESCR1} } + }, + + { /* BPU_FETCH_REQUEST */ + 0x00, 0x03, + { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, + { CTR_BPU_2, MSR_P4_BPU_ESCR1} } + }, + + { /* ITLB_REFERENCE */ + 0x03, 0x18, + { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, + { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } + }, + + { /* MEMORY_CANCEL */ + 0x05, 0x02, + { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, + { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } + }, + + { /* MEMORY_COMPLETE */ + 0x02, 0x08, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* LOAD_PORT_REPLAY */ + 0x02, 0x04, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* STORE_PORT_REPLAY */ + 0x02, 0x05, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* MOB_LOAD_REPLAY */ + 0x02, 0x03, + { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, + { CTR_BPU_2, MSR_P4_MOB_ESCR1} } + }, + + { /* PAGE_WALK_TYPE */ + 0x04, 0x01, + { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, + { CTR_BPU_2, MSR_P4_PMH_ESCR1} } + }, + + { /* BSQ_CACHE_REFERENCE */ + 0x07, 0x0c, + { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, + { CTR_BPU_2, MSR_P4_BSU_ESCR1} } + }, + + { /* IOQ_ALLOCATION */ + 0x06, 0x03, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { 0, 0 } } + }, + + { /* IOQ_ACTIVE_ENTRIES */ + 0x06, 0x1a, + { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, + { 0, 0 } } + }, + + { /* FSB_DATA_ACTIVITY */ + 0x06, 0x17, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { CTR_BPU_2, MSR_P4_FSB_ESCR1} } + }, + + { /* BSQ_ALLOCATION */ + 0x07, 0x05, + { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, + { 0, 0 } } + }, + + { /* BSQ_ACTIVE_ENTRIES */ + 0x07, 0x06, + { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, + { 0, 0 } } + }, + + { /* X87_ASSIST */ + 0x05, 0x03, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* SSE_INPUT_ASSIST */ + 0x01, 0x34, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* PACKED_SP_UOP */ + 0x01, 0x08, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { 
/* PACKED_DP_UOP */ + 0x01, 0x0c, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* SCALAR_SP_UOP */ + 0x01, 0x0a, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* SCALAR_DP_UOP */ + 0x01, 0x0e, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* 64BIT_MMX_UOP */ + 0x01, 0x02, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* 128BIT_MMX_UOP */ + 0x01, 0x1a, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* X87_FP_UOP */ + 0x01, 0x04, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* X87_SIMD_MOVES_UOP */ + 0x01, 0x2e, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* MACHINE_CLEAR */ + 0x05, 0x02, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* GLOBAL_POWER_EVENTS */ + 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { CTR_BPU_2, MSR_P4_FSB_ESCR1} } + }, + + { /* TC_MS_XFER */ + 0x00, 0x05, + { { CTR_MS_0, MSR_P4_MS_ESCR0}, + { CTR_MS_2, MSR_P4_MS_ESCR1} } + }, + + { /* UOP_QUEUE_WRITES */ + 0x00, 0x09, + { { CTR_MS_0, MSR_P4_MS_ESCR0}, + { CTR_MS_2, MSR_P4_MS_ESCR1} } + }, + + { /* FRONT_END_EVENT */ + 0x05, 0x08, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* EXECUTION_EVENT */ + 0x05, 0x0c, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* REPLAY_EVENT */ + 0x05, 0x09, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* INSTR_RETIRED */ + 0x04, 0x02, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* UOPS_RETIRED */ + 0x04, 0x01, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* UOP_TYPE */ + 0x02, 0x02, + { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, + { CTR_IQ_5, MSR_P4_RAT_ESCR1} } + }, + + { /* RETIRED_MISPRED_BRANCH_TYPE */ + 0x02, 0x05, + { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, + { CTR_MS_2, MSR_P4_TBPU_ESCR1} } + }, + + { /* RETIRED_BRANCH_TYPE */ + 0x02, 0x04, + { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, + { CTR_MS_2, MSR_P4_TBPU_ESCR1} } + } +}; + + +#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7) + +#define ESCR_RESERVED_BITS 0x80000003 +#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS) +#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2)) +#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3)) +#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1))) +#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) +#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) +#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) +#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) +#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) + +#define CCCR_RESERVED_BITS 0x38030FFF +#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) +#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000) +#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13)) +#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26)) +#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) +#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) +#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) +#define CCCR_READ(low, high, i) do 
{rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) +#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) + +#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) +#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) +#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) + + +/* this assigns a "stagger" to the current CPU, which is used throughout + the code in this module as an extra array offset, to select the "even" + or "odd" part of all the divided resources. */ +static unsigned int get_stagger(void) +{ +#ifdef CONFIG_SMP + int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu])); +#endif + return 0; +} + + +/* finally, mediate access to a real hardware counter + by passing a "virtual" counter numer to this macro, + along with your stagger setting. */ +#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) + +static unsigned long reset_value[NUM_COUNTERS_NON_HT]; + + +static void p4_fill_in_addresses(struct op_msrs * const msrs) +{ + unsigned int i; + unsigned int addr, stag; + + setup_num_counters(); + stag = get_stagger(); + + /* the counter registers we pay attention to */ + for (i = 0; i < num_counters; ++i) { + msrs->counters[i].addr = + p4_counters[VIRT_CTR(stag, i)].counter_address; + } + + /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ + + /* 18 CCCR registers */ + for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag; + addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* 43 ESCR registers in three or four discontiguous group */ + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 + * to avoid special case in nmi_{save|restore}_registers() */ + if (boot_cpu_data.x86_model >= 0x3) { + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + } else { + for (addr = MSR_P4_IQ_ESCR0 + stag; + addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + } + + for (addr = MSR_P4_RAT_ESCR0 + stag; + addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + for (addr = MSR_P4_MS_ESCR0 + stag; + addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + for (addr = MSR_P4_IX_ESCR0 + stag; + addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* there are 2 remaining non-contiguously located ESCRs */ + + if (num_counters == NUM_COUNTERS_NON_HT) { + /* standard non-HT CPUs handle both remaining ESCRs*/ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + + } else if (stag == 0) { + /* HT CPUs give the first remainder to the even thread, as + the 32nd control register */ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + + } else { + /* and two copies of the second to the odd thread, + for the 22st and 23nd control registers */ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + } +} + + +static void pmc_setup_one_p4_counter(unsigned int ctr) +{ + int i; + int const maxbind = 2; + 
unsigned int cccr = 0; + unsigned int escr = 0; + unsigned int high = 0; + unsigned int counter_bit; + struct p4_event_binding *ev = NULL; + unsigned int stag; + + stag = get_stagger(); + + /* convert from counter *number* to counter *bit* */ + counter_bit = 1 << VIRT_CTR(stag, ctr); + + /* find our event binding structure. */ + if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { + printk(KERN_ERR + "oprofile: P4 event code 0x%lx out of range\n", + counter_config[ctr].event); + return; + } + + ev = &(p4_events[counter_config[ctr].event - 1]); + + for (i = 0; i < maxbind; i++) { + if (ev->bindings[i].virt_counter & counter_bit) { + + /* modify ESCR */ + ESCR_READ(escr, high, ev, i); + ESCR_CLEAR(escr); + if (stag == 0) { + ESCR_SET_USR_0(escr, counter_config[ctr].user); + ESCR_SET_OS_0(escr, counter_config[ctr].kernel); + } else { + ESCR_SET_USR_1(escr, counter_config[ctr].user); + ESCR_SET_OS_1(escr, counter_config[ctr].kernel); + } + ESCR_SET_EVENT_SELECT(escr, ev->event_select); + ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); + ESCR_WRITE(escr, high, ev, i); + + /* modify CCCR */ + CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); + CCCR_CLEAR(cccr); + CCCR_SET_REQUIRED_BITS(cccr); + CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); + if (stag == 0) { + CCCR_SET_PMI_OVF_0(cccr); + } else { + CCCR_SET_PMI_OVF_1(cccr); + } + CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); + return; + } + } + + printk(KERN_ERR + "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", + counter_config[ctr].event, stag, ctr); +} + + +static void p4_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int i; + unsigned int low, high; + unsigned int addr; + unsigned int stag; + + stag = get_stagger(); + + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (! 
MISC_PMC_ENABLED_P(low)) { + printk(KERN_ERR "oprofile: P4 PMC not available\n"); + return; + } + + /* clear the cccrs we will use */ + for (i = 0 ; i < num_counters ; i++) { + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); + CCCR_CLEAR(low); + CCCR_SET_REQUIRED_BITS(low); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); + } + + /* clear cccrs outside our concern */ + for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { + rdmsr(p4_unused_cccr[i], low, high); + CCCR_CLEAR(low); + CCCR_SET_REQUIRED_BITS(low); + wrmsr(p4_unused_cccr[i], low, high); + } + + /* clear all escrs (including those outside our concern) */ + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { + wrmsr(addr, 0, 0); + } + + /* On older models, also clear MSR_P4_IQ_ESCR0/1 */ + if (boot_cpu_data.x86_model < 0x3) { + wrmsr(MSR_P4_IQ_ESCR0, 0, 0); + wrmsr(MSR_P4_IQ_ESCR1, 0, 0); + } + + for (addr = MSR_P4_RAT_ESCR0 + stag; + addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) { + wrmsr(addr, 0, 0); + } + + for (addr = MSR_P4_MS_ESCR0 + stag; + addr <= MSR_P4_TC_ESCR1; addr += addr_increment()) { + wrmsr(addr, 0, 0); + } + + for (addr = MSR_P4_IX_ESCR0 + stag; + addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()) { + wrmsr(addr, 0, 0); + } + + if (num_counters == NUM_COUNTERS_NON_HT) { + wrmsr(MSR_P4_CRU_ESCR4, 0, 0); + wrmsr(MSR_P4_CRU_ESCR5, 0, 0); + } else if (stag == 0) { + wrmsr(MSR_P4_CRU_ESCR4, 0, 0); + } else { + wrmsr(MSR_P4_CRU_ESCR5, 0, 0); + } + + /* setup all counters */ + for (i = 0 ; i < num_counters ; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + pmc_setup_one_p4_counter(i); + CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); + } else { + reset_value[i] = 0; + } + } +} + + +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip, + int mode, int event); + +static int p4_check_ctrs(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs) +{ + unsigned long ctr, low, high, stag, real; + int i; + int ovf = 0; + unsigned long eip = regs->eip; + int mode = 0; + + if (guest_kernel_mode(current, regs)) + mode = 1; + else if (ring_0(regs)) + mode = 2; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + + if (!reset_value[i]) + continue; + + /* + * there is some eccentricity in the hardware which + * requires that we perform 2 extra corrections: + * + * - check both the CCCR:OVF flag for overflow and the + * counter high bit for un-flagged overflows. + * + * - write the counter back twice to ensure it gets + * updated properly. + * + * the former seems to be related to extra NMIs happening + * during the current NMI; the latter is reported as erratum + * N15 in intel doc 249199-029, pentium 4 specification + * update, though their suggested work-around does not + * appear to solve the problem. 
+ */ + + real = VIRT_CTR(stag, i); + + CCCR_READ(low, high, real); + CTR_READ(ctr, high, real); + if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { + xenoprof_log_event(current, eip, mode, i); + CTR_WRITE(reset_value[i], real); + CCCR_CLEAR_OVF(low); + CCCR_WRITE(low, high, real); + CTR_WRITE(reset_value[i], real); + ovf = 1; + } + } + + /* P4 quirk: you have to re-unmask the apic vector */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + + return ovf; +} + + +static void p4_start(struct op_msrs const * const msrs) +{ + unsigned int low, high, stag; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + if (!reset_value[i]) + continue; + CCCR_READ(low, high, VIRT_CTR(stag, i)); + CCCR_SET_ENABLE(low); + CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + } +} + + +static void p4_stop(struct op_msrs const * const msrs) +{ + unsigned int low, high, stag; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + CCCR_READ(low, high, VIRT_CTR(stag, i)); + CCCR_SET_DISABLE(low); + CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + } +} + + +#ifdef CONFIG_SMP +struct op_x86_model_spec const op_p4_ht2_spec = { + .num_counters = NUM_COUNTERS_HT2, + .num_controls = NUM_CONTROLS_HT2, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop +}; +#endif + +struct op_x86_model_spec const op_p4_spec = { + .num_counters = NUM_COUNTERS_NON_HT, + .num_controls = NUM_CONTROLS_NON_HT, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop +}; diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_model_ppro.c --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/oprofile/op_model_ppro.c Thu Apr 6 17:58:01 2006 @@ -0,0 +1,153 @@ +/** + * @file op_model_ppro.c + * pentium pro / P6 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + */ + +#include <xen/types.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/processor.h> +#include <xen/sched.h> +#include <asm/regs.h> +#include <asm/current.h> + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_COUNTERS 2 +#define NUM_CONTROLS 2 + +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) +#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) +#define CTRL_CLEAR(x) (x &= (1<<21)) +#define CTRL_SET_ENABLE(val) (val |= 1<<20) +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) +#define CTRL_SET_UM(val, m) (val |= (m << 8)) +#define CTRL_SET_EVENT(val, e) (val |= e) + +static unsigned long reset_value[NUM_COUNTERS]; + +static void ppro_fill_in_addresses(struct op_msrs * const msrs) +{ + msrs->counters[0].addr = MSR_P6_PERFCTR0; + msrs->counters[1].addr = MSR_P6_PERFCTR1; + + msrs->controls[0].addr = MSR_P6_EVNTSEL0; + msrs->controls[1].addr = MSR_P6_EVNTSEL1; +} + + +static void ppro_setup_ctrs(struct 
op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < NUM_CONTROLS; ++i) { + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_WRITE(low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < NUM_COUNTERS; ++i) { + CTR_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + + CTR_WRITE(counter_config[i].count, msrs, i); + + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[i].user); + CTRL_SET_KERN(low, counter_config[i].kernel); + CTRL_SET_UM(low, counter_config[i].unit_mask); + CTRL_SET_EVENT(low, counter_config[i].event); + CTRL_WRITE(low, high, msrs, i); + } + } +} + + +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip, + int mode, int event); + +static int ppro_check_ctrs(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs) +{ + unsigned int low, high; + int i; + int ovf = 0; + unsigned long eip = regs->eip; + int mode = 0; + + if ( guest_kernel_mode(current, regs) ) + mode = 1; + else if ( ring_0(regs) ) + mode = 2; + + for (i = 0 ; i < NUM_COUNTERS; ++i) { + CTR_READ(low, high, msrs, i); + if (CTR_OVERFLOWED(low)) { + xenoprof_log_event(current, eip, mode, i); + CTR_WRITE(reset_value[i], msrs, i); + ovf = 1; + } + } + + /* Only the P6-based Pentium M needs to re-unmask the APIC vector, but + * it doesn't hurt other P6 variants */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + + return ovf; +} + + +static void ppro_start(struct op_msrs const * const msrs) +{ + unsigned int low, high; + CTRL_READ(low, high, msrs, 0); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, 0); +} + + +static void ppro_stop(struct op_msrs const * const msrs) +{ + unsigned int low, high; + CTRL_READ(low, high, msrs, 0); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, 0); +} + + +struct op_x86_model_spec const op_ppro_spec = { + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &ppro_fill_in_addresses, + .setup_ctrs = &ppro_setup_ctrs, + .check_ctrs = &ppro_check_ctrs, + .start = &ppro_start, + .stop = &ppro_stop +}; diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_x86_model.h --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/oprofile/op_x86_model.h Thu Apr 6 17:58:01 2006 @@ -0,0 +1,51 @@ +/** + * @file op_x86_model.h + * interface to x86 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + */ + +#ifndef OP_X86_MODEL_H +#define OP_X86_MODEL_H + +struct op_saved_msr { + unsigned int high; + unsigned int low; +}; + +struct op_msr { + unsigned long addr; + struct op_saved_msr saved; +}; + +struct op_msrs { + struct op_msr * counters; + struct op_msr * controls; +}; + +struct pt_regs; + +/* The model vtable abstracts the differences between + * various x86 CPU models' perfctr support. 
+ */ +struct op_x86_model_spec { + unsigned int const num_counters; + unsigned int const num_controls; + void (*fill_in_addresses)(struct op_msrs * const msrs); + void (*setup_ctrs)(struct op_msrs const * const msrs); + int (*check_ctrs)(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs); + void (*start)(struct op_msrs const * const msrs); + void (*stop)(struct op_msrs const * const msrs); +}; + +extern struct op_x86_model_spec const op_ppro_spec; +extern struct op_x86_model_spec const op_p4_spec; +extern struct op_x86_model_spec const op_p4_ht2_spec; +extern struct op_x86_model_spec const op_athlon_spec; + +#endif /* OP_X86_MODEL_H */ diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/xenoprof.c --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/xen/arch/x86/oprofile/xenoprof.c Thu Apr 6 17:58:01 2006 @@ -0,0 +1,553 @@ +/* + * Copyright (C) 2005 Hewlett-Packard Co. + * written by Aravind Menon & Jose Renato Santos + * (email: xenoprof@xxxxxxxxxxxxx) + */ + +#include <xen/sched.h> +#include <public/xenoprof.h> + +#include "op_counter.h" + +/* Limit amount of pages used for shared buffer (per domain) */ +#define MAX_OPROF_SHARED_PAGES 32 + +int active_domains[MAX_OPROF_DOMAINS]; +int active_ready[MAX_OPROF_DOMAINS]; +unsigned int adomains = 0; +unsigned int activated = 0; +struct domain *primary_profiler = NULL; +int xenoprof_state = XENOPROF_IDLE; + +u64 total_samples = 0; +u64 invalid_buffer_samples = 0; +u64 corrupted_buffer_samples = 0; +u64 lost_samples = 0; +u64 active_samples = 0; +u64 idle_samples = 0; +u64 others_samples = 0; + + +extern int nmi_init(int *num_events, int *is_primary, char *cpu_type); +extern int nmi_reserve_counters(void); +extern int nmi_setup_events(void); +extern int nmi_enable_virq(void); +extern int nmi_start(void); +extern void nmi_stop(void); +extern void nmi_disable_virq(void); +extern void nmi_release_counters(void); + +int is_active(struct domain *d) +{ + xenoprof_t *x = d->xenoprof; + if ( x ) + { + if ( x->domain_type == XENOPROF_DOMAIN_ACTIVE ) + return 1; + else + return 0; + } + else + return 0; +} + +int is_profiled(struct domain *d) +{ + return is_active(d); +} + +static void xenoprof_reset_stat(void) +{ + total_samples = 0; + invalid_buffer_samples = 0; + corrupted_buffer_samples = 0; + lost_samples = 0; + active_samples = 0; + idle_samples = 0; + others_samples = 0; + + return; +} + +static void xenoprof_reset_buf(struct domain *d) +{ + int j; + xenoprof_buf_t *buf; + + if ( !d->xenoprof ) + { + printk("xenoprof_reset_buf: ERROR - Unexpected Xenoprof NULL pointer \n"); + return; + } + + for ( j=0; j<MAX_VIRT_CPUS; j++ ) + { + buf = d->xenoprof->vcpu[j].buffer; + if ( buf ) + { + buf->event_head = 0; + buf->event_tail = 0; + } + } +} + +int active_index(struct domain *d) +{ + int i; + int id; + + id = d->domain_id; + for ( i=0; i<adomains; i++ ) + if ( active_domains[i] == id ) + { + return i; + } + return -1; +} + +int set_active(struct domain *d) +{ + int ind; + xenoprof_t *x; + + ind = active_index(d); + if ( ind <0 ) + return -EPERM; + + x = d->xenoprof; + if ( x ) + { + x->domain_ready = 1; + x->domain_type = XENOPROF_DOMAIN_ACTIVE; + active_ready[ind] = 1; + activated++; + return 0; + } + else + return -EPERM; +} + +int reset_active(struct domain *d) +{ + int ind; + xenoprof_t *x; + + ind = active_index(d); + if ( ind <0 ) + return -EPERM; + + x = d->xenoprof; + if ( x ) + { + x->domain_ready = 0; + x->domain_type = XENOPROF_DOMAIN_IGNORED; + active_ready[ind] = 0; + activated--; + if ( 
activated <= 0 ) + adomains = 0; + return 0; + } + else + return -EPERM; +} + +int set_active_domains(int num) +{ + int primary; + int i; + struct domain *d; + + /* reset any existing active domains from previous runs */ + for ( i=0; i<adomains; i++ ) + { + if ( active_ready[i] ) + { + d = find_domain_by_id(active_domains[i]); + if ( d ) + { + reset_active(d); + put_domain(d); + } + } + } + + adomains=num; + + /* Add primary profiler to list of active domains if not there yet */ + primary = active_index(primary_profiler); + if ( primary == -1 ) + { + /* return if there is no space left on list */ + if ( num >= MAX_OPROF_DOMAINS ) + return -E2BIG; + else + { + active_domains[num] = primary_profiler->domain_id; + num++; + } + } + + adomains = num; + activated = 0; + + for ( i=0; i<adomains; i++ ) + { + active_ready[i] = 0; + } + + return 0; +} + +void xenoprof_log_event(struct vcpu *vcpu, unsigned long eip, int mode, int event) +{ + xenoprof_vcpu_t *v; + xenoprof_buf_t *buf; + int head; + int tail; + int size; + + + total_samples++; + + /* ignore samples of un-monitored domains */ + /* Count samples in idle separate from other unmonitored domains */ + if ( !is_profiled(vcpu->domain) ) + { + others_samples++; + return; + } + + v = &vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id]; + + /* Sanity check. Should never happen */ + if ( !v->buffer ) + { + invalid_buffer_samples++; + return; + } + + buf = vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id].buffer; + + head = buf->event_head; + tail = buf->event_tail; + size = v->event_size; + + /* make sure indexes in shared buffer are sane */ + if ( (head < 0) || (head >= size) || (tail < 0) || (tail >= size) ) + { + corrupted_buffer_samples++; + return; + } + + if ( (head == tail - 1) || (head == size - 1 && tail == 0) ) + { + buf->lost_samples++; + lost_samples++; + } + else + { + buf->event_log[head].eip = eip; + buf->event_log[head].mode = mode; + buf->event_log[head].event = event; + head++; + if ( head >= size ) + head = 0; + buf->event_head = head; + active_samples++; + if ( mode == 0 ) + buf->user_samples++; + else if ( mode == 1 ) + buf->kernel_samples++; + else + buf->xen_samples++; + } +} + +char *alloc_xenoprof_buf(struct domain *d, int npages) +{ + char *rawbuf; + int i, order; + + /* allocate pages to store sample buffer shared with domain */ + order = get_order_from_pages(npages); + rawbuf = alloc_xenheap_pages(order); + if( rawbuf == NULL ) + { + printk("alloc_xenoprof_buf(): memory allocation failed\n"); + return 0; + } + + /* Share pages so that kernel can map it */ + for ( i=0; i<npages; i++ ) + { + share_xen_page_with_guest(virt_to_page(rawbuf + i * PAGE_SIZE), + d, XENSHARE_writable); + } + + return rawbuf; +} + +int alloc_xenoprof_struct(struct domain *d, int max_samples) +{ + struct vcpu *v; + int nvcpu, npages, bufsize, max_bufsize; + int i; + + d->xenoprof = xmalloc(xenoprof_t); + + if ( !d->xenoprof ) + { + printk ("alloc_xenoprof_struct(): memory " + "allocation (xmalloc) failed\n"); + return -ENOMEM; + } + + memset(d->xenoprof, 0, sizeof(*d->xenoprof)); + + nvcpu = 0; + for_each_vcpu(d, v) + nvcpu++; + + /* reduce buffer size if necessary to limit pages allocated */ + bufsize = sizeof(xenoprof_buf_t) + + (max_samples - 1) * sizeof(struct event_log); + max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu; + if ( bufsize > max_bufsize ) + { + bufsize = max_bufsize; + max_samples = ( (max_bufsize - sizeof(xenoprof_buf_t)) / + sizeof(struct event_log) ) + 1; + } + + npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1; + 
d->xenoprof->rawbuf = alloc_xenoprof_buf(d, npages); + if ( !d->xenoprof->rawbuf ) + { + xfree(d->xenoprof); + d->xenoprof = NULL; + return -ENOMEM; + } + + d->xenoprof->npages = npages; + d->xenoprof->nbuf = nvcpu; + d->xenoprof->bufsize = bufsize; + d->xenoprof->domain_ready = 0; + d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED; + + /* Update buffer pointers for active vcpus */ + i = 0; + for_each_vcpu(d, v) + { + d->xenoprof->vcpu[v->vcpu_id].event_size = max_samples; + d->xenoprof->vcpu[v->vcpu_id].buffer = + (xenoprof_buf_t *)&d->xenoprof->rawbuf[i * bufsize]; + d->xenoprof->vcpu[v->vcpu_id].buffer->event_size = max_samples; + d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id = v->vcpu_id; + + i++; + /* in the unlikely case that the number of active vcpus changes */ + if ( i >= nvcpu ) + break; + } + + return 0; +} + +void free_xenoprof_pages(struct domain *d) +{ + xenoprof_t *x; + int order; + + x = d->xenoprof; + + if ( x ) + { + if ( x->rawbuf ) + { + order = get_order_from_pages(x->npages); + free_xenheap_pages(x->rawbuf, order); + } + xfree(x); + d->xenoprof = NULL; + } +} + +int xenoprof_init(int max_samples, xenoprof_init_result_t *init_result) +{ + xenoprof_init_result_t result; + int is_primary, num_events; + struct domain *d = current->domain; + int ret; + + ret = nmi_init(&num_events, &is_primary, result.cpu_type); + if ( is_primary ) + primary_profiler = current->domain; + + if ( ret < 0 ) + goto err; + + /* We allocate the xenoprof struct and buffers only the first time + xenoprof_init is called. Memory is then kept until the domain is + destroyed. */ + if ( !d->xenoprof ) + { + if ( (ret = alloc_xenoprof_struct(d, max_samples)) < 0 ) + goto err; + } + + xenoprof_reset_buf(d); + + d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED; + d->xenoprof->domain_ready = 0; + d->xenoprof->is_primary = is_primary; + + result.is_primary = is_primary; + result.num_events = num_events; + result.nbuf = d->xenoprof->nbuf; + result.bufsize = d->xenoprof->bufsize; + result.buf_maddr = __pa(d->xenoprof->rawbuf); + + if ( copy_to_user((void *)init_result, (void *)&result, sizeof(result)) ) + { + ret = -EFAULT; + goto err; + } + + return ret; + + err: + if ( primary_profiler == current->domain ) + primary_profiler = NULL; + return ret; +} + +#define PRIV_OP(op) ( (op == XENOPROF_set_active) \ + || (op == XENOPROF_reserve_counters) \ + || (op == XENOPROF_setup_events) \ + || (op == XENOPROF_start) \ + || (op == XENOPROF_stop) \ + || (op == XENOPROF_release_counters) \ + || (op == XENOPROF_shutdown)) + +int do_xenoprof_op(int op, unsigned long arg1, unsigned long arg2) +{ + int ret = 0; + + if ( PRIV_OP(op) && current->domain != primary_profiler ) + { + printk("xenoprof: dom %d denied privileged operation %d\n", + current->domain->domain_id, op); + return -EPERM; + } + + switch ( op ) + { + case XENOPROF_init: + ret = xenoprof_init((int)arg1, (xenoprof_init_result_t *)arg2); + break; + + case XENOPROF_set_active: + if ( xenoprof_state != XENOPROF_IDLE ) + return -EPERM; + if ( arg2 > MAX_OPROF_DOMAINS ) + return -E2BIG; + if ( copy_from_user((void *)&active_domains, + (void *)arg1, arg2*sizeof(int)) ) + return -EFAULT; + ret = set_active_domains(arg2); + break; + + case XENOPROF_reserve_counters: + if ( xenoprof_state != XENOPROF_IDLE ) + return -EPERM; + ret = nmi_reserve_counters(); + if ( !ret ) + xenoprof_state = XENOPROF_COUNTERS_RESERVED; + break; + + case XENOPROF_setup_events: + if ( xenoprof_state != XENOPROF_COUNTERS_RESERVED ) + return -EPERM; + if ( adomains == 0 ) + { + set_active_domains(0); + 
} + + if ( copy_from_user((void *)&counter_config, (void *)arg1, + arg2 * sizeof(struct op_counter_config)) ) + return -EFAULT; + ret = nmi_setup_events(); + if ( !ret ) + xenoprof_state = XENOPROF_READY; + break; + + case XENOPROF_enable_virq: + if ( current->domain == primary_profiler ) + { + nmi_enable_virq(); + xenoprof_reset_stat(); + } + xenoprof_reset_buf(current->domain); + ret = set_active(current->domain); + break; + + case XENOPROF_start: + if ( (xenoprof_state == XENOPROF_READY) && + (activated == adomains) ) + { + ret = nmi_start(); + } + else + ret = -EPERM; + + if ( !ret ) + xenoprof_state = XENOPROF_PROFILING; + break; + + case XENOPROF_stop: + if ( xenoprof_state != XENOPROF_PROFILING ) + return -EPERM; + nmi_stop(); + xenoprof_state = XENOPROF_READY; + break; + + case XENOPROF_disable_virq: + if ( (xenoprof_state == XENOPROF_PROFILING) && + (is_active(current->domain)) ) + return -EPERM; + ret = reset_active(current->domain); + break; + + case XENOPROF_release_counters: + if ( (xenoprof_state == XENOPROF_COUNTERS_RESERVED) || + (xenoprof_state == XENOPROF_READY) ) + { + xenoprof_state = XENOPROF_IDLE; + nmi_release_counters(); + nmi_disable_virq(); + } + else + ret = -EPERM; + break; + + case XENOPROF_shutdown: + if ( xenoprof_state == XENOPROF_IDLE ) + { + activated = 0; + adomains = 0; + primary_profiler = NULL; + ret = 0; + } + else + ret = -EPERM; + break; + + default: + ret = -EINVAL; + } + + if ( ret < 0 ) + printk("xenoprof: operation %d failed for dom %d (status: %d)\n", + op, current->domain->domain_id, ret); + + return ret; +} diff -r 388c59fefaa6 -r e049baa9055d xen/include/public/xenoprof.h --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/xen/include/public/xenoprof.h Thu Apr 6 17:58:01 2006 @@ -0,0 +1,72 @@ +/****************************************************************************** + * xenoprof.h + * + * Interface for enabling system-wide profiling based on hardware performance + * counters + * + * Copyright (C) 2005 Hewlett-Packard Co. + * Written by Aravind Menon & Jose Renato Santos + */ + +#ifndef __XEN_PUBLIC_XENOPROF_H__ +#define __XEN_PUBLIC_XENOPROF_H__ + +/* + * Commands to HYPERVISOR_xenoprof_op(). 
+ */ +#define XENOPROF_init 0 +#define XENOPROF_set_active 1 +#define XENOPROF_reserve_counters 3 +#define XENOPROF_setup_events 4 +#define XENOPROF_enable_virq 5 +#define XENOPROF_start 6 +#define XENOPROF_stop 7 +#define XENOPROF_disable_virq 8 +#define XENOPROF_release_counters 9 +#define XENOPROF_shutdown 10 + +#define MAX_OPROF_EVENTS 32 +#define MAX_OPROF_DOMAINS 25 +#define XENOPROF_CPU_TYPE_SIZE 64 + +/* Xenoprof performance events (not Xen events) */ +struct event_log { + uint64_t eip; + uint8_t mode; + uint8_t event; +}; + +/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */ +typedef struct xenoprof_buf { + uint32_t event_head; + uint32_t event_tail; + uint32_t event_size; + uint32_t vcpu_id; + uint64_t xen_samples; + uint64_t kernel_samples; + uint64_t user_samples; + uint64_t lost_samples; + struct event_log event_log[1]; +} xenoprof_buf_t; + +typedef struct xenoprof_init_result { + int32_t num_events; + int32_t is_primary; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_maddr; + char cpu_type[XENOPROF_CPU_TYPE_SIZE]; +} xenoprof_init_result_t; + + +#endif /* __XEN_PUBLIC_XENOPROF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 388c59fefaa6 -r e049baa9055d xen/include/xen/xenoprof.h --- /dev/null Thu Apr 6 16:49:21 2006 +++ b/xen/include/xen/xenoprof.h Thu Apr 6 17:58:01 2006 @@ -0,0 +1,40 @@ +/****************************************************************************** + * xenoprof.h + * + * Xenoprof enables performance profiling in Xen + * + * Copyright (C) 2005 Hewlett-Packard Co. + * written by Aravind Menon & Jose Renato Santos + */ + +#ifndef __XEN_XENOPROF_H__ +#define __XEN_XENOPROF_H__ + +#include <public/xenoprof.h> + +#define XENOPROF_DOMAIN_IGNORED 0 +#define XENOPROF_DOMAIN_ACTIVE 1 + +#define XENOPROF_IDLE 0 +#define XENOPROF_COUNTERS_RESERVED 1 +#define XENOPROF_READY 2 +#define XENOPROF_PROFILING 3 + + +typedef struct xenoprof_vcpu { + int event_size; + xenoprof_buf_t *buffer; +} xenoprof_vcpu_t; + +typedef struct xenoprof { + char *rawbuf; + int npages; + int nbuf; + int bufsize; + int domain_type; + int domain_ready; + int is_primary; + xenoprof_vcpu_t vcpu[MAX_VIRT_CPUS]; +} xenoprof_t; + +#endif /* __XEN_XENOPROF_H__ */
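
A note for readers of this patch: the hyperthreading "stagger" scheme in op_model_p4.c above is easier to see in isolation. The following stand-alone sketch mimics the VIRT_CTR() mapping; the counter counts and the cpu & 1 sibling test are illustrative assumptions for the demo, not values taken from this patch (the real code derives the stagger from cpu_sibling_map).

#include <stdio.h>

/* Illustrative values only; the real NUM_COUNTERS_* live in op_model_p4.c. */
#define DEMO_COUNTERS_NON_HT 8
#define DEMO_COUNTERS_HT2    4

static int num_counters = DEMO_COUNTERS_HT2;   /* pretend HT is enabled */

/* Stand-in for get_stagger(): 0 for the even sibling, 1 for the odd one.
 * The real code compares smp_processor_id() with
 * first_cpu(cpu_sibling_map[cpu]). */
static unsigned int get_stagger_demo(unsigned int cpu)
{
    return cpu & 1;
}

/* Same mapping as the patch: the odd sibling uses the upper half of the
 * physical counter/CCCR/ESCR resources, the even sibling the lower half. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))

int main(void)
{
    unsigned int cpu, i;

    for (cpu = 0; cpu < 2; cpu++)
        for (i = 0; i < (unsigned int)num_counters; i++)
            printf("cpu%u: virtual ctr %u -> p4_counters slot %u\n",
                   cpu, i, VIRT_CTR(get_stagger_demo(cpu), i));
    return 0;
}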
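
The producer/consumer protocol implied by xenoprof_buf_t also deserves a sketch: xenoprof_log_event() in Xen writes samples at event_head and treats the ring as full when advancing head would collide with event_tail (so one slot always stays empty), while the domain drains from event_tail. A minimal, hypothetical consumer loop, using a fixed-size stand-in struct rather than the real shared mapping, and omitting the memory barriers a real guest driver would need:

#include <stdint.h>
#include <stdio.h>

struct event_log_demo { uint64_t eip; uint8_t mode; uint8_t event; };

/* Fixed-size stand-in for the shared xenoprof_buf_t. */
struct xenoprof_buf_demo {
    uint32_t event_head;   /* advanced by the producer (Xen) */
    uint32_t event_tail;   /* advanced by the consumer (the guest) */
    uint32_t event_size;
    uint32_t vcpu_id;
    struct event_log_demo event_log[8];
};

/* Drain every sample currently in the ring, mirroring the head/tail
 * arithmetic used by xenoprof_log_event(). */
static void drain_samples(struct xenoprof_buf_demo *buf)
{
    uint32_t tail = buf->event_tail;

    while (tail != buf->event_head) {
        struct event_log_demo *s = &buf->event_log[tail];
        printf("vcpu%u: eip=%#llx mode=%u event=%u\n", buf->vcpu_id,
               (unsigned long long)s->eip, s->mode, s->event);
        if (++tail >= buf->event_size)
            tail = 0;
    }
    buf->event_tail = tail;   /* hands the slots back to the producer */
}

int main(void)
{
    struct xenoprof_buf_demo buf = { .event_size = 8, .vcpu_id = 0 };

    /* Fake two samples, as Xen would log them (kernel then user mode). */
    buf.event_log[0] = (struct event_log_demo){ 0xc0100000, 1, 0 };
    buf.event_log[1] = (struct event_log_demo){ 0x08048000, 0, 1 };
    buf.event_head = 2;

    drain_samples(&buf);
    return 0;
}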
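
Finally, do_xenoprof_op() above gates each command on xenoprof_state, giving a strict IDLE -> COUNTERS_RESERVED -> READY -> PROFILING cycle. A hypothetical primary-profiler driver might issue the ops in the order below; xenoprof_hypercall() is just a logging stub standing in for HYPERVISOR_xenoprof_op(), and the argument handling (result structure, counter configuration) is elided.

#include <stdio.h>

/* op codes from xen/include/public/xenoprof.h above */
enum { XENOPROF_init = 0, XENOPROF_set_active = 1,
       XENOPROF_reserve_counters = 3, XENOPROF_setup_events = 4,
       XENOPROF_enable_virq = 5, XENOPROF_start = 6, XENOPROF_stop = 7,
       XENOPROF_disable_virq = 8, XENOPROF_release_counters = 9,
       XENOPROF_shutdown = 10 };

/* Stand-in for HYPERVISOR_xenoprof_op(); only logs the transition. */
static int xenoprof_hypercall(int op, unsigned long a1, unsigned long a2)
{
    printf("xenoprof op %d (arg1=%#lx, arg2=%lu)\n", op, a1, a2);
    return 0;
}

int main(void)
{
    int doms[1] = { 0 };   /* profile domain 0 */

    /* Order matters: each op is checked against xenoprof_state in Xen. */
    xenoprof_hypercall(XENOPROF_init, 1000, 0);           /* max_samples; result ptr elided */
    xenoprof_hypercall(XENOPROF_set_active,
                       (unsigned long)doms, 1);           /* domain list + count */
    xenoprof_hypercall(XENOPROF_reserve_counters, 0, 0);  /* -> COUNTERS_RESERVED */
    xenoprof_hypercall(XENOPROF_setup_events, 0, 0);      /* -> READY; config elided */
    xenoprof_hypercall(XENOPROF_enable_virq, 0, 0);       /* marks this domain ready */
    xenoprof_hypercall(XENOPROF_start, 0, 0);             /* -> PROFILING */
    /* ... drain samples from the shared buffers ... */
    xenoprof_hypercall(XENOPROF_stop, 0, 0);              /* -> READY */
    xenoprof_hypercall(XENOPROF_disable_virq, 0, 0);
    xenoprof_hypercall(XENOPROF_release_counters, 0, 0);  /* -> IDLE */
    xenoprof_hypercall(XENOPROF_shutdown, 0, 0);
    return 0;
}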