
[Xen-changelog] Add xenoprof support



# HG changeset patch
# User ack@xxxxxxxxxxxxxxxxxxxxxxx
# Node ID e049baa9055dfa15bbf5ed0b3c3e56fabedbc386
# Parent  388c59fefaa6add89ca38622f2170cb7c98429ba
Add xenoprof support.

Xenoprof adds a xenoprof_op hypercall and a VIRQ_XENOPROF virtual
interrupt, performance-counter drivers under xen/arch/x86/oprofile/,
and a Linux-side xenoprof driver plus generic OProfile changes so that
samples can be attributed to user, kernel, or Xen mode.
Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx>
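
For orientation, a sketch of a profiling session as driven through the
new hypercall by the Linux xenoprof driver below (error handling
elided; only the primary profiler, dom0 for now, issues the
counter-management ops; max_samples, counter_config and num_events are
the driver's own variables):

    xenoprof_init_result_t result;

    /* Discover counters and the shared sample buffer layout. */
    HYPERVISOR_xenoprof_op(XENOPROF_init, max_samples,
                           (unsigned long)&result);
    /* ... map result.buf_maddr and bind VIRQ_XENOPROF on each CPU ... */

    HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_setup_events,
                           (unsigned long)&counter_config, num_events);
    HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_start, 0, 0);
    /* ... samples are delivered via VIRQ_XENOPROF ... */
    HYPERVISOR_xenoprof_op(XENOPROF_stop, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_release_counters, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_shutdown, 0, 0);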

diff -r 388c59fefaa6 -r e049baa9055d buildconfigs/linux-defconfig_xen0_x86_32
--- a/buildconfigs/linux-defconfig_xen0_x86_32  Thu Apr  6 16:49:21 2006
+++ b/buildconfigs/linux-defconfig_xen0_x86_32  Thu Apr  6 17:58:01 2006
@@ -1231,6 +1231,7 @@
 #
 # Instrumentation Support
 #
+# CONFIG_PROFILING is not set
 # CONFIG_KPROBES is not set
 
 #
diff -r 388c59fefaa6 -r e049baa9055d buildconfigs/linux-defconfig_xenU_x86_32
--- a/buildconfigs/linux-defconfig_xenU_x86_32  Thu Apr  6 16:49:21 2006
+++ b/buildconfigs/linux-defconfig_xenU_x86_32  Thu Apr  6 17:58:01 2006
@@ -779,6 +779,7 @@
 #
 # Instrumentation Support
 #
+# CONFIG_PROFILING is not set
 # CONFIG_KPROBES is not set
 
 #
diff -r 388c59fefaa6 -r e049baa9055d buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32   Thu Apr  6 16:49:21 2006
+++ b/buildconfigs/linux-defconfig_xen_x86_32   Thu Apr  6 17:58:01 2006
@@ -2892,6 +2892,7 @@
 #
 # Instrumentation Support
 #
+# CONFIG_PROFILING is not set
 # CONFIG_KPROBES is not set
 
 #
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig    Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig    Thu Apr  6 17:58:01 2006
@@ -1116,9 +1116,7 @@
 menu "Instrumentation Support"
        depends on EXPERIMENTAL
 
-if !X86_XEN
 source "arch/i386/oprofile/Kconfig"
-endif
 
 config KPROBES
        bool "Kprobes (EXPERIMENTAL)"
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Thu Apr  6 17:58:01 2006
@@ -177,6 +177,32 @@
 
 EXPORT_SYMBOL(touch_pte_range);
 
+void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot)
+{
+       int error;
+       
+       struct vm_struct *vma;
+       vma = get_vm_area (vm_size, VM_IOREMAP);
+      
+       if (vma == NULL) {
+               printk ("ioremap.c,vm_map_xen_pages(): "
+                       "Failed to get VMA area\n");
+               return NULL;
+       }
+
+       error = direct_kernel_remap_pfn_range((unsigned long) vma->addr,
+                                             maddr >> PAGE_SHIFT, vm_size,
+                                             prot, DOMID_SELF );
+       if (error == 0) {
+               return vma->addr;
+       } else {
+               printk ("ioremap.c,vm_map_xen_pages(): "
+                       "Failed to map xen shared pages into kernel space\n");
+               return NULL;
+       }
+}
+EXPORT_SYMBOL(vm_map_xen_pages);
+
 /*
  * Does @address reside within a non-highmem page that is local to this virtual
  * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h    Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h    Thu Apr  6 17:58:01 2006
@@ -335,6 +335,14 @@
 {
        return _hypercall2(int, callback_op, cmd, arg);
 }
+
+static inline int
+HYPERVISOR_xenoprof_op(
+       int op, unsigned long arg1, unsigned long arg2)
+{
+       return _hypercall3(int, xenoprof_op, op, arg1, arg2);
+}
+
 
 #endif /* __HYPERCALL_H__ */
 
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h  Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h  Thu Apr  6 17:58:01 2006
@@ -335,6 +335,13 @@
        int cmd, void *arg)
 {
        return _hypercall2(int, callback_op, cmd, arg);
+}
+
+static inline int
+HYPERVISOR_xenoprof_op(
+       int op, unsigned long arg1, unsigned long arg2)
+{
+       return _hypercall3(int, xenoprof_op, op, arg1, arg2);
 }
 
 #endif /* __HYPERCALL_H__ */
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/Makefile     Thu Apr  6 17:58:01 2006
@@ -2,6 +2,7 @@
 subdir-y += cpu
 subdir-y += genapic
 subdir-y += hvm
+subdir-y += oprofile
 
 subdir-$(x86_32) += x86_32
 subdir-$(x86_64) += x86_64
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/domain.c     Thu Apr  6 17:58:01 2006
@@ -915,6 +915,8 @@
     spin_unlock_recursive(&d->page_alloc_lock);
 }
 
+extern void free_xenoprof_pages(struct domain *d);
+
 void domain_relinquish_resources(struct domain *d)
 {
     struct vcpu *v;
@@ -961,6 +963,10 @@
     /* Relinquish every page of memory. */
     relinquish_memory(d, &d->xenpage_list);
     relinquish_memory(d, &d->page_list);
+
+    /* Free pages used by the xenoprof buffer */
+    free_xenoprof_pages(d);
+
 }
 
 void arch_dump_domain_info(struct domain *d)
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/x86_32/entry.S       Thu Apr  6 17:58:01 2006
@@ -645,6 +645,7 @@
         .long do_nmi_op
         .long do_arch_sched_op
         .long do_callback_op        /* 30 */
+        .long do_xenoprof_op
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
@@ -681,6 +682,7 @@
         .byte 2 /* do_nmi_op            */
         .byte 2 /* do_arch_sched_op     */
         .byte 2 /* do_callback_op       */  /* 30 */
+        .byte 3 /* do_xenoprof_op       */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/x86_64/entry.S       Thu Apr  6 17:58:01 2006
@@ -553,6 +553,7 @@
         .quad do_nmi_op
         .quad do_arch_sched_op
         .quad do_callback_op        /* 30 */
+        .quad do_xenoprof_op
         .rept NR_hypercalls-((.-hypercall_table)/8)
         .quad do_ni_hypercall
         .endr
@@ -589,6 +590,7 @@
         .byte 2 /* do_nmi_op            */
         .byte 2 /* do_arch_sched_op     */
         .byte 2 /* do_callback_op       */  /* 30 */
+        .byte 3 /* do_xenoprof_op       */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
diff -r 388c59fefaa6 -r e049baa9055d xen/include/public/xen.h
--- a/xen/include/public/xen.h  Thu Apr  6 16:49:21 2006
+++ b/xen/include/public/xen.h  Thu Apr  6 17:58:01 2006
@@ -61,6 +61,7 @@
 #define __HYPERVISOR_nmi_op               28
 #define __HYPERVISOR_sched_op             29
 #define __HYPERVISOR_callback_op          30
+#define __HYPERVISOR_xenoprof_op          31
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -77,7 +78,8 @@
 #define VIRQ_CONSOLE    2  /* G. (DOM0) Bytes received on emergency console. */
 #define VIRQ_DOM_EXC    3  /* G. (DOM0) Exceptional event for some domain.   */
 #define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
-#define NR_VIRQS        8
+#define VIRQ_XENOPROF   7  /* XenOprofile interrupt: new sample available */
+#define NR_VIRQS        9
 
 /*
  * MMU-UPDATE REQUESTS
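
Guests learn of new samples through VIRQ_XENOPROF. A minimal per-CPU
binding sketch, mirroring bind_virq_cpu() in the Linux driver added
below (the handler body is a placeholder; the real one drains the
shared sample buffer):

    #include <linux/interrupt.h>
    #include <xen/evtchn.h>

    static irqreturn_t ovf_handler(int irq, void *dev_id, struct pt_regs *regs)
    {
            /* Placeholder: consume samples between event_tail and event_head. */
            return IRQ_HANDLED;
    }

    static int bind_xenoprof_virq(void)      /* run on each CPU */
    {
            int irq = bind_virq_to_irqhandler(VIRQ_XENOPROF, smp_processor_id(),
                                              ovf_handler, SA_INTERRUPT,
                                              "xenoprof", NULL);
            return (irq < 0) ? irq : 0;
    }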
diff -r 388c59fefaa6 -r e049baa9055d xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Thu Apr  6 16:49:21 2006
+++ b/xen/include/xen/sched.h   Thu Apr  6 17:58:01 2006
@@ -14,6 +14,7 @@
 #include <xen/grant_table.h>
 #include <xen/rangeset.h>
 #include <asm/domain.h>
+#include <xen/xenoprof.h>
 
 extern unsigned long volatile jiffies;
 extern rwlock_t domlist_lock;
@@ -155,6 +156,9 @@
 
     /* Control-plane tools handle for this domain. */
     xen_domain_handle_t handle;
+
+    /* pointer to xenoprof data (oprofile support) */
+    xenoprof_t *xenoprof;
 };
 
 struct domain_setup_info
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/oprofile/Makefile
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile  Thu Apr  6 17:58:01 2006
@@ -0,0 +1,16 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+               oprof.o cpu_buffer.o buffer_sync.o \
+               event_buffer.o oprofile_files.o \
+               oprofilefs.o oprofile_stats.o  \
+               timer_int.o )
+
+ifdef CONFIG_X86_XEN
+oprofile-y                             := $(DRIVER_OBJS) xenoprof.o
+else 
+oprofile-y                             := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_X86_LOCAL_APIC)      += nmi_int.o op_model_athlon.o \
+                                          op_model_ppro.o op_model_p4.o
+oprofile-$(CONFIG_X86_IO_APIC)         += nmi_timer_int.o
+endif
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c        Thu Apr  6 17:58:01 2006
@@ -0,0 +1,395 @@
+/**
+ * @file xenoprof.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
+ * Modified by Aravind Menon and Jose Renato Santos for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/vmalloc.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/pgtable.h>
+#include <xen/evtchn.h>
+#include "op_counter.h"
+
+#include <xen/interface/xen.h>
+#include <xen/interface/xenoprof.h>
+
+static int xenoprof_start(void);
+static void xenoprof_stop(void);
+
+void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot);
+
+static int xenoprof_enabled = 0;
+static int num_events = 0;
+static int is_primary = 0;
+
+/* sample buffers shared with Xen */
+xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS];
+/* Shared buffer area */
+char * shared_buffer;
+/* Number of buffers in shared area (one per VCPU) */
+int nbuf;
+/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
+int ovf_irq[NR_CPUS];
+/* cpu model type string - copied from Xen memory space on XENOPROF_init command */
+char cpu_type[XENOPROF_CPU_TYPE_SIZE];
+
+#ifdef CONFIG_PM
+
+static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
+{
+       if (xenoprof_enabled == 1)
+               xenoprof_stop();
+       return 0;
+}
+
+
+static int xenoprof_resume(struct sys_device * dev)
+{
+       if (xenoprof_enabled == 1)
+               xenoprof_start();
+       return 0;
+}
+
+
+static struct sysdev_class oprofile_sysclass = {
+       set_kset_name("oprofile"),
+       .resume         = xenoprof_resume,
+       .suspend        = xenoprof_suspend
+};
+
+
+static struct sys_device device_oprofile = {
+       .id     = 0,
+       .cls    = &oprofile_sysclass,
+};
+
+
+static int __init init_driverfs(void)
+{
+       int error;
+       if (!(error = sysdev_class_register(&oprofile_sysclass)))
+               error = sysdev_register(&device_oprofile);
+       return error;
+}
+
+
+static void __exit exit_driverfs(void)
+{
+       sysdev_unregister(&device_oprofile);
+       sysdev_class_unregister(&oprofile_sysclass);
+}
+
+#else
+#define init_driverfs() do { } while (0)
+#define exit_driverfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+unsigned long long oprofile_samples = 0;
+
+static irqreturn_t 
+xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
+{
+       int head, tail, size;
+       xenoprof_buf_t * buf;
+       int cpu;
+
+       cpu = smp_processor_id();
+       buf = xenoprof_buf[cpu];
+
+       head = buf->event_head;
+       tail = buf->event_tail;
+       size = buf->event_size;
+
+       if (tail > head) {
+               while (tail < size) {
+                       oprofile_add_pc(buf->event_log[tail].eip,
+                                       buf->event_log[tail].mode,
+                                       buf->event_log[tail].event);
+                       oprofile_samples++;
+                       tail++;
+               }
+               tail = 0;
+       }
+       while (tail < head) {
+               oprofile_add_pc(buf->event_log[tail].eip,
+                               buf->event_log[tail].mode,
+                               buf->event_log[tail].event);
+               oprofile_samples++;
+               tail++;
+       }
+
+       buf->event_tail = tail;
+
+       return IRQ_HANDLED;
+}
+
+
+static void unbind_virq_cpu(void * info)
+{
+       int cpu = smp_processor_id();
+       if (ovf_irq[cpu] >= 0) {
+               unbind_from_irqhandler(ovf_irq[cpu], NULL);
+               ovf_irq[cpu] = -1;
+       }
+}
+
+
+static void unbind_virq(void)
+{
+       on_each_cpu(unbind_virq_cpu, NULL, 0, 1);
+}
+
+
+int bind_virq_error;
+
+static void bind_virq_cpu(void * info)
+{
+       int result;
+       int cpu = smp_processor_id();
+
+       result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
+                                        cpu,
+                                        xenoprof_ovf_interrupt,
+                                        SA_INTERRUPT,
+                                        "xenoprof",
+                                        NULL);
+
+       if (result<0) {
+               bind_virq_error = result;
+               printk("xenoprof.c: binding VIRQ_XENOPROF to IRQ failed on CPU "
+                      "%d\n", cpu);
+       } else {
+               ovf_irq[cpu] = result;
+       }
+}
+
+
+static int bind_virq(void)
+{
+       bind_virq_error = 0;
+       on_each_cpu(bind_virq_cpu, NULL, 0, 1);
+       if (bind_virq_error) {
+               unbind_virq();
+               return bind_virq_error;
+       } else {
+               return 0;
+       }
+}
+
+
+static int xenoprof_setup(void)
+{
+       int ret;
+
+       ret = bind_virq();
+       if (ret)
+               return ret;
+
+       if (is_primary) {
+               ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters,
+                                            (unsigned long)NULL,
+                                            (unsigned long)NULL);
+               if (ret)
+                       goto err;
+
+               ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events,
+                                            (unsigned long)&counter_config,
+                                            (unsigned long)num_events);
+               if (ret)
+                       goto err;
+       }
+
+       ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq,
+                                    (unsigned long)NULL,
+                                    (unsigned long)NULL);
+       if (ret)
+               goto err;
+
+       xenoprof_enabled = 1;
+       return 0;
+ err:
+       unbind_virq();
+       return ret;
+}
+
+
+static void xenoprof_shutdown(void)
+{
+       xenoprof_enabled = 0;
+
+       HYPERVISOR_xenoprof_op(XENOPROF_disable_virq,
+                              (unsigned long)NULL,
+                              (unsigned long)NULL);
+
+       if (is_primary) {
+               HYPERVISOR_xenoprof_op(XENOPROF_release_counters,
+                                      (unsigned long)NULL,
+                                      (unsigned long)NULL);
+       }
+
+       unbind_virq();
+}
+
+
+static int xenoprof_start(void)
+{
+       int ret = 0;
+
+       if (is_primary)
+               ret = HYPERVISOR_xenoprof_op(XENOPROF_start,
+                                            (unsigned long)NULL,
+                                            (unsigned long)NULL);
+       return ret;
+}
+
+
+static void xenoprof_stop(void)
+{
+       if (is_primary)
+               HYPERVISOR_xenoprof_op(XENOPROF_stop,
+                                      (unsigned long)NULL,
+                                      (unsigned long)NULL);
+}
+
+
+static int xenoprof_set_active(int * active_domains,
+                         unsigned int adomains)
+{
+       int ret = 0;
+       if (is_primary)
+               ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active,
+                                            (unsigned long)active_domains,
+                                            (unsigned long)adomains);
+       return ret;
+}
+
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int xenoprof_create_files(struct super_block * sb, struct dentry * root)
+{
+       unsigned int i;
+
+       for (i = 0; i < num_events; ++i) {
+               struct dentry * dir;
+               char buf[2];
+ 
+               snprintf(buf, 2, "%d", i);
+               dir = oprofilefs_mkdir(sb, root, buf);
+               oprofilefs_create_ulong(sb, dir, "enabled",
+                                       &counter_config[i].enabled);
+               oprofilefs_create_ulong(sb, dir, "event",
+                                       &counter_config[i].event);
+               oprofilefs_create_ulong(sb, dir, "count",
+                                       &counter_config[i].count);
+               oprofilefs_create_ulong(sb, dir, "unit_mask",
+                                       &counter_config[i].unit_mask);
+               oprofilefs_create_ulong(sb, dir, "kernel",
+                                       &counter_config[i].kernel);
+               oprofilefs_create_ulong(sb, dir, "user",
+                                       &counter_config[i].user);
+       }
+
+       return 0;
+}
+
+
+struct oprofile_operations xenoprof_ops = {
+       .create_files   = xenoprof_create_files,
+       .set_active     = xenoprof_set_active,
+       .setup          = xenoprof_setup,
+       .shutdown       = xenoprof_shutdown,
+       .start          = xenoprof_start,
+       .stop           = xenoprof_stop
+};
+
+
+/* in order to get driverfs right */
+static int using_xenoprof;
+
+int __init oprofile_arch_init(struct oprofile_operations * ops)
+{
+       xenoprof_init_result_t result;
+       xenoprof_buf_t * buf;
+       int max_samples = 16;
+       int vm_size;
+       int npages;
+       int i;
+
+       int ret = HYPERVISOR_xenoprof_op(XENOPROF_init,
+                                        (unsigned long)max_samples,
+                                        (unsigned long)&result);
+
+       if (!ret) {
+               pgprot_t prot = __pgprot(_KERNPG_TABLE);
+
+               num_events = result.num_events;
+               is_primary = result.is_primary;
+               nbuf = result.nbuf;
+
+               npages = (result.bufsize * nbuf - 1) / PAGE_SIZE + 1;
+               vm_size = npages * PAGE_SIZE;
+
+               shared_buffer = (char *) vm_map_xen_pages(result.buf_maddr,
+                                                         vm_size, prot);
+               if (!shared_buffer) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               for (i=0; i< nbuf; i++) {
+                       buf = (xenoprof_buf_t*) 
+                               &shared_buffer[i * result.bufsize];
+                       BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
+                       xenoprof_buf[buf->vcpu_id] = buf;
+               }
+
+               /*  cpu_type is detected by Xen */
+               cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
+               strncpy(cpu_type, result.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
+               xenoprof_ops.cpu_type = cpu_type;
+
+               init_driverfs();
+               using_xenoprof = 1;
+               *ops = xenoprof_ops;
+
+               for (i=0; i<NR_CPUS; i++)
+                       ovf_irq[i] = -1;
+       }
+ out:
+       printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, "
+              "is_primary %d\n", ret, num_events, is_primary);
+       return ret;
+}
+
+
+void __exit oprofile_arch_exit(void)
+{
+       if (using_xenoprof)
+               exit_driverfs();
+
+       if (shared_buffer) {
+               vunmap(shared_buffer);
+               shared_buffer = NULL;
+       }
+       if (is_primary)
+               HYPERVISOR_xenoprof_op(XENOPROF_shutdown,
+                                      (unsigned long)NULL,
+                                      (unsigned long)NULL);
+}
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile        Thu Apr  6 17:58:01 2006
@@ -0,0 +1,22 @@
+#
+# oprofile for x86-64.
+# Just reuse the one from i386. 
+#
+
+obj-$(CONFIG_OPROFILE) += oprofile.o
+ 
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+       oprof.o cpu_buffer.o buffer_sync.o \
+       event_buffer.o oprofile_files.o \
+       oprofilefs.o oprofile_stats.o \
+       timer_int.o )
+
+ifdef CONFIG_X86_64_XEN
+OPROFILE-y := xenoprof.o
+else
+OPROFILE-y := init.o backtrace.o
+OPROFILE-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o op_model_p4.o \
+                                    op_model_ppro.o
+OPROFILE-$(CONFIG_X86_IO_APIC)    += nmi_timer_int.o 
+endif
+oprofile-y = $(DRIVER_OBJS) $(addprefix ../../i386/oprofile/, $(OPROFILE-y))
diff -r 388c59fefaa6 -r e049baa9055d patches/linux-2.6.16/xenoprof-generic.patch
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/patches/linux-2.6.16/xenoprof-generic.patch       Thu Apr  6 17:58:01 2006
@@ -0,0 +1,384 @@
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c
+--- ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c    2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/buffer_sync.c   2006-04-03 15:53:05.000000000 +0100
+@@ -6,6 +6,10 @@
+  *
+  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+  *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
++ *
+  * This is the core of the buffer management. Each
+  * CPU buffer is processed and entered into the
+  * global event buffer. Such processing is necessary
+@@ -275,15 +279,24 @@ static void add_cpu_switch(int i)
+       last_cookie = INVALID_COOKIE;
+ }
+ 
+-static void add_kernel_ctx_switch(unsigned int in_kernel)
++static void add_cpu_mode_switch(unsigned int cpu_mode)
+ {
+       add_event_entry(ESCAPE_CODE);
+-      if (in_kernel)
+-              add_event_entry(KERNEL_ENTER_SWITCH_CODE); 
+-      else
+-              add_event_entry(KERNEL_EXIT_SWITCH_CODE); 
++      switch (cpu_mode) {
++      case CPU_MODE_USER:
++              add_event_entry(USER_ENTER_SWITCH_CODE);
++              break;
++      case CPU_MODE_KERNEL:
++              add_event_entry(KERNEL_ENTER_SWITCH_CODE);
++              break;
++      case CPU_MODE_XEN:
++              add_event_entry(XEN_ENTER_SWITCH_CODE);
++              break;
++      default:
++              break;
++      }
+ }
+- 
++
+ static void
+ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
+ {
+@@ -348,9 +361,9 @@ static int add_us_sample(struct mm_struc
+  * for later lookup from userspace.
+  */
+ static int
+-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
+ {
+-      if (in_kernel) {
++      if (cpu_mode >= CPU_MODE_KERNEL) {
+               add_sample_entry(s->eip, s->event);
+               return 1;
+       } else if (mm) {
+@@ -496,7 +509,7 @@ void sync_buffer(int cpu)
+       struct mm_struct *mm = NULL;
+       struct task_struct * new;
+       unsigned long cookie = 0;
+-      int in_kernel = 1;
++      int cpu_mode = 1;
+       unsigned int i;
+       sync_buffer_state state = sb_buffer_start;
+       unsigned long available;
+@@ -513,12 +526,12 @@ void sync_buffer(int cpu)
+               struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
+  
+               if (is_code(s->eip)) {
+-                      if (s->event <= CPU_IS_KERNEL) {
++                      if (s->event <= CPU_MODE_XEN) {
+                               /* kernel/userspace switch */
+-                              in_kernel = s->event;
++                              cpu_mode = s->event;
+                               if (state == sb_buffer_start)
+                                       state = sb_sample_start;
+-                              add_kernel_ctx_switch(s->event);
++                              add_cpu_mode_switch(s->event);
+                       } else if (s->event == CPU_TRACE_BEGIN) {
+                               state = sb_bt_start;
+                               add_trace_begin();
+@@ -536,7 +549,7 @@ void sync_buffer(int cpu)
+                       }
+               } else {
+                       if (state >= sb_bt_start &&
+-                          !add_sample(mm, s, in_kernel)) {
++                          !add_sample(mm, s, cpu_mode)) {
+                               if (state == sb_bt_start) {
+                                       state = sb_bt_ignore;
+                                       atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c
+--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c     2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/cpu_buffer.c    2006-04-03 15:53:05.000000000 +0100
+@@ -6,6 +6,10 @@
+  *
+  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+  *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
++ *
+  * Each CPU has a local buffer that stores PC value/event
+  * pairs. We also log context switches when we notice them.
+  * Eventually each CPU's buffer is processed into the global
+@@ -58,7 +62,7 @@ int alloc_cpu_buffers(void)
+                       goto fail;
+  
+               b->last_task = NULL;
+-              b->last_is_kernel = -1;
++              b->last_cpu_mode = -1;
+               b->tracing = 0;
+               b->buffer_size = buffer_size;
+               b->tail_pos = 0;
+@@ -114,7 +118,7 @@ void cpu_buffer_reset(struct oprofile_cp
+        * collected will populate the buffer with proper
+        * values to initialize the buffer
+        */
+-      cpu_buf->last_is_kernel = -1;
++      cpu_buf->last_cpu_mode = -1;
+       cpu_buf->last_task = NULL;
+ }
+ 
+@@ -164,13 +168,13 @@ add_code(struct oprofile_cpu_buffer * bu
+  * because of the head/tail separation of the writer and reader
+  * of the CPU buffer.
+  *
+- * is_kernel is needed because on some architectures you cannot
++ * cpu_mode is needed because on some architectures you cannot
+  * tell if you are in kernel or user space simply by looking at
+- * pc. We tag this in the buffer by generating kernel enter/exit
+- * events whenever is_kernel changes
++ * pc. We tag this in the buffer by generating kernel/user (and xen)
++ *  enter events whenever cpu_mode changes
+  */
+ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
+-                    int is_kernel, unsigned long event)
++                    int cpu_mode, unsigned long event)
+ {
+       struct task_struct * task;
+ 
+@@ -181,16 +185,16 @@ static int log_sample(struct oprofile_cp
+               return 0;
+       }
+ 
+-      is_kernel = !!is_kernel;
++      WARN_ON(cpu_mode > CPU_MODE_XEN);
+ 
+       task = current;
+ 
+       /* notice a switch from user->kernel or vice versa */
+-      if (cpu_buf->last_is_kernel != is_kernel) {
+-              cpu_buf->last_is_kernel = is_kernel;
+-              add_code(cpu_buf, is_kernel);
++      if (cpu_buf->last_cpu_mode != cpu_mode) {
++              cpu_buf->last_cpu_mode = cpu_mode;
++              add_code(cpu_buf, cpu_mode);
+       }
+-
++      
+       /* notice a task switch */
+       if (cpu_buf->last_task != task) {
+               cpu_buf->last_task = task;
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h
+--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h     2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/cpu_buffer.h    2006-04-03 15:53:05.000000000 +0100
+@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
+       volatile unsigned long tail_pos;
+       unsigned long buffer_size;
+       struct task_struct * last_task;
+-      int last_is_kernel;
++      int last_cpu_mode;
+       int tracing;
+       struct op_sample * buffer;
+       unsigned long sample_received;
+@@ -51,7 +51,9 @@ extern struct oprofile_cpu_buffer cpu_bu
+ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
+ 
+ /* transient events for the CPU buffer -> event buffer */
+-#define CPU_IS_KERNEL 1
+-#define CPU_TRACE_BEGIN 2
++#define CPU_MODE_USER    0
++#define CPU_MODE_KERNEL  1
++#define CPU_MODE_XEN     2
++#define CPU_TRACE_BEGIN  3
+ 
+ #endif /* OPROFILE_CPU_BUFFER_H */
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h
+--- ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h   2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/event_buffer.h  2006-04-03 15:53:05.000000000 +0100
+@@ -29,11 +29,12 @@ void wake_up_buffer_waiter(void);
+ #define CPU_SWITCH_CODE               2
+ #define COOKIE_SWITCH_CODE            3
+ #define KERNEL_ENTER_SWITCH_CODE      4
+-#define KERNEL_EXIT_SWITCH_CODE               5
++#define USER_ENTER_SWITCH_CODE                5
+ #define MODULE_LOADED_CODE            6
+ #define CTX_TGID_CODE                 7
+ #define TRACE_BEGIN_CODE              8
+ #define TRACE_END_CODE                        9
++#define XEN_ENTER_SWITCH_CODE         10
+  
+ #define INVALID_COOKIE ~0UL
+ #define NO_COOKIE 0UL
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c
+--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.c  2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/oprof.c 2006-04-03 15:53:05.000000000 +0100
+@@ -5,6 +5,10 @@
+  * @remark Read the file COPYING
+  *
+  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
++ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
+  */
+ 
+ #include <linux/kernel.h>
+@@ -19,7 +23,7 @@
+ #include "cpu_buffer.h"
+ #include "buffer_sync.h"
+ #include "oprofile_stats.h"
+- 
++
+ struct oprofile_operations oprofile_ops;
+ 
+ unsigned long oprofile_started;
+@@ -33,6 +37,17 @@ static DECLARE_MUTEX(start_sem);
+  */
+ static int timer = 0;
+ 
++extern unsigned int adomains;
++extern int active_domains[MAX_OPROF_DOMAINS];
++
++int oprofile_set_active(void)
++{
++      if (oprofile_ops.set_active)
++              return oprofile_ops.set_active(active_domains, adomains);
++
++      return -EINVAL;
++}
++
+ int oprofile_setup(void)
+ {
+       int err;
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h
+--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.h  2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/oprof.h 2006-04-03 15:53:05.000000000 +0100
+@@ -35,5 +35,7 @@ void oprofile_create_files(struct super_
+ void oprofile_timer_init(struct oprofile_operations * ops);
+ 
+ int oprofile_set_backtrace(unsigned long depth);
++
++int oprofile_set_active(void);
+  
+ #endif /* OPROF_H */
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c
+--- ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c 2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/oprofile_files.c        2006-04-03 15:53:05.000000000 +0100
+@@ -5,15 +5,21 @@
+  * @remark Read the file COPYING
+  *
+  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
++ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.     
+  */
+ 
+ #include <linux/fs.h>
+ #include <linux/oprofile.h>
++#include <asm/uaccess.h>
++#include <linux/ctype.h>
+ 
+ #include "event_buffer.h"
+ #include "oprofile_stats.h"
+ #include "oprof.h"
+- 
++
+ unsigned long fs_buffer_size = 131072;
+ unsigned long fs_cpu_buffer_size = 8192;
+ unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
+@@ -117,11 +123,79 @@ static ssize_t dump_write(struct file * 
+ static struct file_operations dump_fops = {
+       .write          = dump_write,
+ };
+- 
++
++#define TMPBUFSIZE 512
++
++unsigned int adomains = 0;
++long active_domains[MAX_OPROF_DOMAINS];
++
++static ssize_t adomain_write(struct file * file, char const __user * buf, 
++                           size_t count, loff_t * offset)
++{
++      char tmpbuf[TMPBUFSIZE];
++      char * startp = tmpbuf;
++      char * endp = tmpbuf;
++      int i;
++      unsigned long val;
++      
++      if (*offset)
++              return -EINVAL; 
++      if (!count)
++              return 0;
++      if (count > TMPBUFSIZE - 1)
++              return -EINVAL;
++
++      memset(tmpbuf, 0x0, TMPBUFSIZE);
++
++      if (copy_from_user(tmpbuf, buf, count))
++              return -EFAULT;
++      
++      for (i = 0; i < MAX_OPROF_DOMAINS; i++)
++              active_domains[i] = -1;
++      adomains = 0;
++
++      while (1) {
++              val = simple_strtol(startp, &endp, 0);
++              if (endp == startp)
++                      break;
++              while (ispunct(*endp))
++                      endp++;
++              active_domains[adomains++] = val;
++              if (adomains >= MAX_OPROF_DOMAINS)
++                      break;
++              startp = endp;
++      }
++      if (oprofile_set_active())
++              return -EINVAL; 
++      return count;
++}
++
++static ssize_t adomain_read(struct file * file, char __user * buf, 
++                          size_t count, loff_t * offset)
++{
++      char tmpbuf[TMPBUFSIZE];
++      size_t len = 0;
++      int i;
++      /* This is all screwed up if we run out of space */
++      for (i = 0; i < adomains; i++) 
++              len += snprintf(tmpbuf + len, TMPBUFSIZE - len, 
++                              "%u ", (unsigned int)active_domains[i]);
++      len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "\n");
++      return simple_read_from_buffer((void __user *)buf, count, 
++                                     offset, tmpbuf, len);
++}
++
++
++static struct file_operations active_domain_ops = {
++      .read           = adomain_read,
++      .write          = adomain_write,
++};
++
+ void oprofile_create_files(struct super_block * sb, struct dentry * root)
+ {
+       oprofilefs_create_file(sb, root, "enable", &enable_fops);
+       oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
++      oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
+       oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
+       oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
+       oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
+diff -pruN ../pristine-linux-2.6.16/include/linux/oprofile.h ./include/linux/oprofile.h
+--- ../pristine-linux-2.6.16/include/linux/oprofile.h  2006-03-20 05:53:29.000000000 +0000
++++ ./include/linux/oprofile.h 2006-04-03 15:53:05.000000000 +0100
+@@ -16,6 +16,8 @@
+ #include <linux/types.h>
+ #include <linux/spinlock.h>
+ #include <asm/atomic.h>
++
++#include <xen/interface/xenoprof.h>
+  
+ struct super_block;
+ struct dentry;
+@@ -27,6 +29,8 @@ struct oprofile_operations {
+       /* create any necessary configuration files in the oprofile fs.
+        * Optional. */
+       int (*create_files)(struct super_block * sb, struct dentry * root);
++      /* setup active domains with Xen */
++      int (*set_active)(int *active_domains, unsigned int adomains);
+       /* Do any necessary interrupt setup. Optional. */
+       int (*setup)(void);
+       /* Do any necessary interrupt shutdown. Optional. */
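
The generic patch above also exposes an active_domains control file in
oprofilefs. A hypothetical userspace sketch of selecting the profiled
domains, assuming oprofilefs is mounted at /dev/oprofile; per
adomain_write() above, domain IDs are separated by punctuation:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            const char *ids = "0,1";  /* profile domain 0 and domain 1 */
            int fd = open("/dev/oprofile/active_domains", O_WRONLY);

            if (fd < 0) {
                    perror("open active_domains");
                    return 1;
            }
            if (write(fd, ids, strlen(ids)) != (ssize_t)strlen(ids))
                    perror("write active_domains");
            close(fd);
            return 0;
    }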
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/Makefile
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/Makefile    Thu Apr  6 17:58:01 2006
@@ -0,0 +1,5 @@
+obj-y += xenoprof.o
+obj-y += nmi_int.o
+obj-y += op_model_p4.o
+obj-y += op_model_ppro.o
+obj-y += op_model_athlon.o
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/nmi_int.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/nmi_int.c   Thu Apr  6 17:58:01 2006
@@ -0,0 +1,399 @@
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
+ * Modified for Xen: by Aravind Menon & Jose Renato Santos
+ *   These modifications are:
+ *   Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/event.h>
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <public/xen.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+#include <xen/delay.h>
+ 
+#include "op_counter.h"
+#include "op_x86_model.h"
+ 
+static struct op_x86_model_spec const * model;
+static struct op_msrs cpu_msrs[NR_CPUS];
+static unsigned long saved_lvtpc[NR_CPUS];
+
+#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1)
+extern int active_domains[MAX_OPROF_DOMAINS];
+extern unsigned int adomains;
+extern struct domain *primary_profiler;
+extern struct domain *adomain_ptrs[MAX_OPROF_DOMAINS];
+extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE];
+extern int is_active(struct domain *d);
+extern int active_id(struct domain *d);
+extern int is_profiled(struct domain *d);
+
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+
+
+int nmi_callback(struct cpu_user_regs *regs, int cpu)
+{
+       int xen_mode = 0;
+       int ovf;
+
+       ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs);
+       xen_mode = ring_0(regs);
+       if ( ovf )
+       {
+               if ( is_active(current->domain) )
+               {
+                       if ( !xen_mode )
+                       {
+                               send_guest_vcpu_virq(current, VIRQ_XENOPROF);
+                       } 
+               }
+       }
+       return 1;
+}
+ 
+ 
+static void nmi_cpu_save_registers(struct op_msrs * msrs)
+{
+       unsigned int const nr_ctrs = model->num_counters;
+       unsigned int const nr_ctrls = model->num_controls; 
+       struct op_msr * counters = msrs->counters;
+       struct op_msr * controls = msrs->controls;
+       unsigned int i;
+
+       for (i = 0; i < nr_ctrs; ++i) {
+               rdmsr(counters[i].addr,
+                       counters[i].saved.low,
+                       counters[i].saved.high);
+       }
+ 
+       for (i = 0; i < nr_ctrls; ++i) {
+               rdmsr(controls[i].addr,
+                       controls[i].saved.low,
+                       controls[i].saved.high);
+       }
+}
+
+
+static void nmi_save_registers(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs * msrs = &cpu_msrs[cpu];
+       model->fill_in_addresses(msrs);
+       nmi_cpu_save_registers(msrs);
+}
+
+
+static void free_msrs(void)
+{
+       int i;
+       for (i = 0; i < NR_CPUS; ++i) {
+               xfree(cpu_msrs[i].counters);
+               cpu_msrs[i].counters = NULL;
+               xfree(cpu_msrs[i].controls);
+               cpu_msrs[i].controls = NULL;
+       }
+}
+
+
+static int allocate_msrs(void)
+{
+       int success = 1;
+       size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+       size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+       int i;
+       for (i = 0; i < NR_CPUS; ++i) {
+               if (!test_bit(i, &cpu_online_map))
+                       continue;
+
+               cpu_msrs[i].counters = xmalloc_bytes(counters_size);
+               if (!cpu_msrs[i].counters) {
+                       success = 0;
+                       break;
+               }
+               cpu_msrs[i].controls = xmalloc_bytes(controls_size);
+               if (!cpu_msrs[i].controls) {
+                       success = 0;
+                       break;
+               }
+       }
+
+       if (!success)
+               free_msrs();
+
+       return success;
+}
+
+
+static void nmi_cpu_setup(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs * msrs = &cpu_msrs[cpu];
+       model->setup_ctrs(msrs);
+}
+
+
+int nmi_setup_events(void)
+{
+       on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+       return 0;
+}
+
+int nmi_reserve_counters(void)
+{
+       if (!allocate_msrs())
+               return -ENOMEM;
+
+       /* We walk a thin line between law and rape here.
+        * We need to be careful to install our NMI handler
+        * without actually triggering any NMIs as this will
+        * break the core code horrifically.
+        */
+       if (reserve_lapic_nmi() < 0) {
+               free_msrs();
+               return -EBUSY;
+       }
+       /* We need to serialize save and setup for HT because the subset
+        * of msrs are distinct for save and setup operations
+        */
+       on_each_cpu(nmi_save_registers, NULL, 0, 1);
+       return 0;
+}
+
+int nmi_enable_virq(void)
+{
+       set_nmi_callback(nmi_callback);
+       return 0;
+}
+
+
+void nmi_disable_virq(void)
+{
+       unset_nmi_callback();
+} 
+
+
+static void nmi_restore_registers(struct op_msrs * msrs)
+{
+       unsigned int const nr_ctrs = model->num_counters;
+       unsigned int const nr_ctrls = model->num_controls; 
+       struct op_msr * counters = msrs->counters;
+       struct op_msr * controls = msrs->controls;
+       unsigned int i;
+
+       for (i = 0; i < nr_ctrls; ++i) {
+               wrmsr(controls[i].addr,
+                       controls[i].saved.low,
+                       controls[i].saved.high);
+       }
+ 
+       for (i = 0; i < nr_ctrs; ++i) {
+               wrmsr(counters[i].addr,
+                       counters[i].saved.low,
+                       counters[i].saved.high);
+       }
+}
+ 
+
+static void nmi_cpu_shutdown(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs * msrs = &cpu_msrs[cpu];
+       nmi_restore_registers(msrs);
+}
+
+ 
+void nmi_release_counters(void)
+{
+       on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+       release_lapic_nmi();
+       free_msrs();
+}
+
+ 
+static void nmi_cpu_start(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs const * msrs = &cpu_msrs[cpu];
+       saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       model->start(msrs);
+}
+ 
+
+int nmi_start(void)
+{
+       on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+       return 0;
+}
+ 
+ 
+static void nmi_cpu_stop(void * dummy)
+{
+       unsigned int v;
+       int cpu = smp_processor_id();
+       struct op_msrs const * msrs = &cpu_msrs[cpu];
+       model->stop(msrs);
+
+       /* restoring APIC_LVTPC can trigger an apic error because the delivery
+        * mode and vector nr combination can be illegal. That's by design: on
+        * power on apic lvt contain a zero vector nr which are legal only for
+        * NMI delivery mode. So inhibit apic err before restoring lvtpc
+        */
+       if ( !(apic_read(APIC_LVTPC) & APIC_DM_NMI)
+            || (apic_read(APIC_LVTPC) & APIC_LVT_MASKED) )
+       {
+               printk("nmi_stop: APIC not good %ul\n", apic_read(APIC_LVTPC));
+               mdelay(5000);
+       }
+       v = apic_read(APIC_LVTERR);
+       apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+       apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
+       apic_write(APIC_LVTERR, v);
+}
+ 
+ 
+void nmi_stop(void)
+{
+       on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+}
+
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int __init p4_init(char * cpu_type)
+{ 
+       __u8 cpu_model = current_cpu_data.x86_model;
+
+       if (cpu_model > 4)
+               return 0;
+
+#ifndef CONFIG_SMP
+       strncpy (cpu_type, "i386/p4", XENOPROF_CPU_TYPE_SIZE - 1);
+       model = &op_p4_spec;
+       return 1;
+#else
+       switch (smp_num_siblings) {
+               case 1:
+                       strncpy (cpu_type, "i386/p4", 
+                                XENOPROF_CPU_TYPE_SIZE - 1);
+                       model = &op_p4_spec;
+                       return 1;
+
+               case 2:
+                       strncpy (cpu_type, "i386/p4-ht", 
+                                XENOPROF_CPU_TYPE_SIZE - 1);
+                       model = &op_p4_ht2_spec;
+                       return 1;
+       }
+#endif
+       printk("Xenoprof ERROR: P4 HyperThreading detected with > 2 threads\n");
+
+       return 0;
+}
+
+
+static int __init ppro_init(char *cpu_type)
+{
+       __u8 cpu_model = current_cpu_data.x86_model;
+
+       if (cpu_model > 0xd)
+               return 0;
+
+       if (cpu_model == 9) {
+               strncpy (cpu_type, "i386/p6_mobile", XENOPROF_CPU_TYPE_SIZE - 1);
+       } else if (cpu_model > 5) {
+               strncpy (cpu_type, "i386/piii", XENOPROF_CPU_TYPE_SIZE - 1);
+       } else if (cpu_model > 2) {
+               strncpy (cpu_type, "i386/pii", XENOPROF_CPU_TYPE_SIZE - 1);
+       } else {
+               strncpy (cpu_type, "i386/ppro", XENOPROF_CPU_TYPE_SIZE - 1);
+       }
+
+       model = &op_ppro_spec;
+       return 1;
+}
+
+int nmi_init(int *num_events, int *is_primary, char *cpu_type)
+{
+       __u8 vendor = current_cpu_data.x86_vendor;
+       __u8 family = current_cpu_data.x86;
+       int prim = 0;
+ 
+       if (!cpu_has_apic)
+               return -ENODEV;
+
+       if (primary_profiler == NULL) {
+               /* For now, only dom0 can be the primary profiler */
+               if (current->domain->domain_id == 0) {
+                       primary_profiler = current->domain;
+                       prim = 1;
+               }
+       }
+ 
+       /* Make sure string is NULL terminated */
+       cpu_type[XENOPROF_CPU_TYPE_SIZE - 1] = 0;
+
+       switch (vendor) {
+               case X86_VENDOR_AMD:
+                       /* Needs to be at least an Athlon (or hammer in 32bit mode) */
+
+                       switch (family) {
+                       default:
+                               return -ENODEV;
+                       case 6:
+                               model = &op_athlon_spec;
+                               strncpy (cpu_type, "i386/athlon", 
+                                        XENOPROF_CPU_TYPE_SIZE - 1);
+                               break;
+                       case 0xf:
+                               model = &op_athlon_spec;
+                               /* Actually it could be i386/hammer too, but give
+                                  user space a consistent name. */
+                               strncpy (cpu_type, "x86-64/hammer", 
+                                        XENOPROF_CPU_TYPE_SIZE - 1);
+                               break;
+                       }
+                       break;
+ 
+               case X86_VENDOR_INTEL:
+                       switch (family) {
+                               /* Pentium IV */
+                               case 0xf:
+                                       if (!p4_init(cpu_type))
+                                               return -ENODEV;
+                                       break;
+
+                               /* A P6-class processor */
+                               case 6:
+                                       if (!ppro_init(cpu_type))
+                                               return -ENODEV;
+                                       break;
+
+                               default:
+                                       return -ENODEV;
+                       }
+                       break;
+
+               default:
+                       return -ENODEV;
+       }
+
+       *num_events = model->num_counters;
+       *is_primary = prim;
+
+       return 0;
+}
+
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_counter.h
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/op_counter.h        Thu Apr  6 17:58:01 2006
@@ -0,0 +1,29 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+ 
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+        unsigned long count;
+        unsigned long enabled;
+        unsigned long event;
+        unsigned long kernel;
+        unsigned long user;
+        unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_model_athlon.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/op_model_athlon.c   Thu Apr  6 17:58:01 2006
@@ -0,0 +1,168 @@
+/**
+ * @file op_model_athlon.h
+ * athlon / K7 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 4
+#define NUM_CONTROLS 4
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+
+extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
+                              int mode, int event);
+ 
+static void athlon_fill_in_addresses(struct op_msrs * const msrs)
+{
+       msrs->counters[0].addr = MSR_K7_PERFCTR0;
+       msrs->counters[1].addr = MSR_K7_PERFCTR1;
+       msrs->counters[2].addr = MSR_K7_PERFCTR2;
+       msrs->counters[3].addr = MSR_K7_PERFCTR3;
+
+       msrs->controls[0].addr = MSR_K7_EVNTSEL0;
+       msrs->controls[1].addr = MSR_K7_EVNTSEL1;
+       msrs->controls[2].addr = MSR_K7_EVNTSEL2;
+       msrs->controls[3].addr = MSR_K7_EVNTSEL3;
+}
+
+ 
+static void athlon_setup_ctrs(struct op_msrs const * const msrs)
+{
+       unsigned int low, high;
+       int i;
+ 
+       /* clear all counters */
+       for (i = 0 ; i < NUM_CONTROLS; ++i) {
+               CTRL_READ(low, high, msrs, i);
+               CTRL_CLEAR(low);
+               CTRL_WRITE(low, high, msrs, i);
+       }
+       
+       /* avoid a false detection of ctr overflows in NMI handler */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               CTR_WRITE(1, msrs, i);
+       }
+
+       /* enable active counters */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               if (counter_config[i].enabled) {
+                       reset_value[i] = counter_config[i].count;
+
+                       CTR_WRITE(counter_config[i].count, msrs, i);
+
+                       CTRL_READ(low, high, msrs, i);
+                       CTRL_CLEAR(low);
+                       CTRL_SET_ENABLE(low);
+                       CTRL_SET_USR(low, counter_config[i].user);
+                       CTRL_SET_KERN(low, counter_config[i].kernel);
+                       CTRL_SET_UM(low, counter_config[i].unit_mask);
+                       CTRL_SET_EVENT(low, counter_config[i].event);
+                       CTRL_WRITE(low, high, msrs, i);
+               } else {
+                       reset_value[i] = 0;
+               }
+       }
+}
+
+ 
+static int athlon_check_ctrs(unsigned int const cpu,
+                             struct op_msrs const * const msrs,
+                             struct cpu_user_regs * const regs)
+
+{
+       unsigned int low, high;
+       int i;
+       int ovf = 0;
+       unsigned long eip = regs->eip;
+       int mode = 0;
+
+       if (guest_kernel_mode(current, regs))
+               mode = 1;
+       else if (ring_0(regs))
+               mode = 2;
+
+       for (i = 0 ; i < NUM_COUNTERS; ++i) {
+               CTR_READ(low, high, msrs, i);
+               if (CTR_OVERFLOWED(low)) {
+                       xenoprof_log_event(current, eip, mode, i);
+                       CTR_WRITE(reset_value[i], msrs, i);
+                       ovf = 1;
+               }
+       }
+
+       /* See op_model_ppro.c */
+       return ovf;
+}
+
+ 
+static void athlon_start(struct op_msrs const * const msrs)
+{
+       unsigned int low, high;
+       int i;
+       for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+               if (reset_value[i]) {
+                       CTRL_READ(low, high, msrs, i);
+                       CTRL_SET_ACTIVE(low);
+                       CTRL_WRITE(low, high, msrs, i);
+               }
+       }
+}
+
+
+static void athlon_stop(struct op_msrs const * const msrs)
+{
+       unsigned int low,high;
+       int i;
+
+       /* Subtle: stop on all counters to avoid race with
+        * setting our pm callback */
+       for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+               CTRL_READ(low, high, msrs, i);
+               CTRL_SET_INACTIVE(low);
+               CTRL_WRITE(low, high, msrs, i);
+       }
+}
+
+
+struct op_x86_model_spec const op_athlon_spec = {
+       .num_counters = NUM_COUNTERS,
+       .num_controls = NUM_CONTROLS,
+       .fill_in_addresses = &athlon_fill_in_addresses,
+       .setup_ctrs = &athlon_setup_ctrs,
+       .check_ctrs = &athlon_check_ctrs,
+       .start = &athlon_start,
+       .stop = &athlon_stop
+};
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_model_p4.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/op_model_p4.c       Thu Apr  6 17:58:01 2006
@@ -0,0 +1,739 @@
+/**
+ * @file op_model_p4.c
+ * P4 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_EVENTS 39
+
+#define NUM_COUNTERS_NON_HT 8
+#define NUM_ESCRS_NON_HT 45
+#define NUM_CCCRS_NON_HT 18
+#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
+
+#define NUM_COUNTERS_HT2 4
+#define NUM_ESCRS_HT2 23
+#define NUM_CCCRS_HT2 9
+#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+
+static unsigned int num_counters = NUM_COUNTERS_NON_HT;
+
+
+/* this has to be checked dynamically since the
+   hyper-threadedness of a chip is discovered at
+   kernel boot-time. */
+static inline void setup_num_counters(void)
+{
+#ifdef CONFIG_SMP
+       if (smp_num_siblings == 2)
+               num_counters = NUM_COUNTERS_HT2;
+#endif
+}
+
+static int inline addr_increment(void)
+{
+#ifdef CONFIG_SMP
+       return smp_num_siblings == 2 ? 2 : 1;
+#else
+       return 1;
+#endif
+}
+
+
+/* tables to simulate simplified hardware view of p4 registers */
+struct p4_counter_binding {
+       int virt_counter;
+       int counter_address;
+       int cccr_address;
+};
+
+struct p4_event_binding {
+       int escr_select;  /* value to put in CCCR */
+       int event_select; /* value to put in ESCR */
+       struct {
+               int virt_counter; /* for this counter... */
+               int escr_address; /* use this ESCR       */
+       } bindings[2];
+};
+
+/* nb: these CTR_* defines are a duplicate of defines in
+   event/i386.p4*events. */
+
+
+#define CTR_BPU_0      (1 << 0)
+#define CTR_MS_0       (1 << 1)
+#define CTR_FLAME_0    (1 << 2)
+#define CTR_IQ_4       (1 << 3)
+#define CTR_BPU_2      (1 << 4)
+#define CTR_MS_2       (1 << 5)
+#define CTR_FLAME_2    (1 << 6)
+#define CTR_IQ_5       (1 << 7)
+
+static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+       { CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
+       { CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
+       { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
+       { CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
+       { CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
+       { CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
+       { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
+       { CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
+};
+
+#define NUM_UNUSED_CCCRS       (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
+
+/* All CCCRs we don't use. */
+static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
+       MSR_P4_BPU_CCCR1,       MSR_P4_BPU_CCCR3,
+       MSR_P4_MS_CCCR1,        MSR_P4_MS_CCCR3,
+       MSR_P4_FLAME_CCCR1,     MSR_P4_FLAME_CCCR3,
+       MSR_P4_IQ_CCCR0,        MSR_P4_IQ_CCCR1,
+       MSR_P4_IQ_CCCR2,        MSR_P4_IQ_CCCR3
+};
+
+/* p4 event codes in libop/op_event.h are indices into this table. */
+
+static struct p4_event_binding p4_events[NUM_EVENTS] = {
+       
+       { /* BRANCH_RETIRED */
+               0x05, 0x06, 
+               { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+       
+       { /* MISPRED_BRANCH_RETIRED */
+               0x04, 0x03, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+       },
+       
+       { /* TC_DELIVER_MODE */
+               0x01, 0x01,
+               { { CTR_MS_0, MSR_P4_TC_ESCR0},  
+                 { CTR_MS_2, MSR_P4_TC_ESCR1} }
+       },
+       
+       { /* BPU_FETCH_REQUEST */
+               0x00, 0x03, 
+               { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
+                 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
+       },
+
+       { /* ITLB_REFERENCE */
+               0x03, 0x18,
+               { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
+       },
+
+       { /* MEMORY_CANCEL */
+               0x05, 0x02,
+               { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
+       },
+
+       { /* MEMORY_COMPLETE */
+               0x02, 0x08,
+               { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+       },
+
+       { /* LOAD_PORT_REPLAY */
+               0x02, 0x04, 
+               { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+       },
+
+       { /* STORE_PORT_REPLAY */
+               0x02, 0x05,
+               { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+       },
+
+       { /* MOB_LOAD_REPLAY */
+               0x02, 0x03,
+               { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
+       },
+
+       { /* PAGE_WALK_TYPE */
+               0x04, 0x01,
+               { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
+                 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
+       },
+
+       { /* BSQ_CACHE_REFERENCE */
+               0x07, 0x0c, 
+               { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+                 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
+       },
+
+       { /* IOQ_ALLOCATION */
+               0x06, 0x03, 
+               { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+                 { 0, 0 } }
+       },
+
+       { /* IOQ_ACTIVE_ENTRIES */
+               0x06, 0x1a, 
+               { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
+                 { 0, 0 } }
+       },
+
+       { /* FSB_DATA_ACTIVITY */
+               0x06, 0x17, 
+               { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+       },
+
+       { /* BSQ_ALLOCATION */
+               0x07, 0x05, 
+               { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+                 { 0, 0 } }
+       },
+
+       { /* BSQ_ACTIVE_ENTRIES */
+               0x07, 0x06,
+               { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
+                 { 0, 0 } }
+       },
+
+       { /* X87_ASSIST */
+               0x05, 0x03, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* SSE_INPUT_ASSIST */
+               0x01, 0x34,
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* PACKED_SP_UOP */
+               0x01, 0x08, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* PACKED_DP_UOP */
+               0x01, 0x0c, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* SCALAR_SP_UOP */
+               0x01, 0x0a, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* SCALAR_DP_UOP */
+               0x01, 0x0e,
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* 64BIT_MMX_UOP */
+               0x01, 0x02, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* 128BIT_MMX_UOP */
+               0x01, 0x1a, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* X87_FP_UOP */
+               0x01, 0x04, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* X87_SIMD_MOVES_UOP */
+               0x01, 0x2e, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* MACHINE_CLEAR */
+               0x05, 0x02, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* GLOBAL_POWER_EVENTS */
+               0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
+               { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+       },
+  
+       { /* TC_MS_XFER */
+               0x00, 0x05, 
+               { { CTR_MS_0, MSR_P4_MS_ESCR0},
+                 { CTR_MS_2, MSR_P4_MS_ESCR1} }
+       },
+
+       { /* UOP_QUEUE_WRITES */
+               0x00, 0x09,
+               { { CTR_MS_0, MSR_P4_MS_ESCR0},
+                 { CTR_MS_2, MSR_P4_MS_ESCR1} }
+       },
+
+       { /* FRONT_END_EVENT */
+               0x05, 0x08,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* EXECUTION_EVENT */
+               0x05, 0x0c,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* REPLAY_EVENT */
+               0x05, 0x09,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* INSTR_RETIRED */
+               0x04, 0x02, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+       },
+
+       { /* UOPS_RETIRED */
+               0x04, 0x01,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+       },
+
+       { /* UOP_TYPE */    
+               0x02, 0x02, 
+               { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
+                 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
+       },
+
+       { /* RETIRED_MISPRED_BRANCH_TYPE */
+               0x02, 0x05, 
+               { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+                 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+       },
+
+       { /* RETIRED_BRANCH_TYPE */
+               0x02, 0x04,
+               { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+                 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+       }
+};
+
+
+#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
+
+#define ESCR_RESERVED_BITS 0x80000003
+#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
+#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
+#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
+#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
+#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
+#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
+#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
+#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+
+#define CCCR_RESERVED_BITS 0x38030FFF
+#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
+#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
+#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
+#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
+#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
+#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
+#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
+#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+
+#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
+#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
+
+
+/* this assigns a "stagger" to the current CPU, which is used throughout
+   the code in this module as an extra array offset, to select the "even"
+   or "odd" part of all the divided resources. */
+static unsigned int get_stagger(void)
+{
+#ifdef CONFIG_SMP
+       int cpu = smp_processor_id();
+       return (cpu != first_cpu(cpu_sibling_map[cpu]));
+#endif 
+       return 0;
+}
+
+
+/* finally, mediate access to a real hardware counter
+   by passing a "virtual" counter numer to this macro,
+   along with your stagger setting. */
+#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
+
+static unsigned long reset_value[NUM_COUNTERS_NON_HT];
+
+
+static void p4_fill_in_addresses(struct op_msrs * const msrs)
+{
+       unsigned int i; 
+       unsigned int addr, stag;
+
+       setup_num_counters();
+       stag = get_stagger();
+
+       /* the counter registers we pay attention to */
+       for (i = 0; i < num_counters; ++i) {
+               msrs->counters[i].addr = 
+                       p4_counters[VIRT_CTR(stag, i)].counter_address;
+       }
+
+       /* FIXME: we don't save the 10 counters we don't use. */
+
+       /* 18 CCCR registers */
+       for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
+            addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
+               msrs->controls[i].addr = addr;
+       }
+       
+       /* 43 ESCR registers in three or four discontiguous groups */
+       for (addr = MSR_P4_BSU_ESCR0 + stag;
+            addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
+               msrs->controls[i].addr = addr;
+       }
+
+       /* no IQ_ESCR0/1 on some models, so we save BSU_ESCR0/1 a second
+        * time to avoid a special case in nmi_{save|restore}_registers() */
+       if (boot_cpu_data.x86_model >= 0x3) {
+               for (addr = MSR_P4_BSU_ESCR0 + stag;
+                    addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
+                       msrs->controls[i].addr = addr;
+               }
+       } else {
+               for (addr = MSR_P4_IQ_ESCR0 + stag;
+                    addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
+                       msrs->controls[i].addr = addr;
+               }
+       }
+
+       for (addr = MSR_P4_RAT_ESCR0 + stag;
+            addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+               msrs->controls[i].addr = addr;
+       }
+       
+       for (addr = MSR_P4_MS_ESCR0 + stag;
+            addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
+               msrs->controls[i].addr = addr;
+       }
+       
+       for (addr = MSR_P4_IX_ESCR0 + stag;
+            addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
+               msrs->controls[i].addr = addr;
+       }
+
+       /* there are 2 remaining non-contiguously located ESCRs */
+
+       if (num_counters == NUM_COUNTERS_NON_HT) {              
+               /* standard non-HT CPUs handle both remaining ESCRs */
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+       } else if (stag == 0) {
+               /* HT CPUs give the first remainder to the even thread, as
+                  the 32nd control register */
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+       } else {
+               /* and two copies of the second to the odd thread,
+                  for the 22nd and 23rd control registers */
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+       }
+}
+
+
+static void pmc_setup_one_p4_counter(unsigned int ctr)
+{
+       int i;
+       int const maxbind = 2;
+       unsigned int cccr = 0;
+       unsigned int escr = 0;
+       unsigned int high = 0;
+       unsigned int counter_bit;
+       struct p4_event_binding *ev = NULL;
+       unsigned int stag;
+
+       stag = get_stagger();
+       
+       /* convert from counter *number* to counter *bit* */
+       counter_bit = 1 << VIRT_CTR(stag, ctr);
+       
+       /* find our event binding structure. */
+       if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
+               printk(KERN_ERR 
+                      "oprofile: P4 event code 0x%lx out of range\n", 
+                      counter_config[ctr].event);
+               return;
+       }
+       
+       ev = &(p4_events[counter_config[ctr].event - 1]);
+       
+       for (i = 0; i < maxbind; i++) {
+               if (ev->bindings[i].virt_counter & counter_bit) {
+
+                       /* modify ESCR */
+                       ESCR_READ(escr, high, ev, i);
+                       ESCR_CLEAR(escr);
+                       if (stag == 0) {
+                               ESCR_SET_USR_0(escr, counter_config[ctr].user);
+                               ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
+                       } else {
+                               ESCR_SET_USR_1(escr, counter_config[ctr].user);
+                               ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
+                       }
+                       ESCR_SET_EVENT_SELECT(escr, ev->event_select);
+                       ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
+                       ESCR_WRITE(escr, high, ev, i);
+                      
+                       /* modify CCCR */
+                       CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+                       CCCR_CLEAR(cccr);
+                       CCCR_SET_REQUIRED_BITS(cccr);
+                       CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
+                       if (stag == 0) {
+                               CCCR_SET_PMI_OVF_0(cccr);
+                       } else {
+                               CCCR_SET_PMI_OVF_1(cccr);
+                       }
+                       CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+                       return;
+               }
+       }
+
+       printk(KERN_ERR 
+              "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
+              counter_config[ctr].event, stag, ctr);
+}
+
+
+static void p4_setup_ctrs(struct op_msrs const * const msrs)
+{
+       unsigned int i;
+       unsigned int low, high;
+       unsigned int addr;
+       unsigned int stag;
+
+       stag = get_stagger();
+
+       rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+       if (! MISC_PMC_ENABLED_P(low)) {
+               printk(KERN_ERR "oprofile: P4 PMC not available\n");
+               return;
+       }
+
+       /* clear the cccrs we will use */
+       for (i = 0 ; i < num_counters ; i++) {
+               rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+               CCCR_CLEAR(low);
+               CCCR_SET_REQUIRED_BITS(low);
+               wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+       }
+
+       /* clear cccrs outside our concern */
+       for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
+               rdmsr(p4_unused_cccr[i], low, high);
+               CCCR_CLEAR(low);
+               CCCR_SET_REQUIRED_BITS(low);
+               wrmsr(p4_unused_cccr[i], low, high);
+       }
+
+       /* clear all escrs (including those outside our concern) */
+       for (addr = MSR_P4_BSU_ESCR0 + stag;
+            addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
+               wrmsr(addr, 0, 0);
+       }
+
+       /* On older models, also clear MSR_P4_IQ_ESCR0/1 */
+       if (boot_cpu_data.x86_model < 0x3) {
+               wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
+               wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
+       }
+
+       for (addr = MSR_P4_RAT_ESCR0 + stag;
+            addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) {
+               wrmsr(addr, 0, 0);
+       }
+       
+       for (addr = MSR_P4_MS_ESCR0 + stag;
+            addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 
+               wrmsr(addr, 0, 0);
+       }
+       
+       for (addr = MSR_P4_IX_ESCR0 + stag;
+            addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 
+               wrmsr(addr, 0, 0);
+       }
+
+       if (num_counters == NUM_COUNTERS_NON_HT) {              
+               wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+               wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+       } else if (stag == 0) {
+               wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+       } else {
+               wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+       }               
+       
+       /* setup all counters */
+       for (i = 0 ; i < num_counters ; ++i) {
+               if (counter_config[i].enabled) {
+                       reset_value[i] = counter_config[i].count;
+                       pmc_setup_one_p4_counter(i);
+                       CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+               } else {
+                       reset_value[i] = 0;
+               }
+       }
+}
+
+
+extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
+                              int mode, int event);
+
+static int p4_check_ctrs(unsigned int const cpu,
+                         struct op_msrs const * const msrs,
+                         struct cpu_user_regs * const regs)
+{
+       unsigned long ctr, low, high, stag, real;
+       int i;
+       int ovf = 0;
+       unsigned long eip = regs->eip;
+       int mode = 0;
+
+       if (guest_kernel_mode(current, regs))
+               mode = 1;
+       else if (ring_0(regs))
+               mode = 2;
+
+       stag = get_stagger();
+
+       for (i = 0; i < num_counters; ++i) {
+               
+               if (!reset_value[i]) 
+                       continue;
+
+               /* 
+                * there is some eccentricity in the hardware which
+                * requires that we perform 2 extra corrections:
+                *
+                * - check both the CCCR:OVF flag for overflow and the
+                *   counter high bit for un-flagged overflows.
+                *
+                * - write the counter back twice to ensure it gets
+                *   updated properly.
+                * 
+                * the former seems to be related to extra NMIs happening
+                * during the current NMI; the latter is reported as errata
+                * N15 in intel doc 249199-029, pentium 4 specification
+                * update, though their suggested work-around does not
+                * appear to solve the problem.
+                */
+               
+               real = VIRT_CTR(stag, i);
+
+               CCCR_READ(low, high, real);
+               CTR_READ(ctr, high, real);
+               if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+                       xenoprof_log_event(current, eip, mode, i);
+                       CTR_WRITE(reset_value[i], real);
+                       CCCR_CLEAR_OVF(low);
+                       CCCR_WRITE(low, high, real);
+                       CTR_WRITE(reset_value[i], real);
+                       ovf = 1;
+               }
+       }
+
+       /* P4 quirk: you have to re-unmask the apic vector */
+       apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+       return ovf;
+}
+
+
+static void p4_start(struct op_msrs const * const msrs)
+{
+       unsigned int low, high, stag;
+       int i;
+
+       stag = get_stagger();
+
+       for (i = 0; i < num_counters; ++i) {
+               if (!reset_value[i])
+                       continue;
+               CCCR_READ(low, high, VIRT_CTR(stag, i));
+               CCCR_SET_ENABLE(low);
+               CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+       }
+}
+
+
+static void p4_stop(struct op_msrs const * const msrs)
+{
+       unsigned int low, high, stag;
+       int i;
+
+       stag = get_stagger();
+
+       for (i = 0; i < num_counters; ++i) {
+               CCCR_READ(low, high, VIRT_CTR(stag, i));
+               CCCR_SET_DISABLE(low);
+               CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+       }
+}
+
+
+#ifdef CONFIG_SMP
+struct op_x86_model_spec const op_p4_ht2_spec = {
+       .num_counters = NUM_COUNTERS_HT2,
+       .num_controls = NUM_CONTROLS_HT2,
+       .fill_in_addresses = &p4_fill_in_addresses,
+       .setup_ctrs = &p4_setup_ctrs,
+       .check_ctrs = &p4_check_ctrs,
+       .start = &p4_start,
+       .stop = &p4_stop
+};
+#endif
+
+struct op_x86_model_spec const op_p4_spec = {
+       .num_counters = NUM_COUNTERS_NON_HT,
+       .num_controls = NUM_CONTROLS_NON_HT,
+       .fill_in_addresses = &p4_fill_in_addresses,
+       .setup_ctrs = &p4_setup_ctrs,
+       .check_ctrs = &p4_check_ctrs,
+       .start = &p4_start,
+       .stop = &p4_stop
+};
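
A note on the stagger/VIRT_CTR() pair used throughout this file: it is
how the P4 code divides one physical set of 8 counters between two
hyper-threaded siblings, with the even sibling (stagger 0) taking the
low half of p4_counters[] and the odd sibling (stagger 1) the high
half. A stand-alone sketch of the mapping (illustration only, not
part of this changeset):

    #include <stdio.h>

    #define NUM_COUNTERS_HT2 4
    static unsigned int num_counters = NUM_COUNTERS_HT2;

    /* same arithmetic as the VIRT_CTR() macro above */
    #define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))

    int main(void)
    {
        /* physical slots, in p4_counters[] order */
        static const char *slots[] = {
            "CTR_BPU_0", "CTR_MS_0", "CTR_FLAME_0", "CTR_IQ_4",
            "CTR_BPU_2", "CTR_MS_2", "CTR_FLAME_2", "CTR_IQ_5"
        };
        int stagger, i;

        for (stagger = 0; stagger <= 1; stagger++)
            for (i = 0; i < NUM_COUNTERS_HT2; i++)
                printf("sibling %d, virtual counter %d -> %s\n",
                       stagger, i, slots[VIRT_CTR(stagger, i)]);
        return 0;
    }
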
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_model_ppro.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/op_model_ppro.c     Thu Apr  6 17:58:01 2006
@@ -0,0 +1,153 @@
+/**
+ * @file op_model_ppro.h
+ * pentium pro / P6 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 2
+#define NUM_CONTROLS 2
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+ 
+static void ppro_fill_in_addresses(struct op_msrs * const msrs)
+{
+       msrs->counters[0].addr = MSR_P6_PERFCTR0;
+       msrs->counters[1].addr = MSR_P6_PERFCTR1;
+       
+       msrs->controls[0].addr = MSR_P6_EVNTSEL0;
+       msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+}
+
+
+static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+{
+       unsigned int low, high;
+       int i;
+
+       /* clear all counters */
+       for (i = 0 ; i < NUM_CONTROLS; ++i) {
+               CTRL_READ(low, high, msrs, i);
+               CTRL_CLEAR(low);
+               CTRL_WRITE(low, high, msrs, i);
+       }
+       
+       /* avoid a false detection of ctr overflows in NMI handler */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               CTR_WRITE(1, msrs, i);
+       }
+
+       /* enable active counters */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               if (counter_config[i].enabled) {
+                       reset_value[i] = counter_config[i].count;
+
+                       CTR_WRITE(counter_config[i].count, msrs, i);
+
+                       CTRL_READ(low, high, msrs, i);
+                       CTRL_CLEAR(low);
+                       CTRL_SET_ENABLE(low);
+                       CTRL_SET_USR(low, counter_config[i].user);
+                       CTRL_SET_KERN(low, counter_config[i].kernel);
+                       CTRL_SET_UM(low, counter_config[i].unit_mask);
+                       CTRL_SET_EVENT(low, counter_config[i].event);
+                       CTRL_WRITE(low, high, msrs, i);
+               }
+       }
+}
+
+
+extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
+                              int mode, int event);
+ 
+static int ppro_check_ctrs(unsigned int const cpu,
+                           struct op_msrs const * const msrs,
+                           struct cpu_user_regs * const regs)
+{
+       unsigned int low, high;
+       int i;
+       int ovf = 0;
+       unsigned long eip = regs->eip;
+       int mode = 0;
+
+       if ( guest_kernel_mode(current, regs) ) 
+               mode = 1;
+       else if ( ring_0(regs) )
+               mode = 2;
+ 
+       for (i = 0 ; i < NUM_COUNTERS; ++i) {
+               CTR_READ(low, high, msrs, i);
+               if (CTR_OVERFLOWED(low)) {
+                       xenoprof_log_event(current, eip, mode, i);
+                       CTR_WRITE(reset_value[i], msrs, i);
+                       ovf = 1;
+               }
+       }
+
+       /* Only the P6-based Pentium M needs to re-unmask the APIC vector,
+        * but it doesn't hurt other P6 variants */
+       apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+       return ovf;
+}
+
+ 
+static void ppro_start(struct op_msrs const * const msrs)
+{
+       unsigned int low,high;
+       CTRL_READ(low, high, msrs, 0);
+       CTRL_SET_ACTIVE(low);
+       CTRL_WRITE(low, high, msrs, 0);
+}
+
+
+static void ppro_stop(struct op_msrs const * const msrs)
+{
+       unsigned int low,high;
+       CTRL_READ(low, high, msrs, 0);
+       CTRL_SET_INACTIVE(low);
+       CTRL_WRITE(low, high, msrs, 0);
+}
+
+
+struct op_x86_model_spec const op_ppro_spec = {
+       .num_counters = NUM_COUNTERS,
+       .num_controls = NUM_CONTROLS,
+       .fill_in_addresses = &ppro_fill_in_addresses,
+       .setup_ctrs = &ppro_setup_ctrs,
+       .check_ctrs = &ppro_check_ctrs,
+       .start = &ppro_start,
+       .stop = &ppro_stop
+};
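
For reference, ppro_setup_ctrs() above composes each EVNTSEL control
word from the CTRL_SET_* macros. A worked example of the resulting
bit layout (illustration only, not part of this changeset; event 0x79
is the P6 CPU_CLK_UNHALTED event, chosen here purely as an example):

    #include <stdio.h>

    /* same bit positions as the CTRL_SET_* macros above, with the
       macro arguments parenthesised for safety */
    #define CTRL_SET_ENABLE(val)   ((val) |= 1 << 20)
    #define CTRL_SET_USR(val, u)   ((val) |= (((u) & 1) << 16))
    #define CTRL_SET_KERN(val, k)  ((val) |= (((k) & 1) << 17))
    #define CTRL_SET_UM(val, m)    ((val) |= ((m) << 8))
    #define CTRL_SET_EVENT(val, e) ((val) |= (e))

    int main(void)
    {
        unsigned int low = 0;

        CTRL_SET_ENABLE(low);       /* bit 20: enable (EVNTSEL0 gates both) */
        CTRL_SET_USR(low, 1);       /* bit 16: count user mode              */
        CTRL_SET_KERN(low, 1);      /* bit 17: count kernel mode            */
        CTRL_SET_UM(low, 0);        /* bits 8-15: unit mask                 */
        CTRL_SET_EVENT(low, 0x79);  /* bits 0-7: event select               */

        printf("EVNTSEL low word = %#010x\n", low);  /* 0x00130079 */
        return 0;
    }
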
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_x86_model.h
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/op_x86_model.h      Thu Apr  6 17:58:01 2006
@@ -0,0 +1,51 @@
+/**
+ * @file op_x86_model.h
+ * interface to x86 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#ifndef OP_X86_MODEL_H
+#define OP_X86_MODEL_H
+
+struct op_saved_msr {
+       unsigned int high;
+       unsigned int low;
+};
+
+struct op_msr {
+       unsigned long addr;
+       struct op_saved_msr saved;
+};
+
+struct op_msrs {
+       struct op_msr * counters;
+       struct op_msr * controls;
+};
+
+struct pt_regs;
+
+/* The model vtable abstracts the differences between
+ * various x86 CPU model's perfctr support.
+ */
+struct op_x86_model_spec {
+       unsigned int const num_counters;
+       unsigned int const num_controls;
+       void (*fill_in_addresses)(struct op_msrs * const msrs);
+       void (*setup_ctrs)(struct op_msrs const * const msrs);
+       int (*check_ctrs)(unsigned int const cpu, 
+                         struct op_msrs const * const msrs,
+                         struct cpu_user_regs * const regs);
+       void (*start)(struct op_msrs const * const msrs);
+       void (*stop)(struct op_msrs const * const msrs);
+};
+
+extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec const op_p4_spec;
+extern struct op_x86_model_spec const op_p4_ht2_spec;
+extern struct op_x86_model_spec const op_athlon_spec;
+
+#endif /* OP_X86_MODEL_H */
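
The vtable above is what keeps the NMI driver (nmi_int.c, elsewhere in
this changeset) model-agnostic. A minimal stand-alone sketch of how
such a vtable gets driven; the stub model and the simplified types
here are stand-ins for the real code, not part of this changeset:

    #include <stdio.h>
    #include <stdlib.h>

    struct op_msr  { unsigned long addr; };
    struct op_msrs { struct op_msr *counters; struct op_msr *controls; };

    struct op_x86_model_spec {
        unsigned int num_counters, num_controls;
        void (*fill_in_addresses)(struct op_msrs *msrs);
        void (*setup_ctrs)(struct op_msrs *msrs);
        void (*start)(struct op_msrs *msrs);
        void (*stop)(struct op_msrs *msrs);
    };

    /* stub model standing in for op_ppro_spec and friends */
    static void stub_fill(struct op_msrs *m)  { (void)m; puts("fill_in_addresses"); }
    static void stub_setup(struct op_msrs *m) { (void)m; puts("setup_ctrs"); }
    static void stub_start(struct op_msrs *m) { (void)m; puts("start"); }
    static void stub_stop(struct op_msrs *m)  { (void)m; puts("stop"); }

    static const struct op_x86_model_spec stub_spec = {
        2, 2, stub_fill, stub_setup, stub_start, stub_stop
    };

    int main(void)
    {
        const struct op_x86_model_spec *model = &stub_spec;
        struct op_msrs msrs;

        msrs.counters = calloc(model->num_counters, sizeof(struct op_msr));
        msrs.controls = calloc(model->num_controls, sizeof(struct op_msr));

        model->fill_in_addresses(&msrs); /* discover per-model MSR addresses */
        model->setup_ctrs(&msrs);        /* program events and reset values  */
        model->start(&msrs);             /* enable counting                  */
        /* ... overflow NMIs would now call model->check_ctrs() ... */
        model->stop(&msrs);              /* disable counting                 */

        free(msrs.counters);
        free(msrs.controls);
        return 0;
    }
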
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/xenoprof.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/xenoprof.c  Thu Apr  6 17:58:01 2006
@@ -0,0 +1,553 @@
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * written by Aravind Menon & Jose Renato Santos
+ *            (email: xenoprof@xxxxxxxxxxxxx)
+ */
+
+#include <xen/sched.h>
+#include <public/xenoprof.h>
+
+#include "op_counter.h"
+
+/* Limit amount of pages used for shared buffer (per domain) */
+#define MAX_OPROF_SHARED_PAGES 32
+
+int active_domains[MAX_OPROF_DOMAINS];
+int active_ready[MAX_OPROF_DOMAINS];
+unsigned int adomains = 0;
+unsigned int activated = 0;
+struct domain *primary_profiler = NULL;
+int xenoprof_state = XENOPROF_IDLE;
+
+u64 total_samples = 0;
+u64 invalid_buffer_samples = 0;
+u64 corrupted_buffer_samples = 0;
+u64 lost_samples = 0;
+u64 active_samples = 0;
+u64 idle_samples = 0;
+u64 others_samples = 0;
+
+
+extern int nmi_init(int *num_events, int *is_primary, char *cpu_type);
+extern int nmi_reserve_counters(void);
+extern int nmi_setup_events(void);
+extern int nmi_enable_virq(void);
+extern int nmi_start(void);
+extern void nmi_stop(void);
+extern void nmi_disable_virq(void);
+extern void nmi_release_counters(void);
+
+int is_active(struct domain *d)
+{
+    xenoprof_t *x = d->xenoprof;
+    return ( x && (x->domain_type == XENOPROF_DOMAIN_ACTIVE) );
+}
+
+int is_profiled(struct domain *d)
+{
+    return is_active(d);
+}
+
+static void xenoprof_reset_stat(void)
+{
+    total_samples = 0;
+    invalid_buffer_samples = 0;
+    corrupted_buffer_samples = 0;
+    lost_samples = 0;
+    active_samples = 0;
+    idle_samples = 0;
+    others_samples = 0;
+
+    return;
+}
+
+static void xenoprof_reset_buf(struct domain *d)
+{
+    int j;
+    xenoprof_buf_t *buf;
+
+    if ( !d->xenoprof )
+    {
+        printk("xenoprof_reset_buf: ERROR - Unexpected Xenoprof NULL pointer 
\n");
+        return;
+    }
+
+    for ( j=0; j<MAX_VIRT_CPUS; j++ )
+    {
+        buf = d->xenoprof->vcpu[j].buffer;
+        if ( buf )
+        {
+            buf->event_head = 0;
+            buf->event_tail = 0;
+        }
+    }
+}
+
+int active_index(struct domain *d)
+{
+    int i;
+    int id;
+
+    id = d->domain_id;
+    for ( i=0; i<adomains; i++ )
+        if ( active_domains[i] == id )
+        {
+            return i;
+        }
+    return -1;
+}
+
+int set_active(struct domain *d)
+{
+    int ind;
+    xenoprof_t *x;
+
+    ind = active_index(d);
+    if ( ind < 0 )
+        return -EPERM;
+
+    x = d->xenoprof;
+    if ( x )
+    {
+        x->domain_ready = 1;
+        x->domain_type = XENOPROF_DOMAIN_ACTIVE;
+        active_ready[ind] = 1;
+        activated++;
+        return 0;
+    }
+    else
+        return -EPERM;
+}
+
+int reset_active(struct domain *d)
+{
+    int ind;
+    xenoprof_t *x;
+
+    ind = active_index(d);
+    if ( ind < 0 )
+        return -EPERM;
+
+    x = d->xenoprof;
+    if ( x )
+    {
+        x->domain_ready = 0;
+        x->domain_type = XENOPROF_DOMAIN_IGNORED;
+        active_ready[ind] = 0;
+        activated--;
+        if ( activated <= 0 )
+            adomains = 0;
+        return 0;
+    }
+    else
+        return -EPERM;
+}
+
+int set_active_domains(int num)
+{
+    int primary;
+    int i;
+    struct domain *d;
+
+    /* reset any existing active domains from previous runs */
+    for ( i=0; i<adomains; i++ )
+    {
+        if ( active_ready[i] )
+        {
+            d = find_domain_by_id(active_domains[i]);
+            if ( d )
+            {
+                reset_active(d);
+                put_domain(d);
+            }
+        }
+    }
+
+    adomains = num;
+
+    /* Add primary profiler to list of active domains if not there yet */
+    primary = active_index(primary_profiler);
+    if ( primary == -1 )
+    {
+        /* return if there is no space left on list */
+        if ( num >= MAX_OPROF_DOMAINS )
+            return -E2BIG;
+        else
+        {
+            active_domains[num] = primary_profiler->domain_id;
+            num++;
+        }
+    }
+
+    adomains = num;
+    activated = 0;
+
+    for ( i=0; i<adomains; i++ )
+    {
+        active_ready[i] = 0;
+    }
+
+    return 0;
+}
+
+void xenoprof_log_event(struct vcpu *vcpu, unsigned long eip, int mode, int event)
+{
+    xenoprof_vcpu_t *v;
+    xenoprof_buf_t *buf;
+    int head;
+    int tail;
+    int size;
+
+
+    total_samples++;
+
+    /* Ignore samples from unmonitored domains. Ideally, idle-domain
+       samples would be counted separately from those of other
+       unmonitored domains; for now they all go into others_samples. */
+    if ( !is_profiled(vcpu->domain) )
+    {
+        others_samples++;
+        return;
+    }
+
+    v = &vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id];
+
+    /* Sanity check. Should never happen */ 
+    if ( !v->buffer )
+    {
+        invalid_buffer_samples++;
+        return;
+    }
+
+    buf = vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id].buffer;
+
+    head = buf->event_head;
+    tail = buf->event_tail;
+    size = v->event_size;
+
+    /* make sure indexes in shared buffer are sane */
+    if ( (head < 0) || (head >= size) || (tail < 0) || (tail >= size) )
+    {
+        corrupted_buffer_samples++;
+        return;
+    }
+
+    if ( (head == tail - 1) || (head == size - 1 && tail == 0) )
+    {
+        buf->lost_samples++;
+        lost_samples++;
+    }
+    else
+    {
+        buf->event_log[head].eip = eip;
+        buf->event_log[head].mode = mode;
+        buf->event_log[head].event = event;
+        head++;
+        if ( head >= size )
+            head = 0;
+        buf->event_head = head;
+        active_samples++;
+        if ( mode == 0 )
+            buf->user_samples++;
+        else if ( mode == 1 )
+            buf->kernel_samples++;
+        else
+            buf->xen_samples++;
+    }
+}
+
+char *alloc_xenoprof_buf(struct domain *d, int npages)
+{
+    char *rawbuf;
+    int i, order;
+
+    /* allocate pages to store sample buffer shared with domain */
+    order = get_order_from_pages(npages);
+    rawbuf = alloc_xenheap_pages(order);
+    if( rawbuf == NULL )
+    {
+        printk("alloc_xenoprof_buf(): memory allocation failed\n");
+        return NULL;
+    }
+
+    /* Share pages so that kernel can map it */
+    for ( i=0; i<npages; i++ )
+    {
+        share_xen_page_with_guest(virt_to_page(rawbuf + i * PAGE_SIZE), 
+                                 d, XENSHARE_writable);
+    }
+
+    return rawbuf;
+}
+
+int alloc_xenoprof_struct(struct domain *d, int max_samples)
+{
+    struct vcpu *v;
+    int nvcpu, npages, bufsize, max_bufsize;
+    int i;
+
+    d->xenoprof = xmalloc(xenoprof_t);
+
+    if ( !d->xenoprof )
+    {
+        printk ("alloc_xenoprof_struct(): memory "
+                "allocation (xmalloc) failed\n");
+        return -ENOMEM;
+    }
+
+    memset(d->xenoprof, 0, sizeof(*d->xenoprof));
+
+    nvcpu = 0;
+    for_each_vcpu(d, v)
+        nvcpu++;
+
+    /* reduce buffer size if necessary to limit pages allocated */
+    bufsize = sizeof(xenoprof_buf_t) +
+        (max_samples - 1) * sizeof(struct event_log);
+    max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu;
+    if ( bufsize > max_bufsize )
+    {
+        bufsize = max_bufsize;
+        max_samples = ( (max_bufsize - sizeof(xenoprof_buf_t)) /
+                        sizeof(struct event_log) ) + 1;
+    }
+
+    npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1;
+    d->xenoprof->rawbuf = alloc_xenoprof_buf(d, npages);
+    if ( !d->xenoprof->rawbuf )
+    {
+        xfree(d->xenoprof);
+        d->xenoprof = NULL;
+        return -ENOMEM;
+    }
+
+    d->xenoprof->npages = npages;
+    d->xenoprof->nbuf = nvcpu;
+    d->xenoprof->bufsize = bufsize;
+    d->xenoprof->domain_ready = 0;
+    d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED;
+
+    /* Update buffer pointers for active vcpus */
+    i=0;
+    for_each_vcpu(d, v)
+    {
+        d->xenoprof->vcpu[v->vcpu_id].event_size = max_samples;
+        d->xenoprof->vcpu[v->vcpu_id].buffer =
+            (xenoprof_buf_t *)&d->xenoprof->rawbuf[i * bufsize];
+        d->xenoprof->vcpu[v->vcpu_id].buffer->event_size = max_samples;
+        d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id = v->vcpu_id;
+
+        i++;
+        /* in the unlikely case that the number of active vcpus changes */
+        if ( i >= nvcpu )
+            break;
+    }
+
+    return 0;
+}
+
+void free_xenoprof_pages(struct domain *d)
+{
+    xenoprof_t *x;
+    int order;
+
+    x = d->xenoprof;
+
+    if ( x )
+    {
+        if ( x->rawbuf )
+        {
+            order = get_order_from_pages(x->npages);
+            free_xenheap_pages(x->rawbuf, order);
+        }
+        xfree(x);
+        d->xenoprof = NULL;
+    }
+}
+
+int xenoprof_init(int max_samples, xenoprof_init_result_t *init_result)
+{
+    xenoprof_init_result_t result;
+    int is_primary, num_events;
+    struct domain *d = current->domain;
+    int ret;
+
+    ret = nmi_init(&num_events, &is_primary, result.cpu_type);
+    if ( is_primary )
+        primary_profiler = current->domain;
+
+    if ( ret < 0 )
+        goto err;
+
+    /* We allocate the xenoprof struct and buffers only the first time
+       xenoprof_init is called. Memory is then kept until the domain is
+       destroyed. */
+    if ( !d->xenoprof )
+    {
+        if ( (ret = alloc_xenoprof_struct(d, max_samples)) < 0 )
+            goto err;
+    }
+
+    xenoprof_reset_buf(d);
+
+    d->xenoprof->domain_type  = XENOPROF_DOMAIN_IGNORED;
+    d->xenoprof->domain_ready = 0;
+    d->xenoprof->is_primary = is_primary;
+
+    result.is_primary = is_primary;
+    result.num_events = num_events;
+    result.nbuf = d->xenoprof->nbuf;
+    result.bufsize = d->xenoprof->bufsize;
+    result.buf_maddr = __pa(d->xenoprof->rawbuf);
+
+    if ( copy_to_user((void *)init_result, (void *)&result, sizeof(result)) )
+    {
+        ret = -EFAULT;
+        goto err;
+    }
+
+    return ret;
+
+ err:
+    if ( primary_profiler == current->domain )
+        primary_profiler = NULL;
+    return ret;
+}
+
+#define PRIV_OP(op) ( (op == XENOPROF_set_active) \
+                   || (op == XENOPROF_reserve_counters) \
+                   || (op == XENOPROF_setup_events) \
+                   || (op == XENOPROF_start) \
+                   || (op == XENOPROF_stop) \
+                   || (op == XENOPROF_release_counters) \
+                   || (op == XENOPROF_shutdown))
+
+int do_xenoprof_op(int op, unsigned long arg1, unsigned long arg2)
+{
+    int ret = 0;
+
+    if ( PRIV_OP(op) && current->domain != primary_profiler )
+    {
+        printk("xenoprof: dom %d denied privileged operation %d\n",
+               current->domain->domain_id, op);
+        return -EPERM;
+    }
+
+    switch ( op )
+    {
+    case XENOPROF_init:
+        ret = xenoprof_init((int)arg1, (xenoprof_init_result_t *)arg2);
+        break;
+
+    case XENOPROF_set_active:
+        if ( xenoprof_state != XENOPROF_IDLE )
+            return -EPERM;
+        if ( arg2 > MAX_OPROF_DOMAINS )
+            return -E2BIG;
+        if ( copy_from_user((void *)&active_domains, 
+                            (void *)arg1, arg2*sizeof(int)) )
+            return -EFAULT;
+        ret = set_active_domains(arg2);
+        break;
+
+    case XENOPROF_reserve_counters:
+        if ( xenoprof_state != XENOPROF_IDLE )
+            return -EPERM;
+        ret = nmi_reserve_counters();
+        if ( !ret )
+            xenoprof_state = XENOPROF_COUNTERS_RESERVED;
+        break;
+
+    case XENOPROF_setup_events:
+        if ( xenoprof_state != XENOPROF_COUNTERS_RESERVED )
+            return -EPERM;
+        if ( adomains == 0 )
+        {
+            set_active_domains(0);
+        }
+
+        if ( copy_from_user((void *)&counter_config, (void *)arg1, 
+                            arg2 * sizeof(struct op_counter_config)) )
+            return -EFAULT;
+        ret = nmi_setup_events();
+        if ( !ret )
+            xenoprof_state = XENOPROF_READY;
+        break;
+
+    case XENOPROF_enable_virq:
+        if ( current->domain == primary_profiler )
+        {
+            nmi_enable_virq();
+            xenoprof_reset_stat();
+        }
+        xenoprof_reset_buf(current->domain);
+        ret = set_active(current->domain);
+        break;
+
+    case XENOPROF_start:
+        if ( (xenoprof_state == XENOPROF_READY) &&
+             (activated == adomains) )
+        {
+            ret = nmi_start();
+        }
+        else 
+            ret = -EPERM;
+
+        if ( !ret )
+            xenoprof_state = XENOPROF_PROFILING;
+        break;
+
+    case XENOPROF_stop:
+        if ( xenoprof_state != XENOPROF_PROFILING )
+            return -EPERM;
+        nmi_stop();
+        xenoprof_state = XENOPROF_READY;
+        break;
+
+    case XENOPROF_disable_virq:
+        if ( (xenoprof_state == XENOPROF_PROFILING) && 
+             (is_active(current->domain)) )
+            return -EPERM;
+        ret = reset_active(current->domain);
+        break;
+
+    case XENOPROF_release_counters:
+        if ( (xenoprof_state == XENOPROF_COUNTERS_RESERVED) ||
+             (xenoprof_state == XENOPROF_READY) )
+        {
+            xenoprof_state = XENOPROF_IDLE;
+            nmi_release_counters();
+            nmi_disable_virq();
+        }
+        else
+            ret = -EPERM;
+        break;
+
+    case XENOPROF_shutdown:
+        if ( xenoprof_state == XENOPROF_IDLE )
+        {
+            activated = 0;
+            adomains = 0;
+            primary_profiler = NULL;
+            ret = 0;
+        }
+        else 
+            ret = -EPERM;
+        break;
+
+    default:
+        ret = -EINVAL;
+    }
+
+    if ( ret < 0 )
+        printk("xenoprof: operation %d failed for dom %d (status : %d)\n",
+               op, current->domain->domain_id, ret);
+
+    return ret;
+}
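
xenoprof_log_event() above is the producer half of a single-producer/
single-consumer ring per VCPU: Xen advances event_head, the profiled
guest advances event_tail, and one slot is always left empty so that
head == tail means "empty" and the full test is the one used above.
A user-space sketch of both halves; the consumer side is an
assumption about what a guest's xenoprof driver would do and is not
part of this changeset:

    #include <stdio.h>
    #include <stdint.h>

    #define SIZE 8                       /* stands in for event_size */

    struct sample { uint64_t eip; uint8_t mode, event; };

    static struct sample ring[SIZE];
    static int head, tail;               /* head: Xen, tail: guest */

    static int log_sample(uint64_t eip, uint8_t mode, uint8_t event)
    {
        /* same full test as xenoprof_log_event() */
        if (head == tail - 1 || (head == SIZE - 1 && tail == 0))
            return 0;                    /* full: sample is lost */
        ring[head] = (struct sample){ eip, mode, event };
        head = (head + 1) % SIZE;
        return 1;
    }

    static int drain_sample(struct sample *s)
    {
        if (tail == head)
            return 0;                    /* empty */
        *s = ring[tail];
        tail = (tail + 1) % SIZE;
        return 1;
    }

    int main(void)
    {
        struct sample s;
        int i, lost = 0;

        for (i = 0; i < 10; i++)         /* overrun on purpose */
            lost += !log_sample(0x1000 + i, 1, 0);
        while (drain_sample(&s))
            printf("eip=%#llx mode=%u event=%u\n",
                   (unsigned long long)s.eip,
                   (unsigned)s.mode, (unsigned)s.event);
        printf("%d samples lost\n", lost);
        return 0;
    }
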
diff -r 388c59fefaa6 -r e049baa9055d xen/include/public/xenoprof.h
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/include/public/xenoprof.h     Thu Apr  6 17:58:01 2006
@@ -0,0 +1,72 @@
+/******************************************************************************
+ * xenoprof.h
+ * 
+ * Interface for enabling system wide profiling based on hardware performance
+ * counters
+ * 
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Written by Aravind Menon & Jose Renato Santos
+ */
+
+#ifndef __XEN_PUBLIC_XENOPROF_H__
+#define __XEN_PUBLIC_XENOPROF_H__
+
+/*
+ * Commands to HYPERVISOR_pmc_op().
+ */
+#define XENOPROF_init               0
+#define XENOPROF_set_active         1
+#define XENOPROF_reserve_counters   3
+#define XENOPROF_setup_events       4
+#define XENOPROF_enable_virq        5
+#define XENOPROF_start              6
+#define XENOPROF_stop               7
+#define XENOPROF_disable_virq       8
+#define XENOPROF_release_counters   9
+#define XENOPROF_shutdown          10
+
+#define MAX_OPROF_EVENTS    32
+#define MAX_OPROF_DOMAINS   25 
+#define XENOPROF_CPU_TYPE_SIZE 64
+
+/* Xenoprof performance events (not Xen events) */
+struct event_log {
+    uint64_t eip;
+    uint8_t mode;
+    uint8_t event;
+};
+
+/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
+typedef struct xenoprof_buf {
+    uint32_t event_head;
+    uint32_t event_tail;
+    uint32_t event_size;
+    uint32_t vcpu_id;
+    uint64_t xen_samples;
+    uint64_t kernel_samples;
+    uint64_t user_samples;
+    uint64_t lost_samples;
+    struct event_log event_log[1];
+} xenoprof_buf_t;
+
+typedef struct xenoprof_init_result {
+    int32_t  num_events;
+    int32_t  is_primary;
+    int32_t  nbuf;
+    int32_t  bufsize;
+    uint64_t buf_maddr;
+    char cpu_type[XENOPROF_CPU_TYPE_SIZE];
+} xenoprof_init_result_t;
+
+
+#endif /* __XEN_PUBLIC_XENOPROF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
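
The interface above is sized by alloc_xenoprof_struct() in xenoprof.c:
one xenoprof_buf_t per VCPU, grown to max_samples event_log slots but
clamped so the whole allocation stays within MAX_OPROF_SHARED_PAGES.
A sketch of that arithmetic using the layout above (exact byte counts
depend on compiler padding, so the printed numbers are examples only;
not part of this changeset):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096
    #define MAX_OPROF_SHARED_PAGES 32

    struct event_log { uint64_t eip; uint8_t mode; uint8_t event; };

    typedef struct xenoprof_buf {
        uint32_t event_head, event_tail, event_size, vcpu_id;
        uint64_t xen_samples, kernel_samples, user_samples, lost_samples;
        struct event_log event_log[1];
    } xenoprof_buf_t;

    int main(void)
    {
        int nvcpu = 4, max_samples = 16384;
        int bufsize, max_bufsize, npages;

        /* same computation as alloc_xenoprof_struct() */
        bufsize = sizeof(xenoprof_buf_t)
                + (max_samples - 1) * sizeof(struct event_log);
        max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu;
        if (bufsize > max_bufsize) {     /* clamp to the page budget */
            bufsize = max_bufsize;
            max_samples = (max_bufsize - sizeof(xenoprof_buf_t))
                        / sizeof(struct event_log) + 1;
        }
        npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1;

        printf("nvcpu=%d: bufsize=%d max_samples=%d npages=%d\n",
               nvcpu, bufsize, max_samples, npages);
        return 0;
    }
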
diff -r 388c59fefaa6 -r e049baa9055d xen/include/xen/xenoprof.h
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/include/xen/xenoprof.h        Thu Apr  6 17:58:01 2006
@@ -0,0 +1,40 @@
+/******************************************************************************
+ * xenoprof.h
+ * 
+ * Xenoprof: Xenoprof enables performance profiling in Xen
+ * 
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * written by Aravind Menon & Jose Renato Santos
+ */
+
+#ifndef __XEN_XENOPROF_H__
+#define __XEN_XENOPROF_H__
+
+#include <public/xenoprof.h>
+
+#define XENOPROF_DOMAIN_IGNORED    0
+#define XENOPROF_DOMAIN_ACTIVE     1
+
+#define XENOPROF_IDLE              0
+#define XENOPROF_COUNTERS_RESERVED 1
+#define XENOPROF_READY             2
+#define XENOPROF_PROFILING         3
+
+
+typedef struct xenoprof_vcpu {
+    int event_size;
+    xenoprof_buf_t *buffer;
+} xenoprof_vcpu_t;
+
+typedef struct xenoprof {
+    char* rawbuf;
+    int npages;
+    int nbuf;
+    int bufsize;
+    int domain_type;
+    int domain_ready;
+    int is_primary;
+    xenoprof_vcpu_t vcpu [MAX_VIRT_CPUS];
+} xenoprof_t;
+
+#endif  /* __XEN_XENOPROF_H__ */
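
Taken together with do_xenoprof_op() in xenoprof.c, the XENOPROF_*
states above form a small state machine: IDLE -> COUNTERS_RESERVED
(reserve_counters) -> READY (setup_events) -> PROFILING (start), with
stop and release_counters walking back down. A sketch of the
happy-path transitions and the -EPERM rejection (illustration only,
not part of this changeset):

    #include <stdio.h>

    enum { XENOPROF_IDLE, XENOPROF_COUNTERS_RESERVED,
           XENOPROF_READY, XENOPROF_PROFILING };

    static int state = XENOPROF_IDLE;

    /* each op is legal only from one state, as in do_xenoprof_op() */
    static int transition(int from, int to, const char *op)
    {
        if (state != from) {
            printf("%-18s refused (-EPERM), state stays %d\n", op, state);
            return -1;
        }
        state = to;
        printf("%-18s ok, state -> %d\n", op, state);
        return 0;
    }

    int main(void)
    {
        transition(XENOPROF_IDLE, XENOPROF_COUNTERS_RESERVED,
                   "reserve_counters");
        transition(XENOPROF_COUNTERS_RESERVED, XENOPROF_READY,
                   "setup_events");
        transition(XENOPROF_READY, XENOPROF_PROFILING, "start");
        transition(XENOPROF_READY, XENOPROF_PROFILING, "start"); /* refused */
        transition(XENOPROF_PROFILING, XENOPROF_READY, "stop");
        transition(XENOPROF_READY, XENOPROF_IDLE, "release_counters");
        return 0;
    }
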

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

