[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] make all performance counters per-cpu



... avoiding the need to update them with atomic (locked) ops.

Conversion here isn't complete in the sense that many places still use
the old per-CPU accessors (which are now redundant). Since the patch is
already rather big, I'd prefer replacing those in a subsequent patch.

While doing this, I also converted x86's multicall macros to no longer
require inclusion of asm-offsets.h in the respective C file (on IA64 the
use of asm-offsets.h in C sources seems more widespread, so there I
instead used IA64_ prefixes for the otherwise conflicting performance
counter indices).

On x86, a few counter increments get moved a little, to avoid duplicate
counting of preempted hypercalls.

Also, a few counters are being added.

The IA64 changes are only compile-tested, so somebody doing active IA64
work may want to take a close look at them.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2007-03-19/xen/arch/ia64/asm-offsets.c
===================================================================
--- 2007-03-19.orig/xen/arch/ia64/asm-offsets.c 2007-02-12 14:00:54.000000000 
+0100
+++ 2007-03-19/xen/arch/ia64/asm-offsets.c      2007-03-27 16:35:08.000000000 
+0200
@@ -223,10 +223,11 @@ void foo(void)
 
 #ifdef PERF_COUNTERS
        BLANK();
-       DEFINE(RECOVER_TO_PAGE_FAULT_PERFC_OFS, offsetof (struct perfcounter, 
recover_to_page_fault));
-       DEFINE(RECOVER_TO_BREAK_FAULT_PERFC_OFS, offsetof (struct perfcounter, 
recover_to_break_fault));
-       DEFINE(FAST_HYPERPRIVOP_PERFC_OFS, offsetof (struct perfcounter, 
fast_hyperprivop));
-       DEFINE(FAST_REFLECT_PERFC_OFS, offsetof (struct perfcounter, 
fast_reflect));
+       DEFINE(IA64_PERFC_recover_to_page_fault, PERFC_recover_to_page_fault);
+       DEFINE(IA64_PERFC_recover_to_break_fault, PERFC_recover_to_break_fault);
+       DEFINE(IA64_PERFC_fast_vhpt_translate, PERFC_fast_vhpt_translate);
+       DEFINE(IA64_PERFC_fast_hyperprivop, PERFC_fast_hyperprivop);
+       DEFINE(IA64_PERFC_fast_reflect, PERFC_fast_reflect);
 #endif
 
        BLANK();
Index: 2007-03-19/xen/arch/ia64/xen/hyperprivop.S
===================================================================
--- 2007-03-19.orig/xen/arch/ia64/xen/hyperprivop.S     2007-02-12 
14:00:54.000000000 +0100
+++ 2007-03-19/xen/arch/ia64/xen/hyperprivop.S  2007-03-27 16:35:51.000000000 
+0200
@@ -26,8 +26,7 @@
 # define FAST_HYPERPRIVOPS
 # ifdef PERF_COUNTERS
 #  define FAST_HYPERPRIVOP_CNT
-#  define FAST_HYPERPRIVOP_PERFC(N) \
-       (perfcounters + FAST_HYPERPRIVOP_PERFC_OFS + (4 * N))
+#  define FAST_HYPERPRIVOP_PERFC(N) PERFC(fast_hyperprivop + N)
 #  define FAST_REFLECT_CNT
 # endif
        
@@ -364,7 +363,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
        mov rp=r29;;
        mov cr.itm=r26;;        // ensure next tick
 #ifdef FAST_REFLECT_CNT
-       movl r20=perfcounters+FAST_REFLECT_PERFC_OFS+((0x3000>>8)*4);;
+       movl r20=PERFC(fast_reflect + (0x3000>>8));;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
@@ -597,7 +596,7 @@ END(fast_break_reflect)
 //     r31 == pr
 ENTRY(fast_reflect)
 #ifdef FAST_REFLECT_CNT
-       movl r22=perfcounters+FAST_REFLECT_PERFC_OFS;
+       movl r22=PERFC(fast_reflect);
        shr r23=r20,8-2;;
        add r22=r22,r23;;
        ld4 r21=[r22];;
@@ -938,7 +937,7 @@ fast_tlb_no_tr_match:
 (p7)   br.cond.spnt.few page_not_present;;
 
 #ifdef FAST_REFLECT_CNT
-       movl r21=perfcounter+FAST_VHPT_TRANSLATE_PERFC_OFS;;
+       movl r21=PERFC(fast_vhpt_translate);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
@@ -968,7 +967,7 @@ END(fast_tlb_miss_reflect)
 // we get here if fast_insert fails (e.g. due to metaphysical lookup)
 ENTRY(recover_and_page_fault)
 #ifdef PERF_COUNTERS
-       movl r21=perfcounters + RECOVER_TO_PAGE_FAULT_PERFC_OFS;;
+       movl r21=PERFC(recover_to_page_fault);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
@@ -1832,7 +1831,7 @@ END(hyper_ptc_ga)
 // recovery block for hyper_itc metaphysical memory lookup
 ENTRY(recover_and_dispatch_break_fault)
 #ifdef PERF_COUNTERS
-       movl r21=perfcounters + RECOVER_TO_BREAK_FAULT_PERFC_OFS;;
+       movl r21=PERFC(recover_to_break_fault);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
Index: 2007-03-19/xen/arch/ia64/xen/privop_stat.c
===================================================================
--- 2007-03-19.orig/xen/arch/ia64/xen/privop_stat.c     2006-08-31 
15:26:11.000000000 +0200
+++ 2007-03-19/xen/arch/ia64/xen/privop_stat.c  2007-03-27 16:37:00.000000000 
+0200
@@ -10,48 +10,39 @@ struct privop_addr_count {
        unsigned long addr[PRIVOP_COUNT_NADDRS];
        unsigned int count[PRIVOP_COUNT_NADDRS];
        unsigned int overflow;
-       atomic_t *perfc_addr;
-       atomic_t *perfc_count;
-       atomic_t *perfc_overflow;
 };
 
-#undef  PERFCOUNTER
-#define PERFCOUNTER(var, name)
-
-#undef  PERFCOUNTER_CPU
-#define PERFCOUNTER_CPU(var, name)
+struct privop_addr_info {
+       enum perfcounter perfc_addr;
+       enum perfcounter perfc_count;
+       enum perfcounter perfc_overflow;
+};
 
-#undef  PERFCOUNTER_ARRAY
+#define PERFCOUNTER(var, name)
 #define PERFCOUNTER_ARRAY(var, name, size)
 
-#undef  PERFSTATUS
 #define PERFSTATUS(var, name)
-
-#undef  PERFSTATUS_CPU
-#define PERFSTATUS_CPU(var, name)
-
-#undef  PERFSTATUS_ARRAY
 #define PERFSTATUS_ARRAY(var, name, size)
 
-#undef PERFPRIVOPADDR
 #define PERFPRIVOPADDR(name)                        \
     {                                               \
-        { 0 }, { 0 }, 0,                            \
-        perfcounters.privop_addr_##name##_addr,     \
-        perfcounters.privop_addr_##name##_count,    \
-        perfcounters.privop_addr_##name##_overflow  \
+        PERFC_privop_addr_##name##_addr,            \
+        PERFC_privop_addr_##name##_count,           \
+        PERFC_privop_addr_##name##_overflow         \
     },
 
-static struct privop_addr_count privop_addr_counter[] = {
+static const struct privop_addr_info privop_addr_info[] = {
 #include <asm/perfc_defn.h>
 };
 
 #define PRIVOP_COUNT_NINSTS \
-        (sizeof(privop_addr_counter) / sizeof(privop_addr_counter[0]))
+        (sizeof(privop_addr_info) / sizeof(privop_addr_info[0]))
+
+static DEFINE_PER_CPU(struct privop_addr_count[PRIVOP_COUNT_NINSTS], 
privop_addr_counter);
 
 void privop_count_addr(unsigned long iip, enum privop_inst inst)
 {
-       struct privop_addr_count *v = &privop_addr_counter[inst];
+       struct privop_addr_count *v = this_cpu(privop_addr_counter) + inst;
        int i;
 
        if (inst >= PRIVOP_COUNT_NINSTS)
@@ -72,31 +63,44 @@ void privop_count_addr(unsigned long iip
 
 void gather_privop_addrs(void)
 {
-       int i, j;
-       atomic_t *v;
-       for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
-               /* Note: addresses are truncated!  */
-               v = privop_addr_counter[i].perfc_addr;
-               for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
-                       atomic_set(&v[j], privop_addr_counter[i].addr[j]);
-
-               v = privop_addr_counter[i].perfc_count;
-               for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
-                       atomic_set(&v[j], privop_addr_counter[i].count[j]);
+       unsigned int cpu;
+
+       for_each_cpu ( cpu ) {
+               perfc_t *perfcounters = per_cpu(perfcounters, cpu);
+               struct privop_addr_count *s = per_cpu(privop_addr_counter, cpu);
+               int i, j;
+
+               for (i = 0; i < PRIVOP_COUNT_NINSTS; i++, s++) {
+                       perfc_t *d;
+
+                       /* Note: addresses are truncated!  */
+                       d = perfcounters + privop_addr_info[i].perfc_addr;
+                       for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+                               d[j] = s->addr[j];
+
+                       d = perfcounters + privop_addr_info[i].perfc_count;
+                       for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+                               d[j] = s->count[j];
                
-               atomic_set(privop_addr_counter[i].perfc_overflow,
-                          privop_addr_counter[i].overflow);
+                       perfcounters[privop_addr_info[i].perfc_overflow] =
+                               s->overflow;
+               }
        }
 }
 
 void reset_privop_addrs(void)
 {
-       int i, j;
-       for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
-               struct privop_addr_count *v = &privop_addr_counter[i];
-               for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
-                       v->addr[j] = v->count[j] = 0;
-               v->overflow = 0;
+       unsigned int cpu;
+
+       for_each_cpu ( cpu ) {
+               struct privop_addr_count *v = per_cpu(privop_addr_counter, cpu);
+               int i, j;
+
+               for (i = 0; i < PRIVOP_COUNT_NINSTS; i++, v++) {
+                       for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+                               v->addr[j] = v->count[j] = 0;
+                       v->overflow = 0;
+               }
        }
 }
 #endif
Index: 2007-03-19/xen/arch/ia64/xen/vhpt.c
===================================================================
--- 2007-03-19.orig/xen/arch/ia64/xen/vhpt.c    2007-02-12 14:00:54.000000000 
+0100
+++ 2007-03-19/xen/arch/ia64/xen/vhpt.c 2007-03-27 15:13:06.000000000 +0200
@@ -512,7 +512,7 @@ void gather_vhpt_stats(void)
                for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
                        if (!(v->ti_tag & INVALID_TI_TAG))
                                vhpt_valid++;
-               perfc_seta(vhpt_valid_entries, cpu, vhpt_valid);
+               per_cpu(perfcounters, cpu)[PERFC_vhpt_valid_entries] = 
vhpt_valid;
        }
 }
 #endif
Index: 2007-03-19/xen/arch/x86/mm.c
===================================================================
--- 2007-03-19.orig/xen/arch/x86/mm.c   2007-03-27 10:31:15.000000000 +0200
+++ 2007-03-19/xen/arch/x86/mm.c        2007-03-27 10:32:56.000000000 +0200
@@ -1969,6 +1969,8 @@ int do_mmuext_op(
         if ( unlikely(!guest_handle_is_null(pdone)) )
             (void)copy_from_guest(&done, pdone, 1);
     }
+    else
+        perfc_incr(calls_to_mmuext_op);
 
     if ( unlikely(!guest_handle_okay(uops, count)) )
     {
@@ -2223,6 +2225,8 @@ int do_mmuext_op(
 
     UNLOCK_BIGLOCK(d);
 
+    perfc_add(num_mmuext_ops, i);
+
  out:
     /* Add incremental work we have done to the @done output parameter. */
     if ( unlikely(!guest_handle_is_null(pdone)) )
@@ -2257,6 +2261,8 @@ int do_mmu_update(
         if ( unlikely(!guest_handle_is_null(pdone)) )
             (void)copy_from_guest(&done, pdone, 1);
     }
+    else
+        perfc_incr(calls_to_mmu_update);
 
     if ( unlikely(!guest_handle_okay(ureqs, count)) )
     {
@@ -2273,9 +2279,6 @@ int do_mmu_update(
     domain_mmap_cache_init(&mapcache);
     domain_mmap_cache_init(&sh_mapcache);
 
-    perfc_incrc(calls_to_mmu_update);
-    perfc_addc(num_page_updates, count);
-
     LOCK_BIGLOCK(d);
 
     for ( i = 0; i < count; i++ )
@@ -2438,6 +2441,8 @@ int do_mmu_update(
     domain_mmap_cache_destroy(&mapcache);
     domain_mmap_cache_destroy(&sh_mapcache);
 
+    perfc_add(num_page_updates, i);
+
  out:
     /* Add incremental work we have done to the @done output parameter. */
     if ( unlikely(!guest_handle_is_null(pdone)) )
Index: 2007-03-19/xen/arch/x86/x86_32/asm-offsets.c
===================================================================
--- 2007-03-19.orig/xen/arch/x86/x86_32/asm-offsets.c   2007-03-19 
13:23:52.000000000 +0100
+++ 2007-03-19/xen/arch/x86/x86_32/asm-offsets.c        2007-03-27 
11:16:21.000000000 +0200
@@ -107,21 +107,11 @@ void __dummy__(void)
     BLANK();
 
 #if PERF_COUNTERS
-    OFFSET(PERFC_hypercalls, struct perfcounter, hypercalls);
-    OFFSET(PERFC_exceptions, struct perfcounter, exceptions);
+    DEFINE(PERFC_hypercalls, PERFC_hypercalls);
+    DEFINE(PERFC_exceptions, PERFC_exceptions);
     BLANK();
 #endif
 
-    OFFSET(MULTICALL_op, struct multicall_entry, op);
-    OFFSET(MULTICALL_arg0, struct multicall_entry, args[0]);
-    OFFSET(MULTICALL_arg1, struct multicall_entry, args[1]);
-    OFFSET(MULTICALL_arg2, struct multicall_entry, args[2]);
-    OFFSET(MULTICALL_arg3, struct multicall_entry, args[3]);
-    OFFSET(MULTICALL_arg4, struct multicall_entry, args[4]);
-    OFFSET(MULTICALL_arg5, struct multicall_entry, args[5]);
-    OFFSET(MULTICALL_result, struct multicall_entry, result);
-    BLANK();
-
     DEFINE(FIXMAP_apic_base, fix_to_virt(FIX_APIC_BASE));
     BLANK();
 
Index: 2007-03-19/xen/arch/x86/x86_32/entry.S
===================================================================
--- 2007-03-19.orig/xen/arch/x86/x86_32/entry.S 2007-02-28 12:10:37.000000000 
+0100
+++ 2007-03-19/xen/arch/x86/x86_32/entry.S      2007-03-27 12:12:51.000000000 
+0200
@@ -173,7 +173,7 @@ ENTRY(hypercall)
         GET_CURRENT(%ebx)
         cmpl  $NR_hypercalls,%eax
         jae   bad_hypercall
-        PERFC_INCR(PERFC_hypercalls, %eax)
+        PERFC_INCR(PERFC_hypercalls, %eax, %ebx)
 #ifndef NDEBUG
         /* Create shadow parameters and corrupt those not used by this call. */
         pushl %eax
@@ -429,7 +429,7 @@ handle_exception:
         movl  %esp,%edx
         pushl %edx                      # push the cpu_user_regs pointer
         GET_CURRENT(%ebx)
-        PERFC_INCR(PERFC_exceptions, %eax)
+        PERFC_INCR(PERFC_exceptions, %eax, %ebx)
         call  *exception_table(,%eax,4)
         addl  $4,%esp
         movl  UREGS_eflags(%esp),%eax
Index: 2007-03-19/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- 2007-03-19.orig/xen/arch/x86/x86_64/asm-offsets.c   2007-03-19 
13:23:52.000000000 +0100
+++ 2007-03-19/xen/arch/x86/x86_64/asm-offsets.c        2007-03-27 
11:38:22.000000000 +0200
@@ -121,30 +121,8 @@ void __dummy__(void)
     BLANK();
 
 #if PERF_COUNTERS
-    OFFSET(PERFC_hypercalls, struct perfcounter, hypercalls);
-    OFFSET(PERFC_exceptions, struct perfcounter, exceptions);
-    BLANK();
-#endif
-
-    OFFSET(MULTICALL_op, struct multicall_entry, op);
-    OFFSET(MULTICALL_arg0, struct multicall_entry, args[0]);
-    OFFSET(MULTICALL_arg1, struct multicall_entry, args[1]);
-    OFFSET(MULTICALL_arg2, struct multicall_entry, args[2]);
-    OFFSET(MULTICALL_arg3, struct multicall_entry, args[3]);
-    OFFSET(MULTICALL_arg4, struct multicall_entry, args[4]);
-    OFFSET(MULTICALL_arg5, struct multicall_entry, args[5]);
-    OFFSET(MULTICALL_result, struct multicall_entry, result);
-    BLANK();
-
-#ifdef CONFIG_COMPAT
-    OFFSET(COMPAT_MULTICALL_op, struct compat_multicall_entry, op);
-    OFFSET(COMPAT_MULTICALL_arg0, struct compat_multicall_entry, args[0]);
-    OFFSET(COMPAT_MULTICALL_arg1, struct compat_multicall_entry, args[1]);
-    OFFSET(COMPAT_MULTICALL_arg2, struct compat_multicall_entry, args[2]);
-    OFFSET(COMPAT_MULTICALL_arg3, struct compat_multicall_entry, args[3]);
-    OFFSET(COMPAT_MULTICALL_arg4, struct compat_multicall_entry, args[4]);
-    OFFSET(COMPAT_MULTICALL_arg5, struct compat_multicall_entry, args[5]);
-    OFFSET(COMPAT_MULTICALL_result, struct compat_multicall_entry, result);
+    DEFINE(PERFC_hypercalls, PERFC_hypercalls);
+    DEFINE(PERFC_exceptions, PERFC_exceptions);
     BLANK();
 #endif
 
Index: 2007-03-19/xen/arch/x86/x86_64/compat/entry.S
===================================================================
--- 2007-03-19.orig/xen/arch/x86/x86_64/compat/entry.S  2007-03-19 
13:23:52.000000000 +0100
+++ 2007-03-19/xen/arch/x86/x86_64/compat/entry.S       2007-03-27 
12:12:29.000000000 +0200
@@ -57,7 +57,7 @@ ENTRY(compat_hypercall)
         movl  UREGS_rbx(%rsp),%edi   /* Arg 1        */
 #endif
         leaq  compat_hypercall_table(%rip),%r10
-        PERFC_INCR(PERFC_hypercalls, %rax)
+        PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
         /* Deliberately corrupt parameter regs used by this hypercall. */
Index: 2007-03-19/xen/arch/x86/x86_64/entry.S
===================================================================
--- 2007-03-19.orig/xen/arch/x86/x86_64/entry.S 2007-02-28 12:10:32.000000000 
+0100
+++ 2007-03-19/xen/arch/x86/x86_64/entry.S      2007-03-27 12:11:33.000000000 
+0200
@@ -147,7 +147,7 @@ ENTRY(syscall_enter)
         pushq UREGS_rip+8(%rsp)
 #endif
         leaq  hypercall_table(%rip),%r10
-        PERFC_INCR(PERFC_hypercalls, %rax)
+        PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
         /* Deliberately corrupt parameter regs used by this hypercall. */
@@ -396,7 +396,7 @@ ENTRY(handle_exception)
         movl  UREGS_entry_vector(%rsp),%eax
         leaq  exception_table(%rip),%rdx
         GET_CURRENT(%rbx)
-        PERFC_INCR(PERFC_exceptions, %rax)
+        PERFC_INCR(PERFC_exceptions, %rax, %rbx)
         callq *(%rdx,%rax,8)
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
Index: 2007-03-19/xen/common/multicall.c
===================================================================
--- 2007-03-19.orig/xen/common/multicall.c      2007-03-27 10:31:15.000000000 
+0200
+++ 2007-03-19/xen/common/multicall.c   2007-03-27 10:32:56.000000000 +0200
@@ -10,6 +10,7 @@
 #include <xen/event.h>
 #include <xen/multicall.h>
 #include <xen/guest_access.h>
+#include <xen/perfc.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 
@@ -69,14 +70,18 @@ do_multicall(
         guest_handle_add_offset(call_list, 1);
     }
 
+    perfc_incr(calls_to_multicall);
+    perfc_add(calls_from_multicall, nr_calls);
     mcs->flags = 0;
     return 0;
 
  fault:
+    perfc_incr(calls_to_multicall);
     mcs->flags = 0;
     return -EFAULT;
 
  preempted:
+    perfc_add(calls_from_multicall, i);
     mcs->flags = 0;
     return hypercall_create_continuation(
         __HYPERVISOR_multicall, "hi", call_list, nr_calls-i);
Index: 2007-03-19/xen/common/perfc.c
===================================================================
--- 2007-03-19.orig/xen/common/perfc.c  2007-03-27 10:31:15.000000000 +0200
+++ 2007-03-19/xen/common/perfc.c       2007-03-27 13:33:55.000000000 +0200
@@ -10,81 +10,98 @@
 #include <public/sysctl.h>
 #include <asm/perfc.h>
 
-#undef  PERFCOUNTER
-#undef  PERFCOUNTER_CPU
-#undef  PERFCOUNTER_ARRAY
-#undef  PERFSTATUS
-#undef  PERFSTATUS_CPU
-#undef  PERFSTATUS_ARRAY
 #define PERFCOUNTER( var, name )              { name, TYPE_SINGLE, 0 },
-#define PERFCOUNTER_CPU( var, name )          { name, TYPE_CPU,    0 },
 #define PERFCOUNTER_ARRAY( var, name, size )  { name, TYPE_ARRAY,  size },
 #define PERFSTATUS( var, name )               { name, TYPE_S_SINGLE, 0 },
-#define PERFSTATUS_CPU( var, name )           { name, TYPE_S_CPU,    0 },
 #define PERFSTATUS_ARRAY( var, name, size )   { name, TYPE_S_ARRAY,  size },
-static struct {
-    char *name;
-    enum { TYPE_SINGLE, TYPE_CPU, TYPE_ARRAY,
-           TYPE_S_SINGLE, TYPE_S_CPU, TYPE_S_ARRAY
+static const struct {
+    const char *name;
+    enum { TYPE_SINGLE, TYPE_ARRAY,
+           TYPE_S_SINGLE, TYPE_S_ARRAY
     } type;
-    int nr_elements;
+    unsigned int nr_elements;
 } perfc_info[] = {
 #include <xen/perfc_defn.h>
 };
 
 #define NR_PERFCTRS (sizeof(perfc_info) / sizeof(perfc_info[0]))
 
-struct perfcounter perfcounters;
+DEFINE_PER_CPU(perfc_t[NUM_PERFCOUNTERS], perfcounters);
 
 void perfc_printall(unsigned char key)
 {
-    unsigned int i, j, sum;
+    unsigned int i, j;
     s_time_t now = NOW();
-    atomic_t *counters = (atomic_t *)&perfcounters;
 
     printk("Xen performance counters SHOW  (now = 0x%08X:%08X)\n",
            (u32)(now>>32), (u32)now);
 
-    for ( i = 0; i < NR_PERFCTRS; i++ ) 
+    for ( i = j = 0; i < NR_PERFCTRS; i++ )
     {
+        unsigned int k, cpu;
+        unsigned long long sum = 0;
+
         printk("%-32s  ",  perfc_info[i].name);
         switch ( perfc_info[i].type )
         {
         case TYPE_SINGLE:
         case TYPE_S_SINGLE:
-            printk("TOTAL[%10d]", atomic_read(&counters[0]));
-            counters += 1;
-            break;
-        case TYPE_CPU:
-        case TYPE_S_CPU:
-            sum = 0;
-            for_each_online_cpu ( j )
-                sum += atomic_read(&counters[j]);
-            printk("TOTAL[%10u]", sum);
-            if (sum)
+            for_each_online_cpu ( cpu )
+                sum += per_cpu(perfcounters, cpu)[j];
+            printk("TOTAL[%12Lu]", sum);
+            if ( sum )
             {
-                for_each_online_cpu ( j )
-                    printk("  CPU%02d[%10d]", j, atomic_read(&counters[j]));
+                k = 0;
+                for_each_online_cpu ( cpu )
+                {
+                    if ( k > 0 && (k % 4) == 0 )
+                        printk("\n%46s", "");
+                    printk("  CPU%02u[%10"PRIperfc"u]", cpu, 
per_cpu(perfcounters, cpu)[j]);
+                    ++k;
+                }
             }
-            counters += NR_CPUS;
+            ++j;
             break;
         case TYPE_ARRAY:
         case TYPE_S_ARRAY:
-            for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
-                sum += atomic_read(&counters[j]);
-            printk("TOTAL[%10u]", sum);
-#ifdef PERF_ARRAYS
+            for_each_online_cpu ( cpu )
+            {
+                perfc_t *counters = per_cpu(perfcounters, cpu) + j;
+
+                for ( k = 0; k < perfc_info[i].nr_elements; k++ )
+                    sum += counters[k];
+            }
+            printk("TOTAL[%12Lu]", sum);
             if (sum)
             {
-                for ( j = 0; j < perfc_info[i].nr_elements; j++ )
+#ifdef PERF_ARRAYS
+                for ( k = 0; k < perfc_info[i].nr_elements; k++ )
                 {
-                    if ( (j % 4) == 0 )
-                        printk("\n                 ");
-                    printk("  ARR%02d[%10d]", j, atomic_read(&counters[j]));
+                    sum = 0;
+                    for_each_online_cpu ( cpu )
+                        sum += per_cpu(perfcounters, cpu)[j + k];
+                    if ( (k % 4) == 0 )
+                        printk("\n%16s", "");
+                    printk("  ARR%02u[%10Lu]", k, sum);
+                }
+#else
+                k = 0;
+                for_each_online_cpu ( cpu )
+                {
+                    perfc_t *counters = per_cpu(perfcounters, cpu) + j;
+                    unsigned int n;
+
+                    sum = 0;
+                    for ( n = 0; n < perfc_info[i].nr_elements; n++ )
+                        sum += counters[n];
+                    if ( k > 0 && (k % 4) == 0 )
+                        printk("\n%46s", "");
+                    printk("  CPU%02u[%10Lu]", cpu, sum);
+                    ++k;
                 }
-            }
 #endif
-            counters += j;
+            }
+            j += perfc_info[i].nr_elements;
             break;
         }
         printk("\n");
@@ -97,7 +114,6 @@ void perfc_reset(unsigned char key)
 {
     unsigned int i, j;
     s_time_t now = NOW();
-    atomic_t *counters = (atomic_t *)&perfcounters;
 
     if ( key != '\0' )
         printk("Xen performance counters RESET (now = 0x%08X:%08X)\n",
@@ -105,43 +121,39 @@ void perfc_reset(unsigned char key)
 
     /* leave STATUS counters alone -- don't reset */
 
-    for ( i = 0; i < NR_PERFCTRS; i++ ) 
+    for ( i = j = 0; i < NR_PERFCTRS; i++ )
     {
+        unsigned int cpu;
+
         switch ( perfc_info[i].type )
         {
         case TYPE_SINGLE:
-            atomic_set(&counters[0],0);
+            for_each_cpu ( cpu )
+                per_cpu(perfcounters, cpu)[j] = 0;
         case TYPE_S_SINGLE:
-            counters += 1;
-            break;
-        case TYPE_CPU:
-            for ( j = 0; j < NR_CPUS; j++ )
-                atomic_set(&counters[j],0);
-        case TYPE_S_CPU:
-            counters += NR_CPUS;
+            ++j;
             break;
         case TYPE_ARRAY:
-            for ( j = 0; j < perfc_info[i].nr_elements; j++ )
-                atomic_set(&counters[j],0);
+            for_each_cpu ( cpu )
+                memset(per_cpu(perfcounters, cpu) + j, 0,
+                       perfc_info[i].nr_elements * sizeof(perfc_t));
         case TYPE_S_ARRAY:
-            counters += perfc_info[i].nr_elements;
+            j += perfc_info[i].nr_elements;
             break;
         }
     }
 
-    arch_perfc_reset ();
+    arch_perfc_reset();
 }
 
 static xen_sysctl_perfc_desc_t perfc_d[NR_PERFCTRS];
 static xen_sysctl_perfc_val_t *perfc_vals;
-static int               perfc_nbr_vals;
+static unsigned int      perfc_nbr_vals;
 static int               perfc_init = 0;
 static int perfc_copy_info(XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc,
                            XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val)
 {
-    unsigned int i, j;
-    unsigned int v = 0;
-    atomic_t *counters = (atomic_t *)&perfcounters;
+    unsigned int i, j, v;
 
     /* We only copy the name and array-size information once. */
     if ( !perfc_init ) 
@@ -154,11 +166,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
             {
             case TYPE_SINGLE:
             case TYPE_S_SINGLE:
-                perfc_d[i].nr_vals = 1;
-                break;
-            case TYPE_CPU:
-            case TYPE_S_CPU:
-                perfc_d[i].nr_vals = num_online_cpus();
+                perfc_d[i].nr_vals = num_possible_cpus();
                 break;
             case TYPE_ARRAY:
             case TYPE_S_ARRAY:
@@ -181,26 +189,31 @@ static int perfc_copy_info(XEN_GUEST_HAN
     arch_perfc_gather();
 
     /* We gather the counts together every time. */
-    for ( i = 0; i < NR_PERFCTRS; i++ )
+    for ( i = j = v = 0; i < NR_PERFCTRS; i++ )
     {
+        unsigned int cpu;
+
         switch ( perfc_info[i].type )
         {
         case TYPE_SINGLE:
         case TYPE_S_SINGLE:
-            perfc_vals[v++] = atomic_read(&counters[0]);
-            counters += 1;
-            break;
-        case TYPE_CPU:
-        case TYPE_S_CPU:
-            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
-                perfc_vals[v++] = atomic_read(&counters[j]);
-            counters += NR_CPUS;
+            for_each_cpu ( cpu )
+                perfc_vals[v++] = per_cpu(perfcounters, cpu)[j];
+            ++j;
             break;
         case TYPE_ARRAY:
         case TYPE_S_ARRAY:
-            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
-                perfc_vals[v++] = atomic_read(&counters[j]);
-            counters += perfc_info[i].nr_elements;
+            memset(perfc_vals + v, 0, perfc_d[i].nr_vals * 
sizeof(*perfc_vals));
+            for_each_cpu ( cpu )
+            {
+                perfc_t *counters = per_cpu(perfcounters, cpu) + j;
+                unsigned int k;
+
+                for ( k = 0; k < perfc_d[i].nr_vals; k++ )
+                    perfc_vals[v + k] += counters[k];
+            }
+            v += perfc_d[i].nr_vals;
+            j += perfc_info[i].nr_elements;
             break;
         }
     }
@@ -224,14 +237,12 @@ int perfc_control(xen_sysctl_perfc_op_t 
     switch ( pc->cmd )
     {
     case XEN_SYSCTL_PERFCOP_reset:
-        perfc_copy_info(pc->desc, pc->val);
+        rc = perfc_copy_info(pc->desc, pc->val);
         perfc_reset(0);
-        rc = 0;
         break;
 
     case XEN_SYSCTL_PERFCOP_query:
-        perfc_copy_info(pc->desc, pc->val);
-        rc = 0;
+        rc = perfc_copy_info(pc->desc, pc->val);
         break;
 
     default:
Index: 2007-03-19/xen/include/asm-ia64/linux-xen/asm/asmmacro.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-ia64/linux-xen/asm/asmmacro.h       
2006-07-31 13:57:56.000000000 +0200
+++ 2007-03-19/xen/include/asm-ia64/linux-xen/asm/asmmacro.h    2007-03-27 
16:32:49.000000000 +0200
@@ -116,4 +116,8 @@ name:
 # define dv_serialize_instruction
 #endif
 
+#ifdef PERF_COUNTERS
+#define PERFC(n) (THIS_CPU(perfcounters) + (IA64_PERFC_ ## n) * 4)
+#endif
+
 #endif /* _ASM_IA64_ASMMACRO_H */
Index: 2007-03-19/xen/include/asm-ia64/perfc_defn.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-ia64/perfc_defn.h   2006-11-15 
11:50:51.000000000 +0100
+++ 2007-03-19/xen/include/asm-ia64/perfc_defn.h        2007-03-27 
16:56:53.000000000 +0200
@@ -84,7 +84,7 @@ PERFCOUNTER_ARRAY(slow_reflect,       "s
 PERFCOUNTER_ARRAY(fast_reflect,       "fast reflection", 0x80)
 
 PERFSTATUS(vhpt_nbr_entries,          "nbr of entries per VHPT")
-PERFSTATUS_CPU(vhpt_valid_entries,    "nbr of valid entries in VHPT")
+PERFSTATUS(vhpt_valid_entries,        "nbr of valid entries in VHPT")
 
 PERFCOUNTER_ARRAY(vmx_mmio_access,    "vmx_mmio_access", 8)
 PERFCOUNTER_CPU(vmx_pal_emul,         "vmx_pal_emul")
@@ -106,6 +106,8 @@ PERFSTATUS(privop_addr_##name##_overflow
 
 PERFPRIVOPADDR(get_ifa)
 PERFPRIVOPADDR(thash)
+
+#undef PERFPRIVOPADDR
 #endif
 
 // vhpt.c
Index: 2007-03-19/xen/include/asm-ia64/privop_stat.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-ia64/privop_stat.h  2006-08-31 
15:26:11.000000000 +0200
+++ 2007-03-19/xen/include/asm-ia64/privop_stat.h       2007-03-27 
14:34:04.000000000 +0200
@@ -1,5 +1,5 @@
-#ifndef _XEN_UA64_PRIVOP_STAT_H
-#define _XEN_UA64_PRIVOP_STAT_H
+#ifndef _XEN_IA64_PRIVOP_STAT_H
+#define _XEN_IA64_PRIVOP_STAT_H
 #include <asm/config.h>
 #include <xen/types.h>
 #include <public/xen.h>
@@ -9,31 +9,24 @@
 extern void gather_privop_addrs(void);
 extern void reset_privop_addrs(void);
 
-#undef  PERFCOUNTER
 #define PERFCOUNTER(var, name)
-
-#undef  PERFCOUNTER_CPU
-#define PERFCOUNTER_CPU(var, name)
-
-#undef  PERFCOUNTER_ARRAY
 #define PERFCOUNTER_ARRAY(var, name, size)
 
-#undef  PERFSTATUS
 #define PERFSTATUS(var, name)
-
-#undef  PERFSTATUS_CPU
-#define PERFSTATUS_CPU(var, name)
-
-#undef  PERFSTATUS_ARRAY
 #define PERFSTATUS_ARRAY(var, name, size)
 
-#undef  PERFPRIVOPADDR
 #define PERFPRIVOPADDR(name) privop_inst_##name,
 
 enum privop_inst {
 #include <asm/perfc_defn.h>
 };
 
+#undef PERFCOUNTER
+#undef PERFCOUNTER_ARRAY
+
+#undef PERFSTATUS
+#undef PERFSTATUS_ARRAY
+
 #undef PERFPRIVOPADDR
 
 #define        PRIVOP_COUNT_ADDR(regs,inst) 
privop_count_addr(regs->cr_iip,inst)
@@ -45,4 +38,4 @@ extern void privop_count_addr(unsigned l
 #define reset_privop_addrs() do {} while (0)
 #endif
 
-#endif /* _XEN_UA64_PRIVOP_STAT_H */
+#endif /* _XEN_IA64_PRIVOP_STAT_H */
Index: 2007-03-19/xen/include/asm-x86/multicall.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-x86/multicall.h     2007-01-08 
14:15:32.000000000 +0100
+++ 2007-03-19/xen/include/asm-x86/multicall.h  2007-03-27 11:48:14.000000000 
+0200
@@ -6,84 +6,94 @@
 #define __ASM_X86_MULTICALL_H__
 
 #include <xen/errno.h>
-#include <asm/asm_defns.h>
 
 #ifdef __x86_64__
 
 #define do_multicall_call(_call)                             \
     do {                                                     \
         __asm__ __volatile__ (                               \
-            "    movq  "STR(MULTICALL_op)"(%0),%%rax; "      \
+            "    movq  %c1(%0),%%rax; "                      \
+            "    leaq  hypercall_table(%%rip),%%rdi; "       \
             "    cmpq  $("STR(NR_hypercalls)"),%%rax; "      \
             "    jae   2f; "                                 \
-            "    leaq  hypercall_table(%%rip),%%rdi; "       \
-            "    leaq  (%%rdi,%%rax,8),%%rax; "              \
-            "    movq  "STR(MULTICALL_arg0)"(%0),%%rdi; "    \
-            "    movq  "STR(MULTICALL_arg1)"(%0),%%rsi; "    \
-            "    movq  "STR(MULTICALL_arg2)"(%0),%%rdx; "    \
-            "    movq  "STR(MULTICALL_arg3)"(%0),%%rcx; "    \
-            "    movq  "STR(MULTICALL_arg4)"(%0),%%r8; "     \
-            "    callq *(%%rax); "                           \
-            "1:  movq  %%rax,"STR(MULTICALL_result)"(%0)\n"  \
+            "    movq  (%%rdi,%%rax,8),%%rax; "              \
+            "    movq  %c2+0*%c3(%0),%%rdi; "                \
+            "    movq  %c2+1*%c3(%0),%%rsi; "                \
+            "    movq  %c2+2*%c3(%0),%%rdx; "                \
+            "    movq  %c2+3*%c3(%0),%%rcx; "                \
+            "    movq  %c2+4*%c3(%0),%%r8; "                 \
+            "    callq *%%rax; "                             \
+            "1:  movq  %%rax,%c4(%0)\n"                      \
             ".section .fixup,\"ax\"\n"                       \
             "2:  movq  $-"STR(ENOSYS)",%%rax\n"              \
             "    jmp   1b\n"                                 \
             ".previous\n"                                    \
-            : : "b" (_call)                                  \
+            :                                                \
+            : "b" (_call),                                   \
+              "i" (offsetof(__typeof__(*_call), op)),        \
+              "i" (offsetof(__typeof__(*_call), args)),      \
+              "i" (sizeof(*(_call)->args)),                  \
+              "i" (offsetof(__typeof__(*_call), result))     \
               /* all the caller-saves registers */           \
             : "rax", "rcx", "rdx", "rsi", "rdi",             \
               "r8",  "r9",  "r10", "r11" );                  \
     } while ( 0 )
 
-#define compat_multicall_call(_call)                              \
-    do {                                                          \
-        __asm__ __volatile__ (                                    \
-            "    movl  "STR(COMPAT_MULTICALL_op)"(%0),%%eax; "    \
-            "    leaq  compat_hypercall_table(%%rip),%%rdi; "     \
-            "    cmpl  $("STR(NR_hypercalls)"),%%eax; "           \
-            "    jae   2f; "                                      \
-            "    movq  (%%rdi,%%rax,8),%%rax; "                   \
-            "    movl  "STR(COMPAT_MULTICALL_arg0)"(%0),%%edi; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg1)"(%0),%%esi; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg2)"(%0),%%edx; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg3)"(%0),%%ecx; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg4)"(%0),%%r8d; "  \
-            "    callq *%%rax; "                                  \
-            "1:  movl  %%eax,"STR(COMPAT_MULTICALL_result)"(%0)\n"\
-            ".section .fixup,\"ax\"\n"                            \
-            "2:  movl  $-"STR(ENOSYS)",%%eax\n"                   \
-            "    jmp   1b\n"                                      \
-            ".previous\n"                                         \
-            : : "b" (_call)                                       \
-              /* all the caller-saves registers */                \
-            : "rax", "rcx", "rdx", "rsi", "rdi",                  \
-              "r8",  "r9",  "r10", "r11" );                       \
-    } while ( 0 )
+#define compat_multicall_call(_call)                         \
+        __asm__ __volatile__ (                               \
+            "    movl  %c1(%0),%%eax; "                      \
+            "    leaq  compat_hypercall_table(%%rip),%%rdi; "\
+            "    cmpl  $("STR(NR_hypercalls)"),%%eax; "      \
+            "    jae   2f; "                                 \
+            "    movq  (%%rdi,%%rax,8),%%rax; "              \
+            "    movl  %c2+0*%c3(%0),%%edi; "                \
+            "    movl  %c2+1*%c3(%0),%%esi; "                \
+            "    movl  %c2+2*%c3(%0),%%edx; "                \
+            "    movl  %c2+3*%c3(%0),%%ecx; "                \
+            "    movl  %c2+4*%c3(%0),%%r8d; "                \
+            "    callq *%%rax; "                             \
+            "1:  movl  %%eax,%c4(%0)\n"                      \
+            ".section .fixup,\"ax\"\n"                       \
+            "2:  movl  $-"STR(ENOSYS)",%%eax\n"              \
+            "    jmp   1b\n"                                 \
+            ".previous\n"                                    \
+            :                                                \
+            : "b" (_call),                                   \
+              "i" (offsetof(__typeof__(*_call), op)),        \
+              "i" (offsetof(__typeof__(*_call), args)),      \
+              "i" (sizeof(*(_call)->args)),                  \
+              "i" (offsetof(__typeof__(*_call), result))     \
+              /* all the caller-saves registers */           \
+            : "rax", "rcx", "rdx", "rsi", "rdi",             \
+              "r8",  "r9",  "r10", "r11" )                   \
 
 #else
 
 #define do_multicall_call(_call)                             \
-    do {                                                     \
         __asm__ __volatile__ (                               \
-            "    pushl "STR(MULTICALL_arg4)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg3)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg2)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg1)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg0)"(%0); "          \
-            "    movl  "STR(MULTICALL_op)"(%0),%%eax; "      \
+            "    movl  %c1(%0),%%eax; "                      \
+            "    pushl %c2+4*%c3(%0); "                      \
+            "    pushl %c2+3*%c3(%0); "                      \
+            "    pushl %c2+2*%c3(%0); "                      \
+            "    pushl %c2+1*%c3(%0); "                      \
+            "    pushl %c2+0*%c3(%0); "                      \
             "    cmpl  $("STR(NR_hypercalls)"),%%eax; "      \
             "    jae   2f; "                                 \
             "    call  *hypercall_table(,%%eax,4); "         \
-            "1:  movl  %%eax,"STR(MULTICALL_result)"(%0); "  \
+            "1:  movl  %%eax,%c4(%0); "                      \
             "    addl  $20,%%esp\n"                          \
             ".section .fixup,\"ax\"\n"                       \
             "2:  movl  $-"STR(ENOSYS)",%%eax\n"              \
             "    jmp   1b\n"                                 \
             ".previous\n"                                    \
-            : : "b" (_call)                                  \
+            :                                                \
+            : "bSD" (_call),                                 \
+              "i" (offsetof(__typeof__(*_call), op)),        \
+              "i" (offsetof(__typeof__(*_call), args)),      \
+              "i" (sizeof(*(_call)->args)),                  \
+              "i" (offsetof(__typeof__(*_call), result))     \
               /* all the caller-saves registers */           \
-            : "eax", "ecx", "edx" );                         \
-    } while ( 0 )
+            : "eax", "ecx", "edx" )                          \
 
 #endif
 
Index: 2007-03-19/xen/include/asm-x86/perfc_defn.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-x86/perfc_defn.h    2007-03-27 
10:31:15.000000000 +0200
+++ 2007-03-19/xen/include/asm-x86/perfc_defn.h 2007-03-27 10:32:56.000000000 
+0200
@@ -18,9 +18,11 @@ PERFCOUNTER_CPU(apic_timer,             
 
 PERFCOUNTER_CPU(domain_page_tlb_flush,  "domain page tlb flushes")
 
-PERFCOUNTER_CPU(calls_to_mmu_update,    "calls_to_mmu_update")
-PERFCOUNTER_CPU(num_page_updates,       "num_page_updates")
-PERFCOUNTER_CPU(calls_to_update_va,     "calls_to_update_va_map")
+PERFCOUNTER(calls_to_mmuext_op,         "calls to mmuext_op")
+PERFCOUNTER(num_mmuext_ops,             "mmuext ops")
+PERFCOUNTER(calls_to_mmu_update,        "calls to mmu_update")
+PERFCOUNTER(num_page_updates,           "page updates")
+PERFCOUNTER(calls_to_update_va,         "calls to update_va_map")
 PERFCOUNTER_CPU(page_faults,            "page faults")
 PERFCOUNTER_CPU(copy_user_faults,       "copy_user faults")
 
Index: 2007-03-19/xen/include/asm-x86/x86_32/asm_defns.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-x86/x86_32/asm_defns.h      2006-02-28 
17:35:12.000000000 +0100
+++ 2007-03-19/xen/include/asm-x86/x86_32/asm_defns.h   2007-03-27 
12:19:53.000000000 +0200
@@ -1,6 +1,8 @@
 #ifndef __X86_32_ASM_DEFNS_H__
 #define __X86_32_ASM_DEFNS_H__
 
+#include <asm/percpu.h>
+
 #ifndef NDEBUG
 /* Indicate special exception stack frame by inverting the frame pointer. */
 #define SETUP_EXCEPTION_FRAME_POINTER           \
@@ -47,10 +49,14 @@
         1:
 
 #ifdef PERF_COUNTERS
-#define PERFC_INCR(_name,_idx)                          \
-        lock incl perfcounters+_name(,_idx,4)
+#define PERFC_INCR(_name,_idx,_cur)                     \
+        pushl _cur;                                     \
+        movl VCPU_processor(_cur),_cur;                 \
+        shll $PERCPU_SHIFT,_cur;                        \
+        incl per_cpu__perfcounters+_name*4(_cur,_idx,4);\
+        popl _cur
 #else
-#define PERFC_INCR(_name,_idx)
+#define PERFC_INCR(_name,_idx,_cur)
 #endif
 
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
Index: 2007-03-19/xen/include/asm-x86/x86_64/asm_defns.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-x86/x86_64/asm_defns.h      2006-10-23 
08:33:08.000000000 +0200
+++ 2007-03-19/xen/include/asm-x86/x86_64/asm_defns.h   2007-03-27 
13:16:39.000000000 +0200
@@ -1,6 +1,8 @@
 #ifndef __X86_64_ASM_DEFNS_H__
 #define __X86_64_ASM_DEFNS_H__
 
+#include <asm/percpu.h>
+
 #ifndef NDEBUG
 /* Indicate special exception stack frame by inverting the frame pointer. */
 #define SETUP_EXCEPTION_FRAME_POINTER           \
@@ -47,13 +49,18 @@
         popq  %rdi;
 
 #ifdef PERF_COUNTERS
-#define PERFC_INCR(_name,_idx)                  \
-    pushq %rdx;                                 \
-    leaq perfcounters+_name(%rip),%rdx;         \
-    lock incl (%rdx,_idx,4);                    \
-    popq %rdx;
+#define PERFC_INCR(_name,_idx,_cur)             \
+        pushq _cur;                             \
+        movslq VCPU_processor(_cur),_cur;       \
+        pushq %rdx;                             \
+        leaq per_cpu__perfcounters(%rip),%rdx;  \
+        shlq $PERCPU_SHIFT,_cur;                \
+        addq %rdx,_cur;                         \
+        popq %rdx;                              \
+        incl _name*4(_cur,_idx,4);              \
+        popq _cur
 #else
-#define PERFC_INCR(_name,_idx)
+#define PERFC_INCR(_name,_idx,_cur)
 #endif
 
 /* Work around AMD erratum #88 */
Index: 2007-03-19/xen/include/xen/perfc.h
===================================================================
--- 2007-03-19.orig/xen/include/xen/perfc.h     2007-03-27 10:31:15.000000000 
+0200
+++ 2007-03-19/xen/include/xen/perfc.h  2007-03-27 16:57:00.000000000 +0200
@@ -6,102 +6,94 @@
 
 #include <xen/lib.h>
 #include <xen/smp.h>
-#include <asm/atomic.h>
+#include <xen/percpu.h>
 
 /* 
  * NOTE: new counters must be defined in perfc_defn.h
  * 
  * PERFCOUNTER (counter, string)              define a new performance counter
- * PERFCOUNTER_CPU (counter, string, size)    define a counter per CPU
- * PERFCOUNTER_ARRY (counter, string, size)   define an array of counters
+ * PERFCOUNTER_ARRAY (counter, string, size)  define an array of counters
  * 
  * unlike "COUNTERS", "STATUS" variables DO NOT RESET
  * PERFSTATUS (counter, string)               define a new performance stauts
- * PERFSTATUS_CPU (counter, string, size)     define a status var per CPU
- * PERFSTATUS_ARRY (counter, string, size)    define an array of status vars
+ * PERFSTATUS_ARRAY (counter, string, size)   define an array of status vars
  * 
  * unsigned long perfc_value  (counter)        get value of a counter  
- * unsigned long perfc_valuec (counter)        get value of a per CPU counter
  * unsigned long perfc_valuea (counter, index) get value of an array counter
  * unsigned long perfc_set  (counter, val)     set value of a counter  
- * unsigned long perfc_setc (counter, val)     set value of a per CPU counter
  * unsigned long perfc_seta (counter, index, val) set value of an array counter
  * void perfc_incr  (counter)                  increment a counter          
- * void perfc_incrc (counter, index)           increment a per CPU counter   
+ * void perfc_decr  (counter)                  decrement a status
  * void perfc_incra (counter, index)           increment an array counter   
  * void perfc_add   (counter, value)           add a value to a counter     
- * void perfc_addc  (counter, value)           add a value to a per CPU counter
  * void perfc_adda  (counter, index, value)    add a value to array counter 
  * void perfc_print (counter)                  print out the counter
  */
 
-#define PERFCOUNTER( var, name ) \
-  atomic_t var[1];
-#define PERFCOUNTER_CPU( var, name ) \
-  atomic_t var[NR_CPUS];
-#define PERFCOUNTER_ARRAY( var, name, size ) \
-  atomic_t var[size];
-#define PERFSTATUS( var, name ) \
-  atomic_t var[1];
-#define PERFSTATUS_CPU( var, name ) \
-  atomic_t var[NR_CPUS];
-#define PERFSTATUS_ARRAY( var, name, size ) \
-  atomic_t var[size];
+#define PERFCOUNTER( name, descr ) \
+  PERFC_ ## name,
+#define PERFCOUNTER_ARRAY( name, descr, size ) \
+  PERFC_ ## name,                              \
+  PERFC_LAST_ ## name = PERFC_ ## name + (size) - sizeof(char[2 * !!(size) - 
1]),
 
-struct perfcounter {
+#define PERFSTATUS       PERFCOUNTER
+#define PERFSTATUS_ARRAY PERFCOUNTER_ARRAY
+
+/* Compatibility: This should go away once all users got converted. */
+#define PERFCOUNTER_CPU PERFCOUNTER
+
+enum perfcounter {
 #include <xen/perfc_defn.h>
+       NUM_PERFCOUNTERS
 };
 
-extern struct perfcounter perfcounters;
+#undef PERFCOUNTER
+#undef PERFCOUNTER_ARRAY
+#undef PERFSTATUS
+#undef PERFSTATUS_ARRAY
+
+typedef unsigned perfc_t;
+#define PRIperfc ""
 
-#define perfc_value(x)    atomic_read(&perfcounters.x[0])
-#define perfc_valuec(x)   atomic_read(&perfcounters.x[smp_processor_id()])
+DECLARE_PER_CPU(perfc_t[NUM_PERFCOUNTERS], perfcounters);
+
+#define perfc_value(x)    this_cpu(perfcounters)[PERFC_ ## x]
 #define perfc_valuea(x,y)                                               \
-    ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ?       \
-       atomic_read(&perfcounters.x[y]) : 0 )
-#define perfc_set(x,v)    atomic_set(&perfcounters.x[0], v)
-#define perfc_setc(x,v)   atomic_set(&perfcounters.x[smp_processor_id()], v)
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        this_cpu(perfcounters)[PERFC_ ## x + (y)] : 0 )
+#define perfc_set(x,v)    (this_cpu(perfcounters)[PERFC_ ## x] = (v))
 #define perfc_seta(x,y,v)                                               \
-    do {                                                                \
-        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
-            atomic_set(&perfcounters.x[y], v);                          \
-    } while ( 0 )
-#define perfc_incr(x)     atomic_inc(&perfcounters.x[0])
-#define perfc_decr(x)     atomic_dec(&perfcounters.x[0])
-#define perfc_incrc(x)    atomic_inc(&perfcounters.x[smp_processor_id()])
-#define perfc_decrc(x)    atomic_dec(&perfcounters.x[smp_processor_id()])
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        this_cpu(perfcounters)[PERFC_ ## x + (y)] = (v) : (v) )
+#define perfc_incr(x)     (++this_cpu(perfcounters)[PERFC_ ## x])
+#define perfc_decr(x)     (--this_cpu(perfcounters)[PERFC_ ## x])
 #define perfc_incra(x,y)                                                \
-    do {                                                                \
-        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
-            atomic_inc(&perfcounters.x[y]);                             \
-    } while ( 0 )
-#define perfc_add(x,y)    atomic_add((y), &perfcounters.x[0])
-#define perfc_addc(x,y)   atomic_add((y), &perfcounters.x[smp_processor_id()])
-#define perfc_adda(x,y,z)                                               \
-    do {                                                                \
-        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
-            atomic_add((z), &perfcounters.x[y]);                        \
-    } while ( 0 )
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        ++this_cpu(perfcounters)[PERFC_ ## x + (y)] : 0 )
+#define perfc_add(x,v)    (this_cpu(perfcounters)[PERFC_ ## x] += (v))
+#define perfc_adda(x,y,v)  /* add v to x[y]; no-op if y is out of range */ \
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        this_cpu(perfcounters)[PERFC_ ## x + (y)] += (v) : (v) )
 
 /*
  * Histogram: special treatment for 0 and 1 count. After that equally spaced 
  * with last bucket taking the rest.
  */
 #ifdef PERF_ARRAYS
-#define perfc_incr_histo(_x,_v,_n)                                          \
-    do {                                                                    \
-        if ( (_v) == 0 )                                                    \
-            perfc_incra(_x, 0);                                             \
-        else if ( (_v) == 1 )                                               \
-            perfc_incra(_x, 1);                                             \
-        else if ( (((_v)-2) / PERFC_ ## _n ## _BUCKET_SIZE) <               \
-                  (PERFC_MAX_ ## _n - 3) )                                  \
-            perfc_incra(_x, (((_v)-2) / PERFC_ ## _n ## _BUCKET_SIZE) + 2); \
-        else                                                                \
-            perfc_incra(_x, PERFC_MAX_ ## _n - 1);                          \
+#define perfc_incr_histo(x,v)                                           \
+    do {                                                                \
+        if ( (v) == 0 )                                                 \
+            perfc_incra(x, 0);                                          \
+        else if ( (v) == 1 )                                            \
+            perfc_incra(x, 1);                                          \
+        else if ( (((v) - 2) / PERFC_ ## x ## _BUCKET_SIZE) <           \
+                  (PERFC_LAST_ ## x - PERFC_ ## x - 2) )                \
+            perfc_incra(x, (((v) - 2) / PERFC_ ## x ## _BUCKET_SIZE) + 2); \
+        else                                                            \
+            perfc_incra(x, PERFC_LAST_ ## x - PERFC_ ## x);             \
     } while ( 0 )
 #else
-#define perfc_incr_histo(_x,_v,_n) ((void)0)
+#define perfc_incr_histo(x,v) ((void)0)
 #endif
 
 struct xen_sysctl_perfc_op;
@@ -110,22 +102,20 @@ int perfc_control(struct xen_sysctl_perf
 #else /* PERF_COUNTERS */
 
 #define perfc_value(x)    (0)
-#define perfc_valuec(x)   (0)
 #define perfc_valuea(x,y) (0)
 #define perfc_set(x,v)    ((void)0)
-#define perfc_setc(x,v)   ((void)0)
 #define perfc_seta(x,y,v) ((void)0)
 #define perfc_incr(x)     ((void)0)
 #define perfc_decr(x)     ((void)0)
-#define perfc_incrc(x)    ((void)0)
-#define perfc_decrc(x)    ((void)0)
 #define perfc_incra(x,y)  ((void)0)
 #define perfc_decra(x,y)  ((void)0)
 #define perfc_add(x,y)    ((void)0)
-#define perfc_addc(x,y)   ((void)0)
 #define perfc_adda(x,y,z) ((void)0)
 #define perfc_incr_histo(x,y,z) ((void)0)
 
 #endif /* PERF_COUNTERS */
 
+/* Compatibility: This should go away once all users got converted. */
+#define perfc_incrc     perfc_incr
+
 #endif /* __XEN_PERFC_H__ */
Index: 2007-03-19/xen/include/xen/perfc_defn.h
===================================================================
--- 2007-03-19.orig/xen/include/xen/perfc_defn.h        2007-03-27 
10:31:15.000000000 +0200
+++ 2007-03-19/xen/include/xen/perfc_defn.h     2007-03-27 10:32:56.000000000 
+0200
@@ -6,6 +6,9 @@
 
 PERFCOUNTER_ARRAY(hypercalls,           "hypercalls", NR_hypercalls)
 
+PERFCOUNTER(calls_to_multicall,         "calls to multicall")
+PERFCOUNTER(calls_from_multicall,       "calls from multicall")
+
 PERFCOUNTER_CPU(irqs,                   "#interrupts")
 PERFCOUNTER_CPU(ipis,                   "#IPIs")
 



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.