[Xen-ia64-devel] [PATCH 1/3] Sample implementation of Xenoprof for ia64
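For context, the do_perfmon_op() dispatcher added by the patch below accepts the existing perfmon-2 command set (PFM_LOAD_CONTEXT, PFM_WRITE_PMCS, PFM_WRITE_PMDS, PFM_READ_PMDS, PFM_START, PFM_STOP, PFM_UNLOAD_CONTEXT, ...) with the register arguments passed as guest handles. The following is only a minimal sketch of the call sequence a profiling front end might issue against that interface; xen_perfmon_op() is a hypothetical stand-in for the hypercall plumbing supplied by the other patches in this series (it is not defined in this patch), and the PMC/PMD numbers and event encoding are placeholders.

/* Hypothetical sketch only: xen_perfmon_op() stands in for the guest-side
 * hypercall glue added elsewhere in this series; it is assumed to forward
 * (cmd, arg1, arg2, count) to do_perfmon_op() in the hypervisor. */
#include <asm/perfmon.h>

extern long xen_perfmon_op(unsigned int cmd, void *arg1, void *arg2,
                           unsigned int count);      /* assumed wrapper */

static long program_one_counter(void *ctx)
{
    /* PMC4/PMD4 is the first generic counter pair on Itanium-family PMUs
     * (PMU_FIRST_COUNTER in the patch); the event encoding is PMU-specific
     * and left as a placeholder here. */
    pfarg_reg_t pmc = { .reg_num = 4, .reg_value = 0 /* event encoding */ };
    pfarg_reg_t pmd = { .reg_num = 4, .reg_value = 0 };
    pfarg_load_t load = { 0 };
    long rc;

    /* move the (single, system-wide) context to the LOADED state */
    rc = xen_perfmon_op(PFM_LOAD_CONTEXT, ctx, &load, 1);
    if (rc)
        return rc;

    /* program one monitor PMC and clear its paired PMD */
    rc = xen_perfmon_op(PFM_WRITE_PMCS, ctx, &pmc, 1);
    if (rc == 0)
        rc = xen_perfmon_op(PFM_WRITE_PMDS, ctx, &pmd, 1);

    /* start system-wide monitoring (sets psr.pp and dcr.pp on this CPU) */
    if (rc == 0)
        rc = xen_perfmon_op(PFM_START, ctx, NULL, 0);

    return rc;
}

Stopping and tearing down would follow the same pattern with PFM_STOP and PFM_UNLOAD_CONTEXT. The patch itself follows: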
patch for xen tree Signed-off-by: SUZUKI Kazuhiro <kaz@xxxxxxxxxxxxxx> diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/Makefile xenoprof-ia64-unstable/xen/arch/ia64/Makefile --- xen-ia64-unstable.hg/xen/arch/ia64/Makefile 2006-06-30 11:23:29.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/Makefile 2006-06-30 15:25:01.000000000 +0900 @@ -2,6 +2,7 @@ subdir-y += vmx subdir-y += linux subdir-y += linux-xen +subdir-y += oprofile $(TARGET)-syms: linux-xen/head.o $(ALL_OBJS) xen.lds.s $(LD) $(LDFLAGS) -T xen.lds.s -N \ diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/Makefile xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/Makefile --- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/Makefile 2006-06-22 13:37:13.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/Makefile 2006-06-30 15:25:01.000000000 +0900 @@ -14,3 +14,4 @@ obj-y += unaligned.o obj-y += unwind.o obj-y += iosapic.o +obj-y += perfmon.o diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon.c xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon.c --- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon.c 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon.c 2006-07-03 15:06:40.000000000 +0900 @@ -0,0 +1,3023 @@ +/* + * This file implements the perfmon-2 subsystem which is used + * to program the IA-64 Performance Monitoring Unit (PMU). + * + * The initial version of perfmon.c was written by + * Ganesh Venkitachalam, IBM Corp. + * + * Then it was modified for perfmon-1.x by Stephane Eranian and + * David Mosberger, Hewlett Packard Co. + * + * Version Perfmon-2.x is a rewrite of perfmon-1.x + * by Stephane Eranian, Hewlett Packard Co. + * + * Copyright (C) 1999-2005 Hewlett Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + * David Mosberger-Tang <davidm@xxxxxxxxxx> + * + * More information about perfmon available at: + * http://www.hpl.hp.com/research/linux/perfmon + */ + +#include <xen/config.h> +#include <xen/kernel.h> +#include <xen/sched.h> +/* #include <linux/interrupt.h> */ +/* #include <linux/seq_file.h> */ +#include <xen/init.h> +#include <asm/hw_irq.h> +#include <xen/irq.h> +#include <xen/mm.h> +#include <linux/sysctl.h> +#include <xen/guest_access.h> +#include <xen/list.h> +/* #include <linux/file.h> */ +#include <xen/bitops.h> +#include <linux/completion.h> + +#include <asm/intrinsics.h> +#include <asm/vcpu.h> +#include <asm/page.h> +#include <asm/perfmon.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/delay.h> + +#ifdef CONFIG_PERFMON +extern rwlock_t tasklist_lock; +/* + * perfmon context state + */ +#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ +#define PFM_CTX_LOADED 2 /* context is loaded onto a task */ +#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */ +#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */ + +#define PFM_INVALID_ACTIVATION (~0UL) + +/* + * depth of message queue + */ +#define PFM_MAX_MSGS 32 +#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail) + +/* + * type of a PMU register (bitmask). 
+ * bitmask structure: + * bit0 : register implemented + * bit1 : end marker + * bit2-3 : reserved + * bit4 : pmc has pmc.pm + * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter + * bit6-7 : register type + * bit8-31: reserved + */ +#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */ +#define PFM_REG_IMPL 0x1 /* register implemented */ +#define PFM_REG_END 0x2 /* end marker */ +#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ +#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */ +#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ +#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ +#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ + +#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) +#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) + +#define PMC_OVFL_NOTIFY(i) (ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) + +/* i assumed unsigned */ +#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) +#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) + +/* XXX: these assume that register i is implemented */ +#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) +#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) +#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) +#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) + +#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value +#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask +#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] +#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] + +#define PFM_NUM_IBRS IA64_NUM_DBG_REGS +#define PFM_NUM_DBRS IA64_NUM_DBG_REGS + +#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) +#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) +#define PFM_CTX_TASK(h) (h)->ctx_task + +#define PMU_PMC_OI 5 /* position of pmc.oi bit */ + +/* XXX: does not support more than 64 PMDs */ +#define CTX_USED_PMD(mask) ctx_used_pmds[0] |= (mask) +#define CTX_IS_USED_PMD(c) ((ctx_used_pmds[0] & (1UL << (c))) != 0UL) + +#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask) + +#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) +#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) +#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) +#define PFM_CODE_RR 0 /* requesting code range restriction */ +#define PFM_DATA_RR 1 /* requestion data range restriction */ + +#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v) +#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) +#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) + +#define RDEP(x) (1UL<<(x)) + +/* + * context protection macros + * in SMP: + * - we need to protect against CPU concurrency (spin_lock) + * - we need to protect against PMU overflow interrupts (local_irq_disable) + * in UP: + * - we need to protect against PMU overflow interrupts (local_irq_disable) + * + * spin_lock_irqsave()/spin_lock_irqrestore(): + * in SMP: local_irq_disable + spin_lock + * in UP : local_irq_disable + * + * spin_lock()/spin_lock(): + * in UP : removed automatically + * in SMP: protect against context accesses from other CPU. 
interrupts + * are not masked. This is useful for the PMU interrupt handler + * because we know we will not get PMU concurrency in that code. + */ +#define PROTECT_CTX(c, f) \ + do { \ + DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->vcpu_id)); \ + spin_lock_irqsave(&(c)->ctx_lock, f); \ + DPRINT(("spinlocked ctx %p by [%d]\n", c, current->vcpu_id)); \ + } while(0) + +#define UNPROTECT_CTX(c, f) \ + do { \ + DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->vcpu_id)); \ + spin_unlock_irqrestore(&(c)->ctx_lock, f); \ + } while(0) + +#define PROTECT_CTX_NOPRINT(f) \ + do { \ + spin_lock_irqsave(&ctx_lock, f); \ + } while(0) + + +#define UNPROTECT_CTX_NOPRINT(f) \ + do { \ + spin_unlock_irqrestore(&ctx_lock, f); \ + } while(0) + + +#define PROTECT_CTX_NOIRQ(c) \ + do { \ + spin_lock(&(c)->ctx_lock); \ + } while(0) + +#define UNPROTECT_CTX_NOIRQ(c) \ + do { \ + spin_unlock(&(c)->ctx_lock); \ + } while(0) + + +#ifdef CONFIG_SMP + +#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number) +#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++ +#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() + +#else /* !CONFIG_SMP */ +#define SET_ACTIVATION(t) do {} while(0) +#define GET_ACTIVATION(t) do {} while(0) +#define INC_ACTIVATION(t) do {} while(0) +#endif /* CONFIG_SMP */ + +#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0) +#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner) +#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx) + +#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g) +#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g) + +#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) + +/* + * cmp0 must be the value of pmc0 + */ +#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) + +#define PFMFS_MAGIC 0xa0b4d889 + +static int pfm_sysctl_debug = 0; +static int pfm_sysctl_debug_ovfl = 0; + +/* + * debugging + */ +#define PFM_DEBUGGING 1 +#ifdef PFM_DEBUGGING +#if 1 +#define DPRINT(a) \ + do { \ + if (unlikely(pfm_sysctl_debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->vcpu_id); printk a; } \ + } while (0) + +#define DPRINT_ovfl(a) \ + do { \ + if (unlikely(pfm_sysctl_debug > 0 && pfm_sysctl_debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->vcpu_id); printk a; } \ + } while (0) +#else +#define DPRINT(a) do {} while(0) +#define DPRINT_ovfl(a) do {} while(0) +#endif +#endif + +/* + * 64-bit software counter structure + * + * the next_reset_type is applied to the next call to pfm_reset_regs() + */ +typedef struct { + unsigned long val; /* virtual 64bit counter value */ + unsigned long lval; /* last reset value */ + unsigned long long_reset; /* reset value on sampling overflow */ + unsigned long short_reset; /* reset value on overflow */ + unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */ + unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */ + unsigned long seed; /* seed for random-number generator */ + unsigned long mask; /* mask for random-number generator */ + unsigned int flags; /* notify/do not notify */ + unsigned long eventid; /* overflow event identifier */ +} pfm_counter_t; + +/* + * context flags + */ +typedef struct { + unsigned int block:1; /* when 1, task will blocked on user notifications */ + unsigned int system:1; /* do system wide 
monitoring */ + unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ + unsigned int is_sampling:1; /* true if using a custom format */ + unsigned int excl_idle:1; /* exclude idle task in system wide session */ + unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */ + unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */ + unsigned int no_msg:1; /* no message sent on overflow */ + unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */ + unsigned int reserved:22; +} pfm_context_flags_t; + +#define PFM_TRAP_REASON_NONE 0x0 /* default value */ +#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */ +#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */ + + +/* + * perfmon context: encapsulates all the state of a monitoring session + */ + +typedef struct pfm_context { + spinlock_t ctx_lock; /* context protection */ + + pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */ + unsigned int ctx_state; /* state: active/inactive (no bitfield) */ + + struct task_struct *ctx_task; /* task to which context is attached */ + + unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ + + struct completion ctx_restart_done; /* use for blocking notification mode */ + + unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ + unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ + unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */ + + unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */ + unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ + unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ + + unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */ + + unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ + unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ + unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ + unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ + + pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */ + + u64 ctx_saved_psr_up; /* only contains psr.up value */ + + unsigned long ctx_last_activation; /* context last activation number for last_cpu */ + unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ + unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ + + int ctx_fd; /* file descriptor used my this context */ + pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ + + pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ + void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ + unsigned long ctx_smpl_size; /* size of sampling buffer */ + void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */ + + wait_queue_head_t ctx_msgq_wait; + pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; + int ctx_msgq_head; + int ctx_msgq_tail; + struct fasync_struct *ctx_async_queue; + + wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */ +} pfm_context_t; + +spinlock_t ctx_lock; /* context protection */ +unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */ +static pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */ +static pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ +unsigned int ctx_state; /* state: active/inactive (no bitfield) */ +unsigned long 
ctx_used_pmds[4]; /* bitmask of PMD used */ + +/* + * magic number used to verify that structure is really + * a perfmon context + */ +#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops) + +#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->arch._thread.pfm_context) + +#ifdef CONFIG_SMP +#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) +#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu +#else +#define SET_LAST_CPU(ctx, v) do {} while(0) +#define GET_LAST_CPU(ctx) do {} while(0) +#endif + + +#define ctx_fl_block ctx_flags.block +#define ctx_fl_system ctx_flags.system +#define ctx_fl_using_dbreg ctx_flags.using_dbreg +#define ctx_fl_is_sampling ctx_flags.is_sampling +#define ctx_fl_excl_idle ctx_flags.excl_idle +#define ctx_fl_going_zombie ctx_flags.going_zombie +#define ctx_fl_trap_reason ctx_flags.trap_reason +#define ctx_fl_no_msg ctx_flags.no_msg +#define ctx_fl_can_restart ctx_flags.can_restart + +#define PFM_SET_WORK_PENDING(t, v) do { (t)->arch._thread.pfm_needs_checking = v; } while(0); +#define PFM_GET_WORK_PENDING(t) (t)->arch._thread.pfm_needs_checking + +/* + * global information about all sessions + * mostly used to synchronize between system wide and per-process + */ +typedef struct { + spinlock_t pfs_lock; /* lock the structure */ + + unsigned int pfs_task_sessions; /* number of per task sessions */ + unsigned int pfs_sys_sessions; /* number of per system wide sessions */ + unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */ + unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ + struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ +} pfm_session_t; + +/* assume cnum is a valid monitor */ +#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) + +/* + * PMU specific flags + */ +#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ + +/* + * debug register related type definitions + */ +typedef struct { + unsigned long ibr_mask:56; + unsigned long ibr_plm:4; + unsigned long ibr_ig:3; + unsigned long ibr_x:1; +} ibr_mask_reg_t; + +typedef struct { + unsigned long dbr_mask:56; + unsigned long dbr_plm:4; + unsigned long dbr_ig:2; + unsigned long dbr_w:1; + unsigned long dbr_r:1; +} dbr_mask_reg_t; + +typedef union { + unsigned long val; + ibr_mask_reg_t ibr; + dbr_mask_reg_t dbr; +} dbreg_t; + + +/* + * perfmon command descriptions + */ +typedef struct { + int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + char *cmd_name; + int cmd_flags; + unsigned int cmd_narg; + size_t cmd_argsize; + int (*cmd_getsize)(void *arg, size_t *sz); +} pfm_cmd_desc_t; + +#define PFM_CMD_FD 0x01 /* command requires a file descriptor */ +#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ +#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ +#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ + + +#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name +#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) +#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) +#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) +#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) + +#define PFM_CMD_ARG_MANY -1 /* cannot be zero */ + +/* + * perfmon internal variables + */ +static pfm_session_t pfm_sessions; /* global sessions information */ + +/* static DEFINE_SPINLOCK(pfm_alt_install_check); */ + +/* 
static struct proc_dir_entry *perfmon_dir; */ +/* static pfm_uuid_t pfm_null_uuid = {0,}; */ + +static spinlock_t pfm_buffer_fmt_lock; +static LIST_HEAD(pfm_buffer_fmt_list); + +pmu_config_t *pmu_conf; + +#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) +#define pfm_get_cpu_data(a,b) per_cpu(a, b) + +static int interrupt_count = 0; +static int ring_count[4] = {0, 0, 0, 0}; + +static inline void +pfm_set_task_notify(struct task_struct *task) +{ + struct thread_info *info; + + info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE); + set_bit(TIF_NOTIFY_RESUME, &info->flags); +} + +static inline void +pfm_clear_task_notify(void) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); +} + +static inline void +pfm_reserve_page(unsigned long a) +{ +/* SetPageReserved(vmalloc_to_page((void *)a)); */ +} +static inline void +pfm_unreserve_page(unsigned long a) +{ +/* ClearPageReserved(vmalloc_to_page((void*)a)); */ +} + +static inline unsigned long +pfm_protect_ctx_ctxsw(void) +{ + spin_lock(&ctx_lock); + return 0UL; +} + +static inline void +pfm_unprotect_ctx_ctxsw(unsigned long f) +{ + spin_unlock(&ctx_lock); +} + +DEFINE_PER_CPU(unsigned long, pfm_syst_info); +DEFINE_PER_CPU(struct task_struct *, pmu_owner); +DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); +DEFINE_PER_CPU(unsigned long, pmu_activation_number); +DEFINE_PER_CPU(pfm_ovfl_arg_t, ovfl_arg); + +/* + * forward declarations + */ +#ifndef CONFIG_SMP +static void pfm_lazy_save_regs (struct task_struct *ta); +#endif + +void dump_pmu_state(const char *); +static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + +#include "perfmon_itanium.h" +#include "perfmon_mckinley.h" +#include "perfmon_montecito.h" +#include "perfmon_generic.h" + +static pmu_config_t *pmu_confs[]={ + &pmu_conf_mont, + &pmu_conf_mck, + &pmu_conf_ita, + &pmu_conf_gen, /* must be last */ + NULL +}; + + +static inline void +pfm_clear_psr_pp(void) +{ + ia64_rsm(IA64_PSR_PP); + ia64_srlz_i(); +} + +static inline void +pfm_set_psr_pp(void) +{ + ia64_ssm(IA64_PSR_PP); + ia64_srlz_i(); +} + +static inline void +pfm_clear_psr_up(void) +{ + ia64_rsm(IA64_PSR_UP); + ia64_srlz_i(); +} + +static inline void +pfm_set_psr_up(void) +{ + ia64_ssm(IA64_PSR_UP); + ia64_srlz_i(); +} + +static inline unsigned long +pfm_get_psr(void) +{ + unsigned long tmp; + tmp = ia64_getreg(_IA64_REG_PSR); + ia64_srlz_i(); + return tmp; +} + +static inline void +pfm_set_psr_l(unsigned long val) +{ + ia64_setreg(_IA64_REG_PSR_L, val); + ia64_srlz_i(); +} + +static inline void +pfm_freeze_pmu(void) +{ + ia64_set_pmc(0,1UL); + ia64_srlz_d(); +} + +static inline void +pfm_unfreeze_pmu(void) +{ + ia64_set_pmc(0,0UL); + ia64_srlz_d(); +} + +static inline void +pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) +{ + int i; + + for (i=0; i < nibrs; i++) { + ia64_set_ibr(i, ibrs[i]); + ia64_dv_serialize_instruction(); + } + ia64_srlz_i(); +} + +static inline void +pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) +{ + int i; + + for (i=0; i < ndbrs; i++) { + ia64_set_dbr(i, dbrs[i]); + ia64_dv_serialize_data(); + } + ia64_srlz_d(); +} + +/* + * PMD[i] must be a counter. 
no check is made + */ +static inline void +pfm_write_soft_counter(int i, unsigned long val) +{ + unsigned long ovfl_val = pmu_conf->ovfl_val; + + ctx_pmds[i].val = val & ~ovfl_val; + /* + * writing to unimplemented part is ignore, so we do not need to + * mask off top part + */ + ia64_set_pmd(i, val & ovfl_val); + +/* DPRINT_ovfl(("<< ctx_pmd[%d] pmd=0x%lx ovfl_val=0x%lx\n", */ +/* i, */ +/* ia64_get_pmd(i) & ovfl_val, */ +/* ovfl_val)); */ +} + +static void +pfm_context_free(pfm_context_t *ctx) +{ + if (ctx) { + DPRINT(("free ctx @%p\n", ctx)); + kfree(ctx); + } +} + +static inline void +pfm_save_pmds(unsigned long *pmds, unsigned long mask) +{ + int i; + + ia64_srlz_d(); + + for (i=0; mask; i++, mask>>=1) { + if (mask & 0x1) pmds[i] = ia64_get_pmd(i); + } +} + +/* + * reload from thread state (used for ctxw only) + */ +static inline void +pfm_restore_pmds(unsigned long *pmds, unsigned long mask) +{ + int i; + unsigned long val, ovfl_val = pmu_conf->ovfl_val; + + for (i=0; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0) continue; + val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i]; + ia64_set_pmd(i, val); + } + ia64_srlz_d(); +} + +/* + * propagate PMD from context to thread-state + */ +static inline void +pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) +{ + struct thread_struct *thread = &task->arch._thread; + unsigned long ovfl_val = pmu_conf->ovfl_val; + unsigned long mask = ctx->ctx_all_pmds[0]; + unsigned long val; + int i; + + DPRINT(("mask=0x%lx\n", mask)); + + for (i=0; mask; i++, mask>>=1) { + + val = ctx->ctx_pmds[i].val; + + /* + * We break up the 64 bit value into 2 pieces + * the lower bits go to the machine state in the + * thread (will be reloaded on ctxsw in). + * The upper part stays in the soft-counter. + */ + if (PMD_IS_COUNTING(i)) { + ctx->ctx_pmds[i].val = val & ~ovfl_val; + val &= ovfl_val; + } + thread->pmds[i] = val; + + DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", + i, + thread->pmds[i], + ctx->ctx_pmds[i].val)); + } +} + +/* + * propagate PMC from context to thread-state + */ +static inline void +pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) +{ + struct thread_struct *thread = &task->arch._thread; + unsigned long mask = ctx->ctx_all_pmcs[0]; + int i; + + DPRINT(("mask=0x%lx\n", mask)); + + for (i=0; mask; i++, mask>>=1) { + /* masking 0 with ovfl_val yields 0 */ + thread->pmcs[i] = ctx->ctx_pmcs[i]; + DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i])); + } +} + + + +static inline void +pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask) +{ + int i; + + for (i=0; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0) continue; + ia64_set_pmc(i, pmcs[i]); + printk("pfm_restore_pmcs: pmc[%d]: %lx\n", i, pmcs[i]); + } + ia64_srlz_d(); +} + +static inline int +pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b) +{ + return memcmp(a, b, sizeof(pfm_uuid_t)); +} + +static inline int +pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs) +{ + int ret = 0; + if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs); + return ret; +} + +static inline int +pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size) +{ + int ret = 0; + if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size); + return ret; +} + + +static inline int +pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, + int cpu, void *arg) +{ + int ret = 0; + if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, 
cpu, arg); + return ret; +} + +static inline int +pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags, + int cpu, void *arg) +{ + int ret = 0; + if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg); + return ret; +} + +static inline int +pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) +{ + int ret = 0; + if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs); + return ret; +} + +static inline int +pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) +{ + int ret = 0; + if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs); + return ret; +} + +static pfm_buffer_fmt_t * +__pfm_find_buffer_fmt(pfm_uuid_t uuid) +{ + struct list_head * pos; + pfm_buffer_fmt_t * entry; + + list_for_each(pos, &pfm_buffer_fmt_list) { + entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); + if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0) + return entry; + } + return NULL; +} + +int +pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt) +{ + int ret = 0; + + /* some sanity checks */ + if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL; + + /* we need at least a handler */ + if (fmt->fmt_handler == NULL) return -EINVAL; + + /* + * XXX: need check validity of fmt_arg_size + */ + + spin_lock(&pfm_buffer_fmt_lock); + + ctx_buf_fmt = fmt; + + if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) { + printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name); + ret = -EBUSY; + goto out; + } + list_add(&fmt->fmt_list, &pfm_buffer_fmt_list); + printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name); + +out: + spin_unlock(&pfm_buffer_fmt_lock); + return ret; +} +EXPORT_SYMBOL(pfm_register_buffer_fmt); + +int +pfm_unregister_buffer_fmt(pfm_uuid_t uuid) +{ + pfm_buffer_fmt_t *fmt; + int ret = 0; + + spin_lock(&pfm_buffer_fmt_lock); + + fmt = __pfm_find_buffer_fmt(uuid); + if (!fmt) { + printk(KERN_ERR "perfmon: cannot unregister format, not found\n"); + ret = -EINVAL; + goto out; + } + list_del_init(&fmt->fmt_list); + printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name); + +out: + spin_unlock(&pfm_buffer_fmt_lock); + return ret; + +} +EXPORT_SYMBOL(pfm_unregister_buffer_fmt); + +extern void update_pal_halt_status(int); + +static int +pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) +{ + unsigned long flags; + /* + * validy checks on cpu_mask have been done upstream + */ + LOCK_PFS(flags); + + DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + if (is_syswide) { + /* + * cannot mix system wide and per-task sessions + */ + if (pfm_sessions.pfs_task_sessions > 0UL) { + DPRINT(("system wide not possible, %u conflicting task_sessions\n", + pfm_sessions.pfs_task_sessions)); + goto abort; + } + + if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict; + + DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id())); + + pfm_sessions.pfs_sys_session[cpu] = task; + + pfm_sessions.pfs_sys_sessions++ ; + + } else { + if (pfm_sessions.pfs_sys_sessions) goto abort; + pfm_sessions.pfs_task_sessions++; + } + + DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + 
pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + /* + * disable default_idle() to go to PAL_HALT + */ + update_pal_halt_status(0); + + UNLOCK_PFS(flags); + + return 0; + +error_conflict: + DPRINT(("system wide not possible, conflicting session on CPU%d\n", + cpu)); +abort: + UNLOCK_PFS(flags); + + return -EBUSY; + +} + +static int +pfm_unreserve_session(int is_syswide, unsigned int cpu) +{ + unsigned long flags; + /* + * validy checks on cpu_mask have been done upstream + */ + LOCK_PFS(flags); + + DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + + if (is_syswide) { + pfm_sessions.pfs_sys_session[cpu] = NULL; + /* + * would not work with perfmon+more than one bit in cpu_mask + */ +/* if (ctx && ctx->ctx_fl_using_dbreg) { */ +/* if (pfm_sessions.pfs_sys_use_dbregs == 0) { */ +/* printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx); */ +/* } else { */ +/* pfm_sessions.pfs_sys_use_dbregs--; */ +/* } */ +/* } */ + pfm_sessions.pfs_sys_sessions--; + } else { + pfm_sessions.pfs_task_sessions--; + } + DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + /* + * if possible, enable default_idle() to go into PAL_HALT + */ + if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0) + update_pal_halt_status(1); + + UNLOCK_PFS(flags); + + return 0; +} + +#if 0 /* FIX ME */ +/* + * XXX: do something better here + */ +static int +pfm_bad_permissions(struct task_struct *task) +{ + /* inspired by ptrace_attach() */ + DPRINT(("cur: id=%d euid=%d suid=%d uid=%d egid=%d sgid=%d\n", + current->vcpu_id, + task->euid, + task->suid, + task->uid, + task->egid, + task->sgid)); + + return ((current->uid != task->euid) + || (current->uid != task->suid) + || (current->uid != task->uid) + || (current->gid != task->egid) + || (current->gid != task->sgid) + || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE); +} +#endif /* FIX ME */ + +static inline unsigned long +pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) +{ + unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; + + reg->lval = val; + return val; +} + +static void +pfm_reset_regs(unsigned long *ovfl_regs, int is_long_reset) +{ + unsigned long mask = ovfl_regs[0]; + unsigned long reset_others = 0UL; + unsigned long val; + int i; + +/* DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); */ + + /* + * now restore reset value on sampling overflowed counters + */ + mask >>= PMU_FIRST_COUNTER; + for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { + + if ((mask & 0x1UL) == 0UL) continue; + + val = pfm_new_counter_value(ctx_pmds+ i, is_long_reset); + reset_others |= ctx_pmds[i].reset_pmds[0]; + +/* DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? 
"long" : "short", i, val)); */ + + pfm_write_soft_counter(i, val); + } + + /* + * Now take care of resetting the other registers + */ + for(i = 0; reset_others; i++, reset_others >>= 1) { + + if ((reset_others & 0x1) == 0) continue; + + val = pfm_new_counter_value(ctx_pmds + i, is_long_reset); + + if (PMD_IS_COUNTING(i)) { + pfm_write_soft_counter(i, val); + } else { + ia64_set_pmd(i, val); + } + DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", + is_long_reset ? "long" : "short", i, val)); + } + ia64_srlz_d(); +} + +static int +pfm_write_pmcs(pfarg_reg_t *req, int count, struct pt_regs *regs) +{ + unsigned long value, pmc_pm; + unsigned long smpl_pmds, reset_pmds, impl_pmds; + unsigned int cnum, reg_flags, flags, pmc_type; + int i, can_access_pmu = 0, is_loaded, is_system; + int is_monitor, is_counting; + int ret = -EINVAL; + pfm_reg_check_t wr_func; + + impl_pmds = pmu_conf->impl_pmds[0]; +/* is_loaded = ctx_state == PFM_CTX_LOADED ? 1 : 0; */ + is_loaded = 1; + is_system = 1; + + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + can_access_pmu = 1; + + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + reg_flags = req->reg_flags; + value = req->reg_value; + smpl_pmds = req->reg_smpl_pmds[0]; + reset_pmds = req->reg_reset_pmds[0]; + flags = 0; + + + if (cnum >= PMU_MAX_PMCS) { + DPRINT(("pmc%u is invalid\n", cnum)); + goto error; + } + + pmc_type = pmu_conf->pmc_desc[cnum].type; + pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; + is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; + is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; + + /* + * we reject all non implemented PMC as well + * as attempts to modify PMC[0-3] which are used + * as status registers by the PMU + */ + if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { + DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); + goto error; + } + wr_func = pmu_conf->pmc_desc[cnum].write_check; + /* + * If the PMC is a monitor, then if the value is not the default: + * - system-wide session: PMCx.pm=1 (privileged monitor) + * - per-task : PMCx.pm=0 (user monitor) + */ + if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { + DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", + cnum, + pmc_pm, + is_system)); +/* goto error; */ + } + + if (is_counting) { + /* + * enforce generation of overflow interrupt. Necessary on all + * CPUs. 
+ */ + value |= 1 << PMU_PMC_OI; + + if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { + flags |= PFM_REGFL_OVFL_NOTIFY; + } + + if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; + + /* verify validity of smpl_pmds */ + if ((smpl_pmds & impl_pmds) != smpl_pmds) { + DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); + goto error; + } + + /* verify validity of reset_pmds */ + if ((reset_pmds & impl_pmds) != reset_pmds) { + DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); + goto error; + } + } else { + if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { + DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum)); + goto error; + } + /* eventid on non-counting monitors are ignored */ + } + + /* + * execute write checker, if any + */ + if (likely(wr_func != NULL)) { + ret = (*wr_func)(NULL, NULL, cnum, &value, regs); + printk("write_check: %d\n", ret); + if (ret) goto error; + ret = -EINVAL; + } + + /* + * no error on this register + */ + PFM_REG_RETFLAG_SET(req->reg_flags, 0); + + /* + * Now we commit the changes to the software state + */ + + /* + * update overflow information + */ + if (is_counting) { + /* + * full flag update each time a register is programmed + */ + ctx_pmds[cnum].flags = flags; + + ctx_pmds[cnum].reset_pmds[0] = reset_pmds; + ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds; + ctx_pmds[cnum].eventid = req->reg_smpl_eventid; + + /* + * Mark all PMDS to be accessed as used. + * + * We do not keep track of PMC because we have to + * systematically restore ALL of them. + * + * We do not update the used_monitors mask, because + * if we have not programmed them, then will be in + * a quiescent state, therefore we will not need to + * mask/restore then when context is MASKED. + */ + CTX_USED_PMD(reset_pmds); + CTX_USED_PMD(smpl_pmds); + /* + * make sure we do not try to reset on + * restart because we have established new values + */ + } + /* + * Needed in case the user does not initialize the equivalent + * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no + * possible leak here. + */ + CTX_USED_PMD(pmu_conf->pmc_desc[cnum].dep_pmd[0]); + + /* + * keep track of the monitor PMC that we are using. + * we save the value of the pmc in ctx_pmcs[] and if + * the monitoring is not stopped for the context we also + * place it in the saved state area so that it will be + * picked up later by the context switch code. + * + * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). + * + * The value in thread->pmcs[] may be modified on overflow, i.e., when + * monitoring needs to be stopped. + */ +/* if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); */ + + /* + * update context state + */ + ctx_pmcs[cnum] = value; + + if (is_loaded) { + /* + * write hardware register if we can + */ + if (can_access_pmu) { + u64 pmc; + printk("ia64_set_pmc: pmc[%d]: %lx\n", cnum, value); + ia64_set_pmc(cnum, value); + + pmc = ia64_get_pmc(cnum); + printk("ia64_get_pmc: pmc[%d]: %lx\n", cnum, pmc); + } +#ifdef CONFIG_SMP + else { + /* + * per-task SMP only here + * + * we are guaranteed that the task is not running on the other CPU, + * we indicate that this PMD will need to be reloaded if the task + * is rescheduled on the CPU it ran last on. 
+ */ +/* ctx->ctx_reload_pmcs[0] |= 1UL << cnum; */ + } +#endif + } + + DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x eventid=%ld\n smpl_pmds=0x%lx reset_pmds=0x%lx\n", + cnum, + value, + is_loaded, + can_access_pmu, + flags, + ctx_pmds[cnum].eventid, + smpl_pmds, + reset_pmds)); + } + + /* + * make sure the changes are visible + */ + if (can_access_pmu) ia64_srlz_d(); + + return 0; +error: + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +static int +pfm_write_pmds(pfarg_reg_t *req, int count, struct pt_regs *regs) +{ + unsigned long value, hw_value, ovfl_mask; + unsigned int cnum; + int i, can_access_pmu = 0; + int is_counting, is_loaded; + int ret = -EINVAL; + pfm_reg_check_t wr_func; + + is_loaded = 1; + ovfl_mask = pmu_conf->ovfl_val; + + /* + * on both UP and SMP, we can only write to the PMC when the task is + * the owner of the local PMU. + */ + if (likely(is_loaded)) { + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ +/* if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { */ +/* DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); */ +/* return -EBUSY; */ +/* } */ + can_access_pmu = 1; + } + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + value = req->reg_value; + DPRINT(("pmd[%u]: %lx\n", cnum, value)); + + if (!PMD_IS_IMPL(cnum)) { + DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); + goto abort_mission; + } + is_counting = PMD_IS_COUNTING(cnum); + wr_func = pmu_conf->pmd_desc[cnum].write_check; + + /* + * execute write checker, if any + */ +/* if (unlikely(expert_mode == 0 && wr_func)) { */ +/* unsigned long v = value; */ + +/* ret = (*wr_func)(task, ctx, cnum, &v, regs); */ +/* if (ret) goto abort_mission; */ + +/* value = v; */ +/* ret = -EINVAL; */ +/* } */ + + + /* + * no error on this register + */ + PFM_REG_RETFLAG_SET(req->reg_flags, 0); + + /* + * now commit changes to software state + */ + hw_value = value; + + /* + * update virtualized (64bits) counter + */ + if (is_counting) { + /* + * write context state + */ + ctx_pmds[cnum].lval = value; + + /* + * when context is load we use the split value + */ + if (is_loaded) { + hw_value = value & ovfl_mask; + value = value & ~ovfl_mask; + } + } + + /* + * update reset values (not just for counters) + */ + ctx_pmds[cnum].long_reset = req->reg_long_reset; + ctx_pmds[cnum].short_reset = req->reg_short_reset; + + /* + * update randomization parameters (not just for counters) + */ + ctx_pmds[cnum].seed = req->reg_random_seed; + ctx_pmds[cnum].mask = req->reg_random_mask; + + /* + * update context value + */ + ctx_pmds[cnum].val = value; + + /* + * Keep track of what we use + * + * We do not keep track of PMC because we have to + * systematically restore ALL of them. + */ + CTX_USED_PMD(PMD_PMD_DEP(cnum)); + + /* + * mark this PMD register used as well + */ + CTX_USED_PMD(RDEP(cnum)); + + /* + * make sure we do not try to reset on + * restart because we have established new values + */ + if (is_loaded) { + /* + * write hardware register if we can + */ + if (can_access_pmu) { + ia64_set_pmd(cnum, hw_value); + } else { +#ifdef CONFIG_SMP + /* + * we are guaranteed that the task is not running on the other CPU, + * we indicate that this PMD will need to be reloaded if the task + * is rescheduled on the CPU it ran last on. 
+ */ +/* ctx->ctx_reload_pmds[0] |= 1UL << cnum; */ +#endif + } + } + +/* DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " */ +/* "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", */ +/* cnum, */ +/* value, */ +/* is_loaded, */ +/* can_access_pmu, */ +/* hw_value, */ +/* ctx->ctx_pmds[cnum].val, */ +/* ctx->ctx_pmds[cnum].short_reset, */ +/* ctx->ctx_pmds[cnum].long_reset, */ +/* PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N', */ +/* ctx->ctx_pmds[cnum].seed, */ +/* ctx->ctx_pmds[cnum].mask, */ +/* ctx->ctx_used_pmds[0], */ +/* ctx->ctx_pmds[cnum].reset_pmds[0], */ +/* ctx->ctx_reload_pmds[0], */ +/* ctx->ctx_all_pmds[0], */ +/* ctx->ctx_ovfl_regs[0])); */ + } + + /* + * make changes visible + */ + if (can_access_pmu) ia64_srlz_d(); + + return 0; + +abort_mission: + /* + * for now, we have only one possibility for error + */ + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +/* + * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function. + * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an + * interrupt is delivered during the call, it will be kept pending until we leave, making + * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are + * guaranteed to return consistent data to the user, it may simply be old. It is not + * trivial to treat the overflow while inside the call because you may end up in + * some module sampling buffer code causing deadlocks. + */ +static int +pfm_read_pmds(pfm_context_t *ctx, pfarg_reg_t *req, int count, struct pt_regs *regs) +{ + struct thread_struct *thread = NULL; + struct task_struct *task; + unsigned long val = 0UL, lval, ovfl_mask, sval; + unsigned int cnum, reg_flags = 0; + int i, can_access_pmu = 0, state; + int is_loaded, is_system, is_counting, expert_mode = 0; + int ret = -EINVAL; + pfm_reg_check_t rd_func; + + /* + * access is possible when loaded only for + * self-monitoring tasks or in UP mode + */ + + state = ctx->ctx_state; + is_loaded = state == PFM_CTX_LOADED ? 1 : 0; + is_system = ctx->ctx_fl_system; + ovfl_mask = pmu_conf->ovfl_val; + task = ctx->ctx_task; + + if (state == PFM_CTX_ZOMBIE) return -EINVAL; + + if (likely(is_loaded)) { + thread = &task->arch._thread; + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + /* + * this can be true when not self-monitoring only in UP + */ + can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; + + if (can_access_pmu) ia64_srlz_d(); + } + + DPRINT(("ld=%d apmu=%d ctx_state=%d\n", + is_loaded, + can_access_pmu, + state)); + + /* + * on both UP and SMP, we can only read the PMD from the hardware register when + * the task is the owner of the local PMU. + */ + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + reg_flags = req->reg_flags; + + if (unlikely(!PMD_IS_IMPL(cnum))) goto error; + /* + * we can only read the register that we use. That includes + * the one we explicitely initialize AND the one we want included + * in the sampling buffer (smpl_regs). 
+ * + * Having this restriction allows optimization in the ctxsw routine + * without compromising security (leaks) + */ + if (unlikely(!CTX_IS_USED_PMD(cnum))) goto error; + + sval = ctx->ctx_pmds[cnum].val; + lval = ctx->ctx_pmds[cnum].lval; + is_counting = PMD_IS_COUNTING(cnum); + + /* + * If the task is not the current one, then we check if the + * PMU state is still in the local live register due to lazy ctxsw. + * If true, then we read directly from the registers. + */ + if (can_access_pmu){ + val = ia64_get_pmd(cnum); + } else { + /* + * context has been saved + * if context is zombie, then task does not exist anymore. + * In this case, we use the full value saved in the context (pfm_flush_regs()). + */ + val = is_loaded ? thread->pmds[cnum] : 0UL; + } + rd_func = pmu_conf->pmd_desc[cnum].read_check; + + if (is_counting) { + /* + * XXX: need to check for overflow when loaded + */ + val &= ovfl_mask; + val += sval; + } + + /* + * execute read checker, if any + */ + if (unlikely(expert_mode == 0 && rd_func)) { + unsigned long v = val; + ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); + if (ret) goto error; + val = v; + ret = -EINVAL; + } + + PFM_REG_RETFLAG_SET(reg_flags, 0); + + DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); + + /* + * update register return value, abort all if problem during copy. + * we only modify the reg_flags field. no check mode is fine because + * access has been verified upfront in sys_perfmonctl(). + */ + req->reg_value = val; + req->reg_flags = reg_flags; + req->reg_last_reset_val = lval; + } + + return 0; + +error: + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +/* + * Only call this function when a process it trying to + * write the debug registers (reading is always allowed) + */ +int +pfm_use_debug_registers(struct task_struct *task) +{ + pfm_context_t *ctx = task->arch._thread.pfm_context; + unsigned long flags; + int ret = 0; + + if (pmu_conf->use_rr_dbregs == 0) return 0; + + DPRINT(("called for [%d]\n", task->vcpu_id)); + + /* + * do it only once + */ + if (task->arch._thread.flags & IA64_THREAD_DBG_VALID) return 0; + + /* + * Even on SMP, we do not need to use an atomic here because + * the only way in is via ptrace() and this is possible only when the + * process is stopped. Even in the case where the ctxsw out is not totally + * completed by the time we come here, there is no way the 'stopped' process + * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. + * So this is always safe. + */ + if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; + + LOCK_PFS(flags); + + /* + * We cannot allow setting breakpoints when system wide monitoring + * sessions are using the debug registers. + */ + if (pfm_sessions.pfs_sys_use_dbregs> 0) + ret = -1; + else + pfm_sessions.pfs_ptrace_use_dbregs++; + + DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", + pfm_sessions.pfs_ptrace_use_dbregs, + pfm_sessions.pfs_sys_use_dbregs, + task->vcpu_id, ret)); + + UNLOCK_PFS(flags); + + return ret; +} + +/* + * This function is called for every task that exits with the + * IA64_THREAD_DBG_VALID set. This indicates a task which was + * able to use the debug registers for debugging purposes via + * ptrace(). 
Therefore we know it was not using them for + * perfmormance monitoring, so we only decrement the number + * of "ptraced" debug register users to keep the count up to date + */ +int +pfm_release_debug_registers(struct task_struct *task) +{ + unsigned long flags; + int ret; + + if (pmu_conf->use_rr_dbregs == 0) return 0; + + LOCK_PFS(flags); + if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { + printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->vcpu_id); + ret = -1; + } else { + pfm_sessions.pfs_ptrace_use_dbregs--; + ret = 0; + } + UNLOCK_PFS(flags); + + return ret; +} + +static int +pfm_restart(pfm_context_t *ctx, struct pt_regs *regs) +{ + struct task_struct *task; + pfm_buffer_fmt_t *fmt; + pfm_ovfl_ctrl_t rst_ctrl; + int state, is_system; + int ret = 0; + + state = ctx->ctx_state; + fmt = ctx_buf_fmt; + is_system = ctx->ctx_fl_system; + task = PFM_CTX_TASK(ctx); + + switch(state) { + case PFM_CTX_MASKED: + break; + case PFM_CTX_LOADED: + if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; + /* fall through */ + case PFM_CTX_UNLOADED: + case PFM_CTX_ZOMBIE: + DPRINT(("invalid state=%d\n", state)); + return -EBUSY; + default: + DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state)); + return -EINVAL; + } + + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + + /* sanity check */ + if (unlikely(task == NULL)) { + printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->vcpu_id); + return -EINVAL; + } + + if (task == current || is_system) { + + fmt = ctx->ctx_buf_fmt; + + DPRINT(("restarting self %d ovfl=0x%lx\n", + task->vcpu_id, + ctx->ctx_ovfl_regs[0])); + + if (CTX_HAS_SMPL(ctx)) { + + prefetch(ctx->ctx_smpl_hdr); + + rst_ctrl.bits.mask_monitoring = 0; + rst_ctrl.bits.reset_ovfl_pmds = 0; + + if (state == PFM_CTX_LOADED) + ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); + else + ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); + } else { + rst_ctrl.bits.mask_monitoring = 0; + rst_ctrl.bits.reset_ovfl_pmds = 1; + } + + if (ret == 0) { + if (rst_ctrl.bits.reset_ovfl_pmds) + pfm_reset_regs(ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); + + if (rst_ctrl.bits.mask_monitoring == 0) { + DPRINT(("resuming monitoring for [%d]\n", task->vcpu_id)); + + } else { + DPRINT(("keeping monitoring stopped for [%d]\n", task->vcpu_id)); + + // cannot use pfm_stop_monitoring(task, regs); + } + } + /* + * clear overflowed PMD mask to remove any stale information + */ + ctx->ctx_ovfl_regs[0] = 0UL; + + /* + * back to LOADED state + */ + ctx->ctx_state = PFM_CTX_LOADED; + + /* + * XXX: not really useful for self monitoring + */ + ctx->ctx_fl_can_restart = 0; + + return 0; + } + + return 0; +} + +/* + * arg can be NULL and count can be zero for this function + */ +static int +pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; + unsigned long flags; + dbreg_t dbreg; + unsigned int rnum; + int first_time; + int ret = 0, state; + int i, can_access_pmu = 0; + int is_system, is_loaded; + + if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; + + state = ctx->ctx_state; + is_loaded = state == PFM_CTX_LOADED ? 
1 : 0; + is_system = 1; + + if (state == PFM_CTX_ZOMBIE) return -EINVAL; + + /* + * on both UP and SMP, we can only write to the PMC when the task is + * the owner of the local PMU. + */ + if (is_loaded) { + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + can_access_pmu = 1; + } + + /* + * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w + * ensuring that no real breakpoint can be installed via this call. + * + * IMPORTANT: regs can be NULL in this function + */ + + first_time = ctx->ctx_fl_using_dbreg == 0; + + /* + * don't bother if we are loaded and task is being debugged + */ +/* if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { */ +/* DPRINT(("debug registers already in use for [%d]\n", task->vcpu_id)); */ +/* return -EBUSY; */ +/* } */ + + /* + * check for debug registers in system wide mode + * + * If though a check is done in pfm_context_load(), + * we must repeat it here, in case the registers are + * written after the context is loaded + */ + if (is_loaded) { + LOCK_PFS(flags); + + if (first_time && is_system) { + if (pfm_sessions.pfs_ptrace_use_dbregs) + ret = -EBUSY; + else + pfm_sessions.pfs_sys_use_dbregs++; + } + UNLOCK_PFS(flags); + } + + if (ret != 0) return ret; + + /* + * mark ourself as user of the debug registers for + * perfmon purposes. + */ +/* ctx->ctx_fl_using_dbreg = 1; */ + + /* + * clear hardware registers to make sure we don't + * pick up stale state. + * + * for a system wide session, we do not use + * thread.dbr, thread.ibr because this process + * never leaves the current CPU and the state + * is shared by all processes running on it + */ + if (first_time && can_access_pmu) { + DPRINT(("clearing ibrs, dbrs\n")); + for (i=0; i < pmu_conf->num_ibrs; i++) { + ia64_set_ibr(i, 0UL); + ia64_dv_serialize_instruction(); + } + ia64_srlz_i(); + for (i=0; i < pmu_conf->num_dbrs; i++) { + ia64_set_dbr(i, 0UL); + ia64_dv_serialize_data(); + } + ia64_srlz_d(); + } + + /* + * Now install the values into the registers + */ + for (i = 0; i < count; i++, req++) { + + rnum = req->dbreg_num; + dbreg.val = req->dbreg_value; + + ret = -EINVAL; + + if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { + DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", + rnum, dbreg.val, mode, i, count)); + + goto abort_mission; + } + + /* + * make sure we do not install enabled breakpoint + */ + if (rnum & 0x1) { + if (mode == PFM_CODE_RR) + dbreg.ibr.ibr_x = 0; + else + dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; + } + + PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); + + /* + * Debug registers, just like PMC, can only be modified + * by a kernel call. Moreover, perfmon() access to those + * registers are centralized in this routine. The hardware + * does not modify the value of these registers, therefore, + * if we save them as they are written, we can avoid having + * to save them on context switch out. This is made possible + * by the fact that when perfmon uses debug registers, ptrace() + * won't be able to modify them concurrently. 
+ */ + if (mode == PFM_CODE_RR) { + CTX_USED_IBR(ctx, rnum); + + if (can_access_pmu) { + ia64_set_ibr(rnum, dbreg.val); + ia64_dv_serialize_instruction(); + } + + ctx->ctx_ibrs[rnum] = dbreg.val; + + DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", + rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); + } else { + CTX_USED_DBR(ctx, rnum); + + if (can_access_pmu) { + ia64_set_dbr(rnum, dbreg.val); + ia64_dv_serialize_data(); + } + ctx->ctx_dbrs[rnum] = dbreg.val; + + DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", + rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); + } + } + + return 0; + +abort_mission: + /* + * in case it was our first attempt, we undo the global modifications + */ + if (first_time) { + LOCK_PFS(flags); + if (ctx->ctx_fl_system) { + pfm_sessions.pfs_sys_use_dbregs--; + } + UNLOCK_PFS(flags); + ctx->ctx_fl_using_dbreg = 0; + } + /* + * install error return flag + */ + PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); + + return ret; +} + +static int +pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); +} + +static int +pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); +} + +int +pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) +{ + pfm_context_t *ctx; + + if (req == NULL) return -EINVAL; + + ctx = GET_PMU_CTX(); + + if (ctx == NULL) return -EINVAL; + + /* + * for now limit to current task, which is enough when calling + * from overflow handler + */ + if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; + + return pfm_write_ibrs(ctx, req, nreq, regs); +} +/* EXPORT_SYMBOL(pfm_mod_write_ibrs); */ + +int +pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) +{ + pfm_context_t *ctx; + + if (req == NULL) return -EINVAL; + + ctx = GET_PMU_CTX(); + + if (ctx == NULL) return -EINVAL; + + /* + * for now limit to current task, which is enough when calling + * from overflow handler + */ + if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; + + return pfm_write_dbrs(ctx, req, nreq, regs); +} +/* EXPORT_SYMBOL(pfm_mod_write_dbrs); */ + + +static int +pfm_get_features(pfarg_features_t *req) +{ + req->ft_version = PFM_VERSION; + return 0; +} + +static int +pfm_stop(struct pt_regs *regs) +{ + int is_system = 1; + + int i; + + for (i = 0; i < 4; i++) { + DPRINT(("RING%d=%d\n", i, ring_count[i])); + ring_count[i] = 0; + } + + DPRINT(("interrupt_count: %d\n", interrupt_count)); + + /* + * in system mode, we need to update the PMU directly + * and the user level state of the caller, which may not + * necessarily be the creator of the context. 
+ */ + if (is_system) { + /* + * Update local PMU first + * + * disable dcr pp + */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); + ia64_srlz_i(); + + current->arch.dcr &= ~(0x1UL << 0); + + /* + * update local cpuinfo + */ + PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); + + /* + * stop monitoring, does srlz.i + */ + pfm_clear_psr_pp(); + + /* + * stop monitoring in the caller + */ + ia64_psr(regs)->pp = 0; + + return 0; + } + + + /* + * per-task mode + */ + + /* stop monitoring at kernel level */ + pfm_clear_psr_up(); + + /* + * stop monitoring at the user level + */ + ia64_psr(regs)->up = 0; + + return 0; +} + + +static int +pfm_start(struct pt_regs *regs) +{ + int is_system = 1; + + interrupt_count = 0; + + /* + * in system mode, we need to update the PMU directly + * and the user level state of the caller, which may not + * necessarily be the creator of the context. + */ + if (is_system) { + struct domain *d; + struct vcpu *v; + struct pt_regs *r; + + /* + * set user level psr.pp for the caller + */ + ia64_psr(regs)->pp = 1; + + /* + * now update the local PMU and cpuinfo + */ + PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); + + /* + * start monitoring at kernel level + */ + pfm_set_psr_pp(); + + /* enable dcr pp */ + current->arch.dcr |= 0x1UL << 0; + + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); + ia64_srlz_i(); + + for_each_domain(d) { + for_each_vcpu (d, v) { + r = vcpu_regs(v); + ia64_psr(r)->pp = 1; + } + } + + return 0; + } + + /* + * per-process mode + */ + + /* start monitoring at kernel level */ + pfm_set_psr_up(); + + /* + * activate monitoring at user level + */ + ia64_psr(regs)->up = 1; + + return 0; +} + +static int +pfm_get_pmc_reset(pfarg_reg_t *req, unsigned int count) +{ + unsigned int cnum; + int i; + int ret = -EINVAL; + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + + if (!PMC_IS_IMPL(cnum)) goto abort_mission; + + req->reg_value = PMC_DFL_VAL(cnum); + + PFM_REG_RETFLAG_SET(req->reg_flags, 0); + + DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); + } + return 0; + +abort_mission: + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +static int +pfm_context_load(void *arg, int count, struct pt_regs *regs) +{ + unsigned long flags; + int the_cpu; + int ret = 0; + int is_system = 1; + + /* + * can only load from unloaded or terminated state + */ + if (ctx_state != PFM_CTX_UNLOADED) { + DPRINT(("cannot load context, invalid ctx_state=%d\n", + ctx_state)); + return -EBUSY; + } + + the_cpu = smp_processor_id(); + + ret = pfm_reserve_session(current, is_system, the_cpu); + if (ret) goto error; + + ctx_state = PFM_CTX_LOADED; + + return 0; + +error: + /* + * we must undo the dbregs setting (for system-wide) + */ + if (ret && !pfm_sessions.pfs_ptrace_use_dbregs) { + LOCK_PFS(flags); + pfm_sessions.pfs_sys_use_dbregs--; + UNLOCK_PFS(flags); + } + return ret; +} + +/* + * in this function, we do not need to increase the use count + * for the task via get_task_struct(), because we hold the + * context lock. If the task were to disappear while having + * a context attached, it would go through pfm_exit_thread() + * which also grabs the context lock and would therefore be blocked + * until we are here. 
+ */ + +static int +pfm_context_unload(struct pt_regs *regs) +{ + int prev_state; + int ret; + + DPRINT(("ctx_state=%d\n", ctx_state)); + + prev_state = ctx_state; + + /* + * unload only when necessary + */ + if (prev_state == PFM_CTX_UNLOADED) { + DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); + return 0; + } + + /* + * clear psr and dcr bits + */ + ret = pfm_stop(regs); + if (ret) return ret; + + ctx_state = PFM_CTX_UNLOADED; + + if (prev_state != PFM_CTX_ZOMBIE) + pfm_unreserve_session(1 , smp_processor_id()); + /* + * save PMDs in context + * release ownership + */ + + return 0; +} + + +long do_perfmon_op(unsigned int cmd, XEN_GUEST_HANDLE(void) arg1, XEN_GUEST_HANDLE(void) arg2, unsigned int arg3) +{ + long rc = 0; + pfm_context_t *ctx; + struct pt_regs *regs = vcpu_regs(current);; + + if ( copy_from_guest(&ctx, arg1, 1) ) + return -EFAULT; + + switch ( cmd ) { + case PFM_WRITE_PMCS: + { + pfarg_reg_t req; + + if ( copy_from_guest(&req, arg2, 1) ) + return -EFAULT; + rc = pfm_write_pmcs(&req, arg3, regs); + break; + } + case PFM_WRITE_PMDS: + { + pfarg_reg_t req; + + if ( copy_from_guest(&req, arg2, 1) ) + return -EFAULT; + rc = pfm_write_pmds(&req, arg3, regs); + break; + } + case PFM_READ_PMDS: + { + pfarg_reg_t req; + + if ( copy_from_guest(&req, arg2, 1) ) + return -EFAULT; + rc = pfm_read_pmds(ctx, &req, arg3, regs); + if ( copy_to_guest(arg2, &req, 1) ) + return -EFAULT; + break; + } + case PFM_STOP: + rc = pfm_stop(regs); + break; + case PFM_START: + rc = pfm_start(regs); + break; + case PFM_ENABLE: /* 0x06 obsolete */ + case PFM_DISABLE: /* 0x07 obsolete */ + DPRINT(("invalid cmd=%d\n", cmd)); + rc = -EINVAL; + break; + case PFM_DESTROY_CONTEXT: /* 0x09 obsolete use close() */ + DPRINT(("invalid cmd=%d\n", cmd)); + rc = -EINVAL; + break; + case PFM_RESTART: /* 0x0a */ + rc = pfm_restart(ctx, regs); + case PFM_PROTECT_CONTEXT: /* 0x0b obsolete */ + DPRINT(("invalid cmd=%d\n", cmd)); + rc = -EINVAL; + break; + case PFM_GET_FEATURES: /* 0x0c */ + { + pfarg_features_t req; + + if ( copy_from_guest(&req, arg2, 1) ) + return -EFAULT; + rc = pfm_get_features(&req); + if ( copy_to_guest(arg2, &req, 1) ) + return -EINVAL; + break; + } + case PFM_DEBUG: /* 0x0d */ + { + unsigned int m; + if ( copy_from_guest(&m, arg2, 1) ) + return -EFAULT; +/* rc = pfm_debug(m); */ + break; + } + case PFM_UNPROTECT_CONTEXT: /* 0x0e obsolete */ + DPRINT(("invalid cmd=%d\n", cmd)); + rc = -EINVAL; + break; + case PFM_GET_PMC_RESET_VAL: /* 0x0f */ + { + pfarg_reg_t req; + + if ( copy_from_guest(&req, arg2, 1) ) + return -EFAULT; + rc = pfm_get_pmc_reset(&req, arg3); + if ( copy_to_guest(arg2, &req, 1) ) + return -EINVAL; + break; + } + case PFM_LOAD_CONTEXT: /* 0x10 */ + { + pfarg_load_t req; + if ( copy_from_guest(&req, arg2, 1) ) + return -EFAULT; + rc = pfm_context_load(&req, arg3, regs); + break; + } + case PFM_UNLOAD_CONTEXT: /* 0x11 */ + rc = pfm_context_unload(regs); + break; + case PFM_FREE_CONTEXT: /* 0x12 */ + pfm_context_free(ctx); + rc = 0; + break; + default: + rc = -ENOSYS; + break; + } + + return rc; +} + +/* + * main overflow processing routine. + * it can be called from the interrupt path or explicitely during the context switch code + */ +static void +pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs) +{ + pfm_ovfl_arg_t *ovfl_arg; + unsigned long mask; + unsigned long old_val, ovfl_val, new_val; + unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, reset_pmds; + unsigned long tstamp; + unsigned int i, has_smpl; + + /* + * sanity test. 
Should never happen + */ + if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; + + tstamp = ia64_get_itc(); + mask = pmc0 >> PMU_FIRST_COUNTER; + ovfl_val = pmu_conf->ovfl_val; + has_smpl = 1; + + DPRINT_ovfl(("pmc0=0x%lx iip=0x%lx ri=0x%lx\n", + pmc0, + (regs ? regs->cr_iip : 0), + (regs ? ia64_psr(regs)->ri: 0L))); + + /* + * first we update the virtual counters + * assume there was a prior ia64_srlz_d() issued + */ + for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { + + /* skip pmd which did not overflow */ + if ((mask & 0x1) == 0) continue; + + /* + * Note that the pmd is not necessarily 0 at this point as qualified events + * may have happened before the PMU was frozen. The residual count is not + * taken into consideration here but will be with any read of the pmd via + * pfm_read_pmds(). + */ + old_val = new_val = ctx_pmds[i].val; + new_val += 1 + ovfl_val; + ctx_pmds[i].val = new_val; + + /* + * check for overflow condition + */ + if (likely(old_val > new_val)) { + ovfl_pmds |= 1UL << i; + if (PMC_OVFL_NOTIFY(i)) ovfl_notify |= 1UL << i; + } + } + + /* + * there was no 64-bit overflow, nothing else to do + */ + if (ovfl_pmds == 0UL) return; + + /* + * reset all control bits + */ + reset_pmds = 0UL; + + /* + * if a sampling format module exists, then we "cache" the overflow by + * calling the module's handler() routine. + */ + if (has_smpl) { + unsigned long pmd_mask; + int ret = 0; + + pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; + ovfl_arg = &pfm_get_cpu_var(ovfl_arg); + + for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { + u64 psr; + + mask = 1UL << i; + + if ((pmd_mask & 0x1) == 0) continue; + + ovfl_arg->ovfl_pmd = (unsigned char )i; + ovfl_arg->active_set = 0; + ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ + ovfl_arg->pmd_eventid = ctx_pmds[i].eventid; + + psr = pfm_get_psr(); + + DPRINT_ovfl((">> ctx_pmd[%d] pmd=0x%lx ovfl_val=0x%lx ovfl_pmds=0x%lx regs=0x%p cregs=0x%p psr_pp=%d psr.pp=%d\n", + i, + ia64_get_pmd(i) & ovfl_val, + ovfl_val, + ovfl_pmds, + regs, + vcpu_regs(current), + ia64_psr(regs)->pp, + psr & IA64_PSR_PP ? 1 : 0)); + + ring_count[(regs->cr_ipsr & 0x300000000) >> 32]++; + + /* + * call custom buffer format record (handler) routine + */ + ret = (*ctx_buf_fmt->fmt_handler)(task, NULL, ovfl_arg, regs, tstamp); + + /* + * build the bitmask of pmds to reset now + */ + if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; + } + /* + * when the module cannot handle the rest of the overflows, we abort right here + */ + if (ret && pmd_mask) { + DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", + pmd_mask<<PMU_FIRST_COUNTER)); + } + /* + * remove the pmds we reset now from the set of pmds to reset in pfm_restart() + */ + ovfl_pmds &= ~reset_pmds; + } + +/* DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds)); */ + + /* + * reset the requested PMD registers using the short reset values + */ + if (reset_pmds) { + unsigned long bm = reset_pmds; + pfm_reset_regs(&bm, PFM_PMD_SHORT_RESET); + } + +/* DPRINT_ovfl(("owner [%d] ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", */ +/* GET_PMU_OWNER() ? GET_PMU_OWNER()->vcpu_id : -1, */ +/* ovfl_pmds, */ +/* ovfl_notify)); */ + return; + +sanity_check: + printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n", + smp_processor_id(), + task ? task->vcpu_id : -1, + pmc0); + return; + +/* stop_monitoring: */ + /* + * in SMP, zombie context is never restored but reclaimed in pfm_load_regs(). + * Moreover, zombies are also reclaimed in pfm_save_regs(). 
Therefore we can + * come here as zombie only if the task is the current task. In which case, we + * can access the PMU hardware directly. + * + * Note that zombies do have PM_VALID set. So here we do the minimal. + * + * In case the context was zombified it could not be reclaimed at the time + * the monitoring program exited. At this point, the PMU reservation has been + * returned, the sampiing buffer has been freed. We must convert this call + * into a spurious interrupt. However, we must also avoid infinite overflows + * by stopping monitoring for this task. We can only come here for a per-task + * context. All we need to do is to stop monitoring using the psr bits which + * are always task private. By re-enabling secure montioring, we ensure that + * the monitored task will not be able to re-activate monitoring. + * The task will eventually be context switched out, at which point the context + * will be reclaimed (that includes releasing ownership of the PMU). + * + * So there might be a window of time where the number of per-task session is zero + * yet one PMU might have a owner and get at most one overflow interrupt for a zombie + * context. This is safe because if a per-task session comes in, it will push this one + * out and by the virtue on pfm_save_regs(), this one will disappear. If a system wide + * session is force on that CPU, given that we use task pinning, pfm_save_regs() will + * also push our zombie context out. + * + * Overall pretty hairy stuff.... + */ + DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->vcpu_id: -1)); + pfm_clear_psr_up(); + ia64_psr(regs)->up = 0; + ia64_psr(regs)->sp = 1; + return; +} + +static int +pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs) +{ + struct task_struct *task; + unsigned long flags; + u64 pmc0; + int retval = 0; + + interrupt_count++; + + /* + * srlz.d done before arriving here + */ + pmc0 = ia64_get_pmc(0); + + task = GET_PMU_OWNER(); + + /* + * if we have some pending bits set + * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 + */ +/* if (PMC0_HAS_OVFL(pmc0) && task) { */ + if (PMC0_HAS_OVFL(pmc0)) { + /* + * we assume that pmc0.fr is always set here + */ + + PROTECT_CTX_NOPRINT(flags); + + pfm_overflow_handler(task, pmc0, regs); + + UNPROTECT_CTX_NOPRINT(flags); + + } else { + retval = -1; + } + /* + * keep it unfrozen at all times + */ + pfm_unfreeze_pmu(); + + return retval; + +} + +static irqreturn_t +pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) +{ + int this_cpu; + int ret; + + this_cpu = get_cpu(); + + ret = pfm_do_interrupt_handler(irq, arg, regs); + + put_cpu_no_resched(); + + return; +} + +/* + * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens + * during pfm_enable() hence before pfm_start(). We cannot assume monitoring + * is active or inactive based on mode. We must rely on the value in + * local_cpu_data->pfm_syst_info + */ +void +pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) +{ + struct pt_regs *regs; + unsigned long dcr; + unsigned long dcr_pp; + + dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; + + /* + * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 + * on every CPU, so we can rely on the pid to identify the idle task. + */ + if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->vcpu_id) { + regs = vcpu_regs(task); + ia64_psr(regs)->pp = is_ctxswin ? 
dcr_pp : 0; + return; + } + /* + * if monitoring has started + */ + if (dcr_pp) { + dcr = ia64_getreg(_IA64_REG_CR_DCR); + /* + * context switching in? + */ + if (is_ctxswin) { + /* mask monitoring for the idle task */ + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); + pfm_clear_psr_pp(); + ia64_srlz_i(); + return; + } + /* + * context switching out + * restore monitoring for next task + * + * Due to inlining this odd if-then-else construction generates + * better code. + */ + ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); + pfm_set_psr_pp(); + ia64_srlz_i(); + } +} + +void +pfm_save_regs(struct task_struct *task) +{ + struct thread_struct *t; + unsigned long flags; + u64 psr; + + t = &task->arch._thread; + + /* + * we always come here with interrupts ALREADY disabled by + * the scheduler. So we simply need to protect against concurrent + * access, not CPU concurrency. + */ + flags = pfm_protect_ctx_ctxsw(); + + /* + * save current PSR: needed because we modify it + */ + ia64_srlz_d(); + psr = pfm_get_psr(); + +/* BUG_ON(psr & (IA64_PSR_I)); */ + + /* + * save pmc0 ia64_srlz_d() done in pfm_save_pmds() + * we will need it on the restore path to check + * for pending overflow. + */ + t->pmcs[0] = ia64_get_pmc(0); + + /* + * unfreeze PMU if had pending overflows + */ + if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + + /* + * finally, allow context access. + * interrupts will still be masked after this call. + */ + pfm_unprotect_ctx_ctxsw(flags); +} + + +void +pfm_load_regs (struct task_struct *task) +{ + struct thread_struct *t; + unsigned long flags; + u64 psr; + int need_irq_resend; + + t = &task->arch._thread; + flags = pfm_protect_ctx_ctxsw(); + psr = pfm_get_psr(); + + need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; + +/* BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); */ +/* BUG_ON(psr & IA64_PSR_I); */ + + /* + * check for pending overflow at the time the state + * was saved. 
+ */ + if (unlikely(PMC0_HAS_OVFL(t->pmcs[0])) || 1) { + /* + * reload pmc0 with the overflow information + * On McKinley PMU, this will trigger a PMU interrupt + */ + ia64_set_pmc(0, t->pmcs[0]); + ia64_srlz_d(); + t->pmcs[0] = 0UL; + + /* + * will replay the PMU interrupt + */ + if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR); + } + + /* + * allow concurrent access to context + */ + pfm_unprotect_ctx_ctxsw(flags); +} + + +static struct irqaction perfmon_irqaction = { + .handler = (void*)pfm_interrupt_handler, + .name = "perfmon" +}; + +/* + * perfmon initialization routine, called from the initcall() table + */ +static int __init +pfm_probe_pmu(void) +{ + pmu_config_t **p; + int family; + + family = local_cpu_data->family; + p = pmu_confs; + + while(*p) { + if ((*p)->probe) { + if ((*p)->probe() == 0) goto found; + } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) { + goto found; + } + p++; + } + return -1; +found: + pmu_conf = *p; + return 0; +} + + +int __init +pfm_init(void) +{ + unsigned int n, n_counters, i; + + printk("perfmon: version %u.%u IRQ 0x%x\n", + PFM_VERSION_MAJ, + PFM_VERSION_MIN, + IA64_PERFMON_VECTOR); + + if (pfm_probe_pmu()) { + printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", + local_cpu_data->family); + return -ENODEV; + } + + /* + * compute the number of implemented PMD/PMC from the + * description tables + */ + n = 0; + for (i=0; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_IMPL(i) == 0) continue; + pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63); + n++; + } + pmu_conf->num_pmcs = n; + + n = 0; n_counters = 0; + for (i=0; PMD_IS_LAST(i) == 0; i++) { + if (PMD_IS_IMPL(i) == 0) continue; + pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63); + n++; + if (PMD_IS_COUNTING(i)) n_counters++; + } + pmu_conf->num_pmds = n; + pmu_conf->num_counters = n_counters; + + /* + * sanity checks on the number of debug registers + */ + if (pmu_conf->use_rr_dbregs) { + if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) { + printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs); + pmu_conf = NULL; + return -1; + } + if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) { + printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_ibrs); + pmu_conf = NULL; + return -1; + } + } + + printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n", + pmu_conf->pmu_name, + pmu_conf->num_pmcs, + pmu_conf->num_pmds, + pmu_conf->num_counters, + ffz(pmu_conf->ovfl_val)); + + /* sanity check */ + if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) { + printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); + pmu_conf = NULL; + return -1; + } + + /* + * initialize all our spinlocks + */ + spin_lock_init(&pfm_sessions.pfs_lock); + spin_lock_init(&pfm_buffer_fmt_lock); + + spin_lock_init(&ctx_lock); + + ctx_state = PFM_CTX_UNLOADED; + + return 0; +} +__initcall(pfm_init); + +/* + * this function is called before pfm_init() + */ +void +pfm_init_percpu (void) +{ + /* + * make sure no measurement is active + * (may inherit programmed PMCs from EFI). 
+ */ + pfm_clear_psr_pp(); + pfm_clear_psr_up(); + + /* + * we run with the PMU not frozen at all times + */ + pfm_unfreeze_pmu(); + + if (smp_processor_id() == 0) + register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); + + ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); + ia64_srlz_d(); +} + +/* + * used for debug purposes only + */ +void +dump_pmu_state(const char *from) +{ + struct task_struct *task; + struct thread_struct *t; + struct pt_regs *regs; + pfm_context_t *ctx; + unsigned long psr, dcr, info, flags; + int i, this_cpu; + + local_irq_save(flags); + + this_cpu = smp_processor_id(); + regs = vcpu_regs(current); + info = PFM_CPUINFO_GET(); + dcr = ia64_getreg(_IA64_REG_CR_DCR); + + if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) { + local_irq_restore(flags); + return; + } + + printk("CPU%d from %s() current [%d] iip=0x%lx\n", + this_cpu, + from, + current->vcpu_id, + regs->cr_iip); + + task = GET_PMU_OWNER(); + ctx = GET_PMU_CTX(); + + printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->vcpu_id : -1, ctx); + + psr = pfm_get_psr(); + + printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", + this_cpu, + ia64_get_pmc(0), + psr & IA64_PSR_PP ? 1 : 0, + psr & IA64_PSR_UP ? 1 : 0, + dcr & IA64_DCR_PP ? 1 : 0, + info, + ia64_psr(regs)->up, + ia64_psr(regs)->pp); + + ia64_psr(regs)->up = 0; + ia64_psr(regs)->pp = 0; + + t = ¤t->arch._thread; + + for (i=1; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_IMPL(i) == 0) continue; + printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]); + } + + for (i=1; PMD_IS_LAST(i) == 0; i++) { + if (PMD_IS_IMPL(i) == 0) continue; + printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]); + } + + if (ctx) { + printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d ctx_task=[%d] saved_psr_up=0x%lx\n", + this_cpu, + ctx->ctx_state, + ctx->ctx_smpl_vaddr, + ctx->ctx_smpl_hdr, + ctx->ctx_msgq_head, + ctx->ctx_msgq_tail, + ctx->ctx_saved_psr_up); + } + local_irq_restore(flags); +} + +/* + * called from process.c:copy_thread(). task is new child. + */ +void +pfm_inherit(struct task_struct *task, struct pt_regs *regs) +{ + struct thread_struct *thread; + + DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->vcpu_id)); + + thread = &task->arch._thread; + + /* + * cut links inherited from parent (current) + */ + thread->pfm_context = NULL; + + PFM_SET_WORK_PENDING(task, 0); + + /* + * the psr bits are already set properly in copy_threads() + */ +} +#endif /* CONFIG_PERFMON */ diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_default_smpl.c xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_default_smpl.c --- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_default_smpl.c 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_default_smpl.c 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,297 @@ +/* + * Copyright (C) 2002-2003 Hewlett-Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + * + * This file implements the default sampling buffer format + * for the Linux/ia64 perfmon-2 subsystem. 
+ */ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/config.h> +#include <linux/init.h> +#include <asm/delay.h> +#include <linux/smp.h> + +#include <asm/perfmon.h> +#include <asm/perfmon_default_smpl.h> + +MODULE_AUTHOR("Stephane Eranian <eranian@xxxxxxxxxx>"); +MODULE_DESCRIPTION("perfmon default sampling format"); +MODULE_LICENSE("GPL"); + +#define DEFAULT_DEBUG 1 + +#ifdef DEFAULT_DEBUG +#define DPRINT(a) \ + do { \ + if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ + } while (0) + +#define DPRINT_ovfl(a) \ + do { \ + if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ + } while (0) + +#else +#define DPRINT(a) +#define DPRINT_ovfl(a) +#endif + +static int +default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data) +{ + pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data; + int ret = 0; + + if (data == NULL) { + DPRINT(("[%d] no argument passed\n", task->pid)); + return -EINVAL; + } + + DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu)); + + /* + * must hold at least the buffer header + one minimally sized entry + */ + if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL; + + DPRINT(("buf_size=%lu\n", arg->buf_size)); + + return ret; +} + +static int +default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size) +{ + pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; + + /* + * size has been validated in default_validate + */ + *size = arg->buf_size; + + return 0; +} + +static int +default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data) +{ + pfm_default_smpl_hdr_t *hdr; + pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; + + hdr = (pfm_default_smpl_hdr_t *)buf; + + hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION; + hdr->hdr_buf_size = arg->buf_size; + hdr->hdr_cur_offs = sizeof(*hdr); + hdr->hdr_overflows = 0UL; + hdr->hdr_count = 0UL; + + DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n", + task->pid, + buf, + hdr->hdr_buf_size, + sizeof(*hdr), + hdr->hdr_version, + hdr->hdr_cur_offs)); + + return 0; +} + +static int +default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp) +{ + pfm_default_smpl_hdr_t *hdr; + pfm_default_smpl_entry_t *ent; + void *cur, *last; + unsigned long *e, entry_size; + unsigned int npmds, i; + unsigned char ovfl_pmd; + unsigned char ovfl_notify; + + if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) { + DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg)); + return -EINVAL; + } + + hdr = (pfm_default_smpl_hdr_t *)buf; + cur = buf+hdr->hdr_cur_offs; + last = buf+hdr->hdr_buf_size; + ovfl_pmd = arg->ovfl_pmd; + ovfl_notify = arg->ovfl_notify; + + /* + * precheck for sanity + */ + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; + + npmds = hweight64(arg->smpl_pmds[0]); + + ent = (pfm_default_smpl_entry_t *)cur; + + prefetch(arg->smpl_pmds_values); + + entry_size = sizeof(*ent) + (npmds << 3); + + /* position for first pmd */ + e = (unsigned long *)(ent+1); + + hdr->hdr_count++; + + DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n", + task->pid, + hdr->hdr_count, + cur, last, + last-cur, + ovfl_pmd, + 
ovfl_notify, npmds)); + + /* + * current = task running at the time of the overflow. + * + * per-task mode: + * - this is ususally the task being monitored. + * Under certain conditions, it might be a different task + * + * system-wide: + * - this is not necessarily the task controlling the session + */ + ent->pid = current->pid; + ent->ovfl_pmd = ovfl_pmd; + ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val; + + /* + * where did the fault happen (includes slot number) + */ + ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3); + + ent->tstamp = stamp; + ent->cpu = smp_processor_id(); + ent->set = arg->active_set; + ent->tgid = current->tgid; + + /* + * selectively store PMDs in increasing index number + */ + if (npmds) { + unsigned long *val = arg->smpl_pmds_values; + for(i=0; i < npmds; i++) { + *e++ = *val++; + } + } + + /* + * update position for next entry + */ + hdr->hdr_cur_offs += entry_size; + cur += entry_size; + + /* + * post check to avoid losing the last sample + */ + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; + + /* + * keep same ovfl_pmds, ovfl_notify + */ + arg->ovfl_ctrl.bits.notify_user = 0; + arg->ovfl_ctrl.bits.block_task = 0; + arg->ovfl_ctrl.bits.mask_monitoring = 0; + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */ + + return 0; +full: + DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify)); + + /* + * increment number of buffer overflow. + * important to detect duplicate set of samples. + */ + hdr->hdr_overflows++; + + /* + * if no notification requested, then we saturate the buffer + */ + if (ovfl_notify == 0) { + arg->ovfl_ctrl.bits.notify_user = 0; + arg->ovfl_ctrl.bits.block_task = 0; + arg->ovfl_ctrl.bits.mask_monitoring = 1; + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; + } else { + arg->ovfl_ctrl.bits.notify_user = 1; + arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */ + arg->ovfl_ctrl.bits.mask_monitoring = 1; + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */ + } + return -1; /* we are full, sorry */ +} + +static int +default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) +{ + pfm_default_smpl_hdr_t *hdr; + + hdr = (pfm_default_smpl_hdr_t *)buf; + + hdr->hdr_count = 0UL; + hdr->hdr_cur_offs = sizeof(*hdr); + + ctrl->bits.mask_monitoring = 0; + ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */ + + return 0; +} + +static int +default_exit(struct task_struct *task, void *buf, struct pt_regs *regs) +{ + DPRINT(("[%d] exit(%p)\n", task->pid, buf)); + return 0; +} + +static pfm_buffer_fmt_t default_fmt={ + .fmt_name = "default_format", + .fmt_uuid = PFM_DEFAULT_SMPL_UUID, + .fmt_arg_size = sizeof(pfm_default_smpl_arg_t), + .fmt_validate = default_validate, + .fmt_getsize = default_get_size, + .fmt_init = default_init, + .fmt_handler = default_handler, + .fmt_restart = default_restart, + .fmt_restart_active = default_restart, + .fmt_exit = default_exit, +}; + +static int __init +pfm_default_smpl_init_module(void) +{ + int ret; + + ret = pfm_register_buffer_fmt(&default_fmt); + if (ret == 0) { + printk("perfmon_default_smpl: %s v%u.%u registered\n", + default_fmt.fmt_name, + PFM_DEFAULT_SMPL_VERSION_MAJ, + PFM_DEFAULT_SMPL_VERSION_MIN); + } else { + printk("perfmon_default_smpl: %s cannot register ret=%d\n", + default_fmt.fmt_name, + ret); + } + + return ret; +} + +static void __exit +pfm_default_smpl_cleanup_module(void) +{ + int ret; + 
ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid); + + printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret); +} + +module_init(pfm_default_smpl_init_module); +module_exit(pfm_default_smpl_cleanup_module); + diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_generic.h xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_generic.h --- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_generic.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_generic.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,45 @@ +/* + * This file contains the generic PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + */ + +static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static pmu_config_t pmu_conf_gen={ + .pmu_name = "Generic", + .pmu_family = 0xff, /* any */ + .ovfl_val = (1UL << 32) - 1, + .num_ibrs = 0, /* does not use */ + .num_dbrs = 0, /* does not use */ + .pmd_desc = pfm_gen_pmd_desc, + .pmc_desc = pfm_gen_pmc_desc +}; + diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_itanium.h xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_itanium.h --- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_itanium.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_itanium.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,115 @@ +/* + * This file contains the Itanium PMU register description tables + * and pmc checker used by perfmon.c. 
+ * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + */ +static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); + +static pfm_reg_desc_t pfm_ita_pmc_desc[]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, +/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static int +pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + int ret; + int is_loaded; + + /* sanitfy check */ + if (ctx == NULL) return -EINVAL; + + is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; + + /* + * we must clear the (instruction) debug registers if pmc13.ta bit is cleared + * before they are written (fl_using_dbreg==0) to avoid picking up stale information. + */ + if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val)); + + /* don't mix debug with perfmon */ +/* if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; */ + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs); + if (ret) return ret; + } + + /* + * we must clear the (data) debug registers if pmc11.pt bit is cleared + * before they are written (fl_using_dbreg==0) to avoid picking up stale information. + */ + if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val)); + + /* don't mix debug with perfmon */ +/* if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; */ + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs); + if (ret) return ret; + } + return 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
+ */ +static pmu_config_t pmu_conf_ita={ + .pmu_name = "Itanium", + .pmu_family = 0x7, + .ovfl_val = (1UL << 32) - 1, + .pmd_desc = pfm_ita_pmd_desc, + .pmc_desc = pfm_ita_pmc_desc, + .num_ibrs = 8, + .num_dbrs = 8, + .use_rr_dbregs = 1, /* debug register are use for range retrictions */ +}; + + diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_mckinley.h xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_mckinley.h --- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_mckinley.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_mckinley.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,180 @@ +/* + * This file contains the McKinley PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + */ +static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); + +static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd2 */ { 
PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, +/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +/* + * PMC reserved fields must have their power-up values preserved + */ +static int +pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + unsigned long tmp1, tmp2, ival = *val; + + /* remove reserved areas from user value */ + tmp1 = ival & PMC_RSVD_MASK(cnum); + + /* get reserved fields values */ + tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); + + *val = tmp1 | tmp2; + + DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", + cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); + return 0; +} + +/* + * task can be NULL if the context is unloaded + */ +static int +pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + int ret = 0, check_case1 = 0; + unsigned long val8 = 0, val14 = 0, val13 = 0; + int is_loaded; + + printk("pfm_mck_pmc_check\n"); + + /* first preserve the reserved fields */ + pfm_mck_reserved(cnum, val, regs); + + is_loaded = 1; + + /* + * we must clear the debug registers if pmc13 has a value which enable + * memory pipeline event constraints. 
In this case we need to clear the + * the debug registers if they have not yet been accessed. This is required + * to avoid picking stale state. + * PMC13 is "active" if: + * one of the pmc13.cfg_dbrpXX field is different from 0x3 + * AND + * at the corresponding pmc13.ena_dbrpXX is set. + */ + DPRINT(("cnum=%u val=0x%lx, loaded=%d\n", cnum, *val, is_loaded)); + + if (cnum == 13 && is_loaded + && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL) { + + DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val)); + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); + if (ret) return ret; + } + /* + * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled + * before they are (fl_using_dbreg==0) to avoid picking up stale information. + */ + if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL)) { + + DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val)); + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); + if (ret) return ret; + + } + + switch(cnum) { + case 4: *val |= 1UL << 23; /* force power enable bit */ + break; + case 8: val8 = *val; + val13 = ctx_pmcs[13]; + val14 = ctx_pmcs[14]; + check_case1 = 1; + break; + case 13: val8 = ctx_pmcs[8]; + val13 = *val; + val14 = ctx_pmcs[14]; + check_case1 = 1; + break; + case 14: val8 = ctx_pmcs[8]; + val13 = ctx_pmcs[13]; + val14 = *val; + check_case1 = 1; + break; + } + /* check illegal configuration which can produce inconsistencies in tagging + * i-side events in L1D and L2 caches + */ + if (check_case1) { + ret = ((val13 >> 45) & 0xf) == 0 + && ((val8 & 0x1) == 0) + && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) + ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); + + if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n")); + } + + return ret ? -EINVAL : 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static pmu_config_t pmu_conf_mck={ + .pmu_name = "Itanium 2", + .pmu_family = 0x1f, + .flags = PFM_PMU_IRQ_RESEND, + .ovfl_val = (1UL << 47) - 1, + .pmd_desc = pfm_mck_pmd_desc, + .pmc_desc = pfm_mck_pmc_desc, + .num_ibrs = 8, + .num_dbrs = 8, + .use_rr_dbregs = 1 /* debug register are use for range retrictions */ +}; + + diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_montecito.h xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_montecito.h --- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_montecito.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_montecito.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,269 @@ +/* + * This file contains the Montecito PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian <eranian@xxxxxxxxxx> + */ +static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); + +#define RDEP_MONT_ETB (RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\ + RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63)) +#define RDEP_MONT_DEAR (RDEP(32)|RDEP(33)|RDEP(36)) +#define RDEP_MONT_IEAR (RDEP(34)|RDEP(35)) + +static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc4 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}}, +/* pmc5 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}}, +/* pmc6 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}}, +/* pmc7 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}}, +/* pmc8 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}}, +/* pmc9 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}}, +/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}}, +/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}}, +/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}}, +/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}}, +/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}}, +/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}}, +/* pmc16 */ { PFM_REG_NOTIMPL, }, +/* pmc17 */ { PFM_REG_NOTIMPL, }, +/* pmc18 */ { PFM_REG_NOTIMPL, }, +/* pmc19 */ { PFM_REG_NOTIMPL, }, +/* pmc20 */ { PFM_REG_NOTIMPL, }, +/* pmc21 */ { PFM_REG_NOTIMPL, }, +/* pmc22 */ { PFM_REG_NOTIMPL, }, +/* pmc23 */ { PFM_REG_NOTIMPL, }, +/* pmc24 */ { PFM_REG_NOTIMPL, }, +/* pmc25 */ { PFM_REG_NOTIMPL, }, +/* pmc26 */ { PFM_REG_NOTIMPL, }, +/* pmc27 */ { PFM_REG_NOTIMPL, }, +/* pmc28 */ { PFM_REG_NOTIMPL, }, +/* pmc29 */ { PFM_REG_NOTIMPL, }, +/* pmc30 */ { PFM_REG_NOTIMPL, }, +/* pmc31 */ { PFM_REG_NOTIMPL, }, +/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffff, 0x30f01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffff, 0xf01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}}, +/* pmc38 */ { 
PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, +/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}}, +/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefe, 0x1e00018181818, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, + { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_NOTIMPL, }, +/* pmd1 */ { PFM_REG_NOTIMPL, }, +/* pmd2 */ { PFM_REG_NOTIMPL, }, +/* pmd3 */ { PFM_REG_NOTIMPL, }, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}}, +/* pmd8 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}}, +/* pmd9 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}}, +/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}}, +/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}}, +/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}}, +/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}}, +/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}}, +/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}}, +/* pmd16 */ { PFM_REG_NOTIMPL, }, +/* pmd17 */ { PFM_REG_NOTIMPL, }, +/* pmd18 */ { PFM_REG_NOTIMPL, }, +/* pmd19 */ { PFM_REG_NOTIMPL, }, +/* pmd20 */ { PFM_REG_NOTIMPL, }, +/* pmd21 */ { PFM_REG_NOTIMPL, }, +/* pmd22 */ { PFM_REG_NOTIMPL, }, +/* pmd23 */ { PFM_REG_NOTIMPL, }, +/* pmd24 */ { PFM_REG_NOTIMPL, }, +/* pmd25 */ { PFM_REG_NOTIMPL, }, +/* pmd26 */ { PFM_REG_NOTIMPL, }, +/* pmd27 */ { PFM_REG_NOTIMPL, }, +/* pmd28 */ { PFM_REG_NOTIMPL, }, +/* pmd29 */ { PFM_REG_NOTIMPL, }, +/* pmd30 */ { PFM_REG_NOTIMPL, }, +/* pmd31 */ { PFM_REG_NOTIMPL, }, +/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}}, +/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}}, +/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}}, +/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}}, +/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}}, +/* pmd37 */ { PFM_REG_NOTIMPL, }, +/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd40 */ { PFM_REG_NOTIMPL, }, +/* pmd41 */ { PFM_REG_NOTIMPL, }, +/* pmd42 */ { PFM_REG_NOTIMPL, }, +/* pmd43 */ { PFM_REG_NOTIMPL, }, +/* pmd44 */ { PFM_REG_NOTIMPL, }, +/* pmd45 */ { PFM_REG_NOTIMPL, }, +/* pmd46 */ { PFM_REG_NOTIMPL, }, +/* pmd47 */ { PFM_REG_NOTIMPL, }, +/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, 
{RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, + { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +/* + * PMC reserved fields must have their power-up values preserved + */ +static int +pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + unsigned long tmp1, tmp2, ival = *val; + + /* remove reserved areas from user value */ + tmp1 = ival & PMC_RSVD_MASK(cnum); + + /* get reserved fields values */ + tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); + + *val = tmp1 | tmp2; + + DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", + cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); + return 0; +} + +/* + * task can be NULL if the context is unloaded + */ +static int +pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + int ret = 0; + unsigned long val32 = 0, val38 = 0, val41 = 0; + unsigned long tmpval; + int check_case1 = 0; + int is_loaded; + + /* first preserve the reserved fields */ + pfm_mont_reserved(cnum, val, regs); + + tmpval = *val; + + /* sanity check */ + if (ctx == NULL) return -EINVAL; + + is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; + + /* + * we must clear the debug registers if pmc41 has a value which enable + * memory pipeline event constraints. In this case we need to clear the + * the debug registers if they have not yet been accessed. This is required + * to avoid picking stale state. + * PMC41 is "active" if: + * one of the pmc41.cfg_dtagXX field is different from 0x3 + * AND + * at the corresponding pmc41.en_dbrpXX is set. 
+ * AND + * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used) + */ + DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded)); + + if (cnum == 41 && is_loaded + && (tmpval & 0x1e00000000000) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval)); + + /* don't mix debug with perfmon */ +/* if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; */ + + /* + * a count of 0 will mark the debug registers if: + * AND + */ + ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); + if (ret) return ret; + } + /* + * we must clear the (instruction) debug registers if: + * pmc38.ig_ibrpX is 0 (enabled) + * AND + * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used) + */ + if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval)); + + /* don't mix debug with perfmon */ +/* if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; */ + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); + if (ret) return ret; + + } + switch(cnum) { + case 32: val32 = *val; + val38 = ctx->ctx_pmcs[38]; + val41 = ctx->ctx_pmcs[41]; + check_case1 = 1; + break; + case 38: val38 = *val; + val32 = ctx->ctx_pmcs[32]; + val41 = ctx->ctx_pmcs[41]; + check_case1 = 1; + break; + case 41: val41 = *val; + val32 = ctx->ctx_pmcs[32]; + val38 = ctx->ctx_pmcs[38]; + check_case1 = 1; + break; + } + /* check illegal configuration which can produce inconsistencies in tagging + * i-side events in L1D and L2 caches + */ + if (check_case1) { + ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0) + && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0) + || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0)); + if (ret) { + DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32)); + return -EINVAL; + } + } + *val = tmpval; + return 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
+ */ +static pmu_config_t pmu_conf_mont={ + .pmu_name = "Montecito", + .pmu_family = 0x20, + .flags = PFM_PMU_IRQ_RESEND, + .ovfl_val = (1UL << 47) - 1, + .pmd_desc = pfm_mont_pmd_desc, + .pmc_desc = pfm_mont_pmc_desc, + .num_ibrs = 8, + .num_dbrs = 8, + .use_rr_dbregs = 1 /* debug register are use for range retrictions */ +}; diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/oprofile/Makefile xenoprof-ia64-unstable/xen/arch/ia64/oprofile/Makefile --- xen-ia64-unstable.hg/xen/arch/ia64/oprofile/Makefile 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/oprofile/Makefile 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,2 @@ +obj-y += xenoprof.o +obj-y += perfmon.o diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/oprofile/op_counter.h xenoprof-ia64-unstable/xen/arch/ia64/oprofile/op_counter.h --- xen-ia64-unstable.hg/xen/arch/ia64/oprofile/op_counter.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/oprofile/op_counter.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,33 @@ +/** + * @file op_counter.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +#define OP_MAX_COUNTER 8 + +/* Per-perfctr configuration as set via + * oprofilefs. + */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +extern struct op_counter_config counter_config[]; + +#endif /* OP_COUNTER_H */ diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/oprofile/perfmon.c xenoprof-ia64-unstable/xen/arch/ia64/oprofile/perfmon.c --- xen-ia64-unstable.hg/xen/arch/ia64/oprofile/perfmon.c 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/oprofile/perfmon.c 2006-07-03 10:45:09.000000000 +0900 @@ -0,0 +1,143 @@ +/** + * @file perfmon.c + * + * @remark Copyright 2003 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + */ + +#include <linux/kernel.h> +#include <linux/config.h> +/* #include <linux/oprofile.h> */ +#include <linux/sched.h> +#include <asm/perfmon.h> +#include <asm/regs.h> +#include <asm/ptrace.h> +#include <xen/event.h> +/* #include <asm/errno.h> */ + +static int allow_ints; + +extern struct domain *primary_profiler; + +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip, + int mode, int event); +extern int is_active(struct domain *d); + +static int xenoprof_samples = 0; + +static int +perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, + struct pt_regs *regs, unsigned long stamp) +{ + int event = arg->pmd_eventid; + int mode = 0; + + if ( ring_2(regs) ) // DomU kernel + mode = 1; + else if ( ring_0(regs) ) // Hypervisor + mode = 2; + + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; + + /* the owner of the oprofile event buffer may have exited + * without perfmon being shutdown (e.g. 
SIGSEGV) + */ + if (allow_ints) { + xenoprof_log_event(current, profile_pc(regs), mode, event); + xenoprof_samples++; +/* printk("perfmon_handler: 0x%lx\n", profile_pc(regs)); */ + if ( is_active(current->domain) ) + { + if ( mode != 2) + { + send_guest_vcpu_virq(current, VIRQ_XENOPROF); + } + } + } + return 0; +} + + +int perfmon_start(void) +{ + allow_ints = 1; + return 0; +} + + +void perfmon_stop(void) +{ + allow_ints = 0; + printk("xenoprof_samples: %d\n", xenoprof_samples); + xenoprof_samples = 0; +} + + +#define OPROFILE_FMT_UUID { \ + 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c } + +static pfm_buffer_fmt_t oprofile_fmt = { + .fmt_name = "oprofile_format", + .fmt_uuid = OPROFILE_FMT_UUID, + .fmt_handler = perfmon_handler, +}; + + +static char * get_cpu_type(void) +{ + __u8 family = local_cpu_data->family; + + switch (family) { + case 0x07: + return "ia64/itanium"; + case 0x1f: + return "ia64/itanium2"; + default: + return "ia64/ia64"; + } +} + + +/* all the ops are handled via userspace for IA64 perfmon */ + +static int using_perfmon; + +int perfmon_init(int *num_events, int *is_primary, char *cpu_type) +{ + int ret = 0; + int prim = 0; + + if ( primary_profiler == NULL ) { + /* For now, only dom0 can be the primary profiler */ + if ( current->domain->domain_id == 0 ) { + ret = pfm_register_buffer_fmt(&oprofile_fmt); + + if (ret) { + printk("pfm_register_buffer_fmt: %d\n", ret); + return -ENODEV; + } + primary_profiler = current->domain; + prim = 1; + } + } + + strncpy (cpu_type, get_cpu_type(), XENOPROF_CPU_TYPE_SIZE - 1); + using_perfmon = 1; + *num_events = pmu_conf->num_counters; + *is_primary = prim; +/* printk("perfmon_init: ret=%d num_events=%d is_primary=%d\n", ret, *num_events, *is_primary); */ + + return 0; +} + + +void perfmon_exit(void) +{ + if (!using_perfmon) + return; + using_perfmon = 0; + + pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid); +} diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/oprofile/xenoprof.c xenoprof-ia64-unstable/xen/arch/ia64/oprofile/xenoprof.c --- xen-ia64-unstable.hg/xen/arch/ia64/oprofile/xenoprof.c 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/oprofile/xenoprof.c 2006-07-03 10:43:21.000000000 +0900 @@ -0,0 +1,701 @@ +/* + * Copyright (C) 2005 Hewlett-Packard Co. + * written by Aravind Menon & Jose Renato Santos + * (email: xenoprof@xxxxxxxxxxxxx) + */ + +#include <xen/guest_access.h> +#include <xen/sched.h> +#include <public/xenoprof.h> + +#include "op_counter.h" + +/* Limit amount of pages used for shared buffer (per domain) */ +#define MAX_OPROF_SHARED_PAGES 32 + +struct domain *active_domains[MAX_OPROF_DOMAINS]; +int active_ready[MAX_OPROF_DOMAINS]; +unsigned int adomains; + +struct domain *passive_domains[MAX_OPROF_DOMAINS]; +unsigned int pdomains; + +unsigned int activated; +struct domain *primary_profiler; +int xenoprof_state = XENOPROF_IDLE; + +u64 total_samples; +u64 invalid_buffer_samples; +u64 corrupted_buffer_samples; +u64 lost_samples; +u64 active_samples; +u64 passive_samples; +u64 idle_samples; +u64 others_samples; + +#define IA64_XENOPROF_PADDR (1UL << 30) + +#define xenoprof_shared_maddr(d, t, i) \ + virt_to_maddr((char*)(t)->rawbuf + ((i) << PAGE_SHIFT)) + +#ifndef CONFIG_XEN_IA64_DOM0_VP +# define xenoprof_shared_gmfn(d, t, i) \ + ({ ((d) == dom0) ? 
\ + (virt_to_maddr((t)->rawbuf) >> PAGE_SHIFT) + (i): \ + assign_domain_page((d), \ + IA64_XENOPROF_PADDR + ((i) << PAGE_SHIFT), \ + xenoprof_shared_maddr((d), (t), (i))), \ + (IA64_XENOPROF_PADDR >> PAGE_SHIFT) + (i);}) +#else /* CONFIG_XEN_IA64_DOM0_VP */ +#define xenoprof_shared_gmfn(d, t, i) \ + ({ assign_domain_page((d), \ + IA64_XENOPROF_PADDR + ((i) << PAGE_SHIFT), \ + xenoprof_shared_maddr((d), (t), (i))); \ + (IA64_XENOPROF_PADDR >> PAGE_SHIFT) + (i);}) +#endif /* CONFIG_XEN_IA64_DOM0_VP */ + + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +extern int perfmon_init(int *num_events, int *is_primary, char *cpu_type); +extern int perfmon_start(void); +extern void perfmon_stop(void); +extern void perfmon_exit(void); +static void xenoprof_reset_stat(void); + +int is_active(struct domain *d) +{ + struct xenoprof *x = d->xenoprof; + return ((x != NULL) && (x->domain_type == XENOPROF_DOMAIN_ACTIVE)); +} + +int is_passive(struct domain *d) +{ + struct xenoprof *x = d->xenoprof; + return ((x != NULL) && (x->domain_type == XENOPROF_DOMAIN_PASSIVE)); +} + +int is_profiled(struct domain *d) +{ + return (is_active(d) || is_passive(d)); +} + +static void xenoprof_reset_stat(void) +{ + total_samples = 0; + invalid_buffer_samples = 0; + corrupted_buffer_samples = 0; + lost_samples = 0; + active_samples = 0; + passive_samples = 0; + idle_samples = 0; + others_samples = 0; +} + +static void xenoprof_reset_buf(struct domain *d) +{ + int j; + struct xenoprof_buf *buf; + + if ( d->xenoprof == NULL ) + { + printk("xenoprof_reset_buf: ERROR - Unexpected " + "Xenoprof NULL pointer \n"); + return; + } + + for ( j = 0; j < MAX_VIRT_CPUS; j++ ) + { + buf = d->xenoprof->vcpu[j].buffer; + if ( buf != NULL ) + { + buf->event_head = 0; + buf->event_tail = 0; + } + } +} + +char *alloc_xenoprof_buf(struct domain *d, int npages) +{ + char *rawbuf; + int i, order; + + /* allocate pages to store sample buffer shared with domain */ + order = get_order_from_pages(npages); + rawbuf = alloc_xenheap_pages(order); + if ( rawbuf == NULL ) + { + printk("alloc_xenoprof_buf(): memory allocation failed\n"); + return 0; + } + + /* Share pages so that kernel can map it */ + for ( i = 0; i < npages; i++ ) + share_xen_page_with_guest( + virt_to_page(rawbuf + i * PAGE_SIZE), + d, XENSHARE_writable); + + return rawbuf; +} + +int alloc_xenoprof_struct(struct domain *d, int max_samples, int is_passive) +{ + struct vcpu *v; + int nvcpu, npages, bufsize, max_bufsize; + int i; + + d->xenoprof = xmalloc(struct xenoprof); + + if ( d->xenoprof == NULL ) + { + printk ("alloc_xenoprof_struct(): memory " + "allocation (xmalloc) failed\n"); + return -ENOMEM; + } + + memset(d->xenoprof, 0, sizeof(*d->xenoprof)); + + nvcpu = 0; + for_each_vcpu ( d, v ) + nvcpu++; + + /* reduce buffer size if necessary to limit pages allocated */ + bufsize = sizeof(struct xenoprof_buf) + + (max_samples - 1) * sizeof(struct event_log); + max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu; + if ( bufsize > max_bufsize ) + { + bufsize = max_bufsize; + max_samples = ( (max_bufsize - sizeof(struct xenoprof_buf)) / + sizeof(struct event_log) ) + 1; + } + + npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1; + + d->xenoprof->rawbuf = alloc_xenoprof_buf(is_passive ? 
dom0 : d, npages); + + if ( d->xenoprof->rawbuf == NULL ) + { + xfree(d->xenoprof); + d->xenoprof = NULL; + return -ENOMEM; + } + + d->xenoprof->npages = npages; + d->xenoprof->nbuf = nvcpu; + d->xenoprof->bufsize = bufsize; + d->xenoprof->domain_ready = 0; + d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED; + + /* Update buffer pointers for active vcpus */ + i = 0; + for_each_vcpu ( d, v ) + { + d->xenoprof->vcpu[v->vcpu_id].event_size = max_samples; + d->xenoprof->vcpu[v->vcpu_id].buffer = + (struct xenoprof_buf *)&d->xenoprof->rawbuf[i * bufsize]; + d->xenoprof->vcpu[v->vcpu_id].buffer->event_size = max_samples; + d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id = v->vcpu_id; + +/* printk("vcpu_id=%d event_size=%d npages=%d\n", d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id, max_samples, npages); */ + + i++; + /* in the unlikely case that the number of active vcpus changes */ + if ( i >= nvcpu ) + break; + } + + return 0; +} + +void free_xenoprof_pages(struct domain *d) +{ + struct xenoprof *x; + int order; + + x = d->xenoprof; + if ( x == NULL ) + return; + + if ( x->rawbuf != NULL ) + { + order = get_order_from_pages(x->npages); + free_xenheap_pages(x->rawbuf, order); + } + + xfree(x); + d->xenoprof = NULL; +} + +int active_index(struct domain *d) +{ + int i; + + for ( i = 0; i < adomains; i++ ) + if ( active_domains[i] == d ) + return i; + + return -1; +} + +int set_active(struct domain *d) +{ + int ind; + struct xenoprof *x; + + ind = active_index(d); + if ( ind < 0 ) + return -EPERM; + + x = d->xenoprof; + if ( x == NULL ) + return -EPERM; + + x->domain_ready = 1; + x->domain_type = XENOPROF_DOMAIN_ACTIVE; + active_ready[ind] = 1; + activated++; + + return 0; +} + +int reset_active(struct domain *d) +{ + int ind; + struct xenoprof *x; + + ind = active_index(d); + if ( ind < 0 ) + return -EPERM; + + x = d->xenoprof; + if ( x == NULL ) + return -EPERM; + + x->domain_ready = 0; + x->domain_type = XENOPROF_DOMAIN_IGNORED; + active_ready[ind] = 0; + active_domains[ind] = NULL; + activated--; + put_domain(d); + + if ( activated <= 0 ) + adomains = 0; + + return 0; +} + +void reset_passive(struct domain *d) +{ + struct xenoprof *x; + + if (d==0) + return; + + x = d->xenoprof; + if ( x == NULL ) + return; + + x->domain_type = XENOPROF_DOMAIN_IGNORED; + + return; +} + +void reset_active_list(void) +{ + int i; + + for ( i = 0; i < adomains; i++ ) + { + if ( active_ready[i] ) + { + reset_active(active_domains[i]); + } + } + + adomains = 0; + activated = 0; +} + +void reset_passive_list(void) +{ + int i; + + for ( i = 0; i < pdomains; i++ ) + { + reset_passive(passive_domains[i]); + put_domain(passive_domains[i]); + passive_domains[i] = NULL; + } + + pdomains = 0; +} + +int add_active_list (domid_t domid) +{ + struct domain *d; + + if ( adomains >= MAX_OPROF_DOMAINS ) + return -E2BIG; + + d = find_domain_by_id(domid); + if ( d == NULL ) + return -EINVAL; + + active_domains[adomains] = d; + active_ready[adomains] = 0; + adomains++; + + return 0; +} + +int add_passive_list(XEN_GUEST_HANDLE(void) arg) +{ + struct xenoprof_passive passive; + struct domain *d; + int ret = 0; + + if ( pdomains >= MAX_OPROF_DOMAINS ) + return -E2BIG; + + if ( copy_from_guest(&passive, arg, 1) ) + return -EFAULT; + + d = find_domain_by_id(passive.domain_id); + if ( d == NULL ) + return -EINVAL; + + if ( (d->xenoprof == NULL) && + ((ret = alloc_xenoprof_struct(d, passive.max_samples, 1)) < 0) ) { + put_domain(d); + return -ENOMEM; + } + + d->xenoprof->domain_type = XENOPROF_DOMAIN_PASSIVE; + passive.nbuf = 
d->xenoprof->nbuf; + passive.bufsize = d->xenoprof->bufsize; + + passive.buf_maddr = IA64_XENOPROF_PADDR; + +/* for (i = 0; i < d->xenoprof->npages; i++) { */ +/* unsigned long gmfn = xenoprof_shared_gmfn(d, d->xenoprof, i); */ +/* if (i == 0) */ +/* passive.buf_maddr = gmfn << PAGE_SHIFT; */ +/* } */ + +/* passive.buf_maddr = __pa(d->xenoprof->rawbuf); */ + + if ( copy_to_guest(arg, &passive, 1) ) { + put_domain(d); + return -EFAULT; + } + + passive_domains[pdomains] = d; + pdomains++; + + return ret; +} + +void xenoprof_log_event( + struct vcpu *vcpu, unsigned long eip, int mode, int event) +{ + struct xenoprof_vcpu *v; + struct xenoprof_buf *buf; + int head; + int tail; + int size; + + + total_samples++; + + /* ignore samples of un-monitored domains */ + /* Count samples in idle separate from other unmonitored domains */ + if ( !is_profiled(vcpu->domain) ) + { + others_samples++; + return; + } + + v = &vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id]; + + /* Sanity check. Should never happen */ + if ( v->buffer == NULL ) + { + invalid_buffer_samples++; + return; + } + + buf = vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id].buffer; + + head = buf->event_head; + tail = buf->event_tail; + size = v->event_size; + + /* make sure indexes in shared buffer are sane */ + if ( (head < 0) || (head >= size) || (tail < 0) || (tail >= size) ) + { + corrupted_buffer_samples++; + return; + } + + if ( (head == tail - 1) || (head == size - 1 && tail == 0) ) + { + buf->lost_samples++; + lost_samples++; + } + else + { + buf->event_log[head].eip = eip; + buf->event_log[head].mode = mode; + buf->event_log[head].event = event; + head++; + if ( head >= size ) + head = 0; + buf->event_head = head; + if ( is_active(vcpu->domain) ) + active_samples++; + else + passive_samples++; + if ( mode == 0 ) + buf->user_samples++; + else if ( mode == 1 ) + buf->kernel_samples++; + else + buf->xen_samples++; + } +} + +int xenoprof_op_init(XEN_GUEST_HANDLE(void) arg) +{ + struct xenoprof_init xenoprof_init; + int is_primary, num_events; + struct domain *d = current->domain; + int ret; + int i; + + if ( copy_from_guest(&xenoprof_init, arg, 1) ) + return -EFAULT; + + ret = perfmon_init(&num_events, + &is_primary, + xenoprof_init.cpu_type); + if ( ret < 0 ) + goto err; + + if ( is_primary ) + primary_profiler = current->domain; + + printk("domain=%d xenoprof=%p\n", d->domain_id, d->xenoprof); + + /* + * We allocate xenoprof struct and buffers only at first time xenoprof_init + * is called. Memory is then kept until domain is destroyed. 
+ */ + if ( (d->xenoprof == NULL) && + ((ret = alloc_xenoprof_struct(d, xenoprof_init.max_samples, 0)) < 0) ) + goto err; + + xenoprof_reset_buf(d); + + d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED; + d->xenoprof->domain_ready = 0; + d->xenoprof->is_primary = is_primary; + + xenoprof_init.is_primary = is_primary; + xenoprof_init.num_events = num_events; + xenoprof_init.nbuf = d->xenoprof->nbuf; + xenoprof_init.bufsize = d->xenoprof->bufsize; + + for (i = 0; i < d->xenoprof->npages; i++) { + unsigned long gmfn = xenoprof_shared_gmfn(d, d->xenoprof, i); + if (i == 0) + xenoprof_init.buf_maddr = gmfn << PAGE_SHIFT; + } + + if ( copy_to_guest(arg, &xenoprof_init, 1) ) + { + ret = -EFAULT; + goto err; + } + + return ret; + + err: + if ( primary_profiler == current->domain ) + primary_profiler = NULL; + return ret; +} + +#define PRIV_OP(op) ( (op == XENOPROF_set_active) \ + || (op == XENOPROF_reserve_counters) \ + || (op == XENOPROF_setup_events) \ + || (op == XENOPROF_start) \ + || (op == XENOPROF_stop) \ + || (op == XENOPROF_release_counters) \ + || (op == XENOPROF_shutdown)) + +int do_xenoprof_op(int op, XEN_GUEST_HANDLE(void) arg) +{ + int ret = 0; + + if ( PRIV_OP(op) && (current->domain != primary_profiler) ) + { + printk("xenoprof: dom %d denied privileged operation %d\n", + current->domain->domain_id, op); + return -EPERM; + } + + switch ( op ) + { + case XENOPROF_init: + ret = xenoprof_op_init(arg); + break; + + case XENOPROF_reset_active_list: + { + reset_active_list(); + ret = 0; + break; + } + case XENOPROF_reset_passive_list: + { + reset_passive_list(); + ret = 0; + break; + } + case XENOPROF_set_active: + { + domid_t domid; + if ( xenoprof_state != XENOPROF_IDLE ) + return -EPERM; + if ( copy_from_guest(&domid, arg, 1) ) + return -EFAULT; + ret = add_active_list(domid); + break; + } + case XENOPROF_set_passive: + { + if ( xenoprof_state != XENOPROF_IDLE ) + return -EPERM; + ret = add_passive_list(arg); + break; + } + case XENOPROF_reserve_counters: + if ( xenoprof_state != XENOPROF_IDLE ) + return -EPERM; + xenoprof_state = XENOPROF_COUNTERS_RESERVED; + break; + + case XENOPROF_counter: + { + struct xenoprof_counter counter; + if ( xenoprof_state != XENOPROF_COUNTERS_RESERVED ) + return -EPERM; + if ( adomains == 0 ) + return -EPERM; + + if ( copy_from_guest(&counter, arg, 1) ) + return -EFAULT; + + if ( counter.ind > OP_MAX_COUNTER ) + return -E2BIG; + + counter_config[counter.ind].count = (unsigned long) counter.count; + counter_config[counter.ind].enabled = (unsigned long) counter.enabled; + counter_config[counter.ind].event = (unsigned long) counter.event; + counter_config[counter.ind].kernel = (unsigned long) counter.kernel; + counter_config[counter.ind].user = (unsigned long) counter.user; + counter_config[counter.ind].unit_mask = (unsigned long) counter.unit_mask; + + ret = 0; + break; + } + + case XENOPROF_setup_events: + if ( xenoprof_state != XENOPROF_COUNTERS_RESERVED ) + return -EPERM; + xenoprof_state = XENOPROF_READY; + break; + + case XENOPROF_enable_virq: + { + int i; + if ( current->domain == primary_profiler ) + { + xenoprof_reset_stat(); + for ( i = 0; i < pdomains; i++ ) { + xenoprof_reset_buf(passive_domains[i]); + } + } + xenoprof_reset_buf(current->domain); + ret = set_active(current->domain); + break; + } + + case XENOPROF_start: + ret = -EPERM; + if ( (xenoprof_state == XENOPROF_READY) && + (activated == adomains) ) + ret = perfmon_start(); + + if ( ret == 0 ) + xenoprof_state = XENOPROF_PROFILING; + break; + + case XENOPROF_stop: + if ( 
xenoprof_state != XENOPROF_PROFILING ) + return -EPERM; + perfmon_stop(); + xenoprof_state = XENOPROF_READY; + break; + + case XENOPROF_disable_virq: + if ( (xenoprof_state == XENOPROF_PROFILING) && + (is_active(current->domain)) ) + return -EPERM; + ret = reset_active(current->domain); + break; + + case XENOPROF_release_counters: + ret = -EPERM; + if ( (xenoprof_state == XENOPROF_COUNTERS_RESERVED) || + (xenoprof_state == XENOPROF_READY) ) + { + xenoprof_state = XENOPROF_IDLE; + reset_passive_list(); + ret = 0; + } + break; + + case XENOPROF_shutdown: + ret = -EPERM; + if ( xenoprof_state == XENOPROF_IDLE ) + { + activated = 0; + adomains=0; + primary_profiler = NULL; + ret = 0; + perfmon_exit(); + } + break; + + default: + ret = -EINVAL; + } + + if ( ret < 0 ) + printk("xenoprof: operation %d failed for dom %d (status : %d)\n", + op, current->domain->domain_id, ret); + + return ret; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/xen/domain.c xenoprof-ia64-unstable/xen/arch/ia64/xen/domain.c --- xen-ia64-unstable.hg/xen/arch/ia64/xen/domain.c 2006-06-30 11:23:29.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/xen/domain.c 2006-06-30 15:26:16.000000000 +0900 @@ -128,6 +128,10 @@ if (VMX_DOMAIN(next)) vmx_load_state(next); /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/ +#ifdef CONFIG_PERFMON + pfm_save_regs(prev); + pfm_load_regs(next); +#endif prev = ia64_switch_to(next); /* Note: ia64_switch_to does not return here at vcpu initialization. */ @@ -185,6 +189,39 @@ /* nothing to do */ } +#if 1 /* By KAZ */ +static int pal_halt = 1; +static int can_do_pal_halt = 1; + +static int __init nohalt_setup(char * str) +{ + pal_halt = can_do_pal_halt = 0; + return 1; +} +__setup("nohalt", nohalt_setup); + +void +update_pal_halt_status(int status) +{ + can_do_pal_halt = pal_halt && status; +} + +static void default_idle(void) +{ + int cpu = smp_processor_id(); + local_irq_disable(); + printk("default_idle\n"); + if ( !softirq_pending(cpu)) { +/* if (can_do_pal_halt) */ +/* safe_halt(); */ +/* else */ + cpu_relax(); + } + local_irq_enable(); +} + +#else /* By KAZ */ + static void default_idle(void) { int cpu = smp_processor_id(); @@ -193,6 +230,7 @@ safe_halt(); local_irq_enable(); } +#endif /* By KAZ */ static void continue_cpu_idle_loop(void) { @@ -245,6 +283,15 @@ init_switch_stack(v); } + ia64_psr(vcpu_regs(v))->pp = + ia64_psr(vcpu_regs(v))->up = + ia64_psr(vcpu_regs(v))->i = 1; + + printk("alloc_vcpu_struct: pp=%d up=%d i=%d\n", + ia64_psr(vcpu_regs(v))->pp, + ia64_psr(vcpu_regs(v))->up, + ia64_psr(vcpu_regs(v))->i); + if (!is_idle_domain(d)) { v->arch.privregs = alloc_xenheap_pages(get_order(sizeof(mapped_regs_t))); @@ -493,6 +540,8 @@ spin_unlock_recursive(&d->page_alloc_lock); } +extern void free_xenoprof_pages(struct domain *d); + void domain_relinquish_resources(struct domain *d) { /* Relinquish every page of memory. 
*/ @@ -502,6 +551,9 @@ relinquish_memory(d, &d->xenpage_list); relinquish_memory(d, &d->page_list); + + /* Free page used by xen oprofile buffer */ + free_xenoprof_pages(d); } void build_physmap_table(struct domain *d) diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/xen/hypercall.c xenoprof-ia64-unstable/xen/arch/ia64/xen/hypercall.c --- xen-ia64-unstable.hg/xen/arch/ia64/xen/hypercall.c 2006-06-30 11:23:29.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/xen/hypercall.c 2006-06-30 15:25:01.000000000 +0900 @@ -72,10 +72,10 @@ (hypercall_t)do_ni_hypercall, /* do_nmi_op */ (hypercall_t)do_sched_op, (hypercall_t)do_callback_op, /* */ /* 30 */ - (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_xenoprof_op, /* */ (hypercall_t)do_event_channel_op, (hypercall_t)do_physdev_op, - (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_perfmon_op, /* */ (hypercall_t)do_ni_hypercall, /* */ /* 35 */ (hypercall_t)do_ni_hypercall, /* */ (hypercall_t)do_ni_hypercall, /* */ @@ -111,7 +111,8 @@ { uint32_t cmd = (uint32_t)regs->r2; - if (cmd < nr_hypercalls) + if (cmd < nr_hypercalls) { + perfc_incra(hypercalls, cmd); regs->r8 = (*ia64_hypercall_table[cmd])( regs->r14, regs->r15, @@ -119,7 +120,7 @@ regs->r17, regs->r18, regs->r19); - else + } else regs->r8 = -ENOSYS; return IA64_NO_FAULT; @@ -225,6 +226,7 @@ } else { pal_halt_light_count++; + perfc_incra(hypercalls, __HYPERVISOR_sched_op_compat); do_sched_op_compat(SCHEDOP_yield, 0); } regs->r8 = 0; diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/xen/xenmisc.c xenoprof-ia64-unstable/xen/arch/ia64/xen/xenmisc.c --- xen-ia64-unstable.hg/xen/arch/ia64/xen/xenmisc.c 2006-06-22 13:37:13.000000000 +0900 +++ xenoprof-ia64-unstable/xen/arch/ia64/xen/xenmisc.c 2006-06-30 15:25:01.000000000 +0900 @@ -22,6 +22,7 @@ #include <asm/vmx.h> #include <asm/vmx_vcpu.h> #include <asm/vcpu.h> +#include <asm/perfmon.h> unsigned long loops_per_jiffy = (1<<12); // from linux/init/main.c diff -Nur xen-ia64-unstable.hg/xen/common/schedule.c xenoprof-ia64-unstable/xen/common/schedule.c --- xen-ia64-unstable.hg/xen/common/schedule.c 2006-06-30 11:23:29.000000000 +0900 +++ xenoprof-ia64-unstable/xen/common/schedule.c 2006-06-30 15:25:01.000000000 +0900 @@ -210,6 +210,7 @@ else { TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id); + perfc_incrc(sched_block); __enter_scheduler(); } @@ -255,6 +256,7 @@ set_timer(&v->poll_timer, sched_poll->timeout); TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id); + perfc_incrc(sched_poll); __enter_scheduler(); stop_timer(&v->poll_timer); @@ -269,6 +271,7 @@ static long do_yield(void) { TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id); + perfc_incrc(sched_yield); __enter_scheduler(); return 0; } @@ -496,6 +499,9 @@ r_time = next_slice.time; next = next_slice.task; + if (prev == next) + perfc_incrc(sched_xen); + schedule_data[cpu].curr = next; set_timer(&schedule_data[cpu].s_timer, now + r_time); diff -Nur xen-ia64-unstable.hg/xen/common/softirq.c xenoprof-ia64-unstable/xen/common/softirq.c --- xen-ia64-unstable.hg/xen/common/softirq.c 2006-06-22 13:37:13.000000000 +0900 +++ xenoprof-ia64-unstable/xen/common/softirq.c 2006-06-30 15:25:01.000000000 +0900 @@ -14,6 +14,7 @@ #include <xen/mm.h> #include <xen/sched.h> #include <xen/softirq.h> +#include <xen/perfc.h> #ifndef __ARCH_IRQ_STAT irq_cpustat_t irq_stat[NR_CPUS]; diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/config.h xenoprof-ia64-unstable/xen/include/asm-ia64/config.h --- xen-ia64-unstable.hg/xen/include/asm-ia64/config.h 2006-06-22 
13:37:13.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/asm-ia64/config.h 2006-06-30 15:25:01.000000000 +0900 @@ -254,6 +254,8 @@ #define CONFIG_ACPI_BOOT 1 //#endif +#define CONFIG_PERFMON 1 + #define CONFIG_XEN_ATTENTION_KEY 1 #endif /* __ASSEMBLY__ */ #endif /* __XEN_IA64_CONFIG_H__ */ diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/linux/asm/perfmon.h xenoprof-ia64-unstable/xen/include/asm-ia64/linux/asm/perfmon.h --- xen-ia64-unstable.hg/xen/include/asm-ia64/linux/asm/perfmon.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/asm-ia64/linux/asm/perfmon.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,334 @@ +/* + * Copyright (C) 2001-2003 Hewlett-Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + */ + +#ifndef _ASM_IA64_PERFMON_H +#define _ASM_IA64_PERFMON_H + +/* + * perfmon comamnds supported on all CPU models + */ +#define PFM_WRITE_PMCS 0x01 +#define PFM_WRITE_PMDS 0x02 +#define PFM_READ_PMDS 0x03 +#define PFM_STOP 0x04 +#define PFM_START 0x05 +#define PFM_ENABLE 0x06 /* obsolete */ +#define PFM_DISABLE 0x07 /* obsolete */ +#define PFM_CREATE_CONTEXT 0x08 +#define PFM_DESTROY_CONTEXT 0x09 /* obsolete use close() */ +#define PFM_RESTART 0x0a +#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */ +#define PFM_GET_FEATURES 0x0c +#define PFM_DEBUG 0x0d +#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */ +#define PFM_GET_PMC_RESET_VAL 0x0f +#define PFM_LOAD_CONTEXT 0x10 +#define PFM_UNLOAD_CONTEXT 0x11 +#define PFM_FREE_CONTEXT 0x12 + +/* + * PMU model specific commands (may not be supported on all PMU models) + */ +#define PFM_WRITE_IBRS 0x20 +#define PFM_WRITE_DBRS 0x21 + +/* + * context flags + */ +#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user level notifications */ +#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */ +#define PFM_FL_OVFL_NO_MSG 0x80 /* do not post overflow/end messages for notification */ + +/* + * event set flags + */ +#define PFM_SETFL_EXCL_IDLE 0x01 /* exclude idle task (syswide only) XXX: DO NOT USE YET */ + +/* + * PMC flags + */ +#define PFM_REGFL_OVFL_NOTIFY 0x1 /* send notification on overflow */ +#define PFM_REGFL_RANDOM 0x2 /* randomize sampling interval */ + +/* + * PMD/PMC/IBR/DBR return flags (ignored on input) + * + * Those flags are used on output and must be checked in case EAGAIN is returned + * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure. 
+ */ +#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* set if register is implemented but not available */ +#define PFM_REG_RETFL_EINVAL (1UL<<30) /* set if register entry is invalid */ +#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL) + +#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0) + +typedef unsigned char pfm_uuid_t[16]; /* custom sampling buffer identifier type */ + +/* + * Request structure used to define a context + */ +typedef struct { + pfm_uuid_t ctx_smpl_buf_id; /* which buffer format to use (if needed) */ + unsigned long ctx_flags; /* noblock/block */ + unsigned short ctx_nextra_sets; /* number of extra event sets (you always get 1) */ + unsigned short ctx_reserved1; /* for future use */ + int ctx_fd; /* return arg: unique identification for context */ + void *ctx_smpl_vaddr; /* return arg: virtual address of sampling buffer, is used */ + unsigned long ctx_reserved2[11];/* for future use */ +} pfarg_context_t; + +/* + * Request structure used to write/read a PMC or PMD + */ +typedef struct { + unsigned int reg_num; /* which register */ + unsigned short reg_set; /* event set for this register */ + unsigned short reg_reserved1; /* for future use */ + + unsigned long reg_value; /* initial pmc/pmd value */ + unsigned long reg_flags; /* input: pmc/pmd flags, return: reg error */ + + unsigned long reg_long_reset; /* reset after buffer overflow notification */ + unsigned long reg_short_reset; /* reset after counter overflow */ + + unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */ + unsigned long reg_random_seed; /* seed value when randomization is used */ + unsigned long reg_random_mask; /* bitmask used to limit random value */ + unsigned long reg_last_reset_val;/* return: PMD last reset value */ + + unsigned long reg_smpl_pmds[4]; /* which pmds are accessed when PMC overflows */ + unsigned long reg_smpl_eventid; /* opaque sampling event identifier */ + + unsigned long reg_reserved2[3]; /* for future use */ +} pfarg_reg_t; + +typedef struct { + unsigned int dbreg_num; /* which debug register */ + unsigned short dbreg_set; /* event set for this register */ + unsigned short dbreg_reserved1; /* for future use */ + unsigned long dbreg_value; /* value for debug register */ + unsigned long dbreg_flags; /* return: dbreg error */ + unsigned long dbreg_reserved2[1]; /* for future use */ +} pfarg_dbreg_t; + +typedef struct { + unsigned int ft_version; /* perfmon: major [16-31], minor [0-15] */ + unsigned int ft_reserved; /* reserved for future use */ + unsigned long reserved[4]; /* for future use */ +} pfarg_features_t; + +typedef struct { + pid_t load_pid; /* process to load the context into */ + unsigned short load_set; /* first event set to load */ + unsigned short load_reserved1; /* for future use */ + unsigned long load_reserved2[3]; /* for future use */ +} pfarg_load_t; + +typedef struct { + int msg_type; /* generic message header */ + int msg_ctx_fd; /* generic message header */ + unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */ + unsigned short msg_active_set; /* active set at the time of overflow */ + unsigned short msg_reserved1; /* for future use */ + unsigned int msg_reserved2; /* for future use */ + unsigned long msg_tstamp; /* for perf tuning/debug */ +} pfm_ovfl_msg_t; + +typedef struct { + int msg_type; /* generic message header */ + int msg_ctx_fd; /* generic message header */ + unsigned long msg_tstamp; /* for perf tuning */ +} pfm_end_msg_t; + +typedef struct { + int msg_type; /* type of the 
message */ + int msg_ctx_fd; /* unique identifier for the context */ + unsigned long msg_tstamp; /* for perf tuning */ +} pfm_gen_msg_t; + +#define PFM_MSG_OVFL 1 /* an overflow happened */ +#define PFM_MSG_END 2 /* task to which context was attached ended */ + +typedef union { + pfm_ovfl_msg_t pfm_ovfl_msg; + pfm_end_msg_t pfm_end_msg; + pfm_gen_msg_t pfm_gen_msg; +} pfm_msg_t; + +/* + * Define the version numbers for both perfmon as a whole and the sampling buffer format. + */ +#define PFM_VERSION_MAJ 2U +#define PFM_VERSION_MIN 0U +#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff)) +#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff) +#define PFM_VERSION_MINOR(x) ((x) & 0xffff) + + +/* + * miscellaneous architected definitions + */ +#define PMU_FIRST_COUNTER 4 /* first counting monitor (PMC/PMD) */ +#define PMU_MAX_PMCS 256 /* maximum architected number of PMC registers */ +#define PMU_MAX_PMDS 256 /* maximum architected number of PMD registers */ + +#ifdef __KERNEL__ + +extern long perfmonctl(int fd, int cmd, void *arg, int narg); + +typedef struct { + void (*handler)(int irq, void *arg, struct pt_regs *regs); +} pfm_intr_handler_desc_t; + +extern void pfm_save_regs (struct task_struct *); +extern void pfm_load_regs (struct task_struct *); + +extern void pfm_exit_thread(struct task_struct *); +extern int pfm_use_debug_registers(struct task_struct *); +extern int pfm_release_debug_registers(struct task_struct *); +extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin); +extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs); +extern void pfm_init_percpu(void); +extern void pfm_handle_work(void); +extern int pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h); +extern int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h); + + + +/* + * Reset PMD register flags + */ +#define PFM_PMD_SHORT_RESET 0 +#define PFM_PMD_LONG_RESET 1 + +typedef union { + unsigned int val; + struct { + unsigned int notify_user:1; /* notify user program of overflow */ + unsigned int reset_ovfl_pmds:1; /* reset overflowed PMDs */ + unsigned int block_task:1; /* block monitored task on kernel exit */ + unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */ + unsigned int reserved:28; /* for future use */ + } bits; +} pfm_ovfl_ctrl_t; + +typedef struct { + unsigned char ovfl_pmd; /* index of overflowed PMD */ + unsigned char ovfl_notify; /* =1 if monitor requested overflow notification */ + unsigned short active_set; /* event set active at the time of the overflow */ + pfm_ovfl_ctrl_t ovfl_ctrl; /* return: perfmon controls to set by handler */ + + unsigned long pmd_last_reset; /* last reset value of of the PMD */ + unsigned long smpl_pmds[4]; /* bitmask of other PMD of interest on overflow */ + unsigned long smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */ + unsigned long pmd_value; /* current 64-bit value of the PMD */ + unsigned long pmd_eventid; /* eventid associated with PMD */ +} pfm_ovfl_arg_t; + + +typedef struct { + char *fmt_name; + pfm_uuid_t fmt_uuid; + size_t fmt_arg_size; + unsigned long fmt_flags; + + int (*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg); + int (*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size); + int (*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg); + int (*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct 
pt_regs *regs, unsigned long stamp); + int (*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs); + int (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs); + int (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs); + + struct list_head fmt_list; +} pfm_buffer_fmt_t; + +extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt); +extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid); + +/* + * perfmon interface exported to modules + */ +extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs); +extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs); +extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs); +extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs); + +/* + * describe the content of the local_cpu_date->pfm_syst_info field + */ +#define PFM_CPUINFO_SYST_WIDE 0x1 /* if set a system wide session exists */ +#define PFM_CPUINFO_DCR_PP 0x2 /* if set the system wide session has started */ +#define PFM_CPUINFO_EXCL_IDLE 0x4 /* the system wide session excludes the idle task */ + +/* + * sysctl control structure. visible to sampling formats + */ +typedef struct { + int debug; /* turn on/off debugging via syslog */ + int debug_ovfl; /* turn on/off debug printk in overflow handler */ + int fastctxsw; /* turn on/off fast (unsecure) ctxsw */ + int expert_mode; /* turn on/off value checking */ +} pfm_sysctl_t; +extern pfm_sysctl_t pfm_sysctl; + + +/* + * information about a PMC or PMD. + * dep_pmd[]: a bitmask of dependent PMD registers + * dep_pmc[]: a bitmask of dependent PMC registers + */ +struct pfm_context;; +typedef int (*pfm_reg_check_t)(struct task_struct *task, struct pfm_context *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); +typedef struct { + unsigned int type; + int pm_pos; + unsigned long default_value; /* power-on default value */ + unsigned long reserved_mask; /* bitmask of reserved bits */ + pfm_reg_check_t read_check; + pfm_reg_check_t write_check; + unsigned long dep_pmd[4]; + unsigned long dep_pmc[4]; +} pfm_reg_desc_t; + + +/* + * This structure is initialized at boot time and contains + * a description of the PMU main characteristics. 
+ * + * If the probe function is defined, detection is based + * on its return value: + * - 0 means recognized PMU + * - anything else means not supported + * When the probe function is not defined, then the pmu_family field + * is used and it must match the host CPU family such that: + * - cpu->family & config->pmu_family != 0 + */ +typedef struct { + unsigned long ovfl_val; /* overflow value for counters */ + + pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ + pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ + + unsigned int num_pmcs; /* number of PMCS: computed at init time */ + unsigned int num_pmds; /* number of PMDS: computed at init time */ + unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ + unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ + + char *pmu_name; /* PMU family name */ + unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ + unsigned int flags; /* pmu specific flags */ + unsigned int num_ibrs; /* number of IBRS: computed at init time */ + unsigned int num_dbrs; /* number of DBRS: computed at init time */ + unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ + int (*probe)(void); /* customized probe routine */ + unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ +} pmu_config_t; + +extern pmu_config_t *pmu_conf; + +#endif /* __KERNEL__ */ + +#endif /* _ASM_IA64_PERFMON_H */ diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/linux/completion.h xenoprof-ia64-unstable/xen/include/asm-ia64/linux/completion.h --- xen-ia64-unstable.hg/xen/include/asm-ia64/linux/completion.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/asm-ia64/linux/completion.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,42 @@ +#ifndef __LINUX_COMPLETION_H +#define __LINUX_COMPLETION_H + +/* + * (C) Copyright 2001 Linus Torvalds + * + * Atomic wait-for-completion handler data structures. + * See kernel/sched.c for details. + */ + +#include <linux/wait.h> + +struct completion { + unsigned int done; + wait_queue_head_t wait; +}; + +#define COMPLETION_INITIALIZER(work) \ + { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } + +#define DECLARE_COMPLETION(work) \ + struct completion work = COMPLETION_INITIALIZER(work) + +static inline void init_completion(struct completion *x) +{ + x->done = 0; + init_waitqueue_head(&x->wait); +} + +extern void FASTCALL(wait_for_completion(struct completion *)); +extern int FASTCALL(wait_for_completion_interruptible(struct completion *x)); +extern unsigned long FASTCALL(wait_for_completion_timeout(struct completion *x, + unsigned long timeout)); +extern unsigned long FASTCALL(wait_for_completion_interruptible_timeout( + struct completion *x, unsigned long timeout)); + +extern void FASTCALL(complete(struct completion *)); +extern void FASTCALL(complete_all(struct completion *)); + +#define INIT_COMPLETION(x) ((x).done = 0) + +#endif diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/linux/posix_types.h xenoprof-ia64-unstable/xen/include/asm-ia64/linux/posix_types.h --- xen-ia64-unstable.hg/xen/include/asm-ia64/linux/posix_types.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/asm-ia64/linux/posix_types.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,126 @@ +#ifndef _ASM_IA64_POSIX_TYPES_H +#define _ASM_IA64_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. 
Also, we cannot + * assume GCC is being used. + * + * Based on <asm-alpha/posix_types.h>. + * + * Modified 1998-2000, 2003 + * David Mosberger-Tang <davidm@xxxxxxxxxx>, Hewlett-Packard Co + */ + +typedef unsigned long __kernel_ino_t; +typedef unsigned int __kernel_mode_t; +typedef unsigned int __kernel_nlink_t; +typedef long __kernel_off_t; +typedef long long __kernel_loff_t; +typedef int __kernel_pid_t; +typedef int __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_timer_t; +typedef int __kernel_clockid_t; +typedef int __kernel_daddr_t; +typedef char * __kernel_caddr_t; +typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; + +typedef struct { + int val[2]; +} __kernel_fsid_t; + +typedef __kernel_uid_t __kernel_old_uid_t; +typedef __kernel_gid_t __kernel_old_gid_t; +typedef __kernel_uid_t __kernel_uid32_t; +typedef __kernel_gid_t __kernel_gid32_t; + +typedef unsigned int __kernel_old_dev_t; + +# ifdef __KERNEL__ + +# ifndef __GNUC__ + +#define __FD_SET(d, set) ((set)->fds_bits[__FDELT(d)] |= __FDMASK(d)) +#define __FD_CLR(d, set) ((set)->fds_bits[__FDELT(d)] &= ~__FDMASK(d)) +#define __FD_ISSET(d, set) (((set)->fds_bits[__FDELT(d)] & __FDMASK(d)) != 0) +#define __FD_ZERO(set) \ + ((void) memset ((__ptr_t) (set), 0, sizeof (__kernel_fd_set))) + +# else /* !__GNUC__ */ + +/* With GNU C, use inline functions instead so args are evaluated only once: */ + +#undef __FD_SET +static __inline__ void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + fdsetp->fds_bits[_tmp] |= (1UL<<_rem); +} + +#undef __FD_CLR +static __inline__ void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); +} + +#undef __FD_ISSET +static __inline__ int __FD_ISSET(unsigned long fd, const __kernel_fd_set *p) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant case (8 ints, + * for a 256-bit fd_set) + */ +#undef __FD_ZERO +static __inline__ void __FD_ZERO(__kernel_fd_set *p) +{ + unsigned long *tmp = p->fds_bits; + int i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 16: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; + tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; + return; + + case 8: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + return; + + case 4: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + return; + } + } + i = __FDSET_LONGS; + while (i) { + i--; + *tmp = 0; + tmp++; + } +} + +# endif /* !__GNUC__ */ +# endif /* __KERNEL__ */ +#endif /* _ASM_IA64_POSIX_TYPES_H */ diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/linux/sysctl.h xenoprof-ia64-unstable/xen/include/asm-ia64/linux/sysctl.h --- xen-ia64-unstable.hg/xen/include/asm-ia64/linux/sysctl.h 1970-01-01 09:00:00.000000000 +0900 +++ 
xenoprof-ia64-unstable/xen/include/asm-ia64/linux/sysctl.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,990 @@ +/* + * sysctl.h: General linux system control interface + * + * Begun 24 March 1995, Stephen Tweedie + * + **************************************************************** + **************************************************************** + ** + ** The values in this file are exported to user space via + ** the sysctl() binary interface. However this interface + ** is unstable and deprecated and will be removed in the future. + ** For a stable interface use /proc/sys. + ** + **************************************************************** + **************************************************************** + */ + +#ifndef _LINUX_SYSCTL_H +#define _LINUX_SYSCTL_H + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/compiler.h> + +struct file; +struct completion; + +#define CTL_MAXNAME 10 /* how many path components do we allow in a + call to sysctl? In other words, what is + the largest acceptable value for the nlen + member of a struct __sysctl_args to have? */ + +struct __sysctl_args { + int __user *name; + int nlen; + void __user *oldval; + size_t __user *oldlenp; + void __user *newval; + size_t newlen; + unsigned long __unused[4]; +}; + +/* Define sysctl names first */ + +/* Top-level names: */ + +/* For internal pattern-matching use only: */ +#ifdef __KERNEL__ +#define CTL_ANY -1 /* Matches any name */ +#define CTL_NONE 0 +#endif + +enum +{ + CTL_KERN=1, /* General kernel info and control */ + CTL_VM=2, /* VM management */ + CTL_NET=3, /* Networking */ + CTL_PROC=4, /* Process info */ + CTL_FS=5, /* Filesystems */ + CTL_DEBUG=6, /* Debugging */ + CTL_DEV=7, /* Devices */ + CTL_BUS=8, /* Busses */ + CTL_ABI=9, /* Binary emulation */ + CTL_CPU=10 /* CPU stuff (speed scaling, etc) */ +}; + +/* CTL_BUS names: */ +enum +{ + CTL_BUS_ISA=1 /* ISA */ +}; + +/* /proc/sys/fs/inotify/ */ +enum +{ + INOTIFY_MAX_USER_INSTANCES=1, /* max instances per user */ + INOTIFY_MAX_USER_WATCHES=2, /* max watches per user */ + INOTIFY_MAX_QUEUED_EVENTS=3 /* max queued events per instance */ +}; + +/* CTL_KERN names: */ +enum +{ + KERN_OSTYPE=1, /* string: system version */ + KERN_OSRELEASE=2, /* string: system release */ + KERN_OSREV=3, /* int: system revision */ + KERN_VERSION=4, /* string: compile time info */ + KERN_SECUREMASK=5, /* struct: maximum rights mask */ + KERN_PROF=6, /* table: profiling information */ + KERN_NODENAME=7, + KERN_DOMAINNAME=8, + + KERN_CAP_BSET=14, /* int: capability bounding set */ + KERN_PANIC=15, /* int: panic timeout */ + KERN_REALROOTDEV=16, /* real root device to mount after initrd */ + + KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ + KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ + KERN_PRINTK=23, /* struct: control printk logging parameters */ + KERN_NAMETRANS=24, /* Name translation */ + KERN_PPC_HTABRECLAIM=25, /* turn htab reclaimation on/off on PPC */ + KERN_PPC_ZEROPAGED=26, /* turn idle page zeroing on/off on PPC */ + KERN_PPC_POWERSAVE_NAP=27, /* use nap mode for power saving */ + KERN_MODPROBE=28, + KERN_SG_BIG_BUFF=29, + KERN_ACCT=30, /* BSD process accounting parameters */ + KERN_PPC_L2CR=31, /* l2cr register on PPC */ + + KERN_RTSIGNR=32, /* Number of rt sigs queued */ + KERN_RTSIGMAX=33, /* Max queuable */ + + KERN_SHMMAX=34, /* long: Maximum shared memory segment */ + KERN_MSGMAX=35, /* int: Maximum size of a messege */ + KERN_MSGMNB=36, /* int: Maximum message queue size */ + KERN_MSGPOOL=37, /* int: Maximum 
system message pool size */ + KERN_SYSRQ=38, /* int: Sysreq enable */ + KERN_MAX_THREADS=39, /* int: Maximum nr of threads in the system */ + KERN_RANDOM=40, /* Random driver */ + KERN_SHMALL=41, /* int: Maximum size of shared memory */ + KERN_MSGMNI=42, /* int: msg queue identifiers */ + KERN_SEM=43, /* struct: sysv semaphore limits */ + KERN_SPARC_STOP_A=44, /* int: Sparc Stop-A enable */ + KERN_SHMMNI=45, /* int: shm array identifiers */ + KERN_OVERFLOWUID=46, /* int: overflow UID */ + KERN_OVERFLOWGID=47, /* int: overflow GID */ + KERN_SHMPATH=48, /* string: path to shm fs */ + KERN_HOTPLUG=49, /* string: path to uevent helper (deprecated) */ + KERN_IEEE_EMULATION_WARNINGS=50, /* int: unimplemented ieee instructions */ + KERN_S390_USER_DEBUG_LOGGING=51, /* int: dumps of user faults */ + KERN_CORE_USES_PID=52, /* int: use core or core.%pid */ + KERN_TAINTED=53, /* int: various kernel tainted flags */ + KERN_CADPID=54, /* int: PID of the process to notify on CAD */ + KERN_PIDMAX=55, /* int: PID # limit */ + KERN_CORE_PATTERN=56, /* string: pattern for core-file names */ + KERN_PANIC_ON_OOPS=57, /* int: whether we will panic on an oops */ + KERN_HPPA_PWRSW=58, /* int: hppa soft-power enable */ + KERN_HPPA_UNALIGNED=59, /* int: hppa unaligned-trap enable */ + KERN_PRINTK_RATELIMIT=60, /* int: tune printk ratelimiting */ + KERN_PRINTK_RATELIMIT_BURST=61, /* int: tune printk ratelimiting */ + KERN_PTY=62, /* dir: pty driver */ + KERN_NGROUPS_MAX=63, /* int: NGROUPS_MAX */ + KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */ + KERN_HZ_TIMER=65, /* int: hz timer on or off */ + KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */ + KERN_BOOTLOADER_TYPE=67, /* int: boot loader type */ + KERN_RANDOMIZE=68, /* int: randomize virtual address space */ + KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ + KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ + KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ + KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ +}; + + +/* CTL_VM names: */ +enum +{ + VM_UNUSED1=1, /* was: struct: Set vm swapping control */ + VM_UNUSED2=2, /* was; int: Linear or sqrt() swapout for hogs */ + VM_UNUSED3=3, /* was: struct: Set free page thresholds */ + VM_UNUSED4=4, /* Spare */ + VM_OVERCOMMIT_MEMORY=5, /* Turn off the virtual memory safety limit */ + VM_UNUSED5=6, /* was: struct: Set buffer memory thresholds */ + VM_UNUSED7=7, /* was: struct: Set cache memory thresholds */ + VM_UNUSED8=8, /* was: struct: Control kswapd behaviour */ + VM_UNUSED9=9, /* was: struct: Set page table cache parameters */ + VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */ + VM_DIRTY_BACKGROUND=11, /* dirty_background_ratio */ + VM_DIRTY_RATIO=12, /* dirty_ratio */ + VM_DIRTY_WB_CS=13, /* dirty_writeback_centisecs */ + VM_DIRTY_EXPIRE_CS=14, /* dirty_expire_centisecs */ + VM_NR_PDFLUSH_THREADS=15, /* nr_pdflush_threads */ + VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */ + VM_PAGEBUF=17, /* struct: Control pagebuf parameters */ + VM_HUGETLB_PAGES=18, /* int: Number of available Huge Pages */ + VM_SWAPPINESS=19, /* Tendency to steal mapped memory */ + VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */ + VM_MIN_FREE_KBYTES=21, /* Minimum free kilobytes to maintain */ + VM_MAX_MAP_COUNT=22, /* int: Maximum number of mmaps/address-space */ + VM_LAPTOP_MODE=23, /* vm laptop mode */ + VM_BLOCK_DUMP=24, /* block dump mode */ + 
VM_HUGETLB_GROUP=25, /* permitted hugetlb group */ + VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ + VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ + VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ + VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ + VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ + VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ + VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ +}; + + +/* CTL_NET names: */ +enum +{ + NET_CORE=1, + NET_ETHER=2, + NET_802=3, + NET_UNIX=4, + NET_IPV4=5, + NET_IPX=6, + NET_ATALK=7, + NET_NETROM=8, + NET_AX25=9, + NET_BRIDGE=10, + NET_ROSE=11, + NET_IPV6=12, + NET_X25=13, + NET_TR=14, + NET_DECNET=15, + NET_ECONET=16, + NET_SCTP=17, + NET_LLC=18, + NET_NETFILTER=19, +}; + +/* /proc/sys/kernel/random */ +enum +{ + RANDOM_POOLSIZE=1, + RANDOM_ENTROPY_COUNT=2, + RANDOM_READ_THRESH=3, + RANDOM_WRITE_THRESH=4, + RANDOM_BOOT_ID=5, + RANDOM_UUID=6 +}; + +/* /proc/sys/kernel/pty */ +enum +{ + PTY_MAX=1, + PTY_NR=2 +}; + +/* /proc/sys/bus/isa */ +enum +{ + BUS_ISA_MEM_BASE=1, + BUS_ISA_PORT_BASE=2, + BUS_ISA_PORT_SHIFT=3 +}; + +/* /proc/sys/net/core */ +enum +{ + NET_CORE_WMEM_MAX=1, + NET_CORE_RMEM_MAX=2, + NET_CORE_WMEM_DEFAULT=3, + NET_CORE_RMEM_DEFAULT=4, +/* was NET_CORE_DESTROY_DELAY */ + NET_CORE_MAX_BACKLOG=6, + NET_CORE_FASTROUTE=7, + NET_CORE_MSG_COST=8, + NET_CORE_MSG_BURST=9, + NET_CORE_OPTMEM_MAX=10, + NET_CORE_HOT_LIST_LENGTH=11, + NET_CORE_DIVERT_VERSION=12, + NET_CORE_NO_CONG_THRESH=13, + NET_CORE_NO_CONG=14, + NET_CORE_LO_CONG=15, + NET_CORE_MOD_CONG=16, + NET_CORE_DEV_WEIGHT=17, + NET_CORE_SOMAXCONN=18, + NET_CORE_BUDGET=19, +}; + +/* /proc/sys/net/ethernet */ + +/* /proc/sys/net/802 */ + +/* /proc/sys/net/unix */ + +enum +{ + NET_UNIX_DESTROY_DELAY=1, + NET_UNIX_DELETE_DELAY=2, + NET_UNIX_MAX_DGRAM_QLEN=3, +}; + +/* /proc/sys/net/netfilter */ +enum +{ + NET_NF_CONNTRACK_MAX=1, + NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2, + NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3, + NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4, + NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5, + NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6, + NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7, + NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8, + NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9, + NET_NF_CONNTRACK_UDP_TIMEOUT=10, + NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11, + NET_NF_CONNTRACK_ICMP_TIMEOUT=12, + NET_NF_CONNTRACK_GENERIC_TIMEOUT=13, + NET_NF_CONNTRACK_BUCKETS=14, + NET_NF_CONNTRACK_LOG_INVALID=15, + NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16, + NET_NF_CONNTRACK_TCP_LOOSE=17, + NET_NF_CONNTRACK_TCP_BE_LIBERAL=18, + NET_NF_CONNTRACK_TCP_MAX_RETRANS=19, + NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20, + NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21, + NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22, + NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23, + NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24, + NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, + NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, + NET_NF_CONNTRACK_COUNT=27, + NET_NF_CONNTRACK_ICMPV6_TIMEOUT=28, + NET_NF_CONNTRACK_FRAG6_TIMEOUT=29, + NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30, + NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31, +}; + +/* /proc/sys/net/ipv4 */ +enum +{ + /* v2.0 compatibile variables */ + NET_IPV4_FORWARD=8, + NET_IPV4_DYNADDR=9, + + NET_IPV4_CONF=16, + NET_IPV4_NEIGH=17, + NET_IPV4_ROUTE=18, + NET_IPV4_FIB_HASH=19, + NET_IPV4_NETFILTER=20, + + NET_IPV4_TCP_TIMESTAMPS=33, + 
NET_IPV4_TCP_WINDOW_SCALING=34, + NET_IPV4_TCP_SACK=35, + NET_IPV4_TCP_RETRANS_COLLAPSE=36, + NET_IPV4_DEFAULT_TTL=37, + NET_IPV4_AUTOCONFIG=38, + NET_IPV4_NO_PMTU_DISC=39, + NET_IPV4_TCP_SYN_RETRIES=40, + NET_IPV4_IPFRAG_HIGH_THRESH=41, + NET_IPV4_IPFRAG_LOW_THRESH=42, + NET_IPV4_IPFRAG_TIME=43, + NET_IPV4_TCP_MAX_KA_PROBES=44, + NET_IPV4_TCP_KEEPALIVE_TIME=45, + NET_IPV4_TCP_KEEPALIVE_PROBES=46, + NET_IPV4_TCP_RETRIES1=47, + NET_IPV4_TCP_RETRIES2=48, + NET_IPV4_TCP_FIN_TIMEOUT=49, + NET_IPV4_IP_MASQ_DEBUG=50, + NET_TCP_SYNCOOKIES=51, + NET_TCP_STDURG=52, + NET_TCP_RFC1337=53, + NET_TCP_SYN_TAILDROP=54, + NET_TCP_MAX_SYN_BACKLOG=55, + NET_IPV4_LOCAL_PORT_RANGE=56, + NET_IPV4_ICMP_ECHO_IGNORE_ALL=57, + NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS=58, + NET_IPV4_ICMP_SOURCEQUENCH_RATE=59, + NET_IPV4_ICMP_DESTUNREACH_RATE=60, + NET_IPV4_ICMP_TIMEEXCEED_RATE=61, + NET_IPV4_ICMP_PARAMPROB_RATE=62, + NET_IPV4_ICMP_ECHOREPLY_RATE=63, + NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES=64, + NET_IPV4_IGMP_MAX_MEMBERSHIPS=65, + NET_TCP_TW_RECYCLE=66, + NET_IPV4_ALWAYS_DEFRAG=67, + NET_IPV4_TCP_KEEPALIVE_INTVL=68, + NET_IPV4_INET_PEER_THRESHOLD=69, + NET_IPV4_INET_PEER_MINTTL=70, + NET_IPV4_INET_PEER_MAXTTL=71, + NET_IPV4_INET_PEER_GC_MINTIME=72, + NET_IPV4_INET_PEER_GC_MAXTIME=73, + NET_TCP_ORPHAN_RETRIES=74, + NET_TCP_ABORT_ON_OVERFLOW=75, + NET_TCP_SYNACK_RETRIES=76, + NET_TCP_MAX_ORPHANS=77, + NET_TCP_MAX_TW_BUCKETS=78, + NET_TCP_FACK=79, + NET_TCP_REORDERING=80, + NET_TCP_ECN=81, + NET_TCP_DSACK=82, + NET_TCP_MEM=83, + NET_TCP_WMEM=84, + NET_TCP_RMEM=85, + NET_TCP_APP_WIN=86, + NET_TCP_ADV_WIN_SCALE=87, + NET_IPV4_NONLOCAL_BIND=88, + NET_IPV4_ICMP_RATELIMIT=89, + NET_IPV4_ICMP_RATEMASK=90, + NET_TCP_TW_REUSE=91, + NET_TCP_FRTO=92, + NET_TCP_LOW_LATENCY=93, + NET_IPV4_IPFRAG_SECRET_INTERVAL=94, + NET_IPV4_IGMP_MAX_MSF=96, + NET_TCP_NO_METRICS_SAVE=97, + NET_TCP_DEFAULT_WIN_SCALE=105, + NET_TCP_MODERATE_RCVBUF=106, + NET_TCP_TSO_WIN_DIVISOR=107, + NET_TCP_BIC_BETA=108, + NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, + NET_TCP_CONG_CONTROL=110, + NET_TCP_ABC=111, + NET_IPV4_IPFRAG_MAX_DIST=112, +}; + +enum { + NET_IPV4_ROUTE_FLUSH=1, + NET_IPV4_ROUTE_MIN_DELAY=2, + NET_IPV4_ROUTE_MAX_DELAY=3, + NET_IPV4_ROUTE_GC_THRESH=4, + NET_IPV4_ROUTE_MAX_SIZE=5, + NET_IPV4_ROUTE_GC_MIN_INTERVAL=6, + NET_IPV4_ROUTE_GC_TIMEOUT=7, + NET_IPV4_ROUTE_GC_INTERVAL=8, + NET_IPV4_ROUTE_REDIRECT_LOAD=9, + NET_IPV4_ROUTE_REDIRECT_NUMBER=10, + NET_IPV4_ROUTE_REDIRECT_SILENCE=11, + NET_IPV4_ROUTE_ERROR_COST=12, + NET_IPV4_ROUTE_ERROR_BURST=13, + NET_IPV4_ROUTE_GC_ELASTICITY=14, + NET_IPV4_ROUTE_MTU_EXPIRES=15, + NET_IPV4_ROUTE_MIN_PMTU=16, + NET_IPV4_ROUTE_MIN_ADVMSS=17, + NET_IPV4_ROUTE_SECRET_INTERVAL=18, + NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS=19, +}; + +enum +{ + NET_PROTO_CONF_ALL=-2, + NET_PROTO_CONF_DEFAULT=-3 + + /* And device ifindices ... 
*/ +}; + +enum +{ + NET_IPV4_CONF_FORWARDING=1, + NET_IPV4_CONF_MC_FORWARDING=2, + NET_IPV4_CONF_PROXY_ARP=3, + NET_IPV4_CONF_ACCEPT_REDIRECTS=4, + NET_IPV4_CONF_SECURE_REDIRECTS=5, + NET_IPV4_CONF_SEND_REDIRECTS=6, + NET_IPV4_CONF_SHARED_MEDIA=7, + NET_IPV4_CONF_RP_FILTER=8, + NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE=9, + NET_IPV4_CONF_BOOTP_RELAY=10, + NET_IPV4_CONF_LOG_MARTIANS=11, + NET_IPV4_CONF_TAG=12, + NET_IPV4_CONF_ARPFILTER=13, + NET_IPV4_CONF_MEDIUM_ID=14, + NET_IPV4_CONF_NOXFRM=15, + NET_IPV4_CONF_NOPOLICY=16, + NET_IPV4_CONF_FORCE_IGMP_VERSION=17, + NET_IPV4_CONF_ARP_ANNOUNCE=18, + NET_IPV4_CONF_ARP_IGNORE=19, + NET_IPV4_CONF_PROMOTE_SECONDARIES=20, + __NET_IPV4_CONF_MAX +}; + +/* /proc/sys/net/ipv4/netfilter */ +enum +{ + NET_IPV4_NF_CONNTRACK_MAX=1, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9, + NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT=10, + NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11, + NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT=12, + NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT=13, + NET_IPV4_NF_CONNTRACK_BUCKETS=14, + NET_IPV4_NF_CONNTRACK_LOG_INVALID=15, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16, + NET_IPV4_NF_CONNTRACK_TCP_LOOSE=17, + NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL=18, + NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS=19, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, + NET_IPV4_NF_CONNTRACK_COUNT=27, +}; + +/* /proc/sys/net/ipv6 */ +enum { + NET_IPV6_CONF=16, + NET_IPV6_NEIGH=17, + NET_IPV6_ROUTE=18, + NET_IPV6_ICMP=19, + NET_IPV6_BINDV6ONLY=20, + NET_IPV6_IP6FRAG_HIGH_THRESH=21, + NET_IPV6_IP6FRAG_LOW_THRESH=22, + NET_IPV6_IP6FRAG_TIME=23, + NET_IPV6_IP6FRAG_SECRET_INTERVAL=24, + NET_IPV6_MLD_MAX_MSF=25, +}; + +enum { + NET_IPV6_ROUTE_FLUSH=1, + NET_IPV6_ROUTE_GC_THRESH=2, + NET_IPV6_ROUTE_MAX_SIZE=3, + NET_IPV6_ROUTE_GC_MIN_INTERVAL=4, + NET_IPV6_ROUTE_GC_TIMEOUT=5, + NET_IPV6_ROUTE_GC_INTERVAL=6, + NET_IPV6_ROUTE_GC_ELASTICITY=7, + NET_IPV6_ROUTE_MTU_EXPIRES=8, + NET_IPV6_ROUTE_MIN_ADVMSS=9, + NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS=10 +}; + +enum { + NET_IPV6_FORWARDING=1, + NET_IPV6_HOP_LIMIT=2, + NET_IPV6_MTU=3, + NET_IPV6_ACCEPT_RA=4, + NET_IPV6_ACCEPT_REDIRECTS=5, + NET_IPV6_AUTOCONF=6, + NET_IPV6_DAD_TRANSMITS=7, + NET_IPV6_RTR_SOLICITS=8, + NET_IPV6_RTR_SOLICIT_INTERVAL=9, + NET_IPV6_RTR_SOLICIT_DELAY=10, + NET_IPV6_USE_TEMPADDR=11, + NET_IPV6_TEMP_VALID_LFT=12, + NET_IPV6_TEMP_PREFERED_LFT=13, + NET_IPV6_REGEN_MAX_RETRY=14, + NET_IPV6_MAX_DESYNC_FACTOR=15, + NET_IPV6_MAX_ADDRESSES=16, + NET_IPV6_FORCE_MLD_VERSION=17, + __NET_IPV6_MAX +}; + +/* /proc/sys/net/ipv6/icmp */ +enum { + NET_IPV6_ICMP_RATELIMIT=1 +}; + +/* /proc/sys/net/<protocol>/neigh/<dev> */ +enum { + NET_NEIGH_MCAST_SOLICIT=1, + NET_NEIGH_UCAST_SOLICIT=2, + NET_NEIGH_APP_SOLICIT=3, + NET_NEIGH_RETRANS_TIME=4, + NET_NEIGH_REACHABLE_TIME=5, + NET_NEIGH_DELAY_PROBE_TIME=6, + NET_NEIGH_GC_STALE_TIME=7, + NET_NEIGH_UNRES_QLEN=8, + NET_NEIGH_PROXY_QLEN=9, + 
NET_NEIGH_ANYCAST_DELAY=10, + NET_NEIGH_PROXY_DELAY=11, + NET_NEIGH_LOCKTIME=12, + NET_NEIGH_GC_INTERVAL=13, + NET_NEIGH_GC_THRESH1=14, + NET_NEIGH_GC_THRESH2=15, + NET_NEIGH_GC_THRESH3=16, + NET_NEIGH_RETRANS_TIME_MS=17, + NET_NEIGH_REACHABLE_TIME_MS=18, + __NET_NEIGH_MAX +}; + +/* /proc/sys/net/ipx */ +enum { + NET_IPX_PPROP_BROADCASTING=1, + NET_IPX_FORWARDING=2 +}; + +/* /proc/sys/net/llc */ +enum { + NET_LLC2=1, + NET_LLC_STATION=2, +}; + +/* /proc/sys/net/llc/llc2 */ +enum { + NET_LLC2_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/station */ +enum { + NET_LLC_STATION_ACK_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/llc2/timeout */ +enum { + NET_LLC2_ACK_TIMEOUT=1, + NET_LLC2_P_TIMEOUT=2, + NET_LLC2_REJ_TIMEOUT=3, + NET_LLC2_BUSY_TIMEOUT=4, +}; + +/* /proc/sys/net/appletalk */ +enum { + NET_ATALK_AARP_EXPIRY_TIME=1, + NET_ATALK_AARP_TICK_TIME=2, + NET_ATALK_AARP_RETRANSMIT_LIMIT=3, + NET_ATALK_AARP_RESOLVE_TIME=4 +}; + + +/* /proc/sys/net/netrom */ +enum { + NET_NETROM_DEFAULT_PATH_QUALITY=1, + NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER=2, + NET_NETROM_NETWORK_TTL_INITIALISER=3, + NET_NETROM_TRANSPORT_TIMEOUT=4, + NET_NETROM_TRANSPORT_MAXIMUM_TRIES=5, + NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY=6, + NET_NETROM_TRANSPORT_BUSY_DELAY=7, + NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE=8, + NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT=9, + NET_NETROM_ROUTING_CONTROL=10, + NET_NETROM_LINK_FAILS_COUNT=11, + NET_NETROM_RESET=12 +}; + +/* /proc/sys/net/ax25 */ +enum { + NET_AX25_IP_DEFAULT_MODE=1, + NET_AX25_DEFAULT_MODE=2, + NET_AX25_BACKOFF_TYPE=3, + NET_AX25_CONNECT_MODE=4, + NET_AX25_STANDARD_WINDOW=5, + NET_AX25_EXTENDED_WINDOW=6, + NET_AX25_T1_TIMEOUT=7, + NET_AX25_T2_TIMEOUT=8, + NET_AX25_T3_TIMEOUT=9, + NET_AX25_IDLE_TIMEOUT=10, + NET_AX25_N2=11, + NET_AX25_PACLEN=12, + NET_AX25_PROTOCOL=13, + NET_AX25_DAMA_SLAVE_TIMEOUT=14 +}; + +/* /proc/sys/net/rose */ +enum { + NET_ROSE_RESTART_REQUEST_TIMEOUT=1, + NET_ROSE_CALL_REQUEST_TIMEOUT=2, + NET_ROSE_RESET_REQUEST_TIMEOUT=3, + NET_ROSE_CLEAR_REQUEST_TIMEOUT=4, + NET_ROSE_ACK_HOLD_BACK_TIMEOUT=5, + NET_ROSE_ROUTING_CONTROL=6, + NET_ROSE_LINK_FAIL_TIMEOUT=7, + NET_ROSE_MAX_VCS=8, + NET_ROSE_WINDOW_SIZE=9, + NET_ROSE_NO_ACTIVITY_TIMEOUT=10 +}; + +/* /proc/sys/net/x25 */ +enum { + NET_X25_RESTART_REQUEST_TIMEOUT=1, + NET_X25_CALL_REQUEST_TIMEOUT=2, + NET_X25_RESET_REQUEST_TIMEOUT=3, + NET_X25_CLEAR_REQUEST_TIMEOUT=4, + NET_X25_ACK_HOLD_BACK_TIMEOUT=5 +}; + +/* /proc/sys/net/token-ring */ +enum +{ + NET_TR_RIF_TIMEOUT=1 +}; + +/* /proc/sys/net/decnet/ */ +enum { + NET_DECNET_NODE_TYPE = 1, + NET_DECNET_NODE_ADDRESS = 2, + NET_DECNET_NODE_NAME = 3, + NET_DECNET_DEFAULT_DEVICE = 4, + NET_DECNET_TIME_WAIT = 5, + NET_DECNET_DN_COUNT = 6, + NET_DECNET_DI_COUNT = 7, + NET_DECNET_DR_COUNT = 8, + NET_DECNET_DST_GC_INTERVAL = 9, + NET_DECNET_CONF = 10, + NET_DECNET_NO_FC_MAX_CWND = 11, + NET_DECNET_MEM = 12, + NET_DECNET_RMEM = 13, + NET_DECNET_WMEM = 14, + NET_DECNET_DEBUG_LEVEL = 255 +}; + +/* /proc/sys/net/decnet/conf/<dev> */ +enum { + NET_DECNET_CONF_LOOPBACK = -2, + NET_DECNET_CONF_DDCMP = -3, + NET_DECNET_CONF_PPP = -4, + NET_DECNET_CONF_X25 = -5, + NET_DECNET_CONF_GRE = -6, + NET_DECNET_CONF_ETHER = -7 + + /* ... 
and ifindex of devices */ +}; + +/* /proc/sys/net/decnet/conf/<dev>/ */ +enum { + NET_DECNET_CONF_DEV_PRIORITY = 1, + NET_DECNET_CONF_DEV_T1 = 2, + NET_DECNET_CONF_DEV_T2 = 3, + NET_DECNET_CONF_DEV_T3 = 4, + NET_DECNET_CONF_DEV_FORWARDING = 5, + NET_DECNET_CONF_DEV_BLKSIZE = 6, + NET_DECNET_CONF_DEV_STATE = 7 +}; + +/* /proc/sys/net/sctp */ +enum { + NET_SCTP_RTO_INITIAL = 1, + NET_SCTP_RTO_MIN = 2, + NET_SCTP_RTO_MAX = 3, + NET_SCTP_RTO_ALPHA = 4, + NET_SCTP_RTO_BETA = 5, + NET_SCTP_VALID_COOKIE_LIFE = 6, + NET_SCTP_ASSOCIATION_MAX_RETRANS = 7, + NET_SCTP_PATH_MAX_RETRANS = 8, + NET_SCTP_MAX_INIT_RETRANSMITS = 9, + NET_SCTP_HB_INTERVAL = 10, + NET_SCTP_PRESERVE_ENABLE = 11, + NET_SCTP_MAX_BURST = 12, + NET_SCTP_ADDIP_ENABLE = 13, + NET_SCTP_PRSCTP_ENABLE = 14, + NET_SCTP_SNDBUF_POLICY = 15, + NET_SCTP_SACK_TIMEOUT = 16, + NET_SCTP_RCVBUF_POLICY = 17, +}; + +/* /proc/sys/net/bridge */ +enum { + NET_BRIDGE_NF_CALL_ARPTABLES = 1, + NET_BRIDGE_NF_CALL_IPTABLES = 2, + NET_BRIDGE_NF_CALL_IP6TABLES = 3, + NET_BRIDGE_NF_FILTER_VLAN_TAGGED = 4, +}; + +/* CTL_PROC names: */ + +/* CTL_FS names: */ +enum +{ + FS_NRINODE=1, /* int:current number of allocated inodes */ + FS_STATINODE=2, + FS_MAXINODE=3, /* int:maximum number of inodes that can be allocated */ + FS_NRDQUOT=4, /* int:current number of allocated dquots */ + FS_MAXDQUOT=5, /* int:maximum number of dquots that can be allocated */ + FS_NRFILE=6, /* int:current number of allocated filedescriptors */ + FS_MAXFILE=7, /* int:maximum number of filedescriptors that can be allocated */ + FS_DENTRY=8, + FS_NRSUPER=9, /* int:current number of allocated super_blocks */ + FS_MAXSUPER=10, /* int:maximum number of super_blocks that can be allocated */ + FS_OVERFLOWUID=11, /* int: overflow UID */ + FS_OVERFLOWGID=12, /* int: overflow GID */ + FS_LEASES=13, /* int: leases enabled */ + FS_DIR_NOTIFY=14, /* int: directory notification enabled */ + FS_LEASE_TIME=15, /* int: maximum time to wait for a lease break */ + FS_DQSTATS=16, /* disc quota usage statistics and control */ + FS_XFS=17, /* struct: control xfs parameters */ + FS_AIO_NR=18, /* current system-wide number of aio requests */ + FS_AIO_MAX_NR=19, /* system-wide maximum number of aio requests */ + FS_INOTIFY=20, /* inotify submenu */ +}; + +/* /proc/sys/fs/quota/ */ +enum { + FS_DQ_LOOKUPS = 1, + FS_DQ_DROPS = 2, + FS_DQ_READS = 3, + FS_DQ_WRITES = 4, + FS_DQ_CACHE_HITS = 5, + FS_DQ_ALLOCATED = 6, + FS_DQ_FREE = 7, + FS_DQ_SYNCS = 8, + FS_DQ_WARNINGS = 9, +}; + +/* CTL_DEBUG names: */ + +/* CTL_DEV names: */ +enum { + DEV_CDROM=1, + DEV_HWMON=2, + DEV_PARPORT=3, + DEV_RAID=4, + DEV_MAC_HID=5, + DEV_SCSI=6, + DEV_IPMI=7, +}; + +/* /proc/sys/dev/cdrom */ +enum { + DEV_CDROM_INFO=1, + DEV_CDROM_AUTOCLOSE=2, + DEV_CDROM_AUTOEJECT=3, + DEV_CDROM_DEBUG=4, + DEV_CDROM_LOCK=5, + DEV_CDROM_CHECK_MEDIA=6 +}; + +/* /proc/sys/dev/parport */ +enum { + DEV_PARPORT_DEFAULT=-3 +}; + +/* /proc/sys/dev/raid */ +enum { + DEV_RAID_SPEED_LIMIT_MIN=1, + DEV_RAID_SPEED_LIMIT_MAX=2 +}; + +/* /proc/sys/dev/parport/default */ +enum { + DEV_PARPORT_DEFAULT_TIMESLICE=1, + DEV_PARPORT_DEFAULT_SPINTIME=2 +}; + +/* /proc/sys/dev/parport/parport n */ +enum { + DEV_PARPORT_SPINTIME=1, + DEV_PARPORT_BASE_ADDR=2, + DEV_PARPORT_IRQ=3, + DEV_PARPORT_DMA=4, + DEV_PARPORT_MODES=5, + DEV_PARPORT_DEVICES=6, + DEV_PARPORT_AUTOPROBE=16 +}; + +/* /proc/sys/dev/parport/parport n/devices/ */ +enum { + DEV_PARPORT_DEVICES_ACTIVE=-3, +}; + +/* /proc/sys/dev/parport/parport n/devices/device n */ +enum { + DEV_PARPORT_DEVICE_TIMESLICE=1, +}; + 
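All of these numeric IDs exist to be strung together, outermost level first, into a name vector for the legacy sysctl(2) binary interface; the /proc/sys mirror uses the procname strings instead. As a purely illustrative sketch (assuming a kernel of this vintage where the binary interface still exists, and using the raw SYS__sysctl syscall number since glibc provides no stable wrapper), reading kernel.ostype by number looks roughly like:

  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/sysctl.h>

  int main(void)
  {
      /* /proc/sys/kernel/ostype expressed as binary names, outermost first */
      int name[] = { CTL_KERN, KERN_OSTYPE };
      char buf[64];
      size_t buflen = sizeof(buf);
      struct __sysctl_args args;

      memset(&args, 0, sizeof(args));
      args.name = name;
      args.nlen = sizeof(name) / sizeof(name[0]);
      args.oldval = buf;
      args.oldlenp = &buflen;

      /* no stable libc wrapper for sysctl(2); invoke the syscall directly */
      if (syscall(SYS__sysctl, &args) == -1) {
          perror("_sysctl");
          return 1;
      }
      printf("kernel.ostype = %.*s\n", (int)buflen, buf);
      return 0;
  }

The same value is normally obtained simply by reading /proc/sys/kernel/ostype, which is the route the header's banner comment recommends.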
+/* /proc/sys/dev/mac_hid */ +enum { + DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES=1, + DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES=2, + DEV_MAC_HID_MOUSE_BUTTON_EMULATION=3, + DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE=4, + DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE=5, + DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES=6 +}; + +/* /proc/sys/dev/scsi */ +enum { + DEV_SCSI_LOGGING_LEVEL=1, +}; + +/* /proc/sys/dev/ipmi */ +enum { + DEV_IPMI_POWEROFF_POWERCYCLE=1, +}; + +/* /proc/sys/abi */ +enum +{ + ABI_DEFHANDLER_COFF=1, /* default handler for coff binaries */ + ABI_DEFHANDLER_ELF=2, /* default handler for ELF binaries */ + ABI_DEFHANDLER_LCALL7=3,/* default handler for procs using lcall7 */ + ABI_DEFHANDLER_LIBCSO=4,/* default handler for an libc.so ELF interp */ + ABI_TRACE=5, /* tracing flags */ + ABI_FAKE_UTSNAME=6, /* fake target utsname information */ +}; + +#ifdef __KERNEL__ +#include <linux/list.h> + +extern void sysctl_init(void); + +typedef struct ctl_table ctl_table; + +typedef int ctl_handler (ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, + void **context); + +typedef int proc_handler (ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + +extern int proc_dostring(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec_bset(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec_minmax(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec_jiffies(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec_userhz_jiffies(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec_ms_jiffies(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_doulongvec_minmax(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int, + struct file *, void __user *, size_t *, loff_t *); + +extern int do_sysctl (int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen); + +extern int do_sysctl_strategy (ctl_table *table, + int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, void ** context); + +extern ctl_handler sysctl_string; +extern ctl_handler sysctl_intvec; +extern ctl_handler sysctl_jiffies; +extern ctl_handler sysctl_ms_jiffies; + + +/* + * Register a set of sysctl names by calling register_sysctl_table + * with an initialised array of ctl_table's. An entry with zero + * ctl_name terminates the table. table->de will be set up by the + * registration and need not be initialised in advance. + * + * sysctl names can be mirrored automatically under /proc/sys. The + * procname supplied controls /proc naming. + * + * The table's mode will be honoured both for sys_sysctl(2) and + * proc-fs access. + * + * Leaf nodes in the sysctl tree will be represented by a single file + * under /proc; non-leaf nodes will be represented by directories. A + * null procname disables /proc mirroring at this node. + * + * sysctl(2) can automatically manage read and write requests through + * the sysctl table. 
The data and maxlen fields of the ctl_table + * struct enable minimal validation of the values being written to be + * performed, and the mode field allows minimal authentication. + * + * More sophisticated management can be enabled by the provision of a + * strategy routine with the table entry. This will be called before + * any automatic read or write of the data is performed. + * + * The strategy routine may return: + * <0: Error occurred (error is passed to user process) + * 0: OK - proceed with automatic read or write. + * >0: OK - read or write has been done by the strategy routine, so + * return immediately. + * + * There must be a proc_handler routine for any terminal nodes + * mirrored under /proc/sys (non-terminals are handled by a built-in + * directory handler). Several default handlers are available to + * cover common cases. + */ + +/* A sysctl table is an array of struct ctl_table: */ +struct ctl_table +{ + int ctl_name; /* Binary ID */ + const char *procname; /* Text ID for /proc/sys, or zero */ + void *data; + int maxlen; + mode_t mode; + ctl_table *child; + proc_handler *proc_handler; /* Callback for text formatting */ + ctl_handler *strategy; /* Callback function for all r/w */ + struct proc_dir_entry *de; /* /proc control block */ + void *extra1; + void *extra2; +}; + +/* struct ctl_table_header is used to maintain dynamic lists of + ctl_table trees. */ +struct ctl_table_header +{ + ctl_table *ctl_table; + struct list_head ctl_entry; + int used; + struct completion *unregistering; +}; + +struct ctl_table_header * register_sysctl_table(ctl_table * table, + int insert_at_head); +void unregister_sysctl_table(struct ctl_table_header * table); + +#else /* __KERNEL__ */ + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_SYSCTL_H */ diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/regs.h xenoprof-ia64-unstable/xen/include/asm-ia64/regs.h --- xen-ia64-unstable.hg/xen/include/asm-ia64/regs.h 2006-06-22 13:37:14.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/asm-ia64/regs.h 2006-06-30 15:25:01.000000000 +0900 @@ -1,2 +1,7 @@ #include <asm/ptrace.h> #define xen_regs pt_regs + +#define ring_0(r) (ia64_psr(regs)->cpl == 0) +#define ring_1(r) (ia64_psr(regs)->cpl == 1) +#define ring_2(r) (ia64_psr(regs)->cpl == 2) +#define ring_3(r) (ia64_psr(regs)->cpl == 3) diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/xen/asm/perfmon.h xenoprof-ia64-unstable/xen/include/asm-ia64/xen/asm/perfmon.h --- xen-ia64-unstable.hg/xen/include/asm-ia64/xen/asm/perfmon.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/asm-ia64/xen/asm/perfmon.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,334 @@ +/* + * Copyright (C) 2001-2003 Hewlett-Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + */ + +#ifndef _ASM_IA64_PERFMON_H +#define _ASM_IA64_PERFMON_H + +/* + * perfmon comamnds supported on all CPU models + */ +#define PFM_WRITE_PMCS 0x01 +#define PFM_WRITE_PMDS 0x02 +#define PFM_READ_PMDS 0x03 +#define PFM_STOP 0x04 +#define PFM_START 0x05 +#define PFM_ENABLE 0x06 /* obsolete */ +#define PFM_DISABLE 0x07 /* obsolete */ +#define PFM_CREATE_CONTEXT 0x08 +#define PFM_DESTROY_CONTEXT 0x09 /* obsolete use close() */ +#define PFM_RESTART 0x0a +#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */ +#define PFM_GET_FEATURES 0x0c +#define PFM_DEBUG 0x0d +#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */ +#define PFM_GET_PMC_RESET_VAL 0x0f +#define PFM_LOAD_CONTEXT 0x10 +#define PFM_UNLOAD_CONTEXT 0x11 +#define PFM_FREE_CONTEXT 0x12 + +/* + * PMU model specific commands (may not be 
supported on all PMU models) + */ +#define PFM_WRITE_IBRS 0x20 +#define PFM_WRITE_DBRS 0x21 + +/* + * context flags + */ +#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user level notifications */ +#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */ +#define PFM_FL_OVFL_NO_MSG 0x80 /* do not post overflow/end messages for notification */ + +/* + * event set flags + */ +#define PFM_SETFL_EXCL_IDLE 0x01 /* exclude idle task (syswide only) XXX: DO NOT USE YET */ + +/* + * PMC flags + */ +#define PFM_REGFL_OVFL_NOTIFY 0x1 /* send notification on overflow */ +#define PFM_REGFL_RANDOM 0x2 /* randomize sampling interval */ + +/* + * PMD/PMC/IBR/DBR return flags (ignored on input) + * + * Those flags are used on output and must be checked in case EAGAIN is returned + * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure. + */ +#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* set if register is implemented but not available */ +#define PFM_REG_RETFL_EINVAL (1UL<<30) /* set if register entry is invalid */ +#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL) + +#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0) + +typedef unsigned char pfm_uuid_t[16]; /* custom sampling buffer identifier type */ + +/* + * Request structure used to define a context + */ +typedef struct { + pfm_uuid_t ctx_smpl_buf_id; /* which buffer format to use (if needed) */ + unsigned long ctx_flags; /* noblock/block */ + unsigned short ctx_nextra_sets; /* number of extra event sets (you always get 1) */ + unsigned short ctx_reserved1; /* for future use */ + int ctx_fd; /* return arg: unique identification for context */ + void *ctx_smpl_vaddr; /* return arg: virtual address of sampling buffer, is used */ + unsigned long ctx_reserved2[11];/* for future use */ +} pfarg_context_t; + +/* + * Request structure used to write/read a PMC or PMD + */ +typedef struct { + unsigned int reg_num; /* which register */ + unsigned short reg_set; /* event set for this register */ + unsigned short reg_reserved1; /* for future use */ + + unsigned long reg_value; /* initial pmc/pmd value */ + unsigned long reg_flags; /* input: pmc/pmd flags, return: reg error */ + + unsigned long reg_long_reset; /* reset after buffer overflow notification */ + unsigned long reg_short_reset; /* reset after counter overflow */ + + unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */ + unsigned long reg_random_seed; /* seed value when randomization is used */ + unsigned long reg_random_mask; /* bitmask used to limit random value */ + unsigned long reg_last_reset_val;/* return: PMD last reset value */ + + unsigned long reg_smpl_pmds[4]; /* which pmds are accessed when PMC overflows */ + unsigned long reg_smpl_eventid; /* opaque sampling event identifier */ + + unsigned long reg_reserved2[3]; /* for future use */ +} pfarg_reg_t; + +typedef struct { + unsigned int dbreg_num; /* which debug register */ + unsigned short dbreg_set; /* event set for this register */ + unsigned short dbreg_reserved1; /* for future use */ + unsigned long dbreg_value; /* value for debug register */ + unsigned long dbreg_flags; /* return: dbreg error */ + unsigned long dbreg_reserved2[1]; /* for future use */ +} pfarg_dbreg_t; + +typedef struct { + unsigned int ft_version; /* perfmon: major [16-31], minor [0-15] */ + unsigned int ft_reserved; /* reserved for future use */ + unsigned long reserved[4]; /* for future use */ +} pfarg_features_t; + +typedef struct { + pid_t load_pid; /* process to load the 
context into */ + unsigned short load_set; /* first event set to load */ + unsigned short load_reserved1; /* for future use */ + unsigned long load_reserved2[3]; /* for future use */ +} pfarg_load_t; + +typedef struct { + int msg_type; /* generic message header */ + int msg_ctx_fd; /* generic message header */ + unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */ + unsigned short msg_active_set; /* active set at the time of overflow */ + unsigned short msg_reserved1; /* for future use */ + unsigned int msg_reserved2; /* for future use */ + unsigned long msg_tstamp; /* for perf tuning/debug */ +} pfm_ovfl_msg_t; + +typedef struct { + int msg_type; /* generic message header */ + int msg_ctx_fd; /* generic message header */ + unsigned long msg_tstamp; /* for perf tuning */ +} pfm_end_msg_t; + +typedef struct { + int msg_type; /* type of the message */ + int msg_ctx_fd; /* unique identifier for the context */ + unsigned long msg_tstamp; /* for perf tuning */ +} pfm_gen_msg_t; + +#define PFM_MSG_OVFL 1 /* an overflow happened */ +#define PFM_MSG_END 2 /* task to which context was attached ended */ + +typedef union { + pfm_ovfl_msg_t pfm_ovfl_msg; + pfm_end_msg_t pfm_end_msg; + pfm_gen_msg_t pfm_gen_msg; +} pfm_msg_t; + +/* + * Define the version numbers for both perfmon as a whole and the sampling buffer format. + */ +#define PFM_VERSION_MAJ 2U +#define PFM_VERSION_MIN 0U +#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff)) +#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff) +#define PFM_VERSION_MINOR(x) ((x) & 0xffff) + + +/* + * miscellaneous architected definitions + */ +#define PMU_FIRST_COUNTER 4 /* first counting monitor (PMC/PMD) */ +#define PMU_MAX_PMCS 256 /* maximum architected number of PMC registers */ +#define PMU_MAX_PMDS 256 /* maximum architected number of PMD registers */ + +#ifdef __KERNEL__ + +extern long perfmonctl(int fd, int cmd, void *arg, int narg); + +typedef struct { + void (*handler)(int irq, void *arg, struct pt_regs *regs); +} pfm_intr_handler_desc_t; + +extern void pfm_save_regs (struct task_struct *); +extern void pfm_load_regs (struct task_struct *); + +extern void pfm_exit_thread(struct task_struct *); +extern int pfm_use_debug_registers(struct task_struct *); +extern int pfm_release_debug_registers(struct task_struct *); +extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin); +extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs); +extern void pfm_init_percpu(void); +extern void pfm_handle_work(void); +extern int pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h); +extern int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h); + + + +/* + * Reset PMD register flags + */ +#define PFM_PMD_SHORT_RESET 0 +#define PFM_PMD_LONG_RESET 1 + +typedef union { + unsigned int val; + struct { + unsigned int notify_user:1; /* notify user program of overflow */ + unsigned int reset_ovfl_pmds:1; /* reset overflowed PMDs */ + unsigned int block_task:1; /* block monitored task on kernel exit */ + unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */ + unsigned int reserved:28; /* for future use */ + } bits; +} pfm_ovfl_ctrl_t; + +typedef struct { + unsigned char ovfl_pmd; /* index of overflowed PMD */ + unsigned char ovfl_notify; /* =1 if monitor requested overflow notification */ + unsigned short active_set; /* event set active at the time of the overflow */ + pfm_ovfl_ctrl_t ovfl_ctrl; /* return: perfmon controls to set by handler */ + + unsigned long 
pmd_last_reset; /* last reset value of of the PMD */ + unsigned long smpl_pmds[4]; /* bitmask of other PMD of interest on overflow */ + unsigned long smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */ + unsigned long pmd_value; /* current 64-bit value of the PMD */ + unsigned long pmd_eventid; /* eventid associated with PMD */ +} pfm_ovfl_arg_t; + + +typedef struct { + char *fmt_name; + pfm_uuid_t fmt_uuid; + size_t fmt_arg_size; + unsigned long fmt_flags; + + int (*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg); + int (*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size); + int (*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg); + int (*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp); + int (*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs); + int (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs); + int (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs); + + struct list_head fmt_list; +} pfm_buffer_fmt_t; + +extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt); +extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid); + +/* + * perfmon interface exported to modules + */ +extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs); +extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs); +extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs); +extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs); + +/* + * describe the content of the local_cpu_date->pfm_syst_info field + */ +#define PFM_CPUINFO_SYST_WIDE 0x1 /* if set a system wide session exists */ +#define PFM_CPUINFO_DCR_PP 0x2 /* if set the system wide session has started */ +#define PFM_CPUINFO_EXCL_IDLE 0x4 /* the system wide session excludes the idle task */ + +/* + * sysctl control structure. visible to sampling formats + */ +typedef struct { + int debug; /* turn on/off debugging via syslog */ + int debug_ovfl; /* turn on/off debug printk in overflow handler */ + int fastctxsw; /* turn on/off fast (unsecure) ctxsw */ + int expert_mode; /* turn on/off value checking */ +} pfm_sysctl_t; +extern pfm_sysctl_t pfm_sysctl; + + +/* + * information about a PMC or PMD. + * dep_pmd[]: a bitmask of dependent PMD registers + * dep_pmc[]: a bitmask of dependent PMC registers + */ +struct pfm_context;; +typedef int (*pfm_reg_check_t)(struct task_struct *task, struct pfm_context *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); +typedef struct { + unsigned int type; + int pm_pos; + unsigned long default_value; /* power-on default value */ + unsigned long reserved_mask; /* bitmask of reserved bits */ + pfm_reg_check_t read_check; + pfm_reg_check_t write_check; + unsigned long dep_pmd[4]; + unsigned long dep_pmc[4]; +} pfm_reg_desc_t; + + +/* + * This structure is initialized at boot time and contains + * a description of the PMU main characteristics. 
+ * + * If the probe function is defined, detection is based + * on its return value: + * - 0 means recognized PMU + * - anything else means not supported + * When the probe function is not defined, then the pmu_family field + * is used and it must match the host CPU family such that: + * - cpu->family & config->pmu_family != 0 + */ +typedef struct { + unsigned long ovfl_val; /* overflow value for counters */ + + pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ + pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ + + unsigned int num_pmcs; /* number of PMCS: computed at init time */ + unsigned int num_pmds; /* number of PMDS: computed at init time */ + unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ + unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ + + char *pmu_name; /* PMU family name */ + unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ + unsigned int flags; /* pmu specific flags */ + unsigned int num_ibrs; /* number of IBRS: computed at init time */ + unsigned int num_dbrs; /* number of DBRS: computed at init time */ + unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ + int (*probe)(void); /* customized probe routine */ + unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ +} pmu_config_t; + +extern pmu_config_t *pmu_conf; + +#endif /* __KERNEL__ */ + +#endif /* _ASM_IA64_PERFMON_H */ diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/xen/completion.h xenoprof-ia64-unstable/xen/include/asm-ia64/xen/completion.h --- xen-ia64-unstable.hg/xen/include/asm-ia64/xen/completion.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/asm-ia64/xen/completion.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,42 @@ +#ifndef __LINUX_COMPLETION_H +#define __LINUX_COMPLETION_H + +/* + * (C) Copyright 2001 Linus Torvalds + * + * Atomic wait-for-completion handler data structures. + * See kernel/sched.c for details. + */ + +#include <linux/wait.h> + +struct completion { + unsigned int done; + wait_queue_head_t wait; +}; + +#define COMPLETION_INITIALIZER(work) \ + { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } + +#define DECLARE_COMPLETION(work) \ + struct completion work = COMPLETION_INITIALIZER(work) + +static inline void init_completion(struct completion *x) +{ + x->done = 0; + init_waitqueue_head(&x->wait); +} + +extern void FASTCALL(wait_for_completion(struct completion *)); +extern int FASTCALL(wait_for_completion_interruptible(struct completion *x)); +extern unsigned long FASTCALL(wait_for_completion_timeout(struct completion *x, + unsigned long timeout)); +extern unsigned long FASTCALL(wait_for_completion_interruptible_timeout( + struct completion *x, unsigned long timeout)); + +extern void FASTCALL(complete(struct completion *)); +extern void FASTCALL(complete_all(struct completion *)); + +#define INIT_COMPLETION(x) ((x).done = 0) + +#endif diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/xen/posix_types.h xenoprof-ia64-unstable/xen/include/asm-ia64/xen/posix_types.h --- xen-ia64-unstable.hg/xen/include/asm-ia64/xen/posix_types.h 1970-01-01 09:00:00.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/asm-ia64/xen/posix_types.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,126 @@ +#ifndef _ASM_IA64_POSIX_TYPES_H +#define _ASM_IA64_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. 
Also, we cannot + * assume GCC is being used. + * + * Based on <asm-alpha/posix_types.h>. + * + * Modified 1998-2000, 2003 + * David Mosberger-Tang <davidm@xxxxxxxxxx>, Hewlett-Packard Co + */ + +typedef unsigned long __kernel_ino_t; +typedef unsigned int __kernel_mode_t; +typedef unsigned int __kernel_nlink_t; +typedef long __kernel_off_t; +typedef long long __kernel_loff_t; +typedef int __kernel_pid_t; +typedef int __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_timer_t; +typedef int __kernel_clockid_t; +typedef int __kernel_daddr_t; +typedef char * __kernel_caddr_t; +typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; + +typedef struct { + int val[2]; +} __kernel_fsid_t; + +typedef __kernel_uid_t __kernel_old_uid_t; +typedef __kernel_gid_t __kernel_old_gid_t; +typedef __kernel_uid_t __kernel_uid32_t; +typedef __kernel_gid_t __kernel_gid32_t; + +typedef unsigned int __kernel_old_dev_t; + +# ifdef __KERNEL__ + +# ifndef __GNUC__ + +#define __FD_SET(d, set) ((set)->fds_bits[__FDELT(d)] |= __FDMASK(d)) +#define __FD_CLR(d, set) ((set)->fds_bits[__FDELT(d)] &= ~__FDMASK(d)) +#define __FD_ISSET(d, set) (((set)->fds_bits[__FDELT(d)] & __FDMASK(d)) != 0) +#define __FD_ZERO(set) \ + ((void) memset ((__ptr_t) (set), 0, sizeof (__kernel_fd_set))) + +# else /* !__GNUC__ */ + +/* With GNU C, use inline functions instead so args are evaluated only once: */ + +#undef __FD_SET +static __inline__ void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + fdsetp->fds_bits[_tmp] |= (1UL<<_rem); +} + +#undef __FD_CLR +static __inline__ void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); +} + +#undef __FD_ISSET +static __inline__ int __FD_ISSET(unsigned long fd, const __kernel_fd_set *p) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant case (8 ints, + * for a 256-bit fd_set) + */ +#undef __FD_ZERO +static __inline__ void __FD_ZERO(__kernel_fd_set *p) +{ + unsigned long *tmp = p->fds_bits; + int i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 16: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; + tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; + return; + + case 8: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + return; + + case 4: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + return; + } + } + i = __FDSET_LONGS; + while (i) { + i--; + *tmp = 0; + tmp++; + } +} + +# endif /* !__GNUC__ */ +# endif /* __KERNEL__ */ +#endif /* _ASM_IA64_POSIX_TYPES_H */ diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/xen/sysctl.h xenoprof-ia64-unstable/xen/include/asm-ia64/xen/sysctl.h --- xen-ia64-unstable.hg/xen/include/asm-ia64/xen/sysctl.h 1970-01-01 09:00:00.000000000 +0900 +++ 
xenoprof-ia64-unstable/xen/include/asm-ia64/xen/sysctl.h 2006-06-30 15:25:01.000000000 +0900 @@ -0,0 +1,990 @@ +/* + * sysctl.h: General linux system control interface + * + * Begun 24 March 1995, Stephen Tweedie + * + **************************************************************** + **************************************************************** + ** + ** The values in this file are exported to user space via + ** the sysctl() binary interface. However this interface + ** is unstable and deprecated and will be removed in the future. + ** For a stable interface use /proc/sys. + ** + **************************************************************** + **************************************************************** + */ + +#ifndef _LINUX_SYSCTL_H +#define _LINUX_SYSCTL_H + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/compiler.h> + +struct file; +struct completion; + +#define CTL_MAXNAME 10 /* how many path components do we allow in a + call to sysctl? In other words, what is + the largest acceptable value for the nlen + member of a struct __sysctl_args to have? */ + +struct __sysctl_args { + int __user *name; + int nlen; + void __user *oldval; + size_t __user *oldlenp; + void __user *newval; + size_t newlen; + unsigned long __unused[4]; +}; + +/* Define sysctl names first */ + +/* Top-level names: */ + +/* For internal pattern-matching use only: */ +#ifdef __KERNEL__ +#define CTL_ANY -1 /* Matches any name */ +#define CTL_NONE 0 +#endif + +enum +{ + CTL_KERN=1, /* General kernel info and control */ + CTL_VM=2, /* VM management */ + CTL_NET=3, /* Networking */ + CTL_PROC=4, /* Process info */ + CTL_FS=5, /* Filesystems */ + CTL_DEBUG=6, /* Debugging */ + CTL_DEV=7, /* Devices */ + CTL_BUS=8, /* Busses */ + CTL_ABI=9, /* Binary emulation */ + CTL_CPU=10 /* CPU stuff (speed scaling, etc) */ +}; + +/* CTL_BUS names: */ +enum +{ + CTL_BUS_ISA=1 /* ISA */ +}; + +/* /proc/sys/fs/inotify/ */ +enum +{ + INOTIFY_MAX_USER_INSTANCES=1, /* max instances per user */ + INOTIFY_MAX_USER_WATCHES=2, /* max watches per user */ + INOTIFY_MAX_QUEUED_EVENTS=3 /* max queued events per instance */ +}; + +/* CTL_KERN names: */ +enum +{ + KERN_OSTYPE=1, /* string: system version */ + KERN_OSRELEASE=2, /* string: system release */ + KERN_OSREV=3, /* int: system revision */ + KERN_VERSION=4, /* string: compile time info */ + KERN_SECUREMASK=5, /* struct: maximum rights mask */ + KERN_PROF=6, /* table: profiling information */ + KERN_NODENAME=7, + KERN_DOMAINNAME=8, + + KERN_CAP_BSET=14, /* int: capability bounding set */ + KERN_PANIC=15, /* int: panic timeout */ + KERN_REALROOTDEV=16, /* real root device to mount after initrd */ + + KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ + KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ + KERN_PRINTK=23, /* struct: control printk logging parameters */ + KERN_NAMETRANS=24, /* Name translation */ + KERN_PPC_HTABRECLAIM=25, /* turn htab reclaimation on/off on PPC */ + KERN_PPC_ZEROPAGED=26, /* turn idle page zeroing on/off on PPC */ + KERN_PPC_POWERSAVE_NAP=27, /* use nap mode for power saving */ + KERN_MODPROBE=28, + KERN_SG_BIG_BUFF=29, + KERN_ACCT=30, /* BSD process accounting parameters */ + KERN_PPC_L2CR=31, /* l2cr register on PPC */ + + KERN_RTSIGNR=32, /* Number of rt sigs queued */ + KERN_RTSIGMAX=33, /* Max queuable */ + + KERN_SHMMAX=34, /* long: Maximum shared memory segment */ + KERN_MSGMAX=35, /* int: Maximum size of a messege */ + KERN_MSGMNB=36, /* int: Maximum message queue size */ + KERN_MSGPOOL=37, /* int: Maximum system 
message pool size */ + KERN_SYSRQ=38, /* int: Sysreq enable */ + KERN_MAX_THREADS=39, /* int: Maximum nr of threads in the system */ + KERN_RANDOM=40, /* Random driver */ + KERN_SHMALL=41, /* int: Maximum size of shared memory */ + KERN_MSGMNI=42, /* int: msg queue identifiers */ + KERN_SEM=43, /* struct: sysv semaphore limits */ + KERN_SPARC_STOP_A=44, /* int: Sparc Stop-A enable */ + KERN_SHMMNI=45, /* int: shm array identifiers */ + KERN_OVERFLOWUID=46, /* int: overflow UID */ + KERN_OVERFLOWGID=47, /* int: overflow GID */ + KERN_SHMPATH=48, /* string: path to shm fs */ + KERN_HOTPLUG=49, /* string: path to uevent helper (deprecated) */ + KERN_IEEE_EMULATION_WARNINGS=50, /* int: unimplemented ieee instructions */ + KERN_S390_USER_DEBUG_LOGGING=51, /* int: dumps of user faults */ + KERN_CORE_USES_PID=52, /* int: use core or core.%pid */ + KERN_TAINTED=53, /* int: various kernel tainted flags */ + KERN_CADPID=54, /* int: PID of the process to notify on CAD */ + KERN_PIDMAX=55, /* int: PID # limit */ + KERN_CORE_PATTERN=56, /* string: pattern for core-file names */ + KERN_PANIC_ON_OOPS=57, /* int: whether we will panic on an oops */ + KERN_HPPA_PWRSW=58, /* int: hppa soft-power enable */ + KERN_HPPA_UNALIGNED=59, /* int: hppa unaligned-trap enable */ + KERN_PRINTK_RATELIMIT=60, /* int: tune printk ratelimiting */ + KERN_PRINTK_RATELIMIT_BURST=61, /* int: tune printk ratelimiting */ + KERN_PTY=62, /* dir: pty driver */ + KERN_NGROUPS_MAX=63, /* int: NGROUPS_MAX */ + KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */ + KERN_HZ_TIMER=65, /* int: hz timer on or off */ + KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */ + KERN_BOOTLOADER_TYPE=67, /* int: boot loader type */ + KERN_RANDOMIZE=68, /* int: randomize virtual address space */ + KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ + KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ + KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ + KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ +}; + + +/* CTL_VM names: */ +enum +{ + VM_UNUSED1=1, /* was: struct: Set vm swapping control */ + VM_UNUSED2=2, /* was; int: Linear or sqrt() swapout for hogs */ + VM_UNUSED3=3, /* was: struct: Set free page thresholds */ + VM_UNUSED4=4, /* Spare */ + VM_OVERCOMMIT_MEMORY=5, /* Turn off the virtual memory safety limit */ + VM_UNUSED5=6, /* was: struct: Set buffer memory thresholds */ + VM_UNUSED7=7, /* was: struct: Set cache memory thresholds */ + VM_UNUSED8=8, /* was: struct: Control kswapd behaviour */ + VM_UNUSED9=9, /* was: struct: Set page table cache parameters */ + VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */ + VM_DIRTY_BACKGROUND=11, /* dirty_background_ratio */ + VM_DIRTY_RATIO=12, /* dirty_ratio */ + VM_DIRTY_WB_CS=13, /* dirty_writeback_centisecs */ + VM_DIRTY_EXPIRE_CS=14, /* dirty_expire_centisecs */ + VM_NR_PDFLUSH_THREADS=15, /* nr_pdflush_threads */ + VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */ + VM_PAGEBUF=17, /* struct: Control pagebuf parameters */ + VM_HUGETLB_PAGES=18, /* int: Number of available Huge Pages */ + VM_SWAPPINESS=19, /* Tendency to steal mapped memory */ + VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */ + VM_MIN_FREE_KBYTES=21, /* Minimum free kilobytes to maintain */ + VM_MAX_MAP_COUNT=22, /* int: Maximum number of mmaps/address-space */ + VM_LAPTOP_MODE=23, /* vm laptop mode */ + VM_BLOCK_DUMP=24, /* block dump mode */ + VM_HUGETLB_GROUP=25, 
/* permitted hugetlb group */ + VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ + VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ + VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ + VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ + VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ + VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ + VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ +}; + + +/* CTL_NET names: */ +enum +{ + NET_CORE=1, + NET_ETHER=2, + NET_802=3, + NET_UNIX=4, + NET_IPV4=5, + NET_IPX=6, + NET_ATALK=7, + NET_NETROM=8, + NET_AX25=9, + NET_BRIDGE=10, + NET_ROSE=11, + NET_IPV6=12, + NET_X25=13, + NET_TR=14, + NET_DECNET=15, + NET_ECONET=16, + NET_SCTP=17, + NET_LLC=18, + NET_NETFILTER=19, +}; + +/* /proc/sys/kernel/random */ +enum +{ + RANDOM_POOLSIZE=1, + RANDOM_ENTROPY_COUNT=2, + RANDOM_READ_THRESH=3, + RANDOM_WRITE_THRESH=4, + RANDOM_BOOT_ID=5, + RANDOM_UUID=6 +}; + +/* /proc/sys/kernel/pty */ +enum +{ + PTY_MAX=1, + PTY_NR=2 +}; + +/* /proc/sys/bus/isa */ +enum +{ + BUS_ISA_MEM_BASE=1, + BUS_ISA_PORT_BASE=2, + BUS_ISA_PORT_SHIFT=3 +}; + +/* /proc/sys/net/core */ +enum +{ + NET_CORE_WMEM_MAX=1, + NET_CORE_RMEM_MAX=2, + NET_CORE_WMEM_DEFAULT=3, + NET_CORE_RMEM_DEFAULT=4, +/* was NET_CORE_DESTROY_DELAY */ + NET_CORE_MAX_BACKLOG=6, + NET_CORE_FASTROUTE=7, + NET_CORE_MSG_COST=8, + NET_CORE_MSG_BURST=9, + NET_CORE_OPTMEM_MAX=10, + NET_CORE_HOT_LIST_LENGTH=11, + NET_CORE_DIVERT_VERSION=12, + NET_CORE_NO_CONG_THRESH=13, + NET_CORE_NO_CONG=14, + NET_CORE_LO_CONG=15, + NET_CORE_MOD_CONG=16, + NET_CORE_DEV_WEIGHT=17, + NET_CORE_SOMAXCONN=18, + NET_CORE_BUDGET=19, +}; + +/* /proc/sys/net/ethernet */ + +/* /proc/sys/net/802 */ + +/* /proc/sys/net/unix */ + +enum +{ + NET_UNIX_DESTROY_DELAY=1, + NET_UNIX_DELETE_DELAY=2, + NET_UNIX_MAX_DGRAM_QLEN=3, +}; + +/* /proc/sys/net/netfilter */ +enum +{ + NET_NF_CONNTRACK_MAX=1, + NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2, + NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3, + NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4, + NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5, + NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6, + NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7, + NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8, + NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9, + NET_NF_CONNTRACK_UDP_TIMEOUT=10, + NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11, + NET_NF_CONNTRACK_ICMP_TIMEOUT=12, + NET_NF_CONNTRACK_GENERIC_TIMEOUT=13, + NET_NF_CONNTRACK_BUCKETS=14, + NET_NF_CONNTRACK_LOG_INVALID=15, + NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16, + NET_NF_CONNTRACK_TCP_LOOSE=17, + NET_NF_CONNTRACK_TCP_BE_LIBERAL=18, + NET_NF_CONNTRACK_TCP_MAX_RETRANS=19, + NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20, + NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21, + NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22, + NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23, + NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24, + NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, + NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, + NET_NF_CONNTRACK_COUNT=27, + NET_NF_CONNTRACK_ICMPV6_TIMEOUT=28, + NET_NF_CONNTRACK_FRAG6_TIMEOUT=29, + NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30, + NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31, +}; + +/* /proc/sys/net/ipv4 */ +enum +{ + /* v2.0 compatibile variables */ + NET_IPV4_FORWARD=8, + NET_IPV4_DYNADDR=9, + + NET_IPV4_CONF=16, + NET_IPV4_NEIGH=17, + NET_IPV4_ROUTE=18, + NET_IPV4_FIB_HASH=19, + NET_IPV4_NETFILTER=20, + + NET_IPV4_TCP_TIMESTAMPS=33, + 
NET_IPV4_TCP_WINDOW_SCALING=34, + NET_IPV4_TCP_SACK=35, + NET_IPV4_TCP_RETRANS_COLLAPSE=36, + NET_IPV4_DEFAULT_TTL=37, + NET_IPV4_AUTOCONFIG=38, + NET_IPV4_NO_PMTU_DISC=39, + NET_IPV4_TCP_SYN_RETRIES=40, + NET_IPV4_IPFRAG_HIGH_THRESH=41, + NET_IPV4_IPFRAG_LOW_THRESH=42, + NET_IPV4_IPFRAG_TIME=43, + NET_IPV4_TCP_MAX_KA_PROBES=44, + NET_IPV4_TCP_KEEPALIVE_TIME=45, + NET_IPV4_TCP_KEEPALIVE_PROBES=46, + NET_IPV4_TCP_RETRIES1=47, + NET_IPV4_TCP_RETRIES2=48, + NET_IPV4_TCP_FIN_TIMEOUT=49, + NET_IPV4_IP_MASQ_DEBUG=50, + NET_TCP_SYNCOOKIES=51, + NET_TCP_STDURG=52, + NET_TCP_RFC1337=53, + NET_TCP_SYN_TAILDROP=54, + NET_TCP_MAX_SYN_BACKLOG=55, + NET_IPV4_LOCAL_PORT_RANGE=56, + NET_IPV4_ICMP_ECHO_IGNORE_ALL=57, + NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS=58, + NET_IPV4_ICMP_SOURCEQUENCH_RATE=59, + NET_IPV4_ICMP_DESTUNREACH_RATE=60, + NET_IPV4_ICMP_TIMEEXCEED_RATE=61, + NET_IPV4_ICMP_PARAMPROB_RATE=62, + NET_IPV4_ICMP_ECHOREPLY_RATE=63, + NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES=64, + NET_IPV4_IGMP_MAX_MEMBERSHIPS=65, + NET_TCP_TW_RECYCLE=66, + NET_IPV4_ALWAYS_DEFRAG=67, + NET_IPV4_TCP_KEEPALIVE_INTVL=68, + NET_IPV4_INET_PEER_THRESHOLD=69, + NET_IPV4_INET_PEER_MINTTL=70, + NET_IPV4_INET_PEER_MAXTTL=71, + NET_IPV4_INET_PEER_GC_MINTIME=72, + NET_IPV4_INET_PEER_GC_MAXTIME=73, + NET_TCP_ORPHAN_RETRIES=74, + NET_TCP_ABORT_ON_OVERFLOW=75, + NET_TCP_SYNACK_RETRIES=76, + NET_TCP_MAX_ORPHANS=77, + NET_TCP_MAX_TW_BUCKETS=78, + NET_TCP_FACK=79, + NET_TCP_REORDERING=80, + NET_TCP_ECN=81, + NET_TCP_DSACK=82, + NET_TCP_MEM=83, + NET_TCP_WMEM=84, + NET_TCP_RMEM=85, + NET_TCP_APP_WIN=86, + NET_TCP_ADV_WIN_SCALE=87, + NET_IPV4_NONLOCAL_BIND=88, + NET_IPV4_ICMP_RATELIMIT=89, + NET_IPV4_ICMP_RATEMASK=90, + NET_TCP_TW_REUSE=91, + NET_TCP_FRTO=92, + NET_TCP_LOW_LATENCY=93, + NET_IPV4_IPFRAG_SECRET_INTERVAL=94, + NET_IPV4_IGMP_MAX_MSF=96, + NET_TCP_NO_METRICS_SAVE=97, + NET_TCP_DEFAULT_WIN_SCALE=105, + NET_TCP_MODERATE_RCVBUF=106, + NET_TCP_TSO_WIN_DIVISOR=107, + NET_TCP_BIC_BETA=108, + NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, + NET_TCP_CONG_CONTROL=110, + NET_TCP_ABC=111, + NET_IPV4_IPFRAG_MAX_DIST=112, +}; + +enum { + NET_IPV4_ROUTE_FLUSH=1, + NET_IPV4_ROUTE_MIN_DELAY=2, + NET_IPV4_ROUTE_MAX_DELAY=3, + NET_IPV4_ROUTE_GC_THRESH=4, + NET_IPV4_ROUTE_MAX_SIZE=5, + NET_IPV4_ROUTE_GC_MIN_INTERVAL=6, + NET_IPV4_ROUTE_GC_TIMEOUT=7, + NET_IPV4_ROUTE_GC_INTERVAL=8, + NET_IPV4_ROUTE_REDIRECT_LOAD=9, + NET_IPV4_ROUTE_REDIRECT_NUMBER=10, + NET_IPV4_ROUTE_REDIRECT_SILENCE=11, + NET_IPV4_ROUTE_ERROR_COST=12, + NET_IPV4_ROUTE_ERROR_BURST=13, + NET_IPV4_ROUTE_GC_ELASTICITY=14, + NET_IPV4_ROUTE_MTU_EXPIRES=15, + NET_IPV4_ROUTE_MIN_PMTU=16, + NET_IPV4_ROUTE_MIN_ADVMSS=17, + NET_IPV4_ROUTE_SECRET_INTERVAL=18, + NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS=19, +}; + +enum +{ + NET_PROTO_CONF_ALL=-2, + NET_PROTO_CONF_DEFAULT=-3 + + /* And device ifindices ... 
*/ +}; + +enum +{ + NET_IPV4_CONF_FORWARDING=1, + NET_IPV4_CONF_MC_FORWARDING=2, + NET_IPV4_CONF_PROXY_ARP=3, + NET_IPV4_CONF_ACCEPT_REDIRECTS=4, + NET_IPV4_CONF_SECURE_REDIRECTS=5, + NET_IPV4_CONF_SEND_REDIRECTS=6, + NET_IPV4_CONF_SHARED_MEDIA=7, + NET_IPV4_CONF_RP_FILTER=8, + NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE=9, + NET_IPV4_CONF_BOOTP_RELAY=10, + NET_IPV4_CONF_LOG_MARTIANS=11, + NET_IPV4_CONF_TAG=12, + NET_IPV4_CONF_ARPFILTER=13, + NET_IPV4_CONF_MEDIUM_ID=14, + NET_IPV4_CONF_NOXFRM=15, + NET_IPV4_CONF_NOPOLICY=16, + NET_IPV4_CONF_FORCE_IGMP_VERSION=17, + NET_IPV4_CONF_ARP_ANNOUNCE=18, + NET_IPV4_CONF_ARP_IGNORE=19, + NET_IPV4_CONF_PROMOTE_SECONDARIES=20, + __NET_IPV4_CONF_MAX +}; + +/* /proc/sys/net/ipv4/netfilter */ +enum +{ + NET_IPV4_NF_CONNTRACK_MAX=1, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9, + NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT=10, + NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11, + NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT=12, + NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT=13, + NET_IPV4_NF_CONNTRACK_BUCKETS=14, + NET_IPV4_NF_CONNTRACK_LOG_INVALID=15, + NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16, + NET_IPV4_NF_CONNTRACK_TCP_LOOSE=17, + NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL=18, + NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS=19, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, + NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, + NET_IPV4_NF_CONNTRACK_COUNT=27, +}; + +/* /proc/sys/net/ipv6 */ +enum { + NET_IPV6_CONF=16, + NET_IPV6_NEIGH=17, + NET_IPV6_ROUTE=18, + NET_IPV6_ICMP=19, + NET_IPV6_BINDV6ONLY=20, + NET_IPV6_IP6FRAG_HIGH_THRESH=21, + NET_IPV6_IP6FRAG_LOW_THRESH=22, + NET_IPV6_IP6FRAG_TIME=23, + NET_IPV6_IP6FRAG_SECRET_INTERVAL=24, + NET_IPV6_MLD_MAX_MSF=25, +}; + +enum { + NET_IPV6_ROUTE_FLUSH=1, + NET_IPV6_ROUTE_GC_THRESH=2, + NET_IPV6_ROUTE_MAX_SIZE=3, + NET_IPV6_ROUTE_GC_MIN_INTERVAL=4, + NET_IPV6_ROUTE_GC_TIMEOUT=5, + NET_IPV6_ROUTE_GC_INTERVAL=6, + NET_IPV6_ROUTE_GC_ELASTICITY=7, + NET_IPV6_ROUTE_MTU_EXPIRES=8, + NET_IPV6_ROUTE_MIN_ADVMSS=9, + NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS=10 +}; + +enum { + NET_IPV6_FORWARDING=1, + NET_IPV6_HOP_LIMIT=2, + NET_IPV6_MTU=3, + NET_IPV6_ACCEPT_RA=4, + NET_IPV6_ACCEPT_REDIRECTS=5, + NET_IPV6_AUTOCONF=6, + NET_IPV6_DAD_TRANSMITS=7, + NET_IPV6_RTR_SOLICITS=8, + NET_IPV6_RTR_SOLICIT_INTERVAL=9, + NET_IPV6_RTR_SOLICIT_DELAY=10, + NET_IPV6_USE_TEMPADDR=11, + NET_IPV6_TEMP_VALID_LFT=12, + NET_IPV6_TEMP_PREFERED_LFT=13, + NET_IPV6_REGEN_MAX_RETRY=14, + NET_IPV6_MAX_DESYNC_FACTOR=15, + NET_IPV6_MAX_ADDRESSES=16, + NET_IPV6_FORCE_MLD_VERSION=17, + __NET_IPV6_MAX +}; + +/* /proc/sys/net/ipv6/icmp */ +enum { + NET_IPV6_ICMP_RATELIMIT=1 +}; + +/* /proc/sys/net/<protocol>/neigh/<dev> */ +enum { + NET_NEIGH_MCAST_SOLICIT=1, + NET_NEIGH_UCAST_SOLICIT=2, + NET_NEIGH_APP_SOLICIT=3, + NET_NEIGH_RETRANS_TIME=4, + NET_NEIGH_REACHABLE_TIME=5, + NET_NEIGH_DELAY_PROBE_TIME=6, + NET_NEIGH_GC_STALE_TIME=7, + NET_NEIGH_UNRES_QLEN=8, + NET_NEIGH_PROXY_QLEN=9, + 
NET_NEIGH_ANYCAST_DELAY=10, + NET_NEIGH_PROXY_DELAY=11, + NET_NEIGH_LOCKTIME=12, + NET_NEIGH_GC_INTERVAL=13, + NET_NEIGH_GC_THRESH1=14, + NET_NEIGH_GC_THRESH2=15, + NET_NEIGH_GC_THRESH3=16, + NET_NEIGH_RETRANS_TIME_MS=17, + NET_NEIGH_REACHABLE_TIME_MS=18, + __NET_NEIGH_MAX +}; + +/* /proc/sys/net/ipx */ +enum { + NET_IPX_PPROP_BROADCASTING=1, + NET_IPX_FORWARDING=2 +}; + +/* /proc/sys/net/llc */ +enum { + NET_LLC2=1, + NET_LLC_STATION=2, +}; + +/* /proc/sys/net/llc/llc2 */ +enum { + NET_LLC2_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/station */ +enum { + NET_LLC_STATION_ACK_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/llc2/timeout */ +enum { + NET_LLC2_ACK_TIMEOUT=1, + NET_LLC2_P_TIMEOUT=2, + NET_LLC2_REJ_TIMEOUT=3, + NET_LLC2_BUSY_TIMEOUT=4, +}; + +/* /proc/sys/net/appletalk */ +enum { + NET_ATALK_AARP_EXPIRY_TIME=1, + NET_ATALK_AARP_TICK_TIME=2, + NET_ATALK_AARP_RETRANSMIT_LIMIT=3, + NET_ATALK_AARP_RESOLVE_TIME=4 +}; + + +/* /proc/sys/net/netrom */ +enum { + NET_NETROM_DEFAULT_PATH_QUALITY=1, + NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER=2, + NET_NETROM_NETWORK_TTL_INITIALISER=3, + NET_NETROM_TRANSPORT_TIMEOUT=4, + NET_NETROM_TRANSPORT_MAXIMUM_TRIES=5, + NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY=6, + NET_NETROM_TRANSPORT_BUSY_DELAY=7, + NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE=8, + NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT=9, + NET_NETROM_ROUTING_CONTROL=10, + NET_NETROM_LINK_FAILS_COUNT=11, + NET_NETROM_RESET=12 +}; + +/* /proc/sys/net/ax25 */ +enum { + NET_AX25_IP_DEFAULT_MODE=1, + NET_AX25_DEFAULT_MODE=2, + NET_AX25_BACKOFF_TYPE=3, + NET_AX25_CONNECT_MODE=4, + NET_AX25_STANDARD_WINDOW=5, + NET_AX25_EXTENDED_WINDOW=6, + NET_AX25_T1_TIMEOUT=7, + NET_AX25_T2_TIMEOUT=8, + NET_AX25_T3_TIMEOUT=9, + NET_AX25_IDLE_TIMEOUT=10, + NET_AX25_N2=11, + NET_AX25_PACLEN=12, + NET_AX25_PROTOCOL=13, + NET_AX25_DAMA_SLAVE_TIMEOUT=14 +}; + +/* /proc/sys/net/rose */ +enum { + NET_ROSE_RESTART_REQUEST_TIMEOUT=1, + NET_ROSE_CALL_REQUEST_TIMEOUT=2, + NET_ROSE_RESET_REQUEST_TIMEOUT=3, + NET_ROSE_CLEAR_REQUEST_TIMEOUT=4, + NET_ROSE_ACK_HOLD_BACK_TIMEOUT=5, + NET_ROSE_ROUTING_CONTROL=6, + NET_ROSE_LINK_FAIL_TIMEOUT=7, + NET_ROSE_MAX_VCS=8, + NET_ROSE_WINDOW_SIZE=9, + NET_ROSE_NO_ACTIVITY_TIMEOUT=10 +}; + +/* /proc/sys/net/x25 */ +enum { + NET_X25_RESTART_REQUEST_TIMEOUT=1, + NET_X25_CALL_REQUEST_TIMEOUT=2, + NET_X25_RESET_REQUEST_TIMEOUT=3, + NET_X25_CLEAR_REQUEST_TIMEOUT=4, + NET_X25_ACK_HOLD_BACK_TIMEOUT=5 +}; + +/* /proc/sys/net/token-ring */ +enum +{ + NET_TR_RIF_TIMEOUT=1 +}; + +/* /proc/sys/net/decnet/ */ +enum { + NET_DECNET_NODE_TYPE = 1, + NET_DECNET_NODE_ADDRESS = 2, + NET_DECNET_NODE_NAME = 3, + NET_DECNET_DEFAULT_DEVICE = 4, + NET_DECNET_TIME_WAIT = 5, + NET_DECNET_DN_COUNT = 6, + NET_DECNET_DI_COUNT = 7, + NET_DECNET_DR_COUNT = 8, + NET_DECNET_DST_GC_INTERVAL = 9, + NET_DECNET_CONF = 10, + NET_DECNET_NO_FC_MAX_CWND = 11, + NET_DECNET_MEM = 12, + NET_DECNET_RMEM = 13, + NET_DECNET_WMEM = 14, + NET_DECNET_DEBUG_LEVEL = 255 +}; + +/* /proc/sys/net/decnet/conf/<dev> */ +enum { + NET_DECNET_CONF_LOOPBACK = -2, + NET_DECNET_CONF_DDCMP = -3, + NET_DECNET_CONF_PPP = -4, + NET_DECNET_CONF_X25 = -5, + NET_DECNET_CONF_GRE = -6, + NET_DECNET_CONF_ETHER = -7 + + /* ... 
and ifindex of devices */ +}; + +/* /proc/sys/net/decnet/conf/<dev>/ */ +enum { + NET_DECNET_CONF_DEV_PRIORITY = 1, + NET_DECNET_CONF_DEV_T1 = 2, + NET_DECNET_CONF_DEV_T2 = 3, + NET_DECNET_CONF_DEV_T3 = 4, + NET_DECNET_CONF_DEV_FORWARDING = 5, + NET_DECNET_CONF_DEV_BLKSIZE = 6, + NET_DECNET_CONF_DEV_STATE = 7 +}; + +/* /proc/sys/net/sctp */ +enum { + NET_SCTP_RTO_INITIAL = 1, + NET_SCTP_RTO_MIN = 2, + NET_SCTP_RTO_MAX = 3, + NET_SCTP_RTO_ALPHA = 4, + NET_SCTP_RTO_BETA = 5, + NET_SCTP_VALID_COOKIE_LIFE = 6, + NET_SCTP_ASSOCIATION_MAX_RETRANS = 7, + NET_SCTP_PATH_MAX_RETRANS = 8, + NET_SCTP_MAX_INIT_RETRANSMITS = 9, + NET_SCTP_HB_INTERVAL = 10, + NET_SCTP_PRESERVE_ENABLE = 11, + NET_SCTP_MAX_BURST = 12, + NET_SCTP_ADDIP_ENABLE = 13, + NET_SCTP_PRSCTP_ENABLE = 14, + NET_SCTP_SNDBUF_POLICY = 15, + NET_SCTP_SACK_TIMEOUT = 16, + NET_SCTP_RCVBUF_POLICY = 17, +}; + +/* /proc/sys/net/bridge */ +enum { + NET_BRIDGE_NF_CALL_ARPTABLES = 1, + NET_BRIDGE_NF_CALL_IPTABLES = 2, + NET_BRIDGE_NF_CALL_IP6TABLES = 3, + NET_BRIDGE_NF_FILTER_VLAN_TAGGED = 4, +}; + +/* CTL_PROC names: */ + +/* CTL_FS names: */ +enum +{ + FS_NRINODE=1, /* int:current number of allocated inodes */ + FS_STATINODE=2, + FS_MAXINODE=3, /* int:maximum number of inodes that can be allocated */ + FS_NRDQUOT=4, /* int:current number of allocated dquots */ + FS_MAXDQUOT=5, /* int:maximum number of dquots that can be allocated */ + FS_NRFILE=6, /* int:current number of allocated filedescriptors */ + FS_MAXFILE=7, /* int:maximum number of filedescriptors that can be allocated */ + FS_DENTRY=8, + FS_NRSUPER=9, /* int:current number of allocated super_blocks */ + FS_MAXSUPER=10, /* int:maximum number of super_blocks that can be allocated */ + FS_OVERFLOWUID=11, /* int: overflow UID */ + FS_OVERFLOWGID=12, /* int: overflow GID */ + FS_LEASES=13, /* int: leases enabled */ + FS_DIR_NOTIFY=14, /* int: directory notification enabled */ + FS_LEASE_TIME=15, /* int: maximum time to wait for a lease break */ + FS_DQSTATS=16, /* disc quota usage statistics and control */ + FS_XFS=17, /* struct: control xfs parameters */ + FS_AIO_NR=18, /* current system-wide number of aio requests */ + FS_AIO_MAX_NR=19, /* system-wide maximum number of aio requests */ + FS_INOTIFY=20, /* inotify submenu */ +}; + +/* /proc/sys/fs/quota/ */ +enum { + FS_DQ_LOOKUPS = 1, + FS_DQ_DROPS = 2, + FS_DQ_READS = 3, + FS_DQ_WRITES = 4, + FS_DQ_CACHE_HITS = 5, + FS_DQ_ALLOCATED = 6, + FS_DQ_FREE = 7, + FS_DQ_SYNCS = 8, + FS_DQ_WARNINGS = 9, +}; + +/* CTL_DEBUG names: */ + +/* CTL_DEV names: */ +enum { + DEV_CDROM=1, + DEV_HWMON=2, + DEV_PARPORT=3, + DEV_RAID=4, + DEV_MAC_HID=5, + DEV_SCSI=6, + DEV_IPMI=7, +}; + +/* /proc/sys/dev/cdrom */ +enum { + DEV_CDROM_INFO=1, + DEV_CDROM_AUTOCLOSE=2, + DEV_CDROM_AUTOEJECT=3, + DEV_CDROM_DEBUG=4, + DEV_CDROM_LOCK=5, + DEV_CDROM_CHECK_MEDIA=6 +}; + +/* /proc/sys/dev/parport */ +enum { + DEV_PARPORT_DEFAULT=-3 +}; + +/* /proc/sys/dev/raid */ +enum { + DEV_RAID_SPEED_LIMIT_MIN=1, + DEV_RAID_SPEED_LIMIT_MAX=2 +}; + +/* /proc/sys/dev/parport/default */ +enum { + DEV_PARPORT_DEFAULT_TIMESLICE=1, + DEV_PARPORT_DEFAULT_SPINTIME=2 +}; + +/* /proc/sys/dev/parport/parport n */ +enum { + DEV_PARPORT_SPINTIME=1, + DEV_PARPORT_BASE_ADDR=2, + DEV_PARPORT_IRQ=3, + DEV_PARPORT_DMA=4, + DEV_PARPORT_MODES=5, + DEV_PARPORT_DEVICES=6, + DEV_PARPORT_AUTOPROBE=16 +}; + +/* /proc/sys/dev/parport/parport n/devices/ */ +enum { + DEV_PARPORT_DEVICES_ACTIVE=-3, +}; + +/* /proc/sys/dev/parport/parport n/devices/device n */ +enum { + DEV_PARPORT_DEVICE_TIMESLICE=1, +}; + 
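Further down, the __KERNEL__ portion of this header documents register_sysctl_table() and struct ctl_table, as in the identical text earlier in this patch. The pfm_sysctl_t knobs declared in perfmon.h above (debug, debug_ovfl, fastctxsw, expert_mode) are exactly the kind of values that machinery is meant to expose, so a minimal registration sketch could look like the following; the ctl_name numbers and mode bits are placeholders, since the real table lives in the ported perfmon.c:

  /* Illustrative only: wiring the pfm_sysctl knobs into a ctl_table. */
  static ctl_table pfm_ctl_table[] = {
      { 1, "debug",      &pfm_sysctl.debug,      sizeof(int), 0666, NULL, &proc_dointvec },
      { 2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec },
      { 3, "fastctxsw",  &pfm_sysctl.fastctxsw,  sizeof(int), 0600, NULL, &proc_dointvec },
      { 0 }                       /* zero ctl_name terminates the table */
  };
  static ctl_table pfm_sysctl_dir[] = {
      { 1, "perfmon", NULL, 0, 0555, pfm_ctl_table },
      { 0 }
  };
  static ctl_table pfm_sysctl_root[] = {
      { CTL_KERN, "kernel", NULL, 0, 0555, pfm_sysctl_dir },
      { 0 }
  };
  static struct ctl_table_header *pfm_sysctl_header;

  static void pfm_register_sysctl(void)
  {
      /* insert_at_head == 0: append to the existing sysctl list */
      pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
  }

Unregistering is the mirror operation: unregister_sysctl_table(pfm_sysctl_header).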
+/* /proc/sys/dev/mac_hid */ +enum { + DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES=1, + DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES=2, + DEV_MAC_HID_MOUSE_BUTTON_EMULATION=3, + DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE=4, + DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE=5, + DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES=6 +}; + +/* /proc/sys/dev/scsi */ +enum { + DEV_SCSI_LOGGING_LEVEL=1, +}; + +/* /proc/sys/dev/ipmi */ +enum { + DEV_IPMI_POWEROFF_POWERCYCLE=1, +}; + +/* /proc/sys/abi */ +enum +{ + ABI_DEFHANDLER_COFF=1, /* default handler for coff binaries */ + ABI_DEFHANDLER_ELF=2, /* default handler for ELF binaries */ + ABI_DEFHANDLER_LCALL7=3,/* default handler for procs using lcall7 */ + ABI_DEFHANDLER_LIBCSO=4,/* default handler for an libc.so ELF interp */ + ABI_TRACE=5, /* tracing flags */ + ABI_FAKE_UTSNAME=6, /* fake target utsname information */ +}; + +#ifdef __KERNEL__ +#include <linux/list.h> + +extern void sysctl_init(void); + +typedef struct ctl_table ctl_table; + +typedef int ctl_handler (ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, + void **context); + +typedef int proc_handler (ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + +extern int proc_dostring(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec_bset(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec_minmax(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec_jiffies(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec_userhz_jiffies(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_dointvec_ms_jiffies(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_doulongvec_minmax(ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int, + struct file *, void __user *, size_t *, loff_t *); + +extern int do_sysctl (int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen); + +extern int do_sysctl_strategy (ctl_table *table, + int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, void ** context); + +extern ctl_handler sysctl_string; +extern ctl_handler sysctl_intvec; +extern ctl_handler sysctl_jiffies; +extern ctl_handler sysctl_ms_jiffies; + + +/* + * Register a set of sysctl names by calling register_sysctl_table + * with an initialised array of ctl_table's. An entry with zero + * ctl_name terminates the table. table->de will be set up by the + * registration and need not be initialised in advance. + * + * sysctl names can be mirrored automatically under /proc/sys. The + * procname supplied controls /proc naming. + * + * The table's mode will be honoured both for sys_sysctl(2) and + * proc-fs access. + * + * Leaf nodes in the sysctl tree will be represented by a single file + * under /proc; non-leaf nodes will be represented by directories. A + * null procname disables /proc mirroring at this node. + * + * sysctl(2) can automatically manage read and write requests through + * the sysctl table. 
The data and maxlen fields of the ctl_table + * struct enable minimal validation of the values being written to be + * performed, and the mode field allows minimal authentication. + * + * More sophisticated management can be enabled by the provision of a + * strategy routine with the table entry. This will be called before + * any automatic read or write of the data is performed. + * + * The strategy routine may return: + * <0: Error occurred (error is passed to user process) + * 0: OK - proceed with automatic read or write. + * >0: OK - read or write has been done by the strategy routine, so + * return immediately. + * + * There must be a proc_handler routine for any terminal nodes + * mirrored under /proc/sys (non-terminals are handled by a built-in + * directory handler). Several default handlers are available to + * cover common cases. + */ + +/* A sysctl table is an array of struct ctl_table: */ +struct ctl_table +{ + int ctl_name; /* Binary ID */ + const char *procname; /* Text ID for /proc/sys, or zero */ + void *data; + int maxlen; + mode_t mode; + ctl_table *child; + proc_handler *proc_handler; /* Callback for text formatting */ + ctl_handler *strategy; /* Callback function for all r/w */ + struct proc_dir_entry *de; /* /proc control block */ + void *extra1; + void *extra2; +}; + +/* struct ctl_table_header is used to maintain dynamic lists of + ctl_table trees. */ +struct ctl_table_header +{ + ctl_table *ctl_table; + struct list_head ctl_entry; + int used; + struct completion *unregistering; +}; + +struct ctl_table_header * register_sysctl_table(ctl_table * table, + int insert_at_head); +void unregister_sysctl_table(struct ctl_table_header * table); + +#else /* __KERNEL__ */ + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_SYSCTL_H */ diff -Nur xen-ia64-unstable.hg/xen/include/public/xen.h xenoprof-ia64-unstable/xen/include/public/xen.h --- xen-ia64-unstable.hg/xen/include/public/xen.h 2006-06-22 13:37:14.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/public/xen.h 2006-06-30 15:25:01.000000000 +0900 @@ -4,6 +4,8 @@ * Guest OS interface to Xen. * * Copyright (c) 2004, K A Fraser + * + * Modified by KAZ(kaz@xxxxxxxxxxxxxx) */ #ifndef __XEN_PUBLIC_XEN_H__ @@ -64,6 +66,7 @@ #define __HYPERVISOR_xenoprof_op 31 #define __HYPERVISOR_event_channel_op 32 #define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_perfmon_op 34 /* Architecture-specific hypercall definitions. 
*/ #define __HYPERVISOR_arch_0 48 diff -Nur xen-ia64-unstable.hg/xen/include/xen/hypercall.h xenoprof-ia64-unstable/xen/include/xen/hypercall.h --- xen-ia64-unstable.hg/xen/include/xen/hypercall.h 2006-06-22 13:37:14.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/xen/hypercall.h 2006-06-30 15:25:01.000000000 +0900 @@ -87,4 +87,17 @@ unsigned int cmd, XEN_GUEST_HANDLE(void) arg); +extern long +do_perfmon_op( + unsigned int cmd, + XEN_GUEST_HANDLE(void) arg1, + XEN_GUEST_HANDLE(void) arg2, + unsigned int arg3); + +extern long +do_xenoprof_op( + unsigned int cmd, + unsigned int arg1, + XEN_GUEST_HANDLE(void) arg2); + #endif /* __XEN_HYPERCALL_H__ */ diff -Nur xen-ia64-unstable.hg/xen/include/xen/perfc_defn.h xenoprof-ia64-unstable/xen/include/xen/perfc_defn.h --- xen-ia64-unstable.hg/xen/include/xen/perfc_defn.h 2006-06-22 13:37:14.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/xen/perfc_defn.h 2006-06-30 15:25:01.000000000 +0900 @@ -41,6 +41,10 @@ PERFCOUNTER_CPU(timer_max, "timer max error (ns)") PERFCOUNTER_CPU(sched_irq, "sched: timer") PERFCOUNTER_CPU(sched_run, "sched: runs through scheduler") +PERFCOUNTER_CPU(sched_xen, "sched: runs hyper through scheduler") +PERFCOUNTER_CPU(sched_block, "sched: runs do_block") +PERFCOUNTER_CPU(sched_poll, "sched: runs do_poll") +PERFCOUNTER_CPU(sched_yield, "sched: runs do_yield") PERFCOUNTER_CPU(sched_ctx, "sched: context switches") PERFCOUNTER_CPU(domain_page_tlb_flush, "domain page tlb flushes") diff -Nur xen-ia64-unstable.hg/xen/include/xen/smp.h xenoprof-ia64-unstable/xen/include/xen/smp.h --- xen-ia64-unstable.hg/xen/include/xen/smp.h 2006-06-22 13:37:14.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/xen/smp.h 2006-06-30 15:25:01.000000000 +0900 @@ -111,4 +111,6 @@ #define smp_processor_id() raw_smp_processor_id() +#define put_cpu_no_resched() preempt_enable_no_resched() + #endif diff -Nur xen-ia64-unstable.hg/xen/include/xen/types.h xenoprof-ia64-unstable/xen/include/xen/types.h --- xen-ia64-unstable.hg/xen/include/xen/types.h 2006-06-22 13:37:14.000000000 +0900 +++ xenoprof-ia64-unstable/xen/include/xen/types.h 2006-06-30 15:25:01.000000000 +0900 @@ -20,6 +20,22 @@ #define LONG_MIN (-LONG_MAX - 1) #define ULONG_MAX (~0UL) +typedef unsigned int __kernel_mode_t; + +/* typedef __kernel_fd_set fd_set; */ +/* typedef __kernel_dev_t dev_t; */ +/* typedef __kernel_ino_t ino_t; */ +typedef __kernel_mode_t mode_t; +/* typedef __kernel_nlink_t nlink_t; */ +/* typedef __kernel_off_t off_t; */ +/* typedef __kernel_pid_t pid_t; */ +/* typedef __kernel_daddr_t daddr_t; */ +/* typedef __kernel_key_t key_t; */ +/* typedef __kernel_suseconds_t suseconds_t; */ +/* typedef __kernel_timer_t timer_t; */ +/* typedef __kernel_clockid_t clockid_t; */ +/* typedef __kernel_mqd_t mqd_t; */ + /* bsd */ typedef unsigned char u_char; typedef unsigned short u_short; _______________________________________________ Xen-ia64-devel mailing list Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ia64-devel
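A side note on the linux/sysctl.h interface carried along in this patch: the long comment block above documents register_sysctl_table() and the ctl_table layout. Below is a minimal sketch of a caller written against the two-argument register_sysctl_table() declared in that header; the entry names, the binary ID and the exported variable are purely illustrative and are not part of the patch.

    #include <linux/sysctl.h>

    static int example_value;                     /* hypothetical value exported via sysctl */

    /* Terminal entry: a single integer exposed as /proc/sys/dev/example. */
    static ctl_table example_entries[] = {
        {
            .ctl_name     = 1,                    /* illustrative binary ID */
            .procname     = "example",            /* /proc name of the leaf node */
            .data         = &example_value,
            .maxlen       = sizeof(int),
            .mode         = 0644,
            .proc_handler = &proc_dointvec,       /* default integer text handler */
        },
        { .ctl_name = 0 }                         /* zero ctl_name terminates the table */
    };

    /* Non-terminal entry: represented as a directory under /proc/sys. */
    static ctl_table example_dir[] = {
        {
            .ctl_name = CTL_DEV,
            .procname = "dev",
            .mode     = 0555,
            .child    = example_entries,
        },
        { .ctl_name = 0 }
    };

    static struct ctl_table_header *example_header;

    static int example_sysctl_register(void)
    {
        /* insert_at_head == 0: append after already-registered tables */
        example_header = register_sysctl_table(example_dir, 0);
        return example_header ? 0 : -1;
    }

    static void example_sysctl_unregister(void)
    {
        unregister_sysctl_table(example_header);
    }

Because the leaf supplies a proc_handler (proc_dointvec) and no strategy routine, /proc/sys reads and writes go through the default integer formatting, and sysctl(2) access falls back on the data/maxlen validation, exactly as the comment block describes.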
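On the perfc_defn.h hunk: the new PERFCOUNTER_CPU() lines only declare the counters; the scheduler paths they name are presumably instrumented elsewhere in the series. As a minimal sketch, assuming the perfc_incrc() macro from xen/perfc.h and a hypothetical helper name, bumping one of these per-CPU counters would look like:

    #include <xen/perfc.h>

    /* Hypothetical helper, for illustration only: record one run of do_block
     * by bumping the "sched: runs do_block" counter declared above. */
    static void example_count_block(void)
    {
        perfc_incrc(sched_block);   /* per-CPU increment; compiled away when perf counters are disabled */
    }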