| /* |
| * This file implements the perfmon subsystem which is used |
| * to program the IA-64 Performance Monitoring Unit (PMU). |
| * |
 * Originally written by Ganesh Venkitachalam, IBM Corp.
| * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com> |
| * |
| * Modifications by Stephane Eranian, Hewlett-Packard Co. |
| * Modifications by David Mosberger-Tang, Hewlett-Packard Co. |
| * |
| * Copyright (C) 1999-2003 Hewlett Packard Co |
| * Stephane Eranian <eranian@hpl.hp.com> |
| * David Mosberger-Tang <davidm@hpl.hp.com> |
| */ |
| |
| #include <linux/config.h> |
| #include <linux/kernel.h> |
| #include <linux/sched.h> |
| #include <linux/interrupt.h> |
| #include <linux/smp_lock.h> |
| #include <linux/proc_fs.h> |
| #include <linux/init.h> |
| #include <linux/vmalloc.h> |
| #include <linux/wrapper.h> |
| #include <linux/mm.h> |
| #include <linux/sysctl.h> |
| #include <linux/smp.h> |
| #include <linux/seq_file.h> |
| |
| #include <asm/bitops.h> |
| #include <asm/errno.h> |
| #include <asm/page.h> |
| #include <asm/perfmon.h> |
| #include <asm/processor.h> |
| #include <asm/signal.h> |
| #include <asm/system.h> |
| #include <asm/uaccess.h> |
| #include <asm/delay.h> /* for ia64_get_itc() */ |
| |
| #ifdef CONFIG_PERFMON |
| |
| /* |
 * For PMUs which rely on the debug registers for some features, you must
 * enable the following flag to activate the support for
| * accessing the registers via the perfmonctl() interface. |
| */ |
| #if defined(CONFIG_ITANIUM) || defined(CONFIG_MCKINLEY) |
| #define PFM_PMU_USES_DBR 1 |
| #endif |
| |
| /* |
| * perfmon context states |
| */ |
| #define PFM_CTX_DISABLED 0 |
| #define PFM_CTX_ENABLED 1 |
| |
| /* |
| * Reset register flags |
| */ |
| #define PFM_PMD_LONG_RESET 1 |
| #define PFM_PMD_SHORT_RESET 2 |
| |
| /* |
| * Misc macros and definitions |
| */ |
| #define PMU_FIRST_COUNTER 4 |
| #define PMU_MAX_PMCS 256 |
| #define PMU_MAX_PMDS 256 |
| |
| /* |
| * type of a PMU register (bitmask). |
| * bitmask structure: |
| * bit0 : register implemented |
| * bit1 : end marker |
| * bit2-3 : reserved |
| * bit4-7 : register type |
| * bit8-31: reserved |
| */ |
| #define PFM_REG_IMPL 0x1 /* register implemented */ |
| #define PFM_REG_END 0x2 /* end marker */ |
| #define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ |
| #define PFM_REG_COUNTING (0x2<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm AND pmc.oi, a PMD used as a counter */ |
| #define PFM_REG_CONTROL (0x3<<4|PFM_REG_IMPL) /* PMU control register */ |
| #define PFM_REG_CONFIG (0x4<<4|PFM_REG_IMPL) /* refine configuration */ |
| #define PFM_REG_BUFFER (0x5<<4|PFM_REG_IMPL) /* PMD used as buffer */ |
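
/*
 * For example, PFM_REG_COUNTING expands to 0x21: bit0 set (implemented) and a
 * type field (bits 4-7) of 2. The PMD_IS_COUNTING()/PMC_IS_MONITOR() macros
 * further down compare against the full value, so a register matches only when
 * both the implemented bit and the type field agree.
 */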
| |
| #define PMC_IS_LAST(i) (pmu_conf.pmc_desc[i].type & PFM_REG_END) |
| #define PMD_IS_LAST(i) (pmu_conf.pmd_desc[i].type & PFM_REG_END) |
| |
| #define PFM_IS_DISABLED() pmu_conf.disabled |
| |
| #define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_soft_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) |
| #define PFM_FL_INHERIT_MASK (PFM_FL_INHERIT_NONE|PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL) |
| |
/* i is assumed to be unsigned */
| #define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf.pmc_desc[i].type & PFM_REG_IMPL)) |
| #define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf.pmd_desc[i].type & PFM_REG_IMPL)) |
| |
| /* XXX: these three assume that register i is implemented */ |
| #define PMD_IS_COUNTING(i) (pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING) |
| #define PMC_IS_COUNTING(i) (pmu_conf.pmc_desc[i].type == PFM_REG_COUNTING) |
| #define PMC_IS_MONITOR(i) (pmu_conf.pmc_desc[i].type == PFM_REG_MONITOR) |
| #define PMC_DFL_VAL(i) pmu_conf.pmc_desc[i].default_value |
| #define PMC_RSVD_MASK(i) pmu_conf.pmc_desc[i].reserved_mask |
| #define PMD_PMD_DEP(i) pmu_conf.pmd_desc[i].dep_pmd[0] |
| #define PMC_PMD_DEP(i) pmu_conf.pmc_desc[i].dep_pmd[0] |
| |
/* k is assumed to be unsigned */
| #define IBR_IS_IMPL(k) (k<pmu_conf.num_ibrs) |
| #define DBR_IS_IMPL(k) (k<pmu_conf.num_dbrs) |
| |
| #define CTX_IS_ENABLED(c) ((c)->ctx_flags.state == PFM_CTX_ENABLED) |
| #define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) |
| #define CTX_INHERIT_MODE(c) ((c)->ctx_fl_inherit) |
| #define CTX_HAS_SMPL(c) ((c)->ctx_psb != NULL) |
| /* XXX: does not support more than 64 PMDs */ |
| #define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) |
| #define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) |
| |
| |
| #define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) |
| #define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) |
| #define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) |
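
/*
 * Illustration: CTX_USED_IBR(ctx, 5) sets bit 5 of ctx->ctx_used_ibrs[0], and
 * CTX_USED_IBR(ctx, 70) would set bit 6 of ctx_used_ibrs[1]: the (n)>>6 index
 * and (n) % 64 shift spread the bitmask across the array of 64-bit words.
 */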
| |
| #ifdef CONFIG_SMP |
| #define GET_ACTIVATION() pmu_owners[smp_processor_id()].activation_number |
| #define INC_ACTIVATION() pmu_owners[smp_processor_id()].activation_number++ |
| #define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() |
| #define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) |
| #define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu |
| #else /* !CONFIG_SMP */ |
| #define SET_ACTIVATION(t) do {} while(0) |
| #define GET_ACTIVATION(t) do {} while(0) |
| #define INC_ACTIVATION(t) do {} while(0) |
| #define SET_LAST_CPU(ctx, v) do {} while(0) |
| #define GET_LAST_CPU(ctx) do {} while(0) |
| #endif /* CONFIG_SMP */ |
| |
| |
| #define PFM_INVALID_ACTIVATION (~0UL) |
| |
| #define SET_PMU_OWNER(t) do { pmu_owners[smp_processor_id()].owner = (t); } while(0) |
| #define PMU_OWNER() pmu_owners[smp_processor_id()].owner |
| |
| #define LOCK_PFS() spin_lock(&pfm_sessions.pfs_lock) |
| #define UNLOCK_PFS() spin_unlock(&pfm_sessions.pfs_lock) |
| |
| #define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) |
| |
| #define TASK_PTREGS(t) (((struct pt_regs *)((unsigned long) (t) + IA64_STK_OFFSET))-1) |
| |
| /* |
| * cmp0 must be the value of pmc0 |
| */ |
| #define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) |
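
/*
 * pmc0 bit 0 is the freeze bit; the per-counter overflow status bits sit above
 * it. Masking off bit 0 therefore leaves a non-zero value exactly when at
 * least one counter has reported an overflow.
 */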
| |
| |
| /* |
| * debugging |
| */ |
| #define DBprintk(a) \ |
| do { \ |
| if (pfm_sysctl.debug >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ |
| } while (0) |
| |
| #define DBprintk_ovfl(a) \ |
| do { \ |
| if (pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ |
| } while (0) |
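
/*
 * Note the double parentheses in the argument: the inner pair carries the
 * printk() format string and arguments, e.g.
 *
 *	DBprintk(("pmd[%d]=0x%lx\n", i, val));
 */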
| |
| |
| |
| /* |
| * Architected PMC structure |
| */ |
| typedef struct { |
| unsigned long pmc_plm:4; /* privilege level mask */ |
| unsigned long pmc_ev:1; /* external visibility */ |
| unsigned long pmc_oi:1; /* overflow interrupt */ |
| unsigned long pmc_pm:1; /* privileged monitor */ |
| unsigned long pmc_ig1:1; /* reserved */ |
| unsigned long pmc_es:8; /* event select */ |
| unsigned long pmc_ig2:48; /* reserved */ |
| } pfm_monitor_t; |
| |
| /* |
| * There is one such data structure per perfmon context. It is used to describe the |
 * sampling buffer. It is shared among siblings, whereas the pfm_context
 * is not; therefore we maintain a refcnt which is incremented on fork().
 * This descriptor is private to the kernel; only the actual sampling buffer,
 * including its header, is exposed to the user. This construct allows us to
 * export the buffer read-write, if needed, without worrying about security
 * problems.
| */ |
| typedef struct _pfm_smpl_buffer_desc { |
| spinlock_t psb_lock; /* protection lock */ |
| unsigned long psb_refcnt; /* how many users for the buffer */ |
| int psb_flags; /* bitvector of flags (not yet used) */ |
| |
| void *psb_addr; /* points to location of first entry */ |
| unsigned long psb_entries; /* maximum number of entries */ |
| unsigned long psb_size; /* aligned size of buffer */ |
| unsigned long psb_index; /* next free entry slot XXX: must use the one in buffer */ |
| unsigned long psb_entry_size; /* size of each entry including entry header */ |
| |
| perfmon_smpl_hdr_t *psb_hdr; /* points to sampling buffer header */ |
| |
| struct _pfm_smpl_buffer_desc *psb_next; /* next psb, used for rvfreeing of psb_hdr */ |
| |
| } pfm_smpl_buffer_desc_t; |
| |
| /* |
| * psb_flags |
| */ |
| #define PSB_HAS_VMA 0x1 /* a virtual mapping for the buffer exists */ |
| |
| #define LOCK_PSB(p) spin_lock(&(p)->psb_lock) |
| #define UNLOCK_PSB(p) spin_unlock(&(p)->psb_lock) |
| |
| /* |
| * 64-bit software counter structure |
| */ |
| typedef struct { |
| u64 val; /* virtual 64bit counter value */ |
| u64 lval; /* last value */ |
| u64 long_reset; /* reset value on sampling overflow */ |
| u64 short_reset;/* reset value on overflow */ |
| u64 reset_pmds[4]; /* which other pmds to reset when this counter overflows */ |
| u64 seed; /* seed for random-number generator */ |
| u64 mask; /* mask for random-number generator */ |
| unsigned int flags; /* notify/do not notify */ |
| } pfm_counter_t; |
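
/*
 * 'val' accumulates the bits that do not fit in the hardware register and
 * 'lval' records the value the counter was last reset to. long_reset and
 * short_reset are the two reload values applied by pfm_reset_regs(), and
 * seed/mask drive the optional randomization of those reload values (see
 * pfm_new_counter_value()).
 */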
| |
| /* |
| * perfmon context. One per process, is cloned on fork() depending on |
| * inheritance flags |
| */ |
| typedef struct { |
| unsigned int state:1; /* 0=disabled, 1=enabled */ |
| unsigned int inherit:2; /* inherit mode */ |
	unsigned int block:1;		/* when 1, the task will block on user notifications */
| unsigned int system:1; /* do system wide monitoring */ |
| unsigned int frozen:1; /* pmu must be kept frozen on ctxsw in */ |
| unsigned int protected:1; /* allow access to creator of context only */ |
| unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ |
| unsigned int excl_idle:1; /* exclude idle task in system wide session */ |
| unsigned int unsecure:1; /* sp = 0 for non self-monitored task */ |
| unsigned int reserved:22; |
| } pfm_context_flags_t; |
| |
| /* |
| * perfmon context: encapsulates all the state of a monitoring session |
| * XXX: probably need to change layout |
| */ |
| typedef struct pfm_context { |
| pfm_smpl_buffer_desc_t *ctx_psb; /* sampling buffer, if any */ |
| unsigned long ctx_smpl_vaddr; /* user level virtual address of smpl buffer */ |
| |
| spinlock_t ctx_lock; |
| pfm_context_flags_t ctx_flags; /* block/noblock */ |
| |
| struct task_struct *ctx_notify_task; /* who to notify on overflow */ |
| struct task_struct *ctx_owner; /* pid of creator (debug) */ |
| |
| unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ |
| unsigned long ctx_smpl_regs[4]; /* which registers to record on overflow */ |
| |
| struct semaphore ctx_restart_sem; /* use for blocking notification mode */ |
| |
| unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ |
| unsigned long ctx_reload_pmds[4]; /* bitmask of PMD to reload on ctxsw */ |
| |
| unsigned long ctx_used_pmcs[4]; /* bitmask PMC used by context */ |
| unsigned long ctx_reload_pmcs[4]; /* bitmask of PMC to reload on ctxsw */ |
| |
| unsigned long ctx_used_ibrs[4]; /* bitmask of used IBR (speedup ctxsw) */ |
| unsigned long ctx_used_dbrs[4]; /* bitmask of used DBR (speedup ctxsw) */ |
| |
| pfm_counter_t ctx_soft_pmds[IA64_NUM_PMD_REGS]; /* XXX: size should be dynamic */ |
| |
| u64 ctx_saved_psr; /* copy of psr used for lazy ctxsw */ |
| unsigned long ctx_saved_cpus_allowed; /* copy of the task cpus_allowed (system wide) */ |
| unsigned long ctx_last_activation; /* context last activation number for last_cpu */ |
| unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ |
| unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ |
| |
| struct tasklet_struct ctx_tasklet; /* used for sending signal-based notifications */ |
| } pfm_context_t; |
| |
| #define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context) |
| #define LOCK_CTX(ctx) spin_lock(&(ctx)->ctx_lock) |
| #define UNLOCK_CTX(ctx) spin_unlock(&(ctx)->ctx_lock) |
| |
| #define ctx_fl_inherit ctx_flags.inherit |
| #define ctx_fl_block ctx_flags.block |
| #define ctx_fl_system ctx_flags.system |
| #define ctx_fl_frozen ctx_flags.frozen |
| #define ctx_fl_protected ctx_flags.protected |
| #define ctx_fl_using_dbreg ctx_flags.using_dbreg |
| #define ctx_fl_excl_idle ctx_flags.excl_idle |
| #define ctx_fl_unsecure ctx_flags.unsecure |
| |
| /* |
| * global information about all sessions |
| * mostly used to synchronize between system wide and per-process |
| */ |
| typedef struct { |
| spinlock_t pfs_lock; /* lock the structure */ |
| |
| unsigned int pfs_task_sessions; /* number of per task sessions */ |
| unsigned int pfs_sys_sessions; /* number of per system wide sessions */ |
| unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */ |
| unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ |
| struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ |
| } pfm_session_t; |
| |
| /* |
| * information about a PMC or PMD. |
| * dep_pmd[]: a bitmask of dependent PMD registers |
| * dep_pmc[]: a bitmask of dependent PMC registers |
| */ |
| typedef struct { |
| unsigned int type; |
| int pm_pos; |
| unsigned long default_value; /* power-on default value */ |
| unsigned long reserved_mask; /* bitmask of reserved bits */ |
| int (*read_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs); |
| int (*write_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs); |
| unsigned long dep_pmd[4]; |
| unsigned long dep_pmc[4]; |
| } pfm_reg_desc_t; |
| |
| /* assume cnum is a valid monitor */ |
| #define PMC_PM(cnum, val) (((val) >> (pmu_conf.pmc_desc[cnum].pm_pos)) & 0x1) |
| #define PMC_WR_FUNC(cnum) (pmu_conf.pmc_desc[cnum].write_check) |
| #define PMD_WR_FUNC(cnum) (pmu_conf.pmd_desc[cnum].write_check) |
| #define PMD_RD_FUNC(cnum) (pmu_conf.pmd_desc[cnum].read_check) |
| |
| /* |
| * This structure is initialized at boot time and contains |
| * a description of the PMU main characteristics. |
| */ |
| typedef struct { |
| unsigned int disabled; /* indicates if perfmon is working properly */ |
| unsigned long ovfl_val; /* overflow value for generic counters */ |
| unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ |
| unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ |
| unsigned int num_pmcs; /* number of implemented PMCS */ |
| unsigned int num_pmds; /* number of implemented PMDS */ |
| unsigned int num_ibrs; /* number of implemented IBRS */ |
| unsigned int num_dbrs; /* number of implemented DBRS */ |
| unsigned int num_counters; /* number of PMD/PMC counters */ |
| pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ |
| pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ |
| } pmu_config_t; |
| |
| /* |
| * perfmon command descriptions |
| */ |
| typedef struct { |
| int (*cmd_func)(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); |
| int cmd_flags; |
| unsigned int cmd_narg; |
| size_t cmd_argsize; |
| } pfm_cmd_desc_t; |
| |
| #define PFM_CMD_PID 0x1 /* command requires pid argument */ |
| #define PFM_CMD_ARG_READ 0x2 /* command must read argument(s) */ |
| #define PFM_CMD_ARG_RW 0x4 /* command must read/write argument(s) */ |
| #define PFM_CMD_CTX 0x8 /* command needs a perfmon context */ |
| #define PFM_CMD_NOCHK 0x10 /* command does not need to check task's state */ |
| |
| #define PFM_CMD_IDX(cmd) (cmd) |
| |
| #define PFM_CMD_IS_VALID(cmd) ((PFM_CMD_IDX(cmd) >= 0) && (PFM_CMD_IDX(cmd) < PFM_CMD_COUNT) \ |
| && pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func != NULL) |
| |
| #define PFM_CMD_USE_PID(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_PID) != 0) |
| #define PFM_CMD_READ_ARG(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_READ) != 0) |
| #define PFM_CMD_RW_ARG(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_RW) != 0) |
| #define PFM_CMD_USE_CTX(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_CTX) != 0) |
| #define PFM_CMD_CHK(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_NOCHK) == 0) |
| |
| #define PFM_CMD_ARG_MANY -1 /* cannot be zero */ |
| #define PFM_CMD_NARG(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_narg) |
| #define PFM_CMD_ARG_SIZE(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_argsize) |
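
/*
 * As an illustration only (the actual pfm_cmd_tab[] is defined further down in
 * this file), an entry would look something like:
 *
 *	{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t) },
 *
 * i.e. a handler, its flag bits, the expected argument count and the size of
 * each argument, all of which are checked by sys_perfmonctl() before dispatch.
 */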
| |
| typedef struct { |
| int debug; /* turn on/off debugging via syslog */ |
| int debug_ovfl; /* turn on/off debug printk in overflow handler */ |
| int fastctxsw; /* turn on/off fast (unsecure) ctxsw */ |
| } pfm_sysctl_t; |
| |
| typedef struct { |
| unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ |
| unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ |
| unsigned long pfm_recorded_samples_count; |
| unsigned long pfm_full_smpl_buffer_count; /* how many times the sampling buffer was full */ |
| char pad[SMP_CACHE_BYTES] ____cacheline_aligned; |
| } pfm_stats_t; |
| |
| /* |
| * perfmon internal variables |
| */ |
| static pfm_session_t pfm_sessions; /* global sessions information */ |
| static struct proc_dir_entry *perfmon_dir; /* for debug only */ |
| static pfm_stats_t pfm_stats[NR_CPUS]; |
| static pfm_intr_handler_desc_t *pfm_alternate_intr_handler; |
| |
| /* sysctl() controls */ |
| static pfm_sysctl_t pfm_sysctl; |
| |
| static ctl_table pfm_ctl_table[]={ |
| {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,}, |
| {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,}, |
| {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,}, |
| { 0, }, |
| }; |
| static ctl_table pfm_sysctl_dir[] = { |
| {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, }, |
| {0,}, |
| }; |
| static ctl_table pfm_sysctl_root[] = { |
| {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, }, |
| {0,}, |
| }; |
| static struct ctl_table_header *pfm_sysctl_header; |
| |
| static void pfm_vm_close(struct vm_area_struct * area); |
| |
| static struct vm_operations_struct pfm_vm_ops={ |
| .close = pfm_vm_close |
| }; |
| |
| /* |
| * keep track of task owning the PMU per CPU. |
| */ |
| static struct { |
| struct task_struct *owner; |
| unsigned long activation_number; |
| char pad[SMP_CACHE_BYTES] ____cacheline_aligned; |
| } pmu_owners[NR_CPUS]; |
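
/*
 * The activation_number is incremented each time a context is loaded onto this
 * CPU. A context records the activation it was loaded under in
 * ctx_last_activation; the context-switch code can then tell whether the PMU
 * registers on this CPU still hold this context's state (same CPU, same
 * activation) or whether they must be reloaded.
 */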
| |
| |
| |
| /* |
| * forward declarations |
| */ |
| static void pfm_reset_pmu(struct task_struct *); |
| #ifndef CONFIG_SMP |
| static unsigned long pfm_lazy_save_regs (struct task_struct *ta); |
| #endif |
| |
| #if defined(CONFIG_ITANIUM) |
| #include "perfmon_itanium.h" |
| #elif defined(CONFIG_MCKINLEY) |
| #include "perfmon_mckinley.h" |
| #else |
| #include "perfmon_generic.h" |
| #endif |
| |
| static inline void |
| pfm_clear_psr_pp(void) |
| { |
| __asm__ __volatile__ ("rsm psr.pp;; srlz.i;;"::: "memory"); |
| } |
| |
| static inline void |
| pfm_set_psr_pp(void) |
| { |
| __asm__ __volatile__ ("ssm psr.pp;; srlz.i;;"::: "memory"); |
| } |
| |
| static inline void |
| pfm_clear_psr_up(void) |
| { |
| __asm__ __volatile__ ("rsm psr.up;; srlz.i;;"::: "memory"); |
| } |
| |
| static inline void |
| pfm_set_psr_up(void) |
| { |
| __asm__ __volatile__ ("ssm psr.up;; srlz.i;;"::: "memory"); |
| } |
| |
| static inline unsigned long |
| pfm_get_psr(void) |
| { |
| unsigned long tmp; |
| __asm__ __volatile__ ("mov %0=psr;;": "=r"(tmp) :: "memory"); |
| return tmp; |
| } |
| |
| static inline void |
| pfm_set_psr_l(unsigned long val) |
| { |
| __asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(val): "memory"); |
| } |
| |
| |
| |
| static inline void |
| pfm_freeze_pmu(void) |
| { |
| ia64_set_pmc(0,1UL); |
| ia64_srlz_d(); |
| } |
| |
| static inline void |
| pfm_unfreeze_pmu(void) |
| { |
| ia64_set_pmc(0,0UL); |
| ia64_srlz_d(); |
| } |
| |
| static inline void |
| pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) |
| { |
| int i; |
| |
| for (i=0; i < nibrs; i++) { |
| ia64_set_ibr(i, ibrs[i]); |
| } |
| ia64_srlz_i(); |
| } |
| |
| static inline void |
| pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) |
| { |
| int i; |
| |
| for (i=0; i < ndbrs; i++) { |
| ia64_set_dbr(i, dbrs[i]); |
| } |
| ia64_srlz_d(); |
| } |
| |
| static inline void |
| pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask) |
| { |
| int i; |
| |
| DBprintk(("mask=0x%lx\n", mask)); |
| for (i=0; mask; i++, mask>>=1) { |
| if ((mask & 0x1) == 0) continue; |
| ia64_set_pmc(i, pmcs[i]); |
| DBprintk(("pmc[%d]=0x%lx\n", i, pmcs[i])); |
| } |
| ia64_srlz_d(); |
| } |
| |
| static inline void |
| pfm_restore_pmds(unsigned long *pmds, unsigned long mask) |
| { |
| int i; |
| unsigned long val, ovfl_val = pmu_conf.ovfl_val; |
| |
| DBprintk(("mask=0x%lx\n", mask)); |
| for (i=0; mask; i++, mask>>=1) { |
| if ((mask & 0x1) == 0) continue; |
| val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i]; |
| ia64_set_pmd(i, val); |
| DBprintk(("pmd[%d]=0x%lx\n", i, val)); |
| } |
| ia64_srlz_d(); |
| } |
| |
| static inline void |
| pfm_save_pmds(unsigned long *pmds, unsigned long mask) |
| { |
| int i; |
| |
| ia64_srlz_d(); |
| |
| for (i=0; mask; i++, mask>>=1) { |
| if (mask & 0x1) pmds[i] = ia64_get_pmd(i); |
| } |
| } |
| |
| static inline unsigned long |
| pfm_read_soft_counter(pfm_context_t *ctx, int i) |
| { |
| return ctx->ctx_soft_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.ovfl_val); |
| } |
| |
| static inline void |
| pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) |
| { |
| ctx->ctx_soft_pmds[i].val = val & ~pmu_conf.ovfl_val; |
| /* |
	 * writes to the unimplemented part are ignored, so we do not need to
	 * mask off the top part
| */ |
| ia64_set_pmd(i, val & pmu_conf.ovfl_val); |
| } |
| |
| /* |
| * Generates a unique (per CPU) timestamp |
| */ |
| static inline unsigned long |
| pfm_get_stamp(void) |
| { |
| /* |
| * XXX: must find something more efficient |
| */ |
| return ia64_get_itc(); |
| } |
| |
| /* Here we want the physical address of the memory. |
| * This is used when initializing the contents of the |
| * area and marking the pages as reserved. |
| */ |
| static inline unsigned long |
| pfm_kvirt_to_pa(unsigned long adr) |
| { |
| __u64 pa = ia64_tpa(adr); |
| //DBprintk(("kv2pa(%lx-->%lx)\n", adr, pa)); |
| return pa; |
| } |
| |
| static void * |
| pfm_rvmalloc(unsigned long size) |
| { |
| void *mem; |
| unsigned long adr, page; |
| |
| mem=vmalloc(size); |
| if (mem) { |
| //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); |
| memset(mem, 0, size); /* Clear the ram out, no junk to the user */ |
| adr=(unsigned long) mem; |
| while (size > 0) { |
| page = pfm_kvirt_to_pa(adr); |
| mem_map_reserve(virt_to_page(__va(page))); |
| adr += PAGE_SIZE; |
| size -= PAGE_SIZE; |
| } |
| } |
| return mem; |
| } |
| |
| static void |
| pfm_rvfree(void *mem, unsigned long size) |
| { |
| unsigned long adr, page = 0; |
| |
| if (mem) { |
| adr=(unsigned long) mem; |
| while (size > 0) { |
| page = pfm_kvirt_to_pa(adr); |
| mem_map_unreserve(virt_to_page(__va(page))); |
| adr+=PAGE_SIZE; |
| size-=PAGE_SIZE; |
| } |
| vfree(mem); |
| } |
| return; |
| } |
| |
| /* |
| * This function gets called from mm/mmap.c:exit_mmap() only when there is a sampling buffer |
| * attached to the context AND the current task has a mapping for it, i.e., it is the original |
| * creator of the context. |
| * |
| * This function is used to remember the fact that the vma describing the sampling buffer |
| * has now been removed. It can only be called when no other tasks share the same mm context. |
| * |
| */ |
| static void |
| pfm_vm_close(struct vm_area_struct *vma) |
| { |
| pfm_smpl_buffer_desc_t *psb = (pfm_smpl_buffer_desc_t *)vma->vm_private_data; |
| |
| if (psb == NULL) { |
| printk(KERN_DEBUG "perfmon: psb is null in [%d]\n", current->pid); |
| return; |
| } |
| /* |
| * Add PSB to list of buffers to free on release_thread() when no more users |
| * |
	 * This call is safe because, once the count is zero, it cannot be modified anymore.
	 * The fact that there are no more users of the mm context does not mean that the
	 * sampling buffer is no longer being used outside of this task. In fact, it can still
	 * be accessed from within the kernel by another task (such as the monitored task).
| * |
| * Therefore, we only move the psb into the list of buffers to free when we know |
| * nobody else is using it. |
	 * The linked list is independent of the perfmon context, because in the case of
	 * multi-threaded processes, the last thread may not have been involved with
	 * monitoring; however, it will be the one removing the vma and it should therefore
| * also remove the sampling buffer. This buffer cannot be removed until the vma |
| * is removed. |
| * |
| * This function cannot remove the buffer from here, because exit_mmap() must first |
| * complete. Given that there is no other vma related callback in the generic code, |
| * we have created our own with the linked list of sampling buffers to free. The list |
| * is part of the thread structure. In release_thread() we check if the list is |
| * empty. If not we call into perfmon to free the buffer and psb. That is the only |
| * way to ensure a safe deallocation of the sampling buffer which works when |
| * the buffer is shared between distinct processes or with multi-threaded programs. |
| * |
| * We need to lock the psb because the refcnt test and flag manipulation must |
	 * look like an atomic operation vis-a-vis pfm_context_exit().
| */ |
| LOCK_PSB(psb); |
| |
| if (psb->psb_refcnt == 0) { |
| |
| psb->psb_next = current->thread.pfm_smpl_buf_list; |
| current->thread.pfm_smpl_buf_list = psb; |
| |
| DBprintk(("[%d] add smpl @%p size %lu to smpl_buf_list psb_flags=0x%x\n", |
| current->pid, psb->psb_hdr, psb->psb_size, psb->psb_flags)); |
| } |
| DBprintk(("[%d] clearing psb_flags=0x%x smpl @%p size %lu\n", |
| current->pid, psb->psb_flags, psb->psb_hdr, psb->psb_size)); |
| /* |
	 * decrement the number of vmas for the buffer
| */ |
| psb->psb_flags &= ~PSB_HAS_VMA; |
| |
| UNLOCK_PSB(psb); |
| } |
| |
| /* |
| * This function is called from pfm_destroy_context() and also from pfm_inherit() |
 * to explicitly remove the sampling buffer mapping from the user level address space.
| */ |
| static int |
| pfm_remove_smpl_mapping(struct task_struct *task) |
| { |
| pfm_context_t *ctx = task->thread.pfm_context; |
| pfm_smpl_buffer_desc_t *psb; |
| int r; |
| |
| /* |
| * some sanity checks first |
| */ |
| if (ctx == NULL || task->mm == NULL || ctx->ctx_smpl_vaddr == 0 || ctx->ctx_psb == NULL) { |
| printk(KERN_DEBUG "perfmon: invalid context mm=%p\n", task->mm); |
| return -1; |
| } |
| psb = ctx->ctx_psb; |
| |
| down_write(&task->mm->mmap_sem); |
| |
| r = do_munmap(task->mm, ctx->ctx_smpl_vaddr, psb->psb_size); |
| |
| up_write(&task->mm->mmap_sem); |
| if (r !=0) { |
| printk(KERN_DEBUG "perfmon: pid %d unable to unmap sampling buffer " |
| "@0x%lx size=%ld\n", task->pid, ctx->ctx_smpl_vaddr, psb->psb_size); |
| } |
| |
| DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d refcnt=%lu psb_flags=0x%x\n", |
| task->pid, ctx->ctx_smpl_vaddr, psb->psb_size, r, psb->psb_refcnt, psb->psb_flags)); |
| |
| return 0; |
| } |
| |
| static pfm_context_t * |
| pfm_context_alloc(void) |
| { |
| pfm_context_t *ctx; |
| |
| /* allocate context descriptor */ |
| ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL); |
| if (ctx) memset(ctx, 0, sizeof(pfm_context_t)); |
| |
| return ctx; |
| } |
| |
| static void |
| pfm_context_free(pfm_context_t *ctx) |
| { |
| if (ctx) { |
| DBprintk(("kill tasklet for ctx %p\n", ctx)); |
| |
| tasklet_kill(&ctx->ctx_tasklet); |
| |
| DBprintk(("free ctx @%p\n", ctx)); |
| kfree(ctx); |
| } |
| } |
| |
| static int |
| pfm_remap_buffer(unsigned long buf, unsigned long addr, unsigned long size) |
| { |
| unsigned long page; |
| |
| DBprintk(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); |
| |
| while (size > 0) { |
| page = pfm_kvirt_to_pa(buf); |
| |
| if (remap_page_range(addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM; |
| |
| addr += PAGE_SIZE; |
| buf += PAGE_SIZE; |
| size -= PAGE_SIZE; |
| } |
| return 0; |
| } |
| |
| /* |
| * counts the number of PMDS to save per entry. |
 * This code is generic enough to accommodate more than 64 PMDS when they become available
| */ |
| static unsigned long |
| pfm_smpl_entry_size(unsigned long *which, unsigned long size) |
| { |
| unsigned long res = 0; |
| int i; |
| |
| for (i=0; i < size; i++, which++) res += hweight64(*which); |
| |
| DBprintk(("weight=%ld\n", res)); |
| |
| return res; |
| } |
| |
| /* |
| * Allocates the sampling buffer and remaps it into caller's address space |
| */ |
| static int |
| pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned long entries, |
| void **user_vaddr) |
| { |
| struct mm_struct *mm = current->mm; |
| struct vm_area_struct *vma = NULL; |
| unsigned long size, regcount; |
| void *smpl_buf; |
| pfm_smpl_buffer_desc_t *psb; |
| |
| |
| /* note that regcount might be 0, in this case only the header for each |
| * entry will be recorded. |
| */ |
| regcount = pfm_smpl_entry_size(which_pmds, 1); |
| |
| if ((sizeof(perfmon_smpl_hdr_t)+ entries*sizeof(perfmon_smpl_entry_t)) <= entries) { |
| DBprintk(("requested entries %lu is too big\n", entries)); |
| return -EINVAL; |
| } |
| |
| /* |
| * 1 buffer hdr and for each entry a header + regcount PMDs to save |
| */ |
| size = PAGE_ALIGN( sizeof(perfmon_smpl_hdr_t) |
| + entries * (sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64))); |
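
	/*
	 * For example, with regcount == 2 each entry occupies
	 * sizeof(perfmon_smpl_entry_t) + 16 bytes, and the total of one header
	 * plus 'entries' such entries is rounded up to a multiple of PAGE_SIZE.
	 */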
| |
| DBprintk(("sampling buffer size=%lu bytes\n", size)); |
| |
| /* |
| * check requested size to avoid Denial-of-service attacks |
| * XXX: may have to refine this test |
| * Check against address space limit. |
| * |
| * if ((mm->total_vm << PAGE_SHIFT) + len> current->rlim[RLIMIT_AS].rlim_cur) |
| * return -ENOMEM; |
| */ |
| if (size > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; |
| |
| /* |
| * We do the easy to undo allocations first. |
| * |
| * pfm_rvmalloc(), clears the buffer, so there is no leak |
| */ |
| smpl_buf = pfm_rvmalloc(size); |
| if (smpl_buf == NULL) { |
| DBprintk(("Can't allocate sampling buffer\n")); |
| return -ENOMEM; |
| } |
| |
| DBprintk(("smpl_buf @%p\n", smpl_buf)); |
| |
| /* allocate sampling buffer descriptor now */ |
| psb = kmalloc(sizeof(*psb), GFP_KERNEL); |
| if (psb == NULL) { |
| DBprintk(("Can't allocate sampling buffer descriptor\n")); |
| goto error_kmalloc; |
| } |
| |
| /* allocate vma */ |
| vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); |
| if (!vma) { |
| DBprintk(("Cannot allocate vma\n")); |
| goto error_kmem; |
| } |
| memset(vma, 0, sizeof(*vma)); |
| |
| /* |
| * partially initialize the vma for the sampling buffer |
| * |
| * The VM_DONTCOPY flag is very important as it ensures that the mapping |
| * will never be inherited for any child process (via fork()) which is always |
| * what we want. |
| */ |
| vma->vm_mm = mm; |
| vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED|VM_DONTCOPY; |
| vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ |
	vma->vm_ops = &pfm_vm_ops; /* necessary to get the close() callback */
| vma->vm_pgoff = 0; |
| vma->vm_file = NULL; |
| vma->vm_raend = 0; |
| vma->vm_private_data = psb; /* information needed by the pfm_vm_close() function */ |
| |
| /* |
| * Now we have everything we need and we can initialize |
| * and connect all the data structures |
| */ |
| |
| psb->psb_hdr = smpl_buf; |
| psb->psb_addr = ((char *)smpl_buf)+sizeof(perfmon_smpl_hdr_t); /* first entry */ |
| psb->psb_size = size; /* aligned size */ |
| psb->psb_index = 0; |
| psb->psb_entries = entries; |
| psb->psb_refcnt = 1; |
| psb->psb_flags = PSB_HAS_VMA; |
| |
| spin_lock_init(&psb->psb_lock); |
| |
| /* |
| * XXX: will need to do cacheline alignment to avoid false sharing in SMP mode and |
| * multitask monitoring. |
| */ |
| psb->psb_entry_size = sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64); |
| |
| DBprintk(("psb @%p entry_size=%ld hdr=%p addr=%p refcnt=%lu psb_flags=0x%x\n", |
| (void *)psb,psb->psb_entry_size, (void *)psb->psb_hdr, |
| (void *)psb->psb_addr, psb->psb_refcnt, psb->psb_flags)); |
| |
| /* initialize some of the fields of user visible buffer header */ |
| psb->psb_hdr->hdr_version = PFM_SMPL_VERSION; |
| psb->psb_hdr->hdr_entry_size = psb->psb_entry_size; |
| psb->psb_hdr->hdr_pmds[0] = which_pmds[0]; |
| |
| /* |
| * Let's do the difficult operations next. |
| * |
| * now we atomically find some area in the address space and |
| * remap the buffer in it. |
| */ |
| down_write(¤t->mm->mmap_sem); |
| |
| |
| /* find some free area in address space, must have mmap sem held */ |
| vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS); |
| if (vma->vm_start == 0UL) { |
| DBprintk(("Cannot find unmapped area for size %ld\n", size)); |
| up_write(¤t->mm->mmap_sem); |
| goto error; |
| } |
| vma->vm_end = vma->vm_start + size; |
| |
| DBprintk(("entries=%ld aligned size=%ld, unmapped @0x%lx\n", entries, size, vma->vm_start)); |
| |
| /* can only be applied to current, need to have the mm semaphore held when called */ |
| if (pfm_remap_buffer((unsigned long)smpl_buf, vma->vm_start, size)) { |
| DBprintk(("Can't remap buffer\n")); |
| up_write(¤t->mm->mmap_sem); |
| goto error; |
| } |
| |
| /* |
| * now insert the vma in the vm list for the process, must be |
| * done with mmap lock held |
| */ |
| insert_vm_struct(mm, vma); |
| |
| mm->total_vm += size >> PAGE_SHIFT; |
| |
| up_write(¤t->mm->mmap_sem); |
| |
| /* store which PMDS to record */ |
| ctx->ctx_smpl_regs[0] = which_pmds[0]; |
| |
| |
| /* link to perfmon context */ |
| ctx->ctx_psb = psb; |
| |
| /* |
| * keep track of user level virtual address |
| */ |
| ctx->ctx_smpl_vaddr = *(unsigned long *)user_vaddr = vma->vm_start; |
| |
| return 0; |
| |
| error: |
| kmem_cache_free(vm_area_cachep, vma); |
| error_kmem: |
| kfree(psb); |
| error_kmalloc: |
| pfm_rvfree(smpl_buf, size); |
| return -ENOMEM; |
| } |
| |
| static int |
| pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask) |
| { |
| unsigned long m, undo_mask; |
| unsigned int n, i; |
| |
| /* |
	 * validity checks on cpu_mask have been done upstream
| */ |
| LOCK_PFS(); |
| |
| if (is_syswide) { |
| /* |
| * cannot mix system wide and per-task sessions |
| */ |
| if (pfm_sessions.pfs_task_sessions > 0UL) { |
| DBprintk(("system wide not possible, %u conflicting task_sessions\n", |
| pfm_sessions.pfs_task_sessions)); |
| goto abort; |
| } |
| |
| m = cpu_mask; undo_mask = 0UL; n = 0; |
| DBprintk(("cpu_mask=0x%lx\n", cpu_mask)); |
| for(i=0; m; i++, m>>=1) { |
| |
| if ((m & 0x1) == 0UL) continue; |
| |
| if (pfm_sessions.pfs_sys_session[i]) goto undo; |
| |
| DBprintk(("reserving CPU%d currently on CPU%d\n", i, smp_processor_id())); |
| |
| pfm_sessions.pfs_sys_session[i] = task; |
| undo_mask |= 1UL << i; |
| n++; |
| } |
| pfm_sessions.pfs_sys_sessions += n; |
| } else { |
| if (pfm_sessions.pfs_sys_sessions) goto abort; |
| pfm_sessions.pfs_task_sessions++; |
| } |
| UNLOCK_PFS(); |
| return 0; |
| undo: |
| DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n", |
| pfm_sessions.pfs_sys_session[i]->pid, i)); |
| |
| for(i=0; undo_mask; i++, undo_mask >>=1) { |
| pfm_sessions.pfs_sys_session[i] = NULL; |
| } |
| abort: |
| UNLOCK_PFS(); |
| |
| return -EBUSY; |
| |
| } |
| |
| static int |
| pfm_unreserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask) |
| { |
| pfm_context_t *ctx; |
| unsigned long m; |
| unsigned int n, i; |
| |
| ctx = task ? task->thread.pfm_context : NULL; |
| |
| /* |
	 * validity checks on cpu_mask have been done upstream
| */ |
| LOCK_PFS(); |
| |
| DBprintk(("[%d] sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu_mask=0x%lx\n", |
| task->pid, |
| pfm_sessions.pfs_sys_sessions, |
| pfm_sessions.pfs_task_sessions, |
| pfm_sessions.pfs_sys_use_dbregs, |
| is_syswide, |
| cpu_mask)); |
| |
| |
| if (is_syswide) { |
| m = cpu_mask; n = 0; |
| for(i=0; m; i++, m>>=1) { |
| if ((m & 0x1) == 0UL) continue; |
| pfm_sessions.pfs_sys_session[i] = NULL; |
| n++; |
| } |
| /* |
		 * this would not work if perfmon allowed more than one bit in cpu_mask
| */ |
| if (ctx && ctx->ctx_fl_using_dbreg) { |
| if (pfm_sessions.pfs_sys_use_dbregs == 0) { |
| printk(KERN_DEBUG "perfmon: invalid release for [%d] " |
| "sys_use_dbregs=0\n", task->pid); |
| } else { |
| pfm_sessions.pfs_sys_use_dbregs--; |
| } |
| } |
| pfm_sessions.pfs_sys_sessions -= n; |
| |
| DBprintk(("CPU%d sys_sessions=%u\n", |
| smp_processor_id(), pfm_sessions.pfs_sys_sessions)); |
| } else { |
| pfm_sessions.pfs_task_sessions--; |
| DBprintk(("[%d] task_sessions=%u\n", |
| task->pid, pfm_sessions.pfs_task_sessions)); |
| } |
| |
| UNLOCK_PFS(); |
| |
| return 0; |
| } |
| |
| static void |
| pfm_send_notification_signal(unsigned long data) |
| { |
| pfm_context_t *ctx = (pfm_context_t *)data; |
| struct siginfo si; |
| int ret; |
| |
| DBprintk(("[%d] tasklet called\n", current->pid)); |
| |
| LOCK_CTX(ctx); |
| |
| if (ctx->ctx_notify_task == NULL) { |
| printk(KERN_INFO "perfmon: tasklet lost notify_task\n"); |
| goto nothing_to_do; |
| } |
| /* no leak */ |
| memset(&si,0, sizeof(si)); |
| |
| si.si_addr = NULL; |
| si.si_pid = current->pid; /* irrelevant */ |
| si.si_signo = SIGPROF; |
| si.si_code = PROF_OVFL; /* indicates a perfmon SIGPROF signal */ |
| si.si_pfm_ovfl[0] = ctx->ctx_ovfl_regs[0]; |
| |
| if (ctx->ctx_notify_task != current) read_lock(&tasklist_lock); |
| |
| DBprintk_ovfl(("[%d] tasklet sending notification to [%d]\n", current->pid, ctx->ctx_notify_task->pid)); |
| |
| ret = send_sig_info(SIGPROF, &si, ctx->ctx_notify_task); |
| if (ret != 0) printk(KERN_ERR "send_sig_info(process %d, SIGPROF)=%d\n", ctx->ctx_notify_task->pid, ret); |
| |
| /* |
| * now undo the protections in order |
| */ |
| if (ctx->ctx_notify_task != current) read_unlock(&tasklist_lock); |
| nothing_to_do: |
| UNLOCK_CTX(ctx); |
| } |
| |
| /* |
| * XXX: do something better here |
| */ |
| static int |
| pfm_bad_permissions(struct task_struct *task) |
| { |
| /* stolen from bad_signal() */ |
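	/*
	 * each XOR term is zero when the corresponding ids match, so the
	 * expression is non-zero (permission denied) only if the sessions differ
	 * and none of the uid pairs match.
	 */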
| return (current->session != task->session) |
| && (current->euid ^ task->suid) && (current->euid ^ task->uid) |
| && (current->uid ^ task->suid) && (current->uid ^ task->uid); |
| } |
| |
| static int |
| pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx) |
| { |
| unsigned long smpl_pmds = pfx->ctx_smpl_regs[0]; |
| int ctx_flags; |
| int cpu; |
| |
| /* valid signal */ |
| |
| /* cannot send to process 1, 0 means do not notify */ |
| if (pfx->ctx_notify_pid == 1) { |
| DBprintk(("invalid notify_pid %d\n", pfx->ctx_notify_pid)); |
| return -EINVAL; |
| } |
| ctx_flags = pfx->ctx_flags; |
| |
| if ((ctx_flags & PFM_FL_INHERIT_MASK) == (PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)) { |
| DBprintk(("invalid inherit mask 0x%x\n",ctx_flags & PFM_FL_INHERIT_MASK)); |
| return -EINVAL; |
| } |
| |
| if (ctx_flags & PFM_FL_SYSTEM_WIDE) { |
| DBprintk(("cpu_mask=0x%lx\n", pfx->ctx_cpu_mask)); |
| /* |
| * cannot block in this mode |
| */ |
| if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { |
| DBprintk(("cannot use blocking mode when in system wide monitoring\n")); |
| return -EINVAL; |
| } |
| /* |
| * must only have one bit set in the CPU mask |
| */ |
| if (hweight64(pfx->ctx_cpu_mask) != 1UL) { |
| DBprintk(("invalid CPU mask specified\n")); |
| return -EINVAL; |
| } |
| /* |
| * and it must be a valid CPU |
| */ |
| cpu = ffz(~pfx->ctx_cpu_mask); |
| if (cpu_online(cpu) == 0) { |
| DBprintk(("CPU%d is not online\n", cpu)); |
| return -EINVAL; |
| } |
| /* |
| * check for pre-existing pinning, if conflicting reject |
| */ |
| if (task->cpus_allowed != ~0UL && (task->cpus_allowed & (1UL<<cpu)) == 0) { |
| DBprintk(("[%d] pinned on 0x%lx, mask for CPU%d \n", task->pid, |
| task->cpus_allowed, cpu)); |
| return -EINVAL; |
| } |
| |
| } else { |
| /* |
| * must provide a target for the signal in blocking mode even when |
| * no counter is configured with PFM_FL_REG_OVFL_NOTIFY |
| */ |
| if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == 0) { |
| DBprintk(("must have notify_pid when blocking for [%d]\n", task->pid)); |
| return -EINVAL; |
| } |
| #if 0 |
| if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == task->pid) { |
| DBprintk(("cannot notify self when blocking for [%d]\n", task->pid)); |
| return -EINVAL; |
| } |
| #endif |
| } |
| /* verify validity of smpl_regs */ |
| if ((smpl_pmds & pmu_conf.impl_pmds[0]) != smpl_pmds) { |
| DBprintk(("invalid smpl_regs 0x%lx\n", smpl_pmds)); |
| return -EINVAL; |
| } |
| /* probably more to add here */ |
| |
| return 0; |
| } |
| |
| static int |
| pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int count, |
| struct pt_regs *regs) |
| { |
| pfarg_context_t tmp; |
| void *uaddr = NULL; |
| int ret; |
| int ctx_flags; |
| pid_t notify_pid; |
| |
| /* a context has already been defined */ |
| if (ctx) return -EBUSY; |
| |
| /* |
| * not yet supported |
| */ |
| if (task != current) return -EINVAL; |
| |
| if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; |
| |
| ret = pfx_is_sane(task, &tmp); |
| if (ret < 0) return ret; |
| |
| ctx_flags = tmp.ctx_flags; |
| |
| ret = pfm_reserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE, tmp.ctx_cpu_mask); |
| if (ret) goto abort; |
| |
| ret = -ENOMEM; |
| |
| ctx = pfm_context_alloc(); |
| if (!ctx) goto error; |
| |
| /* record the creator (important for inheritance) */ |
| ctx->ctx_owner = current; |
| |
| notify_pid = tmp.ctx_notify_pid; |
| |
| spin_lock_init(&ctx->ctx_lock); |
| |
| if (notify_pid == current->pid) { |
| |
| ctx->ctx_notify_task = current; |
| task->thread.pfm_context = ctx; |
| |
| } else if (notify_pid!=0) { |
| struct task_struct *notify_task; |
| |
| read_lock(&tasklist_lock); |
| |
| notify_task = find_task_by_pid(notify_pid); |
| |
| if (notify_task) { |
| |
| ret = -EPERM; |
| |
| /* |
| * check if we can send this task a signal |
| */ |
| if (pfm_bad_permissions(notify_task)) { |
| read_unlock(&tasklist_lock); |
| goto buffer_error; |
| } |
| |
| /* |
| * make visible |
| * must be done inside critical section |
| * |
			 * if the initialization does not go through, it is still
			 * okay because the child will do the scan for nothing, which
			 * won't hurt.
| */ |
| task->thread.pfm_context = ctx; |
| |
| /* |
| * will cause task to check on exit for monitored |
| * processes that would notify it. see release_thread() |
| * Note: the scan MUST be done in release thread, once the |
| * task has been detached from the tasklist otherwise you are |
| * exposed to race conditions. |
| */ |
			atomic_add(1, &notify_task->thread.pfm_notifiers_check);
| |
| ctx->ctx_notify_task = notify_task; |
| } |
| read_unlock(&tasklist_lock); |
| } |
| |
| /* |
| * notification process does not exist |
| */ |
| if (notify_pid != 0 && ctx->ctx_notify_task == NULL) { |
| ret = -EINVAL; |
| goto buffer_error; |
| } |
| |
| if (tmp.ctx_smpl_entries) { |
| DBprintk(("sampling entries=%lu\n",tmp.ctx_smpl_entries)); |
| |
| ret = pfm_smpl_buffer_alloc(ctx, tmp.ctx_smpl_regs, |
| tmp.ctx_smpl_entries, &uaddr); |
| if (ret<0) goto buffer_error; |
| |
| tmp.ctx_smpl_vaddr = uaddr; |
| } |
| /* initialization of context's flags */ |
| ctx->ctx_fl_inherit = ctx_flags & PFM_FL_INHERIT_MASK; |
| ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; |
| ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; |
| ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0; |
| ctx->ctx_fl_unsecure = (ctx_flags & PFM_FL_UNSECURE) ? 1: 0; |
| ctx->ctx_fl_frozen = 0; |
| /* |
	 * setting this flag to 0 here means that the creator or the task to which the
	 * context is being attached is granted access. Given that a context can only
	 * be created for the calling process, this in effect only allows the creator
| * to access the context. See pfm_protect() for more. |
| */ |
| ctx->ctx_fl_protected = 0; |
| |
| /* for system wide mode only (only 1 bit set) */ |
| ctx->ctx_cpu = ffz(~tmp.ctx_cpu_mask); |
| |
| /* SMP only, means no CPU */ |
| ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; |
| SET_LAST_CPU(ctx, -1); |
| |
| sema_init(&ctx->ctx_restart_sem, 0); /* init this semaphore to locked */ |
| |
| /* |
| * initialize tasklet for signal notifications |
| * |
| * ALL signal-based (or any notification using data structures |
	 * external to perfmon) MUST use tasklets to avoid lock contention
	 * when a signal has to be sent from the overflow interrupt handler.
| */ |
| tasklet_init(&ctx->ctx_tasklet, pfm_send_notification_signal, (unsigned long)ctx); |
| |
| if (__copy_to_user(req, &tmp, sizeof(tmp))) { |
| ret = -EFAULT; |
| goto buffer_error; |
| } |
| |
| DBprintk(("context=%p, pid=%d notify_task=%p\n", |
| (void *)ctx, task->pid, ctx->ctx_notify_task)); |
| |
| DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d excl_idle=%d unsecure=%d\n", |
| (void *)ctx, task->pid, ctx_flags, ctx->ctx_fl_inherit, |
| ctx->ctx_fl_block, ctx->ctx_fl_system, |
| ctx->ctx_fl_excl_idle, |
| ctx->ctx_fl_unsecure)); |
| |
| /* |
| * when no notification is required, we can make this visible at the last moment |
| */ |
| if (notify_pid == 0) task->thread.pfm_context = ctx; |
| /* |
| * pin task to CPU and force reschedule on exit to ensure |
| * that when back to user level the task runs on the designated |
| * CPU. |
| */ |
| if (ctx->ctx_fl_system) { |
| ctx->ctx_saved_cpus_allowed = task->cpus_allowed; |
| task->cpus_allowed = tmp.ctx_cpu_mask; |
| task->need_resched = 1; |
| DBprintk(("[%d] rescheduled allowed=0x%lx\n", task->pid, task->cpus_allowed)); |
| } |
| |
| return 0; |
| |
| buffer_error: |
| pfm_context_free(ctx); |
| error: |
| pfm_unreserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE , tmp.ctx_cpu_mask); |
| abort: |
| /* make sure we don't leave anything behind */ |
| task->thread.pfm_context = NULL; |
| |
| return ret; |
| } |
| |
| static inline unsigned long |
| pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) |
| { |
| unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; |
| unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; |
| extern unsigned long carta_random32 (unsigned long seed); |
| |
| if (reg->flags & PFM_REGFL_RANDOM) { |
| new_seed = carta_random32(old_seed); |
| val -= (old_seed & mask); /* counter values are negative numbers! */ |
| if ((mask >> 32) != 0) |
| /* construct a full 64-bit random value: */ |
| new_seed |= carta_random32(old_seed >> 32) << 32; |
| reg->seed = new_seed; |
| } |
| reg->lval = val; |
| return val; |
| } |
| |
| static void |
| pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag) |
| { |
| unsigned long mask = ovfl_regs[0]; |
| unsigned long reset_others = 0UL; |
| unsigned long val; |
| int i, is_long_reset = (flag == PFM_PMD_LONG_RESET); |
| |
| /* |
	 * now restore the reset values on the counters that overflowed
| */ |
| mask >>= PMU_FIRST_COUNTER; |
| for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { |
| if (mask & 0x1) { |
| val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset); |
| reset_others |= ctx->ctx_soft_pmds[i].reset_pmds[0]; |
| |
| DBprintk_ovfl(("[%d] %s reset soft_pmd[%d]=%lx\n", current->pid, |
| is_long_reset ? "long" : "short", i, val)); |
| |
| /* upper part is ignored on rval */ |
| pfm_write_soft_counter(ctx, i, val); |
| } |
| } |
| |
| /* |
| * Now take care of resetting the other registers |
| */ |
| for(i = 0; reset_others; i++, reset_others >>= 1) { |
| |
| if ((reset_others & 0x1) == 0) continue; |
| |
| val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset); |
| |
| if (PMD_IS_COUNTING(i)) { |
| pfm_write_soft_counter(ctx, i, val); |
| } else { |
| ia64_set_pmd(i, val); |
| } |
| DBprintk_ovfl(("[%d] %s reset_others pmd[%d]=%lx\n", current->pid, |
| is_long_reset ? "long" : "short", i, val)); |
| } |
| ia64_srlz_d(); |
| } |
| |
| static int |
| pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) |
| { |
| struct thread_struct *th = &task->thread; |
| pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg; |
| unsigned long value, reset_pmds; |
| unsigned int cnum, reg_flags, flags; |
| int is_monitor, is_counting; |
| int i, ret = -EINVAL; |
| #define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) |
| |
| /* we don't quite support this right now */ |
| if (task != current) return -EINVAL; |
| |
| if (!CTX_IS_ENABLED(ctx)) return -EINVAL; |
| |
| |
| /* XXX: ctx locking may be required here */ |
| |
| for (i = 0; i < count; i++, req++) { |
| |
| if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; |
| |
| cnum = tmp.reg_num; |
| reg_flags = tmp.reg_flags; |
| value = tmp.reg_value; |
| reset_pmds = tmp.reg_reset_pmds[0]; |
| flags = 0; |
| |
| is_counting = PMC_IS_COUNTING(cnum); |
| is_monitor = PMC_IS_MONITOR(cnum); |
| |
| /* |
		 * we reject all unimplemented PMCs as well
		 * as attempts to modify PMC[0-3], which are used
| * as status registers by the PMU |
| */ |
| if (!PMC_IS_IMPL(cnum) || cnum < 4) { |
| DBprintk(("pmc[%u] is unimplemented or invalid\n", cnum)); |
| goto error; |
| } |
| /* |
| * If the PMC is a monitor, then if the value is not the default: |
| * - system-wide session: PMCx.pm=1 (privileged monitor) |
| * - per-task : PMCx.pm=0 (user monitor) |
| */ |
| if ((is_monitor || is_counting) && value != PMC_DFL_VAL(cnum) && PFM_CHECK_PMC_PM(ctx, cnum, value)) { |
| DBprintk(("pmc%u pmc_pm=%ld fl_system=%d\n", |
| cnum, |
| PMC_PM(cnum, value), |
| ctx->ctx_fl_system)); |
| goto error; |
| } |
| |
| if (is_counting) { |
| pfm_monitor_t *p = (pfm_monitor_t *)&value; |
| /* |
| * enforce generation of overflow interrupt. Necessary on all |
| * CPUs. |
| */ |
| p->pmc_oi = 1; |
| |
| if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { |
| /* |
| * must have a target for the signal |
| */ |
| if (ctx->ctx_notify_task == NULL) { |
| DBprintk(("cannot set ovfl_notify: no notify_task\n")); |
| goto error; |
| } |
| flags |= PFM_REGFL_OVFL_NOTIFY; |
| } |
| |
| if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; |
| |
| /* verify validity of reset_pmds */ |
| if ((reset_pmds & pmu_conf.impl_pmds[0]) != reset_pmds) { |
| DBprintk(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); |
| goto error; |
| } |
| } else if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { |
| DBprintk(("cannot set ovfl_notify or random on pmc%u\n", cnum)); |
| goto error; |
| } |
| |
| /* |
| * execute write checker, if any |
| */ |
| if (PMC_WR_FUNC(cnum)) { |
| ret = PMC_WR_FUNC(cnum)(task, cnum, &value, regs); |
| if (ret) goto error; |
| ret = -EINVAL; |
| } |
| |
| /* |
| * no error on this register |
| */ |
| PFM_REG_RETFLAG_SET(tmp.reg_flags, 0); |
| |
| /* |
| * update register return value, abort all if problem during copy. |
| * we only modify the reg_flags field. no check mode is fine because |
| * access has been verified upfront in sys_perfmonctl(). |
| * |
| * If this fails, then the software state is not modified |
| */ |
| if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT; |
| |
| /* |
| * Now we commit the changes to the software state |
| */ |
| |
| /* |
| * full flag update each time a register is programmed |
| */ |
| ctx->ctx_soft_pmds[cnum].flags = flags; |
| |
| if (is_counting) { |
| ctx->ctx_soft_pmds[cnum].reset_pmds[0] = reset_pmds; |
| |
| /* mark all PMDS to be accessed as used */ |
| CTX_USED_PMD(ctx, reset_pmds); |
| } |
| |
| /* |
| * Needed in case the user does not initialize the equivalent |
| * PMD. Clearing is done in reset_pmu() so there is no possible |
| * leak here. |
| */ |
| CTX_USED_PMD(ctx, pmu_conf.pmc_desc[cnum].dep_pmd[0]); |
| |
| /* |
		 * keep a copy of the pmc, used for register reload
| */ |
| th->pmc[cnum] = value; |
| |
| ia64_set_pmc(cnum, value); |
| |
| DBprintk(("[%d] pmc[%u]=0x%lx flags=0x%x used_pmds=0x%lx\n", |
| task->pid, cnum, value, |
| ctx->ctx_soft_pmds[cnum].flags, |
| ctx->ctx_used_pmds[0])); |
| |
| } |
| |
| return 0; |
| |
| error: |
| PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL); |
| |
| if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT; |
| |
| DBprintk(("[%d] pmc[%u]=0x%lx error %d\n", task->pid, cnum, value, ret)); |
| |
| return ret; |
| } |
| |
| static int |
| pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) |
| { |
| pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg; |
| unsigned long value, hw_value; |
| unsigned int cnum; |
| int i; |
| int ret = -EINVAL; |
| |
| /* we don't quite support this right now */ |
| if (task != current) return -EINVAL; |
| |
| /* |
| * Cannot do anything before PMU is enabled |
| */ |
| if (!CTX_IS_ENABLED(ctx)) return -EINVAL; |
| |
| /* XXX: ctx locking may be required here */ |
| |
| |
| for (i = 0; i < count; i++, req++) { |
| |
| if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; |
| |
| cnum = tmp.reg_num; |
| value = tmp.reg_value; |
| |
| if (!PMD_IS_IMPL(cnum)) { |
| DBprintk(("pmd[%u] is unimplemented or invalid\n", cnum)); |
| goto abort_mission; |
| } |
| |
| /* |
| * execute write checker, if any |
| */ |
| if (PMD_WR_FUNC(cnum)) { |
| unsigned long v = value; |
| ret = PMD_WR_FUNC(cnum)(task, cnum, &v, regs); |
| if (ret) goto abort_mission; |
| value = v; |
| ret = -EINVAL; |
| } |
| hw_value = value; |
| /* |
| * no error on this register |
| */ |
| PFM_REG_RETFLAG_SET(tmp.reg_flags, 0); |
| |
| if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT; |
| |
| /* |
| * now commit changes to software state |
| */ |
| |
| /* update virtualized (64bits) counter */ |
| if (PMD_IS_COUNTING(cnum)) { |
| ctx->ctx_soft_pmds[cnum].lval = value; |
| ctx->ctx_soft_pmds[cnum].val = value & ~pmu_conf.ovfl_val; |
| |
| hw_value = value & pmu_conf.ovfl_val; |
| |
| ctx->ctx_soft_pmds[cnum].long_reset = tmp.reg_long_reset; |
| ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset; |
| |
| ctx->ctx_soft_pmds[cnum].seed = tmp.reg_random_seed; |
| ctx->ctx_soft_pmds[cnum].mask = tmp.reg_random_mask; |
| } |
| |
| /* keep track of what we use */ |
| CTX_USED_PMD(ctx, pmu_conf.pmd_desc[(cnum)].dep_pmd[0]); |
| |
| /* mark this register as used as well */ |
| CTX_USED_PMD(ctx, RDEP(cnum)); |
| |
		/* writes to the unimplemented part are ignored, so this is safe */
| ia64_set_pmd(cnum, hw_value); |
| |
| /* to go away */ |
| ia64_srlz_d(); |
| |
| DBprintk(("[%d] pmd[%u]: value=0x%lx hw_value=0x%lx soft_pmd=0x%lx short_reset=0x%lx " |
| "long_reset=0x%lx hw_pmd=%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx psr=%d\n", |
| task->pid, cnum, |
| value, hw_value, |
| ctx->ctx_soft_pmds[cnum].val, |
| ctx->ctx_soft_pmds[cnum].short_reset, |
| ctx->ctx_soft_pmds[cnum].long_reset, |
| ia64_get_pmd(cnum) & pmu_conf.ovfl_val, |
| PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N', |
| ctx->ctx_used_pmds[0], |
| ctx->ctx_soft_pmds[cnum].reset_pmds[0], ia64_psr(regs)->sp)); |
| } |
| |
| return 0; |
| |
| abort_mission: |
| /* |
| * for now, we have only one possibility for error |
| */ |
| PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL); |
| |
| /* |
	 * we change the return value to EFAULT in case we cannot write the register return code.
	 * The caller must first correct this error, then a resubmission of the request will
	 * eventually yield the EINVAL.
| */ |
| if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT; |
| |
| DBprintk(("[%d] pmc[%u]=0x%lx ret %d\n", task->pid, cnum, value, ret)); |
| |
| return ret; |
| } |
| |
| static int |
| pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) |
| { |
| struct thread_struct *th = &task->thread; |
| unsigned long val, lval; |
| pfarg_reg_t *req = (pfarg_reg_t *)arg; |
| unsigned int cnum, reg_flags = 0; |
| int i, ret = 0; |
| #if __GNUC__ < 3 |
| int foo; |
| #endif |
| |
| if (!CTX_IS_ENABLED(ctx)) { |
| DBprintk(("context for [%d] is disabled\n", task->pid)); |
| return -EINVAL; |
| } |
| |
| /* |
	 * XXX: MUST MAKE SURE WE DON'T HAVE ANY PENDING OVERFLOW BEFORE READING
	 * This is required when the monitoring has been stopped by user or kernel.
	 * If it is still going on, then that's fine because we are not guaranteed
	 * to return an accurate value in this case.
| */ |
| |
| /* XXX: ctx locking may be required here */ |
| |
| /* |
| * should we need to access the PMU, serialization is needed |
| */ |
| ia64_srlz_d(); |
| |
| for (i = 0; i < count; i++, req++) { |
| |
| #if __GNUC__ < 3 |
| foo = __get_user(cnum, &req->reg_num); |
| if (foo) return -EFAULT; |
| foo = __get_user(reg_flags, &req->reg_flags); |
| if (foo) return -EFAULT; |
| #else |
| if (__get_user(cnum, &req->reg_num)) return -EFAULT; |
| if (__get_user(reg_flags, &req->reg_flags)) return -EFAULT; |
| #endif |
| lval = 0UL; |
| |
| if (!PMD_IS_IMPL(cnum)) goto abort_mission; |
| /* |
| * we can only read the registers that we use. That includes |
| * the ones we explicitly initialize AND the ones we want included |
| * in the sampling buffer (smpl_regs). |
| * |
| * Having this restriction allows optimizations in the ctxsw routine |
| * without compromising security (leaks). |
| */ |
| if (!CTX_IS_USED_PMD(ctx, cnum)) goto abort_mission; |
| |
| /* |
| * we can access the registers directly only when task |
| * is the OWNER of the local PMU. On SMP, this can |
| * happen only when task == current. On UP, it can |
| * also happen when task != current. |
| */ |
| if (task == PMU_OWNER()) { |
| val = ia64_get_pmd(cnum); |
| DBprintk(("reading pmd[%u]=0x%lx from hw\n", cnum, val)); |
| } else { |
| /* context has been saved */ |
| val = th->pmd[cnum]; |
| } |
| |
| if (PMD_IS_COUNTING(cnum)) { |
| /* |
| * XXX: need to check for overflow |
| */ |
| val &= pmu_conf.ovfl_val; |
| val += ctx->ctx_soft_pmds[cnum].val; |
| |
| lval = ctx->ctx_soft_pmds[cnum].lval; |
| } |
| |
| /* |
| * execute read checker, if any |
| */ |
| if (PMD_RD_FUNC(cnum)) { |
| unsigned long v = val; |
| ret = PMD_RD_FUNC(cnum)(task, cnum, &v, regs); |
| val = v; |
| } |
| |
| PFM_REG_RETFLAG_SET(reg_flags, ret); |
| |
| DBprintk(("read pmd[%u] ret=%d value=0x%lx pmc=0x%lx\n", |
| cnum, ret, val, ia64_get_pmc(cnum))); |
| |
| /* |
| * update register return value, abort all if problem during copy. |
| * we only modify the reg_flags field. no check mode is fine because |
| * access has been verified upfront in sys_perfmonctl(). |
| */ |
| if (__put_user(cnum, &req->reg_num)) return -EFAULT; |
| if (__put_user(val, &req->reg_value)) return -EFAULT; |
| if (__put_user(reg_flags, &req->reg_flags)) return -EFAULT; |
| if (__put_user(lval, &req->reg_last_reset_value)) return -EFAULT; |
| } |
| |
| return 0; |
| |
| abort_mission: |
| PFM_REG_RETFLAG_SET(reg_flags, PFM_REG_RETFL_EINVAL); |
| /* |
| * XXX: if this fails, we stick with the original failure, flag not updated! |
| */ |
| __put_user(reg_flags, &req->reg_flags); |
| |
| return -EINVAL; |
| } |
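| |
| /* |
| * Illustrative sketch (not part of the driver): how the 64-bit virtualized |
| * counter relates to the state manipulated by pfm_write_pmds()/pfm_read_pmds() |
| * above. On a write, the low-order bits (pmu_conf.ovfl_val wide) go to the |
| * hardware PMD and the remaining high-order bits stay in ctx_soft_pmds[].val; |
| * on a read, the two halves are recombined. The helper below merely restates |
| * that arithmetic using the same types as this file. |
| */ |
| #if 0 |
| static unsigned long |
| pfm_sketch_full_counter(pfm_context_t *ctx, unsigned int cnum, unsigned long hw_pmd) |
| { |
| /* low-order bits live in the hardware register... */ |
| unsigned long low = hw_pmd & pmu_conf.ovfl_val; |
| /* ...high-order bits and accumulated overflows live in software */ |
| unsigned long high = ctx->ctx_soft_pmds[cnum].val; |
| |
| /* same arithmetic as the PMD_IS_COUNTING() case in pfm_read_pmds() */ |
| return high + low; |
| } |
| #endif |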
| |
| #ifdef PFM_PMU_USES_DBR |
| /* |
| * Only call this function when a process is trying to |
| * write the debug registers (reading is always allowed) |
| */ |
| int |
| pfm_use_debug_registers(struct task_struct *task) |
| { |
| pfm_context_t *ctx = task->thread.pfm_context; |
| int ret = 0; |
| |
| DBprintk(("called for [%d]\n", task->pid)); |
| |
| /* |
| * do it only once |
| */ |
| if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; |
| |
| /* |
| * Even on SMP, we do not need to use an atomic here because |
| * the only way in is via ptrace() and this is possible only when the |
| * process is stopped. Even in the case where the ctxsw out is not totally |
| * completed by the time we come here, there is no way the 'stopped' process |
| * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. |
| * So this is always safe. |
| */ |
| if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; |
| |
| LOCK_PFS(); |
| |
| /* |
| * We cannot allow setting breakpoints when system wide monitoring |
| * sessions are using the debug registers. |
| */ |
| if (pfm_sessions.pfs_sys_use_dbregs > 0) |
| ret = -1; |
| else |
| pfm_sessions.pfs_ptrace_use_dbregs++; |
| |
| DBprintk(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", |
| pfm_sessions.pfs_ptrace_use_dbregs, |
| pfm_sessions.pfs_sys_use_dbregs, |
| task->pid, ret)); |
| |
| UNLOCK_PFS(); |
| |
| return ret; |
| } |
| |
| /* |
| * This function is called for every task that exits with the |
| * IA64_THREAD_DBG_VALID flag set. This indicates a task which was |
| * able to use the debug registers for debugging purposes via |
| * ptrace(). Therefore we know it was not using them for |
| * performance monitoring, so we only decrement the number |
| * of "ptraced" debug register users to keep the count up to date. |
| */ |
| int |
| pfm_release_debug_registers(struct task_struct *task) |
| { |
| int ret; |
| |
| LOCK_PFS(); |
| if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { |
| printk(KERN_DEBUG "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", |
| task->pid); |
| ret = -1; |
| } else { |
| pfm_sessions.pfs_ptrace_use_dbregs--; |
| ret = 0; |
| } |
| UNLOCK_PFS(); |
| |
| return ret; |
| } |
| #else /* !PFM_PMU_USES_DBR */ |
| /* |
| * In case the PMU does not use the debug registers, these two functions are nops. |
| * The first function is called from arch/ia64/kernel/ptrace.c. |
| * The second function is called from arch/ia64/kernel/process.c. |
| */ |
| int |
| pfm_use_debug_registers(struct task_struct *task) |
| { |
| return 0; |
| } |
| |
| int |
| pfm_release_debug_registers(struct task_struct *task) |
| { |
| return 0; |
| } |
| #endif /* PFM_PMU_USES_DBR */ |
| |
| static int |
| pfm_restart(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| void *sem = &ctx->ctx_restart_sem; |
| |
| /* |
| * Cannot do anything before PMU is enabled |
| */ |
| if (!CTX_IS_ENABLED(ctx)) return -EINVAL; |
| |
| if (task == current) { |
| DBprintk(("restarting self %d frozen=%d ovfl_regs=0x%lx\n", |
| task->pid, |
| ctx->ctx_fl_frozen, |
| ctx->ctx_ovfl_regs[0])); |
| |
| pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); |
| |
| ctx->ctx_ovfl_regs[0] = 0UL; |
| |
| /* |
| * We ignore block/don't block because we never block |
| * for a self-monitoring process. |
| */ |
| ctx->ctx_fl_frozen = 0; |
| |
| if (CTX_HAS_SMPL(ctx)) { |
| ctx->ctx_psb->psb_hdr->hdr_count = 0; |
| ctx->ctx_psb->psb_index = 0; |
| } |
| |
| /* simply unfreeze */ |
| pfm_unfreeze_pmu(); |
| |
| return 0; |
| } |
| /* restart on another task */ |
| |
| /* |
| * if blocking, then post the semaphore. |
| * if non-blocking, then we ensure that the task will go into |
| * pfm_overflow_must_block() before returning to user mode. |
| * We cannot explicitly reset another task; it MUST always |
| * be done by the task itself. This works for system-wide sessions because |
| * the tool that is controlling the session is doing "self-monitoring". |
| * |
| * XXX: what if the task never goes back to user? |
| * |
| */ |
| if (CTX_OVFL_NOBLOCK(ctx) == 0) { |
| DBprintk(("unblocking %d \n", task->pid)); |
| up(sem); |
| } else { |
| task->thread.pfm_ovfl_block_reset = 1; |
| } |
| #if 0 |
| /* |
| * in non-blocking mode, it is just a matter of |
| * resetting the sampling buffer (if any) index. The PMU |
| * is already active. |
| */ |
| |
| /* |
| * must reset the header count first |
| */ |
| if (CTX_HAS_SMPL(ctx)) { |
| DBprintk(("resetting sampling indexes for %d \n", task->pid)); |
| ctx->ctx_psb->psb_hdr->hdr_count = 0; |
| ctx->ctx_psb->psb_index = 0; |
| } |
| #endif |
| return 0; |
| } |
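| |
| /* |
| * Illustrative user-level sketch (not part of the driver): the restart protocol |
| * described above, as seen from a monitoring tool. Once the tool has been |
| * notified of an overflow and has consumed the sampling buffer, it issues the |
| * PFM_RESTART command, which lands in pfm_restart() and either resets the |
| * registers directly (self-monitoring) or posts ctx_restart_sem to unblock the |
| * monitored task. The PFM_RESTART constant comes from <asm/perfmon.h>; the |
| * user-level perfmonctl() wrapper for the system call is assumed. |
| */ |
| #if 0 |
| extern int perfmonctl(pid_t pid, int cmd, void *arg, int narg); /* assumed wrapper */ |
| |
| static int |
| sketch_handle_overflow(pid_t monitored_pid) |
| { |
| /* ...consume the sampling buffer here... */ |
| |
| /* no argument is needed for PFM_RESTART (count == 0) */ |
| return perfmonctl(monitored_pid, PFM_RESTART, NULL, 0); |
| } |
| #endif |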
| |
| static int |
| pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| /* we don't quite support this right now */ |
| if (task != current) return -EINVAL; |
| |
| /* |
| * Cannot do anything before PMU is enabled |
| */ |
| if (!CTX_IS_ENABLED(ctx)) return -EINVAL; |
| |
| DBprintk(("[%d] fl_system=%d owner=%p current=%p\n", |
| current->pid, |
| ctx->ctx_fl_system, PMU_OWNER(), |
| current)); |
| |
| /* simply stop monitoring but not the PMU */ |
| if (ctx->ctx_fl_system) { |
| |
| /* disable dcr pp */ |
| ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP); |
| |
| /* stop monitoring */ |
| pfm_clear_psr_pp(); |
| ia64_srlz_i(); |
| |
| PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); |
| |
| ia64_psr(regs)->pp = 0; |
| |
| } else { |
| |
| /* stop monitoring */ |
| pfm_clear_psr_up(); |
| ia64_srlz_i(); |
| |
| /* |
| * clear user level psr.up |
| */ |
| ia64_psr(regs)->up = 0; |
| } |
| return 0; |
| } |
| |
| static int |
| pfm_disable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| /* we don't quite support this right now */ |
| if (task != current) return -EINVAL; |
| |
| if (!CTX_IS_ENABLED(ctx)) return -EINVAL; |
| |
| /* |
| * stop monitoring, freeze PMU, and save state in context |
| * this call will clear IA64_THREAD_PM_VALID for per-task sessions. |
| */ |
| pfm_flush_regs(task); |
| |
| if (ctx->ctx_fl_system) { |
| ia64_psr(regs)->pp = 0; |
| } else { |
| ia64_psr(regs)->up = 0; |
| } |
| /* |
| * goes back to the default behavior: no user level control. |
| * No need to change the live psr.sp because it is useless at the kernel level. |
| */ |
| ia64_psr(regs)->sp = 1; |
| |
| DBprintk(("enabling psr.sp for [%d]\n", current->pid)); |
| |
| ctx->ctx_flags.state = PFM_CTX_DISABLED; |
| |
| return 0; |
| } |
| |
| static int |
| pfm_context_destroy(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| /* we don't quite support this right now */ |
| if (task != current) return -EINVAL; |
| |
| /* |
| * if context was never enabled, then there is not much |
| * to do |
| */ |
| if (!CTX_IS_ENABLED(ctx)) goto skipped_stop; |
| |
| /* |
| * Disable context: stop monitoring, flush regs to software state (useless here), |
| * and freeze PMU |
| * |
| * The IA64_THREAD_PM_VALID is cleared by pfm_flush_regs() called from pfm_disable() |
| */ |
| pfm_disable(task, ctx, arg, count, regs); |
| |
| if (ctx->ctx_fl_system) { |
| ia64_psr(regs)->pp = 0; |
| } else { |
| ia64_psr(regs)->up = 0; |
| } |
| |
| skipped_stop: |
| /* |
| * remove sampling buffer mapping, if any |
| */ |
| if (ctx->ctx_smpl_vaddr) { |
| pfm_remove_smpl_mapping(task); |
| ctx->ctx_smpl_vaddr = 0UL; |
| } |
| /* now free context and related state */ |
| pfm_context_exit(task); |
| |
| return 0; |
| } |
| |
| /* |
| * does nothing at the moment |
| */ |
| static int |
| pfm_context_unprotect(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| return 0; |
| } |
| |
| static int |
| pfm_protect_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| /* |
| * from now on, only the creator of the context has access to it |
| */ |
| ctx->ctx_fl_protected = 1; |
| |
| /* |
| * reinforce secure monitoring: cannot toggle psr.up |
| */ |
| if (ctx->ctx_fl_unsecure == 0) ia64_psr(regs)->sp = 1; |
| |
| DBprintk(("[%d] protected psr.sp=%d\n", task->pid, ia64_psr(regs)->sp)); |
| |
| return 0; |
| } |
| |
| static int |
| pfm_debug(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| unsigned int mode = *(unsigned int *)arg; |
| |
| pfm_sysctl.debug = mode == 0 ? 0 : 1; |
| |
| printk(KERN_INFO "perfmon debugging %s\n", pfm_sysctl.debug ? "on" : "off"); |
| |
| return 0; |
| } |
| |
| #ifdef PFM_PMU_USES_DBR |
| |
| typedef struct { |
| unsigned long ibr_mask:56; |
| unsigned long ibr_plm:4; |
| unsigned long ibr_ig:3; |
| unsigned long ibr_x:1; |
| } ibr_mask_reg_t; |
| |
| typedef struct { |
| unsigned long dbr_mask:56; |
| unsigned long dbr_plm:4; |
| unsigned long dbr_ig:2; |
| unsigned long dbr_w:1; |
| unsigned long dbr_r:1; |
| } dbr_mask_reg_t; |
| |
| typedef union { |
| unsigned long val; |
| ibr_mask_reg_t ibr; |
| dbr_mask_reg_t dbr; |
| } dbreg_t; |
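| |
| /* |
| * Illustrative sketch (not part of the driver): how the dbreg_t union above is |
| * used to pick apart a raw 64-bit debug register value. The helper mirrors what |
| * pfm_write_ibr_dbr() below does on odd-numbered registers: it forces the |
| * enable bits off so that no live breakpoint can be installed through perfmon. |
| */ |
| #if 0 |
| static unsigned long |
| pfm_sketch_sanitize_dbreg(int mode, unsigned long raw) |
| { |
| dbreg_t d; |
| |
| d.val = raw; |
| |
| if (mode == 0) |
| d.ibr.ibr_x = 0; /* instruction breakpoint: clear the execute enable bit */ |
| else |
| d.dbr.dbr_r = d.dbr.dbr_w = 0; /* data breakpoint: clear the read/write enable bits */ |
| |
| return d.val; |
| } |
| #endif |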
| |
| |
| static int |
| pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, struct pt_regs *regs) |
| { |
| struct thread_struct *thread = &task->thread; |
| pfm_context_t *ctx = task->thread.pfm_context; |
| pfarg_dbreg_t tmp, *req = (pfarg_dbreg_t *)arg; |
| dbreg_t dbreg; |
| unsigned int rnum; |
| int first_time; |
| int i, ret = 0; |
| |
| /* |
| * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w |
| * ensuring that no real breakpoint can be installed via this call. |
| */ |
| |
| first_time = ctx->ctx_fl_using_dbreg == 0; |
| |
| /* |
| * check for debug registers in system wide mode |
| * |
| */ |
| LOCK_PFS(); |
| if (ctx->ctx_fl_system && first_time) { |
| if (pfm_sessions.pfs_ptrace_use_dbregs) |
| ret = -EBUSY; |
| else |
| pfm_sessions.pfs_sys_use_dbregs++; |
| } |
| UNLOCK_PFS(); |
| |
| if (ret != 0) return ret; |
| |
| if (ctx->ctx_fl_system) { |
| /* we mark ourselves as owner of the debug registers */ |
| ctx->ctx_fl_using_dbreg = 1; |
| DBprintk(("system-wide setting fl_using_dbreg for [%d]\n", task->pid)); |
| } else if (first_time) { |
| ret = -EBUSY; |
| if ((thread->flags & IA64_THREAD_DBG_VALID) != 0) { |
| DBprintk(("debug registers already in use for [%d]\n", task->pid)); |
| goto abort_mission; |
| } |
| /* we mark ourselves as owner of the debug registers */ |
| ctx->ctx_fl_using_dbreg = 1; |
| |
| DBprintk(("setting fl_using_dbreg for [%d]\n", task->pid)); |
| /* |
| * Given that debug registers cannot be used for both debugging |
| * and performance monitoring at the same time, we reuse |
| * the storage area to save and restore the registers on ctxsw. |
| */ |
| memset(task->thread.dbr, 0, sizeof(task->thread.dbr)); |
| memset(task->thread.ibr, 0, sizeof(task->thread.ibr)); |
| } |
| |
| if (first_time) { |
| DBprintk(("[%d] clearing ibrs,dbrs\n", task->pid)); |
| /* |
| * clear hardware registers to make sure we don't |
| * pick up stale state. |
| * |
| * for a system wide session, we do not use |
| * thread.dbr, thread.ibr because this process |
| * never leaves the current CPU and the state |
| * is shared by all processes running on it |
| */ |
| for (i=0; i < pmu_conf.num_ibrs; i++) { |
| ia64_set_ibr(i, 0UL); |
| } |
| ia64_srlz_i(); |
| for (i=0; i < pmu_conf.num_dbrs; i++) { |
| ia64_set_dbr(i, 0UL); |
| } |
| ia64_srlz_d(); |
| } |
| |
| ret = -EFAULT; |
| |
| /* |
| * Now install the values into the registers |
| */ |
| for (i = 0; i < count; i++, req++) { |
| |
| if (__copy_from_user(&tmp, req, sizeof(tmp))) goto abort_mission; |
| |
| rnum = tmp.dbreg_num; |
| dbreg.val = tmp.dbreg_value; |
| |
| ret = -EINVAL; |
| |
| if ((mode == 0 && !IBR_IS_IMPL(rnum)) || ((mode == 1) && !DBR_IS_IMPL(rnum))) { |
| DBprintk(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", |
| rnum, dbreg.val, mode, i, count)); |
| |
| goto abort_mission; |
| } |
| |
| /* |
| * make sure we do not install an enabled breakpoint |
| */ |
| if (rnum & 0x1) { |
| if (mode == 0) |
| dbreg.ibr.ibr_x = 0; |
| else |
| dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; |
| } |
| |
| /* |
| * clear return flags and copy back to user |
| * |
| * XXX: fix once EAGAIN is implemented |
| */ |
| ret = -EFAULT; |
| |
| PFM_REG_RETFLAG_SET(tmp.dbreg_flags, 0); |
| |
| if (__copy_to_user(req, &tmp, sizeof(tmp))) goto abort_mission; |
| |
| /* |
| * Debug registers, just like PMCs, can only be modified |
| * by a kernel call. Moreover, perfmon() accesses to those |
| * registers are centralized in this routine. The hardware |
| * does not modify the value of these registers, therefore, |
| * if we save them as they are written, we can avoid having |
| * to save them on context switch out. This is made possible |
| * by the fact that when perfmon uses debug registers, ptrace() |
| * won't be able to modify them concurrently. |
| */ |
| if (mode == 0) { |
| CTX_USED_IBR(ctx, rnum); |
| |
| ia64_set_ibr(rnum, dbreg.val); |
| ia64_srlz_i(); |
| |
| thread->ibr[rnum] = dbreg.val; |
| |
| DBprintk(("write ibr%u=0x%lx used_ibrs=0x%lx\n", rnum, dbreg.val, ctx->ctx_used_ibrs[0])); |
| } else { |
| CTX_USED_DBR(ctx, rnum); |
| |
| ia64_set_dbr(rnum, dbreg.val); |
| ia64_srlz_d(); |
| |
| thread->dbr[rnum] = dbreg.val; |
| |
| DBprintk(("write dbr%u=0x%lx used_dbrs=0x%lx\n", rnum, dbreg.val, ctx->ctx_used_dbrs[0])); |
| } |
| } |
| |
| return 0; |
| |
| abort_mission: |
| /* |
| * in case it was our first attempt, we undo the global modifications |
| */ |
| if (first_time) { |
| LOCK_PFS(); |
| if (ctx->ctx_fl_system) { |
| pfm_sessions.pfs_sys_use_dbregs--; |
| } |
| UNLOCK_PFS(); |
| ctx->ctx_fl_using_dbreg = 0; |
| } |
| /* |
| * install error return flag |
| */ |
| if (ret != -EFAULT) { |
| /* |
| * XXX: for now we can only come here on EINVAL |
| */ |
| PFM_REG_RETFLAG_SET(tmp.dbreg_flags, PFM_REG_RETFL_EINVAL); |
| if (__put_user(tmp.dbreg_flags, &req->dbreg_flags)) ret = -EFAULT; |
| } |
| return ret; |
| } |
| |
| static int |
| pfm_write_ibrs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| /* we don't quite support this right now */ |
| if (task != current) return -EINVAL; |
| |
| if (!CTX_IS_ENABLED(ctx)) return -EINVAL; |
| |
| return pfm_write_ibr_dbr(0, task, arg, count, regs); |
| } |
| |
| static int |
| pfm_write_dbrs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| /* we don't quite support this right now */ |
| if (task != current) return -EINVAL; |
| |
| if (!CTX_IS_ENABLED(ctx)) return -EINVAL; |
| |
| return pfm_write_ibr_dbr(1, task, arg, count, regs); |
| } |
| |
| #endif /* PFM_PMU_USES_DBR */ |
| |
| static int |
| pfm_get_features(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) |
| { |
| pfarg_features_t tmp; |
| |
| memset(&tmp, 0, sizeof(tmp)); |
| |
| tmp.ft_version = PFM_VERSION; |
| tmp.ft_smpl_version = PFM_SMPL_VERSION; |
| |
| if (__copy_to_user(arg, &tmp, sizeof(tmp))) return -EFAULT; |
| |
| return 0; |
| } |
| |
| static int |
| pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| /* we don't quite support this right now */ |
| if (task != current) return -EINVAL; |
| |
| /* |
| * Cannot do anything before PMU is enabled |
| */ |
| if (!CTX_IS_ENABLED(ctx)) return -EINVAL; |
| |
| DBprintk(("[%d] fl_system=%d owner=%p current=%p\n", |
| current->pid, |
| ctx->ctx_fl_system, PMU_OWNER(), |
| current)); |
| |
| if (PMU_OWNER() != task) { |
| printk(KERN_DEBUG "perfmon: pfm_start task [%d] not pmu owner\n", task->pid); |
| return -EINVAL; |
| } |
| |
| if (ctx->ctx_fl_system) { |
| |
| PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); |
| |
| /* set user level psr.pp */ |
| ia64_psr(regs)->pp = 1; |
| |
| /* start monitoring at kernel level */ |
| pfm_set_psr_pp(); |
| |
| /* enable dcr pp */ |
| ia64_set_dcr(ia64_get_dcr()|IA64_DCR_PP); |
| |
| ia64_srlz_i(); |
| |
| } else { |
| if ((task->thread.flags & IA64_THREAD_PM_VALID) == 0) { |
| printk(KERN_DEBUG "perfmon: pfm_start task flag not set for [%d]\n", |
| task->pid); |
| return -EINVAL; |
| } |
| /* set user level psr.up */ |
| ia64_psr(regs)->up = 1; |
| |
| /* start monitoring at kernel level */ |
| pfm_set_psr_up(); |
| |
| ia64_srlz_i(); |
| } |
| |
| return 0; |
| } |
| |
| static int |
| pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| /* we don't quite support this right now */ |
| if (task != current) { |
| DBprintk(("task [%d] != current [%d]\n", task->pid, current->pid)); |
| return -EINVAL; |
| } |
| |
| #ifndef CONFIG_SMP |
| if (ctx->ctx_fl_system == 0 && PMU_OWNER() && PMU_OWNER() != current) |
| pfm_lazy_save_regs(PMU_OWNER()); |
| #endif |
| |
| /* reset all registers to stable quiet state */ |
| pfm_reset_pmu(task); |
| |
| /* make sure nothing starts */ |
| if (ctx->ctx_fl_system) { |
| ia64_psr(regs)->pp = 0; |
| ia64_psr(regs)->up = 0; /* just to make sure! */ |
| |
| /* make sure monitoring is stopped */ |
| pfm_clear_psr_pp(); |
| ia64_srlz_i(); |
| |
| PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); |
| PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); |
| if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); |
| } else { |
| /* |
| * needed in case the task was a passive task during |
| * a system wide session and now wants to have its own |
| * session |
| */ |
| ia64_psr(regs)->pp = 0; /* just to make sure! */ |
| ia64_psr(regs)->up = 0; |
| |
| /* make sure monitoring is stopped */ |
| pfm_clear_psr_up(); |
| ia64_srlz_i(); |
| |
| DBprintk(("clearing psr.sp for [%d]\n", current->pid)); |
| |
| /* allow user level control */ |
| ia64_psr(regs)->sp = 0; |
| |
| /* PMU state will be saved/restored on ctxsw */ |
| task->thread.flags |= IA64_THREAD_PM_VALID; |
| } |
| |
| SET_PMU_OWNER(task); |
| |
| ctx->ctx_flags.state = PFM_CTX_ENABLED; |
| SET_LAST_CPU(ctx, smp_processor_id()); |
| INC_ACTIVATION(); |
| SET_ACTIVATION(ctx); |
| |
| /* simply unfreeze */ |
| pfm_unfreeze_pmu(); |
| |
| return 0; |
| } |
| |
| static int |
| pfm_get_pmc_reset(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, |
| struct pt_regs *regs) |
| { |
| pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg; |
| unsigned int cnum; |
| int i, ret = -EINVAL; |
| |
| for (i = 0; i < count; i++, req++) { |
| |
| if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; |
| |
| cnum = tmp.reg_num; |
| |
| if (!PMC_IS_IMPL(cnum)) goto abort_mission; |
| |
| tmp.reg_value = PMC_DFL_VAL(cnum); |
| |
| PFM_REG_RETFLAG_SET(tmp.reg_flags, 0); |
| |
| DBprintk(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, tmp.reg_value)); |
| |
| if (__copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT; |
| } |
| return 0; |
| abort_mission: |
| PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL); |
| if (__copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT; |
| |
| return ret; |
| } |
| |
| /* |
| * functions MUST be listed in increasing order of their index (see perfmon.h) |
| */ |
| static pfm_cmd_desc_t pfm_cmd_tab[]={ |
| /* 0 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 1 */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, |
| /* 2 */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, |
| /* 3 */{ pfm_read_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, |
| /* 4 */{ pfm_stop, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, |
| /* 5 */{ pfm_start, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, |
| /* 6 */{ pfm_enable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, |
| /* 7 */{ pfm_disable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, |
| /* 8 */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_RW, 1, sizeof(pfarg_context_t)}, |
| /* 9 */{ pfm_context_destroy, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, |
| /* 10 */{ pfm_restart, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_NOCHK, 0, 0}, |
| /* 11 */{ pfm_protect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, |
| /* 12 */{ pfm_get_features, PFM_CMD_ARG_RW, 0, 0}, |
| /* 13 */{ pfm_debug, 0, 1, sizeof(unsigned int)}, |
| /* 14 */{ pfm_context_unprotect, PFM_CMD_PID|PFM_CMD_CTX, 0, 0}, |
| /* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)}, |
| /* 16 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 17 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 18 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 19 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 20 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 21 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 22 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 23 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 24 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 25 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 26 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 27 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 28 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 29 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 30 */{ NULL, 0, 0, 0}, /* not used */ |
| /* 31 */{ NULL, 0, 0, 0}, /* not used */ |
| #ifdef PFM_PMU_USES_DBR |
| /* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}, |
| /* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)} |
| #endif |
| }; |
| #define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t)) |
| |
| static int |
| check_task_state(struct task_struct *task) |
| { |
| int ret = 0; |
| #ifdef CONFIG_SMP |
| /* We must wait until the state has been completely |
| * saved. There can be situations where the reader arrives |
| * after the task is marked as STOPPED but before pfm_save_regs() |
| * has completed. |
| */ |
| for (;;) { |
| |
| task_lock(task); |
| DBprintk((" [%d] state=%ld\n", task->pid, task->state)); |
| if (!task_has_cpu(task)) break; |
| task_unlock(task); |
| |
| do { |
| if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) { |
| DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state)); |
| return -EBUSY; |
| } |
| barrier(); |
| cpu_relax(); |
| } while (task_has_cpu(task)); |
| } |
| task_unlock(task); |
| #else |
| if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) { |
| DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state)); |
| ret = -EBUSY; |
| } |
| #endif |
| return ret; |
| } |
| |
| asmlinkage long |
| sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6, long arg7, |
| long arg8, long stack) |
| { |
| struct pt_regs *regs = (struct pt_regs *)&stack; |
| struct task_struct *task = current; |
| pfm_context_t *ctx; |
| size_t sz; |
| long ret; |
| int narg; |
| |
| /* |
| * reject any call if perfmon was disabled at initialization time |
| */ |
| if (PFM_IS_DISABLED()) return -ENOSYS; |
| |
| DBprintk(("cmd=%d idx=%d valid=%d narg=0x%x\n", cmd, PFM_CMD_IDX(cmd), |
| PFM_CMD_IS_VALID(cmd), PFM_CMD_NARG(cmd))); |
| |
| if (PFM_CMD_IS_VALID(cmd) == 0) return -EINVAL; |
| |
| /* ignore arguments when command has none */ |
| narg = PFM_CMD_NARG(cmd); |
| if ((narg == PFM_CMD_ARG_MANY && count == 0) || (narg > 0 && narg != count)) return -EINVAL; |
| |
| sz = PFM_CMD_ARG_SIZE(cmd); |
| |
| if (PFM_CMD_READ_ARG(cmd) && !access_ok(VERIFY_READ, arg, sz*count)) return -EFAULT; |
| |
| if (PFM_CMD_RW_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT; |
| |
| if (PFM_CMD_USE_PID(cmd)) { |
| /* |
| * XXX: may need to fine tune this one |
| */ |
| if (pid < 2) return -EPERM; |
| |
| if (pid != current->pid) { |
| |
| ret = -ESRCH; |
| |
| read_lock(&tasklist_lock); |
| |
| task = find_task_by_pid(pid); |
| |
| if (!task) goto abort_call; |
| |
| ret = -EPERM; |
| |
| if (pfm_bad_permissions(task)) goto abort_call; |
| |
| if (PFM_CMD_CHK(cmd)) { |
| ret = check_task_state(task); |
| if (ret != 0) { |
| DBprintk(("check_task_state=%ld for [%d]\n", ret, task->pid)); |
| goto abort_call; |
| } |
| } |
| } |
| } |
| |
| ctx = PFM_GET_CTX(task); |
| |
| if (PFM_CMD_USE_CTX(cmd)) { |
| ret = -EINVAL; |
| if (ctx == NULL) { |
| DBprintk(("no context for task %d\n", task->pid)); |
| goto abort_call; |
| } |
| |
| |
| ret = -EPERM; |
| /* |
| * we only grant access to the context if: |
| * - the caller is the creator of the context (ctx_owner) |
| * OR - the context is attached to the caller AND the context IS NOT |
| * in protected mode |
| */ |
| if (ctx->ctx_owner != current && (ctx->ctx_fl_protected || task != current)) { |
| DBprintk(("context protected, no access for [%d]\n", task->pid)); |
| goto abort_call; |
| } |
| } |
| |
| ret = (*pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func)(task, ctx, arg, count, regs); |
| |
| abort_call: |
| if (task != current) read_unlock(&tasklist_lock); |
| |
| return ret; |
| } |
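| |
| /* |
| * Illustrative user-level sketch (not part of the driver): a minimal |
| * perfmonctl() invocation consistent with the argument checking done above, |
| * reading one PMD from an already created and enabled context. The |
| * PFM_READ_PMDS constant and the pfarg_reg_t layout come from <asm/perfmon.h>; |
| * the user-level perfmonctl() wrapper for the system call is assumed. |
| */ |
| #if 0 |
| extern int perfmonctl(pid_t pid, int cmd, void *arg, int narg); /* assumed wrapper */ |
| |
| static int |
| sketch_read_one_pmd(pid_t pid, unsigned int pmd, unsigned long *value) |
| { |
| pfarg_reg_t req; |
| |
| memset(&req, 0, sizeof(req)); |
| req.reg_num = pmd; |
| |
| /* count == 1: a single pfarg_reg_t entry, checked against PFM_CMD_NARG() above */ |
| if (perfmonctl(pid, PFM_READ_PMDS, &req, 1)) return -1; |
| |
| *value = req.reg_value; /* 64-bit virtualized counter value */ |
| return 0; |
| } |
| #endif |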
| |
| void asmlinkage |
| pfm_ovfl_block_reset(u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, |
| u64 arg6, u64 arg7, long info) |
| { |
| struct thread_struct *th = ¤t->thread; |
| pfm_context_t *ctx = current->thread.pfm_context; |
| int ret; |
| |
| /* |
| * clear the flag, to make sure we won't get here |
| * again |
| */ |
| th->pfm_ovfl_block_reset = 0; |
| |
| /* |
| * do some sanity checks first |
| */ |
| if (!ctx) { |
| printk(KERN_DEBUG "perfmon: [%d] has no PFM context\n", current->pid); |
| return; |
| } |
| |
| if (CTX_OVFL_NOBLOCK(ctx)) goto non_blocking; |
| |
| DBprintk(("[%d] before sleeping\n", current->pid)); |
| |
| /* |
| * may go through without blocking on SMP systems |
| * if restart has been received already by the time we call down() |
| */ |
| ret = down_interruptible(&ctx->ctx_restart_sem); |
| |
| DBprintk(("[%d] after sleeping ret=%d\n", current->pid, ret)); |
| |
| /* |
| * in case of interruption of down() we don't restart anything |
| */ |
| if (ret >= 0) { |
| |
| non_blocking: |
| /* we reactivate on context switch */ |
| ctx->ctx_fl_frozen = 0; |
| /* |
| * the ovfl_sem is cleared by the restart task and this is safe because we always |
| * use the local reference |
| */ |
| |
| pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); |
| |
| ctx->ctx_ovfl_regs[0] = 0UL; |
| |
| /* |
| * Unlock sampling buffer and reset index atomically |
| * XXX: not really needed when blocking |
| */ |
| if (CTX_HAS_SMPL(ctx)) { |
| ctx->ctx_psb->psb_hdr->hdr_count = 0; |
| ctx->ctx_psb->psb_index = 0; |
| } |
| |
| pfm_unfreeze_pmu(); |
| |
| /* state restored, can go back to work (user mode) */ |
| } |
| } |
| |
| /* |
| * This function records an entry in the sampling buffer if it is not already full. |
| * Input: |
| * ovfl_mask: mask of overflowed PMDs. MUST NEVER be 0. |
| * Return: |
| * 0 : buffer did not become full (there is still space, or it was already full) |
| * 1 : buffer became full (this call recorded the last entry) |
| */ |
| static int |
| pfm_record_sample(struct task_struct *task, pfm_context_t *ctx, unsigned long ovfl_mask, struct pt_regs *regs) |
| { |
| pfm_smpl_buffer_desc_t *psb = ctx->ctx_psb; |
| unsigned long *e, m, idx; |
| perfmon_smpl_entry_t *h; |
| int j; |
| |
| idx = ia64_fetch_and_add(1, &psb->psb_index); |
| DBprintk_ovfl(("recording index=%ld entries=%ld\n", idx-1, psb->psb_entries)); |
| |
| /* |
| * XXX: there is a small chance that we could run out of index values before resetting, |
| * but index is an unsigned long, so it will take some time. |
| * We use > instead of == because fetch_and_add() is off by one (see below) |
| * |
| * This case can happen in non-blocking mode or with multiple processes. |
| * For non-blocking, we need to reload and continue. |
| */ |
| if (idx > psb->psb_entries) return 0; |
| |
| /* first entry is really entry 0, not 1: fetch_and_add() returns the incremented value */ |
| idx--; |
| |
| h = (perfmon_smpl_entry_t *)(((char *)psb->psb_addr) + idx*(psb->psb_entry_size)); |
| |
| /* |
| * initialize entry header |
| */ |
| h->pid = ctx->ctx_fl_system ? current->pid : task->pid; |
| h->cpu = smp_processor_id(); |
| h->last_reset_value = ovfl_mask ? ctx->ctx_soft_pmds[ffz(~ovfl_mask)].lval : 0UL; |
| h->ip = regs ? regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3): 0x0UL; |
| h->regs = ovfl_mask; /* which registers overflowed */ |
| |
| /* guaranteed to monotonically increase on each cpu */ |
| h->stamp = pfm_get_stamp(); |
| |
| /* position for first pmd */ |
| e = (unsigned long *)(h+1); |
| |
| /* |
| * selectively store PMDs in increasing index number |
| */ |
| m = ctx->ctx_smpl_regs[0]; |
| for (j=0; m; m >>=1, j++) { |
| |
| if ((m & 0x1) == 0) continue; |
| |
| if (PMD_IS_COUNTING(j)) { |
| *e = pfm_read_soft_counter(ctx, j); |
| } else { |
| *e = ia64_get_pmd(j); /* slow */ |
| } |
| DBprintk_ovfl(("e=%p pmd%d =0x%lx\n", (void *)e, j, *e)); |
| e++; |
| } |
| pfm_stats[smp_processor_id()].pfm_recorded_samples_count++; |
| |
| /* |
| * make the new entry visible to user, needs to be atomic |
| */ |
| ia64_fetch_and_add(1, &psb->psb_hdr->hdr_count); |
| |
| DBprintk_ovfl(("index=%ld entries=%ld hdr_count=%ld\n", |
| idx, psb->psb_entries, psb->psb_hdr->hdr_count)); |
| /* |
| * sampling buffer full ? |
| */ |
| if (idx == (psb->psb_entries-1)) { |
| DBprintk_ovfl(("sampling buffer full\n")); |
| /* |
| * XXX: must reset buffer in blocking mode and when the notification is lost |
| */ |
| pfm_stats[smp_processor_id()].pfm_full_smpl_buffer_count++; |
| return 1; |
| } |
| return 0; |
| } |
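| |
| /* |
| * Illustrative user-level sketch (not part of the driver): walking the sampling |
| * buffer that pfm_record_sample() fills. Entries are perfmon_smpl_entry_t |
| * headers spaced entry_size bytes apart, each immediately followed by the PMD |
| * values selected in ctx_smpl_regs, stored in increasing register index order. |
| * The base address, entry count, and entry size are assumed to be known to the |
| * tool (e.g. from the mapped buffer header). |
| */ |
| #if 0 |
| static void |
| sketch_walk_smpl_buffer(void *base, unsigned long count, unsigned long entry_size) |
| { |
| unsigned long i; |
| |
| for (i = 0; i < count; i++) { |
| perfmon_smpl_entry_t *h = (perfmon_smpl_entry_t *)((char *)base + i*entry_size); |
| unsigned long *pmd = (unsigned long *)(h+1); /* first recorded PMD value */ |
| |
| /* h->pid, h->ip, h->regs (overflow mask), and h->stamp describe the sample */ |
| /* pmd[0], pmd[1], ... hold the selected PMD values, lowest index first */ |
| (void)pmd; |
| } |
| } |
| #endif |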
| |
| /* |
| * main overflow processing routine. |
| * it can be called from the interrupt path or explicitly from the context switch code |
| * Return: |
| * new value of pmc[0]. if 0x0 then unfreeze, else keep frozen |
| */ |
| static unsigned long |
| pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs) |
| { |
| unsigned long mask; |
| struct thread_struct *t; |
| unsigned long old_val; |
| unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL; |
| int i; |
| int ret = 1; |
| /* |
| * It is never safe to access the task for which the overflow interrupt is destined |
| * using the current variable, as the interrupt may occur in the middle of a context switch |
| * where current does not yet point to the task that is running. |
| * |
| * For monitoring, however, we do need to get access to the task which caused the overflow |
| * to account for overflow on the counters. |
| * |
| * We accomplish this by maintaining a current owner of the PMU per CPU. During context |
| * switch the ownership is changed in a way such that the reflected owner is always the |
| * valid one, i.e. the one that caused the interrupt. |
| */ |
| |
| t = &task->thread; |
| |
| /* |
| * XXX: debug test |
| * Don't think this could happen given upfront tests |
| */ |
| if ((t->flags & IA64_THREAD_PM_VALID) == 0 && ctx->ctx_fl_system == 0) { |
| printk(KERN_DEBUG "perfmon: Spurious overflow interrupt: process %d not " |
| "using perfmon\n", task->pid); |
| return 0x1; |
| } |
| /* |
| * sanity test. Should never happen |
| */ |
| if ((pmc0 & 0x1) == 0) { |
| printk(KERN_DEBUG "perfmon: pid %d pmc0=0x%lx assumption error for freeze bit\n", |
| task->pid, pmc0); |
| return 0x0; |
| } |
| |
| mask = pmc0 >> PMU_FIRST_COUNTER; |
| |
| DBprintk_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s" |
| " mode used_pmds=0x%lx used_pmcs=0x%lx reload_pmcs=0x%lx\n", |
| pmc0, task->pid, (regs ? regs->cr_iip : 0), |
| CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", |
| ctx->ctx_used_pmds[0], |
| ctx->ctx_used_pmcs[0], |
| ctx->ctx_reload_pmcs[0])); |
| |
| /* |
| * First we update the virtual counters |
| */ |
| for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { |
| |
| /* skip pmd which did not overflow */ |
| if ((mask & 0x1) == 0) continue; |
| |
| DBprintk_ovfl(("pmd[%d] overflowed hw_pmd=0x%lx soft_pmd=0x%lx\n", |
| i, ia64_get_pmd(i), ctx->ctx_soft_pmds[i].val)); |
| |
| /* |
| * Note that the pmd is not necessarily 0 at this point as qualified events |
| * may have happened before the PMU was frozen. The residual count is not |
| * taken into consideration here but will be with any read of the pmd via |
| * pfm_read_pmds(). |
| */ |
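| /* |
| * the counter wrapped around, so account for one full hardware counter |
| * period (ovfl_val+1 events) in the 64-bit software counter. |
| */ |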
| old_val = ctx->ctx_soft_pmds[i].val; |
| ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val; |
| |
| /* |
| * check for overflow condition |
| */ |
| if (old_val > ctx->ctx_soft_pmds[i].val) { |
| |
| ovfl_pmds |= 1UL << i; |
| |
| if (PMC_OVFL_NOTIFY(ctx, i)) { |
| ovfl_notify |= 1UL << i; |
| } |
| } else { |
| /* |
| * clear top bits (maintain counts in lower part, may not always be zero) |
| */ |
| ia64_set_pmd(i, ia64_get_pmd(i) & pmu_conf.ovfl_val); |
| } |
| DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", |
| i, ctx->ctx_soft_pmds[i].val, old_val, |
| ia64_get_pmd(i) & pmu_conf.ovfl_val, ovfl_pmds, ovfl_notify)); |
| } |
| |
| /* |
| * check for sampling buffer |
| * |
| * if present, record sample only when a 64-bit counter has overflowed. |
| * We propagate notification ONLY when buffer becomes full. |
| */ |
| if(CTX_HAS_SMPL(ctx) && ovfl_pmds) { |
| ret = pfm_record_sample(task, ctx, ovfl_pmds, regs); |
| if (ret == 1) { |
| /* |
| * Sampling buffer became full |
| * If no notification was requested, then we reset the buffer index |
| * and reset the registers (done below) and resume. |
| * If notification was requested, then we defer the reset until pfm_restart() |
| */ |
| if (ovfl_notify == 0UL) { |
| ctx->ctx_psb->psb_hdr->hdr_count = 0UL; |
| ctx->ctx_psb->psb_index = 0UL; |
| } |
| } else { |
| /* |
| * sample recorded in buffer, no need to notify user |
| */ |
| ovfl_notify = 0UL; |
| } |
| } |
| |
| /* |
| * No overflow requiring a user level notification |
| */ |
| if (ovfl_notify == 0UL) { |
| if (ovfl_pmds) |
| pfm_reset_regs(ctx, &ovfl_pmds, PFM_PMD_SHORT_RESET); |
| return 0x0; |
| } |
| |
| /* |
| * keep track of what to reset when unblocking |
| */ |
| ctx->ctx_ovfl_regs[0] = ovfl_pmds; |
| |
| /* |
| * As a consequence of the overflow, we always resume |
| * with monitoring turned off. pfm_restart() will |
| * reactivate it. |
| */ |
| ctx->ctx_fl_frozen = 1; |
| |
| /* |
| * we have come to this point because there was an overflow and that notification |
| * was requested. The notify_task may have disappeared, in which case notify_task |
| * is NULL. |
| */ |
| LOCK_CTX(ctx); |
| |
| if (ctx->ctx_notify_task) { |
| if (CTX_OVFL_NOBLOCK(ctx) == 0 && ctx->ctx_notify_task != task) { |
| t->pfm_ovfl_block_reset = 1; /* will cause blocking */ |
| } else { |
| t->pfm_ovfl_block_reset = 0; |
| } |
| |
| DBprintk_ovfl(("[%d] scheduling tasklet\n", current-> |