blob: 12655bd5d84adbd854c9fe5fc7bd9839ed4c64cd [file] [log] [blame]
#include "ras.h"
#include "cpumap.h"
#include "debug.h"
#include "event-parse.h"
#include "evsel.h"
#include "trace_event.h"
#include "xyarray.h"
int __perf_evsel__sample_size(u64 sample_type)
{
u64 mask = sample_type & PERF_SAMPLE_MASK;
int size = 0;
int i;
for (i = 0; i < 64; i++) {
if (mask & (1ULL << i))
size++;
}
size *= sizeof(u64);
return size;
}
/**
* __perf_evsel__calc_id_pos - calculate id_pos.
* @sample_type: sample type
*
* This function returns the position of the event id (PERF_SAMPLE_ID or
* PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
* sample_event.
*/
static int __perf_evsel__calc_id_pos(u64 sample_type)
{
int idx = 0;
if (sample_type & PERF_SAMPLE_IDENTIFIER)
return 0;
if (!(sample_type & PERF_SAMPLE_ID))
return -1;
if (sample_type & PERF_SAMPLE_IP)
idx += 1;
if (sample_type & PERF_SAMPLE_TID)
idx += 1;
if (sample_type & PERF_SAMPLE_TIME)
idx += 1;
if (sample_type & PERF_SAMPLE_ADDR)
idx += 1;
return idx;
}
/**
* __perf_evsel__calc_is_pos - calculate is_pos.
* @sample_type: sample type
*
* This function returns the position (counting backwards) of the event id
* (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
* sample_id_all is used there is an id sample appended to non-sample events.
*/
static int __perf_evsel__calc_is_pos(u64 sample_type)
{
int idx = 1;
if (sample_type & PERF_SAMPLE_IDENTIFIER)
return 1;
if (!(sample_type & PERF_SAMPLE_ID))
return -1;
if (sample_type & PERF_SAMPLE_CPU)
idx += 1;
if (sample_type & PERF_SAMPLE_STREAM_ID)
idx += 1;
return idx;
}
void perf_evsel__calc_id_pos(struct perf_evsel *evsel)
{
evsel->id_pos = __perf_evsel__calc_id_pos(evsel->attr.sample_type);
evsel->is_pos = __perf_evsel__calc_is_pos(evsel->attr.sample_type);
}
void perf_evsel__exit(struct perf_evsel *evsel)
{
assert(list_empty(&evsel->node));
perf_evsel__free_fd(evsel);
perf_evsel__free_id(evsel);
}
void perf_evsel__delete(struct perf_evsel *evsel)
{
perf_evsel__exit(evsel);
/* close_cgroup(evsel->cgrp); */
zfree(&evsel->group_name);
/*
if (evsel->tp_format)
pevent_free_format(evsel->tp_format);
*/
zfree(&evsel->name);
free(evsel);
}
void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
enum perf_event_sample_format bit)
{
if (!(evsel->attr.sample_type & bit)) {
evsel->attr.sample_type |= bit;
evsel->sample_size += sizeof(u64);
perf_evsel__calc_id_pos(evsel);
}
}
void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
enum perf_event_sample_format bit)
{
if (evsel->attr.sample_type & bit) {
evsel->attr.sample_type &= ~bit;
evsel->sample_size -= sizeof(u64);
perf_evsel__calc_id_pos(evsel);
}
}
void perf_evsel__set_sample_id(struct perf_evsel *evsel,
bool can_sample_identifier)
{
if (can_sample_identifier) {
perf_evsel__reset_sample_bit(evsel, ID);
perf_evsel__set_sample_bit(evsel, IDENTIFIER);
} else {
perf_evsel__set_sample_bit(evsel, ID);
}
evsel->attr.read_format |= PERF_FORMAT_ID;
}
void perf_evsel__init(struct perf_evsel *evsel,
struct perf_event_attr *attr, int idx)
{
evsel->idx = idx;
evsel->attr = *attr;
evsel->leader = evsel;
evsel->unit = "";
evsel->scale = 1.0;
INIT_LIST_HEAD(&evsel->node);
/* hists__init(&evsel->hists); */
evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
perf_evsel__calc_id_pos(evsel);
}
struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
{
struct perf_evsel *evsel = zalloc(sizeof(*evsel));
if (evsel != NULL)
perf_evsel__init(evsel, attr, idx);
return evsel;
}
struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
{
struct perf_evsel *evsel = zalloc(sizeof(*evsel));
if (evsel != NULL) {
struct perf_event_attr attr = {
.type = PERF_TYPE_TRACEPOINT,
.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
};
if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
goto out_free;
evsel->tp_format = trace_event__tp_format(sys, name);
if (evsel->tp_format == NULL)
goto out_free;
event_attr_init(&attr);
attr.config = evsel->tp_format->id;
attr.sample_period = 1;
perf_evsel__init(evsel, &attr, idx);
}
return evsel;
out_free:
zfree(&evsel->name);
free(evsel);
return NULL;
}
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
if (evsel->sample_id == NULL)
return -ENOMEM;
evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
if (evsel->id == NULL) {
xyarray__delete(evsel->sample_id);
evsel->sample_id = NULL;
return -ENOMEM;
}
return 0;
}
static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
const union perf_event *event,
struct perf_sample *sample)
{
u64 type = evsel->attr.sample_type;
const u64 *array = event->sample.array;
bool swapped = evsel->needs_swap;
union u64_swap u;
array += ((event->header.size -
sizeof(event->header)) / sizeof(u64)) - 1;
if (type & PERF_SAMPLE_IDENTIFIER) {
sample->id = *array;
array--;
}
if (type & PERF_SAMPLE_CPU) {
u.val64 = *array;
if (swapped) {
/* undo swap of u64, then swap on individual u32s */
u.val64 = bswap_64(u.val64);
u.val32[0] = bswap_32(u.val32[0]);
}
sample->cpu = u.val32[0];
array--;
}
if (type & PERF_SAMPLE_STREAM_ID) {
sample->stream_id = *array;
array--;
}
if (type & PERF_SAMPLE_ID) {
sample->id = *array;
array--;
}
if (type & PERF_SAMPLE_TIME) {
sample->time = *array;
array--;
}
if (type & PERF_SAMPLE_TID) {
u.val64 = *array;
if (swapped) {
/* undo swap of u64, then swap on individual u32s */
u.val64 = bswap_64(u.val64);
u.val32[0] = bswap_32(u.val32[0]);
u.val32[1] = bswap_32(u.val32[1]);
}
sample->pid = u.val32[0];
sample->tid = u.val32[1];
array--;
}
return 0;
}
static inline bool overflow(const void *endp, u16 max_size, const void *offset,
u64 size)
{
return size > max_size || offset + size > endp;
}
#define OVERFLOW_CHECK(offset, size, max_size) \
do { \
if (overflow(endp, (max_size), (offset), (size))) \
return -EFAULT; \
} while (0)
#define OVERFLOW_CHECK_u64(offset) \
OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
struct perf_sample *data)
{
u64 type = evsel->attr.sample_type;
bool swapped = evsel->needs_swap;
const u64 *array;
u16 max_size = event->header.size;
const void *endp = (void *)event + max_size;
u64 sz;
/*
* used for cross-endian analysis. See git commit 65014ab3
* for why this goofiness is needed.
*/
union u64_swap u;
memset(data, 0, sizeof(*data));
data->cpu = data->pid = data->tid = -1;
data->stream_id = data->id = data->time = -1ULL;
data->period = evsel->attr.sample_period;
data->weight = 0;
if (event->header.type != PERF_RECORD_SAMPLE) {
if (!evsel->attr.sample_id_all)
return 0;
return perf_evsel__parse_id_sample(evsel, event, data);
}
array = event->sample.array;
/*
* The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
* up to PERF_SAMPLE_PERIOD. After that overflow() must be used to
* check the format does not go past the end of the event.
*/
if (evsel->sample_size + sizeof(event->header) > event->header.size)
return -EFAULT;
data->id = -1ULL;
if (type & PERF_SAMPLE_IDENTIFIER) {
data->id = *array;
array++;
}
if (type & PERF_SAMPLE_IP) {
data->ip = *array;
array++;
}
if (type & PERF_SAMPLE_TID) {
u.val64 = *array;
if (swapped) {
/* undo swap of u64, then swap on individual u32s */
u.val64 = bswap_64(u.val64);
u.val32[0] = bswap_32(u.val32[0]);
u.val32[1] = bswap_32(u.val32[1]);
}
data->pid = u.val32[0];
data->tid = u.val32[1];
array++;
}
if (type & PERF_SAMPLE_TIME) {
data->time = *array;
array++;
}
data->addr = 0;
if (type & PERF_SAMPLE_ADDR) {
data->addr = *array;
array++;
}
if (type & PERF_SAMPLE_ID) {
data->id = *array;
array++;
}
if (type & PERF_SAMPLE_STREAM_ID) {
data->stream_id = *array;
array++;
}
if (type & PERF_SAMPLE_CPU) {
u.val64 = *array;
if (swapped) {
/* undo swap of u64, then swap on individual u32s */
u.val64 = bswap_64(u.val64);
u.val32[0] = bswap_32(u.val32[0]);
}
data->cpu = u.val32[0];
array++;
}
if (type & PERF_SAMPLE_PERIOD) {
data->period = *array;
array++;
}
if (type & PERF_SAMPLE_READ) {
u64 read_format = evsel->attr.read_format;
OVERFLOW_CHECK_u64(array);
if (read_format & PERF_FORMAT_GROUP)
data->read.group.nr = *array;
else
data->read.one.value = *array;
array++;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
OVERFLOW_CHECK_u64(array);
data->read.time_enabled = *array;
array++;
}
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
OVERFLOW_CHECK_u64(array);
data->read.time_running = *array;
array++;
}
/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
if (read_format & PERF_FORMAT_GROUP) {
const u64 max_group_nr = UINT64_MAX /
sizeof(struct sample_read_value);
if (data->read.group.nr > max_group_nr)
return -EFAULT;
sz = data->read.group.nr *
sizeof(struct sample_read_value);
OVERFLOW_CHECK(array, sz, max_size);
data->read.group.values =
(struct sample_read_value *)array;
array = (void *)array + sz;
} else {
OVERFLOW_CHECK_u64(array);
data->read.one.id = *array;
array++;
}
}
#if 0
if (type & PERF_SAMPLE_CALLCHAIN) {
const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
OVERFLOW_CHECK_u64(array);
data->callchain = (struct ip_callchain *)array++;
if (data->callchain->nr > max_callchain_nr)
return -EFAULT;
sz = data->callchain->nr * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
array = (void *)array + sz;
}
#endif
if (type & PERF_SAMPLE_RAW) {
OVERFLOW_CHECK_u64(array);
u.val64 = *array;
if (WARN_ONCE(swapped,
"Endianness of raw data not corrected!\n")) {
/* undo swap of u64, then swap on individual u32s */
u.val64 = bswap_64(u.val64);
u.val32[0] = bswap_32(u.val32[0]);
u.val32[1] = bswap_32(u.val32[1]);
}
data->raw_size = u.val32[0];
array = (void *)array + sizeof(u32);
OVERFLOW_CHECK(array, data->raw_size, max_size);
data->raw_data = (void *)array;
array = (void *)array + data->raw_size;
}
#if 0
if (type & PERF_SAMPLE_BRANCH_STACK) {
const u64 max_branch_nr = UINT64_MAX /
sizeof(struct branch_entry);
OVERFLOW_CHECK_u64(array);
data->branch_stack = (struct branch_stack *)array++;
if (data->branch_stack->nr > max_branch_nr)
return -EFAULT;
sz = data->branch_stack->nr * sizeof(struct branch_entry);
OVERFLOW_CHECK(array, sz, max_size);
array = (void *)array + sz;
}
#endif
if (type & PERF_SAMPLE_REGS_USER) {
OVERFLOW_CHECK_u64(array);
data->user_regs.abi = *array;
array++;
if (data->user_regs.abi) {
u64 mask = evsel->attr.sample_regs_user;
sz = hweight_long(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
data->user_regs.mask = mask;
data->user_regs.regs = (u64 *)array;
array = (void *)array + sz;
}
}
if (type & PERF_SAMPLE_STACK_USER) {
OVERFLOW_CHECK_u64(array);
sz = *array++;
data->user_stack.offset = ((char *)(array - 1)
- (char *) event);
if (!sz) {
data->user_stack.size = 0;
} else {
OVERFLOW_CHECK(array, sz, max_size);
data->user_stack.data = (char *)array;
array = (void *)array + sz;
OVERFLOW_CHECK_u64(array);
data->user_stack.size = *array++;
if (WARN_ONCE(data->user_stack.size > sz,
"user stack dump failure\n"))
return -EFAULT;
}
}
data->weight = 0;
if (type & PERF_SAMPLE_WEIGHT) {
OVERFLOW_CHECK_u64(array);
data->weight = *array;
array++;
}
data->data_src = PERF_MEM_DATA_SRC_NONE;
if (type & PERF_SAMPLE_DATA_SRC) {
OVERFLOW_CHECK_u64(array);
data->data_src = *array;
array++;
}
data->transaction = 0;
if (type & PERF_SAMPLE_TRANSACTION) {
OVERFLOW_CHECK_u64(array);
data->transaction = *array;
array++;
}
return 0;
}
static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
{
struct perf_evsel *leader = evsel->leader;
int fd;
if (perf_evsel__is_group_leader(evsel))
return -1;
/*
* Leader must be already processed/open,
* if not it's a bug.
*/
BUG_ON(!leader->fd);
fd = FD(leader, cpu, thread);
BUG_ON(fd == -1);
return fd;
}
#define __PRINT_ATTR(fmt, cast, field) \
fprintf(fp, " %-19s "fmt"\n", #field, cast attr->field)
#define PRINT_ATTR_U32(field) __PRINT_ATTR("%u" , , field)
#define PRINT_ATTR_X32(field) __PRINT_ATTR("%#x", , field)
#define PRINT_ATTR_U64(field) __PRINT_ATTR("%" PRIu64, (uint64_t), field)
#define PRINT_ATTR_X64(field) __PRINT_ATTR("%#"PRIx64, (uint64_t), field)
#define PRINT_ATTR2N(name1, field1, name2, field2) \
fprintf(fp, " %-19s %u %-19s %u\n", \
name1, attr->field1, name2, attr->field2)
#define PRINT_ATTR2(field1, field2) \
PRINT_ATTR2N(#field1, field1, #field2, field2)
static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp)
{
size_t ret = 0;
ret += fprintf(fp, "%.60s\n", graph_dotted_line);
ret += fprintf(fp, "perf_event_attr:\n");
ret += PRINT_ATTR_U32(type);
ret += PRINT_ATTR_U32(size);
ret += PRINT_ATTR_X64(config);
ret += PRINT_ATTR_U64(sample_period);
ret += PRINT_ATTR_U64(sample_freq);
ret += PRINT_ATTR_X64(sample_type);
ret += PRINT_ATTR_X64(read_format);
ret += PRINT_ATTR2(disabled, inherit);
ret += PRINT_ATTR2(pinned, exclusive);
ret += PRINT_ATTR2(exclude_user, exclude_kernel);
ret += PRINT_ATTR2(exclude_hv, exclude_idle);
ret += PRINT_ATTR2(mmap, comm);
ret += PRINT_ATTR2(freq, inherit_stat);
ret += PRINT_ATTR2(enable_on_exec, task);
ret += PRINT_ATTR2(watermark, precise_ip);
ret += PRINT_ATTR2(mmap_data, sample_id_all);
ret += PRINT_ATTR2(exclude_host, exclude_guest);
ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel,
"excl.callchain_user", exclude_callchain_user);
ret += PRINT_ATTR_U32(mmap2);
ret += PRINT_ATTR_U32(wakeup_events);
ret += PRINT_ATTR_U32(wakeup_watermark);
ret += PRINT_ATTR_X32(bp_type);
ret += PRINT_ATTR_X64(bp_addr);
ret += PRINT_ATTR_X64(config1);
ret += PRINT_ATTR_U64(bp_len);
ret += PRINT_ATTR_X64(config2);
ret += PRINT_ATTR_X64(branch_sample_type);
ret += PRINT_ATTR_X64(sample_regs_user);
ret += PRINT_ATTR_U32(sample_stack_user);
ret += fprintf(fp, "%.60s\n", graph_dotted_line);
return ret;
}
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
int cpu, thread;
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
if (evsel->fd) {
for (cpu = 0; cpu < ncpus; cpu++) {
for (thread = 0; thread < nthreads; thread++) {
FD(evsel, cpu, thread) = -1;
}
}
}
return evsel->fd != NULL ? 0 : -ENOMEM;
}
void perf_evsel__free_fd(struct perf_evsel *evsel)
{
xyarray__delete(evsel->fd);
evsel->fd = NULL;
}
void perf_evsel__free_id(struct perf_evsel *evsel)
{
xyarray__delete(evsel->sample_id);
evsel->sample_id = NULL;
zfree(&evsel->id);
}
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
struct thread_map *threads)
{
int cpu, thread;
unsigned long flags = 0;
int pid = -1, err;
enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
if (evsel->fd == NULL &&
perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
return -ENOMEM;
#if 0
if (evsel->cgrp) {
flags = PERF_FLAG_PID_CGROUP;
pid = evsel->cgrp->fd;
}
fallback_missing_features:
if (perf_missing_features.mmap2)
evsel->attr.mmap2 = 0;
if (perf_missing_features.exclude_guest)
evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
#endif
if (verbose >= 2)
perf_event_attr__fprintf(&evsel->attr, stderr);
for (cpu = 0; cpu < cpus->nr; cpu++) {
for (thread = 0; thread < threads->nr; thread++) {
int group_fd;
if (!evsel->cgrp)
pid = threads->map[thread];
group_fd = get_group_fd(evsel, cpu, thread);
retry_open:
pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n",
pid, cpus->map[cpu], group_fd, flags);
FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
pid,
cpus->map[cpu],
group_fd, flags);
if (FD(evsel, cpu, thread) < 0) {
err = -errno;
pr_debug2("sys_perf_event_open failed, error %d\n",
err);
goto try_fallback;
}
set_rlimit = NO_CHANGE;
}
}
return 0;
try_fallback:
/*
* perf stat needs between 5 and 22 fds per CPU. When we run out
* of them try to increase the limits.
*/
if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
struct rlimit l;
int old_errno = errno;
if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
if (set_rlimit == NO_CHANGE)
l.rlim_cur = l.rlim_max;
else {
l.rlim_cur = l.rlim_max + 1000;
l.rlim_max = l.rlim_cur;
}
if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
set_rlimit++;
errno = old_errno;
goto retry_open;
}
}
errno = old_errno;
}
if (err != -EINVAL || cpu > 0 || thread > 0)
goto out_close;
#if 0
if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
perf_missing_features.mmap2 = true;
goto fallback_missing_features;
} else if (!perf_missing_features.exclude_guest &&
(evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
perf_missing_features.exclude_guest = true;
goto fallback_missing_features;
} else if (!perf_missing_features.sample_id_all) {
perf_missing_features.sample_id_all = true;
goto retry_sample_id;
}
#endif
out_close:
do {
while (--thread >= 0) {
close(FD(evsel, cpu, thread));
FD(evsel, cpu, thread) = -1;
}
thread = threads->nr;
} while (--cpu >= 0);
return err;
}
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
int cpu, thread;
for (cpu = 0; cpu < ncpus; cpu++)
for (thread = 0; thread < nthreads; ++thread) {
close(FD(evsel, cpu, thread));
FD(evsel, cpu, thread) = -1;
}
}
void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
{
if (evsel->fd == NULL)
return;
perf_evsel__close_fd(evsel, ncpus, nthreads);
perf_evsel__free_fd(evsel);
}
static struct {
struct cpu_map map;
int cpus[1];
} empty_cpu_map = {
.map.nr = 1,
.cpus = { -1, },
};
static struct {
struct thread_map map;
int threads[1];
} empty_thread_map = {
.map.nr = 1,
.threads = { -1, },
};
int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
struct thread_map *threads)
{
if (cpus == NULL) {
/* Work around old compiler warnings about strict aliasing */
cpus = &empty_cpu_map.map;
}
if (threads == NULL)
threads = &empty_thread_map.map;
return __perf_evsel__open(evsel, cpus, threads);
}