/*
* Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License (not later!)
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
/** FIXME: Convert numbers based on machine and file */
#define _LARGEFILE64_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#ifndef NO_AUDIT
#include <libaudit.h>
#endif
#include "trace-local.h"
#include "trace-hash.h"
#include <linux/time64.h>
#ifdef WARN_NO_AUDIT
# warning "lib audit not found, using raw syscalls " \
"(install libaudit-devel and try again)"
#endif
#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
#define TASK_STATE_MAX 1024
#define task_from_item(item) container_of(item, struct task_data, hash)
#define start_from_item(item) container_of(item, struct start_data, hash)
#define event_from_item(item) container_of(item, struct event_hash, hash)
#define stack_from_item(item) container_of(item, struct stack_data, hash)
#define group_from_item(item) container_of(item, struct group_data, hash)
#define event_data_from_item(item) container_of(item, struct event_data, hash)
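/*
 * Timestamps are in nanoseconds. These helpers split a timestamp
 * into whole seconds and the remainder rounded to microseconds for
 * the "ts:%lld.%06lld" output format used below.
 */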
static unsigned long long nsecs_per_sec(unsigned long long ts)
{
return ts / NSEC_PER_SEC;
}
static unsigned long long mod_to_usec(unsigned long long ts)
{
return ((ts % NSEC_PER_SEC) + NSEC_PER_USEC / 2) / NSEC_PER_USEC;
}
struct handle_data;
struct event_hash;
struct event_data;
typedef void (*event_data_print)(struct trace_seq *s, struct event_hash *hash);
typedef int (*handle_event_func)(struct handle_data *h, unsigned long long pid,
struct event_data *data,
struct pevent_record *record, int cpu);
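/* Classification of the event types that the profiler special-cases */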
enum event_data_type {
EVENT_TYPE_UNDEFINED,
EVENT_TYPE_STACK,
EVENT_TYPE_SCHED_SWITCH,
EVENT_TYPE_WAKEUP,
EVENT_TYPE_FUNC,
EVENT_TYPE_SYSCALL,
EVENT_TYPE_IRQ,
EVENT_TYPE_SOFTIRQ,
EVENT_TYPE_SOFTIRQ_RAISE,
EVENT_TYPE_PROCESS_EXEC,
EVENT_TYPE_USER_MATE,
};
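/*
 * Descriptor for one traced event type. Start/end pairs (e.g.
 * irq_handler_entry/irq_handler_exit) point at each other through
 * the start and end members so their timestamps can be matched up.
 */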
struct event_data {
struct trace_hash_item hash;
int id;
int trace;
struct event_format *event;
struct event_data *end;
struct event_data *start;
struct format_field *pid_field;
struct format_field *start_match_field; /* match with start */
struct format_field *end_match_field; /* match with end */
struct format_field *data_field; /* optional */
event_data_print print_func;
handle_event_func handle_event;
void *private;
int migrate; /* start/end pairs can migrate cpus */
int global; /* use global tasks */
enum event_data_type type;
};
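/* Accumulated timings for one unique stack trace, keyed by its contents */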
struct stack_data {
struct trace_hash_item hash;
unsigned long long count;
unsigned long long time;
unsigned long long time_min;
unsigned long long ts_min;
unsigned long long time_max;
unsigned long long ts_max;
unsigned long long time_avg;
unsigned long size;
char caller[];
};
struct stack_holder {
unsigned long size;
void *caller;
struct pevent_record *record;
};
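/* A pending "start" event waiting for its matching "end" event */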
struct start_data {
struct trace_hash_item hash;
struct event_data *event_data;
struct list_head list;
struct task_data *task;
unsigned long long timestamp;
unsigned long long search_val;
unsigned long long val;
int cpu;
struct stack_holder stack;
};
struct event_hash {
struct trace_hash_item hash;
struct event_data *event_data;
unsigned long long search_val;
unsigned long long val;
unsigned long long count;
unsigned long long time_total;
unsigned long long time_avg;
unsigned long long time_max;
unsigned long long ts_max;
unsigned long long time_min;
unsigned long long ts_min;
unsigned long long time_std;
unsigned long long last_time;
struct trace_hash stacks;
};
struct group_data {
struct trace_hash_item hash;
char *comm;
struct trace_hash event_hash;
};
struct task_data {
struct trace_hash_item hash;
int pid;
int sleeping;
char *comm;
struct trace_hash start_hash;
struct trace_hash event_hash;
struct task_data *proxy;
struct start_data *last_start;
struct event_hash *last_event;
struct pevent_record *last_stack;
struct handle_data *handle;
struct group_data *group;
};
struct cpu_info {
int current;
};
struct sched_switch_data {
struct format_field *prev_state;
int match_state;
};
struct handle_data {
struct handle_data *next;
struct tracecmd_input *handle;
struct pevent *pevent;
struct trace_hash events;
struct trace_hash group_hash;
struct cpu_info **cpu_data;
struct format_field *common_pid;
struct format_field *wakeup_comm;
struct format_field *switch_prev_comm;
struct format_field *switch_next_comm;
struct sched_switch_data sched_switch_blocked;
struct sched_switch_data sched_switch_preempt;
struct trace_hash task_hash;
struct list_head *cpu_starts;
struct list_head migrate_starts;
struct task_data *global_task;
struct task_data *global_percpu_tasks;
int cpus;
};
static struct handle_data *handles;
static struct event_data *stacktrace_event;
static bool merge_like_comms = false;
void trace_profile_set_merge_like_comms(void)
{
merge_like_comms = true;
}
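/*
 * Record a start event for @task. The start is hashed by @search_val
 * so the matching end event can find it, and also queued per CPU (or
 * on the migrate list when the pair may change CPUs) so that it can
 * be discarded if the ring buffer drops events.
 */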
static struct start_data *
add_start(struct task_data *task,
struct event_data *event_data, struct pevent_record *record,
unsigned long long search_val, unsigned long long val)
{
struct start_data *start;
start = malloc(sizeof(*start));
if (!start)
return NULL;
memset(start, 0, sizeof(*start));
start->hash.key = trace_hash(search_val);
start->search_val = search_val;
start->val = val;
start->timestamp = record->ts;
start->event_data = event_data;
start->cpu = record->cpu;
start->task = task;
trace_hash_add(&task->start_hash, &start->hash);
if (event_data->migrate)
list_add(&start->list, &task->handle->migrate_starts);
else
list_add(&start->list, &task->handle->cpu_starts[record->cpu]);
return start;
}
struct event_data_match {
struct event_data *event_data;
unsigned long long search_val;
unsigned long long val;
};
static int match_start(struct trace_hash_item *item, void *data)
{
struct start_data *start = start_from_item(item);
struct event_data_match *edata = data;
return start->event_data == edata->event_data &&
start->search_val == edata->search_val;
}
static int match_event(struct trace_hash_item *item, void *data)
{
struct event_data_match *edata = data;
struct event_hash *event = event_from_item(item);
return event->event_data == edata->event_data &&
event->search_val == edata->search_val &&
event->val == edata->val;
}
static struct event_hash *
find_event_hash(struct task_data *task, struct event_data_match *edata)
{
struct event_hash *event_hash;
struct trace_hash_item *item;
unsigned long long key;
key = (unsigned long)edata->event_data +
(unsigned long)edata->search_val +
(unsigned long)edata->val;
key = trace_hash(key);
item = trace_hash_find(&task->event_hash, key, match_event, edata);
if (item)
return event_from_item(item);
event_hash = malloc(sizeof(*event_hash));
if (!event_hash)
return NULL;
memset(event_hash, 0, sizeof(*event_hash));
event_hash->event_data = edata->event_data;
event_hash->search_val = edata->search_val;
event_hash->val = edata->val;
event_hash->hash.key = key;
trace_hash_init(&event_hash->stacks, 32);
trace_hash_add(&task->event_hash, &event_hash->hash);
return event_hash;
}
static struct event_hash *
find_start_event_hash(struct task_data *task, struct event_data *event_data,
struct start_data *start)
{
struct event_data_match edata;
edata.event_data = event_data;
edata.search_val = start->search_val;
edata.val = start->val;
return find_event_hash(task, &edata);
}
static struct start_data *
find_start(struct task_data *task, struct event_data *event_data,
unsigned long long search_val)
{
unsigned long long key = trace_hash(search_val);
struct event_data_match edata;
void *data = &edata;
struct trace_hash_item *item;
struct start_data *start;
edata.event_data = event_data;
edata.search_val = search_val;
item = trace_hash_find(&task->start_hash, key, match_start, data);
if (!item)
return NULL;
start = start_from_item(item);
return start;
}
struct stack_match {
void *caller;
unsigned long size;
};
static int match_stack(struct trace_hash_item *item, void *data)
{
struct stack_data *stack = stack_from_item(item);
struct stack_match *match = data;
if (match->size != stack->size)
return 0;
return memcmp(stack->caller, match->caller, stack->size) == 0;
}
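/*
 * Fold one stack trace into @event_hash, allocating a new stack_data
 * bucket if this exact stack has not been seen before.
 */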
static void add_event_stack(struct event_hash *event_hash,
void *caller, unsigned long size,
unsigned long long time, unsigned long long ts)
{
unsigned long long key;
struct stack_data *stack;
struct stack_match match;
struct trace_hash_item *item;
int i;
match.caller = caller;
match.size = size;
if (size < sizeof(int))
die("Stack size of less than sizeof(int)??");
for (key = 0, i = 0; i <= size - sizeof(int); i += sizeof(int))
key += trace_hash(*(int *)(caller + i));
item = trace_hash_find(&event_hash->stacks, key, match_stack, &match);
if (!item) {
stack = malloc(sizeof(*stack) + size);
if (!stack) {
warning("Could not allocate stack");
return;
}
memset(stack, 0, sizeof(*stack));
memcpy(&stack->caller, caller, size);
stack->size = size;
stack->hash.key = key;
trace_hash_add(&event_hash->stacks, &stack->hash);
} else
stack = stack_from_item(item);
stack->count++;
stack->time += time;
if (stack->count == 1 || time < stack->time_min) {
stack->time_min = time;
stack->ts_min = ts;
}
if (time > stack->time_max) {
stack->time_max = time;
stack->ts_max = ts;
}
}
static void free_start(struct start_data *start)
{
if (start->task->last_start == start)
start->task->last_start = NULL;
if (start->stack.record)
free_record(start->stack.record);
trace_hash_del(&start->hash);
list_del(&start->list);
free(start);
}
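/*
 * A matching end event arrived: account the delta between the start
 * and end timestamps into the task's event hash, attach any saved
 * stack trace, then release the start.
 */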
static struct event_hash *
add_and_free_start(struct task_data *task, struct start_data *start,
struct event_data *event_data, unsigned long long ts)
{
struct event_hash *event_hash;
long long delta;
delta = ts - start->timestamp;
	/*
	 * On a live trace, timestamps may differ between CPUs, which
	 * can make it look like we went back in time. When that
	 * happens, just zero out the delta.
	 */
if (delta < 0)
delta = 0;
event_hash = find_start_event_hash(task, event_data, start);
if (!event_hash)
return NULL;
event_hash->count++;
event_hash->time_total += delta;
event_hash->last_time = delta;
if (delta > event_hash->time_max) {
event_hash->time_max = delta;
event_hash->ts_max = ts;
}
if (event_hash->count == 1 || delta < event_hash->time_min) {
event_hash->time_min = delta;
event_hash->ts_min = ts;
}
if (start->stack.record) {
unsigned long size;
void *caller;
size = start->stack.size;
caller = start->stack.caller;
add_event_stack(event_hash, caller, size, delta,
start->stack.record->ts);
free_record(start->stack.record);
start->stack.record = NULL;
}
free_start(start);
return event_hash;
}
static struct event_hash *
find_and_update_start(struct task_data *task, struct event_data *event_data,
unsigned long long ts, unsigned long long search_val)
{
struct start_data *start;
start = find_start(task, event_data, search_val);
if (!start)
return NULL;
return add_and_free_start(task, start, event_data, ts);
}
static int match_task(struct trace_hash_item *item, void *data)
{
struct task_data *task = task_from_item(item);
	int pid = *(int *)data;
return task->pid == pid;
}
static void init_task(struct handle_data *h, struct task_data *task)
{
task->handle = h;
trace_hash_init(&task->start_hash, 16);
trace_hash_init(&task->event_hash, 32);
}
static struct task_data *
add_task(struct handle_data *h, int pid)
{
unsigned long long key = trace_hash(pid);
struct task_data *task;
task = malloc(sizeof(*task));
if (!task) {
warning("Could not allocate task");
return NULL;
}
memset(task, 0, sizeof(*task));
task->pid = pid;
task->hash.key = key;
trace_hash_add(&h->task_hash, &task->hash);
init_task(h, task);
return task;
}
static struct task_data *
find_task(struct handle_data *h, int pid)
{
unsigned long long key = trace_hash(pid);
struct trace_hash_item *item;
static struct task_data *last_task;
	void *data = &pid;
if (last_task && last_task->pid == pid)
return last_task;
item = trace_hash_find(&h->task_hash, key, match_task, data);
if (item)
last_task = task_from_item(item);
else
last_task = add_task(h, pid);
return last_task;
}
static int match_group(struct trace_hash_item *item, void *data)
{
struct group_data *group = group_from_item(item);
return strcmp(group->comm, (char *)data) == 0;
}
static void
add_task_comm(struct task_data *task, struct format_field *field,
struct pevent_record *record)
{
const char *comm;
task->comm = malloc(field->size + 1);
if (!task->comm) {
warning("Could not allocate task comm");
return;
}
comm = record->data + field->offset;
memcpy(task->comm, comm, field->size);
task->comm[field->size] = 0;
}
/* Account for tasks that don't have starts */
static void account_task(struct task_data *task, struct event_data *event_data,
struct pevent_record *record)
{
struct event_data_match edata;
struct event_hash *event_hash;
struct task_data *proxy = NULL;
unsigned long long search_val = 0;
unsigned long long val = 0;
unsigned long long pid;
/*
* If an event has the pid_field set, then find that task for
* this event instead. Let this task proxy for it to handle
* stack traces on this event.
*/
if (event_data->pid_field) {
pevent_read_number_field(event_data->pid_field,
record->data, &pid);
proxy = task;
task = find_task(task->handle, pid);
if (!task)
return;
proxy->proxy = task;
}
/*
* If data_field is defined, use that for val,
* if the start_field is defined, use that for search_val.
*/
if (event_data->data_field) {
pevent_read_number_field(event_data->data_field,
record->data, &val);
}
if (event_data->start_match_field) {
pevent_read_number_field(event_data->start_match_field,
record->data, &search_val);
}
edata.event_data = event_data;
	edata.search_val = search_val;
edata.val = val;
event_hash = find_event_hash(task, &edata);
if (!event_hash) {
warning("failed to allocate event_hash");
return;
}
event_hash->count++;
task->last_event = event_hash;
}
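/*
 * Pick the task this event should be accounted to: a global pseudo
 * task (per handle or per CPU), the pid named by the event's
 * pid_field, or the pid that recorded the event.
 */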
static struct task_data *
find_event_task(struct handle_data *h, struct event_data *event_data,
struct pevent_record *record, unsigned long long pid)
{
if (event_data->global) {
if (event_data->migrate)
return h->global_task;
else
return &h->global_percpu_tasks[record->cpu];
}
/* If pid_field is defined, use that to find the task */
if (event_data->pid_field)
pevent_read_number_field(event_data->pid_field,
record->data, &pid);
return find_task(h, pid);
}
static struct task_data *
handle_end_event(struct handle_data *h, struct event_data *event_data,
struct pevent_record *record, int pid)
{
struct event_hash *event_hash;
struct task_data *task;
unsigned long long val;
task = find_event_task(h, event_data, record, pid);
if (!task)
return NULL;
pevent_read_number_field(event_data->start_match_field, record->data,
&val);
event_hash = find_and_update_start(task, event_data->start, record->ts, val);
task->last_start = NULL;
task->last_event = event_hash;
return task;
}
static struct task_data *
handle_start_event(struct handle_data *h, struct event_data *event_data,
struct pevent_record *record, unsigned long long pid)
{
struct start_data *start;
struct task_data *task;
unsigned long long val;
task = find_event_task(h, event_data, record, pid);
if (!task)
return NULL;
pevent_read_number_field(event_data->end_match_field, record->data,
&val);
start = add_start(task, event_data, record, val, val);
if (!start) {
warning("Failed to allocate start of task");
return NULL;
}
task->last_start = start;
task->last_event = NULL;
return task;
}
static int handle_event_data(struct handle_data *h,
unsigned long long pid,
struct event_data *event_data,
struct pevent_record *record, int cpu)
{
struct task_data *task = NULL;
	/* If this is the end of an event pair (start is set) */
if (event_data->start)
task = handle_end_event(h, event_data, record, pid);
	/* If this is the start of an event pair (end is set) */
if (event_data->end) {
task = handle_start_event(h, event_data, record, pid);
/* handle_start_event only returns NULL on error */
if (!task)
return -1;
}
if (!task) {
task = find_task(h, pid);
if (!task)
return -1;
task->proxy = NULL;
task->last_start = NULL;
task->last_event = NULL;
account_task(task, event_data, record);
}
return 0;
}
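/*
 * The ring buffer dropped events, so pending starts can no longer be
 * trusted to pair with the next end event; throw them all away.
 */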
static void handle_missed_events(struct handle_data *h, int cpu)
{
struct start_data *start;
struct start_data *n;
/* Clear all starts on this CPU */
list_for_each_entry_safe(start, n, &h->cpu_starts[cpu], list) {
free_start(start);
}
/* Now clear all starts whose events can migrate */
list_for_each_entry_safe(start, n, &h->migrate_starts, list) {
free_start(start);
}
}
static int match_event_data(struct trace_hash_item *item, void *data)
{
struct event_data *event_data = event_data_from_item(item);
int id = (int)(unsigned long)data;
return event_data->id == id;
}
static struct event_data *
find_event_data(struct handle_data *h, int id)
{
struct trace_hash_item *item;
unsigned long long key = trace_hash(id);
void *data = (void *)(unsigned long)id;
item = trace_hash_find(&h->events, key, match_event_data, data);
if (item)
return event_data_from_item(item);
return NULL;
}
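/*
 * Callback invoked for every record read from the trace. Looks up
 * the handle_data for @handle and dispatches the record to the
 * handler for its event type.
 */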
static void trace_profile_record(struct tracecmd_input *handle,
struct pevent_record *record)
{
static struct handle_data *last_handle;
struct pevent_record *stack_record;
struct event_data *event_data;
struct task_data *task;
struct handle_data *h;
struct pevent *pevent;
unsigned long long pid;
int cpu = record->cpu;
int id;
if (last_handle && last_handle->handle == handle)
h = last_handle;
else {
for (h = handles; h; h = h->next) {
if (h->handle == handle)
break;
}
if (!h)
die("Handle not found?");
last_handle = h;
}
if (record->missed_events)
handle_missed_events(h, cpu);
pevent = h->pevent;
id = pevent_data_type(pevent, record);
event_data = find_event_data(h, id);
if (!event_data)
return;
	/* Get the PID that recorded this event */
pevent_read_number_field(h->common_pid, record->data, &pid);
task = find_task(h, pid);
if (!task)
return;
stack_record = task->last_stack;
if (event_data->handle_event)
event_data->handle_event(h, pid, event_data, record, cpu);
else
handle_event_data(h, pid, event_data, record, cpu);
/* If the last stack hasn't changed, free it */
if (stack_record && task->last_stack == stack_record) {
free_record(stack_record);
task->last_stack = NULL;
}
}
static struct event_data *
add_event(struct handle_data *h, const char *system, const char *event_name,
enum event_data_type type)
{
struct event_format *event;
struct event_data *event_data;
event = pevent_find_event_by_name(h->pevent, system, event_name);
if (!event)
return NULL;
if (!h->common_pid) {
h->common_pid = pevent_find_common_field(event, "common_pid");
if (!h->common_pid)
die("No 'common_pid' found in event");
}
event_data = malloc(sizeof(*event_data));
if (!event_data) {
warning("Could not allocate event_data");
return NULL;
}
memset(event_data, 0, sizeof(*event_data));
event_data->id = event->id;
event_data->event = event;
event_data->type = type;
event_data->hash.key = trace_hash(event_data->event->id);
trace_hash_add(&h->events, &event_data->hash);
return event_data;
}
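/*
 * Tie a start event to its end event and resolve the fields that
 * are used to match one against the other.
 */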
static void
mate_events(struct handle_data *h, struct event_data *start,
const char *pid_field, const char *end_match_field,
struct event_data *end, const char *start_match_field,
int migrate, int global)
{
start->end = end;
end->start = start;
if (pid_field) {
start->pid_field = pevent_find_field(start->event, pid_field);
if (!start->pid_field)
die("Event: %s does not have field %s",
start->event->name, pid_field);
}
/* Field to match with end */
start->end_match_field = pevent_find_field(start->event, end_match_field);
if (!start->end_match_field)
die("Event: %s does not have field %s",
start->event->name, end_match_field);
/* Field to match with start */
end->start_match_field = pevent_find_field(end->event, start_match_field);
if (!end->start_match_field)
die("Event: %s does not have field %s",
end->event->name, start_match_field);
start->migrate = migrate;
start->global = global;
end->migrate = migrate;
end->global = global;
}
/**
* tracecmd_mate_events - match events to profile against
* @handle: The input handle where the events exist.
* @start_event: The event that starts the transaction
* @pid_field: Use this over common_pid (may be NULL to use common_pid)
* @end_match_field: The field that matches the end events @start_match_field
* @end_event: The event that ends the transaction
* @start_match_field: The end event field that matches start's @end_match_field
* @migrate: Can the transaction switch CPUs? 1 for yes, 0 for no
* @global: The events are global and not per task
*/
void tracecmd_mate_events(struct tracecmd_input *handle,
struct event_format *start_event,
const char *pid_field, const char *end_match_field,
struct event_format *end_event,
const char *start_match_field,
int migrate, int global)
{
struct handle_data *h;
struct event_data *start;
struct event_data *end;
for (h = handles; h; h = h->next) {
if (h->handle == handle)
break;
}
if (!h)
die("Handle not found for trace profile");
start = add_event(h, start_event->system, start_event->name,
EVENT_TYPE_USER_MATE);
end = add_event(h, end_event->system, end_event->name,
EVENT_TYPE_USER_MATE);
if (!start || !end)
return;
mate_events(h, start, pid_field, end_match_field, end, start_match_field,
migrate, global);
}
static void func_print(struct trace_seq *s, struct event_hash *event_hash)
{
const char *func;
func = pevent_find_function(event_hash->event_data->event->pevent,
event_hash->val);
if (func)
trace_seq_printf(s, "func: %s()", func);
else
trace_seq_printf(s, "func: 0x%llx", event_hash->val);
}
static void syscall_print(struct trace_seq *s, struct event_hash *event_hash)
{
#ifndef NO_AUDIT
const char *name = NULL;
int machine;
machine = audit_detect_machine();
if (machine < 0)
goto fail;
name = audit_syscall_to_name(event_hash->val, machine);
if (!name)
goto fail;
trace_seq_printf(s, "syscall:%s", name);
return;
fail:
#endif
trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name,
(int)event_hash->val);
}
/* From Linux include/linux/interrupt.h */
#define SOFTIRQS \
C(HI), \
C(TIMER), \
C(NET_TX), \
C(NET_RX), \
C(BLOCK), \
C(BLOCK_IOPOLL), \
C(TASKLET), \
C(SCHED), \
C(HRTIMER), \
C(RCU), \
C(NR),
#undef C
#define C(a) a##_SOFTIRQ
enum { SOFTIRQS };
#undef C
#define C(a) #a
static const char *softirq_map[] = { SOFTIRQS };
static void softirq_print(struct trace_seq *s, struct event_hash *event_hash)
{
int softirq = (int)event_hash->val;
if (softirq < NR_SOFTIRQ)
trace_seq_printf(s, "%s:%s", event_hash->event_data->event->name,
softirq_map[softirq]);
else
trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name,
softirq);
}
static void sched_switch_print(struct trace_seq *s, struct event_hash *event_hash)
{
const char states[] = TASK_STATE_TO_CHAR_STR;
int i;
trace_seq_printf(s, "%s:", event_hash->event_data->event->name);
if (event_hash->val) {
int val = event_hash->val;
for (i = 0; val && i < sizeof(states) - 1; i++, val >>= 1) {
if (val & 1)
trace_seq_putc(s, states[i+1]);
}
} else
trace_seq_putc(s, 'R');
}
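/*
 * sched_switch acts as both an end event (the wakeup or preemption
 * of next_pid completes here) and a start event (prev_pid now waits
 * to be scheduled back in).
 */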
static int handle_sched_switch_event(struct handle_data *h,
unsigned long long pid,
struct event_data *event_data,
struct pevent_record *record, int cpu)
{
struct task_data *task;
unsigned long long prev_pid;
unsigned long long prev_state;
unsigned long long next_pid;
struct start_data *start;
/* pid_field holds prev_pid, data_field holds prev_state */
pevent_read_number_field(event_data->pid_field,
record->data, &prev_pid);
pevent_read_number_field(event_data->data_field,
record->data, &prev_state);
/* only care about real states */
prev_state &= TASK_STATE_MAX - 1;
/* end_match_field holds next_pid */
pevent_read_number_field(event_data->end_match_field,
record->data, &next_pid);
task = find_task(h, prev_pid);
if (!task)
return -1;
if (!task->comm)
add_task_comm(task, h->switch_prev_comm, record);
if (prev_state)
task->sleeping = 1;
else
task->sleeping = 0;
/* task is being scheduled out. prev_state tells why */
start = add_start(task, event_data, record, prev_pid, prev_state);
task->last_start = start;
task->last_event = NULL;
task = find_task(h, next_pid);
if (!task)
return -1;
if (!task->comm)
add_task_comm(task, h->switch_next_comm, record);
/*
* If the next task was blocked, it required a wakeup to
* restart, and there should be one.
* But if it was preempted, we look for the previous sched switch.
* Unfortunately, we have to look for both types of events as
* we do not know why next_pid scheduled out.
*
* event_data->start holds the sched_wakeup event data.
*/
find_and_update_start(task, event_data->start, record->ts, next_pid);
/* Look for this task if it was preempted (no wakeup found). */
find_and_update_start(task, event_data, record->ts, next_pid);
return 0;
}
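/*
 * Stack trace events follow the event they belong to. Attach the
 * stack to the task's pending start (or last event), or hold on to
 * the record in case a function graph entry event needs it later.
 */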
static int handle_stacktrace_event(struct handle_data *h,
unsigned long long pid,
struct event_data *event_data,
struct pevent_record *record, int cpu)
{
struct task_data *orig_task;
struct task_data *proxy;
struct task_data *task;
unsigned long long size;
struct event_hash *event_hash;
struct start_data *start;
void *caller;
task = find_task(h, pid);
if (!task)
return -1;
if (task->last_stack) {
free_record(task->last_stack);
task->last_stack = NULL;
}
if ((proxy = task->proxy)) {
task->proxy = NULL;
orig_task = task;
task = proxy;
}
if (!task->last_start && !task->last_event) {
/*
* Save this stack in case function graph needs it.
* Need the original task, not a proxy.
*/
if (proxy)
task = orig_task;
tracecmd_record_ref(record);
task->last_stack = record;
return 0;
}
	/*
	 * The stack dump fills the rest of the record: data_field
	 * ("caller") gives the offset of the first return address,
	 * and the size is whatever remains of the record.
	 */
size = record->size - event_data->data_field->offset;
caller = record->data + event_data->data_field->offset;
/*
* If there's a "start" then don't add the stack until
* it finds a matching "end".
*/
if ((start = task->last_start)) {
tracecmd_record_ref(record);
start->stack.record = record;
start->stack.size = size;
start->stack.caller = caller;
task->last_start = NULL;
task->last_event = NULL;
return 0;
}
event_hash = task->last_event;
task->last_event = NULL;
add_event_stack(event_hash, caller, size, event_hash->last_time,
record->ts);
return 0;
}
static int handle_fgraph_entry_event(struct handle_data *h,
unsigned long long pid,
struct event_data *event_data,
struct pevent_record *record, int cpu)
{
unsigned long long size;
struct start_data *start;
struct task_data *task;
void *caller;
task = handle_start_event(h, event_data, record, pid);
if (!task)
return -1;
/*
* If a stack trace hasn't been used for a previous task,
* then it could be a function trace that we can use for
* the function graph. But stack traces come before the function
* graph events (unfortunately). So we need to attach the previous
* stack trace (if there is one) to this start event.
*/
if (task->last_stack) {
start = task->last_start;
record = task->last_stack;
size = record->size - stacktrace_event->data_field->offset;
caller = record->data + stacktrace_event->data_field->offset;
start->stack.record = record;
start->stack.size = size;
start->stack.caller = caller;
task->last_stack = NULL;
task->last_event = NULL;
}
/* Do not map stacks after this event to this event */
task->last_start = NULL;
return 0;
}
static int handle_fgraph_exit_event(struct handle_data *h,
unsigned long long pid,
struct event_data *event_data,
struct pevent_record *record, int cpu)
{
struct task_data *task;
task = handle_end_event(h, event_data, record, pid);
if (!task)
return -1;
/* Do not match stacks with function graph exit events */
task->last_event = NULL;
return 0;
}
static int handle_process_exec(struct handle_data *h,
unsigned long long pid,
struct event_data *event_data,
struct pevent_record *record, int cpu)
{
struct task_data *task;
unsigned long long val;
/* Task has execed, remove the comm for it */
if (event_data->data_field) {
pevent_read_number_field(event_data->data_field,
record->data, &val);
pid = val;
}
task = find_task(h, pid);
if (!task)
return -1;
free(task->comm);
task->comm = NULL;
return 0;
}
static int handle_sched_wakeup_event(struct handle_data *h,
unsigned long long pid,
struct event_data *event_data,
struct pevent_record *record, int cpu)
{
struct task_data *proxy;
struct task_data *task = NULL;
struct start_data *start;
unsigned long long success;
proxy = find_task(h, pid);
if (!proxy)
return -1;
/* If present, data_field holds "success" */
if (event_data->data_field) {
pevent_read_number_field(event_data->data_field,
record->data, &success);
/* If not a successful wakeup, ignore this */
if (!success)
return 0;
}
pevent_read_number_field(event_data->pid_field,
record->data, &pid);
task = find_task(h, pid);
if (!task)
return -1;
if (!task->comm)
add_task_comm(task, h->wakeup_comm, record);
/* if the task isn't sleeping, then ignore the wake up */
if (!task->sleeping) {
/* Ignore any following stack traces */
proxy->proxy = NULL;
proxy->last_start = NULL;
proxy->last_event = NULL;
return 0;
}
/* It's being woken up */
task->sleeping = 0;
/*
* We need the stack trace to be hooked to the woken up
* task, not the waker.
*/
proxy->proxy = task;
/* There should be a blocked schedule out of this task */
find_and_update_start(task, event_data->start, record->ts, pid);
/* Set this up for timing how long the wakeup takes */
start = add_start(task, event_data, record, pid, pid);
task->last_event = NULL;
task->last_start = start;
return 0;
}
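/**
 * trace_init_profile - set up profiling of an input handle
 * @handle: the trace input handle to profile
 * @hook: list of user defined start/end event pairs to match
 * @global: set if interrupt events should not be tied to tasks
 */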
void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook,
int global)
{
struct pevent *pevent = tracecmd_get_pevent(handle);
struct event_format **events;
struct format_field **fields;
struct handle_data *h;
struct event_data *event_data;
struct event_data *sched_switch;
struct event_data *sched_wakeup;
struct event_data *irq_entry;
struct event_data *irq_exit;
struct event_data *softirq_entry;
struct event_data *softirq_exit;
struct event_data *softirq_raise;
struct event_data *fgraph_entry;
struct event_data *fgraph_exit;
struct event_data *syscall_enter;
struct event_data *syscall_exit;
struct event_data *process_exec;
struct event_data *start_event;
struct event_data *end_event;
int ret;
int i;
tracecmd_set_show_data_func(handle, trace_profile_record);
h = malloc(sizeof(*h));
if (!h) {
warning("Could not allocate handle");
return;
	}
memset(h, 0, sizeof(*h));
h->next = handles;
handles = h;
trace_hash_init(&h->task_hash, 1024);
trace_hash_init(&h->events, 1024);
trace_hash_init(&h->group_hash, 512);
h->handle = handle;
h->pevent = pevent;
h->cpus = tracecmd_cpus(handle);
/*
* For streaming profiling, cpus will not be set up yet.
* In this case, we simply use the number of cpus on the
* system.
*/
if (!h->cpus)
h->cpus = count_cpus();
list_head_init(&h->migrate_starts);
h->cpu_starts = malloc(sizeof(*h->cpu_starts) * h->cpus);
if (!h->cpu_starts)
goto free_handle;
for (i = 0; i < h->cpus; i++)
list_head_init(&h->cpu_starts[i]);
h->cpu_data = malloc(h->cpus * sizeof(*h->cpu_data));
if (!h->cpu_data)
goto free_starts;
	memset(h->cpu_data, 0, h->cpus * sizeof(*h->cpu_data));
h->global_task = malloc(sizeof(struct task_data));
if (!h->global_task)
goto free_data;
memset(h->global_task, 0, sizeof(struct task_data));
init_task(h, h->global_task);
h->global_task->comm = strdup("Global Events");
if (!h->global_task->comm)
die("malloc");
h->global_task->pid = -1;
h->global_percpu_tasks = calloc(h->cpus, sizeof(struct task_data));
if (!h->global_percpu_tasks)
die("malloc");
for (i = 0; i < h->cpus; i++) {
init_task(h, &h->global_percpu_tasks[i]);
ret = asprintf(&h->global_percpu_tasks[i].comm,
"Global CPU[%d] Events", i);
if (ret < 0)
die("malloc");
h->global_percpu_tasks[i].pid = -1 - i;
}
irq_entry = add_event(h, "irq", "irq_handler_entry", EVENT_TYPE_IRQ);
irq_exit = add_event(h, "irq", "irq_handler_exit", EVENT_TYPE_IRQ);
softirq_entry = add_event(h, "irq", "softirq_entry", EVENT_TYPE_SOFTIRQ);
softirq_exit = add_event(h, "irq", "softirq_exit", EVENT_TYPE_SOFTIRQ);
softirq_raise = add_event(h, "irq", "softirq_raise", EVENT_TYPE_SOFTIRQ_RAISE);
sched_wakeup = add_event(h, "sched", "sched_wakeup", EVENT_TYPE_WAKEUP);
sched_switch = add_event(h, "sched", "sched_switch", EVENT_TYPE_SCHED_SWITCH);
fgraph_entry = add_event(h, "ftrace", "funcgraph_entry", EVENT_TYPE_FUNC);
fgraph_exit = add_event(h, "ftrace", "funcgraph_exit", EVENT_TYPE_FUNC);
syscall_enter = add_event(h, "raw_syscalls", "sys_enter", EVENT_TYPE_SYSCALL);
syscall_exit = add_event(h, "raw_syscalls", "sys_exit", EVENT_TYPE_SYSCALL);
process_exec = add_event(h, "sched", "sched_process_exec",
EVENT_TYPE_PROCESS_EXEC);
stacktrace_event = add_event(h, "ftrace", "kernel_stack", EVENT_TYPE_STACK);
if (stacktrace_event) {
stacktrace_event->handle_event = handle_stacktrace_event;
stacktrace_event->data_field = pevent_find_field(stacktrace_event->event,
"caller");
if (!stacktrace_event->data_field)
die("Event: %s does not have field caller",
stacktrace_event->event->name);
}
if (process_exec) {
process_exec->handle_event = handle_process_exec;
process_exec->data_field = pevent_find_field(process_exec->event,
"old_pid");
}
if (sched_switch) {
sched_switch->handle_event = handle_sched_switch_event;
sched_switch->data_field = pevent_find_field(sched_switch->event,
"prev_state");
if (!sched_switch->data_field)
die("Event: %s does not have field prev_state",
sched_switch->event->name);
h->switch_prev_comm = pevent_find_field(sched_switch->event,
"prev_comm");
if (!h->switch_prev_comm)
die("Event: %s does not have field prev_comm",
sched_switch->event->name);
h->switch_next_comm = pevent_find_field(sched_switch->event,
"next_comm");
if (!h->switch_next_comm)
die("Event: %s does not have field next_comm",
sched_switch->event->name);
sched_switch->print_func = sched_switch_print;
}
if (sched_switch && sched_wakeup) {
mate_events(h, sched_switch, "prev_pid", "next_pid",
sched_wakeup, "pid", 1, 0);
mate_events(h, sched_wakeup, "pid", "pid",
sched_switch, "prev_pid", 1, 0);
sched_wakeup->handle_event = handle_sched_wakeup_event;
/* The 'success' field may or may not be present */
sched_wakeup->data_field = pevent_find_field(sched_wakeup->event,
"success");
h->wakeup_comm = pevent_find_field(sched_wakeup->event, "comm");
if (!h->wakeup_comm)
die("Event: %s does not have field comm",
sched_wakeup->event->name);
}
if (irq_entry && irq_exit)
mate_events(h, irq_entry, NULL, "irq", irq_exit, "irq", 0, global);
if (softirq_entry)
softirq_entry->print_func = softirq_print;
if (softirq_exit)
softirq_exit->print_func = softirq_print;
if (softirq_raise)
softirq_raise->print_func = softirq_print;
if (softirq_entry && softirq_exit)
mate_events(h, softirq_entry, NULL, "vec", softirq_exit, "vec",
0, global);
if (softirq_entry && softirq_raise)
mate_events(h, softirq_raise, NULL, "vec", softirq_entry, "vec",
0, global);
if (fgraph_entry && fgraph_exit) {
mate_events(h, fgraph_entry, NULL, "func", fgraph_exit, "func", 1, 0);
fgraph_entry->handle_event = handle_fgraph_entry_event;
fgraph_exit->handle_event = handle_fgraph_exit_event;
fgraph_entry->print_func = func_print;
}
if (syscall_enter && syscall_exit) {
mate_events(h, syscall_enter, NULL, "id", syscall_exit, "id", 1, 0);
syscall_enter->print_func = syscall_print;
syscall_exit->print_func = syscall_print;
}
events = pevent_list_events(pevent, EVENT_SORT_ID);
if (!events)
die("malloc");
/* Add some other events */
event_data = add_event(h, "ftrace", "function", EVENT_TYPE_FUNC);
if (event_data) {
event_data->data_field =
pevent_find_field(event_data->event, "ip");
}
/* Add any user defined hooks */
for (; hook; hook = hook->next) {
start_event = add_event(h, hook->start_system, hook->start_event,
EVENT_TYPE_USER_MATE);
end_event = add_event(h, hook->end_system, hook->end_event,
EVENT_TYPE_USER_MATE);
if (!start_event) {
warning("Event %s not found", hook->start_event);
continue;
}
if (!end_event) {
warning("Event %s not found", hook->end_event);
continue;
}
mate_events(h, start_event, hook->pid, hook->start_match,
end_event, hook->end_match, hook->migrate,
hook->global);
}
/* Now add any defined event that we haven't processed */
for (i = 0; events[i]; i++) {
event_data = find_event_data(h, events[i]->id);
if (event_data)
continue;
		event_data = add_event(h, events[i]->system, events[i]->name,
				       EVENT_TYPE_UNDEFINED);
		if (!event_data)
			continue;
		fields = pevent_event_fields(events[i]);
		if (!fields)
			die("malloc");
		if (fields[0])
			event_data->data_field = fields[0];
free(fields);
}
return;
free_data:
free(h->cpu_data);
free_starts:
free(h->cpu_starts);
free_handle:
handles = h->next;
free(h);
warning("Failed handle allocations");
}
static void output_event_stack(struct pevent *pevent, struct stack_data *stack)
{
int longsize = pevent_get_long_size(pevent);
unsigned long long val;
const char *func;
unsigned long long stop = -1ULL;
void *ptr;
int i;
if (longsize < 8)
stop &= (1ULL << (longsize * 8)) - 1;
if (stack->count)
stack->time_avg = stack->time / stack->count;
printf(" <stack> %lld total:%lld min:%lld(ts:%lld.%06lld) max:%lld(ts:%lld.%06lld) avg=%lld\n",
stack->count, stack->time, stack->time_min,
nsecs_per_sec(stack->ts_min), mod_to_usec(stack->ts_min),
stack->time_max,
nsecs_per_sec(stack->ts_max), mod_to_usec(stack->ts_max),
stack->time_avg);
for (i = 0; i < stack->size; i += longsize) {
ptr = stack->caller + i;
switch (longsize) {
case 4:
/* todo, read value from pevent */
val = *(unsigned int *)ptr;
break;
case 8:
val = *(unsigned long long *)ptr;
break;
default:
die("Strange long size %d", longsize);
}
if (val == stop)
break;
func = pevent_find_function(pevent, val);
if (func)
printf(" => %s (0x%llx)\n", func, val);
else
printf(" => 0x%llx\n", val);
}
}
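/*
 * A node in the call chain tree built from the sorted stacks: one
 * entry per unique caller address at a given depth, with children
 * holding the entries for the next depth level.
 */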
struct stack_chain {
struct stack_chain *children;
unsigned long long val;
unsigned long long time;
unsigned long long time_min;
unsigned long long ts_min;
unsigned long long time_max;
unsigned long long ts_max;
unsigned long long time_avg;
unsigned long long count;
int percent;
int nr_children;
};
static int compare_chains(const void *a, const void *b)
{
const struct stack_chain * A = a;
const struct stack_chain * B = b;
if (A->time > B->time)
return -1;
if (A->time < B->time)
return 1;
/* If stacks don't use time, then use count */
if (A->count > B->count)
return -1;
if (A->count < B->count)
return 1;
return 0;
}
static int calc_percent(unsigned long long val, unsigned long long total)
{
return (val * 100 + total / 2) / total;
}
static int stack_overflows(struct stack_data *stack, int longsize, int level)
{
return longsize * level > stack->size - longsize;
}
static unsigned long long
stack_value(struct stack_data *stack, int longsize, int level)
{
void *ptr;
ptr = &stack->caller[longsize * level];
return longsize == 8 ? *(u64 *)ptr : *(unsigned *)ptr;
}
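/*
 * Build one level of the chain tree. @stacks must be sorted (see
 * compare_stacks) so that equal addresses at @level are adjacent;
 * each run of equal addresses becomes one chain entry whose children
 * are built recursively from that same run at @level + 1.
 */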
static struct stack_chain *
make_stack_chain(struct stack_data **stacks, int cnt, int longsize, int level,
int *nr_children)
{
struct stack_chain *chain;
unsigned long long total_time = 0;
unsigned long long total_count = 0;
unsigned long long time;
unsigned long long time_min;
unsigned long long ts_min;
unsigned long long time_max;
unsigned long long ts_max;
unsigned long long count;
unsigned long long stop = -1ULL;
int nr_chains = 0;
u64 last = 0;
u64 val;
int start;
int i;
int x;
if (longsize < 8)
stop &= (1ULL << (longsize * 8)) - 1;
/* First find out how many diffs there are */
for (i = 0; i < cnt; i++) {
if (stack_overflows(stacks[i], longsize, level))
continue;
val = stack_value(stacks[i], longsize, level);
if (val == stop)
continue;
if (!nr_chains || val != last)
nr_chains++;
last = val;
}
if (!nr_chains) {
*nr_children = 0;
return NULL;
}
chain = malloc(sizeof(*chain) * nr_chains);
	if (!chain) {
		warning("Could not allocate chain");
		*nr_children = 0;
		return NULL;
	}
memset(chain, 0, sizeof(*chain) * nr_chains);
x = 0;
count = 0;
start = 0;
time = 0;
	time_min = 0;
	ts_min = 0;
	time_max = 0;
	ts_max = 0;
for (i = 0; i < cnt; i++) {
if (stack_overflows(stacks[i], longsize, level)) {
start = i+1;
continue;
}
val = stack_value(stacks[i], longsize, level);
if (val == stop) {
start = i+1;
continue;
}
count += stacks[i]->count;
time += stacks[i]->time;
if (stacks[i]->time_max > time_max) {
time_max = stacks[i]->time_max;
ts_max = stacks[i]->ts_max;
}
if (i == start || stacks[i]->time_min < time_min) {
time_min = stacks[i]->time_min;
ts_min = stacks[i]->ts_min;
}
if (i == cnt - 1 ||
stack_overflows(stacks[i+1], longsize, level) ||
val != stack_value(stacks[i+1], longsize, level)) {
total_time += time;
total_count += count;
chain[x].val = val;
chain[x].time_avg = time / count;
chain[x].count = count;
chain[x].time = time;
chain[x].time_min = time_min;
chain[x].ts_min = ts_min;
chain[x].time_max = time_max;
chain[x].ts_max = ts_max;
chain[x].children =
make_stack_chain(&stacks[start], (i - start) + 1,
longsize, level+1,
&chain[x].nr_children);
x++;
start = i + 1;
count = 0;
time = 0;
			time_min = 0;
			ts_min = 0;
			time_max = 0;
			ts_max = 0;
}
}
qsort(chain, nr_chains, sizeof(*chain), compare_chains);
*nr_children = nr_chains;
/* Should never happen */
if (!total_time && !total_count)
return chain;
	/* Now calculate the percentages */
for (i = 0; i < nr_chains; i++) {
if (total_time)
chain[i].percent = calc_percent(chain[i].time, total_time);
/* In case stacks don't have time */
else if (total_count)
chain[i].percent = calc_percent(chain[i].count, total_count);
}
return chain;
}
static void free_chain(struct stack_chain *chain, int nr_chains)
{
int i;
if (!chain)
return;
for (i = 0; i < nr_chains; i++)
free_chain(chain[i].children, chain[i].nr_children);
free(chain);
}
#define INDENT 5
static void print_indent(int level, unsigned long long mask)
{
char line;
int p;
for (p = 0; p < level + 1; p++) {
if (mask & (1ULL << p))
line = '|';
else
line = ' ';
printf("%*c ", INDENT, line);
}
}
static void print_chain_func(struct pevent *pevent, struct stack_chain *chain)
{
unsigned long long val = chain->val;
const char *func;
func = pevent_find_function(pevent, val);
if (func)
printf("%s (0x%llx)\n", func, val);
else
printf("0x%llx\n", val);
}
static void output_chain(struct pevent *pevent, struct stack_chain *chain, int level,
int nr_chains, unsigned long long *mask)
{
struct stack_chain *child;
int nr_children;
int i;
char line = '|';
if (!nr_chains)
return;
*mask |= (1ULL << (level + 1));
print_indent(level + 1, *mask);
printf("\n");
for (i = 0; i < nr_chains; i++) {
print_indent(level, *mask);
printf("%*c ", INDENT, '+');
if (i == nr_chains - 1) {
*mask &= ~(1ULL << (level + 1));
line = ' ';
}
print_chain_func(pevent, &chain[i]);
print_indent(level, *mask);
printf("%*c ", INDENT, line);
printf(" %d%% (%lld)", chain[i].percent, chain[i].count);
if (chain[i].time)
printf(" time:%lld max:%lld(ts:%lld.%06lld) min:%lld(ts:%lld.%06lld) avg:%lld",
chain[i].time, chain[i].time_max,
nsecs_per_sec(chain[i].ts_max),
mod_to_usec(chain[i].ts_max),
chain[i].time_min,
nsecs_per_sec(chain[i].ts_min),
mod_to_usec(chain[i].ts_min),
chain[i].time_avg);
printf("\n");
for (child = chain[i].children, nr_children = chain[i].nr_children;
child && nr_children == 1;
nr_children = child->nr_children, child = child->children) {
print_indent(level, *mask);
printf("%*c ", INDENT, line);
printf(" ");
print_chain_func(pevent, child);
}
if (child)
output_chain(pevent, child, level+1, nr_children, mask);
print_indent(level + 1, *mask);
printf("\n");
}
*mask &= ~(1ULL << (level + 1));
print_indent(level, *mask);
printf("\n");
}
static int compare_stacks(const void *a, const void *b)
{
struct stack_data * const *A = a;
struct stack_data * const *B = b;
unsigned int sa, sb;
int size;
int i;
/* only compare up to the smaller size of the two */
if ((*A)->size > (*B)->size)
size = (*B)->size;
else
size = (*A)->size;
for (i = 0; i < size; i += sizeof(sa)) {
sa = *(unsigned *)&(*A)->caller[i];
sb = *(unsigned *)&(*B)->caller[i];
if (sa > sb)
return 1;
if (sa < sb)
return -1;
}
/* They are the same up to size. Then bigger size wins */
if ((*A)->size > (*B)->size)
return 1;
if ((*A)->size < (*B)->size)
return -1;
return 0;
}
static void output_stacks(struct pevent *pevent, struct trace_hash *stack_hash)
{
struct trace_hash_item **bucket;
struct trace_hash_item *item;
struct stack_data **stacks;
struct stack_chain *chain;
unsigned long long mask = 0;
int nr_chains;
int longsize = pevent_get_long_size(pevent);
int nr_stacks;
int i;
nr_stacks = 0;
trace_hash_for_each_bucket(bucket, stack_hash) {
trace_hash_for_each_item(item, bucket) {
nr_stacks++;
}
}
stacks = malloc(sizeof(*stacks) * nr_stacks);
if (!stacks) {
warning("Could not allocate stacks");
return;
}
nr_stacks = 0;
trace_hash_for_each_bucket(bucket, stack_hash) {
trace_hash_for_each_item(item, bucket) {
stacks[nr_stacks++] = stack_from_item(item);
}
}
qsort(stacks, nr_stacks, sizeof(*stacks), compare_stacks);
chain = make_stack_chain(stacks, nr_stacks, longsize, 0, &nr_chains);
output_chain(pevent, chain, 0, nr_chains, &mask);
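	/* Debugging: change "if (0)" to "if (1)" to also dump each raw stack */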
if (0)
for (i = 0; i < nr_stacks; i++)
output_event_stack(pevent, stacks[i]);
free(stacks);
free_chain(chain, nr_chains);
}
static void output_event(struct event_hash *event_hash)
{
struct event_data *event_data = event_hash->event_data;
struct pevent *pevent = event_data->event->pevent;
struct trace_seq s;
trace_seq_init(&s);
if (event_data->print_func)
event_data->print_func(&s, event_hash);
else if (event_data->type == EVENT_TYPE_FUNC)
func_print(&s, event_hash);
else
trace_seq_printf(&s, "%s:0x%llx",
event_data->event->name,
event_hash->val);
trace_seq_terminate(&s);
printf(" Event: %s (%lld)",
s.buffer, event_hash->count);
trace_seq_destroy(&s);
if (event_hash->time_total) {
event_hash->time_avg = event_hash->time_total / event_hash->count;
printf(" Total: %lld Avg: %lld Max: %lld(ts:%lld.%06lld) Min:%lld(ts:%lld.%06lld)",
event_hash->time_total, event_hash->time_avg,
event_hash->time_max,
nsecs_per_sec(event_hash->ts_max),
mod_to_usec(event_hash->ts_max),
event_hash->time_min,
nsecs_per_sec(event_hash->ts_min),
mod_to_usec(event_hash->ts_min));
}
printf("\n");
output_stacks(pevent, &event_hash->stacks);
}
static int compare_events(const void *a, const void *b)
{
struct event_hash * const *A = a;
struct event_hash * const *B = b;
const struct event_data *event_data_a = (*A)->event_data;
const struct event_data *event_data_b = (*B)->event_data;
/* Schedule switch goes first */
if (event_data_a->type == EVENT_TYPE_SCHED_SWITCH) {
if (event_data_b->type != EVENT_TYPE_SCHED_SWITCH)
return -1;
/* lower the state the better */
if ((*A)->val > (*B)->val)
return 1;
if ((*A)->val < (*B)->val)
return -1;
return 0;
} else if (event_data_b->type == EVENT_TYPE_SCHED_SWITCH)
return 1;
/* Wakeups are next */
if (event_data_a->type == EVENT_TYPE_WAKEUP) {
if (event_data_b->type != EVENT_TYPE_WAKEUP)
return -1;
return 0;
} else if (event_data_b->type == EVENT_TYPE_WAKEUP)
return 1;
if (event_data_a->id > event_data_b->id)
return 1;
if (event_data_a->id < event_data_b->id)
return -1;
if ((*A)->time_total > (*B)->time_total)
return -1;
if ((*A)->time_total < (*B)->time_total)
return 1;
return 0;
}
static void output_task(struct handle_data *h, struct task_data *task)
{
struct trace_hash_item **bucket;
struct trace_hash_item *item;
struct event_hash **events;
const char *comm;
int nr_events = 0;
int i;
if (task->group)
return;
if (task->comm)
comm = task->comm;
else
comm = pevent_data_comm_from_pid(h->pevent, task->pid);
if (task->pid < 0)
printf("%s\n", task->comm);
else
printf("\ntask: %s-%d\n", comm, task->pid);
trace_hash_for_each_bucket(bucket, &task->event_hash) {
trace_hash_for_each_item(item, bucket) {
nr_events++;
}
}
events = malloc(sizeof(*events) * nr_events);
if (!events) {
warning("Could not allocate events");
return;
}
i = 0;
trace_hash_for_each_bucket(bucket, &task->event_hash) {
trace_hash_for_each_item(item, bucket) {
events[i++] = event_from_item(item);
}
}
qsort(events, nr_events, sizeof(*events), compare_events);
for (i = 0; i < nr_events; i++)
output_event(events[i]);
free(events);
}
static void output_group(struct handle_data *h, struct group_data *group)
{
struct trace_hash_item **bucket;
struct trace_hash_item *item;
struct event_hash **events;
int nr_events = 0;
int i;
printf("\ngroup: %s\n", group->comm);
trace_hash_for_each_bucket(bucket, &group->event_hash) {
trace_hash_for_each_item(item, bucket) {
nr_events++;
}
}
events = malloc(sizeof(*events) * nr_events);
if (!events) {
warning("Could not allocate events");
return;
}
i = 0;
trace_hash_for_each_bucket(bucket, &group->event_hash) {
trace_hash_for_each_item(item, bucket) {
events[i++] = event_from_item(item);
}
}
qsort(events, nr_events, sizeof(*events), compare_events);
for (i = 0; i < nr_events; i++)
output_event(events[i]);
free(events);
}
static int compare_tasks(const void *a, const void *b)
{
struct task_data * const *A = a;
struct task_data * const *B = b;
if ((*A)->pid > (*B)->pid)
return 1;
else if ((*A)->pid < (*B)->pid)
return -1;
return 0;
}
static int compare_groups(const void *a, const void *b)
{
	struct group_data * const *A = a;
	struct group_data * const *B = b;
	return strcmp((*A)->comm, (*B)->comm);
}
static void free_event_hash(struct event_hash *event_hash)
{
struct trace_hash_item **bucket;
struct trace_hash_item *item;
struct stack_data *stack;
trace_hash_for_each_bucket(bucket, &event_hash->stacks) {
trace_hash_while_item(item, bucket) {
stack = stack_from_item(item);
trace_hash_del(&stack->hash);
free(stack);
}
}
trace_hash_free(&event_hash->stacks);
free(event_hash);
}
static void __free_task(struct task_data *task)
{
struct trace_hash_item **bucket;
struct trace_hash_item *item;
struct start_data *start;
struct event_hash *event_hash;
free(task->comm);
trace_hash_for_each_bucket(bucket, &task->start_hash) {
trace_hash_while_item(item, bucket) {
start = start_from_item(item);
if (start->stack.record)
free_record(start->stack.record);
list_del(&start->list);
trace_hash_del(item);
free(start);
}
}
trace_hash_free(&task->start_hash);
trace_hash_for_each_bucket(bucket, &task->event_hash) {
trace_hash_while_item(item, bucket) {
event_hash = event_from_item(item);
trace_hash_del(item);
free_event_hash(event_hash);
}
}
trace_hash_free(&task->event_hash);
if (task->last_stack)
free_record(task->last_stack);
}
static void free_task(struct task_data *task)
{
__free_task(task);
free(task);
}
static void free_group(struct group_data *group)
{
struct trace_hash_item **bucket;
struct trace_hash_item *item;
struct event_hash *event_hash;
free(group->comm);
trace_hash_for_each_bucket(bucket, &group->event_hash) {
trace_hash_while_item(item, bucket) {
event_hash = event_from_item(item);
trace_hash_del(item);
free_event_hash(event_hash);
}
}
trace_hash_free(&group->event_hash);
free(group);
}
static void show_global_task(struct handle_data *h,
struct task_data *task)
{
if (trace_hash_empty(&task->event_hash))
return;
output_task(h, task);
}
static void output_tasks(struct handle_data *h)
{
struct trace_hash_item **bucket;
struct trace_hash_item *item;
struct task_data **tasks;
int nr_tasks = 0;
int i;
trace_hash_for_each_bucket(bucket, &h->task_hash) {
trace_hash_for_each_item(item, bucket) {
nr_tasks++;
}
}
tasks = malloc(sizeof(*tasks) * nr_tasks);
if (!tasks) {
warning("Could not allocate tasks");
return;
}
nr_tasks = 0;
trace_hash_for_each_bucket(bucket, &h->task_hash) {
trace_hash_while_item(item, bucket) {
tasks[nr_tasks++] = task_from_item(item);
trace_hash_del(item);
}
}
qsort(tasks, nr_tasks, sizeof(*tasks), compare_tasks);
for (i = 0; i < nr_tasks; i++) {
output_task(h, tasks[i]);
free_task(tasks[i]);
}
free(tasks);
}
static void output_groups(struct handle_data *h)
{
struct trace_hash_item **bucket;
struct trace_hash_item *item;
struct group_data **groups;
int nr_groups = 0;
int i;
trace_hash_for_each_bucket(bucket, &h->group_hash) {
trace_hash_for_each_item(item, bucket) {
nr_groups++;
}
}
if (nr_groups == 0)
return;
groups = malloc(sizeof(*groups) * nr_groups);
if (!groups) {
warning("Could not allocate groups");
return;
}
nr_groups = 0;
trace_hash_for_each_bucket(bucket, &h->group_hash) {
trace_hash_while_item(item, bucket) {
groups[nr_groups++] = group_from_item(item);
trace_hash_del(item);
}
}
qsort(groups, nr_groups, sizeof(*groups), compare_groups);
for (i = 0; i < nr_groups; i++) {
output_group(h, groups[i]);
free_group(groups[i]);
}
free(groups);
}
static void output_handle(struct handle_data *h)
{
int i;
show_global_task(h, h->global_task);
for (i = 0; i < h->cpus; i++)
show_global_task(h, &h->global_percpu_tasks[i]);
output_groups(h);
output_tasks(h);
}
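/*
 * Move @stack into @event's stack hash, merging counts and times
 * if the same stack is already present.
 */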
static void merge_event_stack(struct event_hash *event,
struct stack_data *stack)
{
struct stack_data *exist;
struct trace_hash_item *item;
struct stack_match match;
match.caller = stack->caller;
match.size = stack->size;
item = trace_hash_find(&event->stacks, stack->hash.key, match_stack,
&match);
if (!item) {
trace_hash_add(&event->stacks, &stack->hash);
return;
}
exist = stack_from_item(item);
exist->count += stack->count;
exist->time += stack->time;
if (exist->time_max < stack->time_max) {
exist->time_max = stack->time_max;
exist->ts_max = stack->ts_max;
}
if (exist->time_min > stack->time_min) {
exist->time_min = stack->time_min;
exist->ts_min = stack->ts_min;
}
free(stack);
}
static void merge_stacks(struct event_hash *exist, struct event_hash *event)
{
struct stack_data *stack;
struct trace_hash_item *item;
struct trace_hash_item **bucket;
trace_hash_for_each_bucket(bucket, &event->stacks) {
trace_hash_while_item(item, bucket) {
stack = stack_from_item(item);
trace_hash_del(&stack->hash);
merge_event_stack(exist, stack);
}
}
}
static void merge_event_into_group(struct group_data *group,
struct event_hash *event)
{
struct event_hash *exist;
struct trace_hash_item *item;
struct event_data_match edata;
unsigned long long key;
if (event->event_data->type == EVENT_TYPE_WAKEUP) {
edata.event_data = event->event_data;
event->search_val = 0;
event->val = 0;
key = trace_hash((unsigned long)event->event_data);
} else if (event->event_data->type == EVENT_TYPE_SCHED_SWITCH) {
edata.event_data = event->event_data;
event->search_val = event->val;
key = (unsigned long)event->event_data +
((unsigned long)event->val * 2);
key = trace_hash(key);
} else {
key = event->hash.key;
}
edata.event_data = event->event_data;
edata.search_val = event->search_val;
edata.val = event->val;
item = trace_hash_find(&group->event_hash, key, match_event, &edata);
if (!item) {
event->hash.key = key;
trace_hash_add(&group->event_hash, &event->hash);
return;
}
exist = event_from_item(item);
exist->count += event->count;
exist->time_total += event->time_total;
if (exist->time_max < event->time_max) {
exist->time_max = event->time_max;
exist->ts_max = event->ts_max;
}
if (exist->time_min > event->time_min) {
exist->time_min = event->time_min;
exist->ts_min = event->ts_min;
}
merge_stacks(exist, event);
free_event_hash(event);
}
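/*
 * Find or create the group for @task's comm and move all of the
 * task's events into it (used when merging like comms).
 */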
static void add_group(struct handle_data *h, struct task_data *task)
{
unsigned long long key;
struct trace_hash_item *item;
struct group_data *grp;
struct trace_hash_item **bucket;
void *data = task->comm;
if (!task->comm)
return;
key = trace_hash_str(task->comm);
item = trace_hash_find(&h->group_hash, key, match_group, data);
if (item) {
grp = group_from_item(item);
} else {
grp = malloc(sizeof(*grp));
if (!grp) {
warning("Could not allocate group");
return;
}
memset(grp, 0, sizeof(*grp));
grp->comm = strdup(task->comm);
if (!grp->comm)
die("strdup");
grp->hash.key = key;
trace_hash_add(&h->group_hash, &grp->hash);
trace_hash_init(&grp->event_hash, 32);
}
task->group = grp;
trace_hash_for_each_bucket(bucket, &task->event_hash) {
trace_hash_while_item(item, bucket) {
struct event_hash *event_hash;
event_hash = event_from_item(item);
trace_hash_del(&event_hash->hash);
merge_event_into_group(grp, event_hash);
}
}
}
static void merge_tasks(struct handle_data *h)
{
struct trace_hash_item **bucket;
struct trace_hash_item *item;
if (!merge_like_comms)
return;
trace_hash_for_each_bucket(bucket, &h->task_hash) {
trace_hash_for_each_item(item, bucket)
add_group(h, task_from_item(item));
}
}
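/**
 * trace_profile - output the accumulated profile for every handle
 *
 * Called after all records have been passed to trace_profile_record().
 */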
int trace_profile(void)
{
struct handle_data *h;
for (h = handles; h; h = h->next) {
if (merge_like_comms)
merge_tasks(h);
output_handle(h);
trace_hash_free(&h->task_hash);
}
return 0;
}