blob: 66338c4f7f4bdb6142c2b21a99f14292c708d050 [file] [log] [blame]
/*
* ring buffer based function tracer
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
* Originally taken from the RT patch by:
* Arnaldo Carvalho de Melo <acme@redhat.com>
*
* Based on code from the latency_tracer, that is:
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 Nadia Yvette Chambers
*/
#include <linux/ring_buffer.h>
#include <generated/utsrelease.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/irq_work.h>
#include <linux/debugfs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/sched/rt.h>
#include "trace.h"
#include "trace_output.h"
/*
* On boot up, the ring buffer is set to the minimum size, so that
* we do not waste memory on systems that are not using tracing.
*/
int ring_buffer_expanded;
/*
* We need to change this state when a selftest is running.
* A selftest will lurk into the ring-buffer to count the
* entries inserted during the selftest although some concurrent
* insertions into the ring-buffer such as trace_printk could occurred
* at the same time, giving false positive or negative results.
*/
static bool __read_mostly tracing_selftest_running;
/*
* If a tracer is running, we do not want to run SELFTEST.
*/
bool __read_mostly tracing_selftest_disabled;
/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
{ }
};
static struct tracer_flags dummy_tracer_flags = {
.val = 0,
.opts = dummy_tracer_opt
};
static int dummy_set_flag(u32 old_flags, u32 bit, int set)
{
return 0;
}
/*
* To prevent the comm cache from being overwritten when no
* tracing is active, only save the comm when a trace event
* occurred.
*/
static DEFINE_PER_CPU(bool, trace_cmdline_save);
/*
* When a reader is waiting for data, then this variable is
* set to true.
*/
static bool trace_wakeup_needed;
static struct irq_work trace_work_wakeup;
/*
* Kill all tracing for good (never come back).
* It is initialized to 1 but will turn to zero if the initialization
* of the tracer is successful. But that is the only place that sets
* this back to zero.
*/
static int tracing_disabled = 1;
DEFINE_PER_CPU(int, ftrace_cpu_disabled);
cpumask_var_t __read_mostly tracing_buffer_mask;
/*
* ftrace_dump_on_oops - variable to dump ftrace buffer on oops
*
* If there is an oops (or kernel panic) and the ftrace_dump_on_oops
* is set, then ftrace_dump is called. This will output the contents
* of the ftrace buffers to the console. This is very useful for
* capturing traces that lead to crashes and outputing it to a
* serial console.
*
* It is default off, but you can enable it with either specifying
* "ftrace_dump_on_oops" in the kernel command line, or setting
* /proc/sys/kernel/ftrace_dump_on_oops
* Set 1 if you want to dump buffers of all CPUs
* Set 2 if you want to dump the buffer of the CPU that triggered oops
*/
enum ftrace_dump_mode ftrace_dump_on_oops;
static int tracing_set_tracer(const char *buf);
#define MAX_TRACER_SIZE 100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;
static int __init set_cmdline_ftrace(char *str)
{
strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
default_bootup_tracer = bootup_tracer_buf;
/* We are using ftrace early, expand it */
ring_buffer_expanded = 1;
return 1;
}
__setup("ftrace=", set_cmdline_ftrace);
static int __init set_ftrace_dump_on_oops(char *str)
{
if (*str++ != '=' || !*str) {
ftrace_dump_on_oops = DUMP_ALL;
return 1;
}
if (!strcmp("orig_cpu", str)) {
ftrace_dump_on_oops = DUMP_ORIG;
return 1;
}
return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_options __initdata;
static int __init set_trace_boot_options(char *str)
{
strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
trace_boot_options = trace_boot_options_buf;
return 0;
}
__setup("trace_options=", set_trace_boot_options);
unsigned long long ns2usecs(cycle_t nsec)
{
nsec += 500;
do_div(nsec, 1000);
return nsec;
}
/*
* The global_trace is the descriptor that holds the tracing
* buffers for the live tracing. For each CPU, it contains
* a link list of pages that will store trace entries. The
* page descriptor of the pages in the memory is used to hold
* the link list by linking the lru item in the page descriptor
* to each of the pages in the buffer per CPU.
*
* For each active CPU there is a data field that holds the
* pages for the buffer for that CPU. Each CPU has the same number
* of pages allocated for its buffer.
*/
static struct trace_array global_trace;
static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
int filter_current_check_discard(struct ring_buffer *buffer,
struct ftrace_event_call *call, void *rec,
struct ring_buffer_event *event)
{
return filter_check_discard(call, rec, buffer, event);
}
EXPORT_SYMBOL_GPL(filter_current_check_discard);
cycle_t ftrace_now(int cpu)
{
u64 ts;
/* Early boot up does not have a buffer yet */
if (!global_trace.buffer)
return trace_clock_local();
ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
return ts;
}
/*
* The max_tr is used to snapshot the global_trace when a maximum
* latency is reached. Some tracers will use this to store a maximum
* trace while it continues examining live traces.
*
* The buffers for the max_tr are set up the same as the global_trace.
* When a snapshot is taken, the link list of the max_tr is swapped
* with the link list of the global_trace and the buffers are reset for
* the global_trace so the tracing can continue.
*/
static struct trace_array max_tr;
static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
int tracing_is_enabled(void)
{
return tracing_is_on();
}
/*
* trace_buf_size is the size in bytes that is allocated
* for a buffer. Note, the number of bytes is always rounded
* to page size.
*
* This number is purposely set to a low number of 16384.
* If the dump on oops happens, it will be much appreciated
* to not have to wait for all that output. Anyway this can be
* boot time and run time configurable.
*/
#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
/* trace_types holds a link list of available tracers. */
static struct tracer *trace_types __read_mostly;
/* current_trace points to the tracer that is currently active */
static struct tracer *current_trace __read_mostly = &nop_trace;
/*
* trace_types_lock is used to protect the trace_types list.
*/
static DEFINE_MUTEX(trace_types_lock);
/*
* serialize the access of the ring buffer
*
* ring buffer serializes readers, but it is low level protection.
* The validity of the events (which returns by ring_buffer_peek() ..etc)
* are not protected by ring buffer.
*
* The content of events may become garbage if we allow other process consumes
* these events concurrently:
* A) the page of the consumed events may become a normal page
* (not reader page) in ring buffer, and this page will be rewrited
* by events producer.
* B) The page of the consumed events may become a page for splice_read,
* and this page will be returned to system.
*
* These primitives allow multi process access to different cpu ring buffer
* concurrently.
*
* These primitives don't distinguish read-only and read-consume access.
* Multi read-only access are also serialized.
*/
#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
static inline void trace_access_lock(int cpu)
{
if (cpu == TRACE_PIPE_ALL_CPU) {
/* gain it for accessing the whole ring buffer. */
down_write(&all_cpu_access_lock);
} else {
/* gain it for accessing a cpu ring buffer. */
/* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
down_read(&all_cpu_access_lock);
/* Secondly block other access to this @cpu ring buffer. */
mutex_lock(&per_cpu(cpu_access_lock, cpu));
}
}
static inline void trace_access_unlock(int cpu)
{
if (cpu == TRACE_PIPE_ALL_CPU) {
up_write(&all_cpu_access_lock);
} else {
mutex_unlock(&per_cpu(cpu_access_lock, cpu));
up_read(&all_cpu_access_lock);
}
}
static inline void trace_access_lock_init(void)
{
int cpu;
for_each_possible_cpu(cpu)
mutex_init(&per_cpu(cpu_access_lock, cpu));
}
#else
static DEFINE_MUTEX(access_lock);
static inline void trace_access_lock(int cpu)
{
(void)cpu;
mutex_lock(&access_lock);
}
static inline void trace_access_unlock(int cpu)
{
(void)cpu;
mutex_unlock(&access_lock);
}
static inline void trace_access_lock_init(void)
{
}
#endif
/* trace_wait is a waitqueue for tasks blocked on trace_poll */
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
static int trace_stop_count;
static DEFINE_RAW_SPINLOCK(tracing_start_lock);
/**
* trace_wake_up - wake up tasks waiting for trace input
*
* Schedules a delayed work to wake up any task that is blocked on the
* trace_wait queue. These is used with trace_poll for tasks polling the
* trace.
*/
static void trace_wake_up(struct irq_work *work)
{
wake_up_all(&trace_wait);
}
/**
* tracing_on - enable tracing buffers
*
* This function enables tracing buffers that may have been
* disabled with tracing_off.
*/
void tracing_on(void)
{
if (global_trace.buffer)
ring_buffer_record_on(global_trace.buffer);
/*
* This flag is only looked at when buffers haven't been
* allocated yet. We don't really care about the race
* between setting this flag and actually turning
* on the buffer.
*/
global_trace.buffer_disabled = 0;
}
EXPORT_SYMBOL_GPL(tracing_on);
/**
* tracing_off - turn off tracing buffers
*
* This function stops the tracing buffers from recording data.
* It does not disable any overhead the tracers themselves may
* be causing. This function simply causes all recording to
* the ring buffers to fail.
*/
void tracing_off(void)
{
if (global_trace.buffer)
ring_buffer_record_off(global_trace.buffer);
/*
* This flag is only looked at when buffers haven't been
* allocated yet. We don't really care about the race
* between setting this flag and actually turning
* on the buffer.
*/
global_trace.buffer_disabled = 1;
}
EXPORT_SYMBOL_GPL(tracing_off);
/**
* tracing_is_on - show state of ring buffers enabled
*/
int tracing_is_on(void)
{
if (global_trace.buffer)
return ring_buffer_record_is_on(global_trace.buffer);
return !global_trace.buffer_disabled;
}
EXPORT_SYMBOL_GPL(tracing_is_on);
static int __init set_buf_size(char *str)
{
unsigned long buf_size;
if (!str)
return 0;
buf_size = memparse(str, &str);
/* nr_entries can not be zero */
if (buf_size == 0)
return 0;
trace_buf_size = buf_size;
return 1;
}
__setup("trace_buf_size=", set_buf_size);
static int __init set_tracing_thresh(char *str)
{
unsigned long threshold;
int ret;
if (!str)
return 0;
ret = kstrtoul(str, 0, &threshold);
if (ret < 0)
return 0;
tracing_thresh = threshold * 1000;
return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
return nsecs / 1000;
}
/* These must match the bit postions in trace_iterator_flags */
static const char *trace_options[] = {
"print-parent",
"sym-offset",
"sym-addr",
"verbose",
"raw",
"hex",
"bin",
"block",
"stacktrace",
"trace_printk",
"ftrace_preempt",
"branch",
"annotate",
"userstacktrace",
"sym-userobj",
"printk-msg-only",
"context-info",
"latency-format",
"sleep-time",
"graph-time",
"record-cmd",
"overwrite",
"disable_on_free",
"irq-info",
"markers",
NULL
};
static struct {
u64 (*func)(void);
const char *name;
int in_ns; /* is this clock in nanoseconds? */
} trace_clocks[] = {
{ trace_clock_local, "local", 1 },
{ trace_clock_global, "global", 1 },
{ trace_clock_counter, "counter", 0 },
ARCH_TRACE_CLOCKS
};
int trace_clock_id;
/*
* trace_parser_get_init - gets the buffer for trace parser
*/
int trace_parser_get_init(struct trace_parser *parser, int size)
{
memset(parser, 0, sizeof(*parser));
parser->buffer = kmalloc(size, GFP_KERNEL);
if (!parser->buffer)
return 1;
parser->size = size;
return 0;
}
/*
* trace_parser_put - frees the buffer for trace parser
*/
void trace_parser_put(struct trace_parser *parser)
{
kfree(parser->buffer);
}
/*
* trace_get_user - reads the user input string separated by space
* (matched by isspace(ch))
*
* For each string found the 'struct trace_parser' is updated,
* and the function returns.
*
* Returns number of bytes read.
*
* See kernel/trace/trace.h for 'struct trace_parser' details.
*/
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char ch;
size_t read = 0;
ssize_t ret;
if (!*ppos)
trace_parser_clear(parser);
ret = get_user(ch, ubuf++);
if (ret)
goto out;
read++;
cnt--;
/*
* The parser is not finished with the last write,
* continue reading the user input without skipping spaces.
*/
if (!parser->cont) {
/* skip white space */
while (cnt && isspace(ch)) {
ret = get_user(ch, ubuf++);
if (ret)
goto out;
read++;
cnt--;
}
/* only spaces were written */
if (isspace(ch)) {
*ppos += read;
ret = read;
goto out;
}
parser->idx = 0;
}
/* read the non-space input */
while (cnt && !isspace(ch)) {
if (parser->idx < parser->size - 1)
parser->buffer[parser->idx++] = ch;
else {
ret = -EINVAL;
goto out;
}
ret = get_user(ch, ubuf++);
if (ret)
goto out;
read++;
cnt--;
}
/* We either got finished input or we have to wait for another call. */
if (isspace(ch)) {
parser->buffer[parser->idx] = 0;
parser->cont = false;
} else {
parser->cont = true;
parser->buffer[parser->idx++] = ch;
}
*ppos += read;
ret = read;
out:
return ret;
}
ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
{
int len;
int ret;
if (!cnt)
return 0;
if (s->len <= s->readpos)
return -EBUSY;
len = s->len - s->readpos;
if (cnt > len)
cnt = len;
ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
if (ret == cnt)
return -EFAULT;
cnt -= ret;
s->readpos += cnt;
return cnt;
}
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
int len;
if (s->len <= s->readpos)
return -EBUSY;
len = s->len - s->readpos;
if (cnt > len)
cnt = len;
memcpy(buf, s->buffer + s->readpos, cnt);
s->readpos += cnt;
return cnt;
}
/*
* ftrace_max_lock is used to protect the swapping of buffers
* when taking a max snapshot. The buffers themselves are
* protected by per_cpu spinlocks. But the action of the swap
* needs its own lock.
*
* This is defined as a arch_spinlock_t in order to help
* with performance when lockdep debugging is enabled.
*
* It is also used in other places outside the update_max_tr
* so it needs to be defined outside of the
* CONFIG_TRACER_MAX_TRACE.
*/
static arch_spinlock_t ftrace_max_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
unsigned long __read_mostly tracing_thresh;
#ifdef CONFIG_TRACER_MAX_TRACE
unsigned long __read_mostly tracing_max_latency;
/*
* Copy the new maximum trace into the separate maximum-trace
* structure. (this way the maximum trace is permanently saved,
* for later retrieval via /sys/kernel/debug/tracing/latency_trace)
*/
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
struct trace_array_cpu *data = tr->data[cpu];
struct trace_array_cpu *max_data;
max_tr.cpu = cpu;
max_tr.time_start = data->preempt_timestamp;
max_data = max_tr.data[cpu];
max_data->saved_latency = tracing_max_latency;
max_data->critical_start = data->critical_start;
max_data->critical_end = data->critical_end;
memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
max_data->pid = tsk->pid;
max_data->uid = task_uid(tsk);
max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
max_data->policy = tsk->policy;
max_data->rt_priority = tsk->rt_priority;
/* record this tasks comm */
tracing_record_cmdline(tsk);
}
/**
* update_max_tr - snapshot all trace buffers from global_trace to max_tr
* @tr: tracer
* @tsk: the task with the latency
* @cpu: The cpu that initiated the trace.
*
* Flip the buffers between the @tr and the max_tr and record information
* about which task was the cause of this latency.
*/
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
struct ring_buffer *buf;
if (trace_stop_count)
return;
WARN_ON_ONCE(!irqs_disabled());
if (!current_trace->allocated_snapshot) {
/* Only the nop tracer should hit this when disabling */
WARN_ON_ONCE(current_trace != &nop_trace);
return;
}
arch_spin_lock(&ftrace_max_lock);
buf = tr->buffer;
tr->buffer = max_tr.buffer;
max_tr.buffer = buf;
__update_max_tr(tr, tsk, cpu);
arch_spin_unlock(&ftrace_max_lock);
}
/**
* update_max_tr_single - only copy one trace over, and reset the rest
* @tr - tracer
* @tsk - task with the latency
* @cpu - the cpu of the buffer to copy.
*
* Flip the trace of a single CPU buffer between the @tr and the max_tr.
*/
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
int ret;
if (trace_stop_count)
return;
WARN_ON_ONCE(!irqs_disabled());
if (!current_trace->allocated_snapshot) {
/* Only the nop tracer should hit this when disabling */
WARN_ON_ONCE(current_trace != &nop_trace);
return;
}
arch_spin_lock(&ftrace_max_lock);
ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
if (ret == -EBUSY) {
/*
* We failed to swap the buffer due to a commit taking
* place on this CPU. We fail to record, but we reset
* the max trace buffer (no one writes directly to it)
* and flag that it failed.
*/
trace_array_printk(&max_tr, _THIS_IP_,
"Failed to swap buffers due to commit in progress\n");
}
WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
__update_max_tr(tr, tsk, cpu);
arch_spin_unlock(&ftrace_max_lock);
}
#endif /* CONFIG_TRACER_MAX_TRACE */
static void default_wait_pipe(struct trace_iterator *iter)
{
DEFINE_WAIT(wait);
prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
/*
* The events can happen in critical sections where
* checking a work queue can cause deadlocks.
* After adding a task to the queue, this flag is set
* only to notify events to try to wake up the queue
* using irq_work.
*
* We don't clear it even if the buffer is no longer
* empty. The flag only causes the next event to run
* irq_work to do the work queue wake up. The worse
* that can happen if we race with !trace_empty() is that
* an event will cause an irq_work to try to wake up
* an empty queue.
*
* There's no reason to protect this flag either, as
* the work queue and irq_work logic will do the necessary
* synchronization for the wake ups. The only thing
* that is necessary is that the wake up happens after
* a task has been queued. It's OK for spurious wake ups.
*/
trace_wakeup_needed = true;
if (trace_empty(iter))
schedule();
finish_wait(&trace_wait, &wait);
}
/**
* register_tracer - register a tracer with the ftrace system.
* @type - the plugin for the tracer
*
* Register a new plugin tracer.
*/
int register_tracer(struct tracer *type)
{
struct tracer *t;
int ret = 0;
if (!type->name) {
pr_info("Tracer must have a name\n");
return -1;
}
if (strlen(type->name) >= MAX_TRACER_SIZE) {
pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
return -1;
}
mutex_lock(&trace_types_lock);
tracing_selftest_running = true;
for (t = trace_types; t; t = t->next) {
if (strcmp(type->name, t->name) == 0) {
/* already found */
pr_info("Tracer %s already registered\n",
type->name);
ret = -1;
goto out;
}
}
if (!type->set_flag)
type->set_flag = &dummy_set_flag;
if (!type->flags)
type->flags = &dummy_tracer_flags;
else
if (!type->flags->opts)
type->flags->opts = dummy_tracer_opt;
if (!type->wait_pipe)
type->wait_pipe = default_wait_pipe;
#ifdef CONFIG_FTRACE_STARTUP_TEST
if (type->selftest && !tracing_selftest_disabled) {
struct tracer *saved_tracer = current_trace;
struct trace_array *tr = &global_trace;
/*
* Run a selftest on this tracer.
* Here we reset the trace buffer, and set the current
* tracer to be this tracer. The tracer can then run some
* internal tracing to verify that everything is in order.
* If we fail, we do not register this tracer.
*/
tracing_reset_online_cpus(tr);
current_trace = type;
if (type->use_max_tr) {
/* If we expanded the buffers, make sure the max is expanded too */
if (ring_buffer_expanded)
ring_buffer_resize(max_tr.buffer, trace_buf_size,
RING_BUFFER_ALL_CPUS);
type->allocated_snapshot = true;
}
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
ret = type->selftest(type, tr);
/* the test is responsible for resetting too */
current_trace = saved_tracer;
if (ret) {
printk(KERN_CONT "FAILED!\n");
/* Add the warning after printing 'FAILED' */
WARN_ON(1);
goto out;
}
/* Only reset on passing, to avoid touching corrupted buffers */
tracing_reset_online_cpus(tr);
if (type->use_max_tr) {
type->allocated_snapshot = false;
/* Shrink the max buffer again */
if (ring_buffer_expanded)
ring_buffer_resize(max_tr.buffer, 1,
RING_BUFFER_ALL_CPUS);
}
printk(KERN_CONT "PASSED\n");
}
#endif
type->next = trace_types;
trace_types = type;
out:
tracing_selftest_running = false;
mutex_unlock(&trace_types_lock);
if (ret || !default_bootup_tracer)
goto out_unlock;
if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
goto out_unlock;
printk(KERN_INFO "Starting tracer '%s'\n", type->name);
/* Do we want this tracer to start on bootup? */
tracing_set_tracer(type->name);
default_bootup_tracer = NULL;
/* disable other selftests, since this will break it. */
tracing_selftest_disabled = 1;
#ifdef CONFIG_FTRACE_STARTUP_TEST
printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
type->name);
#endif
out_unlock:
return ret;
}
void tracing_reset(struct trace_array *tr, int cpu)
{
struct ring_buffer *buffer = tr->buffer;
if (!buffer)
return;
ring_buffer_record_disable(buffer);
/* Make sure all commits have finished */
synchronize_sched();
ring_buffer_reset_cpu(buffer, cpu);
ring_buffer_record_enable(buffer);
}
void tracing_reset_online_cpus(struct trace_array *tr)
{
struct ring_buffer *buffer = tr->buffer;
int cpu;
if (!buffer)
return;
ring_buffer_record_disable(buffer);
/* Make sure all commits have finished */
synchronize_sched();
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
ring_buffer_reset_cpu(buffer, cpu);
ring_buffer_record_enable(buffer);
}
void tracing_reset_current(int cpu)
{
tracing_reset(&global_trace, cpu);
}
void tracing_reset_current_online_cpus(void)
{
tracing_reset_online_cpus(&global_trace);
}
#define SAVED_CMDLINES 128
#define NO_CMDLINE_MAP UINT_MAX
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
static int cmdline_idx;
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;
static void trace_init_cmdlines(void)
{
memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
cmdline_idx = 0;
}
int is_tracing_stopped(void)
{
return trace_stop_count;
}
/**
* ftrace_off_permanent - disable all ftrace code permanently
*
* This should only be called when a serious anomally has
* been detected. This will turn off the function tracing,
* ring buffers, and other tracing utilites. It takes no
* locks and can be called from any context.
*/
void ftrace_off_permanent(void)
{
tracing_disabled = 1;
ftrace_stop();
tracing_off_permanent();
}
/**
* tracing_start - quick start of the tracer
*
* If tracing is enabled but was stopped by tracing_stop,
* this will start the tracer back up.
*/
void tracing_start(void)
{
struct ring_buffer *buffer;
unsigned long flags;
if (tracing_disabled)
return;
raw_spin_lock_irqsave(&tracing_start_lock, flags);
if (--trace_stop_count) {
if (trace_stop_count < 0) {
/* Someone screwed up their debugging */
WARN_ON_ONCE(1);
trace_stop_count = 0;
}
goto out;
}
/* Prevent the buffers from switching */
arch_spin_lock(&ftrace_max_lock);
buffer = global_trace.buffer;
if (buffer)
ring_buffer_record_enable(buffer);
buffer = max_tr.buffer;
if (buffer)
ring_buffer_record_enable(buffer);
arch_spin_unlock(&ftrace_max_lock);
ftrace_start();
out:
raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
}
/**
* tracing_stop - quick stop of the tracer
*
* Light weight way to stop tracing. Use in conjunction with
* tracing_start.
*/
void tracing_stop(void)
{
struct ring_buffer *buffer;
unsigned long flags;
ftrace_stop();
raw_spin_lock_irqsave(&tracing_start_lock, flags);
if (trace_stop_count++)
goto out;
/* Prevent the buffers from switching */
arch_spin_lock(&ftrace_max_lock);
buffer = global_trace.buffer;
if (buffer)
ring_buffer_record_disable(buffer);
buffer = max_tr.buffer;
if (buffer)
ring_buffer_record_disable(buffer);
arch_spin_unlock(&ftrace_max_lock);
out:
raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
}
void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
{
unsigned pid, idx;
if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
return;
/*
* It's not the end of the world if we don't get
* the lock, but we also don't want to spin
* nor do we want to disable interrupts,
* so if we miss here, then better luck next time.
*/
if (!arch_spin_trylock(&trace_cmdline_lock))
return;
idx = map_pid_to_cmdline[tsk->pid];
if (idx == NO_CMDLINE_MAP) {
idx = (cmdline_idx + 1) % SAVED_CMDLINES;
/*
* Check whether the cmdline buffer at idx has a pid
* mapped. We are going to overwrite that entry so we
* need to clear the map_pid_to_cmdline. Otherwise we
* would read the new comm for the old pid.
*/
pid = map_cmdline_to_pid[idx];
if (pid != NO_CMDLINE_MAP)
map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
map_cmdline_to_pid[idx] = tsk->pid;
map_pid_to_cmdline[tsk->pid] = idx;
cmdline_idx = idx;
}
memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
arch_spin_unlock(&trace_cmdline_lock);
}
void trace_find_cmdline(int pid, char comm[])
{
unsigned map;
if (!pid) {
strcpy(comm, "<idle>");
return;
}
if (WARN_ON_ONCE(pid < 0)) {
strcpy(comm, "<XXX>");
return;
}
if (pid > PID_MAX_DEFAULT) {
strcpy(comm, "<...>");
return;
}
preempt_disable();
arch_spin_lock(&trace_cmdline_lock);
map = map_pid_to_cmdline[pid];
if (map != NO_CMDLINE_MAP)
strcpy(comm, saved_cmdlines[map]);
else
strcpy(comm, "<...>");
arch_spin_unlock(&trace_cmdline_lock);
preempt_enable();
}
void tracing_record_cmdline(struct task_struct *tsk)
{
if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
return;
if (!__this_cpu_read(trace_cmdline_save))
return;
__this_cpu_write(trace_cmdline_save, false);
trace_save_cmdline(tsk);
}
void
tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
int pc)
{
struct task_struct *tsk = current;
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
#else
TRACE_FLAG_IRQS_NOSUPPORT |
#endif
((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
struct ring_buffer_event *
trace_buffer_lock_reserve(struct ring_buffer *buffer,
int type,
unsigned long len,
unsigned long flags, int pc)
{
struct ring_buffer_event *event;
event = ring_buffer_lock_reserve(buffer, len);
if (event != NULL) {
struct trace_entry *ent = ring_buffer_event_data(event);
tracing_generic_entry_update(ent, flags, pc);
ent->type = type;
}
return event;
}
void
__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
{
__this_cpu_write(trace_cmdline_save, true);
if (trace_wakeup_needed) {
trace_wakeup_needed = false;
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&trace_work_wakeup);
}
ring_buffer_unlock_commit(buffer, event);
}
static inline void
__trace_buffer_unlock_commit(struct ring_buffer *buffer,
struct ring_buffer_event *event,
unsigned long flags, int pc)
{
__buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
ftrace_trace_userstack(buffer, flags, pc);
}
void trace_buffer_unlock_commit(struct ring_buffer *buffer,
struct ring_buffer_event *event,
unsigned long flags, int pc)
{
__trace_buffer_unlock_commit(buffer, event, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
struct ring_buffer_event *
trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
int type, unsigned long len,
unsigned long flags, int pc)
{
*current_rb = global_trace.buffer;
return trace_buffer_lock_reserve(*current_rb,
type, len, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
struct ring_buffer_event *event,
unsigned long flags, int pc)
{
__trace_buffer_unlock_commit(buffer, event, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
struct ring_buffer_event *event,
unsigned long flags, int pc,
struct pt_regs *regs)
{
__buffer_unlock_commit(buffer, event);
ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
struct ring_buffer_event *event)
{
ring_buffer_discard_commit(buffer, event);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
void
trace_function(struct trace_array *tr,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
int pc)
{
struct ftrace_event_call *call = &event_function;
struct ring_buffer *buffer = tr->buffer;
struct ring_buffer_event *event;
struct ftrace_entry *entry;
/* If we are reading the ring buffer, don't trace */
if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
return;
event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
entry->ip = ip;
entry->parent_ip = parent_ip;
if (!filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
}
void
ftrace(struct trace_array *tr, struct trace_array_cpu *data,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
int pc)
{
if (likely(!atomic_read(&data->disabled)))
trace_function(tr, ip, parent_ip, flags, pc);
}
#ifdef CONFIG_STACKTRACE
#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
struct ftrace_stack {
unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
};
static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
static void __ftrace_trace_stack(struct ring_buffer *buffer,
unsigned long flags,
int skip, int pc, struct pt_regs *regs)
{
struct ftrace_event_call *call = &event_kernel_stack;
struct ring_buffer_event *event;
struct stack_entry *entry;
struct stack_trace trace;
int use_stack;
int size = FTRACE_STACK_ENTRIES;
trace.nr_entries = 0;
trace.skip = skip;
/*
* Since events can happen in NMIs there's no safe way to
* use the per cpu ftrace_stacks. We reserve it and if an interrupt
* or NMI comes in, it will just have to use the default
* FTRACE_STACK_SIZE.
*/
preempt_disable_notrace();
use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
/*
* We don't need any atomic variables, just a barrier.
* If an interrupt comes in, we don't care, because it would
* have exited and put the counter back to what we want.
* We just need a barrier to keep gcc from moving things
* around.
*/
barrier();
if (use_stack == 1) {
trace.entries = &__get_cpu_var(ftrace_stack).calls[0];
trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
if (regs)
save_stack_trace_regs(regs, &trace);
else
save_stack_trace(&trace);
if (trace.nr_entries > size)
size = trace.nr_entries;
} else
/* From now on, use_stack is a boolean */
use_stack = 0;
size *= sizeof(unsigned long);
event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
sizeof(*entry) + size, flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
memset(&entry->caller, 0, size);
if (use_stack)
memcpy(&entry->caller, trace.entries,
trace.nr_entries * sizeof(unsigned long));
else {
trace.max_entries = FTRACE_STACK_ENTRIES;
trace.entries = entry->caller;
if (regs)
save_stack_trace_regs(regs, &trace);
else
save_stack_trace(&trace);
}
entry->size = trace.nr_entries;
if (!filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
out:
/* Again, don't let gcc optimize things here */
barrier();
__this_cpu_dec(ftrace_stack_reserve);
preempt_enable_notrace();
}
void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
int skip, int pc, struct pt_regs *regs)
{
if (!(trace_flags & TRACE_ITER_STACKTRACE))
return;
__ftrace_trace_stack(buffer, flags, skip, pc, regs);
}
void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
int skip, int pc)
{
if (!(trace_flags & TRACE_ITER_STACKTRACE))
return;
__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
}
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
int pc)
{
__ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);
}
/**
* trace_dump_stack - record a stack back trace in the trace buffer
*/
void trace_dump_stack(void)
{
unsigned long flags;
if (tracing_disabled || tracing_selftest_running)
return;
local_save_flags(flags);
/* skipping 3 traces, seems to get us at the caller of this function */
__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);
}
static DEFINE_PER_CPU(int, user_stack_count);
void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
struct ftrace_event_call *call = &event_user_stack;
struct ring_buffer_event *event;
struct userstack_entry *entry;
struct stack_trace trace;
if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
/*
* NMIs can not handle page faults, even with fix ups.
* The save user stack can (and often does) fault.
*/
if (unlikely(in_nmi()))
return;
/*
* prevent recursion, since the user stack tracing may
* trigger other kernel events.
*/
preempt_disable();
if (__this_cpu_read(user_stack_count))
goto out;
__this_cpu_inc(user_stack_count);
event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
sizeof(*entry), flags, pc);
if (!event)
goto out_drop_count;
entry = ring_buffer_event_data(event);
entry->tgid = current->tgid;
memset(&entry->caller, 0, sizeof(entry->caller));
trace.nr_entries = 0;
trace.max_entries = FTRACE_STACK_ENTRIES;
trace.skip = 0;
trace.entries = entry->caller;
save_stack_trace_user(&trace);
if (!filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
out_drop_count:
__this_cpu_dec(user_stack_count);
out:
preempt_enable();
}
#ifdef UNUSED
static void __trace_userstack(struct trace_array *tr, unsigned long flags)
{
ftrace_trace_userstack(tr, flags, preempt_count());
}
#endif /* UNUSED */
#endif /* CONFIG_STACKTRACE */
/* created for use with alloc_percpu */
struct trace_buffer_struct {
char buffer[TRACE_BUF_SIZE];
};
static struct trace_buffer_struct *trace_percpu_buffer;
static struct trace_buffer_struct *trace_percpu_sirq_buffer;
static struct trace_buffer_struct *trace_percpu_irq_buffer;
static struct trace_buffer_struct *trace_percpu_nmi_buffer;
/*
* The buffer used is dependent on the context. There is a per cpu
* buffer for normal context, softirq contex, hard irq context and
* for NMI context. Thise allows for lockless recording.
*
* Note, if the buffers failed to be allocated, then this returns NULL
*/
static char *get_trace_buf(void)
{
struct trace_buffer_struct *percpu_buffer;
/*
* If we have allocated per cpu buffers, then we do not
* need to do any locking.
*/
if (in_nmi())
percpu_buffer = trace_percpu_nmi_buffer;
else if (in_irq())
percpu_buffer = trace_percpu_irq_buffer;
else if (in_softirq())
percpu_buffer = trace_percpu_sirq_buffer;
else
percpu_buffer = trace_percpu_buffer;
if (!percpu_buffer)
return NULL;
return this_cpu_ptr(&percpu_buffer->buffer[0]);
}
static int alloc_percpu_trace_buffer(void)
{
struct trace_buffer_struct *buffers;
struct trace_buffer_struct *sirq_buffers;
struct trace_buffer_struct *irq_buffers;
struct trace_buffer_struct *nmi_buffers;
buffers = alloc_percpu(struct trace_buffer_struct);
if (!buffers)
goto err_warn;
sirq_buffers = alloc_percpu(struct trace_buffer_struct);
if (!sirq_buffers)
goto err_sirq;
irq_buffers = alloc_percpu(struct trace_buffer_struct);
if (!irq_buffers)
goto err_irq;
nmi_buffers = alloc_percpu(struct trace_buffer_struct);
if (!nmi_buffers)
goto err_nmi;
trace_percpu_buffer = buffers;
trace_percpu_sirq_buffer = sirq_buffers;
trace_percpu_irq_buffer = irq_buffers;
trace_percpu_nmi_buffer = nmi_buffers;
return 0;
err_nmi:
free_percpu(irq_buffers);
err_irq:
free_percpu(sirq_buffers);
err_sirq:
free_percpu(buffers);
err_warn:
WARN(1, "Could not allocate percpu trace_printk buffer");
return -ENOMEM;
}
static int buffers_allocated;
void trace_printk_init_buffers(void)
{
if (buffers_allocated)
return;
if (alloc_percpu_trace_buffer())
return;
pr_info("ftrace: Allocated trace_printk buffers\n");
/* Expand the buffers to set size */
tracing_update_buffers();
buffers_allocated = 1;
/*
* trace_printk_init_buffers() can be called by modules.
* If that happens, then we need to start cmdline recording
* directly here. If the global_trace.buffer is already
* allocated here, then this was called by module code.
*/
if (global_trace.buffer)
tracing_start_cmdline_record();
}
void trace_printk_start_comm(void)
{
/* Start tracing comms if trace printk is set */
if (!buffers_allocated)
return;
tracing_start_cmdline_record();
}
static void trace_printk_start_stop_comm(int enabled)
{
if (!buffers_allocated)
return;
if (enabled)
tracing_start_cmdline_record();
else
tracing_stop_cmdline_record();
}
/**
* trace_vbprintk - write binary msg to tracing buffer
*
*/
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
struct ftrace_event_call *call = &event_bprint;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
struct trace_array *tr = &global_trace;
struct bprint_entry *entry;
unsigned long flags;
char *tbuffer;
int len = 0, size, pc;
if (unlikely(tracing_selftest_running || tracing_disabled))
return 0;
/* Don't pollute graph traces with trace_vprintk internals */
pause_graph_tracing();
pc = preempt_count();
preempt_disable_notrace();
tbuffer = get_trace_buf();
if (!tbuffer) {
len = 0;
goto out;
}
len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
goto out;
local_save_flags(flags);
size = sizeof(*entry) + sizeof(u32) * len;
buffer = tr->buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
entry->ip = ip;
entry->fmt = fmt;
memcpy(entry->buf, tbuffer, sizeof(u32) * len);
if (!filter_check_discard(call, entry, buffer, event)) {
__buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
}
out:
preempt_enable_notrace();
unpause_graph_tracing();
return len;
}
EXPORT_SYMBOL_GPL(trace_vbprintk);
int trace_array_printk(struct trace_array *tr,
unsigned long ip, const char *fmt, ...)
{
int ret;
va_list ap;
if (!(trace_flags & TRACE_ITER_PRINTK))
return 0;
va_start(ap, fmt);
ret = trace_array_vprintk(tr, ip, fmt, ap);
va_end(ap);
return ret;
}
int trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args)
{
struct ftrace_event_call *call = &event_print;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
int len = 0, size, pc;
struct print_entry *entry;
unsigned long flags;
char *tbuffer;
if (tracing_disabled || tracing_selftest_running)
return 0;
/* Don't pollute graph traces with trace_vprintk internals */
pause_graph_tracing();
pc = preempt_count();
preempt_disable_notrace();
tbuffer = get_trace_buf();
if (!tbuffer) {
len = 0;
goto out;
}
len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
if (len > TRACE_BUF_SIZE)
goto out;
local_save_flags(flags);
size = sizeof(*entry) + len + 1;
buffer = tr->buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
entry->ip = ip;
memcpy(&entry->buf, tbuffer, len);
entry->buf[len] = '\0';
if (!filter_check_discard(call, entry, buffer, event)) {
__buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
}
out:
preempt_enable_notrace();
unpause_graph_tracing();
return len;
}
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
return trace_array_vprintk(&global_trace, ip, fmt, args);
}
EXPORT_SYMBOL_GPL(trace_vprintk);
static void trace_iterator_increment(struct trace_iterator *iter)
{
struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
iter->idx++;
if (buf_iter)
ring_buffer_read(buf_iter, NULL);
}
static struct trace_entry *
peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
unsigned long *lost_events)
{
struct ring_buffer_event *event;
struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
if (buf_iter)
event = ring_buffer_iter_peek(buf_iter, ts);
else
event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
lost_events);
if (event) {
iter->ent_size = ring_buffer_event_length(event);
return ring_buffer_event_data(event);
}
iter->ent_size = 0;
return NULL;
}
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
unsigned long *missing_events, u64 *ent_ts)
{
struct ring_buffer *buffer = iter->tr->buffer;
struct trace_entry *ent, *next = NULL;
unsigned long lost_events = 0, next_lost = 0;
int cpu_file = iter->cpu_file;
u64 next_ts = 0, ts;
int next_cpu = -1;
int next_size = 0;
int cpu;
/*
* If we are in a per_cpu trace file, don't bother by iterating over
* all cpu and peek directly.
*/
if (cpu_file > TRACE_PIPE_ALL_CPU) {
if (ring_buffer_empty_cpu(buffer, cpu_file))
return NULL;
ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
if (ent_cpu)
*ent_cpu = cpu_file;
return ent;
}
for_each_tracing_cpu(cpu) {
if (ring_buffer_empty_cpu(buffer, cpu))
continue;
ent = peek_next_entry(iter, cpu, &ts, &lost_events);
/*
* Pick the entry with the smallest timestamp:
*/
if (ent && (!next || ts < next_ts)) {
next = ent;
next_cpu = cpu;
next_ts = ts;
next_lost = lost_events;
next_size = iter->ent_size;
}
}
iter->ent_size = next_size;
if (ent_cpu)
*ent_cpu = next_cpu;
if (ent_ts)
*ent_ts = next_ts;
if (missing_events)
*missing_events = next_lost;
return next;
}
/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
int *ent_cpu, u64 *ent_ts)
{
return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
}
/* Find the next real entry, and increment the iterator to the next entry */
void *trace_find_next_entry_inc(struct trace_iterator *iter)
{
iter->ent = __find_next_entry(iter, &iter->cpu,
&iter->lost_events, &iter->ts);
if (iter->ent)
trace_iterator_increment(iter);
return iter->ent ? iter : NULL;
}
static void trace_consume(struct trace_iterator *iter)
{
ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
&iter->lost_events);
}
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
struct trace_iterator *iter = m->private;
int i = (int)*pos;
void *ent;
WARN_ON_ONCE(iter->leftover);
(*pos)++;
/* can't go backwards */
if (iter->idx > i)
return NULL;
if (iter->idx < 0)
ent = trace_find_next_entry_inc(iter);
else
ent = iter;
while (ent && iter->idx < i)
ent = trace_find_next_entry_inc(iter);
iter->pos = *pos;
return ent;
}
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
struct trace_array *tr = iter->tr;
struct ring_buffer_event *event;
struct ring_buffer_iter *buf_iter;
unsigned long entries = 0;
u64 ts;
tr->data[cpu]->skipped_entries = 0;
buf_iter = trace_buffer_iter(iter, cpu);
if (!buf_iter)
return;
ring_buffer_iter_reset(buf_iter);
/*
* We could have the case with the max latency tracers
* that a reset never took place on a cpu. This is evident
* by the timestamp being before the start of the buffer.
*/
while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
if (ts >= iter->tr->time_start)
break;
entries++;
ring_buffer_read(buf_iter, NULL);
}
tr->data[cpu]->skipped_entries = entries;
}
/*
* The current tracer is copied to avoid a global locking
* all around.
*/
static void *s_start(struct seq_file *m, loff_t *pos)
{
struct trace_iterator *iter = m->private;
int cpu_file = iter->cpu_file;
void *p = NULL;
loff_t l = 0;
int cpu;
/*
* copy the tracer to avoid using a global lock all around.
* iter->trace is a copy of current_trace, the pointer to the
* name may be used instead of a strcmp(), as iter->trace->name
* will point to the same string as current_trace->name.
*/
mutex_lock(&trace_types_lock);
if (unlikely(current_trace && iter->trace->name != current_trace->name))
*iter->trace = *current_trace;
mutex_unlock(&trace_types_lock);
if (iter->snapshot && iter->trace->use_max_tr)
return ERR_PTR(-EBUSY);
if (!iter->snapshot)
atomic_inc(&trace_record_cmdline_disabled);
if (*pos != iter->pos) {
iter->ent = NULL;
iter->cpu = 0;
iter->idx = -1;
if (cpu_file == TRACE_PIPE_ALL_CPU) {
for_each_tracing_cpu(cpu)
tracing_iter_reset(iter, cpu);
} else
tracing_iter_reset(iter, cpu_file);
iter->leftover = 0;
for (p = iter; p && l < *pos; p = s_next(m, p, &l))
;
} else {
/*
* If we overflowed the seq_file before, then we want
* to just reuse the trace_seq buffer again.
*/
if (iter->leftover)
p = iter;
else {
l = *pos - 1;
p = s_next(m, p, &l);
}
}
trace_event_read_lock();
trace_access_lock(cpu_file);
return p;
}
static void s_stop(struct seq_file *m, void *p)
{
struct trace_iterator *iter = m->private;
if (iter->snapshot && iter->trace->use_max_tr)
return;
if (!iter->snapshot)
atomic_dec(&trace_record_cmdline_disabled);
trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
}
static void
get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries)
{
unsigned long count;
int cpu;
*total = 0;
*entries = 0;
for_each_tracing_cpu(cpu) {
count = ring_buffer_entries_cpu(tr->buffer, cpu);
/*
* If this buffer has skipped entries, then we hold all
* entries for the trace and we need to ignore the
* ones before the time stamp.
*/
if (tr->data[cpu]->skipped_entries) {
count -= tr->data[cpu]->skipped_entries;
/* total is the same as the entries */
*total += count;
} else
*total += count +
ring_buffer_overrun_cpu(tr->buffer, cpu);
*entries += count;
}
}
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n");
seq_puts(m, "# / _-----=> irqs-off \n");
seq_puts(m, "# | / _----=> need-resched \n");
seq_puts(m, "# || / _---=> hardirq/softirq \n");
seq_puts(m, "# ||| / _--=> preempt-depth \n");
seq_puts(m, "# |||| / delay \n");
seq_puts(m, "# cmd pid ||||| time | caller \n");
seq_puts(m, "# \\ / ||||| \\ | / \n");
}
static void print_event_info(struct trace_array *tr, struct seq_file *m)
{
unsigned long total;
unsigned long entries;
get_total_entries(tr, &total, &entries);
seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
entries, total, num_online_cpus());
seq_puts(m, "#\n");
}
static void print_func_help_header(struct trace_array *tr, struct seq_file *m)
{
print_event_info(tr, m);
seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
seq_puts(m, "# | | | | |\n");
}
static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
{
print_event_info(tr, m);
seq_puts(m, "# _-----=> irqs-off\n");
seq_puts(m, "# / _----=> need-resched\n");
seq_puts(m, "# | / _---=> hardirq/softirq\n");
seq_puts(m, "# || / _--=> preempt-depth\n");
seq_puts(m, "# ||| / delay\n");
seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n");
seq_puts(m, "# | | | |||| | |\n");
}
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
struct trace_array *tr = iter->tr;
struct trace_array_cpu *data = tr->data[tr->cpu];
struct tracer *type = current_trace;
unsigned long entries;
unsigned long total;
const char *name = "preemption";
name = type->name;
get_total_entries(tr, &total, &entries);
seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
name, UTS_RELEASE);
seq_puts(m, "# -----------------------------------"
"---------------------------------\n");
seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
" (M:%s VP:%d, KP:%d, SP:%d HP:%d",
nsecs_to_usecs(data->saved_latency),
entries,
total,
tr->cpu,
#if defined(CONFIG_PREEMPT_NONE)
"server",
#elif defined(CONFIG_PREEMPT_VOLUNTARY)
"desktop",
#elif defined(CONFIG_PREEMPT)
"preempt",
#else
"unknown",
#endif
/* These are reserved for later use */
0, 0, 0, 0);
#ifdef CONFIG_SMP
seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
seq_puts(m, ")\n");
#endif
seq_puts(m, "# -----------------\n");
seq_printf(m, "# | task: %.16s-%d "
"(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
data->comm, data->pid,
from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
data->policy, data->rt_priority);
seq_puts(m, "# -----------------\n");
if (data->critical_start) {
seq_puts(m, "# => started at: ");
seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
trace_print_seq(m, &iter->seq);
seq_puts(m, "\n# => ended at: ");
seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
trace_print_seq(m, &iter->seq);
seq_puts(m, "\n#\n");
}
seq_puts(m, "#\n");
}
static void test_cpu_buff_start(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
if (!(trace_flags & TRACE_ITER_ANNOTATE))
return;
if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
return;
if (cpumask_test_cpu(iter->cpu, iter->started))
return;
if (iter->tr->data[iter->cpu]->skipped_entries)
return;
cpumask_set_cpu(iter->cpu, iter->started);
/* Don't print started cpu buffer for the first entry of the trace */
if (iter->idx > 1)
trace_seq_printf(s, "##### CPU %u buffer started ####\n",
iter->cpu);
}
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
struct trace_entry *entry;
struct trace_event *event;
entry = iter->ent;
test_cpu_buff_start(iter);
event = ftrace_find_event(entry->type);
if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
if (!trace_print_lat_context(iter))
goto partial;
} else {
if (!trace_print_context(iter))
goto partial;
}
}
if (event)
return event->funcs->trace(iter, sym_flags, event);
if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
goto partial;
return TRACE_TYPE_HANDLED;
partial:
return TRACE_TYPE_PARTIAL_LINE;
}
static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
struct trace_entry *entry;
struct trace_event *event;
entry = iter->ent;
if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
if (!trace_seq_printf(s, "%d %d %llu ",
entry->pid, iter->cpu, iter->ts))
goto partial;
}
event = ftrace_find_event(entry->type);
if (event)
return event->funcs->raw(iter, 0, event);
if (!trace_seq_printf(s, "%d ?\n", entry->type))
goto partial;
return TRACE_TYPE_HANDLED;
partial:
return TRACE_TYPE_PARTIAL_LINE;
}
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
unsigned char newline = '\n';
struct trace_entry *entry;
struct trace_event *event;
entry = iter->ent;
if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
}
event = ftrace_find_event(entry->type);
if (event) {
enum print_line_t ret = event->funcs->hex(iter, 0, event);
if (ret != TRACE_TYPE_HANDLED)
return ret;
}
SEQ_PUT_FIELD_RET(s, newline);
return TRACE_TYPE_HANDLED;
}
static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
struct trace_entry *entry;
struct trace_event *event;
entry = iter->ent;
if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
SEQ_PUT_FIELD_RET(s, entry->pid);
SEQ_PUT_FIELD_RET(s, iter->cpu);
SEQ_PUT_FIELD_RET(s, iter->ts);
}
event = ftrace_find_event(entry->type);
return event ? event->funcs->binary(iter, 0, event) :
TRACE_TYPE_HANDLED;
}
int trace_empty(struct trace_iterator *iter)
{
struct ring_buffer_iter *buf_iter;
int cpu;
/* If we are looking at one CPU buffer, only check that one */
if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
cpu = iter->cpu_file;
buf_iter = trace_buffer_iter(iter, cpu);
if (buf_iter) {
if (!ring_buffer_iter_empty(buf_iter))
return 0;
} else {
if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
return 0;
}
return 1;
}
for_each_tracing_cpu(cpu) {
buf_iter = trace_buffer_iter(iter, cpu);
if (buf_iter) {
if (!ring_buffer_iter_empty(buf_iter))
return 0;
} else {
if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
return 0;
}
}
return 1;
}
/* Called with trace_event_read_lock() held. */
enum print_line_t print_trace_line(struct trace_iterator *iter)
{
enum print_line_t ret;
if (iter->lost_events &&
!trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
iter->cpu, iter->lost_events))
return TRACE_TYPE_PARTIAL_LINE;
if (iter->trace && iter->trace->print_line) {
ret = iter->trace->print_line(iter);
if (ret != TRACE_TYPE_UNHANDLED)
return ret;
}
if (iter->ent->type == TRACE_BPRINT &&
trace_flags & TRACE_ITER_PRINTK &&
trace_flags & TRACE_ITER_PRINTK_MSGONLY)
return trace_print_bprintk_msg_only(iter);
if (iter->ent->type == TRACE_PRINT &&
trace_flags & TRACE_ITER_PRINTK &&
trace_flags & TRACE_ITER_PRINTK_MSGONLY)
return trace_print_printk_msg_only(iter);
if (trace_flags & TRACE_ITER_BIN)
return print_bin_fmt(iter);
if (trace_flags & TRACE_ITER_HEX)
return print_hex_fmt(iter);
if (trace_flags & TRACE_ITER_RAW)
return print_raw_fmt(iter);
return print_trace_fmt(iter);
}
void trace_latency_header(struct seq_file *m)
{
struct trace_iterator *iter = m->private;
/* print nothing if the buffers are empty */
if (trace_empty(iter))
return;
if (iter->iter_flags & TRACE_FILE_LAT_FMT)
print_trace_header(m, iter);
if (!(trace_flags & TRACE_ITER_VERBOSE))
print_lat_help_header(m);
}
void trace_default_header(struct seq_file *m)
{
struct trace_iterator *iter = m->private;
if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
return;
if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
/* print nothing if the buffers are empty */
if (trace_empty(iter))
return;
print_trace_header(m, iter);
if (!(trace_flags & TRACE_ITER_VERBOSE))
print_lat_help_header(m);
} else {
if (!(trace_flags & TRACE_ITER_VERBOSE)) {
if (trace_flags & TRACE_ITER_IRQ_INFO)
print_func_help_header_irq(iter->tr, m);
else
print_func_help_header(iter->tr, m);
}
}
}
static void test_ftrace_alive(struct seq_file *m)
{
if (!ftrace_is_dead())
return;
seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n");
}
#ifdef CONFIG_TRACER_MAX_TRACE
static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
if (iter->trace->allocated_snapshot)
seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
else
seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
seq_printf(m, "# Snapshot commands:\n");
seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
seq_printf(m, "# Takes a snapshot of the main buffer.\n");
seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n");
seq_printf(m, "# (Doesn't have to be '2' works with any number that\n");
seq_printf(m, "# is not a '0' or '1')\n");
}
#else
/* Should never be called */
static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
#endif
static int s_show(struct seq_file *m, void *v)
{
struct trace_iterator *iter = v;
int ret;
if (iter->ent == NULL) {
if (iter->tr) {
seq_printf(m, "# tracer: %s\n", iter->trace->name);
seq_puts(m, "#\n");
test_ftrace_alive(m);
}
if (iter->snapshot && trace_empty(iter))
print_snapshot_help(m, iter);
else if (iter->trace && iter->trace->print_header)
iter->trace->print_header(m);
else
trace_default_header(m);
} else if (iter->leftover) {
/*
* If we filled the seq_file buffer earlier, we
* want to just show it now.
*/
ret = trace_print_seq(m, &iter->seq);
/* ret should this time be zero, but you never know */
iter->leftover = ret;
} else {
print_trace_line(iter);
ret = trace_print_seq(m, &iter->seq);
/*
* If we overflow the seq_file buffer, then it will
* ask us for this data again at start up.
* Use that instead.
* ret is 0 if seq_file write succeeded.
* -1 otherwise.
*/
iter->leftover = ret;
}
return 0;
}
static const struct seq_operations tracer_seq_ops = {
.start = s_start,
.next = s_next,
.stop = s_stop,
.show = s_show,
};
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
long cpu_file = (long) inode->i_private;
struct trace_iterator *iter;
int cpu;
if (tracing_disabled)
return ERR_PTR(-ENODEV);
iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
if (!iter)
return ERR_PTR(-ENOMEM);
iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
GFP_KERNEL);
if (!iter->buffer_iter)
goto release;
/*
* We make a copy of the current tracer to avoid concurrent
* changes on it while we are reading.
*/
mutex_lock(&trace_types_lock);
iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
if (!iter->trace)
goto fail;
*iter->trace = *current_trace;
if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
goto fail;
if (current_trace->print_max || snapshot)
iter->tr = &max_tr;
else
iter->tr = &global_trace;
iter->snapshot = snapshot;
iter->pos = -1;
mutex_init(&iter->mutex);
iter->cpu_file = cpu_file;
/* Notify the tracer early; before we stop tracing. */
if (iter->trace && iter->trace->open)
iter->trace->open(iter);
/* Annotate start of buffers if we had overruns */
if (ring_buffer_overruns(iter->tr->buffer))
iter->iter_flags |= TRACE_FILE_ANNOTATE;
/* Output in nanoseconds only if we are using a clock in nanoseconds. */
if (trace_clocks[trace_clock_id].in_ns)
iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
/* stop the trace while dumping if we are not opening "snapshot" */
if (!iter->snapshot)
tracing_stop();
if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
for_each_tracing_cpu(cpu) {
iter->buffer_iter[cpu] =
ring_buffer_read_prepare(iter->tr->buffer, cpu);
}
ring_buffer_read_prepare_sync();
for_each_tracing_cpu(cpu) {
ring_buffer_read_start(iter->buffer_iter[cpu]);
tracing_iter_reset(iter, cpu);
}
} else {
cpu = iter->cpu_file;
iter->buffer_iter[cpu] =
ring_buffer_read_prepare(iter->tr->buffer, cpu);
ring_buffer_read_prepare_sync();
ring_buffer_read_start(iter->buffer_iter[cpu]);
tracing_iter_reset(iter, cpu);
}
mutex_unlock(&trace_types_lock);
return iter;
fail:
mutex_unlock(&trace_types_lock);
kfree(iter->trace);
kfree(iter->buffer_iter);
release:
seq_release_private(inode, file);
return ERR_PTR(-ENOMEM);
}
int tracing_open_generic(struct inode *inode, struct file *filp)
{
if (tracing_disabled)
return -ENODEV;
filp->private_data = inode->i_private;
return 0;
}
static int tracing_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
struct trace_iterator *iter;
int cpu;
if (!(file->f_mode & FMODE_READ))
return 0;
iter = m->private;
mutex_lock(&trace_types_lock);
for_each_tracing_cpu(cpu) {
if (iter->buffer_iter[cpu])
ring_buffer_read_finish(iter->buffer_iter[cpu]);
}
if (iter->trace && iter->trace->close)
iter->trace->close(iter);
if (!iter->snapshot)
/* reenable tracing if it was previously enabled */
tracing_start();
mutex_unlock(&trace_types_lock);
mutex_destroy(&iter->mutex);
free_cpumask_var(iter->started);
kfree(iter->trace);
kfree(iter->buffer_iter);
seq_release_private(inode, file);
return 0;
}
static int tracing_open(struct inode *inode, struct file *file)
{
struct trace_iterator *iter;
int ret = 0;
/* If this file was open for write, then erase contents */
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC)) {
long cpu = (long) inode->i_private;
if (cpu == TRACE_PIPE_ALL_CPU)
tracing_reset_online_cpus(&global_trace);
else
tracing_reset(&global_trace, cpu);
}
if (file->f_mode & FMODE_READ) {
iter = __tracing_open(inode, file, false);
if (IS_ERR(iter))
ret = PTR_ERR(iter);
else if (trace_flags & TRACE_ITER_LATENCY_FMT)
iter->iter_flags |= TRACE_FILE_LAT_FMT;
}
return ret;
}
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
struct tracer *t = v;
(*pos)++;
if (t)
t = t->next;
return t;
}
static void *t_start(struct seq_file *m, loff_t *pos)
{
struct tracer *t;
loff_t l = 0;
mutex_lock(&trace_types_lock);
for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
;
return t;
}
static void t_stop(struct seq_file *m, void *p)
{
mutex_unlock(&trace_types_lock);
}
static int t_show(struct seq_file *m, void *v)
{
struct tracer *t = v;
if (!t)
return 0;
seq_printf(m, "%s", t->name);
if (t->next)
seq_putc(m, ' ');
else
seq_putc(m, '\n');
return 0;
}
static const struct seq_operations show_traces_seq_ops = {
.start = t_start,
.next = t_next,
.stop = t_stop,
.show = t_show,
};
static int show_traces_open(struct inode *inode, struct file *file)
{
if (tracing_disabled)
return -ENODEV;
return seq_open(file, &show_traces_seq_ops);
}
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
size_t count, loff_t *ppos)
{
return count;
}
static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
{
if (file->f_mode & FMODE_READ)
return seq_lseek(file, offset, origin);
else
return 0;
}
static const struct file_operations tracing_fops = {
.open = tracing_open,
.read = seq_read,
.write = tracing_write_stub,
.llseek = tracing_seek,
.release = tracing_release,
};
static const struct file_operations show_traces_fops = {
.open = show_traces_open,
.read = seq_read,
.release = seq_release,
.llseek = seq_lseek,
};
/*
* Only trace on a CPU if the bitmask is set:
*/
static cpumask_var_t tracing_cpumask;
/*
* The tracer itself will not take this lock, but still we want
* to provide a consistent cpumask to user-space:
*/
static DEFINE_MUTEX(tracing_cpumask_update_lock);
/*
* Temporary storage for the character representation of the
* CPU bitmask (and one more byte for the newline):
*/
static char mask_str[NR_CPUS + 1];
static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
int len;
mutex_lock(&tracing_cpumask_update_lock);
len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
if (count - len < 2) {
count = -EINVAL;
goto out_err;
}
len += sprintf(mask_str + len, "\n");
count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
out_err:
mutex_unlock(&tracing_cpumask_update_lock);
return count;
}
static ssize_t
tracing_cpumask_write(struct file *filp, const char __user *ubuf,
size_t count, loff_t *ppos)
{
int err, cpu;
cpumask_var_t tracing_cpumask_new;
if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
if (err)
goto err_unlock;
mutex_lock(&tracing_cpumask_update_lock);
local_irq_disable();
arch_spin_lock(&ftrace_max_lock);
for_each_tracing_cpu(cpu) {
/*
* Increase/decrease the disabled counter if we are
* about to flip a bit in the cpumask:
*/
if (cpumask_test_cpu(cpu, tracing_cpumask) &&
!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_inc(&global_trace.data[cpu]->disabled);
ring_buffer_record_disable_cpu(global_trace.buffer, cpu);
}
if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_dec(&global_trace.data[cpu]->disabled);
ring_buffer_record_enable_cpu(global_trace.buffer, cpu);
}
}
arch_spin_unlock(&ftrace_max_lock);
local_irq_enable();
cpumask_copy(tracing_cpumask, tracing_cpumask_new);
mutex_unlock(&tracing_cpumask_update_lock);
free_cpumask_var(tracing_cpumask_new);
return count;
err_unlock:
free_cpumask_var(tracing_cpumask_new);
return err;
}
static const struct file_operations tracing_cpumask_fops = {
.open = tracing_open_generic,
.read = tracing_cpumask_read,
.write = tracing_cpumask_write,
.llseek = generic_file_llseek,
};
static int tracing_trace_options_show(struct seq_file *m, void *v)
{
struct tracer_opt *trace_opts;
u32 tracer_flags;
int i;
mutex_lock(&trace_types_lock);
tracer_flags = current_trace->flags->val;
trace_opts = current_trace->flags->opts;
for (i = 0; trace_options[i]; i++) {
if (trace_flags & (1 << i))
seq_printf(m, "%s\n", trace_options[i]);
else
seq_printf(m, "no%s\n", trace_options[i]);
}
for (i = 0; trace_opts[i].name; i++) {
if (tracer_flags & trace_opts[i].bit)
seq_printf(m, "%s\n", trace_opts[i].name);
else
seq_printf(m, "no%s\n", trace_opts[i].name);
}
mutex_unlock(&trace_types_lock);
return 0;
}
static int __set_tracer_option(struct tracer *trace,
struct tracer_flags *tracer_flags,
struct tracer_opt *opts, int neg)
{
int ret;
ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
if (ret)
return ret;
if (neg)
tracer_flags->val &= ~opts->bit;
else
tracer_flags->val |= opts->bit;
return 0;
}
/* Try to assign a tracer specific option */
static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
{
struct tracer_flags *tracer_flags = trace->flags;
struct tracer_opt *opts = NULL;
int i;
for (i = 0; tracer_flags->opts[i].name; i++) {
opts = &tracer_flags->opts[i];
if (strcmp(cmp, opts->name) == 0)
return __set_tracer_option(trace, trace->flags,
opts, neg);
}
return -EINVAL;
}
/* Some tracers require overwrite to stay enabled */
int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
{
if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
return -1;
return 0;
}
int set_tracer_flag(unsigned int mask, int enabled)
{
/* do nothing if flag is already set */
if (!!(trace_flags & mask) == !!enabled)
return 0;
/* Give the tracer a chance to approve the change */
if (current_trace->flag_changed)
if (current_trace->flag_changed(current_trace, mask, !!enabled))
return -EINVAL;
if (enabled)
trace_flags |= mask;
else
trace_flags &= ~mask;
if (mask == TRACE_ITER_RECORD_CMD)
trace_event_enable_cmd_record(enabled);
if (mask == TRACE_ITER_OVERWRITE) {
ring_buffer_change_overwrite(global_trace.buffer, enabled);
#ifdef CONFIG_TRACER_MAX_TRACE
ring_buffer_change_overwrite(max_tr.buffer, enabled);
#endif
}
if (mask == TRACE_ITER_PRINTK)
trace_printk_start_stop_comm(enabled);
return 0;
}
static int trace_set_options(char *option)
{
char *cmp