blob: b79a58a4d26cb8f6028b58fc8e622a1c8263bdbf [file] [log] [blame]
From 0f4a6b445721feb10ca8e12c164c4889ccbf74c6 Mon Sep 17 00:00:00 2001
From: Carsten Emde <C.Emde@osadl.org>
Date: Mon, 18 Oct 2010 22:53:08 +0100
Subject: [PATCH] tracing: Add combined timer and wakeup latency histogram
commit cfaca74d233d07b0e29c134062a980d1e89ddd20 in tip.
The real-time wakeup latency tracer (wakeup_rt) and the wakeup latency
histogram (CONFIG_WAKEUP_LATENCY_HIST) monitor processes that have the
highest priority of the system throughout their entire wakeup processing
and do not share this priority with any other concurrently running or
scheduled process. Thus, this tracer should be able to determine a
system's worst-case latency. However, in a situation where interrupts
are temporarily disabled, the timer interrupt to enqueue a process is
already late, but the latency between enqueuing and wakeup still may be
normal. In this case, an erroneously low latency is recorded.
To overcome this limitation and to create a true recording of the
apparent latency of every single wakeup procedure, an additional tracer
was implemented. This tracer determines the offset between the intended
timeout and the real timeout of the timer interrupts; the
CONFIG_MISSED_TIMER_OFFSETS_HIST configuration item is used to enable
this tracer.
For the time being, however, these two tracers work independently; in
consequence, the worst-case timer offset and the worst-case wakeup latency
cannot simply be added to determine the overall scheduling latency, since the
two worst-case values normally are not derived at the same time from the
same process.
This patch adds the additional latency histogram "timerandwakeup"; it is
based on the existing wakeup latency histogram, except that the timer
latency is added to the recorded latency value, if the process was
enqueued and woken up as a result of an expired wakeup timer. The
"timerandwakeup" latency histogram is configured implicitly, if both
CONFIG_WAKEUP_LATENCY_HIST and CONFIG_MISSED_TIMER_OFFSETS_HIST are
configured, but must be enabled separately at run-time.
Signed-off-by: Carsten Emde <C.Emde@osadl.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt
index 6645057..0528201 100644
--- a/Documentation/trace/histograms.txt
+++ b/Documentation/trace/histograms.txt
@@ -36,14 +36,20 @@ Effective latencies are actually occuring during wakeup of a process. To
determine effective latencies, the kernel stores the time stamp when a
process is scheduled to be woken up, and determines the duration of the
wakeup time shortly before control is passed over to this process. Note
-that the apparent latency in user space may be considerably longer,
-since
-i) interrupts may be disabled preventing the scheduler from initiating
-the wakeup mechanism, and
-ii) the process may be interrupted after control is passed over to it
-but before user space execution takes place.
-- Configuration item (in the Kernel hacking/Tracers submenu)
+that the apparent latency in user space may be somewhat longer, since the
+process may be interrupted after control is passed over to it but before
+the execution in user space takes place. Simply measuring the interval
+between enqueuing and wakeup may also not appropriate in cases when a
+process is scheduled as a result of a timer expiration. The timer may have
+missed its deadline, e.g. due to disabled interrupts, but this latency
+would not be registered. Therefore, the offsets of missed timers are
+recorded in a separate histogram. If both wakeup latency and missed timer
+offsets are configured and enabled, a third histogram may be enabled that
+records the overall latency as a sum of the timer latency, if any, and the
+wakeup latency. This histogram is called "timerandwakeup".
+- Configuration items (in the Kernel hacking/Tracers submenu)
CONFIG_WAKEUP_LATENCY
+ CONFIG_MISSED_TIMER_OFSETS
* Usage
@@ -59,30 +65,34 @@ from shell command line level, or add
nodev /sys sysfs defaults 0 0
nodev /sys/kernel/debug debugfs defaults 0 0
-to the file /etc/fstab. All latency histogram related files are
+to the file /etc/fstab. All latency histogram related files are then
available in the directory /sys/kernel/debug/tracing/latency_hist. A
particular histogram type is enabled by writing non-zero to the related
variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
Select "preemptirqsoff" for the histograms of potential sources of
-latencies and "wakeup" for histograms of effective latencies. The
+latencies and "wakeup" for histograms of effective latencies etc. The
histogram data - one per CPU - are available in the files
/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
-/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx.
+/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
+/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
+/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
+/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
The histograms are reset by writing non-zero to the file "reset" in a
particular latency directory. To reset all latency data, use
#!/bin/sh
-HISTDIR=/sys/kernel/debug/tracing/latency_hist
+TRACINGDIR=/sys/kernel/debug/tracing
+HISTDIR=$TRACINGDIR/latency_hist
if test -d $HISTDIR
then
cd $HISTDIR
- for i in */reset
+ for i in `find . | grep /reset$`
do
echo 1 >$i
done
@@ -138,7 +148,7 @@ grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
To only collect wakeup latency data of a particular process, write the
PID of the requested process to
-/sys/kernel/debug/tracing/latency_hist/wakeup/pid.
+/sys/kernel/debug/tracing/latency_hist/wakeup/pid
PIDs are not considered, if this variable is set to 0.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7041cc9..595a960 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1592,6 +1592,9 @@ struct task_struct {
unsigned long trace_recursion;
#ifdef CONFIG_WAKEUP_LATENCY_HIST
u64 preempt_timestamp_hist;
+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
+ unsigned long timer_offset;
+#endif
#endif
#endif /* CONFIG_TRACING */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e966f40..62c8df8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -266,10 +266,24 @@ config WAKEUP_LATENCY_HIST
/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
+ If both Scheduling Latency Histogram and Missed Timer Offsets
+ Histogram are selected, additional histogram data will be collected
+ that contain, in addition to the wakeup latency, the timer latency, in
+ case the wakeup was triggered by an expired timer. These histograms
+ are available in the
+
+ /sys/kernel/debug/tracing/latency_hist/timerandwakeup
+
+ directory. They reflect the apparent interrupt and scheduling latency
+ and are best suitable to determine the worst-case latency of a given
+ system. To enable these histograms, write a non-zero number to
+
+ /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
+
config MISSED_TIMER_OFFSETS_HIST
depends on GENERIC_TIME
select GENERIC_TRACER
- bool "Missed timer offsets histogram"
+ bool "Missed Timer Offsets Histogram"
help
Generate a histogram of missed timer offsets in microseconds. The
histograms are disabled by default. To enable them, write a non-zero
@@ -281,6 +295,20 @@ config MISSED_TIMER_OFFSETS_HIST
/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
+ If both Scheduling Latency Histogram and Missed Timer Offsets
+ Histogram are selected, additional histogram data will be collected
+ that contain, in addition to the wakeup latency, the timer latency, in
+ case the wakeup was triggered by an expired timer. These histograms
+ are available in the
+
+ /sys/kernel/debug/tracing/latency_hist/timerandwakeup
+
+ directory. They reflect the apparent interrupt and scheduling latency
+ and are best suitable to determine the worst-case latency of a given
+ system. To enable these histograms, write a non-zero number to
+
+ /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
+
config SYSPROF_TRACER
bool "Sysprof Tracer"
depends on X86
diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c
index 7c26615..43dba1e 100644
--- a/kernel/trace/latency_hist.c
+++ b/kernel/trace/latency_hist.c
@@ -36,6 +36,7 @@ enum {
WAKEUP_LATENCY,
WAKEUP_LATENCY_SHAREDPRIO,
MISSED_TIMER_OFFSETS,
+ TIMERANDWAKEUP_LATENCY,
MAX_LATENCY_TYPE,
};
@@ -134,6 +135,17 @@ static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
static unsigned long missed_timer_offsets_pid;
#endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
+static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
+static struct enable_data timerandwakeup_enabled_data = {
+ .latency_type = TIMERANDWAKEUP_LATENCY,
+ .enabled = 0,
+};
+static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
+#endif
+
void notrace latency_hist(int latency_type, int cpu, unsigned long latency,
struct task_struct *p)
{
@@ -179,15 +191,23 @@ void notrace latency_hist(int latency_type, int cpu, unsigned long latency,
mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
break;
#endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+ case TIMERANDWAKEUP_LATENCY:
+ my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
+ mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
+ break;
+#endif
+
default:
return;
}
+ latency += my_hist->offset;
+
if (atomic_read(&my_hist->hist_mode) == 0)
return;
- latency += my_hist->offset;
-
if (latency < 0 || latency >= MAX_ENTRY_NUM) {
if (latency < 0)
my_hist->below_hist_bound_samples++;
@@ -203,7 +223,8 @@ void notrace latency_hist(int latency_type, int cpu, unsigned long latency,
defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
if (latency_type == WAKEUP_LATENCY ||
latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
- latency_type == MISSED_TIMER_OFFSETS) {
+ latency_type == MISSED_TIMER_OFFSETS ||
+ latency_type == TIMERANDWAKEUP_LATENCY) {
strncpy(mp->comm, p->comm, sizeof(mp->comm));
mp->pid = task_pid_nr(p);
mp->prio = p->prio;
@@ -384,6 +405,13 @@ latency_hist_reset(struct file *file, const char __user *a,
mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
break;
#endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+ case TIMERANDWAKEUP_LATENCY:
+ hist = &per_cpu(timerandwakeup_latency_hist, cpu);
+ mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
+ break;
+#endif
}
hist_reset(hist);
@@ -391,7 +419,8 @@ latency_hist_reset(struct file *file, const char __user *a,
defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
if (latency_type == WAKEUP_LATENCY ||
latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
- latency_type == MISSED_TIMER_OFFSETS) {
+ latency_type == MISSED_TIMER_OFFSETS ||
+ latency_type == TIMERANDWAKEUP_LATENCY) {
mp->comm[0] = '\0';
mp->prio = mp->pid = mp->latency = -1;
}
@@ -567,6 +596,14 @@ do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
}
break;
#endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+ case TIMERANDWAKEUP_LATENCY:
+ if (!wakeup_latency_enabled_data.enabled ||
+ !missed_timer_offsets_enabled_data.enabled)
+ return -EINVAL;
+ break;
+#endif
default:
break;
}
@@ -615,12 +652,18 @@ do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
per_cpu(wakeup_sharedprio, cpu) = 0;
}
}
+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
+ timerandwakeup_enabled_data.enabled = 0;
+#endif
break;
#endif
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
case MISSED_TIMER_OFFSETS:
unregister_trace_hrtimer_interrupt(
probe_hrtimer_interrupt);
+#ifdef CONFIG_WAKEUP_LATENCY_HIST
+ timerandwakeup_enabled_data.enabled = 0;
+#endif
break;
#endif
default:
@@ -861,10 +904,20 @@ static notrace void probe_wakeup_latency_hist_stop(struct rq *rq,
if (per_cpu(wakeup_sharedprio, cpu)) {
latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, next);
per_cpu(wakeup_sharedprio, cpu) = 0;
- } else
+ } else {
latency_hist(WAKEUP_LATENCY, cpu, latency, next);
+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
+ if (timerandwakeup_enabled_data.enabled) {
+ latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
+ next->timer_offset + latency, next);
+ }
+#endif
+ }
out_reset:
+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
+ next->timer_offset = 0;
+#endif
put_task_struct(cpu_wakeup_task);
per_cpu(wakeup_task, cpu) = NULL;
out:
@@ -888,6 +941,9 @@ static notrace void probe_hrtimer_interrupt(int cpu, long long latency_ns,
latency = (unsigned long) div_s64(-latency_ns, 1000);
latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, task);
+#ifdef CONFIG_WAKEUP_LATENCY_HIST
+ task->timer_offset = latency;
+#endif
}
}
#endif
@@ -1035,6 +1091,32 @@ static __init int latency_hist_init(void)
enable_root, (void *)&missed_timer_offsets_enabled_data,
&enable_fops);
#endif
+
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+ dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
+ latency_hist_root);
+ for_each_possible_cpu(i) {
+ sprintf(name, cpufmt, i);
+ entry = debugfs_create_file(name, 0444, dentry,
+ &per_cpu(timerandwakeup_latency_hist, i),
+ &latency_hist_fops);
+ my_hist = &per_cpu(timerandwakeup_latency_hist, i);
+ atomic_set(&my_hist->hist_mode, 1);
+ my_hist->min_lat = 0xFFFFFFFFUL;
+
+ sprintf(name, cpufmt_maxlatproc, i);
+ mp = &per_cpu(timerandwakeup_maxlatproc, i);
+ entry = debugfs_create_file(name, 0444, dentry, mp,
+ &maxlatproc_fops);
+ mp->prio = mp->pid = mp->latency = -1;
+ }
+ entry = debugfs_create_file("reset", 0644, dentry,
+ (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
+ entry = debugfs_create_file("timerandwakeup", 0644,
+ enable_root, (void *)&timerandwakeup_enabled_data,
+ &enable_fops);
+#endif
return 0;
}
--
1.7.1.1