| From 0f4a6b445721feb10ca8e12c164c4889ccbf74c6 Mon Sep 17 00:00:00 2001 |
| From: Carsten Emde <C.Emde@osadl.org> |
| Date: Mon, 18 Oct 2010 22:53:08 +0100 |
| Subject: [PATCH] tracing: Add combined timer and wakeup latency histogram |
| |
| commit cfaca74d233d07b0e29c134062a980d1e89ddd20 in tip. |
| |
| The real-time wakeup latency tracer (wakeup_rt) and the wakeup latency |
| histogram (CONFIG_WAKEUP_LATENCY_HIST) monitor processes that have the |
| highest priority of the system throughout their entire wakeup processing |
| and do not share this priority with any other concurrently running or |
| scheduled process. Thus, this tracer should be able to determine a |
| system's worst-case latency. However, in a situation where interrupts |
| are temporarily disabled, the timer interrupt to enqueue a process is |
| already late, but the latency between enqueuing and wakeup still may be |
| normal. In this case, an erroneously low latency is recorded. |
| |
| To overcome this limitation and to create a true recording of the |
| apparent latency of every single wakeup procedure, an additional tracer |
| was implemented. This tracer determines the offset between the intended |
| timeout and the real timeout of the timer interrupts; the |
| CONFIG_MISSED_TIMER_OFFSETS_HIST configuration item is used to enable |
| this tracer. |
| |
| For the time being, however, these two tracers work independently; in |
| consequence, the worst-case timer offset and the worst-case wakeup latency |
| cannot simply be added to determine the overall scheduling latency, since the |
| two worst-case values normally are not derived at the same time from the |
| same process. |
| |
| This patch adds the additional latency histogram "timerandwakeup"; it is |
| based on the existing wakeup latency histogram, except that the timer |
| latency is added to the recorded latency value, if the process was |
| enqueued and woken up as a result of an expired wakeup timer. The |
| "timerandwakeup" latency histogram is configured implicitly, if both |
| CONFIG_WAKEUP_LATENCY_HIST and CONFIG_MISSED_TIMER_OFFSETS_HIST are |
| configured, but must be enabled separately at run-time. |
| |
| Signed-off-by: Carsten Emde <C.Emde@osadl.org> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| |
| diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt |
| index 6645057..0528201 100644 |
| --- a/Documentation/trace/histograms.txt |
| +++ b/Documentation/trace/histograms.txt |
| @@ -36,14 +36,20 @@ Effective latencies are actually occuring during wakeup of a process. To |
| determine effective latencies, the kernel stores the time stamp when a |
| process is scheduled to be woken up, and determines the duration of the |
| wakeup time shortly before control is passed over to this process. Note |
| -that the apparent latency in user space may be considerably longer, |
| -since |
| -i) interrupts may be disabled preventing the scheduler from initiating |
| -the wakeup mechanism, and |
| -ii) the process may be interrupted after control is passed over to it |
| -but before user space execution takes place. |
| -- Configuration item (in the Kernel hacking/Tracers submenu) |
| +that the apparent latency in user space may be somewhat longer, since the |
| +process may be interrupted after control is passed over to it but before |
| +the execution in user space takes place. Simply measuring the interval |
| +between enqueuing and wakeup may also not appropriate in cases when a |
| +process is scheduled as a result of a timer expiration. The timer may have |
| +missed its deadline, e.g. due to disabled interrupts, but this latency |
| +would not be registered. Therefore, the offsets of missed timers are |
| +recorded in a separate histogram. If both wakeup latency and missed timer |
| +offsets are configured and enabled, a third histogram may be enabled that |
| +records the overall latency as a sum of the timer latency, if any, and the |
| +wakeup latency. This histogram is called "timerandwakeup". |
| +- Configuration items (in the Kernel hacking/Tracers submenu) |
| CONFIG_WAKEUP_LATENCY |
| + CONFIG_MISSED_TIMER_OFSETS |
| |
| |
| * Usage |
| @@ -59,30 +65,34 @@ from shell command line level, or add |
| nodev /sys sysfs defaults 0 0 |
| nodev /sys/kernel/debug debugfs defaults 0 0 |
| |
| -to the file /etc/fstab. All latency histogram related files are |
| +to the file /etc/fstab. All latency histogram related files are then |
| available in the directory /sys/kernel/debug/tracing/latency_hist. A |
| particular histogram type is enabled by writing non-zero to the related |
| variable in the /sys/kernel/debug/tracing/latency_hist/enable directory. |
| Select "preemptirqsoff" for the histograms of potential sources of |
| -latencies and "wakeup" for histograms of effective latencies. The |
| +latencies and "wakeup" for histograms of effective latencies etc. The |
| histogram data - one per CPU - are available in the files |
| |
| /sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx |
| /sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx |
| /sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx |
| -/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx. |
| +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx |
| +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx |
| +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx |
| +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx |
| |
| The histograms are reset by writing non-zero to the file "reset" in a |
| particular latency directory. To reset all latency data, use |
| |
| #!/bin/sh |
| |
| -HISTDIR=/sys/kernel/debug/tracing/latency_hist |
| +TRACINGDIR=/sys/kernel/debug/tracing |
| +HISTDIR=$TRACINGDIR/latency_hist |
| |
| if test -d $HISTDIR |
| then |
| cd $HISTDIR |
| - for i in */reset |
| + for i in `find . | grep /reset$` |
| do |
| echo 1 >$i |
| done |
| @@ -138,7 +148,7 @@ grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0 |
| To only collect wakeup latency data of a particular process, write the |
| PID of the requested process to |
| |
| -/sys/kernel/debug/tracing/latency_hist/wakeup/pid. |
| +/sys/kernel/debug/tracing/latency_hist/wakeup/pid |
| |
| PIDs are not considered, if this variable is set to 0. |
| |
| diff --git a/include/linux/sched.h b/include/linux/sched.h |
| index 7041cc9..595a960 100644 |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -1592,6 +1592,9 @@ struct task_struct { |
| unsigned long trace_recursion; |
| #ifdef CONFIG_WAKEUP_LATENCY_HIST |
| u64 preempt_timestamp_hist; |
| +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST |
| + unsigned long timer_offset; |
| +#endif |
| #endif |
| #endif /* CONFIG_TRACING */ |
| #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ |
| diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig |
| index e966f40..62c8df8 100644 |
| --- a/kernel/trace/Kconfig |
| +++ b/kernel/trace/Kconfig |
| @@ -266,10 +266,24 @@ config WAKEUP_LATENCY_HIST |
| |
| /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio |
| |
| + If both Scheduling Latency Histogram and Missed Timer Offsets |
| + Histogram are selected, additional histogram data will be collected |
| + that contain, in addition to the wakeup latency, the timer latency, in |
| + case the wakeup was triggered by an expired timer. These histograms |
| + are available in the |
| + |
| + /sys/kernel/debug/tracing/latency_hist/timerandwakeup |
| + |
| + directory. They reflect the apparent interrupt and scheduling latency |
| + and are best suitable to determine the worst-case latency of a given |
| + system. To enable these histograms, write a non-zero number to |
| + |
| + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup |
| + |
| config MISSED_TIMER_OFFSETS_HIST |
| depends on GENERIC_TIME |
| select GENERIC_TRACER |
| - bool "Missed timer offsets histogram" |
| + bool "Missed Timer Offsets Histogram" |
| help |
| Generate a histogram of missed timer offsets in microseconds. The |
| histograms are disabled by default. To enable them, write a non-zero |
| @@ -281,6 +295,20 @@ config MISSED_TIMER_OFFSETS_HIST |
| |
| /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets |
| |
| + If both Scheduling Latency Histogram and Missed Timer Offsets |
| + Histogram are selected, additional histogram data will be collected |
| + that contain, in addition to the wakeup latency, the timer latency, in |
| + case the wakeup was triggered by an expired timer. These histograms |
| + are available in the |
| + |
| + /sys/kernel/debug/tracing/latency_hist/timerandwakeup |
| + |
| + directory. They reflect the apparent interrupt and scheduling latency |
| + and are best suitable to determine the worst-case latency of a given |
| + system. To enable these histograms, write a non-zero number to |
| + |
| + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup |
| + |
| config SYSPROF_TRACER |
| bool "Sysprof Tracer" |
| depends on X86 |
| diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c |
| index 7c26615..43dba1e 100644 |
| --- a/kernel/trace/latency_hist.c |
| +++ b/kernel/trace/latency_hist.c |
| @@ -36,6 +36,7 @@ enum { |
| WAKEUP_LATENCY, |
| WAKEUP_LATENCY_SHAREDPRIO, |
| MISSED_TIMER_OFFSETS, |
| + TIMERANDWAKEUP_LATENCY, |
| MAX_LATENCY_TYPE, |
| }; |
| |
| @@ -134,6 +135,17 @@ static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc); |
| static unsigned long missed_timer_offsets_pid; |
| #endif |
| |
| +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ |
| + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) |
| +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist); |
| +static char *timerandwakeup_latency_hist_dir = "timerandwakeup"; |
| +static struct enable_data timerandwakeup_enabled_data = { |
| + .latency_type = TIMERANDWAKEUP_LATENCY, |
| + .enabled = 0, |
| +}; |
| +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc); |
| +#endif |
| + |
| void notrace latency_hist(int latency_type, int cpu, unsigned long latency, |
| struct task_struct *p) |
| { |
| @@ -179,15 +191,23 @@ void notrace latency_hist(int latency_type, int cpu, unsigned long latency, |
| mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); |
| break; |
| #endif |
| +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ |
| + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) |
| + case TIMERANDWAKEUP_LATENCY: |
| + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu); |
| + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); |
| + break; |
| +#endif |
| + |
| default: |
| return; |
| } |
| |
| + latency += my_hist->offset; |
| + |
| if (atomic_read(&my_hist->hist_mode) == 0) |
| return; |
| |
| - latency += my_hist->offset; |
| - |
| if (latency < 0 || latency >= MAX_ENTRY_NUM) { |
| if (latency < 0) |
| my_hist->below_hist_bound_samples++; |
| @@ -203,7 +223,8 @@ void notrace latency_hist(int latency_type, int cpu, unsigned long latency, |
| defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) |
| if (latency_type == WAKEUP_LATENCY || |
| latency_type == WAKEUP_LATENCY_SHAREDPRIO || |
| - latency_type == MISSED_TIMER_OFFSETS) { |
| + latency_type == MISSED_TIMER_OFFSETS || |
| + latency_type == TIMERANDWAKEUP_LATENCY) { |
| strncpy(mp->comm, p->comm, sizeof(mp->comm)); |
| mp->pid = task_pid_nr(p); |
| mp->prio = p->prio; |
| @@ -384,6 +405,13 @@ latency_hist_reset(struct file *file, const char __user *a, |
| mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); |
| break; |
| #endif |
| +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ |
| + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) |
| + case TIMERANDWAKEUP_LATENCY: |
| + hist = &per_cpu(timerandwakeup_latency_hist, cpu); |
| + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); |
| + break; |
| +#endif |
| } |
| |
| hist_reset(hist); |
| @@ -391,7 +419,8 @@ latency_hist_reset(struct file *file, const char __user *a, |
| defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) |
| if (latency_type == WAKEUP_LATENCY || |
| latency_type == WAKEUP_LATENCY_SHAREDPRIO || |
| - latency_type == MISSED_TIMER_OFFSETS) { |
| + latency_type == MISSED_TIMER_OFFSETS || |
| + latency_type == TIMERANDWAKEUP_LATENCY) { |
| mp->comm[0] = '\0'; |
| mp->prio = mp->pid = mp->latency = -1; |
| } |
| @@ -567,6 +596,14 @@ do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) |
| } |
| break; |
| #endif |
| +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ |
| + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) |
| + case TIMERANDWAKEUP_LATENCY: |
| + if (!wakeup_latency_enabled_data.enabled || |
| + !missed_timer_offsets_enabled_data.enabled) |
| + return -EINVAL; |
| + break; |
| +#endif |
| default: |
| break; |
| } |
| @@ -615,12 +652,18 @@ do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) |
| per_cpu(wakeup_sharedprio, cpu) = 0; |
| } |
| } |
| +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST |
| + timerandwakeup_enabled_data.enabled = 0; |
| +#endif |
| break; |
| #endif |
| #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST |
| case MISSED_TIMER_OFFSETS: |
| unregister_trace_hrtimer_interrupt( |
| probe_hrtimer_interrupt); |
| +#ifdef CONFIG_WAKEUP_LATENCY_HIST |
| + timerandwakeup_enabled_data.enabled = 0; |
| +#endif |
| break; |
| #endif |
| default: |
| @@ -861,10 +904,20 @@ static notrace void probe_wakeup_latency_hist_stop(struct rq *rq, |
| if (per_cpu(wakeup_sharedprio, cpu)) { |
| latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, next); |
| per_cpu(wakeup_sharedprio, cpu) = 0; |
| - } else |
| + } else { |
| latency_hist(WAKEUP_LATENCY, cpu, latency, next); |
| +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST |
| + if (timerandwakeup_enabled_data.enabled) { |
| + latency_hist(TIMERANDWAKEUP_LATENCY, cpu, |
| + next->timer_offset + latency, next); |
| + } |
| +#endif |
| + } |
| |
| out_reset: |
| +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST |
| + next->timer_offset = 0; |
| +#endif |
| put_task_struct(cpu_wakeup_task); |
| per_cpu(wakeup_task, cpu) = NULL; |
| out: |
| @@ -888,6 +941,9 @@ static notrace void probe_hrtimer_interrupt(int cpu, long long latency_ns, |
| |
| latency = (unsigned long) div_s64(-latency_ns, 1000); |
| latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, task); |
| +#ifdef CONFIG_WAKEUP_LATENCY_HIST |
| + task->timer_offset = latency; |
| +#endif |
| } |
| } |
| #endif |
| @@ -1035,6 +1091,32 @@ static __init int latency_hist_init(void) |
| enable_root, (void *)&missed_timer_offsets_enabled_data, |
| &enable_fops); |
| #endif |
| + |
| +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ |
| + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) |
| + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir, |
| + latency_hist_root); |
| + for_each_possible_cpu(i) { |
| + sprintf(name, cpufmt, i); |
| + entry = debugfs_create_file(name, 0444, dentry, |
| + &per_cpu(timerandwakeup_latency_hist, i), |
| + &latency_hist_fops); |
| + my_hist = &per_cpu(timerandwakeup_latency_hist, i); |
| + atomic_set(&my_hist->hist_mode, 1); |
| + my_hist->min_lat = 0xFFFFFFFFUL; |
| + |
| + sprintf(name, cpufmt_maxlatproc, i); |
| + mp = &per_cpu(timerandwakeup_maxlatproc, i); |
| + entry = debugfs_create_file(name, 0444, dentry, mp, |
| + &maxlatproc_fops); |
| + mp->prio = mp->pid = mp->latency = -1; |
| + } |
| + entry = debugfs_create_file("reset", 0644, dentry, |
| + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops); |
| + entry = debugfs_create_file("timerandwakeup", 0644, |
| + enable_root, (void *)&timerandwakeup_enabled_data, |
| + &enable_fops); |
| +#endif |
| return 0; |
| } |
| |
| -- |
| 1.7.1.1 |
| |