| From 868baf07b1a259f5f3803c1dc2777b6c358f83cf Mon Sep 17 00:00:00 2001 |
| From: Steven Rostedt <srostedt@redhat.com> |
| Date: Thu, 10 Feb 2011 21:26:13 -0500 |
| Subject: ftrace: Fix memory leak with function graph and cpu hotplug |
| |
| From: Steven Rostedt <srostedt@redhat.com> |
| |
| commit 868baf07b1a259f5f3803c1dc2777b6c358f83cf upstream. |
| |
When the function graph tracer starts, it needs to make a special
stack for each task to save the real return addresses of the
tasks' function calls. This stack is created for all running
tasks, as well as for any new tasks.
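
For reference, that per-task stack is an array of struct
ftrace_ret_stack entries. The layout below is a simplified sketch
(the real kernel struct carries additional fields depending on
config options), not the exact definition:

	/*
	 * Simplified sketch: one entry per nested call. The tracer
	 * replaces the real return address on the kernel stack with
	 * a trampoline, and the original is saved here so the task
	 * can still return to the right place.
	 */
	struct ftrace_ret_stack {
		unsigned long		ret;	/* original return address */
		unsigned long		func;	/* function being traced */
		unsigned long long	calltime; /* timestamp at entry */
	};

	/* each task gets FTRACE_RETFUNC_DEPTH of these, see below */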
| |
On CPU hotplug, the idle task of the incoming CPU will allocate a
stack as well when init_idle() is called. The problem is that CPU
hotplug does not create a new idle task; instead it reuses the
idle task that existed when the CPU went down.
| |
ftrace_graph_init_task() adds a new ret_stack to the task it is
given. Because a cloned task starts out pointing at its parent's
ret_stack, the function does not check whether the task's
ret_stack is already set; it simply replaces the pointer. When the
CPU hotplug code starts a CPU up again, it therefore allocates a
new stack even though one already exists for the idle task,
leaking the old one.
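
To illustrate, this is roughly the pre-patch
ftrace_graph_init_task() (paraphrased from the code this patch
modifies, not verbatim):

	void ftrace_graph_init_task(struct task_struct *t)
	{
		/* Make sure we do not use the parent ret_stack */
		t->ret_stack = NULL;	/* correct for a clone, but for a
					 * reused idle task this drops its
					 * own stack without freeing it */
		t->curr_ret_stack = -1;

		if (ftrace_graph_active) {
			struct ftrace_ret_stack *ret_stack;

			ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
					* sizeof(struct ftrace_ret_stack),
					GFP_KERNEL);
			if (!ret_stack)
				return;
			/* ... init counters, smp_wmb(), set t->ret_stack ... */
		}
	}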
| |
The solution is to treat the idle task specially. In fact, the
function_graph code already does, just not at init_idle(). Instead
of using ftrace_graph_init_task() for the idle task, since that
function expects the task to be a clone, add a separate
ftrace_graph_init_idle_task(). Also, create a per-CPU ret_stack
that is used by the idle task. When ftrace_graph_init_idle_task()
is called, it checks whether the idle task's ret_stack is NULL;
if it is, it assigns it the per-CPU ret_stack.
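
With this change there are two separate init paths (call chains
abbreviated):

	fork/clone:		copy_process() -> ftrace_graph_init_task()
	boot / CPU online:	init_idle()    -> ftrace_graph_init_idle_task()

Because the per-CPU stack is reused across offline/online cycles,
it is never freed, consistent with the existing comment in
start_graph_tracing() that the boot idle ret_stack is never freed.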
| |
| Reported-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> |
| Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl> |
| Signed-off-by: Steven Rostedt <rostedt@goodmis.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| |
| --- |
| include/linux/ftrace.h | 2 + |
kernel/sched.c | 2 +-
| kernel/trace/ftrace.c | 52 ++++++++++++++++++++++++++++++++++++++++++------- |
| 3 files changed, 48 insertions(+), 8 deletions(-) |
| |
| --- a/include/linux/ftrace.h |
| +++ b/include/linux/ftrace.h |
| @@ -412,6 +412,7 @@ extern void unregister_ftrace_graph(void |
| |
| extern void ftrace_graph_init_task(struct task_struct *t); |
| extern void ftrace_graph_exit_task(struct task_struct *t); |
| +extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu); |
| |
| static inline int task_curr_ret_stack(struct task_struct *t) |
| { |
| @@ -435,6 +436,7 @@ static inline void unpause_graph_tracing |
| |
| static inline void ftrace_graph_init_task(struct task_struct *t) { } |
| static inline void ftrace_graph_exit_task(struct task_struct *t) { } |
| +static inline void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { } |
| |
| static inline int task_curr_ret_stack(struct task_struct *tsk) |
| { |
| --- a/kernel/sched.c |
| +++ b/kernel/sched.c |
| @@ -7290,7 +7290,7 @@ void __cpuinit init_idle(struct task_str |
| * The idle tasks have their own, simple scheduling class: |
| */ |
| idle->sched_class = &idle_sched_class; |
| - ftrace_graph_init_task(idle); |
| + ftrace_graph_init_idle_task(idle, cpu); |
| } |
| |
| /* |
| --- a/kernel/trace/ftrace.c |
| +++ b/kernel/trace/ftrace.c |
| @@ -3172,7 +3172,7 @@ static int start_graph_tracing(void) |
| /* The cpu_boot init_task->ret_stack will never be freed */ |
| for_each_online_cpu(cpu) { |
| if (!idle_task(cpu)->ret_stack) |
| - ftrace_graph_init_task(idle_task(cpu)); |
| + ftrace_graph_init_idle_task(idle_task(cpu), cpu); |
| } |
| |
| do { |
| @@ -3262,6 +3262,49 @@ void unregister_ftrace_graph(void) |
| mutex_unlock(&ftrace_lock); |
| } |
| |
| +static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); |
| + |
| +static void |
| +graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) |
| +{ |
| + atomic_set(&t->tracing_graph_pause, 0); |
| + atomic_set(&t->trace_overrun, 0); |
| + t->ftrace_timestamp = 0; |
+	/* make curr_ret_stack visible before we add the ret_stack */
| + smp_wmb(); |
| + t->ret_stack = ret_stack; |
| +} |
| + |
| +/* |
| + * Allocate a return stack for the idle task. May be the first |
| + * time through, or it may be done by CPU hotplug online. |
| + */ |
| +void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) |
| +{ |
| + t->curr_ret_stack = -1; |
| + /* |
| + * The idle task has no parent, it either has its own |
| + * stack or no stack at all. |
| + */ |
| + if (t->ret_stack) |
| + WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu)); |
| + |
| + if (ftrace_graph_active) { |
| + struct ftrace_ret_stack *ret_stack; |
| + |
| + ret_stack = per_cpu(idle_ret_stack, cpu); |
| + if (!ret_stack) { |
| + ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH |
| + * sizeof(struct ftrace_ret_stack), |
| + GFP_KERNEL); |
| + if (!ret_stack) |
| + return; |
| + per_cpu(idle_ret_stack, cpu) = ret_stack; |
| + } |
| + graph_init_task(t, ret_stack); |
| + } |
| +} |
+
| /* Allocate a return stack for newly created task */ |
| void ftrace_graph_init_task(struct task_struct *t) |
{
| @@ -3277,12 +3320,7 @@ void ftrace_graph_init_task(struct task_ |
| GFP_KERNEL); |
| if (!ret_stack) |
| return; |
| - atomic_set(&t->tracing_graph_pause, 0); |
| - atomic_set(&t->trace_overrun, 0); |
| - t->ftrace_timestamp = 0; |
| - /* make curr_ret_stack visable before we add the ret_stack */ |
| - smp_wmb(); |
| - t->ret_stack = ret_stack; |
| + graph_init_task(t, ret_stack); |
| } |
| } |
| |