| From bf959931ddb88c4e4366e96dd22e68fa0db9527c Mon Sep 17 00:00:00 2001 |
| From: Oleg Nesterov <oleg@redhat.com> |
| Date: Mon, 23 May 2016 16:23:50 -0700 |
| Subject: wait/ptrace: assume __WALL if the child is traced |
| |
| From: Oleg Nesterov <oleg@redhat.com> |
| |
| commit bf959931ddb88c4e4366e96dd22e68fa0db9527c upstream. |
| |
| The following program (simplified version of generated by syzkaller) |
| |
| #include <pthread.h> |
| #include <unistd.h> |
| #include <sys/ptrace.h> |
| #include <stdio.h> |
| #include <signal.h> |
| |
| void *thread_func(void *arg) |
| { |
| ptrace(PTRACE_TRACEME, 0,0,0); |
| return 0; |
| } |
| |
| int main(void) |
| { |
| pthread_t thread; |
| |
| if (fork()) |
| return 0; |
| |
| while (getppid() != 1) |
| ; |
| |
| pthread_create(&thread, NULL, thread_func, NULL); |
| pthread_join(thread, NULL); |
| return 0; |
| } |
| |
| creates an unreapable zombie if /sbin/init doesn't use __WALL. |
| |
| This is not a kernel bug, at least in a sense that everything works as |
| expected: debugger should reap a traced sub-thread before it can reap the |
| leader, but without __WALL/__WCLONE do_wait() ignores sub-threads. |
| |
| Unfortunately, it seems that /sbin/init in most (all?) distributions |
| doesn't use it and we have to change the kernel to avoid the problem. |
| Note also that most init's use sys_waitid() which doesn't allow __WALL, so |
| the necessary user-space fix is not that trivial. |
| |
| This patch just adds the "ptrace" check into eligible_child(). To some |
| degree this matches the "tsk->ptrace" in exit_notify(), ->exit_signal is |
| mostly ignored when the tracee reports to debugger. Or WSTOPPED, the |
| tracer doesn't need to set this flag to wait for the stopped tracee. |
| |
| This obviously means the user-visible change: __WCLONE and __WALL no |
| longer have any meaning for debugger. And I can only hope that this won't |
| break something, but at least strace/gdb won't suffer. |
| |
| We could make a more conservative change. Say, we can take __WCLONE into |
| account, or !thread_group_leader(). But it would be nice to not |
| complicate these historical/confusing checks. |
| |
| Signed-off-by: Oleg Nesterov <oleg@redhat.com> |
| Reported-by: Dmitry Vyukov <dvyukov@google.com> |
| Cc: Denys Vlasenko <dvlasenk@redhat.com> |
| Cc: Jan Kratochvil <jan.kratochvil@redhat.com> |
| Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com> |
| Cc: Pedro Alves <palves@redhat.com> |
| Cc: Roland McGrath <roland@hack.frob.com> |
| Cc: <syzkaller@googlegroups.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| kernel/exit.c | 29 ++++++++++++++++++++--------- |
| 1 file changed, 20 insertions(+), 9 deletions(-) |
| |
| --- a/kernel/exit.c |
| +++ b/kernel/exit.c |
| @@ -918,17 +918,28 @@ static int eligible_pid(struct wait_opts |
| task_pid_type(p, wo->wo_type) == wo->wo_pid; |
| } |
| |
| -static int eligible_child(struct wait_opts *wo, struct task_struct *p) |
| +static int |
| +eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p) |
| { |
| if (!eligible_pid(wo, p)) |
| return 0; |
| - /* Wait for all children (clone and not) if __WALL is set; |
| - * otherwise, wait for clone children *only* if __WCLONE is |
| - * set; otherwise, wait for non-clone children *only*. (Note: |
| - * A "clone" child here is one that reports to its parent |
| - * using a signal other than SIGCHLD.) */ |
| - if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) |
| - && !(wo->wo_flags & __WALL)) |
| + |
| + /* |
| + * Wait for all children (clone and not) if __WALL is set or |
| + * if it is traced by us. |
| + */ |
| + if (ptrace || (wo->wo_flags & __WALL)) |
| + return 1; |
| + |
| + /* |
| + * Otherwise, wait for clone children *only* if __WCLONE is set; |
| + * otherwise, wait for non-clone children *only*. |
| + * |
| + * Note: a "clone" child here is one that reports to its parent |
| + * using a signal other than SIGCHLD, or a non-leader thread which |
| + * we can only see if it is traced by us. |
| + */ |
| + if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) |
| return 0; |
| |
| return 1; |
| @@ -1300,7 +1311,7 @@ static int wait_consider_task(struct wai |
| if (unlikely(exit_state == EXIT_DEAD)) |
| return 0; |
| |
| - ret = eligible_child(wo, p); |
| + ret = eligible_child(wo, ptrace, p); |
| if (!ret) |
| return ret; |
| |