| From 0644079410065567e3bb31fcb8e6441f2b7685a9 Mon Sep 17 00:00:00 2001 |
| From: Anton Blanchard <anton@samba.org> |
| Date: Mon, 10 May 2010 16:25:51 +0000 |
| Subject: powerpc/kdump: CPUs assume the context of the oopsing CPU |
| |
| From: Anton Blanchard <anton@samba.org> |
| |
| commit 0644079410065567e3bb31fcb8e6441f2b7685a9 upstream. |
| |
| We wrap the crash_shutdown_handles[] calls with longjmp/setjmp, so if any |
| of them fault we can recover. The problem is that the debugger fault |
| handler hook we install calls longjmp unconditionally. |
| |
| This first part of kdump is run before we marshall the other CPUs, so there |
| is a very good chance some CPU on the box is going to page fault. And when |
| it does it hits the longjmp code and assumes the context of the oopsing CPU. |
| The machine gets very confused when it has 10 CPUs all with the same stack, |
| all thinking they have the same CPU id. I get even more confused trying |
| to debug it. |
| |
| The patch below adds crash_shutdown_cpu and uses it to specify which cpu is |
| in the protected region. Since it can only be -1 or the oopsing CPU, we don't |
| need to use memory barriers since it is only valid on the local CPU - no other |
| CPU will ever see a value that matches its local CPU id. |
| |
| Eventually we should switch the order and marshall all CPUs before doing the |
| crash_shutdown_handles[] calls, but that is a bigger fix. |
| |
| Signed-off-by: Anton Blanchard <anton@samba.org> |
| Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> |
| Cc: Kamalesh babulal <kamalesh@linux.vnet.ibm.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| |
| --- |
| arch/powerpc/kernel/crash.c | 6 +++++- |
| 1 file changed, 5 insertions(+), 1 deletion(-) |
| |
| --- a/arch/powerpc/kernel/crash.c |
| +++ b/arch/powerpc/kernel/crash.c |
| @@ -347,10 +347,12 @@ int crash_shutdown_unregister(crash_shut |
| EXPORT_SYMBOL(crash_shutdown_unregister); |
| |
| static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; |
| +static int crash_shutdown_cpu = -1; |
| |
| static int handle_fault(struct pt_regs *regs) |
| { |
| - longjmp(crash_shutdown_buf, 1); |
| + if (crash_shutdown_cpu == smp_processor_id()) |
| + longjmp(crash_shutdown_buf, 1); |
| return 0; |
| } |
| |
| @@ -388,6 +390,7 @@ void default_machine_crash_shutdown(stru |
| */ |
| old_handler = __debugger_fault_handler; |
| __debugger_fault_handler = handle_fault; |
| + crash_shutdown_cpu = smp_processor_id(); |
| for (i = 0; crash_shutdown_handles[i]; i++) { |
| if (setjmp(crash_shutdown_buf) == 0) { |
| /* |
| @@ -401,6 +404,7 @@ void default_machine_crash_shutdown(stru |
| asm volatile("sync; isync"); |
| } |
| } |
| + crash_shutdown_cpu = -1; |
| __debugger_fault_handler = old_handler; |
| |
| /* |