| Subject: softirq: Init softirq local lock after per cpu section is set up |
| From: Steven Rostedt <rostedt@goodmis.org> |
| Date: Thu, 04 Oct 2012 11:02:04 -0400 |
| |
| I discovered this bug when booting 3.4-rt on my powerpc box. It crashed |
| with the following report: |
| |
| ------------[ cut here ]------------ |
| kernel BUG at /work/rt/stable-rt.git/kernel/rtmutex_common.h:75! |
| Oops: Exception in kernel mode, sig: 5 [#1] |
| PREEMPT SMP NR_CPUS=64 NUMA PA Semi PWRficient |
| Modules linked in: |
| NIP: c0000000004aa03c LR: c0000000004aa01c CTR: c00000000009b2ac |
| REGS: c00000003e8d7950 TRAP: 0700 Not tainted (3.4.11-test-rt19) |
| MSR: 9000000000029032 <SF,HV,EE,ME,IR,DR,RI> CR: 24000082 XER: 20000000 |
| SOFTE: 0 |
| TASK = c00000003e8fdcd0[11] 'ksoftirqd/1' THREAD: c00000003e8d4000 CPU: 1 |
| GPR00: 0000000000000001 c00000003e8d7bd0 c000000000d6cbb0 0000000000000000 |
| GPR04: c00000003e8fdcd0 0000000000000000 0000000024004082 c000000000011454 |
| GPR08: 0000000000000000 0000000080000001 c00000003e8fdcd1 0000000000000000 |
| GPR12: 0000000024000084 c00000000fff0280 ffffffffffffffff 000000003ffffad8 |
| GPR16: ffffffffffffffff 000000000072c798 0000000000000060 0000000000000000 |
| GPR20: 0000000000642741 000000000072c858 000000003ffffaf0 0000000000000417 |
| GPR24: 000000000072dcd0 c00000003e7ff990 0000000000000000 0000000000000001 |
| GPR28: 0000000000000000 c000000000792340 c000000000ccec78 c000000001182338 |
| NIP [c0000000004aa03c] .wakeup_next_waiter+0x44/0xb8 |
| LR [c0000000004aa01c] .wakeup_next_waiter+0x24/0xb8 |
| Call Trace: |
| [c00000003e8d7bd0] [c0000000004aa01c] .wakeup_next_waiter+0x24/0xb8 (unreliable) |
| [c00000003e8d7c60] [c0000000004a0320] .rt_spin_lock_slowunlock+0x8c/0xe4 |
| [c00000003e8d7ce0] [c0000000004a07cc] .rt_spin_unlock+0x54/0x64 |
| [c00000003e8d7d60] [c0000000000636bc] .__thread_do_softirq+0x130/0x174 |
| [c00000003e8d7df0] [c00000000006379c] .run_ksoftirqd+0x9c/0x1a4 |
| [c00000003e8d7ea0] [c000000000080b68] .kthread+0xa8/0xb4 |
| [c00000003e8d7f90] [c00000000001c2f8] .kernel_thread+0x54/0x70 |
| Instruction dump: |
| 60000000 e86d01c8 38630730 4bff7061 60000000 ebbf0008 7c7c1b78 e81d0040 |
| 7fe00278 7c000074 7800d182 68000001 <0b000000> e88d01c8 387d0010 38840738 |
| |
| The rtmutex_common.h:75 is: |
| |
| rt_mutex_top_waiter(struct rt_mutex *lock) |
| { |
| struct rt_mutex_waiter *w; |
| |
| w = plist_first_entry(&lock->wait_list, struct rt_mutex_waiter, |
| list_entry); |
| BUG_ON(w->lock != lock); |
| |
| return w; |
| } |
| |
| Where the waiter->lock is corrupted. I saw various other random bugs |
| that all had to do with the softirq lock and plist. As plist needs to be |
| initialized before it is used I investigated how this lock is |
| initialized. It's initialized with: |
| |
| void __init softirq_early_init(void) |
| { |
| local_irq_lock_init(local_softirq_lock); |
| } |
| |
| Where: |
| |
| #define local_irq_lock_init(lvar) \ |
| do { \ |
| int __cpu; \ |
| for_each_possible_cpu(__cpu) \ |
| spin_lock_init(&per_cpu(lvar, __cpu).lock); \ |
| } while (0) |
| |
| As the softirq lock is a local_irq_lock, which is a per_cpu lock, the |
| initialization is done to all per_cpu versions of the lock. But let's |
| look at where the softirq_early_init() is called from. |
| |
| In init/main.c: start_kernel() |
| |
| /* |
| * Interrupts are still disabled. Do necessary setups, then |
| * enable them |
| */ |
| softirq_early_init(); |
| tick_init(); |
| boot_cpu_init(); |
| page_address_init(); |
| printk(KERN_NOTICE "%s", linux_banner); |
| setup_arch(&command_line); |
| mm_init_owner(&init_mm, &init_task); |
| mm_init_cpumask(&init_mm); |
| setup_command_line(command_line); |
| setup_nr_cpu_ids(); |
| setup_per_cpu_areas(); |
| smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ |
| |
| One of the first things that is called is the initialization of the |
| softirq lock. But if you look further down, we see the per_cpu areas |
| have not been set up yet. Thus initializing a local_irq_lock() before |
| the per_cpu section is set up may not work, as it is initializing the |
| per cpu locks before the per cpu areas exist. |
| |
| By moving the softirq_early_init() right after setup_per_cpu_areas(), |
| the kernel boots fine. |
| |
| Signed-off-by: Steven Rostedt <rostedt@goodmis.org> |
| Cc: Clark Williams <clark@redhat.com> |
| Cc: John Kacur <jkacur@redhat.com> |
| Cc: Carsten Emde <cbe@osadl.org> |
| Cc: vomlehn@texas.net |
| Link: http://lkml.kernel.org/r/1349362924.6755.18.camel@gandalf.local.home |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| |
| --- |
| init/main.c | 2 +- |
| 1 file changed, 1 insertion(+), 1 deletion(-) |
| |
| --- a/init/main.c |
| +++ b/init/main.c |
| @@ -493,7 +493,6 @@ asmlinkage void __init start_kernel(void |
| * Interrupts are still disabled. Do necessary setups, then |
| * enable them |
| */ |
| - softirq_early_init(); |
| tick_init(); |
| boot_cpu_init(); |
| page_address_init(); |
| @@ -504,6 +503,7 @@ asmlinkage void __init start_kernel(void |
| setup_command_line(command_line); |
| setup_nr_cpu_ids(); |
| setup_per_cpu_areas(); |
| + softirq_early_init(); |
| smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ |
| |
| build_all_zonelists(NULL, NULL); |