Subject: rcu: Frob softirq test
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat Aug 13 00:23:17 CEST 2011

With RT_FULL we get the below wreckage:

[  126.060484] =======================================================
[  126.060486] [ INFO: possible circular locking dependency detected ]
[  126.060489] 3.0.1-rt10+ #30
[  126.060490] -------------------------------------------------------
[  126.060492] irq/24-eth0/1235 is trying to acquire lock:
[  126.060495]  (&(lock)->wait_lock#2){+.+...}, at: [<ffffffff81501c81>] rt_mutex_slowunlock+0x16/0x55
[  126.060503]
[  126.060504] but task is already holding lock:
[  126.060506]  (&p->pi_lock){-...-.}, at: [<ffffffff81074fdc>] try_to_wake_up+0x35/0x429
[  126.060511]
[  126.060511] which lock already depends on the new lock.
[  126.060513]
[  126.060514]
[  126.060514] the existing dependency chain (in reverse order) is:
[  126.060516]
[  126.060516] -> #1 (&p->pi_lock){-...-.}:
[  126.060519]        [<ffffffff810afe9e>] lock_acquire+0x145/0x18a
[  126.060524]        [<ffffffff8150291e>] _raw_spin_lock_irqsave+0x4b/0x85
[  126.060527]        [<ffffffff810b5aa4>] task_blocks_on_rt_mutex+0x36/0x20f
[  126.060531]        [<ffffffff815019bb>] rt_mutex_slowlock+0xd1/0x15a
[  126.060534]        [<ffffffff81501ae3>] rt_mutex_lock+0x2d/0x2f
[  126.060537]        [<ffffffff810d9020>] rcu_boost+0xad/0xde
[  126.060541]        [<ffffffff810d90ce>] rcu_boost_kthread+0x7d/0x9b
[  126.060544]        [<ffffffff8109a760>] kthread+0x99/0xa1
[  126.060547]        [<ffffffff81509b14>] kernel_thread_helper+0x4/0x10
[  126.060551]
[  126.060552] -> #0 (&(lock)->wait_lock#2){+.+...}:
[  126.060555]        [<ffffffff810af1b8>] __lock_acquire+0x1157/0x1816
[  126.060558]        [<ffffffff810afe9e>] lock_acquire+0x145/0x18a
[  126.060561]        [<ffffffff8150279e>] _raw_spin_lock+0x40/0x73
[  126.060564]        [<ffffffff81501c81>] rt_mutex_slowunlock+0x16/0x55
[  126.060566]        [<ffffffff81501ce7>] rt_mutex_unlock+0x27/0x29
[  126.060569]        [<ffffffff810d9f86>] rcu_read_unlock_special+0x17e/0x1c4
[  126.060573]        [<ffffffff810da014>] __rcu_read_unlock+0x48/0x89
[  126.060576]        [<ffffffff8106847a>] select_task_rq_rt+0xc7/0xd5
[  126.060580]        [<ffffffff8107511c>] try_to_wake_up+0x175/0x429
[  126.060583]        [<ffffffff81075425>] wake_up_process+0x15/0x17
[  126.060585]        [<ffffffff81080a51>] wakeup_softirqd+0x24/0x26
[  126.060590]        [<ffffffff81081df9>] irq_exit+0x49/0x55
[  126.060593]        [<ffffffff8150a3bd>] smp_apic_timer_interrupt+0x8a/0x98
[  126.060597]        [<ffffffff81509793>] apic_timer_interrupt+0x13/0x20
[  126.060600]        [<ffffffff810d5952>] irq_forced_thread_fn+0x1b/0x44
[  126.060603]        [<ffffffff810d582c>] irq_thread+0xde/0x1af
[  126.060606]        [<ffffffff8109a760>] kthread+0x99/0xa1
[  126.060608]        [<ffffffff81509b14>] kernel_thread_helper+0x4/0x10
[  126.060611]
[  126.060612] other info that might help us debug this:
[  126.060614]
[  126.060615]  Possible unsafe locking scenario:
[  126.060616]
[  126.060617]        CPU0                    CPU1
[  126.060619]        ----                    ----
[  126.060620]   lock(&p->pi_lock);
[  126.060623]                                lock(&(lock)->wait_lock);
[  126.060625]                                lock(&p->pi_lock);
[  126.060627]   lock(&(lock)->wait_lock);
[  126.060629]
[  126.060629]  *** DEADLOCK ***
[  126.060630]
[  126.060632] 1 lock held by irq/24-eth0/1235:
[  126.060633]  #0:  (&p->pi_lock){-...-.}, at: [<ffffffff81074fdc>] try_to_wake_up+0x35/0x429
[  126.060638]
[  126.060638] stack backtrace:
[  126.060641] Pid: 1235, comm: irq/24-eth0 Not tainted 3.0.1-rt10+ #30
[  126.060643] Call Trace:
[  126.060644]  <IRQ>  [<ffffffff810acbde>] print_circular_bug+0x289/0x29a
[  126.060651]  [<ffffffff810af1b8>] __lock_acquire+0x1157/0x1816
[  126.060655]  [<ffffffff810ab3aa>] ? trace_hardirqs_off_caller+0x1f/0x99
[  126.060658]  [<ffffffff81501c81>] ? rt_mutex_slowunlock+0x16/0x55
[  126.060661]  [<ffffffff810afe9e>] lock_acquire+0x145/0x18a
[  126.060664]  [<ffffffff81501c81>] ? rt_mutex_slowunlock+0x16/0x55
[  126.060668]  [<ffffffff8150279e>] _raw_spin_lock+0x40/0x73
[  126.060671]  [<ffffffff81501c81>] ? rt_mutex_slowunlock+0x16/0x55
[  126.060674]  [<ffffffff810d9655>] ? rcu_report_qs_rsp+0x87/0x8c
[  126.060677]  [<ffffffff81501c81>] rt_mutex_slowunlock+0x16/0x55
[  126.060680]  [<ffffffff810d9ea3>] ? rcu_read_unlock_special+0x9b/0x1c4
[  126.060683]  [<ffffffff81501ce7>] rt_mutex_unlock+0x27/0x29
[  126.060687]  [<ffffffff810d9f86>] rcu_read_unlock_special+0x17e/0x1c4
[  126.060690]  [<ffffffff810da014>] __rcu_read_unlock+0x48/0x89
[  126.060693]  [<ffffffff8106847a>] select_task_rq_rt+0xc7/0xd5
[  126.060696]  [<ffffffff810683da>] ? select_task_rq_rt+0x27/0xd5
[  126.060701]  [<ffffffff810a852a>] ? clockevents_program_event+0x8e/0x90
[  126.060704]  [<ffffffff8107511c>] try_to_wake_up+0x175/0x429
[  126.060708]  [<ffffffff810a95dc>] ? tick_program_event+0x1f/0x21
[  126.060711]  [<ffffffff81075425>] wake_up_process+0x15/0x17
[  126.060715]  [<ffffffff81080a51>] wakeup_softirqd+0x24/0x26
[  126.060718]  [<ffffffff81081df9>] irq_exit+0x49/0x55
[  126.060721]  [<ffffffff8150a3bd>] smp_apic_timer_interrupt+0x8a/0x98
[  126.060724]  [<ffffffff81509793>] apic_timer_interrupt+0x13/0x20
[  126.060726]  <EOI>  [<ffffffff81072855>] ? migrate_disable+0x75/0x12d
[  126.060733]  [<ffffffff81080a61>] ? local_bh_disable+0xe/0x1f
[  126.060736]  [<ffffffff81080a70>] ? local_bh_disable+0x1d/0x1f
[  126.060739]  [<ffffffff810d5952>] irq_forced_thread_fn+0x1b/0x44
[  126.060742]  [<ffffffff81502ac0>] ? _raw_spin_unlock_irq+0x3b/0x59
[  126.060745]  [<ffffffff810d582c>] irq_thread+0xde/0x1af
[  126.060748]  [<ffffffff810d5937>] ? irq_thread_fn+0x3a/0x3a
[  126.060751]  [<ffffffff810d574e>] ? irq_finalize_oneshot+0xd1/0xd1
[  126.060754]  [<ffffffff810d574e>] ? irq_finalize_oneshot+0xd1/0xd1
[  126.060757]  [<ffffffff8109a760>] kthread+0x99/0xa1
[  126.060761]  [<ffffffff81509b14>] kernel_thread_helper+0x4/0x10
[  126.060764]  [<ffffffff81069ed7>] ? finish_task_switch+0x87/0x10a
[  126.060768]  [<ffffffff81502ec4>] ? retint_restore_args+0xe/0xe
[  126.060771]  [<ffffffff8109a6c7>] ? __init_kthread_worker+0x8c/0x8c
[  126.060774]  [<ffffffff81509b10>] ? gs_change+0xb/0xb

Because irq_exit() does:

void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	...
}

This triggers a wakeup, and the wakeup uses RCU. Now, if the interrupted
task has t->rcu_read_unlock_special set, the RCU usage from the wakeup
will end up in rcu_read_unlock_special(). rcu_read_unlock_special()
tests for in_irq(), which fails because we just decremented
preempt_count with IRQ_EXIT_OFFSET, and for in_serving_softirq(), which
for PREEMPT_RT_FULL reads:

int in_serving_softirq(void)
{
	int res;

	preempt_disable();
	res = __get_cpu_var(local_softirq_runner) == current;
	preempt_enable();
	return res;
}

This test will thus also fail, resulting in the above wreckage.
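
For comparison, in mainline (!PREEMPT_RT_FULL) both tests are plain
preempt_count() bit tests, along these lines (a sketch of the
include/linux/hardirq.h definitions of this era):

#define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)

#define in_irq()		(hardirq_count())
#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)

So on mainline the pair in_irq() || in_serving_softirq() reduces to a
single preempt_count() mask check, while the PREEMPT_RT_FULL
in_serving_softirq() above consults local_softirq_runner instead.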

The 'somewhat' ugly solution is to open-code the preempt_count() test
in rcu_read_unlock_special().
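
A sketch of what that open-coded test covers, in terms of the usual
3.0 mainline preempt_count() layout (the exact values here are for
illustration only):

/*
 * PREEMPT_MASK	0x000000ff	preempt_disable() nesting
 * SOFTIRQ_MASK	0x0000ff00	local_bh_disable() adds 0x200; actually
 *				serving a softirq adds SOFTIRQ_OFFSET (0x100)
 * HARDIRQ_MASK	0x03ff0000	hardirq nesting
 *
 * preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET) is thus only true
 * in hardirq context or while a softirq handler is actually running,
 * not in a mere softirq-disabled region.
 */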

Also, we're not at all sure how ->rcu_read_unlock_special gets set
here... so this is very likely a bandaid and more thought is required.

Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/rcutree_plugin.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux-stable/kernel/rcutree_plugin.h
===================================================================
--- linux-stable.orig/kernel/rcutree_plugin.h
+++ linux-stable/kernel/rcutree_plugin.h
@@ -331,7 +331,7 @@ void rcu_read_unlock_special(struct task
 	}
 
 	/* Hardware IRQ handlers cannot block. */
-	if (in_irq() || in_serving_softirq()) {
+	if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
 		local_irq_restore(flags);
 		return;
 	}