| From 5fa8bbda38c668e56b0c6cdecced2eac2fe36dec Mon Sep 17 00:00:00 2001 |
| From: Eric Dumazet <edumazet@google.com> |
| Date: Thu, 2 Feb 2017 10:31:35 -0800 |
| Subject: [PATCH] net: use a work queue to defer net_disable_timestamp() work |
| |
| commit 5fa8bbda38c668e56b0c6cdecced2eac2fe36dec upstream. |
| |
| Dmitry reported a warning [1] showing that we were calling |
| net_disable_timestamp() -> static_key_slow_dec() from a non |
| process context. |
| |
| Grabbing a mutex while holding a spinlock or rcu_read_lock() |
| is not allowed. |
| |
| As Cong suggested, we now use a work queue. |
| |
| It is possible netstamp_clear() exits while netstamp_needed_deferred |
| is not zero, but it is probably not worth trying to do better than that. |
| |
| netstamp_needed_deferred atomic tracks the exact number of deferred |
| decrements. |
| |
| [1] |
| [ INFO: suspicious RCU usage. ] |
| 4.10.0-rc5+ #192 Not tainted |
| ------------------------------- |
| ./include/linux/rcupdate.h:561 Illegal context switch in RCU read-side |
| critical section! |
| |
| other info that might help us debug this: |
| |
| rcu_scheduler_active = 2, debug_locks = 0 |
| 2 locks held by syz-executor14/23111: |
| #0: (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>] lock_sock |
| include/net/sock.h:1454 [inline] |
| #0: (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>] |
| rawv6_sendmsg+0x1e65/0x3ec0 net/ipv6/raw.c:919 |
| #1: (rcu_read_lock){......}, at: [<ffffffff83ae2678>] nf_hook |
| include/linux/netfilter.h:201 [inline] |
| #1: (rcu_read_lock){......}, at: [<ffffffff83ae2678>] |
| __ip6_local_out+0x258/0x840 net/ipv6/output_core.c:160 |
| |
| stack backtrace: |
| CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192 |
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs |
| 01/01/2011 |
| Call Trace: |
| __dump_stack lib/dump_stack.c:15 [inline] |
| dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 |
| lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4452 |
| rcu_preempt_sleep_check include/linux/rcupdate.h:560 [inline] |
| ___might_sleep+0x560/0x650 kernel/sched/core.c:7748 |
| __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739 |
| mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752 |
| atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060 |
| __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149 |
| static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174 |
| net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728 |
| sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403 |
| __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441 |
| sk_destruct+0x47/0x80 net/core/sock.c:1460 |
| __sk_free+0x57/0x230 net/core/sock.c:1468 |
| sock_wfree+0xae/0x120 net/core/sock.c:1645 |
| skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655 |
| skb_release_all+0x15/0x60 net/core/skbuff.c:668 |
| __kfree_skb+0x15/0x20 net/core/skbuff.c:684 |
| kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705 |
| inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 |
| inet_frag_put include/net/inet_frag.h:133 [inline] |
| nf_ct_frag6_gather+0x1106/0x3840 |
| net/ipv6/netfilter/nf_conntrack_reasm.c:617 |
| ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 |
| nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline] |
| nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 |
| nf_hook include/linux/netfilter.h:212 [inline] |
| __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160 |
| ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 |
| ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 |
| ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 |
| rawv6_push_pending_frames net/ipv6/raw.c:613 [inline] |
| rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927 |
| inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 |
| sock_sendmsg_nosec net/socket.c:635 [inline] |
| sock_sendmsg+0xca/0x110 net/socket.c:645 |
| sock_write_iter+0x326/0x600 net/socket.c:848 |
| do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695 |
| do_readv_writev+0x42c/0x9b0 fs/read_write.c:872 |
| vfs_writev+0x87/0xc0 fs/read_write.c:911 |
| do_writev+0x110/0x2c0 fs/read_write.c:944 |
| SYSC_writev fs/read_write.c:1017 [inline] |
| SyS_writev+0x27/0x30 fs/read_write.c:1014 |
| entry_SYSCALL_64_fastpath+0x1f/0xc2 |
| RIP: 0033:0x445559 |
| RSP: 002b:00007f6f46fceb58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 |
| RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000445559 |
| RDX: 0000000000000001 RSI: 0000000020f1eff0 RDI: 0000000000000005 |
| RBP: 00000000006e19c0 R08: 0000000000000000 R09: 0000000000000000 |
| R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000700000 |
| R13: 0000000020f59000 R14: 0000000000000015 R15: 0000000000020400 |
| BUG: sleeping function called from invalid context at |
| kernel/locking/mutex.c:752 |
| in_atomic(): 1, irqs_disabled(): 0, pid: 23111, name: syz-executor14 |
| INFO: lockdep is turned off. |
| CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192 |
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs |
| 01/01/2011 |
| Call Trace: |
| __dump_stack lib/dump_stack.c:15 [inline] |
| dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 |
| ___might_sleep+0x47e/0x650 kernel/sched/core.c:7780 |
| __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739 |
| mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752 |
| atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060 |
| __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149 |
| static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174 |
| net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728 |
| sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403 |
| __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441 |
| sk_destruct+0x47/0x80 net/core/sock.c:1460 |
| __sk_free+0x57/0x230 net/core/sock.c:1468 |
| sock_wfree+0xae/0x120 net/core/sock.c:1645 |
| skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655 |
| skb_release_all+0x15/0x60 net/core/skbuff.c:668 |
| __kfree_skb+0x15/0x20 net/core/skbuff.c:684 |
| kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705 |
| inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 |
| inet_frag_put include/net/inet_frag.h:133 [inline] |
| nf_ct_frag6_gather+0x1106/0x3840 |
| net/ipv6/netfilter/nf_conntrack_reasm.c:617 |
| ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 |
| nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline] |
| nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 |
| nf_hook include/linux/netfilter.h:212 [inline] |
| __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160 |
| ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 |
| ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 |
| ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 |
| rawv6_push_pending_frames net/ipv6/raw.c:613 [inline] |
| rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927 |
| inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 |
| sock_sendmsg_nosec net/socket.c:635 [inline] |
| sock_sendmsg+0xca/0x110 net/socket.c:645 |
| sock_write_iter+0x326/0x600 net/socket.c:848 |
| do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695 |
| do_readv_writev+0x42c/0x9b0 fs/read_write.c:872 |
| vfs_writev+0x87/0xc0 fs/read_write.c:911 |
| do_writev+0x110/0x2c0 fs/read_write.c:944 |
| SYSC_writev fs/read_write.c:1017 [inline] |
| SyS_writev+0x27/0x30 fs/read_write.c:1014 |
| entry_SYSCALL_64_fastpath+0x1f/0xc2 |
| RIP: 0033:0x445559 |
| |
| Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") |
| Suggested-by: Cong Wang <xiyou.wangcong@gmail.com> |
| Reported-by: Dmitry Vyukov <dvyukov@google.com> |
| Signed-off-by: Eric Dumazet <edumazet@google.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| |
| diff --git a/net/core/dev.c b/net/core/dev.c |
| index 7f218e095361..29101c98399f 100644 |
| --- a/net/core/dev.c |
| +++ b/net/core/dev.c |
| @@ -1695,24 +1695,19 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); |
| |
| static struct static_key netstamp_needed __read_mostly; |
| #ifdef HAVE_JUMP_LABEL |
| -/* We are not allowed to call static_key_slow_dec() from irq context |
| - * If net_disable_timestamp() is called from irq context, defer the |
| - * static_key_slow_dec() calls. |
| - */ |
| static atomic_t netstamp_needed_deferred; |
| -#endif |
| - |
| -void net_enable_timestamp(void) |
| +static void netstamp_clear(struct work_struct *work) |
| { |
| -#ifdef HAVE_JUMP_LABEL |
| int deferred = atomic_xchg(&netstamp_needed_deferred, 0); |
| |
| - if (deferred) { |
| - while (--deferred) |
| - static_key_slow_dec(&netstamp_needed); |
| - return; |
| - } |
| + while (deferred--) |
| + static_key_slow_dec(&netstamp_needed); |
| +} |
| +static DECLARE_WORK(netstamp_work, netstamp_clear); |
| #endif |
| + |
| +void net_enable_timestamp(void) |
| +{ |
| static_key_slow_inc(&netstamp_needed); |
| } |
| EXPORT_SYMBOL(net_enable_timestamp); |
| @@ -1720,12 +1715,12 @@ EXPORT_SYMBOL(net_enable_timestamp); |
| void net_disable_timestamp(void) |
| { |
| #ifdef HAVE_JUMP_LABEL |
| - if (in_interrupt()) { |
| - atomic_inc(&netstamp_needed_deferred); |
| - return; |
| - } |
| -#endif |
| + /* net_disable_timestamp() can be called from non process context */ |
| + atomic_inc(&netstamp_needed_deferred); |
| + schedule_work(&netstamp_work); |
| +#else |
| static_key_slow_dec(&netstamp_needed); |
| +#endif |
| } |
| EXPORT_SYMBOL(net_disable_timestamp); |
| |
| -- |
| 2.12.0 |
| |