| From e2c580b27b7fdce751116f5c9e8585207f8cc5fe Mon Sep 17 00:00:00 2001 |
| From: Konstantin Khlebnikov <khlebnikov@openvz.org> |
| Date: Sun, 5 May 2013 04:56:22 +0000 |
| Subject: net: frag, fix race conditions in LRU list maintenance |
| |
| |
| From: Konstantin Khlebnikov <khlebnikov@openvz.org> |
| |
| [ Upstream commit b56141ab34e2c3e2d7960cea12c20c99530c0c76 ] |
| |
| This patch fixes a race between inet_frag_lru_move() and inet_frag_lru_add() |
| which was introduced in commit 3ef0eb0db4bf92c6d2510fe5c4dc51852746f206 |
| ("net: frag, move LRU list maintenance outside of rwlock"). |
| |
| One CPU has already added a new fragment queue into the hash but not yet into the LRU. |
| Another CPU finds it in the hash and tries to move it to the end of the LRU. |
| This leads to a NULL pointer dereference inside list_move_tail(). |
| |
| Another possible race condition is between inet_frag_lru_move() and |
| inet_frag_lru_del(): the move can happen after the deletion. |
| |
| This patch initializes the queue's LRU list head before adding the fragment queue into the hash, |
| and inet_frag_lru_move() doesn't touch that list head if it is empty. |
| |
| I saw this kernel oops two times in a couple of days. |
| |
| [119482.128853] BUG: unable to handle kernel NULL pointer dereference at (null) |
| [119482.132693] IP: [<ffffffff812ede89>] __list_del_entry+0x29/0xd0 |
| [119482.136456] PGD 2148f6067 PUD 215ab9067 PMD 0 |
| [119482.140221] Oops: 0000 [#1] SMP |
| [119482.144008] Modules linked in: vfat msdos fat 8021q fuse nfsd auth_rpcgss nfs_acl nfs lockd sunrpc ppp_async ppp_generic bridge slhc stp llc w83627ehf hwmon_vid snd_hda_codec_hdmi snd_hda_codec_realtek kvm_amd k10temp kvm snd_hda_intel snd_hda_codec edac_core radeon snd_hwdep ath9k snd_pcm ath9k_common snd_page_alloc ath9k_hw snd_timer snd soundcore drm_kms_helper ath ttm r8169 mii |
| [119482.152692] CPU 3 |
| [119482.152721] Pid: 20, comm: ksoftirqd/3 Not tainted 3.9.0-zurg-00001-g9f95269 #132 To Be Filled By O.E.M. To Be Filled By O.E.M./RS880D |
| [119482.161478] RIP: 0010:[<ffffffff812ede89>] [<ffffffff812ede89>] __list_del_entry+0x29/0xd0 |
| [119482.166004] RSP: 0018:ffff880216d5db58 EFLAGS: 00010207 |
| [119482.170568] RAX: 0000000000000000 RBX: ffff88020882b9c0 RCX: dead000000200200 |
| [119482.175189] RDX: 0000000000000000 RSI: 0000000000000880 RDI: ffff88020882ba00 |
| [119482.179860] RBP: ffff880216d5db58 R08: ffffffff8155c7f0 R09: 0000000000000014 |
| [119482.184570] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88020882ba00 |
| [119482.189337] R13: ffffffff81c8d780 R14: ffff880204357f00 R15: 00000000000005a0 |
| [119482.194140] FS: 00007f58124dc700(0000) GS:ffff88021fcc0000(0000) knlGS:0000000000000000 |
| [119482.198928] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b |
| [119482.203711] CR2: 0000000000000000 CR3: 00000002155f0000 CR4: 00000000000007e0 |
| [119482.208533] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 |
| [119482.213371] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 |
| [119482.218221] Process ksoftirqd/3 (pid: 20, threadinfo ffff880216d5c000, task ffff880216d3a9a0) |
| [119482.223113] Stack: |
| [119482.228004] ffff880216d5dbd8 ffffffff8155dcda 0000000000000000 ffff000200000001 |
| [119482.233038] ffff8802153c1f00 ffff880000289440 ffff880200000014 ffff88007bc72000 |
| [119482.238083] 00000000000079d5 ffff88007bc72f44 ffffffff00000002 ffff880204357f00 |
| [119482.243090] Call Trace: |
| [119482.248009] [<ffffffff8155dcda>] ip_defrag+0x8fa/0xd10 |
| [119482.252921] [<ffffffff815a8013>] ipv4_conntrack_defrag+0x83/0xe0 |
| [119482.257803] [<ffffffff8154485b>] nf_iterate+0x8b/0xa0 |
| [119482.262658] [<ffffffff8155c7f0>] ? inet_del_offload+0x40/0x40 |
| [119482.267527] [<ffffffff815448e4>] nf_hook_slow+0x74/0x130 |
| [119482.272412] [<ffffffff8155c7f0>] ? inet_del_offload+0x40/0x40 |
| [119482.277302] [<ffffffff8155d068>] ip_rcv+0x268/0x320 |
| [119482.282147] [<ffffffff81519992>] __netif_receive_skb_core+0x612/0x7e0 |
| [119482.286998] [<ffffffff81519b78>] __netif_receive_skb+0x18/0x60 |
| [119482.291826] [<ffffffff8151a650>] process_backlog+0xa0/0x160 |
| [119482.296648] [<ffffffff81519f29>] net_rx_action+0x139/0x220 |
| [119482.301403] [<ffffffff81053707>] __do_softirq+0xe7/0x220 |
| [119482.306103] [<ffffffff81053868>] run_ksoftirqd+0x28/0x40 |
| [119482.310809] [<ffffffff81074f5f>] smpboot_thread_fn+0xff/0x1a0 |
| [119482.315515] [<ffffffff81074e60>] ? lg_local_lock_cpu+0x40/0x40 |
| [119482.320219] [<ffffffff8106d870>] kthread+0xc0/0xd0 |
| [119482.324858] [<ffffffff8106d7b0>] ? insert_kthread_work+0x40/0x40 |
| [119482.329460] [<ffffffff816c32dc>] ret_from_fork+0x7c/0xb0 |
| [119482.334057] [<ffffffff8106d7b0>] ? insert_kthread_work+0x40/0x40 |
| [119482.338661] Code: 00 00 55 48 8b 17 48 b9 00 01 10 00 00 00 ad de 48 8b 47 08 48 89 e5 48 39 ca 74 29 48 b9 00 02 20 00 00 00 ad de 48 39 c8 74 7a <4c> 8b 00 4c 39 c7 75 53 4c 8b 42 08 4c 39 c7 75 2b 48 89 42 08 |
| [119482.343787] RIP [<ffffffff812ede89>] __list_del_entry+0x29/0xd0 |
| [119482.348675] RSP <ffff880216d5db58> |
| [119482.353493] CR2: 0000000000000000 |
| |
| Oops happened on this path: |
| ip_defrag() -> ip_frag_queue() -> inet_frag_lru_move() -> list_move_tail() -> __list_del_entry() |
| |
| Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org> |
| Cc: Jesper Dangaard Brouer <brouer@redhat.com> |
| Cc: Florian Westphal <fw@strlen.de> |
| Cc: Eric Dumazet <edumazet@google.com> |
| Cc: David S. Miller <davem@davemloft.net> |
| Acked-by: Florian Westphal <fw@strlen.de> |
| Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| include/net/inet_frag.h | 5 +++-- |
| net/ipv4/inet_fragment.c | 1 + |
| 2 files changed, 4 insertions(+), 2 deletions(-) |
| |
| --- a/include/net/inet_frag.h |
| +++ b/include/net/inet_frag.h |
| @@ -135,14 +135,15 @@ static inline int sum_frag_mem_limit(str |
| static inline void inet_frag_lru_move(struct inet_frag_queue *q) |
| { |
| spin_lock(&q->net->lru_lock); |
| - list_move_tail(&q->lru_list, &q->net->lru_list); |
| + if (!list_empty(&q->lru_list)) |
| + list_move_tail(&q->lru_list, &q->net->lru_list); |
| spin_unlock(&q->net->lru_lock); |
| } |
| |
| static inline void inet_frag_lru_del(struct inet_frag_queue *q) |
| { |
| spin_lock(&q->net->lru_lock); |
| - list_del(&q->lru_list); |
| + list_del_init(&q->lru_list); |
| spin_unlock(&q->net->lru_lock); |
| } |
| |
| --- a/net/ipv4/inet_fragment.c |
| +++ b/net/ipv4/inet_fragment.c |
| @@ -257,6 +257,7 @@ static struct inet_frag_queue *inet_frag |
| setup_timer(&q->timer, f->frag_expire, (unsigned long)q); |
| spin_lock_init(&q->lock); |
| atomic_set(&q->refcnt, 1); |
| + INIT_LIST_HEAD(&q->lru_list); |
| |
| return q; |
| } |