| From foo@baz Sat Jan 26 10:22:50 CET 2019 |
| From: Ido Schimmel <idosch@mellanox.com> |
| Date: Wed, 9 Jan 2019 09:57:39 +0000 |
| Subject: net: ipv4: Fix memory leak in network namespace dismantle |
| |
| From: Ido Schimmel <idosch@mellanox.com> |
| |
| [ Upstream commit f97f4dd8b3bb9d0993d2491e0f22024c68109184 ] |
| |
| IPv4 routing tables are flushed in two cases: |
| |
| 1. In response to events in the netdev and inetaddr notification chains |
| 2. When a network namespace is being dismantled |
| |
| In both cases only routes associated with a dead nexthop group are |
| flushed. However, a nexthop group will only be marked as dead in case it |
| is populated with actual nexthops using a nexthop device. This is not |
| the case when the route in question is an error route (e.g., |
| 'blackhole', 'unreachable'). |
| |
| Therefore, when a network namespace is being dismantled such routes are |
| not flushed and leaked [1]. |
| |
| To reproduce: |
| # ip netns add blue |
| # ip -n blue route add unreachable 192.0.2.0/24 |
| # ip netns del blue |
| |
| Fix this by not skipping error routes that are not marked with |
| RTNH_F_DEAD when flushing the routing tables. |
| |
| To prevent the flushing of such routes in case #1, add a parameter to |
| fib_table_flush() that indicates if the table is flushed as part of |
| namespace dismantle or not. |
| |
| Note that this problem does not exist in IPv6 since error routes are |
| associated with the loopback device. |
| |
| [1] |
| unreferenced object 0xffff888066650338 (size 56): |
| comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) |
| hex dump (first 32 bytes): |
| 00 00 00 00 00 00 00 00 b0 1c 62 61 80 88 ff ff ..........ba.... |
| e8 8b a1 64 80 88 ff ff 00 07 00 08 fe 00 00 00 ...d............ |
| backtrace: |
| [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 |
| [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 |
| [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 |
| [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 |
| [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 |
| [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 |
| [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 |
| [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 |
| [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 |
| [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe |
| [<000000003a8b605b>] 0xffffffffffffffff |
| unreferenced object 0xffff888061621c88 (size 48): |
| comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) |
| hex dump (first 32 bytes): |
| 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk |
| 6b 6b 6b 6b 6b 6b 6b 6b d8 8e 26 5f 80 88 ff ff kkkkkkkk..&_.... |
| backtrace: |
| [<00000000733609e3>] fib_table_insert+0x978/0x1500 |
| [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 |
| [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 |
| [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 |
| [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 |
| [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 |
| [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 |
| [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 |
| [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 |
| [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 |
| [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe |
| [<000000003a8b605b>] 0xffffffffffffffff |
| |
| Fixes: 8cced9eff1d4 ("[NETNS]: Enable routing configuration in non-initial namespace.") |
| Signed-off-by: Ido Schimmel <idosch@mellanox.com> |
| Reviewed-by: David Ahern <dsahern@gmail.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| include/net/ip_fib.h | 2 +- |
| net/ipv4/fib_frontend.c | 4 ++-- |
| net/ipv4/fib_trie.c | 15 ++++++++++++--- |
| 3 files changed, 15 insertions(+), 6 deletions(-) |
| |
| --- a/include/net/ip_fib.h |
| +++ b/include/net/ip_fib.h |
| @@ -241,7 +241,7 @@ int fib_table_delete(struct net *, struc |
| struct netlink_ext_ack *extack); |
| int fib_table_dump(struct fib_table *table, struct sk_buff *skb, |
| struct netlink_callback *cb, struct fib_dump_filter *filter); |
| -int fib_table_flush(struct net *net, struct fib_table *table); |
| +int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); |
| struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); |
| void fib_table_flush_external(struct fib_table *table); |
| void fib_free_table(struct fib_table *tb); |
| --- a/net/ipv4/fib_frontend.c |
| +++ b/net/ipv4/fib_frontend.c |
| @@ -203,7 +203,7 @@ static void fib_flush(struct net *net) |
| struct fib_table *tb; |
| |
| hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) |
| - flushed += fib_table_flush(net, tb); |
| + flushed += fib_table_flush(net, tb, false); |
| } |
| |
| if (flushed) |
| @@ -1463,7 +1463,7 @@ static void ip_fib_net_exit(struct net * |
| |
| hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { |
| hlist_del(&tb->tb_hlist); |
| - fib_table_flush(net, tb); |
| + fib_table_flush(net, tb, true); |
| fib_free_table(tb); |
| } |
| } |
| --- a/net/ipv4/fib_trie.c |
| +++ b/net/ipv4/fib_trie.c |
| @@ -1856,7 +1856,7 @@ void fib_table_flush_external(struct fib |
| } |
| |
| /* Caller must hold RTNL. */ |
| -int fib_table_flush(struct net *net, struct fib_table *tb) |
| +int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) |
| { |
| struct trie *t = (struct trie *)tb->tb_data; |
| struct key_vector *pn = t->kv; |
| @@ -1904,8 +1904,17 @@ int fib_table_flush(struct net *net, str |
| hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { |
| struct fib_info *fi = fa->fa_info; |
| |
| - if (!fi || !(fi->fib_flags & RTNH_F_DEAD) || |
| - tb->tb_id != fa->tb_id) { |
| + if (!fi || tb->tb_id != fa->tb_id || |
| + (!(fi->fib_flags & RTNH_F_DEAD) && |
| + !fib_props[fa->fa_type].error)) { |
| + slen = fa->fa_slen; |
| + continue; |
| + } |
| + |
| + /* Do not flush error routes if network namespace is |
| + * not being dismantled |
| + */ |
| + if (!flush_all && fib_props[fa->fa_type].error) { |
| slen = fa->fa_slen; |
| continue; |
| } |