| From foo@baz Wed Sep 21 12:45:10 CEST 2016 |
| From: Lance Richardson <lrichard@redhat.com> |
| Date: Tue, 9 Aug 2016 15:29:42 -0400 |
| Subject: vti: flush x-netns xfrm cache when vti interface is removed |
| |
| From: Lance Richardson <lrichard@redhat.com> |
| |
| |
| [ Upstream commit a5d0dc810abf3d6b241777467ee1d6efb02575fc ] |
| |
| When executing the script included below, the netns delete operation |
| hangs with the following message (repeated at 10 second intervals): |
| |
| kernel:unregister_netdevice: waiting for lo to become free. Usage count = 1 |
| |
| This occurs because a reference to the lo interface in the "secure" netns |
| is still held by a dst entry in the xfrm bundle cache in the init netns. |
| |
| Address this problem by garbage collecting the tunnel netns flow cache |
| when a cross-namespace vti interface receives a NETDEV_DOWN notification. |
| |
| A more detailed description of the problem scenario (referencing commands |
| in the script below): |
| |
| (1) ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1 |
| |
| The vti_test interface is created in the init namespace. vti_tunnel_init() |
| attaches a struct ip_tunnel to the vti interface's netdev_priv(dev), |
| setting the tunnel net to &init_net. |
| |
| (2) ip link set vti_test netns secure |
| |
| The vti_test interface is moved to the "secure" netns. Note that |
| the associated struct ip_tunnel still has tunnel->net set to &init_net. |
| |
| (3) ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1 |
| |
| The first packet sent using the vti device causes xfrm_lookup() to be |
| called as follows: |
| |
| dst = xfrm_lookup(tunnel->net, skb_dst(skb), fl, NULL, 0); |
| |
| Note that tunnel->net is the init namespace, while skb_dst(skb) references |
| the vti_test interface in the "secure" namespace. The returned dst |
| references an interface in the init namespace. |
| |
| Also note that the first parameter to xfrm_lookup() determines which flow |
| cache is used to store the computed xfrm bundle, so after xfrm_lookup() |
| returns there will be a cached bundle in the init namespace flow cache |
| with a dst referencing a device in the "secure" namespace. |
| |
| (4) ip netns del secure |
| |
| Kernel begins to delete the "secure" namespace. At some point the |
| vti_test interface is deleted, at which point dst_ifdown() changes |
| the dst->dev in the cached xfrm bundle flow from vti_test to lo (still |
| in the "secure" namespace however). |
| Since nothing has happened to cause the init namespace's flow cache |
| to be garbage collected, this dst remains attached to the flow cache, |
| so the kernel loops waiting for the last reference to lo to go away. |
| |
| <Begin script> |
| ip link add br1 type bridge |
| ip link set dev br1 up |
| ip addr add dev br1 1.1.1.1/8 |
| |
| ip netns add secure |
| ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1 |
| ip link set vti_test netns secure |
| ip netns exec secure ip link set vti_test up |
| ip netns exec secure ip link s lo up |
| ip netns exec secure ip addr add dev lo 192.168.100.1/24 |
| ip netns exec secure ip route add 192.168.200.0/24 dev vti_test |
| ip xfrm policy flush |
| ip xfrm state flush |
| ip xfrm policy add dir out tmpl src 1.1.1.1 dst 1.1.1.2 \ |
| proto esp mode tunnel mark 1 |
| ip xfrm policy add dir in tmpl src 1.1.1.2 dst 1.1.1.1 \ |
| proto esp mode tunnel mark 1 |
| ip xfrm state add src 1.1.1.1 dst 1.1.1.2 proto esp spi 1 \ |
| mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788 |
| ip xfrm state add src 1.1.1.2 dst 1.1.1.1 proto esp spi 1 \ |
| mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788 |
| |
| ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1 |
| |
| ip netns del secure |
| <End script> |
| |
| Reported-by: Hangbin Liu <haliu@redhat.com> |
| Reported-by: Jan Tluka <jtluka@redhat.com> |
| Signed-off-by: Lance Richardson <lrichard@redhat.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| net/ipv4/ip_vti.c | 31 +++++++++++++++++++++++++++++++ |
| 1 file changed, 31 insertions(+) |
| |
| --- a/net/ipv4/ip_vti.c |
| +++ b/net/ipv4/ip_vti.c |
| @@ -540,6 +540,33 @@ static struct rtnl_link_ops vti_link_ops |
| .get_link_net = ip_tunnel_get_link_net, |
| }; |
| |
| +static bool is_vti_tunnel(const struct net_device *dev) |
| +{ |
| + return dev->netdev_ops == &vti_netdev_ops; /* true iff dev uses the vti ops table */ |
| +} |
| + |
| +static int vti_device_event(struct notifier_block *unused, |
| + unsigned long event, void *ptr) /* ptr: notifier info identifying the device */ |
| +{ |
| + struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
| + struct ip_tunnel *tunnel = netdev_priv(dev); /* dereferenced only after the check below */ |
| + |
| + if (!is_vti_tunnel(dev)) |
| + return NOTIFY_DONE; /* event is for some other device type: ignore */ |
| + |
| + switch (event) { |
| + case NETDEV_DOWN: |
| + if (!net_eq(tunnel->net, dev_net(dev))) /* only when the vti was moved across netns */ |
| + xfrm_garbage_collect(tunnel->net); /* flush the tunnel netns xfrm flow cache */ |
| + break; |
| + } |
| + return NOTIFY_DONE; /* same result whether or not the event was acted on */ |
| +} |
| + |
| +static struct notifier_block vti_notifier_block __read_mostly = { |
| + .notifier_call = vti_device_event, /* registered in vti_init(), removed in vti_fini() */ |
| +}; |
| + |
| static int __init vti_init(void) |
| { |
| const char *msg; |
| @@ -547,6 +574,8 @@ static int __init vti_init(void) |
| |
| pr_info("IPv4 over IPsec tunneling driver\n"); |
| |
| + register_netdevice_notifier(&vti_notifier_block); |
| + |
| msg = "tunnel device"; |
| err = register_pernet_device(&vti_net_ops); |
| if (err < 0) |
| @@ -579,6 +608,7 @@ xfrm_proto_ah_failed: |
| xfrm_proto_esp_failed: |
| unregister_pernet_device(&vti_net_ops); |
| pernet_dev_failed: |
| + unregister_netdevice_notifier(&vti_notifier_block); |
| pr_err("vti init: failed to register %s\n", msg); |
| return err; |
| } |
| @@ -590,6 +620,7 @@ static void __exit vti_fini(void) |
| xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); |
| xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); |
| unregister_pernet_device(&vti_net_ops); |
| + unregister_netdevice_notifier(&vti_notifier_block); |
| } |
| |
| module_init(vti_init); |