| From foo@baz Wed Sep 30 05:25:07 CEST 2015 |
| From: Roopa Prabhu <roopa@cumulusnetworks.com> |
| Date: Tue, 8 Sep 2015 10:53:04 -0700 |
| Subject: ipv6: fix multipath route replace error recovery |
| |
| From: Roopa Prabhu <roopa@cumulusnetworks.com> |
| |
| [ Upstream commit 6b9ea5a64ed5eeb3f68f2e6fcce0ed1179801d1e ] |
| |
| Problem: |
| The ecmp route replace support for ipv6 in the kernel, deletes the |
| existing ecmp route too early, ie when it installs the first nexthop. |
| If there is an error in installing the subsequent nexthops, its too late |
| to recover the already deleted existing route leaving the fib |
| in an inconsistent state. |
| |
| This patch reduces the possibility of this by doing the following: |
| a) Changes the existing multipath route add code to a two stage process: |
| build rt6_infos + insert them |
| ip6_route_add rt6_info creation code is moved into |
| ip6_route_info_create. |
| b) This ensures that most errors are caught during building rt6_infos |
| and we fail early |
| c) Separates multipath add and del code. Because add needs the special |
| two stage mode in a) and delete essentially does not care. |
| d) In any event if the code fails during inserting a route again, a |
| warning is printed (This should be unlikely) |
| |
| Before the patch: |
| $ip -6 route show |
| 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 |
| 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 |
| 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 |
| |
| /* Try replacing the route with a duplicate nexthop */ |
| $ip -6 route change 3000:1000:1000:1000::2/128 nexthop via |
| fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev |
| swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1 |
| RTNETLINK answers: File exists |
| |
| $ip -6 route show |
| /* previously added ecmp route 3000:1000:1000:1000::2 dissappears from |
| * kernel */ |
| |
| After the patch: |
| $ip -6 route show |
| 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 |
| 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 |
| 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 |
| |
| /* Try replacing the route with a duplicate nexthop */ |
| $ip -6 route change 3000:1000:1000:1000::2/128 nexthop via |
| fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev |
| swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1 |
| RTNETLINK answers: File exists |
| |
| $ip -6 route show |
| 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 |
| 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 |
| 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 |
| |
| Fixes: 27596472473a ("ipv6: fix ECMP route replacement") |
| Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com> |
| Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> |
| Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| net/ipv6/route.c | 201 +++++++++++++++++++++++++++++++++++++++++++++++-------- |
| 1 file changed, 175 insertions(+), 26 deletions(-) |
| |
| --- a/net/ipv6/route.c |
| +++ b/net/ipv6/route.c |
| @@ -1727,7 +1727,7 @@ static int ip6_convert_metrics(struct mx |
| return -EINVAL; |
| } |
| |
| -int ip6_route_add(struct fib6_config *cfg) |
| +int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) |
| { |
| int err; |
| struct net *net = cfg->fc_nlinfo.nl_net; |
| @@ -1735,7 +1735,6 @@ int ip6_route_add(struct fib6_config *cf |
| struct net_device *dev = NULL; |
| struct inet6_dev *idev = NULL; |
| struct fib6_table *table; |
| - struct mx6_config mxc = { .mx = NULL, }; |
| int addr_type; |
| |
| if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) |
| @@ -1941,6 +1940,32 @@ install_route: |
| |
| cfg->fc_nlinfo.nl_net = dev_net(dev); |
| |
| + *rt_ret = rt; |
| + |
| + return 0; |
| +out: |
| + if (dev) |
| + dev_put(dev); |
| + if (idev) |
| + in6_dev_put(idev); |
| + if (rt) |
| + dst_free(&rt->dst); |
| + |
| + *rt_ret = NULL; |
| + |
| + return err; |
| +} |
| + |
| +int ip6_route_add(struct fib6_config *cfg) |
| +{ |
| + struct mx6_config mxc = { .mx = NULL, }; |
| + struct rt6_info *rt = NULL; |
| + int err; |
| + |
| + err = ip6_route_info_create(cfg, &rt); |
| + if (err) |
| + goto out; |
| + |
| err = ip6_convert_metrics(&mxc, cfg); |
| if (err) |
| goto out; |
| @@ -1948,14 +1973,12 @@ install_route: |
| err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); |
| |
| kfree(mxc.mx); |
| + |
| return err; |
| out: |
| - if (dev) |
| - dev_put(dev); |
| - if (idev) |
| - in6_dev_put(idev); |
| if (rt) |
| dst_free(&rt->dst); |
| + |
| return err; |
| } |
| |
| @@ -2727,19 +2750,78 @@ errout: |
| return err; |
| } |
| |
| -static int ip6_route_multipath(struct fib6_config *cfg, int add) |
| +struct rt6_nh { |
| + struct rt6_info *rt6_info; |
| + struct fib6_config r_cfg; |
| + struct mx6_config mxc; |
| + struct list_head next; |
| +}; |
| + |
| +static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) |
| +{ |
| + struct rt6_nh *nh; |
| + |
| + list_for_each_entry(nh, rt6_nh_list, next) { |
| + pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n", |
| + &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, |
| + nh->r_cfg.fc_ifindex); |
| + } |
| +} |
| + |
| +static int ip6_route_info_append(struct list_head *rt6_nh_list, |
| + struct rt6_info *rt, struct fib6_config *r_cfg) |
| +{ |
| + struct rt6_nh *nh; |
| + struct rt6_info *rtnh; |
| + int err = -EEXIST; |
| + |
| + list_for_each_entry(nh, rt6_nh_list, next) { |
| + /* check if rt6_info already exists */ |
| + rtnh = nh->rt6_info; |
| + |
| + if (rtnh->dst.dev == rt->dst.dev && |
| + rtnh->rt6i_idev == rt->rt6i_idev && |
| + ipv6_addr_equal(&rtnh->rt6i_gateway, |
| + &rt->rt6i_gateway)) |
| + return err; |
| + } |
| + |
| + nh = kzalloc(sizeof(*nh), GFP_KERNEL); |
| + if (!nh) |
| + return -ENOMEM; |
| + nh->rt6_info = rt; |
| + err = ip6_convert_metrics(&nh->mxc, r_cfg); |
| + if (err) { |
| + kfree(nh); |
| + return err; |
| + } |
| + memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); |
| + list_add_tail(&nh->next, rt6_nh_list); |
| + |
| + return 0; |
| +} |
| + |
| +static int ip6_route_multipath_add(struct fib6_config *cfg) |
| { |
| struct fib6_config r_cfg; |
| struct rtnexthop *rtnh; |
| + struct rt6_info *rt; |
| + struct rt6_nh *err_nh; |
| + struct rt6_nh *nh, *nh_safe; |
| int remaining; |
| int attrlen; |
| - int err = 0, last_err = 0; |
| + int err = 1; |
| + int nhn = 0; |
| + int replace = (cfg->fc_nlinfo.nlh && |
| + (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); |
| + LIST_HEAD(rt6_nh_list); |
| |
| remaining = cfg->fc_mp_len; |
| -beginning: |
| rtnh = (struct rtnexthop *)cfg->fc_mp; |
| |
| - /* Parse a Multipath Entry */ |
| + /* Parse a Multipath Entry and build a list (rt6_nh_list) of |
| + * rt6_info structs per nexthop |
| + */ |
| while (rtnh_ok(rtnh, remaining)) { |
| memcpy(&r_cfg, cfg, sizeof(*cfg)); |
| if (rtnh->rtnh_ifindex) |
| @@ -2755,22 +2837,32 @@ beginning: |
| r_cfg.fc_flags |= RTF_GATEWAY; |
| } |
| } |
| - err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg); |
| + |
| + err = ip6_route_info_create(&r_cfg, &rt); |
| + if (err) |
| + goto cleanup; |
| + |
| + err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); |
| if (err) { |
| - last_err = err; |
| - /* If we are trying to remove a route, do not stop the |
| - * loop when ip6_route_del() fails (because next hop is |
| - * already gone), we should try to remove all next hops. |
| - */ |
| - if (add) { |
| - /* If add fails, we should try to delete all |
| - * next hops that have been already added. |
| - */ |
| - add = 0; |
| - remaining = cfg->fc_mp_len - remaining; |
| - goto beginning; |
| - } |
| + dst_free(&rt->dst); |
| + goto cleanup; |
| + } |
| + |
| + rtnh = rtnh_next(rtnh, &remaining); |
| + } |
| + |
| + err_nh = NULL; |
| + list_for_each_entry(nh, &rt6_nh_list, next) { |
| + err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); |
| + /* nh->rt6_info is used or freed at this point, reset to NULL*/ |
| + nh->rt6_info = NULL; |
| + if (err) { |
| + if (replace && nhn) |
| + ip6_print_replace_route_err(&rt6_nh_list); |
| + err_nh = nh; |
| + goto add_errout; |
| } |
| + |
| /* Because each route is added like a single route we remove |
| * these flags after the first nexthop: if there is a collision, |
| * we have already failed to add the first nexthop: |
| @@ -2780,6 +2872,63 @@ beginning: |
| */ |
| cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | |
| NLM_F_REPLACE); |
| + nhn++; |
| + } |
| + |
| + goto cleanup; |
| + |
| +add_errout: |
| + /* Delete routes that were already added */ |
| + list_for_each_entry(nh, &rt6_nh_list, next) { |
| + if (err_nh == nh) |
| + break; |
| + ip6_route_del(&nh->r_cfg); |
| + } |
| + |
| +cleanup: |
| + list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { |
| + if (nh->rt6_info) |
| + dst_free(&nh->rt6_info->dst); |
| + if (nh->mxc.mx) |
| + kfree(nh->mxc.mx); |
| + list_del(&nh->next); |
| + kfree(nh); |
| + } |
| + |
| + return err; |
| +} |
| + |
| +static int ip6_route_multipath_del(struct fib6_config *cfg) |
| +{ |
| + struct fib6_config r_cfg; |
| + struct rtnexthop *rtnh; |
| + int remaining; |
| + int attrlen; |
| + int err = 1, last_err = 0; |
| + |
| + remaining = cfg->fc_mp_len; |
| + rtnh = (struct rtnexthop *)cfg->fc_mp; |
| + |
| + /* Parse a Multipath Entry */ |
| + while (rtnh_ok(rtnh, remaining)) { |
| + memcpy(&r_cfg, cfg, sizeof(*cfg)); |
| + if (rtnh->rtnh_ifindex) |
| + r_cfg.fc_ifindex = rtnh->rtnh_ifindex; |
| + |
| + attrlen = rtnh_attrlen(rtnh); |
| + if (attrlen > 0) { |
| + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); |
| + |
| + nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
| + if (nla) { |
| + nla_memcpy(&r_cfg.fc_gateway, nla, 16); |
| + r_cfg.fc_flags |= RTF_GATEWAY; |
| + } |
| + } |
| + err = ip6_route_del(&r_cfg); |
| + if (err) |
| + last_err = err; |
| + |
| rtnh = rtnh_next(rtnh, &remaining); |
| } |
| |
| @@ -2796,7 +2945,7 @@ static int inet6_rtm_delroute(struct sk_ |
| return err; |
| |
| if (cfg.fc_mp) |
| - return ip6_route_multipath(&cfg, 0); |
| + return ip6_route_multipath_del(&cfg); |
| else |
| return ip6_route_del(&cfg); |
| } |
| @@ -2811,7 +2960,7 @@ static int inet6_rtm_newroute(struct sk_ |
| return err; |
| |
| if (cfg.fc_mp) |
| - return ip6_route_multipath(&cfg, 1); |
| + return ip6_route_multipath_add(&cfg); |
| else |
| return ip6_route_add(&cfg); |
| } |