| From ebf5c89b567f46df87dd665aa829e49345702e9b Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Fri, 25 Jun 2021 19:21:39 +0300 |
| Subject: net: lwtunnel: handle MTU calculation in forwarding |
| |
| From: Vadim Fedorenko <vfedorenko@novek.ru> |
| |
| [ Upstream commit fade56410c22cacafb1be9f911a0afd3701d8366 ] |
| |
| Commit 14972cbd34ff ("net: lwtunnel: Handle fragmentation") moved |
| fragmentation logic away from lwtunnel by carrying the encap headroom |
| and using it in the output MTU calculation. But the forwarding path |
| was not covered, which created a difference between the output and |
| forwarding MTU and led to silent drops on the IPv4 forwarding path. |
| Fix it by taking the lwtunnel encap headroom into account. |
| |
| The same commit also introduced a difference in how RTAX_MTU is |
| treated in IPv4 and IPv6: the latter explicitly subtracts the lwtunnel |
| encap headroom from the route MTU. Make the IPv4 version do the same. |
| |
| Fixes: 14972cbd34ff ("net: lwtunnel: Handle fragmentation") |
| Suggested-by: David Ahern <dsahern@gmail.com> |
| Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru> |
| Reviewed-by: David Ahern <dsahern@kernel.org> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| include/net/ip.h | 12 ++++++++---- |
| include/net/ip6_route.h | 16 ++++++++++++---- |
| net/ipv4/route.c | 3 ++- |
| 3 files changed, 22 insertions(+), 9 deletions(-) |
| |
| diff --git a/include/net/ip.h b/include/net/ip.h |
| index 2d6b985d11cc..5538e54d4620 100644 |
| --- a/include/net/ip.h |
| +++ b/include/net/ip.h |
| @@ -31,6 +31,7 @@ |
| #include <net/flow.h> |
| #include <net/flow_dissector.h> |
| #include <net/netns/hash.h> |
| +#include <net/lwtunnel.h> |
| |
| #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ |
| #define IPV4_MIN_MTU 68 /* RFC 791 */ |
| @@ -445,22 +446,25 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, |
| |
| /* 'forwarding = true' case should always honour route mtu */ |
| mtu = dst_metric_raw(dst, RTAX_MTU); |
| - if (mtu) |
| - return mtu; |
| + if (!mtu) |
| + mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); |
| |
| - return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); |
| + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); |
| } |
| |
| static inline unsigned int ip_skb_dst_mtu(struct sock *sk, |
| const struct sk_buff *skb) |
| { |
| + unsigned int mtu; |
| + |
| if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { |
| bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; |
| |
| return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); |
| } |
| |
| - return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); |
| + mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); |
| + return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); |
| } |
| |
| struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, |
| diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h |
| index 2a5277758379..37a7fb1969d6 100644 |
| --- a/include/net/ip6_route.h |
| +++ b/include/net/ip6_route.h |
| @@ -264,11 +264,18 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, |
| |
| static inline int ip6_skb_dst_mtu(struct sk_buff *skb) |
| { |
| + int mtu; |
| + |
| struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? |
| inet6_sk(skb->sk) : NULL; |
| |
| - return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ? |
| - skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); |
| + if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { |
| + mtu = READ_ONCE(skb_dst(skb)->dev->mtu); |
| + mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); |
| + } else |
| + mtu = dst_mtu(skb_dst(skb)); |
| + |
| + return mtu; |
| } |
| |
| static inline bool ip6_sk_accept_pmtu(const struct sock *sk) |
| @@ -316,7 +323,7 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) |
| if (dst_metric_locked(dst, RTAX_MTU)) { |
| mtu = dst_metric_raw(dst, RTAX_MTU); |
| if (mtu) |
| - return mtu; |
| + goto out; |
| } |
| |
| mtu = IPV6_MIN_MTU; |
| @@ -326,7 +333,8 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) |
| mtu = idev->cnf.mtu6; |
| rcu_read_unlock(); |
| |
| - return mtu; |
| +out: |
| + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); |
| } |
| |
| u32 ip6_mtu_from_fib6(const struct fib6_result *res, |
| diff --git a/net/ipv4/route.c b/net/ipv4/route.c |
| index e968bb47d5bd..e15c1d8b7c8d 100644 |
| --- a/net/ipv4/route.c |
| +++ b/net/ipv4/route.c |
| @@ -1327,7 +1327,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) |
| mtu = dst_metric_raw(dst, RTAX_MTU); |
| |
| if (mtu) |
| - return mtu; |
| + goto out; |
| |
| mtu = READ_ONCE(dst->dev->mtu); |
| |
| @@ -1336,6 +1336,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) |
| mtu = 576; |
| } |
| |
| +out: |
| mtu = min_t(unsigned int, mtu, IP_MAX_MTU); |
| |
| return mtu - lwtunnel_headroom(dst->lwtstate, mtu); |
| -- |
| 2.30.2 |
| |