| From foo@baz Fri Jan 22 01:21:57 PM CET 2021 |
| From: Aya Levin <ayal@nvidia.com> |
| Date: Thu, 7 Jan 2021 15:50:18 +0200 |
| Subject: net: ipv6: Validate GSO SKB before finish IPv6 processing |
| |
| From: Aya Levin <ayal@nvidia.com> |
| |
| [ Upstream commit b210de4f8c97d57de051e805686248ec4c6cfc52 ] |
| |
| There are cases where a GSO segment's length exceeds the egress MTU: |
| - Forwarding of a TCP GRO skb, when DF flag is not set. |
| - Forwarding of an skb that arrived on a virtualisation interface |
| (virtio-net/vhost/tap) with TSO/GSO size set by other network |
| stack. |
| - Local GSO skb transmitted on a NETIF_F_TSO tunnel stacked over an |
| interface with a smaller MTU. |
| - Arriving GRO skb (or GSO skb in a virtualised environment) that is |
| bridged to a NETIF_F_TSO tunnel stacked over an interface with an |
| insufficient MTU. |
| |
| If so: |
| - Consume the SKB and its segments. |
| - Issue an ICMP packet with 'Packet Too Big' message containing the |
| MTU, allowing the source host to reduce its Path MTU appropriately. |
| |
| Note: These cases are handled in the same manner in the IPv4 output finish path. |
| This patch aligns the IPv6 behavior with that of IPv4. |
| |
| Fixes: 9e50849054a4 ("netfilter: ipv6: move POSTROUTING invocation before fragmentation") |
| Signed-off-by: Aya Levin <ayal@nvidia.com> |
| Reviewed-by: Tariq Toukan <tariqt@nvidia.com> |
| Link: https://lore.kernel.org/r/1610027418-30438-1-git-send-email-ayal@nvidia.com |
| Signed-off-by: Jakub Kicinski <kuba@kernel.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| net/ipv6/ip6_output.c | 40 +++++++++++++++++++++++++++++++++++++++- |
| 1 file changed, 39 insertions(+), 1 deletion(-) |
| |
| --- a/net/ipv6/ip6_output.c |
| +++ b/net/ipv6/ip6_output.c |
| @@ -128,8 +128,42 @@ static int ip6_finish_output2(struct net |
| return -EINVAL; |
| } |
| |
| +static int |
| +ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, |
| + struct sk_buff *skb, unsigned int mtu) |
| +{ |
| + struct sk_buff *segs, *nskb; |
| + netdev_features_t features; |
| + int ret = 0; |
| + |
| + /* Please see corresponding comment in ip_finish_output_gso |
| + * describing the cases where GSO segment length exceeds the |
| + * egress MTU. |
| + */ |
| + features = netif_skb_features(skb); |
| + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); |
| + if (IS_ERR_OR_NULL(segs)) { |
| + kfree_skb(skb); |
| + return -ENOMEM; |
| + } |
| + |
| + consume_skb(skb); |
| + |
| + skb_list_walk_safe(segs, segs, nskb) { |
| + int err; |
| + |
| + skb_mark_not_on_list(segs); |
| + err = ip6_fragment(net, sk, segs, ip6_finish_output2); |
| + if (err && ret == 0) |
| + ret = err; |
| + } |
| + |
| + return ret; |
| +} |
| + |
| static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) |
| { |
| + unsigned int mtu; |
| int ret; |
| |
| ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); |
| @@ -146,7 +180,11 @@ static int ip6_finish_output(struct net |
| } |
| #endif |
| |
| - if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || |
| + mtu = ip6_skb_dst_mtu(skb); |
| + if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)) |
| + return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); |
| + |
| + if ((skb->len > mtu && !skb_is_gso(skb)) || |
| dst_allfrag(skb_dst(skb)) || |
| (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) |
| return ip6_fragment(net, sk, skb, ip6_finish_output2); |