| From foo@baz Mon 17 Jun 2019 06:58:02 PM CEST |
| From: Eric Dumazet <edumazet@google.com> |
| Date: Sat, 15 Jun 2019 17:44:24 -0700 |
| Subject: tcp: add tcp_min_snd_mss sysctl |
| |
| From: Eric Dumazet <edumazet@google.com> |
| |
| commit 5f3e2bf008c2221478101ee72f5cb4654b9fc363 upstream. |
| |
| Some TCP peers announce a very small MSS option in their SYN and/or |
| SYN/ACK messages. |
| |
| This forces the stack to send packets with a very high network/cpu |
| overhead. |
| |
| Linux has enforced a minimal value of 48. Since this value includes |
| the size of TCP options, and the options can consume up to 40 bytes, |
| this means that each segment may carry only 8 bytes of payload. |
| |
| In some cases, it can be useful to increase the minimal value |
| to a saner value. |
| |
| We still leave the default at 48 (TCP_MIN_SND_MSS), for compatibility |
| reasons. |
| |
| Note that TCP_MAXSEG socket option enforces a minimal value |
| of (TCP_MIN_MSS). David Miller increased this minimal value |
| in commit c39508d6f118 ("tcp: Make TCP_MAXSEG minimum more correct.") |
| from 64 to 88. |
| |
| We might in the future merge TCP_MIN_SND_MSS and TCP_MIN_MSS. |
| |
| CVE-2019-11479 -- tcp mss hardcoded to 48 |
| |
| Signed-off-by: Eric Dumazet <edumazet@google.com> |
| Suggested-by: Jonathan Looney <jtl@netflix.com> |
| Acked-by: Neal Cardwell <ncardwell@google.com> |
| Cc: Yuchung Cheng <ycheng@google.com> |
| Cc: Tyler Hicks <tyhicks@canonical.com> |
| Cc: Bruce Curtis <brucec@netflix.com> |
| Cc: Jonathan Lemon <jonathan.lemon@gmail.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| Documentation/networking/ip-sysctl.txt | 8 ++++++++ |
| include/net/netns/ipv4.h | 1 + |
| net/ipv4/sysctl_net_ipv4.c | 11 +++++++++++ |
| net/ipv4/tcp_ipv4.c | 1 + |
| net/ipv4/tcp_output.c | 3 +-- |
| 5 files changed, 22 insertions(+), 2 deletions(-) |
| |
| --- a/Documentation/networking/ip-sysctl.txt |
| +++ b/Documentation/networking/ip-sysctl.txt |
| @@ -220,6 +220,14 @@ tcp_base_mss - INTEGER |
| Path MTU discovery (MTU probing). If MTU probing is enabled, |
| this is the initial MSS used by the connection. |
| |
| +tcp_min_snd_mss - INTEGER |
| + TCP SYN and SYNACK messages usually advertise an ADVMSS option, |
| + as described in RFC 1122 and RFC 6691. |
| + If this ADVMSS option is smaller than tcp_min_snd_mss, |
| + it is silently raised to tcp_min_snd_mss. |
| + |
| + Default : 48 (at least 8 bytes of payload per segment) |
| + |
| tcp_congestion_control - STRING |
| Set the congestion control algorithm to be used for new |
| connections. The algorithm "reno" is always available, but |
| --- a/include/net/netns/ipv4.h |
| +++ b/include/net/netns/ipv4.h |
| @@ -88,6 +88,7 @@ struct netns_ipv4 { |
| int sysctl_tcp_fwmark_accept; |
| int sysctl_tcp_mtu_probing; |
| int sysctl_tcp_base_mss; |
| + int sysctl_tcp_min_snd_mss; |
| int sysctl_tcp_probe_threshold; |
| u32 sysctl_tcp_probe_interval; |
| |
| --- a/net/ipv4/sysctl_net_ipv4.c |
| +++ b/net/ipv4/sysctl_net_ipv4.c |
| @@ -36,6 +36,8 @@ static int ip_local_port_range_min[] = { |
| static int ip_local_port_range_max[] = { 65535, 65535 }; |
| static int tcp_adv_win_scale_min = -31; |
| static int tcp_adv_win_scale_max = 31; |
| +static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; |
| +static int tcp_min_snd_mss_max = 65535; |
| static int ip_ttl_min = 1; |
| static int ip_ttl_max = 255; |
| static int tcp_syn_retries_min = 1; |
| @@ -942,6 +944,15 @@ static struct ctl_table ipv4_net_table[] |
| .proc_handler = proc_dointvec, |
| }, |
| { |
| + .procname = "tcp_min_snd_mss", |
| + .data = &init_net.ipv4.sysctl_tcp_min_snd_mss, |
| + .maxlen = sizeof(int), |
| + .mode = 0644, |
| + .proc_handler = proc_dointvec_minmax, |
| + .extra1 = &tcp_min_snd_mss_min, |
| + .extra2 = &tcp_min_snd_mss_max, |
| + }, |
| + { |
| .procname = "tcp_probe_threshold", |
| .data = &init_net.ipv4.sysctl_tcp_probe_threshold, |
| .maxlen = sizeof(int), |
| --- a/net/ipv4/tcp_ipv4.c |
| +++ b/net/ipv4/tcp_ipv4.c |
| @@ -2419,6 +2419,7 @@ static int __net_init tcp_sk_init(struct |
| net->ipv4.sysctl_tcp_ecn_fallback = 1; |
| |
| net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; |
| + net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS; |
| net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; |
| net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; |
| |
| --- a/net/ipv4/tcp_output.c |
| +++ b/net/ipv4/tcp_output.c |
| @@ -1332,8 +1332,7 @@ static inline int __tcp_mtu_to_mss(struc |
| mss_now -= icsk->icsk_ext_hdr_len; |
| |
| /* Then reserve room for full set of TCP options and 8 bytes of data */ |
| - if (mss_now < TCP_MIN_SND_MSS) |
| - mss_now = TCP_MIN_SND_MSS; |
| + mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); |
| return mss_now; |
| } |
| |