| From ea1f17d1c56fe3cfab272ce7760bb31df09bc042 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Fri, 9 Jul 2021 17:20:51 -0700 |
| Subject: mptcp: properly account bulk freed memory |
| |
| From: Paolo Abeni <pabeni@redhat.com> |
| |
| [ Upstream commit ce599c516386f09ca30848a1a4eb93d3fffbe187 ] |
| |
| After commit 879526030c8b ("mptcp: protect the rx path with |
| the msk socket spinlock") the rmem currently used by a given |
| msk is really sk_rmem_alloc - rmem_released. |
| |
| The safety check in mptcp_data_ready() does not take the above |
| into account; as a result, legitimate incoming data is kept in the |
| subflow receive queue for no reason, delaying or blocking |
| MPTCP-level ack generation. |
| |
| This change addresses the issue by introducing a new helper to |
| fetch the rmem memory and using it as needed. Additionally, add a |
| MIB counter for the exceptional event described above - the peer |
| is misbehaving. |
| |
| Finally, introduce the required annotation when rmem_released is |
| updated. |
| |
| Fixes: 879526030c8b ("mptcp: protect the rx path with the msk socket spinlock") |
| Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/211 |
| Signed-off-by: Paolo Abeni <pabeni@redhat.com> |
| Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| net/mptcp/mib.c | 1 + |
| net/mptcp/mib.h | 1 + |
| net/mptcp/protocol.c | 12 +++++++----- |
| net/mptcp/protocol.h | 10 +++++++++- |
| 4 files changed, 18 insertions(+), 6 deletions(-) |
| |
| diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c |
| index eb2dc6dbe212..c8f4823cd79f 100644 |
| --- a/net/mptcp/mib.c |
| +++ b/net/mptcp/mib.c |
| @@ -42,6 +42,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { |
| SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), |
| SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), |
| SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), |
| + SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), |
| SNMP_MIB_SENTINEL |
| }; |
| |
| diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h |
| index f0da4f060fe1..93fa7c95e206 100644 |
| --- a/net/mptcp/mib.h |
| +++ b/net/mptcp/mib.h |
| @@ -35,6 +35,7 @@ enum linux_mptcp_mib_field { |
| MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ |
| MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ |
| MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ |
| + MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */ |
| __MPTCP_MIB_MAX |
| }; |
| |
| diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c |
| index 18f152bdb66f..94b707a39bc3 100644 |
| --- a/net/mptcp/protocol.c |
| +++ b/net/mptcp/protocol.c |
| @@ -465,7 +465,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) |
| bool cleanup, rx_empty; |
| |
| cleanup = (space > 0) && (space >= (old_space << 1)); |
| - rx_empty = !atomic_read(&sk->sk_rmem_alloc); |
| + rx_empty = !__mptcp_rmem(sk); |
| |
| mptcp_for_each_subflow(msk, subflow) { |
| struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
| @@ -714,8 +714,10 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) |
| sk_rbuf = ssk_rbuf; |
| |
| /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ |
| - if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) |
| + if (__mptcp_rmem(sk) > sk_rbuf) { |
| + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); |
| return; |
| + } |
| |
| /* Wake-up the reader only for in-sequence data */ |
| mptcp_data_lock(sk); |
| @@ -1799,7 +1801,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, |
| if (!(flags & MSG_PEEK)) { |
| /* we will bulk release the skb memory later */ |
| skb->destructor = NULL; |
| - msk->rmem_released += skb->truesize; |
| + WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); |
| __skb_unlink(skb, &msk->receive_queue); |
| __kfree_skb(skb); |
| } |
| @@ -1918,7 +1920,7 @@ static void __mptcp_update_rmem(struct sock *sk) |
| |
| atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); |
| sk_mem_uncharge(sk, msk->rmem_released); |
| - msk->rmem_released = 0; |
| + WRITE_ONCE(msk->rmem_released, 0); |
| } |
| |
| static void __mptcp_splice_receive_queue(struct sock *sk) |
| @@ -2420,7 +2422,7 @@ static int __mptcp_init_sock(struct sock *sk) |
| msk->out_of_order_queue = RB_ROOT; |
| msk->first_pending = NULL; |
| msk->wmem_reserved = 0; |
| - msk->rmem_released = 0; |
| + WRITE_ONCE(msk->rmem_released, 0); |
| msk->tx_pending_data = 0; |
| msk->size_goal_cache = TCP_BASE_MSS; |
| |
| diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h |
| index f842c832f6b0..dc5b71de0a9a 100644 |
| --- a/net/mptcp/protocol.h |
| +++ b/net/mptcp/protocol.h |
| @@ -290,9 +290,17 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) |
| return (struct mptcp_sock *)sk; |
| } |
| |
| +/* the msk socket don't use the backlog, also account for the bulk |
| + * free memory |
| + */ |
| +static inline int __mptcp_rmem(const struct sock *sk) |
| +{ |
| + return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released); |
| +} |
| + |
| static inline int __mptcp_space(const struct sock *sk) |
| { |
| - return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released); |
| + return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk)); |
| } |
| |
| static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) |
| -- |
| 2.30.2 |
| |