| From foo@baz Fri Dec 11 11:39:46 EST 2015 |
| From: Rainer Weikusat <rweikusat@mobileactivedefense.com> |
| Date: Fri, 20 Nov 2015 22:07:23 +0000 |
| Subject: unix: avoid use-after-free in ep_remove_wait_queue |
| |
| From: Rainer Weikusat <rweikusat@mobileactivedefense.com> |
| |
| [ Upstream commit 7d267278a9ece963d77eefec61630223fce08c6c ] |
| |
| Rainer Weikusat <rweikusat@mobileactivedefense.com> writes: |
| An AF_UNIX datagram socket being the client in an n:1 association with |
| some server socket is only allowed to send messages to the server if the |
| receive queue of this socket contains at most sk_max_ack_backlog |
| datagrams. This implies that prospective writers might be forced to go |
| to sleep despite none of the message presently enqueued on the server |
| receive queue were sent by them. In order to ensure that these will be |
| woken up once space becomes again available, the present unix_dgram_poll |
| routine does a second sock_poll_wait call with the peer_wait wait queue |
| of the server socket as queue argument (unix_dgram_recvmsg does a wake |
| up on this queue after a datagram was received). This is inherently |
| problematic because the server socket is only guaranteed to remain alive |
| for as long as the client still holds a reference to it. In case the |
| connection is dissolved via connect or by the dead peer detection logic |
| in unix_dgram_sendmsg, the server socket may be freed despite "the |
| polling mechanism" (in particular, epoll) still has a pointer to the |
| corresponding peer_wait queue. There's no way to forcibly deregister a |
| wait queue with epoll. |
| |
| Based on an idea by Jason Baron, the patch below changes the code such |
| that a wait_queue_t belonging to the client socket is enqueued on the |
| peer_wait queue of the server whenever the peer receive queue full |
| condition is detected by either a sendmsg or a poll. A wake up on the |
| peer queue is then relayed to the ordinary wait queue of the client |
| socket via wake function. The connection to the peer wait queue is again |
| dissolved if either a wake up is about to be relayed or the client |
| socket reconnects or a dead peer is detected or the client socket is |
| itself closed. This enables removing the second sock_poll_wait from |
| unix_dgram_poll, thus avoiding the use-after-free, while still ensuring |
| that no blocked writer sleeps forever. |
| |
| Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com> |
| Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets") |
| Reviewed-by: Jason Baron <jbaron@akamai.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| include/net/af_unix.h | 1 |
| net/unix/af_unix.c | 183 ++++++++++++++++++++++++++++++++++++++++++++------ |
| 2 files changed, 165 insertions(+), 19 deletions(-) |
| |
| --- a/include/net/af_unix.h |
| +++ b/include/net/af_unix.h |
| @@ -62,6 +62,7 @@ struct unix_sock { |
| #define UNIX_GC_CANDIDATE 0 |
| #define UNIX_GC_MAYBE_CYCLE 1 |
| struct socket_wq peer_wq; |
| + wait_queue_t peer_wake; |
| }; |
| |
| static inline struct unix_sock *unix_sk(struct sock *sk) |
| --- a/net/unix/af_unix.c |
| +++ b/net/unix/af_unix.c |
| @@ -313,6 +313,118 @@ found: |
| return s; |
| } |
| |
| +/* Support code for asymmetrically connected dgram sockets |
| + * |
| + * If a datagram socket is connected to a socket not itself connected |
| + * to the first socket (eg, /dev/log), clients may only enqueue more |
| + * messages if the present receive queue of the server socket is not |
| + * "too large". This means there's a second writeability condition |
| + * poll and sendmsg need to test. The dgram recv code will do a wake |
| + * up on the peer_wait wait queue of a socket upon reception of a |
| + * datagram which needs to be propagated to sleeping would-be writers |
| + * since these might not have sent anything so far. This can't be |
| + * accomplished via poll_wait because the lifetime of the server |
| + * socket might be less than that of its clients if these break their |
| + * association with it or if the server socket is closed while clients |
| + * are still connected to it and there's no way to inform "a polling |
| + * implementation" that it should let go of a certain wait queue |
| + * |
| + * In order to propagate a wake up, a wait_queue_t of the client |
| + * socket is enqueued on the peer_wait queue of the server socket |
| + * whose wake function does a wake_up on the ordinary client socket |
| + * wait queue. This connection is established whenever a write (or |
| + * poll for write) hit the flow control condition and broken when the |
| + * association to the server socket is dissolved or after a wake up |
| + * was relayed. |
| + */ |
| + |
| +static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, |
| + void *key) |
| +{ |
| + struct unix_sock *u; |
| + wait_queue_head_t *u_sleep; |
| + |
| + u = container_of(q, struct unix_sock, peer_wake); |
| + |
| + __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, |
| + q); |
| + u->peer_wake.private = NULL; |
| + |
| + /* relaying can only happen while the wq still exists */ |
| + u_sleep = sk_sleep(&u->sk); |
| + if (u_sleep) |
| + wake_up_interruptible_poll(u_sleep, key); |
| + |
| + return 0; |
| +} |
| + |
| +static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) |
| +{ |
| + struct unix_sock *u, *u_other; |
| + int rc; |
| + |
| + u = unix_sk(sk); |
| + u_other = unix_sk(other); |
| + rc = 0; |
| + spin_lock(&u_other->peer_wait.lock); |
| + |
| + if (!u->peer_wake.private) { |
| + u->peer_wake.private = other; |
| + __add_wait_queue(&u_other->peer_wait, &u->peer_wake); |
| + |
| + rc = 1; |
| + } |
| + |
| + spin_unlock(&u_other->peer_wait.lock); |
| + return rc; |
| +} |
| + |
| +static void unix_dgram_peer_wake_disconnect(struct sock *sk, |
| + struct sock *other) |
| +{ |
| + struct unix_sock *u, *u_other; |
| + |
| + u = unix_sk(sk); |
| + u_other = unix_sk(other); |
| + spin_lock(&u_other->peer_wait.lock); |
| + |
| + if (u->peer_wake.private == other) { |
| + __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); |
| + u->peer_wake.private = NULL; |
| + } |
| + |
| + spin_unlock(&u_other->peer_wait.lock); |
| +} |
| + |
| +static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, |
| + struct sock *other) |
| +{ |
| + unix_dgram_peer_wake_disconnect(sk, other); |
| + wake_up_interruptible_poll(sk_sleep(sk), |
| + POLLOUT | |
| + POLLWRNORM | |
| + POLLWRBAND); |
| +} |
| + |
| +/* preconditions: |
| + * - unix_peer(sk) == other |
| + * - association is stable |
| + */ |
| +static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) |
| +{ |
| + int connected; |
| + |
| + connected = unix_dgram_peer_wake_connect(sk, other); |
| + |
| + if (unix_recvq_full(other)) |
| + return 1; |
| + |
| + if (connected) |
| + unix_dgram_peer_wake_disconnect(sk, other); |
| + |
| + return 0; |
| +} |
| + |
| static inline int unix_writable(struct sock *sk) |
| { |
| return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; |
| @@ -417,6 +529,8 @@ static void unix_release_sock(struct soc |
| skpair->sk_state_change(skpair); |
| sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); |
| } |
| + |
| + unix_dgram_peer_wake_disconnect(sk, skpair); |
| sock_put(skpair); /* It may now die */ |
| unix_peer(sk) = NULL; |
| } |
| @@ -650,6 +764,7 @@ static struct sock *unix_create1(struct |
| INIT_LIST_HEAD(&u->link); |
| mutex_init(&u->readlock); /* single task reading lock */ |
| init_waitqueue_head(&u->peer_wait); |
| + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); |
| unix_insert_socket(unix_sockets_unbound(sk), sk); |
| out: |
| if (sk == NULL) |
| @@ -1017,6 +1132,8 @@ restart: |
| if (unix_peer(sk)) { |
| struct sock *old_peer = unix_peer(sk); |
| unix_peer(sk) = other; |
| + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); |
| + |
| unix_state_double_unlock(sk, other); |
| |
| if (other != old_peer) |
| @@ -1456,6 +1573,7 @@ static int unix_dgram_sendmsg(struct kio |
| struct scm_cookie tmp_scm; |
| int max_level; |
| int data_len = 0; |
| + int sk_locked; |
| |
| if (NULL == siocb->scm) |
| siocb->scm = &tmp_scm; |
| @@ -1532,12 +1650,14 @@ restart: |
| goto out_free; |
| } |
| |
| + sk_locked = 0; |
| unix_state_lock(other); |
| +restart_locked: |
| err = -EPERM; |
| if (!unix_may_send(sk, other)) |
| goto out_unlock; |
| |
| - if (sock_flag(other, SOCK_DEAD)) { |
| + if (unlikely(sock_flag(other, SOCK_DEAD))) { |
| /* |
| * Check with 1003.1g - what should |
| * datagram error |
| @@ -1545,10 +1665,14 @@ restart: |
| unix_state_unlock(other); |
| sock_put(other); |
| |
| + if (!sk_locked) |
| + unix_state_lock(sk); |
| + |
| err = 0; |
| - unix_state_lock(sk); |
| if (unix_peer(sk) == other) { |
| unix_peer(sk) = NULL; |
| + unix_dgram_peer_wake_disconnect_wakeup(sk, other); |
| + |
| unix_state_unlock(sk); |
| |
| unix_dgram_disconnected(sk, other); |
| @@ -1574,21 +1698,38 @@ restart: |
| goto out_unlock; |
| } |
| |
| - if (unix_peer(other) != sk && unix_recvq_full(other)) { |
| - if (!timeo) { |
| - err = -EAGAIN; |
| - goto out_unlock; |
| + if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { |
| + if (timeo) { |
| + timeo = unix_wait_for_peer(other, timeo); |
| + |
| + err = sock_intr_errno(timeo); |
| + if (signal_pending(current)) |
| + goto out_free; |
| + |
| + goto restart; |
| } |
| |
| - timeo = unix_wait_for_peer(other, timeo); |
| + if (!sk_locked) { |
| + unix_state_unlock(other); |
| + unix_state_double_lock(sk, other); |
| + } |
| |
| - err = sock_intr_errno(timeo); |
| - if (signal_pending(current)) |
| - goto out_free; |
| + if (unix_peer(sk) != other || |
| + unix_dgram_peer_wake_me(sk, other)) { |
| + err = -EAGAIN; |
| + sk_locked = 1; |
| + goto out_unlock; |
| + } |
| |
| - goto restart; |
| + if (!sk_locked) { |
| + sk_locked = 1; |
| + goto restart_locked; |
| + } |
| } |
| |
| + if (unlikely(sk_locked)) |
| + unix_state_unlock(sk); |
| + |
| if (sock_flag(other, SOCK_RCVTSTAMP)) |
| __net_timestamp(skb); |
| maybe_add_creds(skb, sock, other); |
| @@ -1602,6 +1743,8 @@ restart: |
| return len; |
| |
| out_unlock: |
| + if (sk_locked) |
| + unix_state_unlock(sk); |
| unix_state_unlock(other); |
| out_free: |
| kfree_skb(skb); |
| @@ -2260,14 +2403,16 @@ static unsigned int unix_dgram_poll(stru |
| return mask; |
| |
| writable = unix_writable(sk); |
| - other = unix_peer_get(sk); |
| - if (other) { |
| - if (unix_peer(other) != sk) { |
| - sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); |
| - if (unix_recvq_full(other)) |
| - writable = 0; |
| - } |
| - sock_put(other); |
| + if (writable) { |
| + unix_state_lock(sk); |
| + |
| + other = unix_peer(sk); |
| + if (other && unix_peer(other) != sk && |
| + unix_recvq_full(other) && |
| + unix_dgram_peer_wake_me(sk, other)) |
| + writable = 0; |
| + |
| + unix_state_unlock(sk); |
| } |
| |
| if (writable) |