| From: Willem de Bruijn <willemb@google.com> |
| Date: Mon, 23 Apr 2018 17:37:03 -0400 |
| Subject: packet: fix bitfield update race |
| |
| commit a6361f0ca4b25460f2cdf3235ebe8115f622901e upstream. |
| |
| Updates to the bitfields in struct packet_sock are not atomic. |
| Serialize these read-modify-write cycles. |
| |
| Move po->running into a separate variable. Its writes are protected by |
| po->bind_lock (except for one startup case at packet_create). Also |
| replace a textual precondition warning with lockdep annotation. |
| |
| All others are set only in packet_setsockopt. Serialize these |
| updates by holding the socket lock. Analogous to other field updates, |
| also hold the lock when testing whether a ring is active (pg_vec). |
| |
| Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg") |
| Reported-by: DaeRyong Jeong <threeearcat@gmail.com> |
| Reported-by: Byoungyoung Lee <byoungyoung@purdue.edu> |
| Signed-off-by: Willem de Bruijn <willemb@google.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| [bwh: Backported to 3.16: adjust context] |
| Signed-off-by: Ben Hutchings <ben@decadent.org.uk> |
| --- |
| net/packet/af_packet.c | 60 +++++++++++++++++++++++++++++++----------- |
| net/packet/internal.h | 10 +++---- |
| 2 files changed, 49 insertions(+), 21 deletions(-) |
| |
| --- a/net/packet/af_packet.c |
| +++ b/net/packet/af_packet.c |
| @@ -343,11 +343,11 @@ static void packet_pick_tx_queue(struct |
| skb_set_queue_mapping(skb, queue_index); |
| } |
| |
| -/* register_prot_hook must be invoked with the po->bind_lock held, |
| +/* __register_prot_hook must be invoked through register_prot_hook |
| * or from a context in which asynchronous accesses to the packet |
| * socket is not possible (packet_create()). |
| */ |
| -static void register_prot_hook(struct sock *sk) |
| +static void __register_prot_hook(struct sock *sk) |
| { |
| struct packet_sock *po = pkt_sk(sk); |
| |
| @@ -362,8 +362,13 @@ static void register_prot_hook(struct so |
| } |
| } |
| |
| -/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock |
| - * held. If the sync parameter is true, we will temporarily drop |
| +static void register_prot_hook(struct sock *sk) |
| +{ |
| + lockdep_assert_held_once(&pkt_sk(sk)->bind_lock); |
| + __register_prot_hook(sk); |
| +} |
| + |
| +/* If the sync parameter is true, we will temporarily drop |
| * the po->bind_lock and do a synchronize_net to make sure no |
| * asynchronous packet processing paths still refer to the elements |
| * of po->prot_hook. If the sync parameter is false, it is the |
| @@ -373,6 +378,8 @@ static void __unregister_prot_hook(struc |
| { |
| struct packet_sock *po = pkt_sk(sk); |
| |
| + lockdep_assert_held_once(&po->bind_lock); |
| + |
| po->running = 0; |
| |
| if (po->fanout) |
| @@ -2887,7 +2894,7 @@ static int packet_create(struct net *net |
| |
| if (proto) { |
| po->prot_hook.type = proto; |
| - register_prot_hook(sk); |
| + __register_prot_hook(sk); |
| } |
| |
| mutex_lock(&net->packet.sklist_lock); |
| @@ -3386,12 +3393,18 @@ packet_setsockopt(struct socket *sock, i |
| |
| if (optlen != sizeof(val)) |
| return -EINVAL; |
| - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) |
| - return -EBUSY; |
| if (copy_from_user(&val, optval, sizeof(val))) |
| return -EFAULT; |
| - po->tp_loss = !!val; |
| - return 0; |
| + |
| + lock_sock(sk); |
| + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { |
| + ret = -EBUSY; |
| + } else { |
| + po->tp_loss = !!val; |
| + ret = 0; |
| + } |
| + release_sock(sk); |
| + return ret; |
| } |
| case PACKET_AUXDATA: |
| { |
| @@ -3402,7 +3415,9 @@ packet_setsockopt(struct socket *sock, i |
| if (copy_from_user(&val, optval, sizeof(val))) |
| return -EFAULT; |
| |
| + lock_sock(sk); |
| po->auxdata = !!val; |
| + release_sock(sk); |
| return 0; |
| } |
| case PACKET_ORIGDEV: |
| @@ -3414,7 +3429,9 @@ packet_setsockopt(struct socket *sock, i |
| if (copy_from_user(&val, optval, sizeof(val))) |
| return -EFAULT; |
| |
| + lock_sock(sk); |
| po->origdev = !!val; |
| + release_sock(sk); |
| return 0; |
| } |
| case PACKET_VNET_HDR: |
| @@ -3423,15 +3440,20 @@ packet_setsockopt(struct socket *sock, i |
| |
| if (sock->type != SOCK_RAW) |
| return -EINVAL; |
| - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) |
| - return -EBUSY; |
| if (optlen < sizeof(val)) |
| return -EINVAL; |
| if (copy_from_user(&val, optval, sizeof(val))) |
| return -EFAULT; |
| |
| - po->has_vnet_hdr = !!val; |
| - return 0; |
| + lock_sock(sk); |
| + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { |
| + ret = -EBUSY; |
| + } else { |
| + po->has_vnet_hdr = !!val; |
| + ret = 0; |
| + } |
| + release_sock(sk); |
| + return ret; |
| } |
| case PACKET_TIMESTAMP: |
| { |
| @@ -3462,11 +3484,17 @@ packet_setsockopt(struct socket *sock, i |
| |
| if (optlen != sizeof(val)) |
| return -EINVAL; |
| - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) |
| - return -EBUSY; |
| if (copy_from_user(&val, optval, sizeof(val))) |
| return -EFAULT; |
| - po->tp_tx_has_off = !!val; |
| + |
| + lock_sock(sk); |
| + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { |
| + ret = -EBUSY; |
| + } else { |
| + po->tp_tx_has_off = !!val; |
| + ret = 0; |
| + } |
| + release_sock(sk); |
| return 0; |
| } |
| case PACKET_QDISC_BYPASS: |
| --- a/net/packet/internal.h |
| +++ b/net/packet/internal.h |
| @@ -100,10 +100,12 @@ struct packet_sock { |
| int copy_thresh; |
| spinlock_t bind_lock; |
| struct mutex pg_vec_lock; |
| - unsigned int running:1, /* prot_hook is attached*/ |
| - auxdata:1, |
| + unsigned int running; /* bind_lock must be held */ |
| + unsigned int auxdata:1, /* writer must hold sock lock */ |
| origdev:1, |
| - has_vnet_hdr:1; |
| + has_vnet_hdr:1, |
| + tp_loss:1, |
| + tp_tx_has_off:1; |
| int ifindex; /* bound device */ |
| __be16 num; |
| struct packet_mclist *mclist; |
| @@ -111,8 +113,6 @@ struct packet_sock { |
| enum tpacket_versions tp_version; |
| unsigned int tp_hdrlen; |
| unsigned int tp_reserve; |
| - unsigned int tp_loss:1; |
| - unsigned int tp_tx_has_off:1; |
| unsigned int tp_tstamp; |
| struct net_device __rcu *cached_dev; |
| int (*xmit)(struct sk_buff *skb); |