| From foo@baz Mon Sep 17 13:33:56 CEST 2018 |
| From: Stephen Hemminger <stephen@networkplumber.org> |
| Date: Thu, 13 Sep 2018 07:58:50 -0700 |
| Subject: inet: frags: get rid of ipfrag_skb_cb/FRAG_CB |
| To: davem@davemloft.net, gregkh@linuxfoundation.org |
| Cc: netdev@vger.kernel.org, stable@vger.kernel.org, edumazet@google.com |
| Message-ID: <20180913145902.17531-19-sthemmin@microsoft.com> |
| |
| From: Eric Dumazet <edumazet@google.com> |
| |
| ip_defrag uses skb->cb[] to store the fragment offset, and unfortunately |
| this integer is currently in a different cache line than skb->next, |
| meaning that we use two cache lines per skb when finding the insertion point. |
| |
| By aliasing skb->ip_defrag_offset and skb->dev, we pack all the fields |
| in a single cache line and save precious memory bandwidth. |
| |
| Note that after the fast path added by Changli Gao in commit |
| d6bebca92c66 ("fragment: add fast path for in-order fragments") |
| this change wont help the fast path, since we still need |
| to access prev->len (2nd cache line), but will show great |
| benefits when slow path is entered, since we perform |
| a linear scan of a potentially long list. |
| |
| Also, note that this potential long list is an attack vector, |
| we might consider also using an rb-tree there eventually. |
| |
| Signed-off-by: Eric Dumazet <edumazet@google.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| (cherry picked from commit bf66337140c64c27fa37222b7abca7e49d63fb57) |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| include/linux/skbuff.h | 1 + |
| net/ipv4/ip_fragment.c | 35 ++++++++++++++--------------------- |
| 2 files changed, 15 insertions(+), 21 deletions(-) |
| |
| --- a/include/linux/skbuff.h |
| +++ b/include/linux/skbuff.h |
| @@ -678,6 +678,7 @@ struct sk_buff { |
| * UDP receive path is one user. |
| */ |
| unsigned long dev_scratch; |
| + int ip_defrag_offset; |
| }; |
| /* |
| * This is the control buffer. It is free to use for every |
| --- a/net/ipv4/ip_fragment.c |
| +++ b/net/ipv4/ip_fragment.c |
| @@ -57,14 +57,6 @@ |
| */ |
| static const char ip_frag_cache_name[] = "ip4-frags"; |
| |
| -struct ipfrag_skb_cb |
| -{ |
| - struct inet_skb_parm h; |
| - int offset; |
| -}; |
| - |
| -#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) |
| - |
| /* Describe an entry in the "incomplete datagrams" queue. */ |
| struct ipq { |
| struct inet_frag_queue q; |
| @@ -353,13 +345,13 @@ static int ip_frag_queue(struct ipq *qp, |
| * this fragment, right? |
| */ |
| prev = qp->q.fragments_tail; |
| - if (!prev || FRAG_CB(prev)->offset < offset) { |
| + if (!prev || prev->ip_defrag_offset < offset) { |
| next = NULL; |
| goto found; |
| } |
| prev = NULL; |
| for (next = qp->q.fragments; next != NULL; next = next->next) { |
| - if (FRAG_CB(next)->offset >= offset) |
| + if (next->ip_defrag_offset >= offset) |
| break; /* bingo! */ |
| prev = next; |
| } |
| @@ -370,7 +362,7 @@ found: |
| * any overlaps are eliminated. |
| */ |
| if (prev) { |
| - int i = (FRAG_CB(prev)->offset + prev->len) - offset; |
| + int i = (prev->ip_defrag_offset + prev->len) - offset; |
| |
| if (i > 0) { |
| offset += i; |
| @@ -387,8 +379,8 @@ found: |
| |
| err = -ENOMEM; |
| |
| - while (next && FRAG_CB(next)->offset < end) { |
| - int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ |
| + while (next && next->ip_defrag_offset < end) { |
| + int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */ |
| |
| if (i < next->len) { |
| int delta = -next->truesize; |
| @@ -401,7 +393,7 @@ found: |
| delta += next->truesize; |
| if (delta) |
| add_frag_mem_limit(qp->q.net, delta); |
| - FRAG_CB(next)->offset += i; |
| + next->ip_defrag_offset += i; |
| qp->q.meat -= i; |
| if (next->ip_summed != CHECKSUM_UNNECESSARY) |
| next->ip_summed = CHECKSUM_NONE; |
| @@ -425,7 +417,13 @@ found: |
| } |
| } |
| |
| - FRAG_CB(skb)->offset = offset; |
| + /* Note : skb->ip_defrag_offset and skb->dev share the same location */ |
| + dev = skb->dev; |
| + if (dev) |
| + qp->iif = dev->ifindex; |
| + /* Makes sure compiler wont do silly aliasing games */ |
| + barrier(); |
| + skb->ip_defrag_offset = offset; |
| |
| /* Insert this fragment in the chain of fragments. */ |
| skb->next = next; |
| @@ -436,11 +434,6 @@ found: |
| else |
| qp->q.fragments = skb; |
| |
| - dev = skb->dev; |
| - if (dev) { |
| - qp->iif = dev->ifindex; |
| - skb->dev = NULL; |
| - } |
| qp->q.stamp = skb->tstamp; |
| qp->q.meat += skb->len; |
| qp->ecn |= ecn; |
| @@ -516,7 +509,7 @@ static int ip_frag_reasm(struct ipq *qp, |
| } |
| |
| WARN_ON(!head); |
| - WARN_ON(FRAG_CB(head)->offset != 0); |
| + WARN_ON(head->ip_defrag_offset != 0); |
| |
| /* Allocate a new buffer for the datagram. */ |
| ihlen = ip_hdrlen(head); |