| From e93221c82dd5f7b043d42202d61aa577d748fa9a Mon Sep 17 00:00:00 2001 |
| From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com> |
| Date: Fri, 3 Jul 2020 22:26:43 +0200 |
| Subject: [PATCH] sched: consistently handle layer3 header accesses in the |
| presence of VLANs |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| commit d7bf2ebebc2bd61ab95e2a8e33541ef282f303d4 upstream. |
| |
| There are a couple of places in net/sched/ that check skb->protocol and act |
| on the value there. However, in the presence of VLAN tags, the value stored |
| in skb->protocol can be inconsistent based on whether VLAN acceleration is |
| enabled. The commit quoted in the Fixes tag below fixed the users of |
| skb->protocol to use a helper that will always see the VLAN ethertype. |
| |
| However, most of the callers don't actually handle the VLAN ethertype, but |
| expect to find the IP header type in the protocol field. This means that |
| things like changing the ECN field, or parsing diffserv values, stops |
| working if there's a VLAN tag, or if there are multiple nested VLAN |
| tags (QinQ). |
| |
| To fix this, change the helper to take an argument that indicates whether |
| the caller wants to skip the VLAN tags or not. When skipping VLAN tags, we |
| make sure to skip all of them, so behaviour is consistent even in QinQ |
| mode. |
| |
| To make the helper usable from the ECN code, move it to if_vlan.h instead |
| of pkt_sched.h. |
| |
| v3: |
| - Remove empty lines |
| - Move vlan variable definitions inside loop in skb_protocol() |
| - Also use skb_protocol() helper in IP{,6}_ECN_decapsulate() and |
| bpf_skb_ecn_set_ce() |
| |
| v2: |
| - Use eth_type_vlan() helper in skb_protocol() |
| - Also fix code that reads skb->protocol directly |
| - Change a couple of 'if/else if' statements to switch constructs to avoid |
| calling the helper twice |
| |
| Reported-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com> |
| Fixes: d8b9605d2697 ("net: sched: fix skb->protocol use in case of accelerated vlan path") |
| Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| [PG: start with 4.19.134-stable version - we don't have v5.3-rc1~140^2~15^2~3] |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h |
| index 244278d5c222..00fa5d0b5e99 100644 |
| --- a/include/linux/if_vlan.h |
| +++ b/include/linux/if_vlan.h |
| @@ -319,6 +319,34 @@ static inline bool eth_type_vlan(__be16 ethertype) |
| } |
| } |
| |
| +/* A getter for the SKB protocol field which will handle VLAN tags consistently |
| + * whether VLAN acceleration is enabled or not. |
| + */ |
| +static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) |
| +{ |
| + unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr); |
| + __be16 proto = skb->protocol; |
| + |
| + if (!skip_vlan) |
| + /* VLAN acceleration strips the VLAN header from the skb and |
| + * moves it to skb->vlan_proto |
| + */ |
| + return skb_vlan_tag_present(skb) ? skb->vlan_proto : proto; |
| + |
| + while (eth_type_vlan(proto)) { |
| + struct vlan_hdr vhdr, *vh; |
| + |
| + vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr); |
| + if (!vh) |
| + break; |
| + |
| + proto = vh->h_vlan_encapsulated_proto; |
| + offset += sizeof(vhdr); |
| + } |
| + |
| + return proto; |
| +} |
| + |
| static inline bool vlan_hw_offload_capable(netdev_features_t features, |
| __be16 proto) |
| { |
| diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h |
| index 0f0d1efe06dd..e1eaf1780288 100644 |
| --- a/include/net/inet_ecn.h |
| +++ b/include/net/inet_ecn.h |
| @@ -4,6 +4,7 @@ |
| |
| #include <linux/ip.h> |
| #include <linux/skbuff.h> |
| +#include <linux/if_vlan.h> |
| |
| #include <net/inet_sock.h> |
| #include <net/dsfield.h> |
| @@ -172,7 +173,7 @@ static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) |
| |
| static inline int INET_ECN_set_ce(struct sk_buff *skb) |
| { |
| - switch (skb->protocol) { |
| + switch (skb_protocol(skb, true)) { |
| case cpu_to_be16(ETH_P_IP): |
| if (skb_network_header(skb) + sizeof(struct iphdr) <= |
| skb_tail_pointer(skb)) |
| @@ -191,7 +192,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) |
| |
| static inline int INET_ECN_set_ect1(struct sk_buff *skb) |
| { |
| - switch (skb->protocol) { |
| + switch (skb_protocol(skb, true)) { |
| case cpu_to_be16(ETH_P_IP): |
| if (skb_network_header(skb) + sizeof(struct iphdr) <= |
| skb_tail_pointer(skb)) |
| @@ -272,12 +273,16 @@ static inline int IP_ECN_decapsulate(const struct iphdr *oiph, |
| { |
| __u8 inner; |
| |
| - if (skb->protocol == htons(ETH_P_IP)) |
| + switch (skb_protocol(skb, true)) { |
| + case htons(ETH_P_IP): |
| inner = ip_hdr(skb)->tos; |
| - else if (skb->protocol == htons(ETH_P_IPV6)) |
| + break; |
| + case htons(ETH_P_IPV6): |
| inner = ipv6_get_dsfield(ipv6_hdr(skb)); |
| - else |
| + break; |
| + default: |
| return 0; |
| + } |
| |
| return INET_ECN_decapsulate(skb, oiph->tos, inner); |
| } |
| @@ -287,12 +292,16 @@ static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, |
| { |
| __u8 inner; |
| |
| - if (skb->protocol == htons(ETH_P_IP)) |
| + switch (skb_protocol(skb, true)) { |
| + case htons(ETH_P_IP): |
| inner = ip_hdr(skb)->tos; |
| - else if (skb->protocol == htons(ETH_P_IPV6)) |
| + break; |
| + case htons(ETH_P_IPV6): |
| inner = ipv6_get_dsfield(ipv6_hdr(skb)); |
| - else |
| + break; |
| + default: |
| return 0; |
| + } |
| |
| return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); |
| } |
| diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h |
| index aa99c73c3fbd..2abecd015f6d 100644 |
| --- a/include/net/pkt_sched.h |
| +++ b/include/net/pkt_sched.h |
| @@ -128,17 +128,6 @@ static inline void qdisc_run(struct Qdisc *q) |
| } |
| } |
| |
| -static inline __be16 tc_skb_protocol(const struct sk_buff *skb) |
| -{ |
| - /* We need to take extra care in case the skb came via |
| - * vlan accelerated path. In that case, use skb->vlan_proto |
| - * as the original vlan header was already stripped. |
| - */ |
| - if (skb_vlan_tag_present(skb)) |
| - return skb->vlan_proto; |
| - return skb->protocol; |
| -} |
| - |
| /* Calculate maximal size of packet seen by hard_start_xmit |
| routine of this device. |
| */ |
| diff --git a/net/core/filter.c b/net/core/filter.c |
| index f311dde6600a..633b048792b2 100644 |
| --- a/net/core/filter.c |
| +++ b/net/core/filter.c |
| @@ -2916,7 +2916,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) |
| |
| static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) |
| { |
| - __be16 from_proto = skb->protocol; |
| + __be16 from_proto = skb_protocol(skb, true); |
| |
| if (from_proto == htons(ETH_P_IP) && |
| to_proto == htons(ETH_P_IPV6)) |
| @@ -2989,7 +2989,7 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = { |
| |
| static u32 bpf_skb_net_base_len(const struct sk_buff *skb) |
| { |
| - switch (skb->protocol) { |
| + switch (skb_protocol(skb, true)) { |
| case htons(ETH_P_IP): |
| return sizeof(struct iphdr); |
| case htons(ETH_P_IPV6): |
| @@ -3157,7 +3157,7 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, |
| u32 len_cur, len_diff_abs = abs(len_diff); |
| u32 len_min = bpf_skb_net_base_len(skb); |
| u32 len_max = __bpf_skb_max_len(skb); |
| - __be16 proto = skb->protocol; |
| + __be16 proto = skb_protocol(skb, true); |
| bool shrink = len_diff < 0; |
| u32 off; |
| int ret; |
| @@ -4922,7 +4922,7 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len |
| |
| switch (type) { |
| case BPF_LWT_ENCAP_SEG6_INLINE: |
| - if (skb->protocol != htons(ETH_P_IPV6)) |
| + if (skb_protocol(skb, true) != htons(ETH_P_IPV6)) |
| return -EBADMSG; |
| |
| err = seg6_do_srh_inline(skb, srh); |
| diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c |
| index 2b43cacf82af..1a8f2f85ea1a 100644 |
| --- a/net/sched/act_connmark.c |
| +++ b/net/sched/act_connmark.c |
| @@ -43,17 +43,20 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, |
| tcf_lastuse_update(&ca->tcf_tm); |
| bstats_update(&ca->tcf_bstats, skb); |
| |
| - if (skb->protocol == htons(ETH_P_IP)) { |
| + switch (skb_protocol(skb, true)) { |
| + case htons(ETH_P_IP): |
| if (skb->len < sizeof(struct iphdr)) |
| goto out; |
| |
| proto = NFPROTO_IPV4; |
| - } else if (skb->protocol == htons(ETH_P_IPV6)) { |
| + break; |
| + case htons(ETH_P_IPV6): |
| if (skb->len < sizeof(struct ipv6hdr)) |
| goto out; |
| |
| proto = NFPROTO_IPV6; |
| - } else { |
| + break; |
| + default: |
| goto out; |
| } |
| |
| diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c |
| index d3cfad88dc3a..428b1ae00123 100644 |
| --- a/net/sched/act_csum.c |
| +++ b/net/sched/act_csum.c |
| @@ -587,7 +587,7 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, |
| goto drop; |
| |
| update_flags = params->update_flags; |
| - protocol = tc_skb_protocol(skb); |
| + protocol = skb_protocol(skb, false); |
| again: |
| switch (protocol) { |
| case cpu_to_be16(ETH_P_IP): |
| diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c |
| index 6a8d3337c577..f98b2791ecec 100644 |
| --- a/net/sched/act_skbedit.c |
| +++ b/net/sched/act_skbedit.c |
| @@ -41,7 +41,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, |
| if (params->flags & SKBEDIT_F_INHERITDSFIELD) { |
| int wlen = skb_network_offset(skb); |
| |
| - switch (tc_skb_protocol(skb)) { |
| + switch (skb_protocol(skb, true)) { |
| case htons(ETH_P_IP): |
| wlen += sizeof(struct iphdr); |
| if (!pskb_may_pull(skb, wlen)) |
| diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c |
| index d2b836771e96..2b94d6f02dbb 100644 |
| --- a/net/sched/cls_api.c |
| +++ b/net/sched/cls_api.c |
| @@ -1727,7 +1727,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, |
| reclassify: |
| #endif |
| for (; tp; tp = rcu_dereference_bh(tp->next)) { |
| - __be16 protocol = tc_skb_protocol(skb); |
| + __be16 protocol = skb_protocol(skb, false); |
| int err; |
| |
| if (tp->protocol != protocol && |
| diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c |
| index 80ae7b9fa90a..ab53a93b2f2b 100644 |
| --- a/net/sched/cls_flow.c |
| +++ b/net/sched/cls_flow.c |
| @@ -80,7 +80,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) |
| if (dst) |
| return ntohl(dst); |
| |
| - return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); |
| + return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); |
| } |
| |
| static u32 flow_get_proto(const struct sk_buff *skb, |
| @@ -104,7 +104,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, |
| if (flow->ports.ports) |
| return ntohs(flow->ports.dst); |
| |
| - return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); |
| + return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); |
| } |
| |
| static u32 flow_get_iif(const struct sk_buff *skb) |
| @@ -151,7 +151,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) |
| static u32 flow_get_nfct_src(const struct sk_buff *skb, |
| const struct flow_keys *flow) |
| { |
| - switch (tc_skb_protocol(skb)) { |
| + switch (skb_protocol(skb, true)) { |
| case htons(ETH_P_IP): |
| return ntohl(CTTUPLE(skb, src.u3.ip)); |
| case htons(ETH_P_IPV6): |
| @@ -164,7 +164,7 @@ static u32 flow_get_nfct_src(const struct sk_buff *skb, |
| static u32 flow_get_nfct_dst(const struct sk_buff *skb, |
| const struct flow_keys *flow) |
| { |
| - switch (tc_skb_protocol(skb)) { |
| + switch (skb_protocol(skb, true)) { |
| case htons(ETH_P_IP): |
| return ntohl(CTTUPLE(skb, dst.u3.ip)); |
| case htons(ETH_P_IPV6): |
| diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c |
| index 295401c6f73e..5e112f9e6078 100644 |
| --- a/net/sched/cls_flower.c |
| +++ b/net/sched/cls_flower.c |
| @@ -289,7 +289,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, |
| /* skb_flow_dissect() does not set n_proto in case an unknown |
| * protocol, so do it rather here. |
| */ |
| - skb_key.basic.n_proto = skb->protocol; |
| + skb_key.basic.n_proto = skb_protocol(skb, false); |
| skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key); |
| skb_flow_dissect(skb, &mask->dissector, &skb_key, 0); |
| |
| diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c |
| index df00566d327d..c95cf86fb431 100644 |
| --- a/net/sched/em_ipset.c |
| +++ b/net/sched/em_ipset.c |
| @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, |
| }; |
| int ret, network_offset; |
| |
| - switch (tc_skb_protocol(skb)) { |
| + switch (skb_protocol(skb, true)) { |
| case htons(ETH_P_IP): |
| state.pf = NFPROTO_IPV4; |
| if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) |
| diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c |
| index ff9c9b711a74..8de4968177bd 100644 |
| --- a/net/sched/em_meta.c |
| +++ b/net/sched/em_meta.c |
| @@ -195,7 +195,7 @@ META_COLLECTOR(int_priority) |
| META_COLLECTOR(int_protocol) |
| { |
| /* Let userspace take care of the byte ordering */ |
| - dst->value = tc_skb_protocol(skb); |
| + dst->value = skb_protocol(skb, false); |
| } |
| |
| META_COLLECTOR(int_pkttype) |
| diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c |
| index 5d605bab9afc..896c0562cb42 100644 |
| --- a/net/sched/sch_cake.c |
| +++ b/net/sched/sch_cake.c |
| @@ -592,7 +592,7 @@ static void cake_update_flowkeys(struct flow_keys *keys, |
| struct nf_conntrack_tuple tuple = {}; |
| bool rev = !skb->_nfct; |
| |
| - if (tc_skb_protocol(skb) != htons(ETH_P_IP)) |
| + if (skb_protocol(skb, true) != htons(ETH_P_IP)) |
| return; |
| |
| if (!nf_ct_get_tuple_skb(&tuple, skb)) |
| @@ -1521,7 +1521,7 @@ static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash) |
| u16 *buf, buf_; |
| u8 dscp; |
| |
| - switch (tc_skb_protocol(skb)) { |
| + switch (skb_protocol(skb, true)) { |
| case htons(ETH_P_IP): |
| buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); |
| if (unlikely(!buf)) |
| diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c |
| index 05605b30bef3..2b88710994d7 100644 |
| --- a/net/sched/sch_dsmark.c |
| +++ b/net/sched/sch_dsmark.c |
| @@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, |
| if (p->set_tc_index) { |
| int wlen = skb_network_offset(skb); |
| |
| - switch (tc_skb_protocol(skb)) { |
| + switch (skb_protocol(skb, true)) { |
| case htons(ETH_P_IP): |
| wlen += sizeof(struct iphdr); |
| if (!pskb_may_pull(skb, wlen) || |
| @@ -303,7 +303,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) |
| index = skb->tc_index & (p->indices - 1); |
| pr_debug("index %d->%d\n", skb->tc_index, index); |
| |
| - switch (tc_skb_protocol(skb)) { |
| + switch (skb_protocol(skb, true)) { |
| case htons(ETH_P_IP): |
| ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, |
| p->mv[index].value); |
| @@ -320,7 +320,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) |
| */ |
| if (p->mv[index].mask != 0xff || p->mv[index].value) |
| pr_warn("%s: unsupported protocol %d\n", |
| - __func__, ntohs(tc_skb_protocol(skb))); |
| + __func__, ntohs(skb_protocol(skb, true))); |
| break; |
| } |
| |
| diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c |
| index 689ef6f3ded8..2f1f0a378408 100644 |
| --- a/net/sched/sch_teql.c |
| +++ b/net/sched/sch_teql.c |
| @@ -239,7 +239,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, |
| char haddr[MAX_ADDR_LEN]; |
| |
| neigh_ha_snapshot(haddr, n, dev); |
| - err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), |
| + err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)), |
| haddr, NULL, skb->len); |
| |
| if (err < 0) |
| -- |
| 2.27.0 |
| |