| From 039b17dd52ce5f87548f7af1f20c983f1dcbbca5 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Mon, 25 Oct 2021 16:14:00 +0200 |
| Subject: vrf: run conntrack only in context of lower/physdev for locally |
| generated packets |
| |
| From: Florian Westphal <fw@strlen.de> |
| |
| [ Upstream commit 8c9c296adfae9ea05f655d69e9f6e13daa86fb4a ] |
| |
| The VRF driver invokes netfilter for output+postrouting hooks so that users |
| can create rules that check for 'oif $vrf' rather than lower device name. |
| |
| This is a problem when NAT rules are configured. |
| |
| To avoid any conntrack involvement in round 1, tag skbs as 'untracked' |
| to prevent conntrack from picking them up. |
| |
| This gets cleared before the packet gets handed to the ip stack so |
| conntrack will be active on the second iteration. |
| |
| One remaining issue is that a rule like |
| |
| output ... oif $vrfname notrack |
| |
| won't propagate to the second round because we can't tell |
| 'notrack set via ruleset' and 'notrack set by vrf driver' apart. |
| However, this isn't a regression: the 'notrack' removal happens |
| instead of unconditional nf_reset_ct(). |
| I'd also like to avoid leaking more vrf specific conditionals into the |
| netfilter infra. |
| |
| For ingress, conntrack has already been done before the packet makes it |
| to the vrf driver, with this patch egress does connection tracking with |
| lower/physical device as well. |
| |
| Signed-off-by: Florian Westphal <fw@strlen.de> |
| Acked-by: David Ahern <dsahern@kernel.org> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| drivers/net/vrf.c | 28 ++++++++++++++++++++++++---- |
| 1 file changed, 24 insertions(+), 4 deletions(-) |
| |
| diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c |
| index f08ed52d51f3f..f436b8c130611 100644 |
| --- a/drivers/net/vrf.c |
| +++ b/drivers/net/vrf.c |
| @@ -33,6 +33,7 @@ |
| #include <net/l3mdev.h> |
| #include <net/fib_rules.h> |
| #include <net/netns/generic.h> |
| +#include <net/netfilter/nf_conntrack.h> |
| |
| #define DRV_NAME "vrf" |
| #define DRV_VERSION "1.0" |
| @@ -147,12 +148,26 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, |
| return NETDEV_TX_OK; |
| } |
| |
| +static void vrf_nf_set_untracked(struct sk_buff *skb) |
| +{ |
| + if (skb_get_nfct(skb) == 0) |
| + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); |
| +} |
| + |
| +static void vrf_nf_reset_ct(struct sk_buff *skb) |
| +{ |
| + if (skb_get_nfct(skb) == IP_CT_UNTRACKED) |
| + nf_reset_ct(skb); |
| +} |
| + |
| #if IS_ENABLED(CONFIG_IPV6) |
| static int vrf_ip6_local_out(struct net *net, struct sock *sk, |
| struct sk_buff *skb) |
| { |
| int err; |
| |
| + vrf_nf_reset_ct(skb); |
| + |
| err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, |
| sk, skb, NULL, skb_dst(skb)->dev, dst_output); |
| |
| @@ -232,6 +247,8 @@ static int vrf_ip_local_out(struct net *net, struct sock *sk, |
| { |
| int err; |
| |
| + vrf_nf_reset_ct(skb); |
| + |
| err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, |
| skb, NULL, skb_dst(skb)->dev, dst_output); |
| if (likely(err == 1)) |
| @@ -351,8 +368,7 @@ static void vrf_finish_direct(struct sk_buff *skb) |
| skb_pull(skb, ETH_HLEN); |
| } |
| |
| - /* reset skb device */ |
| - nf_reset_ct(skb); |
| + vrf_nf_reset_ct(skb); |
| } |
| |
| #if IS_ENABLED(CONFIG_IPV6) |
| @@ -366,7 +382,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, |
| struct neighbour *neigh; |
| int ret; |
| |
| - nf_reset_ct(skb); |
| + vrf_nf_reset_ct(skb); |
| |
| skb->protocol = htons(ETH_P_IPV6); |
| skb->dev = dev; |
| @@ -477,6 +493,8 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, |
| |
| skb->dev = vrf_dev; |
| |
| + vrf_nf_set_untracked(skb); |
| + |
| err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, |
| skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); |
| |
| @@ -584,7 +602,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s |
| bool is_v6gw = false; |
| int ret = -EINVAL; |
| |
| - nf_reset_ct(skb); |
| + vrf_nf_reset_ct(skb); |
| |
| /* Be paranoid, rather than too clever. */ |
| if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { |
| @@ -712,6 +730,8 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, |
| |
| skb->dev = vrf_dev; |
| |
| + vrf_nf_set_untracked(skb); |
| + |
| err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, |
| skb, NULL, vrf_dev, vrf_ip_out_direct_finish); |
| |
| -- |
| 2.33.0 |
| |