| From foo@baz Wed 19 Feb 2020 09:32:07 PM CET |
| From: Toke Høiland-Jørgensen <toke@redhat.com> |
| Date: Mon, 10 Feb 2020 17:10:46 +0100 |
| Subject: core: Don't skip generic XDP program execution for cloned SKBs |
| |
| From: Toke Høiland-Jørgensen <toke@redhat.com> |
| |
| [ Upstream commit ad1e03b2b3d4430baaa109b77bc308dc73050de3 ] |
| |
| The current generic XDP handler skips execution of XDP programs entirely if |
| an SKB is marked as cloned. This leads to some surprising behaviour, as |
| packets can end up being cloned in various ways, which will make an XDP |
| program not see all the traffic on an interface. |
| |
| This was discovered by a simple test case where an XDP program that always |
| returns XDP_DROP is installed on a veth device. When combining this with |
| the Scapy packet sniffer (which uses an AF_PACKET) socket on the sending |
| side, SKBs reliably end up in the cloned state, causing them to be passed |
| through to the receiving interface instead of being dropped. A minimal |
| reproducer script for this is included below. |
| |
| This patch fixed the issue by simply triggering the existing linearisation |
| code for cloned SKBs instead of skipping the XDP program execution. This |
| behaviour is in line with the behaviour of the native XDP implementation |
| for the veth driver, which will reallocate and copy the SKB data if the SKB |
| is marked as shared. |
| |
| Reproducer Python script (requires BCC and Scapy): |
| |
| from scapy.all import TCP, IP, Ether, sendp, sniff, AsyncSniffer, Raw, UDP |
| from bcc import BPF |
| import time, sys, subprocess, shlex |
| |
| SKB_MODE = (1 << 1) |
| DRV_MODE = (1 << 2) |
| PYTHON=sys.executable |
| |
| def client(): |
| time.sleep(2) |
| # Sniffing on the sender causes skb_cloned() to be set |
| s = AsyncSniffer() |
| s.start() |
| |
| for p in range(10): |
| sendp(Ether(dst="aa:aa:aa:aa:aa:aa", src="cc:cc:cc:cc:cc:cc")/IP()/UDP()/Raw("Test"), |
| verbose=False) |
| time.sleep(0.1) |
| |
| s.stop() |
| return 0 |
| |
| def server(mode): |
| prog = BPF(text="int dummy_drop(struct xdp_md *ctx) {return XDP_DROP;}") |
| func = prog.load_func("dummy_drop", BPF.XDP) |
| prog.attach_xdp("a_to_b", func, mode) |
| |
| time.sleep(1) |
| |
| s = sniff(iface="a_to_b", count=10, timeout=15) |
| if len(s): |
| print(f"Got {len(s)} packets - should have gotten 0") |
| return 1 |
| else: |
| print("Got no packets - as expected") |
| return 0 |
| |
| if len(sys.argv) < 2: |
| print(f"Usage: {sys.argv[0]} <skb|drv>") |
| sys.exit(1) |
| |
| if sys.argv[1] == "client": |
| sys.exit(client()) |
| elif sys.argv[1] == "server": |
| mode = SKB_MODE if sys.argv[2] == 'skb' else DRV_MODE |
| sys.exit(server(mode)) |
| else: |
| try: |
| mode = sys.argv[1] |
| if mode not in ('skb', 'drv'): |
| print(f"Usage: {sys.argv[0]} <skb|drv>") |
| sys.exit(1) |
| print(f"Running in {mode} mode") |
| |
| for cmd in [ |
| 'ip netns add netns_a', |
| 'ip netns add netns_b', |
| 'ip -n netns_a link add a_to_b type veth peer name b_to_a netns netns_b', |
| # Disable ipv6 to make sure there's no address autoconf traffic |
| 'ip netns exec netns_a sysctl -qw net.ipv6.conf.a_to_b.disable_ipv6=1', |
| 'ip netns exec netns_b sysctl -qw net.ipv6.conf.b_to_a.disable_ipv6=1', |
| 'ip -n netns_a link set dev a_to_b address aa:aa:aa:aa:aa:aa', |
| 'ip -n netns_b link set dev b_to_a address cc:cc:cc:cc:cc:cc', |
| 'ip -n netns_a link set dev a_to_b up', |
| 'ip -n netns_b link set dev b_to_a up']: |
| subprocess.check_call(shlex.split(cmd)) |
| |
| server = subprocess.Popen(shlex.split(f"ip netns exec netns_a {PYTHON} {sys.argv[0]} server {mode}")) |
| client = subprocess.Popen(shlex.split(f"ip netns exec netns_b {PYTHON} {sys.argv[0]} client")) |
| |
| client.wait() |
| server.wait() |
| sys.exit(server.returncode) |
| |
| finally: |
| subprocess.run(shlex.split("ip netns delete netns_a")) |
| subprocess.run(shlex.split("ip netns delete netns_b")) |
| |
| Fixes: d445516966dc ("net: xdp: support xdp generic on virtual devices") |
| Reported-by: Stepan Horacek <shoracek@redhat.com> |
| Suggested-by: Paolo Abeni <pabeni@redhat.com> |
| Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| net/core/dev.c | 4 ++-- |
| 1 file changed, 2 insertions(+), 2 deletions(-) |
| |
| --- a/net/core/dev.c |
| +++ b/net/core/dev.c |
| @@ -4256,14 +4256,14 @@ static u32 netif_receive_generic_xdp(str |
| /* Reinjected packets coming from act_mirred or similar should |
| * not get XDP generic processing. |
| */ |
| - if (skb_cloned(skb) || skb_is_tc_redirected(skb)) |
| + if (skb_is_tc_redirected(skb)) |
| return XDP_PASS; |
| |
| /* XDP packets must be linear and must have sufficient headroom |
| * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also |
| * native XDP provides, thus we need to do it here as well. |
| */ |
| - if (skb_is_nonlinear(skb) || |
| + if (skb_cloned(skb) || skb_is_nonlinear(skb) || |
| skb_headroom(skb) < XDP_PACKET_HEADROOM) { |
| int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); |
| int troom = skb->tail + skb->data_len - skb->end; |