|  | // SPDX-License-Identifier: GPL-2.0-only | 
|  | /* Copyright (c) 2023 Isovalent */ | 
|  |  | 
|  | #include <linux/netdevice.h> | 
|  | #include <linux/ethtool.h> | 
|  | #include <linux/etherdevice.h> | 
|  | #include <linux/filter.h> | 
|  | #include <linux/netfilter_netdev.h> | 
|  | #include <linux/bpf_mprog.h> | 
|  | #include <linux/indirect_call_wrapper.h> | 
|  |  | 
|  | #include <net/netkit.h> | 
|  | #include <net/dst.h> | 
|  | #include <net/tcx.h> | 
|  |  | 
/* Driver name: used as the rtnl link kind and reported through ethtool. */
#define DRV_NAME "netkit"
|  |  | 
|  | struct netkit { | 
|  | /* Needed in fast-path */ | 
|  | struct net_device __rcu *peer; | 
|  | struct bpf_mprog_entry __rcu *active; | 
|  | enum netkit_action policy; | 
|  | enum netkit_scrub scrub; | 
|  | struct bpf_mprog_bundle	bundle; | 
|  |  | 
|  | /* Needed in slow-path */ | 
|  | enum netkit_mode mode; | 
|  | bool primary; | 
|  | u32 headroom; | 
|  | }; | 
|  |  | 
|  | struct netkit_link { | 
|  | struct bpf_link link; | 
|  | struct net_device *dev; | 
|  | u32 location; | 
|  | }; | 
|  |  | 
|  | static __always_inline int | 
|  | netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb, | 
|  | enum netkit_action ret) | 
|  | { | 
|  | const struct bpf_mprog_fp *fp; | 
|  | const struct bpf_prog *prog; | 
|  |  | 
|  | bpf_mprog_foreach_prog(entry, fp, prog) { | 
|  | bpf_compute_data_pointers(skb); | 
|  | ret = bpf_prog_run(prog, skb); | 
|  | if (ret != NETKIT_NEXT) | 
|  | break; | 
|  | } | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static void netkit_xnet(struct sk_buff *skb) | 
|  | { | 
|  | skb->priority = 0; | 
|  | skb->mark = 0; | 
|  | } | 
|  |  | 
|  | static void netkit_prep_forward(struct sk_buff *skb, | 
|  | bool xnet, bool xnet_scrub) | 
|  | { | 
|  | skb_scrub_packet(skb, false); | 
|  | nf_skip_egress(skb, true); | 
|  | skb_reset_mac_header(skb); | 
|  | if (!xnet) | 
|  | return; | 
|  | ipvs_reset(skb); | 
|  | skb_clear_tstamp(skb); | 
|  | if (xnet_scrub) | 
|  | netkit_xnet(skb); | 
|  | } | 
|  |  | 
/* Typed accessor for the netkit private area of @dev. */
static struct netkit *netkit_priv(const struct net_device *dev)
{
	struct netkit *nk = netdev_priv(dev);

	return nk;
}
|  |  | 
|  | static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) | 
|  | { | 
|  | struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  | enum netkit_action ret = READ_ONCE(nk->policy); | 
|  | netdev_tx_t ret_dev = NET_XMIT_SUCCESS; | 
|  | const struct bpf_mprog_entry *entry; | 
|  | struct net_device *peer; | 
|  | int len = skb->len; | 
|  |  | 
|  | bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); | 
|  | rcu_read_lock(); | 
|  | peer = rcu_dereference(nk->peer); | 
|  | if (unlikely(!peer || !(peer->flags & IFF_UP) || | 
|  | !pskb_may_pull(skb, ETH_HLEN) || | 
|  | skb_orphan_frags(skb, GFP_ATOMIC))) | 
|  | goto drop; | 
|  | netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)), | 
|  | nk->scrub); | 
|  | eth_skb_pkt_type(skb, peer); | 
|  | skb->dev = peer; | 
|  | entry = rcu_dereference(nk->active); | 
|  | if (entry) | 
|  | ret = netkit_run(entry, skb, ret); | 
|  | switch (ret) { | 
|  | case NETKIT_NEXT: | 
|  | case NETKIT_PASS: | 
|  | eth_skb_pull_mac(skb); | 
|  | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); | 
|  | if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { | 
|  | dev_sw_netstats_tx_add(dev, 1, len); | 
|  | dev_sw_netstats_rx_add(peer, len); | 
|  | } else { | 
|  | goto drop_stats; | 
|  | } | 
|  | break; | 
|  | case NETKIT_REDIRECT: | 
|  | dev_sw_netstats_tx_add(dev, 1, len); | 
|  | skb_do_redirect(skb); | 
|  | break; | 
|  | case NETKIT_DROP: | 
|  | default: | 
|  | drop: | 
|  | kfree_skb(skb); | 
|  | drop_stats: | 
|  | dev_core_stats_tx_dropped_inc(dev); | 
|  | ret_dev = NET_XMIT_DROP; | 
|  | break; | 
|  | } | 
|  | rcu_read_unlock(); | 
|  | bpf_net_ctx_clear(bpf_net_ctx); | 
|  | return ret_dev; | 
|  | } | 
|  |  | 
|  | static int netkit_open(struct net_device *dev) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  | struct net_device *peer = rtnl_dereference(nk->peer); | 
|  |  | 
|  | if (!peer) | 
|  | return -ENOTCONN; | 
|  | if (peer->flags & IFF_UP) { | 
|  | netif_carrier_on(dev); | 
|  | netif_carrier_on(peer); | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int netkit_close(struct net_device *dev) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  | struct net_device *peer = rtnl_dereference(nk->peer); | 
|  |  | 
|  | netif_carrier_off(dev); | 
|  | if (peer) | 
|  | netif_carrier_off(peer); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int netkit_get_iflink(const struct net_device *dev) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  | struct net_device *peer; | 
|  | int iflink = 0; | 
|  |  | 
|  | rcu_read_lock(); | 
|  | peer = rcu_dereference(nk->peer); | 
|  | if (peer) | 
|  | iflink = READ_ONCE(peer->ifindex); | 
|  | rcu_read_unlock(); | 
|  | return iflink; | 
|  | } | 
|  |  | 
/* ndo_set_rx_mode: intentionally empty. */
static void netkit_set_multicast(struct net_device *dev)
{
	/* No filtering to program: whatever is pushed to us is received. */
}
|  |  | 
|  | static int netkit_set_macaddr(struct net_device *dev, void *sa) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  |  | 
|  | if (nk->mode != NETKIT_L2) | 
|  | return -EOPNOTSUPP; | 
|  |  | 
|  | return eth_mac_addr(dev, sa); | 
|  | } | 
|  |  | 
|  | static void netkit_set_headroom(struct net_device *dev, int headroom) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev), *nk2; | 
|  | struct net_device *peer; | 
|  |  | 
|  | if (headroom < 0) | 
|  | headroom = NET_SKB_PAD; | 
|  |  | 
|  | rcu_read_lock(); | 
|  | peer = rcu_dereference(nk->peer); | 
|  | if (unlikely(!peer)) | 
|  | goto out; | 
|  |  | 
|  | nk2 = netkit_priv(peer); | 
|  | nk->headroom = headroom; | 
|  | headroom = max(nk->headroom, nk2->headroom); | 
|  |  | 
|  | peer->needed_headroom = headroom; | 
|  | dev->needed_headroom = headroom; | 
|  | out: | 
|  | rcu_read_unlock(); | 
|  | } | 
|  |  | 
|  | INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev) | 
|  | { | 
|  | return rcu_dereference(netkit_priv(dev)->peer); | 
|  | } | 
|  |  | 
|  | static void netkit_get_stats(struct net_device *dev, | 
|  | struct rtnl_link_stats64 *stats) | 
|  | { | 
|  | dev_fetch_sw_netstats(stats, dev->tstats); | 
|  | stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped); | 
|  | } | 
|  |  | 
|  | static void netkit_uninit(struct net_device *dev); | 
|  |  | 
|  | static const struct net_device_ops netkit_netdev_ops = { | 
|  | .ndo_open		= netkit_open, | 
|  | .ndo_stop		= netkit_close, | 
|  | .ndo_start_xmit		= netkit_xmit, | 
|  | .ndo_set_rx_mode	= netkit_set_multicast, | 
|  | .ndo_set_rx_headroom	= netkit_set_headroom, | 
|  | .ndo_set_mac_address	= netkit_set_macaddr, | 
|  | .ndo_get_iflink		= netkit_get_iflink, | 
|  | .ndo_get_peer_dev	= netkit_peer_dev, | 
|  | .ndo_get_stats64	= netkit_get_stats, | 
|  | .ndo_uninit		= netkit_uninit, | 
|  | .ndo_features_check	= passthru_features_check, | 
|  | }; | 
|  |  | 
|  | static void netkit_get_drvinfo(struct net_device *dev, | 
|  | struct ethtool_drvinfo *info) | 
|  | { | 
|  | strscpy(info->driver, DRV_NAME, sizeof(info->driver)); | 
|  | } | 
|  |  | 
|  | static const struct ethtool_ops netkit_ethtool_ops = { | 
|  | .get_drvinfo		= netkit_get_drvinfo, | 
|  | }; | 
|  |  | 
|  | static void netkit_setup(struct net_device *dev) | 
|  | { | 
|  | static const netdev_features_t netkit_features_hw_vlan = | 
|  | NETIF_F_HW_VLAN_CTAG_TX | | 
|  | NETIF_F_HW_VLAN_CTAG_RX | | 
|  | NETIF_F_HW_VLAN_STAG_TX | | 
|  | NETIF_F_HW_VLAN_STAG_RX; | 
|  | static const netdev_features_t netkit_features = | 
|  | netkit_features_hw_vlan | | 
|  | NETIF_F_SG | | 
|  | NETIF_F_FRAGLIST | | 
|  | NETIF_F_HW_CSUM | | 
|  | NETIF_F_RXCSUM | | 
|  | NETIF_F_SCTP_CRC | | 
|  | NETIF_F_HIGHDMA | | 
|  | NETIF_F_GSO_SOFTWARE | | 
|  | NETIF_F_GSO_ENCAP_ALL; | 
|  |  | 
|  | ether_setup(dev); | 
|  | dev->max_mtu = ETH_MAX_MTU; | 
|  | dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; | 
|  |  | 
|  | dev->flags |= IFF_NOARP; | 
|  | dev->priv_flags &= ~IFF_TX_SKB_SHARING; | 
|  | dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; | 
|  | dev->priv_flags |= IFF_PHONY_HEADROOM; | 
|  | dev->priv_flags |= IFF_NO_QUEUE; | 
|  | dev->priv_flags |= IFF_DISABLE_NETPOLL; | 
|  | dev->lltx = true; | 
|  |  | 
|  | dev->ethtool_ops = &netkit_ethtool_ops; | 
|  | dev->netdev_ops  = &netkit_netdev_ops; | 
|  |  | 
|  | dev->features |= netkit_features; | 
|  | dev->hw_features = netkit_features; | 
|  | dev->hw_enc_features = netkit_features; | 
|  | dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; | 
|  | dev->vlan_features = dev->features & ~netkit_features_hw_vlan; | 
|  |  | 
|  | dev->needs_free_netdev = true; | 
|  |  | 
|  | netif_set_tso_max_size(dev, GSO_MAX_SIZE); | 
|  | } | 
|  |  | 
|  | static struct net *netkit_get_link_net(const struct net_device *dev) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  | struct net_device *peer = rtnl_dereference(nk->peer); | 
|  |  | 
|  | return peer ? dev_net(peer) : dev_net(dev); | 
|  | } | 
|  |  | 
|  | static int netkit_check_policy(int policy, struct nlattr *tb, | 
|  | struct netlink_ext_ack *extack) | 
|  | { | 
|  | switch (policy) { | 
|  | case NETKIT_PASS: | 
|  | case NETKIT_DROP: | 
|  | return 0; | 
|  | default: | 
|  | NL_SET_ERR_MSG_ATTR(extack, tb, | 
|  | "Provided default xmit policy not supported"); | 
|  | return -EINVAL; | 
|  | } | 
|  | } | 
|  |  | 
|  | static int netkit_validate(struct nlattr *tb[], struct nlattr *data[], | 
|  | struct netlink_ext_ack *extack) | 
|  | { | 
|  | struct nlattr *attr = tb[IFLA_ADDRESS]; | 
|  |  | 
|  | if (!attr) | 
|  | return 0; | 
|  | if (nla_len(attr) != ETH_ALEN) | 
|  | return -EINVAL; | 
|  | if (!is_valid_ether_addr(nla_data(attr))) | 
|  | return -EADDRNOTAVAIL; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static struct rtnl_link_ops netkit_link_ops; | 
|  |  | 
|  | static int netkit_new_link(struct net *peer_net, struct net_device *dev, | 
|  | struct nlattr *tb[], struct nlattr *data[], | 
|  | struct netlink_ext_ack *extack) | 
|  | { | 
|  | struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr; | 
|  | enum netkit_action policy_prim = NETKIT_PASS; | 
|  | enum netkit_action policy_peer = NETKIT_PASS; | 
|  | enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT; | 
|  | enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT; | 
|  | enum netkit_mode mode = NETKIT_L3; | 
|  | unsigned char ifname_assign_type; | 
|  | struct ifinfomsg *ifmp = NULL; | 
|  | struct net_device *peer; | 
|  | char ifname[IFNAMSIZ]; | 
|  | struct netkit *nk; | 
|  | int err; | 
|  |  | 
|  | if (data) { | 
|  | if (data[IFLA_NETKIT_MODE]) | 
|  | mode = nla_get_u32(data[IFLA_NETKIT_MODE]); | 
|  | if (data[IFLA_NETKIT_PEER_INFO]) { | 
|  | attr = data[IFLA_NETKIT_PEER_INFO]; | 
|  | ifmp = nla_data(attr); | 
|  | rtnl_nla_parse_ifinfomsg(peer_tb, attr, extack); | 
|  | tbp = peer_tb; | 
|  | } | 
|  | if (data[IFLA_NETKIT_SCRUB]) | 
|  | scrub_prim = nla_get_u32(data[IFLA_NETKIT_SCRUB]); | 
|  | if (data[IFLA_NETKIT_PEER_SCRUB]) | 
|  | scrub_peer = nla_get_u32(data[IFLA_NETKIT_PEER_SCRUB]); | 
|  | if (data[IFLA_NETKIT_POLICY]) { | 
|  | attr = data[IFLA_NETKIT_POLICY]; | 
|  | policy_prim = nla_get_u32(attr); | 
|  | err = netkit_check_policy(policy_prim, attr, extack); | 
|  | if (err < 0) | 
|  | return err; | 
|  | } | 
|  | if (data[IFLA_NETKIT_PEER_POLICY]) { | 
|  | attr = data[IFLA_NETKIT_PEER_POLICY]; | 
|  | policy_peer = nla_get_u32(attr); | 
|  | err = netkit_check_policy(policy_peer, attr, extack); | 
|  | if (err < 0) | 
|  | return err; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (ifmp && tbp[IFLA_IFNAME]) { | 
|  | nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); | 
|  | ifname_assign_type = NET_NAME_USER; | 
|  | } else { | 
|  | strscpy(ifname, "nk%d", IFNAMSIZ); | 
|  | ifname_assign_type = NET_NAME_ENUM; | 
|  | } | 
|  | if (mode != NETKIT_L2 && | 
|  | (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS])) | 
|  | return -EOPNOTSUPP; | 
|  |  | 
|  | peer = rtnl_create_link(peer_net, ifname, ifname_assign_type, | 
|  | &netkit_link_ops, tbp, extack); | 
|  | if (IS_ERR(peer)) | 
|  | return PTR_ERR(peer); | 
|  |  | 
|  | netif_inherit_tso_max(peer, dev); | 
|  |  | 
|  | if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) | 
|  | eth_hw_addr_random(peer); | 
|  | if (ifmp && dev->ifindex) | 
|  | peer->ifindex = ifmp->ifi_index; | 
|  |  | 
|  | nk = netkit_priv(peer); | 
|  | nk->primary = false; | 
|  | nk->policy = policy_peer; | 
|  | nk->scrub = scrub_peer; | 
|  | nk->mode = mode; | 
|  | bpf_mprog_bundle_init(&nk->bundle); | 
|  |  | 
|  | err = register_netdevice(peer); | 
|  | if (err < 0) | 
|  | goto err_register_peer; | 
|  | netif_carrier_off(peer); | 
|  | if (mode == NETKIT_L2) | 
|  | dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL); | 
|  |  | 
|  | err = rtnl_configure_link(peer, NULL, 0, NULL); | 
|  | if (err < 0) | 
|  | goto err_configure_peer; | 
|  |  | 
|  | if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS]) | 
|  | eth_hw_addr_random(dev); | 
|  | if (tb[IFLA_IFNAME]) | 
|  | nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); | 
|  | else | 
|  | strscpy(dev->name, "nk%d", IFNAMSIZ); | 
|  |  | 
|  | nk = netkit_priv(dev); | 
|  | nk->primary = true; | 
|  | nk->policy = policy_prim; | 
|  | nk->scrub = scrub_prim; | 
|  | nk->mode = mode; | 
|  | bpf_mprog_bundle_init(&nk->bundle); | 
|  |  | 
|  | err = register_netdevice(dev); | 
|  | if (err < 0) | 
|  | goto err_configure_peer; | 
|  | netif_carrier_off(dev); | 
|  | if (mode == NETKIT_L2) | 
|  | dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL); | 
|  |  | 
|  | rcu_assign_pointer(netkit_priv(dev)->peer, peer); | 
|  | rcu_assign_pointer(netkit_priv(peer)->peer, dev); | 
|  | return 0; | 
|  | err_configure_peer: | 
|  | unregister_netdevice(peer); | 
|  | return err; | 
|  | err_register_peer: | 
|  | free_netdev(peer); | 
|  | return err; | 
|  | } | 
|  |  | 
|  | static struct bpf_mprog_entry *netkit_entry_fetch(struct net_device *dev, | 
|  | bool bundle_fallback) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  | struct bpf_mprog_entry *entry; | 
|  |  | 
|  | ASSERT_RTNL(); | 
|  | entry = rcu_dereference_rtnl(nk->active); | 
|  | if (entry) | 
|  | return entry; | 
|  | if (bundle_fallback) | 
|  | return &nk->bundle.a; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static void netkit_entry_update(struct net_device *dev, | 
|  | struct bpf_mprog_entry *entry) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  |  | 
|  | ASSERT_RTNL(); | 
|  | rcu_assign_pointer(nk->active, entry); | 
|  | } | 
|  |  | 
/* Wait for an RCU grace period after the active entry has been switched. */
static void netkit_entry_sync(void)
{
	synchronize_rcu();
}
|  |  | 
|  | static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 which) | 
|  | { | 
|  | struct net_device *dev; | 
|  | struct netkit *nk; | 
|  |  | 
|  | ASSERT_RTNL(); | 
|  |  | 
|  | switch (which) { | 
|  | case BPF_NETKIT_PRIMARY: | 
|  | case BPF_NETKIT_PEER: | 
|  | break; | 
|  | default: | 
|  | return ERR_PTR(-EINVAL); | 
|  | } | 
|  |  | 
|  | dev = __dev_get_by_index(net, ifindex); | 
|  | if (!dev) | 
|  | return ERR_PTR(-ENODEV); | 
|  | if (dev->netdev_ops != &netkit_netdev_ops) | 
|  | return ERR_PTR(-ENXIO); | 
|  |  | 
|  | nk = netkit_priv(dev); | 
|  | if (!nk->primary) | 
|  | return ERR_PTR(-EACCES); | 
|  | if (which == BPF_NETKIT_PEER) { | 
|  | dev = rcu_dereference_rtnl(nk->peer); | 
|  | if (!dev) | 
|  | return ERR_PTR(-ENODEV); | 
|  | } | 
|  | return dev; | 
|  | } | 
|  |  | 
|  | int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) | 
|  | { | 
|  | struct bpf_mprog_entry *entry, *entry_new; | 
|  | struct bpf_prog *replace_prog = NULL; | 
|  | struct net_device *dev; | 
|  | int ret; | 
|  |  | 
|  | rtnl_lock(); | 
|  | dev = netkit_dev_fetch(current->nsproxy->net_ns, attr->target_ifindex, | 
|  | attr->attach_type); | 
|  | if (IS_ERR(dev)) { | 
|  | ret = PTR_ERR(dev); | 
|  | goto out; | 
|  | } | 
|  | entry = netkit_entry_fetch(dev, true); | 
|  | if (attr->attach_flags & BPF_F_REPLACE) { | 
|  | replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, | 
|  | prog->type); | 
|  | if (IS_ERR(replace_prog)) { | 
|  | ret = PTR_ERR(replace_prog); | 
|  | replace_prog = NULL; | 
|  | goto out; | 
|  | } | 
|  | } | 
|  | ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, replace_prog, | 
|  | attr->attach_flags, attr->relative_fd, | 
|  | attr->expected_revision); | 
|  | if (!ret) { | 
|  | if (entry != entry_new) { | 
|  | netkit_entry_update(dev, entry_new); | 
|  | netkit_entry_sync(); | 
|  | } | 
|  | bpf_mprog_commit(entry); | 
|  | } | 
|  | out: | 
|  | if (replace_prog) | 
|  | bpf_prog_put(replace_prog); | 
|  | rtnl_unlock(); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog) | 
|  | { | 
|  | struct bpf_mprog_entry *entry, *entry_new; | 
|  | struct net_device *dev; | 
|  | int ret; | 
|  |  | 
|  | rtnl_lock(); | 
|  | dev = netkit_dev_fetch(current->nsproxy->net_ns, attr->target_ifindex, | 
|  | attr->attach_type); | 
|  | if (IS_ERR(dev)) { | 
|  | ret = PTR_ERR(dev); | 
|  | goto out; | 
|  | } | 
|  | entry = netkit_entry_fetch(dev, false); | 
|  | if (!entry) { | 
|  | ret = -ENOENT; | 
|  | goto out; | 
|  | } | 
|  | ret = bpf_mprog_detach(entry, &entry_new, prog, NULL, attr->attach_flags, | 
|  | attr->relative_fd, attr->expected_revision); | 
|  | if (!ret) { | 
|  | if (!bpf_mprog_total(entry_new)) | 
|  | entry_new = NULL; | 
|  | netkit_entry_update(dev, entry_new); | 
|  | netkit_entry_sync(); | 
|  | bpf_mprog_commit(entry); | 
|  | } | 
|  | out: | 
|  | rtnl_unlock(); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) | 
|  | { | 
|  | struct net_device *dev; | 
|  | int ret; | 
|  |  | 
|  | rtnl_lock(); | 
|  | dev = netkit_dev_fetch(current->nsproxy->net_ns, | 
|  | attr->query.target_ifindex, | 
|  | attr->query.attach_type); | 
|  | if (IS_ERR(dev)) { | 
|  | ret = PTR_ERR(dev); | 
|  | goto out; | 
|  | } | 
|  | ret = bpf_mprog_query(attr, uattr, netkit_entry_fetch(dev, false)); | 
|  | out: | 
|  | rtnl_unlock(); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static struct netkit_link *netkit_link(const struct bpf_link *link) | 
|  | { | 
|  | return container_of(link, struct netkit_link, link); | 
|  | } | 
|  |  | 
|  | static int netkit_link_prog_attach(struct bpf_link *link, u32 flags, | 
|  | u32 id_or_fd, u64 revision) | 
|  | { | 
|  | struct netkit_link *nkl = netkit_link(link); | 
|  | struct bpf_mprog_entry *entry, *entry_new; | 
|  | struct net_device *dev = nkl->dev; | 
|  | int ret; | 
|  |  | 
|  | ASSERT_RTNL(); | 
|  | entry = netkit_entry_fetch(dev, true); | 
|  | ret = bpf_mprog_attach(entry, &entry_new, link->prog, link, NULL, flags, | 
|  | id_or_fd, revision); | 
|  | if (!ret) { | 
|  | if (entry != entry_new) { | 
|  | netkit_entry_update(dev, entry_new); | 
|  | netkit_entry_sync(); | 
|  | } | 
|  | bpf_mprog_commit(entry); | 
|  | } | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static void netkit_link_release(struct bpf_link *link) | 
|  | { | 
|  | struct netkit_link *nkl = netkit_link(link); | 
|  | struct bpf_mprog_entry *entry, *entry_new; | 
|  | struct net_device *dev; | 
|  | int ret = 0; | 
|  |  | 
|  | rtnl_lock(); | 
|  | dev = nkl->dev; | 
|  | if (!dev) | 
|  | goto out; | 
|  | entry = netkit_entry_fetch(dev, false); | 
|  | if (!entry) { | 
|  | ret = -ENOENT; | 
|  | goto out; | 
|  | } | 
|  | ret = bpf_mprog_detach(entry, &entry_new, link->prog, link, 0, 0, 0); | 
|  | if (!ret) { | 
|  | if (!bpf_mprog_total(entry_new)) | 
|  | entry_new = NULL; | 
|  | netkit_entry_update(dev, entry_new); | 
|  | netkit_entry_sync(); | 
|  | bpf_mprog_commit(entry); | 
|  | nkl->dev = NULL; | 
|  | } | 
|  | out: | 
|  | WARN_ON_ONCE(ret); | 
|  | rtnl_unlock(); | 
|  | } | 
|  |  | 
|  | static int netkit_link_update(struct bpf_link *link, struct bpf_prog *nprog, | 
|  | struct bpf_prog *oprog) | 
|  | { | 
|  | struct netkit_link *nkl = netkit_link(link); | 
|  | struct bpf_mprog_entry *entry, *entry_new; | 
|  | struct net_device *dev; | 
|  | int ret = 0; | 
|  |  | 
|  | rtnl_lock(); | 
|  | dev = nkl->dev; | 
|  | if (!dev) { | 
|  | ret = -ENOLINK; | 
|  | goto out; | 
|  | } | 
|  | if (oprog && link->prog != oprog) { | 
|  | ret = -EPERM; | 
|  | goto out; | 
|  | } | 
|  | oprog = link->prog; | 
|  | if (oprog == nprog) { | 
|  | bpf_prog_put(nprog); | 
|  | goto out; | 
|  | } | 
|  | entry = netkit_entry_fetch(dev, false); | 
|  | if (!entry) { | 
|  | ret = -ENOENT; | 
|  | goto out; | 
|  | } | 
|  | ret = bpf_mprog_attach(entry, &entry_new, nprog, link, oprog, | 
|  | BPF_F_REPLACE | BPF_F_ID, | 
|  | link->prog->aux->id, 0); | 
|  | if (!ret) { | 
|  | WARN_ON_ONCE(entry != entry_new); | 
|  | oprog = xchg(&link->prog, nprog); | 
|  | bpf_prog_put(oprog); | 
|  | bpf_mprog_commit(entry); | 
|  | } | 
|  | out: | 
|  | rtnl_unlock(); | 
|  | return ret; | 
|  | } | 
|  |  | 
/* bpf_link dealloc callback: free the containing netkit_link. */
static void netkit_link_dealloc(struct bpf_link *link)
{
	struct netkit_link *nkl = netkit_link(link);

	kfree(nkl);
}
|  |  | 
|  | static void netkit_link_fdinfo(const struct bpf_link *link, struct seq_file *seq) | 
|  | { | 
|  | const struct netkit_link *nkl = netkit_link(link); | 
|  | u32 ifindex = 0; | 
|  |  | 
|  | rtnl_lock(); | 
|  | if (nkl->dev) | 
|  | ifindex = nkl->dev->ifindex; | 
|  | rtnl_unlock(); | 
|  |  | 
|  | seq_printf(seq, "ifindex:\t%u\n", ifindex); | 
|  | seq_printf(seq, "attach_type:\t%u (%s)\n", | 
|  | nkl->location, | 
|  | nkl->location == BPF_NETKIT_PRIMARY ? "primary" : "peer"); | 
|  | } | 
|  |  | 
|  | static int netkit_link_fill_info(const struct bpf_link *link, | 
|  | struct bpf_link_info *info) | 
|  | { | 
|  | const struct netkit_link *nkl = netkit_link(link); | 
|  | u32 ifindex = 0; | 
|  |  | 
|  | rtnl_lock(); | 
|  | if (nkl->dev) | 
|  | ifindex = nkl->dev->ifindex; | 
|  | rtnl_unlock(); | 
|  |  | 
|  | info->netkit.ifindex = ifindex; | 
|  | info->netkit.attach_type = nkl->location; | 
|  | return 0; | 
|  | } | 
|  |  | 
/* bpf_link detach callback: same work as release; always returns 0. */
static int netkit_link_detach(struct bpf_link *link)
{
	netkit_link_release(link);
	return 0;
}
|  |  | 
|  | static const struct bpf_link_ops netkit_link_lops = { | 
|  | .release	= netkit_link_release, | 
|  | .detach		= netkit_link_detach, | 
|  | .dealloc	= netkit_link_dealloc, | 
|  | .update_prog	= netkit_link_update, | 
|  | .show_fdinfo	= netkit_link_fdinfo, | 
|  | .fill_link_info	= netkit_link_fill_info, | 
|  | }; | 
|  |  | 
|  | static int netkit_link_init(struct netkit_link *nkl, | 
|  | struct bpf_link_primer *link_primer, | 
|  | const union bpf_attr *attr, | 
|  | struct net_device *dev, | 
|  | struct bpf_prog *prog) | 
|  | { | 
|  | bpf_link_init(&nkl->link, BPF_LINK_TYPE_NETKIT, | 
|  | &netkit_link_lops, prog); | 
|  | nkl->location = attr->link_create.attach_type; | 
|  | nkl->dev = dev; | 
|  | return bpf_link_prime(&nkl->link, link_primer); | 
|  | } | 
|  |  | 
|  | int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) | 
|  | { | 
|  | struct bpf_link_primer link_primer; | 
|  | struct netkit_link *nkl; | 
|  | struct net_device *dev; | 
|  | int ret; | 
|  |  | 
|  | rtnl_lock(); | 
|  | dev = netkit_dev_fetch(current->nsproxy->net_ns, | 
|  | attr->link_create.target_ifindex, | 
|  | attr->link_create.attach_type); | 
|  | if (IS_ERR(dev)) { | 
|  | ret = PTR_ERR(dev); | 
|  | goto out; | 
|  | } | 
|  | nkl = kzalloc(sizeof(*nkl), GFP_KERNEL_ACCOUNT); | 
|  | if (!nkl) { | 
|  | ret = -ENOMEM; | 
|  | goto out; | 
|  | } | 
|  | ret = netkit_link_init(nkl, &link_primer, attr, dev, prog); | 
|  | if (ret) { | 
|  | kfree(nkl); | 
|  | goto out; | 
|  | } | 
|  | ret = netkit_link_prog_attach(&nkl->link, | 
|  | attr->link_create.flags, | 
|  | attr->link_create.netkit.relative_fd, | 
|  | attr->link_create.netkit.expected_revision); | 
|  | if (ret) { | 
|  | nkl->dev = NULL; | 
|  | bpf_link_cleanup(&link_primer); | 
|  | goto out; | 
|  | } | 
|  | ret = bpf_link_settle(&link_primer); | 
|  | out: | 
|  | rtnl_unlock(); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static void netkit_release_all(struct net_device *dev) | 
|  | { | 
|  | struct bpf_mprog_entry *entry; | 
|  | struct bpf_tuple tuple = {}; | 
|  | struct bpf_mprog_fp *fp; | 
|  | struct bpf_mprog_cp *cp; | 
|  |  | 
|  | entry = netkit_entry_fetch(dev, false); | 
|  | if (!entry) | 
|  | return; | 
|  | netkit_entry_update(dev, NULL); | 
|  | netkit_entry_sync(); | 
|  | bpf_mprog_foreach_tuple(entry, fp, cp, tuple) { | 
|  | if (tuple.link) | 
|  | netkit_link(tuple.link)->dev = NULL; | 
|  | else | 
|  | bpf_prog_put(tuple.prog); | 
|  | } | 
|  | } | 
|  |  | 
/* ndo_uninit: release all BPF programs and links attached to @dev. */
static void netkit_uninit(struct net_device *dev)
{
	netkit_release_all(dev);
}
|  |  | 
|  | static void netkit_del_link(struct net_device *dev, struct list_head *head) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  | struct net_device *peer = rtnl_dereference(nk->peer); | 
|  |  | 
|  | RCU_INIT_POINTER(nk->peer, NULL); | 
|  | unregister_netdevice_queue(dev, head); | 
|  | if (peer) { | 
|  | nk = netkit_priv(peer); | 
|  | RCU_INIT_POINTER(nk->peer, NULL); | 
|  | unregister_netdevice_queue(peer, head); | 
|  | } | 
|  | } | 
|  |  | 
|  | static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], | 
|  | struct nlattr *data[], | 
|  | struct netlink_ext_ack *extack) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  | struct net_device *peer = rtnl_dereference(nk->peer); | 
|  | enum netkit_action policy; | 
|  | struct nlattr *attr; | 
|  | int err; | 
|  |  | 
|  | if (!nk->primary) { | 
|  | NL_SET_ERR_MSG(extack, | 
|  | "netkit link settings can be changed only through the primary device"); | 
|  | return -EACCES; | 
|  | } | 
|  |  | 
|  | if (data[IFLA_NETKIT_MODE]) { | 
|  | NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_MODE], | 
|  | "netkit link operating mode cannot be changed after device creation"); | 
|  | return -EACCES; | 
|  | } | 
|  |  | 
|  | if (data[IFLA_NETKIT_SCRUB]) { | 
|  | NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB], | 
|  | "netkit scrubbing cannot be changed after device creation"); | 
|  | return -EACCES; | 
|  | } | 
|  |  | 
|  | if (data[IFLA_NETKIT_PEER_SCRUB]) { | 
|  | NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB], | 
|  | "netkit scrubbing cannot be changed after device creation"); | 
|  | return -EACCES; | 
|  | } | 
|  |  | 
|  | if (data[IFLA_NETKIT_PEER_INFO]) { | 
|  | NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO], | 
|  | "netkit peer info cannot be changed after device creation"); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | if (data[IFLA_NETKIT_POLICY]) { | 
|  | attr = data[IFLA_NETKIT_POLICY]; | 
|  | policy = nla_get_u32(attr); | 
|  | err = netkit_check_policy(policy, attr, extack); | 
|  | if (err) | 
|  | return err; | 
|  | WRITE_ONCE(nk->policy, policy); | 
|  | } | 
|  |  | 
|  | if (data[IFLA_NETKIT_PEER_POLICY]) { | 
|  | err = -EOPNOTSUPP; | 
|  | attr = data[IFLA_NETKIT_PEER_POLICY]; | 
|  | policy = nla_get_u32(attr); | 
|  | if (peer) | 
|  | err = netkit_check_policy(policy, attr, extack); | 
|  | if (err) | 
|  | return err; | 
|  | nk = netkit_priv(peer); | 
|  | WRITE_ONCE(nk->policy, policy); | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static size_t netkit_get_size(const struct net_device *dev) | 
|  | { | 
|  | return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */ | 
|  | nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */ | 
|  | nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_SCRUB */ | 
|  | nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */ | 
|  | nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */ | 
|  | nla_total_size(sizeof(u8))  + /* IFLA_NETKIT_PRIMARY */ | 
|  | 0; | 
|  | } | 
|  |  | 
|  | static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev) | 
|  | { | 
|  | struct netkit *nk = netkit_priv(dev); | 
|  | struct net_device *peer = rtnl_dereference(nk->peer); | 
|  |  | 
|  | if (nla_put_u8(skb, IFLA_NETKIT_PRIMARY, nk->primary)) | 
|  | return -EMSGSIZE; | 
|  | if (nla_put_u32(skb, IFLA_NETKIT_POLICY, nk->policy)) | 
|  | return -EMSGSIZE; | 
|  | if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode)) | 
|  | return -EMSGSIZE; | 
|  | if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub)) | 
|  | return -EMSGSIZE; | 
|  |  | 
|  | if (peer) { | 
|  | nk = netkit_priv(peer); | 
|  | if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy)) | 
|  | return -EMSGSIZE; | 
|  | if (nla_put_u32(skb, IFLA_NETKIT_PEER_SCRUB, nk->scrub)) | 
|  | return -EMSGSIZE; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = { | 
|  | [IFLA_NETKIT_PEER_INFO]		= { .len = sizeof(struct ifinfomsg) }, | 
|  | [IFLA_NETKIT_MODE]		= NLA_POLICY_MAX(NLA_U32, NETKIT_L3), | 
|  | [IFLA_NETKIT_POLICY]		= { .type = NLA_U32 }, | 
|  | [IFLA_NETKIT_PEER_POLICY]	= { .type = NLA_U32 }, | 
|  | [IFLA_NETKIT_SCRUB]		= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), | 
|  | [IFLA_NETKIT_PEER_SCRUB]	= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), | 
|  | [IFLA_NETKIT_PRIMARY]		= { .type = NLA_REJECT, | 
|  | .reject_message = "Primary attribute is read-only" }, | 
|  | }; | 
|  |  | 
|  | static struct rtnl_link_ops netkit_link_ops = { | 
|  | .kind		= DRV_NAME, | 
|  | .priv_size	= sizeof(struct netkit), | 
|  | .setup		= netkit_setup, | 
|  | .newlink	= netkit_new_link, | 
|  | .dellink	= netkit_del_link, | 
|  | .changelink	= netkit_change_link, | 
|  | .get_link_net	= netkit_get_link_net, | 
|  | .get_size	= netkit_get_size, | 
|  | .fill_info	= netkit_fill_info, | 
|  | .policy		= netkit_policy, | 
|  | .validate	= netkit_validate, | 
|  | .peer_type	= IFLA_NETKIT_PEER_INFO, | 
|  | .maxtype	= IFLA_NETKIT_MAX, | 
|  | }; | 
|  |  | 
|  | static __init int netkit_init(void) | 
|  | { | 
|  | BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT || | 
|  | (int)NETKIT_PASS != (int)TCX_PASS || | 
|  | (int)NETKIT_DROP != (int)TCX_DROP || | 
|  | (int)NETKIT_REDIRECT != (int)TCX_REDIRECT); | 
|  |  | 
|  | return rtnl_link_register(&netkit_link_ops); | 
|  | } | 
|  |  | 
|  | static __exit void netkit_exit(void) | 
|  | { | 
|  | rtnl_link_unregister(&netkit_link_ops); | 
|  | } | 
|  |  | 
|  | module_init(netkit_init); | 
|  | module_exit(netkit_exit); | 
|  |  | 
|  | MODULE_DESCRIPTION("BPF-programmable network device"); | 
|  | MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>"); | 
|  | MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>"); | 
|  | MODULE_LICENSE("GPL"); | 
|  | MODULE_ALIAS_RTNL_LINK(DRV_NAME); |