Merge branch 'mlxsw-spectrum_router-Add-GRE-tunnel-support-for-Spectrum-2'

Ido Schimmel says:

====================
mlxsw: spectrum_router: Add GRE tunnel support for Spectrum-2

Nir says:

In Spectrum-2, HW implementation of layer 3 tunnels differs from
Spectrum-1 when it comes to the underlay routing table selection.
Spectrum-2 uses a dedicated RIF that points to the virtual router used
for forwarding the encapsulated packets, while Spectrum-1 explicitly
specifies the virtual router itself.

Patches #1 and #2 add additional fields in RITR - Router interface table
register and RTDP - Routing tunnel decap properties respectively. These
fields are required for the new underlay RIF needed for Spectrum-2.

Patches #3-4 allow a different set of RIF operations per ASIC type. The
first patch splits the operations and the following patch sets RIF ops
according to ASIC type.

Patches #5-9 introduce small changes to existing code to allow existence
of a dedicated underlay RIF along with the underlay virtual router, and
to support that new type of RIF that has no device.

Patch #10 takes care of updating the tunnel decap properties egress
underlay RIF required for Spectrum-2.

Patch #11 adds the implementation of Spectrum-2 specific RIF operations
and essentially enables layer 3 GRE tunnels on Spectrum-2.

Finally patches #12-18 add tests for GRE IP-in-IP tunnels, both in flat
and hierarchical topologies.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 9b48dff..5f8066a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5666,6 +5666,8 @@
 	MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4,
 	/* IPinIP IPv6 underlay Unicast */
 	MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6,
+	/* IPinIP generic - used for Spectrum-2 underlay RIF */
+	MLXSW_REG_RITR_LOOPBACK_GENERIC,
 };
 
 /* reg_ritr_loopback_protocol
@@ -5706,6 +5708,13 @@
  */
 MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16);
 
+/* reg_ritr_loopback_ipip_underlay_rif
+ * Underlay ingress router interface.
+ * Reserved for Spectrum.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_underlay_rif, 0x14, 0, 16);
+
 /* reg_ritr_loopback_ipip_usip*
  * Encapsulation Underlay source IP.
  * Access: RW
@@ -5821,11 +5830,12 @@
 mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload,
 			    enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
 			    enum mlxsw_reg_ritr_loopback_ipip_options options,
-			    u16 uvr_id, u32 gre_key)
+			    u16 uvr_id, u16 underlay_rif, u32 gre_key)
 {
 	mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type);
 	mlxsw_reg_ritr_loopback_ipip_options_set(payload, options);
 	mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id);
+	mlxsw_reg_ritr_loopback_ipip_underlay_rif_set(payload, underlay_rif);
 	mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key);
 }
 
@@ -5833,12 +5843,12 @@
 mlxsw_reg_ritr_loopback_ipip4_pack(char *payload,
 			    enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
 			    enum mlxsw_reg_ritr_loopback_ipip_options options,
-			    u16 uvr_id, u32 usip, u32 gre_key)
+			    u16 uvr_id, u16 underlay_rif, u32 usip, u32 gre_key)
 {
 	mlxsw_reg_ritr_loopback_protocol_set(payload,
 				    MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4);
 	mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options,
-						 uvr_id, gre_key);
+						 uvr_id, underlay_rif, gre_key);
 	mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip);
 }
 
@@ -7200,6 +7210,13 @@
  */
 MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24);
 
+/* reg_rtdp_egress_router_interface
+ * Underlay egress router interface.
+ * Valid range is from 0 to cap_max_router_interfaces - 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, egress_router_interface, 0x40, 0, 16);
+
 /* IPinIP */
 
 /* reg_rtdp_ipip_irif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 32519c9..a881697 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4094,6 +4094,7 @@
 	mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
 	mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr;
 	mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask;
+	mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
 
 	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
 }
@@ -4110,6 +4111,7 @@
 	mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
 	mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
 	mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
+	mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
 
 	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index a1c32a8..1fa5c81 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -75,6 +75,11 @@
 	MLXSW_SP_RIF_TYPE_MAX,
 };
 
+struct mlxsw_sp_rif_ops;
+
+extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[];
+extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[];
+
 enum mlxsw_sp_fid_type {
 	MLXSW_SP_FID_TYPE_8021Q,
 	MLXSW_SP_FID_TYPE_8021D,
@@ -161,6 +166,7 @@
 	const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
 	const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
 	const struct mlxsw_sp_nve_ops **nve_ops_arr;
+	const struct mlxsw_sp_rif_ops **rif_ops_arr;
 };
 
 static inline struct mlxsw_sp_upper *
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 41e607a..4993381 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -220,7 +220,7 @@
 	for (; i < rif_count; i++) {
 		struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
 
-		if (!rif)
+		if (!rif || !mlxsw_sp_rif_dev(rif))
 			continue;
 		err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif,
 					      counters_enabled);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 00db26c..6400cd6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -145,6 +145,7 @@
 				     struct mlxsw_sp_ipip_entry *ipip_entry)
 {
 	u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
+	u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb);
 	char rtdp_pl[MLXSW_REG_RTDP_LEN];
 	struct ip_tunnel_parm parms;
 	unsigned int type_check;
@@ -157,6 +158,7 @@
 	ikey = mlxsw_sp_ipip_parms4_ikey(parms);
 
 	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
+	mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_id);
 
 	type_check = has_ikey ?
 		MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY :
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 042341c..0949404 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -80,7 +80,7 @@
 struct mlxsw_sp_rif {
 	struct list_head nexthop_list;
 	struct list_head neigh_list;
-	struct net_device *dev;
+	struct net_device *dev; /* NULL for underlay RIF */
 	struct mlxsw_sp_fid *fid;
 	unsigned char addr[ETH_ALEN];
 	int mtu;
@@ -120,6 +120,7 @@
 	struct mlxsw_sp_rif common;
 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
 	u16 ul_vr_id; /* Reserved for Spectrum-2. */
+	u16 ul_rif_id; /* Reserved for Spectrum. */
 };
 
 struct mlxsw_sp_rif_params_ipip_lb {
@@ -440,6 +441,8 @@
 	struct mlxsw_sp_fib *fib4;
 	struct mlxsw_sp_fib *fib6;
 	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
+	struct mlxsw_sp_rif *ul_rif;
+	refcount_t ul_rif_refcnt;
 };
 
 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
@@ -1437,8 +1440,8 @@
 }
 
 static int
-mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
-			struct mlxsw_sp_vr *ul_vr, bool enable)
+mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
+			u16 ul_rif_id, bool enable)
 {
 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
 	struct mlxsw_sp_rif *rif = &lb_rif->common;
@@ -1453,7 +1456,7 @@
 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
-			    ul_vr->id, saddr4, lb_cf.okey);
+			    ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
 		break;
 
 	case MLXSW_SP_L3_PROTO_IPV6:
@@ -1468,14 +1471,13 @@
 {
 	struct mlxsw_sp_ipip_entry *ipip_entry;
 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
-	struct mlxsw_sp_vr *ul_vr;
 	int err = 0;
 
 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
 	if (ipip_entry) {
 		lb_rif = ipip_entry->ol_lb;
-		ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
-		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
+		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
+					      lb_rif->ul_rif_id, true);
 		if (err)
 			goto out;
 		lb_rif->common.mtu = ol_dev->mtu;
@@ -6224,10 +6226,12 @@
 
 	INIT_LIST_HEAD(&rif->nexthop_list);
 	INIT_LIST_HEAD(&rif->neigh_list);
-	ether_addr_copy(rif->addr, l3_dev->dev_addr);
-	rif->mtu = l3_dev->mtu;
+	if (l3_dev) {
+		ether_addr_copy(rif->addr, l3_dev->dev_addr);
+		rif->mtu = l3_dev->mtu;
+		rif->dev = l3_dev;
+	}
 	rif->vr_id = vr_id;
-	rif->dev = l3_dev;
 	rif->rif_index = rif_index;
 
 	return rif;
@@ -6251,7 +6255,19 @@
 
 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
 {
-	return lb_rif->ul_vr_id;
+	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
+	struct mlxsw_sp_vr *ul_vr;
+
+	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
+	if (WARN_ON(IS_ERR(ul_vr)))
+		return 0;
+
+	return ul_vr->id;
+}
+
+u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
+{
+	return lb_rif->ul_rif_id;
 }
 
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
@@ -6284,7 +6300,7 @@
 	int i, err;
 
 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
-	ops = mlxsw_sp->router->rif_ops_arr[type];
+	ops = mlxsw_sp->rif_ops_arr[type];
 
 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
 	if (IS_ERR(vr))
@@ -6303,6 +6319,7 @@
 		goto err_rif_alloc;
 	}
 	dev_hold(rif->dev);
+	mlxsw_sp->router->rifs[rif_index] = rif;
 	rif->mlxsw_sp = mlxsw_sp;
 	rif->ops = ops;
 
@@ -6329,7 +6346,6 @@
 	}
 
 	mlxsw_sp_rif_counters_alloc(rif);
-	mlxsw_sp->router->rifs[rif_index] = rif;
 
 	return rif;
 
@@ -6341,6 +6357,7 @@
 	if (fid)
 		mlxsw_sp_fid_put(fid);
 err_fid_get:
+	mlxsw_sp->router->rifs[rif_index] = NULL;
 	dev_put(rif->dev);
 	kfree(rif);
 err_rif_alloc:
@@ -6361,7 +6378,6 @@
 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
 
-	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
 	mlxsw_sp_rif_counters_free(rif);
 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
@@ -6369,6 +6385,7 @@
 	if (fid)
 		/* Loopback RIFs are not associated with a FID. */
 		mlxsw_sp_fid_put(fid);
+	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
 	dev_put(rif->dev);
 	kfree(rif);
 	vr->rif_count--;
@@ -6750,7 +6767,7 @@
 
 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
 		rif = mlxsw_sp->router->rifs[i];
-		if (rif && rif->dev != dev &&
+		if (rif && rif->dev && rif->dev != dev &&
 		    !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
 					     mlxsw_sp->mac_mask)) {
 			NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
@@ -7424,7 +7441,7 @@
 }
 
 static int
-mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
+mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
 {
 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
@@ -7436,11 +7453,12 @@
 	if (IS_ERR(ul_vr))
 		return PTR_ERR(ul_vr);
 
-	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
+	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
 	if (err)
 		goto err_loopback_op;
 
 	lb_rif->ul_vr_id = ul_vr->id;
+	lb_rif->ul_rif_id = 0;
 	++ul_vr->rif_count;
 	return 0;
 
@@ -7449,32 +7467,185 @@
 	return err;
 }
 
-static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
+static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
 {
 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
 	struct mlxsw_sp_vr *ul_vr;
 
 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
-	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
+	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
 
 	--ul_vr->rif_count;
 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
 }
 
-static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
+static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
-	.configure		= mlxsw_sp_rif_ipip_lb_configure,
-	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
+	.configure		= mlxsw_sp1_rif_ipip_lb_configure,
+	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
 };
 
-static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
+const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
-	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
+	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp1_rif_ipip_lb_ops,
+};
+
+static int
+mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
+{
+	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
+	char ritr_pl[MLXSW_REG_RITR_LEN];
+
+	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
+			    ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
+	mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
+					     MLXSW_REG_RITR_LOOPBACK_GENERIC);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
+		       struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_rif *ul_rif;
+	u16 rif_index;
+	int err;
+
+	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
+		return ERR_PTR(err);
+	}
+
+	ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
+	if (!ul_rif)
+		return ERR_PTR(-ENOMEM);
+
+	mlxsw_sp->router->rifs[rif_index] = ul_rif;
+	ul_rif->mlxsw_sp = mlxsw_sp;
+	err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
+	if (err)
+		goto ul_rif_op_err;
+
+	return ul_rif;
+
+ul_rif_op_err:
+	mlxsw_sp->router->rifs[rif_index] = NULL;
+	kfree(ul_rif);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
+{
+	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
+
+	mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
+	mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
+	kfree(ul_rif);
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+		    struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_vr *vr;
+	int err;
+
+	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
+	if (IS_ERR(vr))
+		return ERR_CAST(vr);
+
+	if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
+		return vr->ul_rif;
+
+	vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
+	if (IS_ERR(vr->ul_rif)) {
+		err = PTR_ERR(vr->ul_rif);
+		goto err_ul_rif_create;
+	}
+
+	vr->rif_count++;
+	refcount_set(&vr->ul_rif_refcnt, 1);
+
+	return vr->ul_rif;
+
+err_ul_rif_create:
+	mlxsw_sp_vr_put(mlxsw_sp, vr);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
+{
+	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
+	struct mlxsw_sp_vr *vr;
+
+	vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
+
+	if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
+		return;
+
+	vr->rif_count--;
+	mlxsw_sp_ul_rif_destroy(ul_rif);
+	mlxsw_sp_vr_put(mlxsw_sp, vr);
+}
+
+static int
+mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
+{
+	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
+	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
+	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+	struct mlxsw_sp_rif *ul_rif;
+	int err;
+
+	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
+	if (IS_ERR(ul_rif))
+		return PTR_ERR(ul_rif);
+
+	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
+	if (err)
+		goto err_loopback_op;
+
+	lb_rif->ul_vr_id = 0;
+	lb_rif->ul_rif_id = ul_rif->rif_index;
+
+	return 0;
+
+err_loopback_op:
+	mlxsw_sp_ul_rif_put(ul_rif);
+	return err;
+}
+
+static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
+{
+	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
+	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+	struct mlxsw_sp_rif *ul_rif;
+
+	ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
+	mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
+	mlxsw_sp_ul_rif_put(ul_rif);
+}
+
+static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
+	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
+	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
+	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
+	.configure		= mlxsw_sp2_rif_ipip_lb_configure,
+	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
+};
+
+const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
+	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
+	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
+	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
+	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp2_rif_ipip_lb_ops,
 };
 
 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
@@ -7487,8 +7658,6 @@
 	if (!mlxsw_sp->router->rifs)
 		return -ENOMEM;
 
-	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
-
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 3dbafde..cc1de91 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -29,6 +29,7 @@
 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
+u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif);
 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif);
diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh
new file mode 100755
index 0000000..abb6943
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel without key.
+# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
+# details.
+
+ALL_TESTS="gre_flat4 gre_mtu_change"
+
+NUM_NETIFS=6
+source lib.sh
+source ipip_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create gre $ol1 $ul1
+	sw2_flat_create gre $ol2 $ul2
+}
+
+gre_flat4()
+{
+	RET=0
+
+	ping_test $h1 192.0.2.18 " gre flat"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh
new file mode 100755
index 0000000..c4f3733
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel with key.
+# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
+# details.
+
+ALL_TESTS="gre_flat4 gre_mtu_change"
+
+NUM_NETIFS=6
+source lib.sh
+source ipip_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create gre $ol1 $ul1 key 233
+	sw2_flat_create gre $ol2 $ul2 key 233
+}
+
+gre_flat4()
+{
+	RET=0
+
+	ping_test $h1 192.0.2.18 " gre flat with key"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change	gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh
new file mode 100755
index 0000000..a811130
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel with ikey/okey.
+# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
+# details.
+
+ALL_TESTS="gre_flat4 gre_mtu_change"
+
+NUM_NETIFS=6
+source lib.sh
+source ipip_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create gre $ol1 $ul1 ikey 111 okey 222
+	sw2_flat_create gre $ol2 $ul2 ikey 222 okey 111
+}
+
+gre_flat4()
+{
+	RET=0
+
+	ping_test $h1 192.0.2.18 " gre flat with ikey/okey"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change	gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh
new file mode 100755
index 0000000..05c5b3c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels without key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ipip_lib.sh for more details.
+
+ALL_TESTS="gre_hier4 gre_mtu_change"
+
+NUM_NETIFS=6
+source lib.sh
+source ipip_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_hierarchical_create gre $ol1 $ul1
+	sw2_hierarchical_create gre $ol2 $ul2
+}
+
+gre_hier4()
+{
+	RET=0
+
+	ping_test $h1 192.0.2.18 " gre hierarchical"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_hierarchical_destroy $ol2 $ul2
+	sw1_hierarchical_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh
new file mode 100755
index 0000000..9b105db
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels with key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ipip_lib.sh for more details.
+
+ALL_TESTS="gre_hier4 gre_mtu_change"
+
+NUM_NETIFS=6
+source lib.sh
+source ipip_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_hierarchical_create gre $ol1 $ul1 key 22
+	sw2_hierarchical_create gre $ol2 $ul2 key 22
+}
+
+gre_hier4()
+{
+	RET=0
+
+	ping_test $h1 192.0.2.18 " gre hierarchical with key"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_hierarchical_destroy $ol2 $ul2
+	sw1_hierarchical_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh
new file mode 100755
index 0000000..e275d25b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels with ikey/okey.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ipip_lib.sh for more details.
+
+ALL_TESTS="gre_hier4 gre_mtu_change"
+
+NUM_NETIFS=6
+source lib.sh
+source ipip_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_hierarchical_create gre $ol1 $ul1 ikey 111 okey 222
+	sw2_hierarchical_create gre $ol2 $ul2 ikey 222 okey 111
+}
+
+gre_hier4()
+{
+	RET=0
+
+	ping_test $h1 192.0.2.18 " gre hierarchical with ikey/okey"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_hierarchical_destroy $ol2 $ul2
+	sw1_hierarchical_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh
new file mode 100644
index 0000000..30f36a5
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh
@@ -0,0 +1,349 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Handles creation and destruction of IP-in-IP or GRE tunnels over the given
+# topology. Supports both flat and hierarchical models.
+#
+# Flat Model:
+# Overlay and underlay share the same VRF.
+# SW1 uses the default VRF, so the tunnel has no bound dev.
+# SW2 uses a non-default VRF, so the tunnel has a bound dev.
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|-----+
+# | SW1               |     |
+# |              $ol1 +     |
+# |      192.0.2.2/28       |
+# |                         |
+# |  + g1a (gre)            |
+# |    loc=192.0.2.65       |
+# |    rem=192.0.2.66 --.   |
+# |    tos=inherit      |   |
+# |  .------------------'   |
+# |  |                      |
+# |  v                      |
+# |  + $ul1.111 (vlan)      |
+# |  | 192.0.2.129/28       |
+# |   \                     |
+# |    \_______             |
+# |            |            |
+# |VRF default + $ul1       |
+# +------------|------------+
+#              |
+# +------------|------------+
+# | SW2        + $ul2       |
+# |     _______|            |
+# |    /                    |
+# |   /                     |
+# |  + $ul2.111 (vlan)      |
+# |  ^ 192.0.2.130/28       |
+# |  |                      |
+# |  |                      |
+# |  '------------------.   |
+# |  + g2a (gre)        |   |
+# |    loc=192.0.2.66   |   |
+# |    rem=192.0.2.65 --'   |
+# |    tos=inherit          |
+# |                         |
+# |              $ol2 +     |
+# |     192.0.2.17/28 |     |
+# | VRF v$ol2         |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|-----+
+# | H2                |     |
+# |               $h2 +     |
+# |     192.0.2.18/28       |
+# +-------------------------+
+#
+# Hierarchical model:
+# The tunnel is bound to a device in a different VRF
+#
+# +---------------------------+
+# | H1                        |
+# |               $h1 +       |
+# |      192.0.2.1/28 |       |
+# +-------------------|-------+
+#                     |
+# +-------------------|-------+
+# | SW1               |       |
+# | +-----------------|-----+ |
+# | |            $ol1 +     | |
+# | |     192.0.2.2/28      | |
+# | |                       | |
+# | |    + g1a (gre)        | |
+# | |    rem=192.0.2.66     | |
+# | |    tos=inherit        | |
+# | |    loc=192.0.2.65     | |
+# | |           ^           | |
+# | | VRF v$ol1 |           | |
+# | +-----------|-----------+ |
+# |             |             |
+# | +-----------|-----------+ |
+# | | VRF v$ul1 |           | |
+# | |           |           | |
+# | |           |           | |
+# | |           v           | |
+# | |    dummy1 +           | |
+# | |   192.0.2.65          | |
+# | |   .-------'           | |
+# | |   |                   | |
+# | |   v                   | |
+# | |   + $ul1.111 (vlan)   | |
+# | |   | 192.0.2.129/28    | |
+# | |   \                   | |
+# | |    \_____             | |
+# | |          |            | |
+# | |          + $ul1       | |
+# | +----------|------------+ |
+# +------------|--------------+
+#              |
+# +------------|--------------+
+# | SW2        |              |
+# | +----------|------------+ |
+# | |          + $ul2       | |
+# | |     _____|            | |
+# | |    /                  | |
+# | |   /                   | |
+# | |   | $ul2.111 (vlan)   | |
+# | |   + 192.0.2.130/28    | |
+# | |   ^                   | |
+# | |   |                   | |
+# | |   '-------.           | |
+# | |    dummy2 +           | |
+# | |    192.0.2.66         | |
+# | |           ^           | |
+# | |           |           | |
+# | |           |           | |
+# | | VRF v$ul2 |           | |
+# | +-----------|-----------+ |
+# |             |             |
+# | +-----------|-----------+ |
+# | | VRF v$ol2 |           | |
+# | |           |           | |
+# | |           v           | |
+# | |  g2a (gre)+           | |
+# | |  loc=192.0.2.66       | |
+# | |  rem=192.0.2.65       | |
+# | |  tos=inherit          | |
+# | |                       | |
+# | |            $ol2 +     | |
+# | |   192.0.2.17/28 |     | |
+# | +-----------------|-----+ |
+# +-------------------|-------+
+#                     |
+# +-------------------|-------+
+# | H2                |       |
+# |               $h2 +       |
+# |     192.0.2.18/28         |
+# +---------------------------+
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy() # Undo h1_create: delete the H1 route, then release both addresses it configured.
+{
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 # h1_create also assigned the IPv6 address; pass it back too
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	simple_if_fini $h2 192.0.2.18/28
+}
+
+sw1_flat_create()
+{
+	local type=$1; shift
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	ip link set dev $ol1 up
+        __addr_add_del $ol1 add "192.0.2.2/28"
+
+	ip link set dev $ul1 up
+	vlan_create $ul1 111 "" 192.0.2.129/28
+
+	tunnel_create g1a $type 192.0.2.65 192.0.2.66 tos inherit "$@"
+	ip link set dev g1a up
+        __addr_add_del g1a add "192.0.2.65/32"
+
+	ip route add 192.0.2.66/32 via 192.0.2.130
+
+	ip route add 192.0.2.16/28 nexthop dev g1a
+}
+
+sw1_flat_destroy()
+{
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	ip route del 192.0.2.16/28
+
+	ip route del 192.0.2.66/32 via 192.0.2.130
+	__simple_if_fini g1a 192.0.2.65/32
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	__simple_if_fini $ol1 192.0.2.2/28
+}
+
+sw2_flat_create()
+{
+	local type=$1; shift
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	simple_if_init $ol2 192.0.2.17/28
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 192.0.2.130/28
+
+	tunnel_create g2a $type 192.0.2.66 192.0.2.65 tos inherit dev v$ol2 \
+		"$@"
+	__simple_if_init g2a v$ol2 192.0.2.66/32
+
+	ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+	ip route add vrf v$ol2 192.0.2.0/28 nexthop dev g2a
+}
+
+sw2_flat_destroy()
+{
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	ip route del vrf v$ol2 192.0.2.0/28
+
+	ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+	__simple_if_fini g2a 192.0.2.66/32
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 192.0.2.17/28
+}
+
+sw1_hierarchical_create()
+{
+	local type=$1; shift
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	simple_if_init $ol1 192.0.2.2/28
+	simple_if_init $ul1
+	ip link add name dummy1 type dummy
+	__simple_if_init dummy1 v$ul1 192.0.2.65/32
+
+	vlan_create $ul1 111 v$ul1 192.0.2.129/28
+	tunnel_create g1a $type 192.0.2.65 192.0.2.66 tos inherit dev dummy1 \
+		"$@"
+	ip link set dev g1a master v$ol1
+
+	ip route add vrf v$ul1 192.0.2.66/32 via 192.0.2.130
+	ip route add vrf v$ol1 192.0.2.16/28 nexthop dev g1a
+}
+
+sw1_hierarchical_destroy()
+{
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	ip route del vrf v$ol1 192.0.2.16/28
+	ip route del vrf v$ul1 192.0.2.66/32
+
+	tunnel_destroy g1a
+	vlan_destroy $ul1 111
+
+	__simple_if_fini dummy1 192.0.2.65/32
+	ip link del dev dummy1
+
+	simple_if_fini $ul1
+	simple_if_fini $ol1 192.0.2.2/28
+}
+
+sw2_hierarchical_create()
+{
+	local type=$1; shift
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	simple_if_init $ol2 192.0.2.17/28
+	simple_if_init $ul2
+
+	ip link add name dummy2 type dummy
+	__simple_if_init dummy2 v$ul2 192.0.2.66/32
+
+	vlan_create $ul2 111 v$ul2 192.0.2.130/28
+	tunnel_create g2a $type 192.0.2.66 192.0.2.65 tos inherit dev dummy2 \
+		"$@"
+	ip link set dev g2a master v$ol2
+
+	ip route add vrf v$ul2 192.0.2.65/32 via 192.0.2.129
+	ip route add vrf v$ol2 192.0.2.0/28 nexthop dev g2a
+}
+
+sw2_hierarchical_destroy()
+{
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	ip route del vrf v$ol2 192.0.2.0/28
+	ip route del vrf v$ul2 192.0.2.65/32
+
+	tunnel_destroy g2a
+	vlan_destroy $ul2 111
+
+	__simple_if_fini dummy2 192.0.2.66/32
+	ip link del dev dummy2
+
+	simple_if_fini $ul2
+	simple_if_fini $ol2 192.0.2.17/28
+}
+
+topo_mtu_change()
+{
+	local mtu=$1
+
+	ip link set mtu $mtu dev $h1
+	ip link set mtu $mtu dev $ol1
+	ip link set mtu $mtu dev g1a
+	ip link set mtu $mtu dev $ul1
+	ip link set mtu $mtu dev $ul1.111
+	ip link set mtu $mtu dev $h2
+	ip link set mtu $mtu dev $ol2
+	ip link set mtu $mtu dev g2a
+	ip link set mtu $mtu dev $ul2
+	ip link set mtu $mtu dev $ul2.111
+}
+
+test_mtu_change()
+{
+	local encap=$1; shift
+
+	RET=0
+
+	ping_do $h1 192.0.2.18 "-s 1800	-w 3"
+	check_fail $? "ping $encap should not pass with size 1800"
+
+	RET=0
+
+	topo_mtu_change	2000
+	ping_do	$h1 192.0.2.18 "-s 1800	-w 3"
+	check_err $?
+	log_test "ping $encap packet size 1800 after MTU change"
+}