| From 7c0a613635e732b1f7f1f6a6d5885b98ba0152db Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Mon, 26 Oct 2020 15:43:59 +0200 |
| Subject: RDMA/mlx5: Fix devlink deadlock on net namespace deletion |
| |
| From: Parav Pandit <parav@nvidia.com> |
| |
| [ Upstream commit fbdd0049d98d44914fc57d4b91f867f4996c787b ] |
| |
| When a mlx5 core devlink instance is reloaded in different net namespace, |
| its associated IB device is deleted and recreated. |
| |
| Example sequence is: |
| $ ip netns add foo |
| $ devlink dev reload pci/0000:00:08.0 netns foo |
| $ ip netns del foo |
| |
| mlx5 IB device needs to attach and detach the netdevice to it through the |
| netdev notifier chain during load and unload sequence. A below call graph |
| of the unload flow. |
| |
| cleanup_net() |
| down_read(&pernet_ops_rwsem); <- first sem acquired |
| ops_pre_exit_list() |
| pre_exit() |
| devlink_pernet_pre_exit() |
| devlink_reload() |
| mlx5_devlink_reload_down() |
| mlx5_unload_one() |
| [...] |
| mlx5_ib_remove() |
| mlx5_ib_unbind_slave_port() |
| mlx5_remove_netdev_notifier() |
| unregister_netdevice_notifier() |
| down_write(&pernet_ops_rwsem);<- recurrsive lock |
| |
| Hence, when net namespace is deleted, mlx5 reload results in deadlock. |
| |
| When deadlock occurs, devlink mutex is also held. This not only deadlocks |
| the mlx5 device under reload, but all the processes which attempt to |
| access unrelated devlink devices are deadlocked. |
| |
| Hence, fix this by mlx5 ib driver to register for per net netdev notifier |
| instead of global one, which operats on the net namespace without holding |
| the pernet_ops_rwsem. |
| |
| Fixes: 4383cfcc65e7 ("net/mlx5: Add devlink reload") |
| Link: https://lore.kernel.org/r/20201026134359.23150-1-parav@nvidia.com |
| Signed-off-by: Parav Pandit <parav@nvidia.com> |
| Signed-off-by: Leon Romanovsky <leonro@nvidia.com> |
| Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| drivers/infiniband/hw/mlx5/main.c | 6 ++++-- |
| .../net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 5 ----- |
| include/linux/mlx5/driver.h | 18 ++++++++++++++++++ |
| 3 files changed, 22 insertions(+), 7 deletions(-) |
| |
| diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c |
| index b805cc8124657..2a7b5ffb2a2ef 100644 |
| --- a/drivers/infiniband/hw/mlx5/main.c |
| +++ b/drivers/infiniband/hw/mlx5/main.c |
| @@ -3318,7 +3318,8 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) |
| int err; |
| |
| dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event; |
| - err = register_netdevice_notifier(&dev->port[port_num].roce.nb); |
| + err = register_netdevice_notifier_net(mlx5_core_net(dev->mdev), |
| + &dev->port[port_num].roce.nb); |
| if (err) { |
| dev->port[port_num].roce.nb.notifier_call = NULL; |
| return err; |
| @@ -3330,7 +3331,8 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) |
| static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) |
| { |
| if (dev->port[port_num].roce.nb.notifier_call) { |
| - unregister_netdevice_notifier(&dev->port[port_num].roce.nb); |
| + unregister_netdevice_notifier_net(mlx5_core_net(dev->mdev), |
| + &dev->port[port_num].roce.nb); |
| dev->port[port_num].roce.nb.notifier_call = NULL; |
| } |
| } |
| diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h |
| index d046db7bb047d..3a9fa629503f0 100644 |
| --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h |
| +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h |
| @@ -90,9 +90,4 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, |
| u32 key_type, u32 *p_key_id); |
| void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); |
| |
| -static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) |
| -{ |
| - return devlink_net(priv_to_devlink(dev)); |
| -} |
| - |
| #endif |
| diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h |
| index 372100c755e7f..e30be3dd5be0e 100644 |
| --- a/include/linux/mlx5/driver.h |
| +++ b/include/linux/mlx5/driver.h |
| @@ -1212,4 +1212,22 @@ static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) |
| return val.vbool; |
| } |
| |
| +/** |
| + * mlx5_core_net - Provide net namespace of the mlx5_core_dev |
| + * @dev: mlx5 core device |
| + * |
| + * mlx5_core_net() returns the net namespace of mlx5 core device. |
| + * This can be called only in below described limited context. |
| + * (a) When a devlink instance for mlx5_core is registered and |
| + * when devlink reload operation is disabled. |
| + * or |
| + * (b) during devlink reload reload_down() and reload_up callbacks |
| + * where it is ensured that devlink instance's net namespace is |
| + * stable. |
| + */ |
| +static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) |
| +{ |
| + return devlink_net(priv_to_devlink(dev)); |
| +} |
| + |
| #endif /* MLX5_DRIVER_H */ |
| -- |
| 2.27.0 |
| |