| From 787fed95fdfc55cfe8d93e11825ead09fb7be5b2 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Wed, 10 May 2023 16:46:21 +0200 |
| Subject: devlink: change per-devlink netdev notifier to static one |
| |
| From: Jiri Pirko <jiri@nvidia.com> |
| |
| [ Upstream commit e93c9378e33f68b61ea9318580d841caa22fb9ea ] |
| |
| The commit 565b4824c39f ("devlink: change port event netdev notifier |
| from per-net to global") changed original per-net notifier to be |
| per-devlink instance. That fixed the issue of non-receiving events |
| of netdev uninit if that moved to a different namespace. |
| That worked fine in -net tree. |
| |
| However, later on when commit ee75f1fc44dd ("net/mlx5e: Create |
| separate devlink instance for ethernet auxiliary device") and |
| commit 72ed5d5624af ("net/mlx5: Suspend auxiliary devices only in |
| case of PCI device suspend") were merged, a deadlock was introduced |
| when removing a namespace with devlink instance with another nested |
| instance. |
| |
| Here there is the bad flow example resulting in deadlock with mlx5: |
| net_cleanup_work -> cleanup_net (takes down_read(&pernet_ops_rwsem) -> |
| devlink_pernet_pre_exit() -> devlink_reload() -> |
| mlx5_devlink_reload_down() -> mlx5_unload_one_devl_locked() -> |
| mlx5_detach_device() -> del_adev() -> mlx5e_remove() -> |
| mlx5e_destroy_devlink() -> devlink_free() -> |
| unregister_netdevice_notifier() (takes down_write(&pernet_ops_rwsem) |
| |
| Steps to reproduce: |
| $ modprobe mlx5_core |
| $ ip netns add ns1 |
| $ devlink dev reload pci/0000:08:00.0 netns ns1 |
| $ ip netns del ns1 |
| |
| Resolve this by converting the notifier from per-devlink instance to |
| a static one registered during init phase and leaving it registered |
| forever. Use this notifier for all devlink port instances created |
| later on. |
| |
| Note what a tree needs this fix only in case all of the cited fixes |
| commits are present. |
| |
| Reported-by: Moshe Shemesh <moshe@nvidia.com> |
| Fixes: 565b4824c39f ("devlink: change port event netdev notifier from per-net to global") |
| Fixes: ee75f1fc44dd ("net/mlx5e: Create separate devlink instance for ethernet auxiliary device") |
| Fixes: 72ed5d5624af ("net/mlx5: Suspend auxiliary devices only in case of PCI device suspend") |
| Signed-off-by: Jiri Pirko <jiri@nvidia.com> |
| Reviewed-by: Simon Horman <simon.horman@corigine.com> |
| Link: https://lore.kernel.org/r/20230510144621.932017-1-jiri@resnulli.us |
| Signed-off-by: Jakub Kicinski <kuba@kernel.org> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| net/devlink/core.c | 16 +++++++--------- |
| net/devlink/devl_internal.h | 1 - |
| net/devlink/leftover.c | 5 ++--- |
| 3 files changed, 9 insertions(+), 13 deletions(-) |
| |
| diff --git a/net/devlink/core.c b/net/devlink/core.c |
| index 777b091ef74df..0e58eee44bdb2 100644 |
| --- a/net/devlink/core.c |
| +++ b/net/devlink/core.c |
| @@ -204,11 +204,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, |
| if (ret < 0) |
| goto err_xa_alloc; |
| |
| - devlink->netdevice_nb.notifier_call = devlink_port_netdevice_event; |
| - ret = register_netdevice_notifier(&devlink->netdevice_nb); |
| - if (ret) |
| - goto err_register_netdevice_notifier; |
| - |
| devlink->dev = dev; |
| devlink->ops = ops; |
| xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); |
| @@ -233,8 +228,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, |
| |
| return devlink; |
| |
| -err_register_netdevice_notifier: |
| - xa_erase(&devlinks, devlink->index); |
| err_xa_alloc: |
| kfree(devlink); |
| return NULL; |
| @@ -266,8 +259,6 @@ void devlink_free(struct devlink *devlink) |
| xa_destroy(&devlink->params); |
| xa_destroy(&devlink->ports); |
| |
| - WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb)); |
| - |
| xa_erase(&devlinks, devlink->index); |
| |
| devlink_put(devlink); |
| @@ -303,6 +294,10 @@ static struct pernet_operations devlink_pernet_ops __net_initdata = { |
| .pre_exit = devlink_pernet_pre_exit, |
| }; |
| |
| +static struct notifier_block devlink_port_netdevice_nb __net_initdata = { |
| + .notifier_call = devlink_port_netdevice_event, |
| +}; |
| + |
| static int __init devlink_init(void) |
| { |
| int err; |
| @@ -311,6 +306,9 @@ static int __init devlink_init(void) |
| if (err) |
| goto out; |
| err = register_pernet_subsys(&devlink_pernet_ops); |
| + if (err) |
| + goto out; |
| + err = register_netdevice_notifier(&devlink_port_netdevice_nb); |
| |
| out: |
| WARN_ON(err); |
| diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h |
| index e133f423294a2..62921b2eb0d3f 100644 |
| --- a/net/devlink/devl_internal.h |
| +++ b/net/devlink/devl_internal.h |
| @@ -50,7 +50,6 @@ struct devlink { |
| u8 reload_failed:1; |
| refcount_t refcount; |
| struct rcu_work rwork; |
| - struct notifier_block netdevice_nb; |
| char priv[] __aligned(NETDEV_ALIGN); |
| }; |
| |
| diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c |
| index dffca2f9bfa7f..cd02549680767 100644 |
| --- a/net/devlink/leftover.c |
| +++ b/net/devlink/leftover.c |
| @@ -7073,10 +7073,9 @@ int devlink_port_netdevice_event(struct notifier_block *nb, |
| struct devlink_port *devlink_port = netdev->devlink_port; |
| struct devlink *devlink; |
| |
| - devlink = container_of(nb, struct devlink, netdevice_nb); |
| - |
| - if (!devlink_port || devlink_port->devlink != devlink) |
| + if (!devlink_port) |
| return NOTIFY_OK; |
| + devlink = devlink_port->devlink; |
| |
| switch (event) { |
| case NETDEV_POST_INIT: |
| -- |
| 2.39.2 |
| |