| From 7ca5488566d243f06ba818f7e98dcb483448c100 Mon Sep 17 00:00:00 2001 |
| From: Jason Gunthorpe <jgg@nvidia.com> |
| Date: Thu, 25 Jun 2020 20:42:19 +0300 |
| Subject: [PATCH] RDMA/ipoib: Fix ABBA deadlock with ipoib_reap_ah() |
| |
| commit 65936bf25f90fe440bb2d11624c7d10fab266639 upstream. |
| |
| ipoib_mcast_carrier_on_task() insanely open codes a rtnl_lock() such that |
| the only time flush_workqueue() can be called is if it also clears |
| IPOIB_FLAG_OPER_UP. |
| |
| Thus the flush inside ipoib_flush_ah() will deadlock if it gets unlucky |
| enough, and lockdep doesn't help us to find it early: |
| |
| CPU0 CPU1 CPU2 |
| __ipoib_ib_dev_flush() |
| down_read(vlan_rwsem) |
| |
| ipoib_vlan_add() |
| rtnl_trylock() |
| down_write(vlan_rwsem) |
| |
| ipoib_mcast_carrier_on_task() |
| while (!rtnl_trylock()) |
| msleep(20); |
| |
| ipoib_flush_ah() |
| flush_workqueue(priv->wq) |
| |
| Clean up the ah_reaper related functions and lifecycle to make sense: |
| |
| - Start/Stop of the reaper should only be done in open/stop NDOs, not in |
| any other places |
| |
| - cancel and flush of the reaper should only happen in the stop NDO. |
| cancel is only functional when combined with IPOIB_STOP_REAPER. |
| |
| - Non-stop places were flushing the AH's just need to flush out dead AH's |
| synchronously and ignore the background task completely. It is fully |
| locked and harmless to leave running. |
| |
| Which ultimately fixes the ABBA deadlock by removing the unnecessary |
| flush_workqueue() from the problematic place under the vlan_rwsem. |
| |
| Fixes: efc82eeeae4e ("IB/ipoib: No longer use flush as a parameter") |
| Link: https://lore.kernel.org/r/20200625174219.290842-1-kamalheib1@gmail.com |
| Reported-by: Kamal Heib <kheib@redhat.com> |
| Tested-by: Kamal Heib <kheib@redhat.com> |
| Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c |
| index d10da34d9fef..7b598b66f235 100644 |
| --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c |
| +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c |
| @@ -669,13 +669,12 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, |
| return rc; |
| } |
| |
| -static void __ipoib_reap_ah(struct net_device *dev) |
| +static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv) |
| { |
| - struct ipoib_dev_priv *priv = ipoib_priv(dev); |
| struct ipoib_ah *ah, *tah; |
| unsigned long flags; |
| |
| - netif_tx_lock_bh(dev); |
| + netif_tx_lock_bh(priv->dev); |
| spin_lock_irqsave(&priv->lock, flags); |
| |
| list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) |
| @@ -686,37 +685,37 @@ static void __ipoib_reap_ah(struct net_device *dev) |
| } |
| |
| spin_unlock_irqrestore(&priv->lock, flags); |
| - netif_tx_unlock_bh(dev); |
| + netif_tx_unlock_bh(priv->dev); |
| } |
| |
| void ipoib_reap_ah(struct work_struct *work) |
| { |
| struct ipoib_dev_priv *priv = |
| container_of(work, struct ipoib_dev_priv, ah_reap_task.work); |
| - struct net_device *dev = priv->dev; |
| |
| - __ipoib_reap_ah(dev); |
| + ipoib_reap_dead_ahs(priv); |
| |
| if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) |
| queue_delayed_work(priv->wq, &priv->ah_reap_task, |
| round_jiffies_relative(HZ)); |
| } |
| |
| -static void ipoib_flush_ah(struct net_device *dev) |
| +static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv) |
| { |
| - struct ipoib_dev_priv *priv = ipoib_priv(dev); |
| - |
| - cancel_delayed_work(&priv->ah_reap_task); |
| - flush_workqueue(priv->wq); |
| - ipoib_reap_ah(&priv->ah_reap_task.work); |
| + clear_bit(IPOIB_STOP_REAPER, &priv->flags); |
| + queue_delayed_work(priv->wq, &priv->ah_reap_task, |
| + round_jiffies_relative(HZ)); |
| } |
| |
| -static void ipoib_stop_ah(struct net_device *dev) |
| +static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv) |
| { |
| - struct ipoib_dev_priv *priv = ipoib_priv(dev); |
| - |
| set_bit(IPOIB_STOP_REAPER, &priv->flags); |
| - ipoib_flush_ah(dev); |
| + cancel_delayed_work(&priv->ah_reap_task); |
| + /* |
| + * After ipoib_stop_ah_reaper() we always go through |
| + * ipoib_reap_dead_ahs() which ensures the work is really stopped and |
| + * does a final flush out of the dead_ah's list |
| + */ |
| } |
| |
| static int recvs_pending(struct net_device *dev) |
| @@ -845,16 +844,6 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) |
| return 0; |
| } |
| |
| -void ipoib_ib_dev_stop(struct net_device *dev) |
| -{ |
| - struct ipoib_dev_priv *priv = ipoib_priv(dev); |
| - |
| - priv->rn_ops->ndo_stop(dev); |
| - |
| - clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); |
| - ipoib_flush_ah(dev); |
| -} |
| - |
| int ipoib_ib_dev_open_default(struct net_device *dev) |
| { |
| struct ipoib_dev_priv *priv = ipoib_priv(dev); |
| @@ -898,10 +887,7 @@ int ipoib_ib_dev_open(struct net_device *dev) |
| return -1; |
| } |
| |
| - clear_bit(IPOIB_STOP_REAPER, &priv->flags); |
| - queue_delayed_work(priv->wq, &priv->ah_reap_task, |
| - round_jiffies_relative(HZ)); |
| - |
| + ipoib_start_ah_reaper(priv); |
| if (priv->rn_ops->ndo_open(dev)) { |
| pr_warn("%s: Failed to open dev\n", dev->name); |
| goto dev_stop; |
| @@ -912,13 +898,20 @@ int ipoib_ib_dev_open(struct net_device *dev) |
| return 0; |
| |
| dev_stop: |
| - set_bit(IPOIB_STOP_REAPER, &priv->flags); |
| - cancel_delayed_work(&priv->ah_reap_task); |
| - set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); |
| - ipoib_ib_dev_stop(dev); |
| + ipoib_stop_ah_reaper(priv); |
| return -1; |
| } |
| |
| +void ipoib_ib_dev_stop(struct net_device *dev) |
| +{ |
| + struct ipoib_dev_priv *priv = ipoib_priv(dev); |
| + |
| + priv->rn_ops->ndo_stop(dev); |
| + |
| + clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); |
| + ipoib_stop_ah_reaper(priv); |
| +} |
| + |
| void ipoib_pkey_dev_check_presence(struct net_device *dev) |
| { |
| struct ipoib_dev_priv *priv = ipoib_priv(dev); |
| @@ -1229,7 +1222,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, |
| ipoib_mcast_dev_flush(dev); |
| if (oper_up) |
| set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); |
| - ipoib_flush_ah(dev); |
| + ipoib_reap_dead_ahs(priv); |
| } |
| |
| if (level >= IPOIB_FLUSH_NORMAL) |
| @@ -1304,7 +1297,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) |
| * the neighbor garbage collection is stopped and reaped. |
| * That should all be done now, so make a final ah flush. |
| */ |
| - ipoib_stop_ah(dev); |
| + ipoib_reap_dead_ahs(priv); |
| |
| clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); |
| |
| diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c |
| index 4fd095fd63b6..044bcacad6e4 100644 |
| --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c |
| +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c |
| @@ -1979,6 +1979,8 @@ static void ipoib_ndo_uninit(struct net_device *dev) |
| |
| /* no more works over the priv->wq */ |
| if (priv->wq) { |
| + /* See ipoib_mcast_carrier_on_task() */ |
| + WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); |
| flush_workqueue(priv->wq); |
| destroy_workqueue(priv->wq); |
| priv->wq = NULL; |
| -- |
| 2.27.0 |
| |