releases/5.5.9/ib-mlx5-fix-implicit-odp-race.patch - pub/scm/linux/kernel/git/stable/stable-queue - Git at Google

 From de5ed007a03d71daaa505f5daa4d3666530c7090 Mon Sep 17 00:00:00 2001
 From: Artemy Kovalyov <artemyko@mellanox.com>
 Date: Thu, 27 Feb 2020 13:39:18 +0200
 Subject: IB/mlx5: Fix implicit ODP race

 From: Artemy Kovalyov <artemyko@mellanox.com>

 commit de5ed007a03d71daaa505f5daa4d3666530c7090 upstream.

 Following race may occur because of the call_srcu and the placement of
 the synchronize_srcu vs the xa_erase.

 CPU0				   CPU1

 mlx5_ib_free_implicit_mr:	   destroy_unused_implicit_child_mr:
  xa_erase(odp_mkeys)
  synchronize_srcu()
 				    xa_lock(implicit_children)
 				    if (still in xarray)
 				       atomic_inc()
 				       call_srcu()
 				    xa_unlock(implicit_children)
  xa_erase(implicit_children):
    xa_lock(implicit_children)
    __xa_erase()
    xa_unlock(implicit_children)

  flush_workqueue()
 				   [..]
 				    free_implicit_child_mr_rcu:
 				     (via call_srcu)
 				      queue_work()

  WARN_ON(atomic_read())
 				   [..]
 				    free_implicit_child_mr_work:
 				     (via wq)
 				      free_implicit_child_mr()
  mlx5_mr_cache_invalidate()
 				     mlx5_ib_update_xlt() <-- UMR QP fail
 				     atomic_dec()

 The wait_event() solves the race because it blocks until
 free_implicit_child_mr_work() completes.

 Fixes: 5256edcb98a1 ("RDMA/mlx5: Rework implicit ODP destroy")
 Link: https://lore.kernel.org/r/20200227113918.94432-1-leon@kernel.org
 Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
 Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
 Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
 Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

 ---
  drivers/infiniband/hw/mlx5/mlx5_ib.h |    1 +
  drivers/infiniband/hw/mlx5/odp.c     |   17 +++++++----------
  2 files changed, 8 insertions(+), 10 deletions(-)

 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
 +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
 @@ -629,6 +629,7 @@ struct mlx5_ib_mr {

  	/* For ODP and implicit */
  	atomic_t		num_deferred_work;
 +	wait_queue_head_t       q_deferred_work;
  	struct xarray		implicit_children;
  	union {
  		struct rcu_head rcu;
 --- a/drivers/infiniband/hw/mlx5/odp.c
 +++ b/drivers/infiniband/hw/mlx5/odp.c
 @@ -197,7 +197,8 @@ static void free_implicit_child_mr(struc
  	mr->parent = NULL;
  	mlx5_mr_cache_free(mr->dev, mr);
  	ib_umem_odp_release(odp);
 -	atomic_dec(&imr->num_deferred_work);
 +	if (atomic_dec_and_test(&imr->num_deferred_work))
 +		wake_up(&imr->q_deferred_work);
  }

  static void free_implicit_child_mr_work(struct work_struct *work)
 @@ -516,6 +517,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implici
  	imr->umem = &umem_odp->umem;
  	imr->is_odp_implicit = true;
  	atomic_set(&imr->num_deferred_work, 0);
 +	init_waitqueue_head(&imr->q_deferred_work);
  	xa_init(&imr->implicit_children);

  	err = mlx5_ib_update_xlt(imr, 0,
 @@ -573,10 +575,7 @@ void mlx5_ib_free_implicit_mr(struct mlx
  	 * under xa_lock while the child is in the xarray. Thus at this point
  	 * it is only decreasing, and all work holding it is now on the wq.
  	 */
 -	if (atomic_read(&imr->num_deferred_work)) {
 -		flush_workqueue(system_unbound_wq);
 -		WARN_ON(atomic_read(&imr->num_deferred_work));
 -	}
 +	wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));

  	/*
  	 * Fence the imr before we destroy the children. This allows us to
 @@ -607,10 +606,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib
  	/* Wait for all running page-fault handlers to finish. */
  	synchronize_srcu(&mr->dev->odp_srcu);

 -	if (atomic_read(&mr->num_deferred_work)) {
 -		flush_workqueue(system_unbound_wq);
 -		WARN_ON(atomic_read(&mr->num_deferred_work));
 -	}
 +	wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));

  	dma_fence_odp_mr(mr);
  }
 @@ -1682,7 +1678,8 @@ static void destroy_prefetch_work(struct
  	u32 i;

  	for (i = 0; i < work->num_sge; ++i)
 -		atomic_dec(&work->frags[i].mr->num_deferred_work);
 +		if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work))
 +			wake_up(&work->frags[i].mr->q_deferred_work);
  	kvfree(work);
  }
	From de5ed007a03d71daaa505f5daa4d3666530c7090 Mon Sep 17 00:00:00 2001
	From: Artemy Kovalyov <artemyko@mellanox.com>
	Date: Thu, 27 Feb 2020 13:39:18 +0200
	Subject: IB/mlx5: Fix implicit ODP race

	From: Artemy Kovalyov <artemyko@mellanox.com>

	commit de5ed007a03d71daaa505f5daa4d3666530c7090 upstream.

	Following race may occur because of the call_srcu and the placement of
	the synchronize_srcu vs the xa_erase.

	CPU0 CPU1

	mlx5_ib_free_implicit_mr: destroy_unused_implicit_child_mr:
	xa_erase(odp_mkeys)
	synchronize_srcu()
	xa_lock(implicit_children)
	if (still in xarray)
	atomic_inc()
	call_srcu()
	xa_unlock(implicit_children)
	xa_erase(implicit_children):
	xa_lock(implicit_children)
	__xa_erase()
	xa_unlock(implicit_children)

	flush_workqueue()
	[..]
	free_implicit_child_mr_rcu:
	(via call_srcu)
	queue_work()

	WARN_ON(atomic_read())
	[..]
	free_implicit_child_mr_work:
	(via wq)
	free_implicit_child_mr()
	mlx5_mr_cache_invalidate()
	mlx5_ib_update_xlt() <-- UMR QP fail
	atomic_dec()

	The wait_event() solves the race because it blocks until
	free_implicit_child_mr_work() completes.

	Fixes: 5256edcb98a1 ("RDMA/mlx5: Rework implicit ODP destroy")
	Link: https://lore.kernel.org/r/20200227113918.94432-1-leon@kernel.org
	Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
	Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
	Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
	Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
	Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

	---
	drivers/infiniband/hw/mlx5/mlx5_ib.h \| 1 +
	drivers/infiniband/hw/mlx5/odp.c \| 17 +++++++----------
	2 files changed, 8 insertions(+), 10 deletions(-)

	--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
	+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
	@@ -629,6 +629,7 @@ struct mlx5_ib_mr {

	/* For ODP and implicit */
	atomic_t num_deferred_work;
	+ wait_queue_head_t q_deferred_work;
	struct xarray implicit_children;
	union {
	struct rcu_head rcu;
	--- a/drivers/infiniband/hw/mlx5/odp.c
	+++ b/drivers/infiniband/hw/mlx5/odp.c
	@@ -197,7 +197,8 @@ static void free_implicit_child_mr(struc
	mr->parent = NULL;
	mlx5_mr_cache_free(mr->dev, mr);
	ib_umem_odp_release(odp);
	- atomic_dec(&imr->num_deferred_work);
	+ if (atomic_dec_and_test(&imr->num_deferred_work))
	+ wake_up(&imr->q_deferred_work);
	}

	static void free_implicit_child_mr_work(struct work_struct *work)
	@@ -516,6 +517,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implici
	imr->umem = &umem_odp->umem;
	imr->is_odp_implicit = true;
	atomic_set(&imr->num_deferred_work, 0);
	+ init_waitqueue_head(&imr->q_deferred_work);
	xa_init(&imr->implicit_children);

	err = mlx5_ib_update_xlt(imr, 0,
	@@ -573,10 +575,7 @@ void mlx5_ib_free_implicit_mr(struct mlx
	* under xa_lock while the child is in the xarray. Thus at this point
	* it is only decreasing, and all work holding it is now on the wq.
	*/
	- if (atomic_read(&imr->num_deferred_work)) {
	- flush_workqueue(system_unbound_wq);
	- WARN_ON(atomic_read(&imr->num_deferred_work));
	- }
	+ wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));

	/*
	* Fence the imr before we destroy the children. This allows us to
	@@ -607,10 +606,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib
	/* Wait for all running page-fault handlers to finish. */
	synchronize_srcu(&mr->dev->odp_srcu);

	- if (atomic_read(&mr->num_deferred_work)) {
	- flush_workqueue(system_unbound_wq);
	- WARN_ON(atomic_read(&mr->num_deferred_work));
	- }
	+ wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));

	dma_fence_odp_mr(mr);
	}
	@@ -1682,7 +1678,8 @@ static void destroy_prefetch_work(struct
	u32 i;

	for (i = 0; i < work->num_sge; ++i)
	- atomic_dec(&work->frags[i].mr->num_deferred_work);
	+ if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work))
	+ wake_up(&work->frags[i].mr->q_deferred_work);
	kvfree(work);
	}