| From a043165eaf19bcc3451eae420422913d13bec273 Mon Sep 17 00:00:00 2001 |
| From: Jack Morgenstein <jackm@dev.mellanox.co.il> |
| Date: Mon, 16 Jan 2017 18:31:37 +0200 |
| Subject: [PATCH] net/mlx4_core: Fix racy CQ (Completion Queue) free |
| |
| commit 291c566a28910614ce42d0ffe82196eddd6346f4 upstream. |
| |
| In function mlx4_cq_completion() and mlx4_cq_event(), the |
| radix_tree_lookup requires a rcu_read_lock. |
| This is mandatory: if another core frees the CQ, it could |
| run the radix_tree_node_rcu_free() call_rcu() callback while |
| it is being used by the radix tree lookup function. |
| |
| Additionally, in function mlx4_cq_event(), since we are adding |
| the rcu lock around the radix-tree lookup, we no longer need to take |
| the spinlock. Also, the synchronize_irq() call for the async event |
| eliminates the need for incrementing the cq reference count in |
| mlx4_cq_event(). |
| |
| Other changes: |
| 1. In function mlx4_cq_free(), replace spin_lock_irq with spin_lock: |
| we no longer take this spinlock in the interrupt context. |
| The spinlock here, therefore, simply protects against different |
| threads simultaneously invoking mlx4_cq_free() for different cq's. |
| |
| 2. In function mlx4_cq_free(), we move the radix tree delete to before |
| the synchronize_irq() calls. This guarantees that we will not |
| access this cq during any subsequent interrupts, and therefore can |
| safely free the CQ after the synchronize_irq calls. The rcu_read_lock |
| in the interrupt handlers only needs to protect against corrupting the |
| radix tree; the interrupt handlers may access the cq outside the |
| rcu_read_lock due to the synchronize_irq calls which protect against |
| premature freeing of the cq. |
| |
| 3. In function mlx4_cq_event(), we change the mlx4_warn message to mlx4_dbg. |
| |
| 4. We leave the cq reference count mechanism in place, because it is |
| still needed for the cq completion tasklet mechanism. |
| |
| Fixes: 6d90aa5cf17b ("net/mlx4_core: Make sure there are no pending async events when freeing CQ") |
| Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") |
| Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> |
| Signed-off-by: Matan Barak <matanb@mellanox.com> |
| Signed-off-by: Tariq Toukan <tariqt@mellanox.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c |
| index a849da92f857..6b8635378f1f 100644 |
| --- a/drivers/net/ethernet/mellanox/mlx4/cq.c |
| +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c |
| @@ -101,13 +101,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) |
| { |
| struct mlx4_cq *cq; |
| |
| + rcu_read_lock(); |
| cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, |
| cqn & (dev->caps.num_cqs - 1)); |
| + rcu_read_unlock(); |
| + |
| if (!cq) { |
| mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); |
| return; |
| } |
| |
| + /* Acessing the CQ outside of rcu_read_lock is safe, because |
| + * the CQ is freed only after interrupt handling is completed. |
| + */ |
| ++cq->arm_sn; |
| |
| cq->comp(cq); |
| @@ -118,23 +124,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) |
| struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; |
| struct mlx4_cq *cq; |
| |
| - spin_lock(&cq_table->lock); |
| - |
| + rcu_read_lock(); |
| cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); |
| - if (cq) |
| - atomic_inc(&cq->refcount); |
| - |
| - spin_unlock(&cq_table->lock); |
| + rcu_read_unlock(); |
| |
| if (!cq) { |
| - mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); |
| + mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn); |
| return; |
| } |
| |
| + /* Acessing the CQ outside of rcu_read_lock is safe, because |
| + * the CQ is freed only after interrupt handling is completed. |
| + */ |
| cq->event(cq, event_type); |
| - |
| - if (atomic_dec_and_test(&cq->refcount)) |
| - complete(&cq->free); |
| } |
| |
| static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, |
| @@ -301,9 +303,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, |
| if (err) |
| return err; |
| |
| - spin_lock_irq(&cq_table->lock); |
| + spin_lock(&cq_table->lock); |
| err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); |
| - spin_unlock_irq(&cq_table->lock); |
| + spin_unlock(&cq_table->lock); |
| if (err) |
| goto err_icm; |
| |
| @@ -349,9 +351,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, |
| return 0; |
| |
| err_radix: |
| - spin_lock_irq(&cq_table->lock); |
| + spin_lock(&cq_table->lock); |
| radix_tree_delete(&cq_table->tree, cq->cqn); |
| - spin_unlock_irq(&cq_table->lock); |
| + spin_unlock(&cq_table->lock); |
| |
| err_icm: |
| mlx4_cq_free_icm(dev, cq->cqn); |
| @@ -370,15 +372,15 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) |
| if (err) |
| mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); |
| |
| + spin_lock(&cq_table->lock); |
| + radix_tree_delete(&cq_table->tree, cq->cqn); |
| + spin_unlock(&cq_table->lock); |
| + |
| synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq); |
| if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq != |
| priv->eq_table.eq[MLX4_EQ_ASYNC].irq) |
| synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq); |
| |
| - spin_lock_irq(&cq_table->lock); |
| - radix_tree_delete(&cq_table->tree, cq->cqn); |
| - spin_unlock_irq(&cq_table->lock); |
| - |
| if (atomic_dec_and_test(&cq->refcount)) |
| complete(&cq->free); |
| wait_for_completion(&cq->free); |
| -- |
| 2.12.0 |
| |