| From 3eeb100dd1cd3e617e579acb9baf160d1fe36c5d Mon Sep 17 00:00:00 2001 |
| From: Sagi Grimberg <sagi@grimberg.me> |
| Date: Mon, 27 Jul 2020 17:32:09 -0700 |
| Subject: [PATCH] nvme-rdma: fix controller reset hang during traffic |
| |
| commit 9f98772ba307dd89a3d17dc2589f213d3972fc64 upstream. |
| |
| commit fe35ec58f0d3 ("block: update hctx map when use multiple maps") |
| exposed an issue where we may hang trying to wait for queue freeze |
| during I/O. We call blk_mq_update_nr_hw_queues, which in the case of |
| multiple queue maps (which we now have for default/read/poll) attempts to |
| freeze the queue. However, we never started a queue freeze when starting |
| the reset, which means that inflight requests that already entered the |
| queue will not be completed once the queue is quiesced. |
| |
| So start a freeze before we quiesce the queue, and unfreeze the queue |
| after we successfully connected the I/O queues (and make sure to call |
| blk_mq_update_nr_hw_queues only after we are sure that the queue was |
| already frozen). |
| |
| This follows how the pci driver handles resets. |
| |
| Fixes: fe35ec58f0d3 ("block: update hctx map when use multiple maps") |
| Signed-off-by: Sagi Grimberg <sagi@grimberg.me> |
| Signed-off-by: Christoph Hellwig <hch@lst.de> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c |
| index fe86b2623e51..ad17c79fd1e2 100644 |
| --- a/drivers/nvme/host/rdma.c |
| +++ b/drivers/nvme/host/rdma.c |
| @@ -875,15 +875,20 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) |
| ret = PTR_ERR(ctrl->ctrl.connect_q); |
| goto out_free_tag_set; |
| } |
| - } else { |
| - blk_mq_update_nr_hw_queues(&ctrl->tag_set, |
| - ctrl->ctrl.queue_count - 1); |
| } |
| |
| ret = nvme_rdma_start_io_queues(ctrl); |
| if (ret) |
| goto out_cleanup_connect_q; |
| |
| + if (!new) { |
| + nvme_start_queues(&ctrl->ctrl); |
| + nvme_wait_freeze(&ctrl->ctrl); |
| + blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset, |
| + ctrl->ctrl.queue_count - 1); |
| + nvme_unfreeze(&ctrl->ctrl); |
| + } |
| + |
| return 0; |
| |
| out_cleanup_connect_q: |
| @@ -913,6 +918,7 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, |
| bool remove) |
| { |
| if (ctrl->ctrl.queue_count > 1) { |
| + nvme_start_freeze(&ctrl->ctrl); |
| nvme_stop_queues(&ctrl->ctrl); |
| nvme_rdma_stop_io_queues(ctrl); |
| if (ctrl->ctrl.tagset) |
| -- |
| 2.27.0 |
| |