| From 0b84d1273f7f472f567e84659327dca604860a4e Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Tue, 28 Jul 2020 13:16:36 -0700 |
| Subject: nvme-tcp: fix timeout handler |
| |
| From: Sagi Grimberg <sagi@grimberg.me> |
| |
| [ Upstream commit 236187c4ed195161dfa4237c7beffbba0c5ae45b ] |
| |
| When a request times out in a LIVE state, we simply trigger error |
| recovery and let the error recovery handle the request cancellation, |
| however when a request times out in a non LIVE state, we make sure to |
| complete it immediately as it might block controller setup or teardown |
| and prevent forward progress. |
| |
| However tearing down the entire set of I/O and admin queues causes |
| freeze/unfreeze imbalance (q->mq_freeze_depth) and is really |
| an overkill to what we actually need, which is to just fence controller |
| teardown that may be running, stop the queue, and cancel the request if |
| it is not already completed. |
| |
| Now that we have the controller teardown_lock, we can safely serialize |
| request cancellation. This addresses a hang caused by calling extra |
| queue freeze on controller namespaces, causing unfreeze to not complete |
| correctly. |
| |
| Signed-off-by: Sagi Grimberg <sagi@grimberg.me> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| drivers/nvme/host/tcp.c | 56 ++++++++++++++++++++++++++--------------- |
| 1 file changed, 36 insertions(+), 20 deletions(-) |
| |
| diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c |
| index a94c80727de1e..98a045429293e 100644 |
| --- a/drivers/nvme/host/tcp.c |
| +++ b/drivers/nvme/host/tcp.c |
| @@ -421,6 +421,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) |
| if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) |
| return; |
| |
| + dev_warn(ctrl->device, "starting error recovery\n"); |
| queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work); |
| } |
| |
| @@ -2057,40 +2058,55 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) |
| nvme_tcp_queue_request(&ctrl->async_req); |
| } |
| |
| +static void nvme_tcp_complete_timed_out(struct request *rq) |
| +{ |
| + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); |
| + struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; |
| + |
| + /* fence other contexts that may complete the command */ |
| + mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); |
| + nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); |
| + if (!blk_mq_request_completed(rq)) { |
| + nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; |
| + blk_mq_complete_request(rq); |
| + } |
| + mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); |
| +} |
| + |
| static enum blk_eh_timer_return |
| nvme_tcp_timeout(struct request *rq, bool reserved) |
| { |
| struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); |
| - struct nvme_tcp_ctrl *ctrl = req->queue->ctrl; |
| + struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; |
| struct nvme_tcp_cmd_pdu *pdu = req->pdu; |
| |
| - /* |
| - * Restart the timer if a controller reset is already scheduled. Any |
| - * timed out commands would be handled before entering the connecting |
| - * state. |
| - */ |
| - if (ctrl->ctrl.state == NVME_CTRL_RESETTING) |
| - return BLK_EH_RESET_TIMER; |
| - |
| - dev_warn(ctrl->ctrl.device, |
| + dev_warn(ctrl->device, |
| "queue %d: timeout request %#x type %d\n", |
| nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type); |
| |
| - if (ctrl->ctrl.state != NVME_CTRL_LIVE) { |
| + if (ctrl->state != NVME_CTRL_LIVE) { |
| /* |
| - * Teardown immediately if controller times out while starting |
| - * or we are already started error recovery. all outstanding |
| - * requests are completed on shutdown, so we return BLK_EH_DONE. |
| + * If we are resetting, connecting or deleting we should |
| + * complete immediately because we may block controller |
| + * teardown or setup sequence |
| + * - ctrl disable/shutdown fabrics requests |
| + * - connect requests |
| + * - initialization admin requests |
| + * - I/O requests that entered after unquiescing and |
| + * the controller stopped responding |
| + * |
| + * All other requests should be cancelled by the error |
| + * recovery work, so it's fine that we fail it here. |
| */ |
| - flush_work(&ctrl->err_work); |
| - nvme_tcp_teardown_io_queues(&ctrl->ctrl, false); |
| - nvme_tcp_teardown_admin_queue(&ctrl->ctrl, false); |
| + nvme_tcp_complete_timed_out(rq); |
| return BLK_EH_DONE; |
| } |
| |
| - dev_warn(ctrl->ctrl.device, "starting error recovery\n"); |
| - nvme_tcp_error_recovery(&ctrl->ctrl); |
| - |
| + /* |
| + * LIVE state should trigger the normal error recovery which will |
| + * handle completing this request. |
| + */ |
| + nvme_tcp_error_recovery(ctrl); |
| return BLK_EH_RESET_TIMER; |
| } |
| |
| -- |
| 2.25.1 |
| |