| From e00eafa5297155b0473e0b2250d40ca00d7bcdfb Mon Sep 17 00:00:00 2001 |
| From: Keith Busch <keith.busch@intel.com> |
| Date: Wed, 23 Jan 2019 18:46:11 -0700 |
| Subject: nvme-pci: fix rapid add remove sequence |
| |
| [ Upstream commit 5c959d73dba6495ec01d04c206ee679d61ccb2b0 ] |
| |
| A surprise removal may fail to tear down request queues if it is racing |
| with the initial asynchronous probe. If that happens, the remove path |
| won't see the queue resources to tear down, and the controller reset |
| path may create a new request queue on a removed device, but will not |
| be able to make forward progress, deadlocking the pci removal. |
| |
| Protect setting up non-blocking resources from a shutdown by holding the |
| same mutex the shutdown path uses (dev->shutdown_lock), and transition to |
| the CONNECTING state only after these resources are initialized so the |
| probe path may see the dead controller state before dispatching new IO. |
| |
| Link: https://bugzilla.kernel.org/show_bug.cgi?id=202081 |
| Reported-by: Alex Gagniuc <Alex_Gagniuc@Dellteam.com> |
| Signed-off-by: Keith Busch <keith.busch@intel.com> |
| Tested-by: Alex Gagniuc <mr.nuke.me@gmail.com> |
| Signed-off-by: Christoph Hellwig <hch@lst.de> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| drivers/nvme/host/pci.c | 22 ++++++++++++---------- |
| 1 file changed, 12 insertions(+), 10 deletions(-) |
| |
| diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c |
| index f46313f441ec..6398ffbce6de 100644 |
| --- a/drivers/nvme/host/pci.c |
| +++ b/drivers/nvme/host/pci.c |
| @@ -2260,16 +2260,7 @@ static void nvme_reset_work(struct work_struct *work) |
| if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) |
| nvme_dev_disable(dev, false); |
| |
| - /* |
| - * Introduce CONNECTING state from nvme-fc/rdma transports to mark the |
| - * initializing procedure here. |
| - */ |
| - if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { |
| - dev_warn(dev->ctrl.device, |
| - "failed to mark controller CONNECTING\n"); |
| - goto out; |
| - } |
| - |
| + mutex_lock(&dev->shutdown_lock); |
| result = nvme_pci_enable(dev); |
| if (result) |
| goto out; |
| @@ -2288,6 +2279,17 @@ static void nvme_reset_work(struct work_struct *work) |
| */ |
| dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; |
| dev->ctrl.max_segments = NVME_MAX_SEGS; |
| + mutex_unlock(&dev->shutdown_lock); |
| + |
| + /* |
| + * Introduce CONNECTING state from nvme-fc/rdma transports to mark the |
| + * initializing procedure here. |
| + */ |
| + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { |
| + dev_warn(dev->ctrl.device, |
| + "failed to mark controller CONNECTING\n"); |
| + goto out; |
| + } |
| |
| result = nvme_init_identify(&dev->ctrl); |
| if (result) |
| -- |
| 2.19.1 |
| |