| From ed9eb71085ecb7ded9a5118cec2ab70667cc7350 Mon Sep 17 00:00:00 2001 |
| From: Ilya Dryomov <idryomov@gmail.com> |
| Date: Sat, 3 Jul 2021 11:31:26 +0200 |
| Subject: rbd: don't hold lock_rwsem while running_list is being drained |
| |
| From: Ilya Dryomov <idryomov@gmail.com> |
| |
| commit ed9eb71085ecb7ded9a5118cec2ab70667cc7350 upstream. |
| |
| Currently rbd_quiesce_lock() holds lock_rwsem for read while blocking |
| on releasing_wait completion. On the I/O completion side, each image |
| request also needs to take lock_rwsem for read. Because rw_semaphore |
| implementation doesn't allow new readers after a writer has indicated |
| interest in the lock, this can result in a deadlock if something that |
| needs to take lock_rwsem for write gets involved. For example: |
| |
| 1. watch error occurs |
| 2. rbd_watch_errcb() takes lock_rwsem for write, clears owner_cid and |
| releases lock_rwsem |
| 3. after reestablishing the watch, rbd_reregister_watch() takes |
| lock_rwsem for write and calls rbd_reacquire_lock() |
| 4. rbd_quiesce_lock() downgrades lock_rwsem to for read and blocks on |
| releasing_wait until running_list becomes empty |
| 5. another watch error occurs |
| 6. rbd_watch_errcb() blocks trying to take lock_rwsem for write |
| 7. no in-flight image request can complete and delete itself from |
| running_list because lock_rwsem won't be granted anymore |
| |
| A similar scenario can occur with "lock has been acquired" and "lock |
| has been released" notification handers which also take lock_rwsem for |
| write to update owner_cid. |
| |
| We don't actually get anything useful from sitting on lock_rwsem in |
| rbd_quiesce_lock() -- owner_cid updates certainly don't need to be |
| synchronized with. In fact the whole owner_cid tracking logic could |
| probably be removed from the kernel client because we don't support |
| proxied maintenance operations. |
| |
| Cc: stable@vger.kernel.org # 5.3+ |
| URL: https://tracker.ceph.com/issues/42757 |
| Signed-off-by: Ilya Dryomov <idryomov@gmail.com> |
| Tested-by: Robin Geuze <robin.geuze@nl.team.blue> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| drivers/block/rbd.c | 12 +++++------- |
| 1 file changed, 5 insertions(+), 7 deletions(-) |
| |
| --- a/drivers/block/rbd.c |
| +++ b/drivers/block/rbd.c |
| @@ -4100,8 +4100,6 @@ again: |
| |
| static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) |
| { |
| - bool need_wait; |
| - |
| dout("%s rbd_dev %p\n", __func__, rbd_dev); |
| lockdep_assert_held_write(&rbd_dev->lock_rwsem); |
| |
| @@ -4113,11 +4111,11 @@ static bool rbd_quiesce_lock(struct rbd_ |
| */ |
| rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; |
| rbd_assert(!completion_done(&rbd_dev->releasing_wait)); |
| - need_wait = !list_empty(&rbd_dev->running_list); |
| - downgrade_write(&rbd_dev->lock_rwsem); |
| - if (need_wait) |
| - wait_for_completion(&rbd_dev->releasing_wait); |
| - up_read(&rbd_dev->lock_rwsem); |
| + if (list_empty(&rbd_dev->running_list)) |
| + return true; |
| + |
| + up_write(&rbd_dev->lock_rwsem); |
| + wait_for_completion(&rbd_dev->releasing_wait); |
| |
| down_write(&rbd_dev->lock_rwsem); |
| if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) |