| From 90981de1b9a37ac9847125499bd66ea70a642bbd Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Tue, 24 Sep 2019 10:49:54 +0100 |
| Subject: Btrfs: fix race setting up and completing qgroup rescan workers |
| |
| From: Filipe Manana <fdmanana@suse.com> |
| |
| [ Upstream commit 13fc1d271a2e3ab8a02071e711add01fab9271f6 ] |
| |
| There is a race between setting up a qgroup rescan worker and completing |
| a qgroup rescan worker that can lead to callers of the qgroup rescan wait |
| ioctl to either not wait for the rescan worker to complete or to hang |
| forever due to missing wake ups. The following diagram shows a sequence |
| of steps that illustrates the race. |
| |
| CPU 1 CPU 2 CPU 3 |
| |
| btrfs_ioctl_quota_rescan() |
| btrfs_qgroup_rescan() |
| qgroup_rescan_init() |
| mutex_lock(&fs_info->qgroup_rescan_lock) |
| spin_lock(&fs_info->qgroup_lock) |
| |
| fs_info->qgroup_flags |= |
| BTRFS_QGROUP_STATUS_FLAG_RESCAN |
| |
| init_completion( |
| &fs_info->qgroup_rescan_completion) |
| |
| fs_info->qgroup_rescan_running = true |
| |
| mutex_unlock(&fs_info->qgroup_rescan_lock) |
| spin_unlock(&fs_info->qgroup_lock) |
| |
| btrfs_init_work() |
| --> starts the worker |
| |
| btrfs_qgroup_rescan_worker() |
| mutex_lock(&fs_info->qgroup_rescan_lock) |
| |
| fs_info->qgroup_flags &= |
| ~BTRFS_QGROUP_STATUS_FLAG_RESCAN |
| |
| mutex_unlock(&fs_info->qgroup_rescan_lock) |
| |
| starts transaction, updates qgroup status |
| item, etc |
| |
| btrfs_ioctl_quota_rescan() |
| btrfs_qgroup_rescan() |
| qgroup_rescan_init() |
| mutex_lock(&fs_info->qgroup_rescan_lock) |
| spin_lock(&fs_info->qgroup_lock) |
| |
| fs_info->qgroup_flags |= |
| BTRFS_QGROUP_STATUS_FLAG_RESCAN |
| |
| init_completion( |
| &fs_info->qgroup_rescan_completion) |
| |
| fs_info->qgroup_rescan_running = true |
| |
| mutex_unlock(&fs_info->qgroup_rescan_lock) |
| spin_unlock(&fs_info->qgroup_lock) |
| |
| btrfs_init_work() |
| --> starts another worker |
| |
| mutex_lock(&fs_info->qgroup_rescan_lock) |
| |
| fs_info->qgroup_rescan_running = false |
| |
| mutex_unlock(&fs_info->qgroup_rescan_lock) |
| |
| complete_all(&fs_info->qgroup_rescan_completion) |
| |
| Before the rescan worker started by the task at CPU 3 completes, if |
| another task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS |
| because the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at |
| fs_info->qgroup_flags, which is expected and correct behaviour. |
| |
| However if other task calls btrfs_ioctl_quota_rescan_wait() before the |
| rescan worker started by the task at CPU 3 completes, it will return |
| immediately without waiting for the new rescan worker to complete, |
| because fs_info->qgroup_rescan_running is set to false by CPU 2. |
| |
| This race is making test case btrfs/171 (from fstests) to fail often: |
| |
| btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad) |
| # --- tests/btrfs/171.out 2018-09-16 21:30:48.505104287 +0100 |
| # +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad 2019-09-19 02:01:36.938486039 +0100 |
| # @@ -1,2 +1,3 @@ |
| # QA output created by 171 |
| # +ERROR: quota rescan failed: Operation now in progress |
| # Silence is golden |
| # ... |
| # (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad' to see the entire diff) |
| |
| That is because the test calls the btrfs-progs commands "qgroup quota |
| rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes |
| calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs" |
| commands 'qgroup assign' and 'qgroup remove' often call the rescan start |
| ioctl after calling the qgroup assign ioctl, |
| btrfs_ioctl_qgroup_assign()), since previous waits didn't actually wait |
| for a rescan worker to complete. |
| |
| Another problem the race can cause is missing wake ups for waiters, |
| since the call to complete_all() happens outside a critical section and |
| after clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence |
| diagram above, if we have a waiter for the first rescan task (executed |
| by CPU 2), then fs_info->qgroup_rescan_completion.wait is not empty, and |
| if after the rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and |
| before it calls complete_all() against |
| fs_info->qgroup_rescan_completion, the task at CPU 3 calls |
| init_completion() against fs_info->qgroup_rescan_completion which |
| re-initilizes its wait queue to an empty queue, therefore causing the |
| rescan worker at CPU 2 to call complete_all() against an empty queue, |
| never waking up the task waiting for that rescan worker. |
| |
| Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting |
| fs_info->qgroup_rescan_running to false in the same critical section, |
| delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing the |
| call to complete_all() in that same critical section. This gives the |
| protection needed to avoid rescan wait ioctl callers not waiting for a |
| running rescan worker and the lost wake ups problem, since setting that |
| rescan flag and boolean as well as initializing the wait queue is done |
| already in a critical section delimited by that mutex (at |
| qgroup_rescan_init()). |
| |
| Fixes: 57254b6ebce4ce ("Btrfs: add ioctl to wait for qgroup rescan completion") |
| Fixes: d2c609b834d62f ("btrfs: properly track when rescan worker is running") |
| CC: stable@vger.kernel.org # 4.4+ |
| Reviewed-by: Josef Bacik <josef@toxicpanda.com> |
| Signed-off-by: Filipe Manana <fdmanana@suse.com> |
| Signed-off-by: David Sterba <dsterba@suse.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| fs/btrfs/qgroup.c | 33 +++++++++++++++++++-------------- |
| 1 file changed, 19 insertions(+), 14 deletions(-) |
| |
| --- a/fs/btrfs/qgroup.c |
| +++ b/fs/btrfs/qgroup.c |
| @@ -2328,9 +2328,6 @@ out: |
| btrfs_free_path(path); |
| |
| mutex_lock(&fs_info->qgroup_rescan_lock); |
| - if (!btrfs_fs_closing(fs_info)) |
| - fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; |
| - |
| if (err > 0 && |
| fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { |
| fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
| @@ -2346,16 +2343,30 @@ out: |
| trans = btrfs_start_transaction(fs_info->quota_root, 1); |
| if (IS_ERR(trans)) { |
| err = PTR_ERR(trans); |
| + trans = NULL; |
| btrfs_err(fs_info, |
| "fail to start transaction for status update: %d\n", |
| err); |
| - goto done; |
| } |
| - ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); |
| - if (ret < 0) { |
| - err = ret; |
| - btrfs_err(fs_info, "fail to update qgroup status: %d\n", err); |
| + |
| + mutex_lock(&fs_info->qgroup_rescan_lock); |
| + if (!btrfs_fs_closing(fs_info)) |
| + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; |
| + if (trans) { |
| + ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); |
| + if (ret < 0) { |
| + err = ret; |
| + btrfs_err(fs_info, "fail to update qgroup status: %d", |
| + err); |
| + } |
| } |
| + fs_info->qgroup_rescan_running = false; |
| + complete_all(&fs_info->qgroup_rescan_completion); |
| + mutex_unlock(&fs_info->qgroup_rescan_lock); |
| + |
| + if (!trans) |
| + return; |
| + |
| btrfs_end_transaction(trans, fs_info->quota_root); |
| |
| if (btrfs_fs_closing(fs_info)) { |
| @@ -2366,12 +2377,6 @@ out: |
| } else { |
| btrfs_err(fs_info, "qgroup scan failed with %d", err); |
| } |
| - |
| -done: |
| - mutex_lock(&fs_info->qgroup_rescan_lock); |
| - fs_info->qgroup_rescan_running = false; |
| - mutex_unlock(&fs_info->qgroup_rescan_lock); |
| - complete_all(&fs_info->qgroup_rescan_completion); |
| } |
| |
| /* |