releases/4.4.195/btrfs-fix-race-setting-up-and-completing-qgroup-resc.patch - pub/scm/linux/kernel/git/stable/stable-queue - Git at Google

 From 90981de1b9a37ac9847125499bd66ea70a642bbd Mon Sep 17 00:00:00 2001
 From: Sasha Levin <sashal@kernel.org>
 Date: Tue, 24 Sep 2019 10:49:54 +0100
 Subject: Btrfs: fix race setting up and completing qgroup rescan workers

 From: Filipe Manana <fdmanana@suse.com>

 [ Upstream commit 13fc1d271a2e3ab8a02071e711add01fab9271f6 ]

 There is a race between setting up a qgroup rescan worker and completing
 a qgroup rescan worker that can lead to callers of the qgroup rescan wait
 ioctl to either not wait for the rescan worker to complete or to hang
 forever due to missing wake ups. The following diagram shows a sequence
 of steps that illustrates the race.

         CPU 1                                                         CPU 2                                  CPU 3

  btrfs_ioctl_quota_rescan()
   btrfs_qgroup_rescan()
    qgroup_rescan_init()
     mutex_lock(&fs_info->qgroup_rescan_lock)
     spin_lock(&fs_info->qgroup_lock)

     fs_info->qgroup_flags |=
       BTRFS_QGROUP_STATUS_FLAG_RESCAN

     init_completion(
       &fs_info->qgroup_rescan_completion)

     fs_info->qgroup_rescan_running = true

     mutex_unlock(&fs_info->qgroup_rescan_lock)
     spin_unlock(&fs_info->qgroup_lock)

     btrfs_init_work()
      --> starts the worker

                                                         btrfs_qgroup_rescan_worker()
                                                          mutex_lock(&fs_info->qgroup_rescan_lock)

                                                          fs_info->qgroup_flags &=
                                                            ~BTRFS_QGROUP_STATUS_FLAG_RESCAN

                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)

                                                          starts transaction, updates qgroup status
                                                          item, etc

                                                                                                            btrfs_ioctl_quota_rescan()
                                                                                                             btrfs_qgroup_rescan()
                                                                                                              qgroup_rescan_init()
                                                                                                               mutex_lock(&fs_info->qgroup_rescan_lock)
                                                                                                               spin_lock(&fs_info->qgroup_lock)

                                                                                                               fs_info->qgroup_flags |=
                                                                                                                 BTRFS_QGROUP_STATUS_FLAG_RESCAN

                                                                                                               init_completion(
                                                                                                                 &fs_info->qgroup_rescan_completion)

                                                                                                               fs_info->qgroup_rescan_running = true

                                                                                                               mutex_unlock(&fs_info->qgroup_rescan_lock)
                                                                                                               spin_unlock(&fs_info->qgroup_lock)

                                                                                                               btrfs_init_work()
                                                                                                                --> starts another worker

                                                          mutex_lock(&fs_info->qgroup_rescan_lock)

                                                          fs_info->qgroup_rescan_running = false

                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)

 							 complete_all(&fs_info->qgroup_rescan_completion)

 Before the rescan worker started by the task at CPU 3 completes, if
 another task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS
 because the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at
 fs_info->qgroup_flags, which is expected and correct behaviour.

 However if other task calls btrfs_ioctl_quota_rescan_wait() before the
 rescan worker started by the task at CPU 3 completes, it will return
 immediately without waiting for the new rescan worker to complete,
 because fs_info->qgroup_rescan_running is set to false by CPU 2.

 This race is making test case btrfs/171 (from fstests) to fail often:

   btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad)
 #      --- tests/btrfs/171.out     2018-09-16 21:30:48.505104287 +0100
 #      +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad      2019-09-19 02:01:36.938486039 +0100
 #      @@ -1,2 +1,3 @@
 #       QA output created by 171
 #      +ERROR: quota rescan failed: Operation now in progress
 #       Silence is golden
 #      ...
 #      (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad'  to see the entire diff)

 That is because the test calls the btrfs-progs commands "qgroup quota
 rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes
 calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs"
 commands 'qgroup assign' and 'qgroup remove' often call the rescan start
 ioctl after calling the qgroup assign ioctl,
 btrfs_ioctl_qgroup_assign()), since previous waits didn't actually wait
 for a rescan worker to complete.

 Another problem the race can cause is missing wake ups for waiters,
 since the call to complete_all() happens outside a critical section and
 after clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence
 diagram above, if we have a waiter for the first rescan task (executed
 by CPU 2), then fs_info->qgroup_rescan_completion.wait is not empty, and
 if after the rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and
 before it calls complete_all() against
 fs_info->qgroup_rescan_completion, the task at CPU 3 calls
 init_completion() against fs_info->qgroup_rescan_completion which
 re-initilizes its wait queue to an empty queue, therefore causing the
 rescan worker at CPU 2 to call complete_all() against an empty queue,
 never waking up the task waiting for that rescan worker.

 Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting
 fs_info->qgroup_rescan_running to false in the same critical section,
 delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing the
 call to complete_all() in that same critical section. This gives the
 protection needed to avoid rescan wait ioctl callers not waiting for a
 running rescan worker and the lost wake ups problem, since setting that
 rescan flag and boolean as well as initializing the wait queue is done
 already in a critical section delimited by that mutex (at
 qgroup_rescan_init()).

 Fixes: 57254b6ebce4ce ("Btrfs: add ioctl to wait for qgroup rescan completion")
 Fixes: d2c609b834d62f ("btrfs: properly track when rescan worker is running")
 CC: stable@vger.kernel.org # 4.4+
 Reviewed-by: Josef Bacik <josef@toxicpanda.com>
 Signed-off-by: Filipe Manana <fdmanana@suse.com>
 Signed-off-by: David Sterba <dsterba@suse.com>
 Signed-off-by: Sasha Levin <sashal@kernel.org>
 ---
  fs/btrfs/qgroup.c |   33 +++++++++++++++++++--------------
  1 file changed, 19 insertions(+), 14 deletions(-)

 --- a/fs/btrfs/qgroup.c
 +++ b/fs/btrfs/qgroup.c
 @@ -2328,9 +2328,6 @@ out:
  	btrfs_free_path(path);

  	mutex_lock(&fs_info->qgroup_rescan_lock);
 -	if (!btrfs_fs_closing(fs_info))
 -		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
 -
  	if (err > 0 &&
  	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
  		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 @@ -2346,16 +2343,30 @@ out:
  	trans = btrfs_start_transaction(fs_info->quota_root, 1);
  	if (IS_ERR(trans)) {
  		err = PTR_ERR(trans);
 +		trans = NULL;
  		btrfs_err(fs_info,
  			  "fail to start transaction for status update: %d\n",
  			  err);
 -		goto done;
  	}
 -	ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
 -	if (ret < 0) {
 -		err = ret;
 -		btrfs_err(fs_info, "fail to update qgroup status: %d\n", err);
 +
 +	mutex_lock(&fs_info->qgroup_rescan_lock);
 +	if (!btrfs_fs_closing(fs_info))
 +		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
 +	if (trans) {
 +		ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
 +		if (ret < 0) {
 +			err = ret;
 +			btrfs_err(fs_info, "fail to update qgroup status: %d",
 +				  err);
 +		}
  	}
 +	fs_info->qgroup_rescan_running = false;
 +	complete_all(&fs_info->qgroup_rescan_completion);
 +	mutex_unlock(&fs_info->qgroup_rescan_lock);
 +
 +	if (!trans)
 +		return;
 +
  	btrfs_end_transaction(trans, fs_info->quota_root);

  	if (btrfs_fs_closing(fs_info)) {
 @@ -2366,12 +2377,6 @@ out:
  	} else {
  		btrfs_err(fs_info, "qgroup scan failed with %d", err);
  	}
 -
 -done:
 -	mutex_lock(&fs_info->qgroup_rescan_lock);
 -	fs_info->qgroup_rescan_running = false;
 -	mutex_unlock(&fs_info->qgroup_rescan_lock);
 -	complete_all(&fs_info->qgroup_rescan_completion);
  }

  /*
	From 90981de1b9a37ac9847125499bd66ea70a642bbd Mon Sep 17 00:00:00 2001
	From: Sasha Levin <sashal@kernel.org>
	Date: Tue, 24 Sep 2019 10:49:54 +0100
	Subject: Btrfs: fix race setting up and completing qgroup rescan workers

	From: Filipe Manana <fdmanana@suse.com>

	[ Upstream commit 13fc1d271a2e3ab8a02071e711add01fab9271f6 ]

	There is a race between setting up a qgroup rescan worker and completing
	a qgroup rescan worker that can lead to callers of the qgroup rescan wait
	ioctl to either not wait for the rescan worker to complete or to hang
	forever due to missing wake ups. The following diagram shows a sequence
	of steps that illustrates the race.

	CPU 1 CPU 2 CPU 3

	btrfs_ioctl_quota_rescan()
	btrfs_qgroup_rescan()
	qgroup_rescan_init()
	mutex_lock(&fs_info->qgroup_rescan_lock)
	spin_lock(&fs_info->qgroup_lock)

	fs_info->qgroup_flags \|=
	BTRFS_QGROUP_STATUS_FLAG_RESCAN

	init_completion(
	&fs_info->qgroup_rescan_completion)

	fs_info->qgroup_rescan_running = true

	mutex_unlock(&fs_info->qgroup_rescan_lock)
	spin_unlock(&fs_info->qgroup_lock)

	btrfs_init_work()
	--> starts the worker

	btrfs_qgroup_rescan_worker()
	mutex_lock(&fs_info->qgroup_rescan_lock)

	fs_info->qgroup_flags &=
	~BTRFS_QGROUP_STATUS_FLAG_RESCAN

	mutex_unlock(&fs_info->qgroup_rescan_lock)

	starts transaction, updates qgroup status
	item, etc

	btrfs_ioctl_quota_rescan()
	btrfs_qgroup_rescan()
	qgroup_rescan_init()
	mutex_lock(&fs_info->qgroup_rescan_lock)
	spin_lock(&fs_info->qgroup_lock)

	fs_info->qgroup_flags \|=
	BTRFS_QGROUP_STATUS_FLAG_RESCAN

	init_completion(
	&fs_info->qgroup_rescan_completion)

	fs_info->qgroup_rescan_running = true

	mutex_unlock(&fs_info->qgroup_rescan_lock)
	spin_unlock(&fs_info->qgroup_lock)

	btrfs_init_work()
	--> starts another worker

	mutex_lock(&fs_info->qgroup_rescan_lock)

	fs_info->qgroup_rescan_running = false

	mutex_unlock(&fs_info->qgroup_rescan_lock)

	complete_all(&fs_info->qgroup_rescan_completion)

	Before the rescan worker started by the task at CPU 3 completes, if
	another task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS
	because the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at
	fs_info->qgroup_flags, which is expected and correct behaviour.

	However if other task calls btrfs_ioctl_quota_rescan_wait() before the
	rescan worker started by the task at CPU 3 completes, it will return
	immediately without waiting for the new rescan worker to complete,
	because fs_info->qgroup_rescan_running is set to false by CPU 2.

	This race is making test case btrfs/171 (from fstests) to fail often:

	btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad)
	# --- tests/btrfs/171.out 2018-09-16 21:30:48.505104287 +0100
	# +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad 2019-09-19 02:01:36.938486039 +0100
	# @@ -1,2 +1,3 @@
	# QA output created by 171
	# +ERROR: quota rescan failed: Operation now in progress
	# Silence is golden
	# ...
	# (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad' to see the entire diff)

	That is because the test calls the btrfs-progs commands "qgroup quota
	rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes
	calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs"
	commands 'qgroup assign' and 'qgroup remove' often call the rescan start
	ioctl after calling the qgroup assign ioctl,
	btrfs_ioctl_qgroup_assign()), since previous waits didn't actually wait
	for a rescan worker to complete.

	Another problem the race can cause is missing wake ups for waiters,
	since the call to complete_all() happens outside a critical section and
	after clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence
	diagram above, if we have a waiter for the first rescan task (executed
	by CPU 2), then fs_info->qgroup_rescan_completion.wait is not empty, and
	if after the rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and
	before it calls complete_all() against
	fs_info->qgroup_rescan_completion, the task at CPU 3 calls
	init_completion() against fs_info->qgroup_rescan_completion which
	re-initilizes its wait queue to an empty queue, therefore causing the
	rescan worker at CPU 2 to call complete_all() against an empty queue,
	never waking up the task waiting for that rescan worker.

	Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting
	fs_info->qgroup_rescan_running to false in the same critical section,
	delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing the
	call to complete_all() in that same critical section. This gives the
	protection needed to avoid rescan wait ioctl callers not waiting for a
	running rescan worker and the lost wake ups problem, since setting that
	rescan flag and boolean as well as initializing the wait queue is done
	already in a critical section delimited by that mutex (at
	qgroup_rescan_init()).

	Fixes: 57254b6ebce4ce ("Btrfs: add ioctl to wait for qgroup rescan completion")
	Fixes: d2c609b834d62f ("btrfs: properly track when rescan worker is running")
	CC: stable@vger.kernel.org # 4.4+
	Reviewed-by: Josef Bacik <josef@toxicpanda.com>
	Signed-off-by: Filipe Manana <fdmanana@suse.com>
	Signed-off-by: David Sterba <dsterba@suse.com>
	Signed-off-by: Sasha Levin <sashal@kernel.org>
	---
	fs/btrfs/qgroup.c \| 33 +++++++++++++++++++--------------
	1 file changed, 19 insertions(+), 14 deletions(-)

	--- a/fs/btrfs/qgroup.c
	+++ b/fs/btrfs/qgroup.c
	@@ -2328,9 +2328,6 @@ out:
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	- if (!btrfs_fs_closing(fs_info))
	- fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	-
	if (err > 0 &&
	fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	@@ -2346,16 +2343,30 @@ out:
	trans = btrfs_start_transaction(fs_info->quota_root, 1);
	if (IS_ERR(trans)) {
	err = PTR_ERR(trans);
	+ trans = NULL;
	btrfs_err(fs_info,
	"fail to start transaction for status update: %d\n",
	err);
	- goto done;
	}
	- ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
	- if (ret < 0) {
	- err = ret;
	- btrfs_err(fs_info, "fail to update qgroup status: %d\n", err);
	+
	+ mutex_lock(&fs_info->qgroup_rescan_lock);
	+ if (!btrfs_fs_closing(fs_info))
	+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	+ if (trans) {
	+ ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
	+ if (ret < 0) {
	+ err = ret;
	+ btrfs_err(fs_info, "fail to update qgroup status: %d",
	+ err);
	+ }
	}
	+ fs_info->qgroup_rescan_running = false;
	+ complete_all(&fs_info->qgroup_rescan_completion);
	+ mutex_unlock(&fs_info->qgroup_rescan_lock);
	+
	+ if (!trans)
	+ return;
	+
	btrfs_end_transaction(trans, fs_info->quota_root);

	if (btrfs_fs_closing(fs_info)) {
	@@ -2366,12 +2377,6 @@ out:
	} else {
	btrfs_err(fs_info, "qgroup scan failed with %d", err);
	}
	-
	-done:
	- mutex_lock(&fs_info->qgroup_rescan_lock);
	- fs_info->qgroup_rescan_running = false;
	- mutex_unlock(&fs_info->qgroup_rescan_lock);
	- complete_all(&fs_info->qgroup_rescan_completion);
	}

	/*