| From def98c84b6cdf2eeea19ec5736e90e316df5206b Mon Sep 17 00:00:00 2001 |
| From: Tejun Heo <tj@kernel.org> |
| Date: Wed, 18 Sep 2019 18:43:40 -0700 |
| Subject: workqueue: Fix spurious sanity check failures in destroy_workqueue() |
| |
| From: Tejun Heo <tj@kernel.org> |
| |
| commit def98c84b6cdf2eeea19ec5736e90e316df5206b upstream. |
| |
| Before actually destroying a workqueue, destroy_workqueue() checks |
| whether it's actually idle. If it isn't, it prints out a bunch of |
| warning messages and leaves the workqueue dangling. It unfortunately |
| has a couple of issues. |
| |
| * Mayday list queueing increments pwq's refcnts which gets detected as |
| busy and fails the sanity checks. However, because mayday list |
| queueing is asynchronous, this condition can happen without any |
| actual work items left in the workqueue. |
| |
| * Sanity check failure leaves the sysfs interface behind too which can |
| lead to init failure of newer instances of the workqueue. |
| |
| This patch fixes the above two by |
| |
| * If a workqueue has a rescuer, disable and kill the rescuer before |
| sanity checks. Disabling and killing is guaranteed to flush the |
| existing mayday list. |
| |
| * Remove sysfs interface before sanity checks. |
| |
| Signed-off-by: Tejun Heo <tj@kernel.org> |
| Reported-by: Marcin Pawlowski <mpawlowski@fb.com> |
| Reported-by: "Williams, Gerald S" <gerald.s.williams@intel.com> |
| Cc: stable@vger.kernel.org |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| kernel/workqueue.c | 24 +++++++++++++++++++----- |
| 1 file changed, 19 insertions(+), 5 deletions(-) |
| |
| --- a/kernel/workqueue.c |
| +++ b/kernel/workqueue.c |
| @@ -4084,9 +4084,28 @@ void destroy_workqueue(struct workqueue_ |
| struct pool_workqueue *pwq; |
| int node; |
| |
| + /* |
| + * Remove it from sysfs first so that sanity check failure doesn't |
| + * lead to sysfs name conflicts. |
| + */ |
| + workqueue_sysfs_unregister(wq); |
| + |
| /* drain it before proceeding with destruction */ |
| drain_workqueue(wq); |
| |
| + /* kill rescuer, if sanity checks fail, leave it w/o rescuer */ |
| + if (wq->rescuer) { |
| + struct worker *rescuer = wq->rescuer; |
| + |
| + /* this prevents new queueing */ |
| + spin_lock_irq(&wq_mayday_lock); |
| + wq->rescuer = NULL; |
| + spin_unlock_irq(&wq_mayday_lock); |
| + |
| + /* rescuer will empty maydays list before exiting */ |
| + kthread_stop(rescuer->task); |
| + } |
| + |
| /* sanity checks */ |
| mutex_lock(&wq->mutex); |
| for_each_pwq(pwq, wq) { |
| @@ -4118,11 +4137,6 @@ void destroy_workqueue(struct workqueue_ |
| list_del_rcu(&wq->list); |
| mutex_unlock(&wq_pool_mutex); |
| |
| - workqueue_sysfs_unregister(wq); |
| - |
| - if (wq->rescuer) |
| - kthread_stop(wq->rescuer->task); |
| - |
| if (!(wq->flags & WQ_UNBOUND)) { |
| /* |
| * The base ref is never dropped on per-cpu pwqs. Directly |