releases/5.0.20/cgroup-protect-cgroup-nr_-dying_-descendants-by-css_.patch - pub/scm/linux/kernel/git/stable/stable-queue - Git at Google

 From 60473ee0f4d7e850335b0e21fc1c41eddfebacd3 Mon Sep 17 00:00:00 2001
 From: Roman Gushchin <guro@fb.com>
 Date: Fri, 19 Apr 2019 10:03:03 -0700
 Subject: cgroup: protect cgroup->nr_(dying_)descendants by css_set_lock

 [ Upstream commit 4dcabece4c3a9f9522127be12cc12cc120399b2f ]

 The number of descendant cgroups and the number of dying
 descendant cgroups are currently synchronized using the cgroup_mutex.

 The number of descendant cgroups will be required by the cgroup v2
 freezer, which will use it to determine if a cgroup is frozen
 (depending on total number of descendants and number of frozen
 descendants). It's not always acceptable to grab the cgroup_mutex,
 especially from quite hot paths (e.g. exit()).

 To avoid this, let's additionally synchronize these counters using
 the css_set_lock.

 So, it's safe to read these counters with either cgroup_mutex or
 css_set_lock locked, and for changing both locks should be acquired.

 Signed-off-by: Roman Gushchin <guro@fb.com>
 Signed-off-by: Tejun Heo <tj@kernel.org>
 Cc: kernel-team@fb.com
 Signed-off-by: Sasha Levin <sashal@kernel.org>
 ---
  include/linux/cgroup-defs.h | 5 +++++
  kernel/cgroup/cgroup.c      | 6 ++++++
  2 files changed, 11 insertions(+)

 diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
 index 120d1d40704bc..319c07305500f 100644
 --- a/include/linux/cgroup-defs.h
 +++ b/include/linux/cgroup-defs.h
 @@ -348,6 +348,11 @@ struct cgroup {
  	 * Dying cgroups are cgroups which were deleted by a user,
  	 * but are still existing because someone else is holding a reference.
  	 * max_descendants is a maximum allowed number of descent cgroups.
 +	 *
 +	 * nr_descendants and nr_dying_descendants are protected
 +	 * by cgroup_mutex and css_set_lock. It's fine to read them holding
 +	 * any of cgroup_mutex and css_set_lock; for writing both locks
 +	 * should be held.
  	 */
  	int nr_descendants;
  	int nr_dying_descendants;
 diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
 index f84bf28f36ba8..ee77b0f00eddd 100644
 --- a/kernel/cgroup/cgroup.c
 +++ b/kernel/cgroup/cgroup.c
 @@ -4728,9 +4728,11 @@ static void css_release_work_fn(struct work_struct *work)
  		if (cgroup_on_dfl(cgrp))
  			cgroup_rstat_flush(cgrp);

 +		spin_lock_irq(&css_set_lock);
  		for (tcgrp = cgroup_parent(cgrp); tcgrp;
  		     tcgrp = cgroup_parent(tcgrp))
  			tcgrp->nr_dying_descendants--;
 +		spin_unlock_irq(&css_set_lock);

  		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
  		cgrp->id = -1;
 @@ -4948,12 +4950,14 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
  	if (ret)
  		goto out_psi_free;

 +	spin_lock_irq(&css_set_lock);
  	for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
  		cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;

  		if (tcgrp != cgrp)
  			tcgrp->nr_descendants++;
  	}
 +	spin_unlock_irq(&css_set_lock);

  	if (notify_on_release(parent))
  		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
 @@ -5238,10 +5242,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
  	if (parent && cgroup_is_threaded(cgrp))
  		parent->nr_threaded_children--;

 +	spin_lock_irq(&css_set_lock);
  	for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
  		tcgrp->nr_descendants--;
  		tcgrp->nr_dying_descendants++;
  	}
 +	spin_unlock_irq(&css_set_lock);

  	cgroup1_check_for_release(parent);

 --
 2.20.1
	From 60473ee0f4d7e850335b0e21fc1c41eddfebacd3 Mon Sep 17 00:00:00 2001
	From: Roman Gushchin <guro@fb.com>
	Date: Fri, 19 Apr 2019 10:03:03 -0700
	Subject: cgroup: protect cgroup->nr_(dying_)descendants by css_set_lock

	[ Upstream commit 4dcabece4c3a9f9522127be12cc12cc120399b2f ]

	The number of descendant cgroups and the number of dying
	descendant cgroups are currently synchronized using the cgroup_mutex.

	The number of descendant cgroups will be required by the cgroup v2
	freezer, which will use it to determine if a cgroup is frozen
	(depending on total number of descendants and number of frozen
	descendants). It's not always acceptable to grab the cgroup_mutex,
	especially from quite hot paths (e.g. exit()).

	To avoid this, let's additionally synchronize these counters using
	the css_set_lock.

	So, it's safe to read these counters with either cgroup_mutex or
	css_set_lock locked, and for changing both locks should be acquired.

	Signed-off-by: Roman Gushchin <guro@fb.com>
	Signed-off-by: Tejun Heo <tj@kernel.org>
	Cc: kernel-team@fb.com
	Signed-off-by: Sasha Levin <sashal@kernel.org>
	---
	include/linux/cgroup-defs.h \| 5 +++++
	kernel/cgroup/cgroup.c \| 6 ++++++
	2 files changed, 11 insertions(+)

	diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
	index 120d1d40704bc..319c07305500f 100644
	--- a/include/linux/cgroup-defs.h
	+++ b/include/linux/cgroup-defs.h
	@@ -348,6 +348,11 @@ struct cgroup {
	* Dying cgroups are cgroups which were deleted by a user,
	* but are still existing because someone else is holding a reference.
	* max_descendants is a maximum allowed number of descent cgroups.
	+ *
	+ * nr_descendants and nr_dying_descendants are protected
	+ * by cgroup_mutex and css_set_lock. It's fine to read them holding
	+ * any of cgroup_mutex and css_set_lock; for writing both locks
	+ * should be held.
	*/
	int nr_descendants;
	int nr_dying_descendants;
	diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
	index f84bf28f36ba8..ee77b0f00eddd 100644
	--- a/kernel/cgroup/cgroup.c
	+++ b/kernel/cgroup/cgroup.c
	@@ -4728,9 +4728,11 @@ static void css_release_work_fn(struct work_struct *work)
	if (cgroup_on_dfl(cgrp))
	cgroup_rstat_flush(cgrp);

	+ spin_lock_irq(&css_set_lock);
	for (tcgrp = cgroup_parent(cgrp); tcgrp;
	tcgrp = cgroup_parent(tcgrp))
	tcgrp->nr_dying_descendants--;
	+ spin_unlock_irq(&css_set_lock);

	cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
	cgrp->id = -1;
	@@ -4948,12 +4950,14 @@ static struct cgroup cgroup_create(struct cgroup parent)
	if (ret)
	goto out_psi_free;

	+ spin_lock_irq(&css_set_lock);
	for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
	cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;

	if (tcgrp != cgrp)
	tcgrp->nr_descendants++;
	}
	+ spin_unlock_irq(&css_set_lock);

	if (notify_on_release(parent))
	set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
	@@ -5238,10 +5242,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
	if (parent && cgroup_is_threaded(cgrp))
	parent->nr_threaded_children--;

	+ spin_lock_irq(&css_set_lock);
	for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
	tcgrp->nr_descendants--;
	tcgrp->nr_dying_descendants++;
	}
	+ spin_unlock_irq(&css_set_lock);

	cgroup1_check_for_release(parent);

	--
	2.20.1