queue/mm-memcontrol-use-special-workqueue-for-creating-per.patch - pub/scm/linux/kernel/git/paulg/longterm-queue-4.8 - Git at Google

 From 1019d14540ff92e9719de7ab56769efd3a8d6a6f Mon Sep 17 00:00:00 2001
 From: Vladimir Davydov <vdavydov.dev@gmail.com>
 Date: Mon, 12 Dec 2016 16:41:29 -0800
 Subject: [PATCH] mm: memcontrol: use special workqueue for creating per-memcg
  caches

 commit 13583c3d3224508582ec03d881d0b68dd3ee8e10 upstream.

 Creating a lot of cgroups at the same time might stall all worker
 threads with kmem cache creation works, because kmem cache creation is
 done with the slab_mutex held.  The problem was amplified by commits
 801faf0db894 ("mm/slab: lockless decision to grow cache") in case of
 SLAB and 81ae6d03952c ("mm/slub.c: replace kick_all_cpus_sync() with
 synchronize_sched() in kmem_cache_shrink()") in case of SLUB, which
 increased the maximal time the slab_mutex can be held.

 To prevent that from happening, let's use a special ordered single
 threaded workqueue for kmem cache creation.  This shouldn't introduce
 any functional changes regarding how kmem caches are created, as the
 work function holds the global slab_mutex during its whole runtime
 anyway, making it impossible to run more than one work at a time.  By
 using a single threaded workqueue, we just avoid creating a thread per
 each work.  Ordering is required to avoid a situation when a cgroup's
 work is put off indefinitely because there are other cgroups to serve,
 in other words to guarantee fairness.

 Link: https://bugzilla.kernel.org/show_bug.cgi?id=172981
 Link: http://lkml.kernel.org/r/20161004131417.GC1862@esperanza
 Signed-off-by: Vladimir Davydov <vdavydov.dev@gmail.com>
 Reported-by: Doug Smythies <dsmythies@telus.net>
 Acked-by: Michal Hocko <mhocko@suse.com>
 Cc: Christoph Lameter <cl@linux.com>
 Cc: David Rientjes <rientjes@google.com>
 Cc: Johannes Weiner <hannes@cmpxchg.org>
 Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
 Cc: Pekka Enberg <penberg@kernel.org>
 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>

 diff --git a/mm/memcontrol.c b/mm/memcontrol.c
 index face124e24e9..a6da3f1aca60 100644
 --- a/mm/memcontrol.c
 +++ b/mm/memcontrol.c
 @@ -2182,6 +2182,8 @@ struct memcg_kmem_cache_create_work {
  	struct work_struct work;
  };

 +static struct workqueue_struct *memcg_kmem_cache_create_wq;
 +
  static void memcg_kmem_cache_create_func(struct work_struct *w)
  {
  	struct memcg_kmem_cache_create_work *cw =
 @@ -2213,7 +2215,7 @@ static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
  	cw->cachep = cachep;
  	INIT_WORK(&cw->work, memcg_kmem_cache_create_func);

 -	schedule_work(&cw->work);
 +	queue_work(memcg_kmem_cache_create_wq, &cw->work);
  }

  static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
 @@ -5806,6 +5808,17 @@ static int __init mem_cgroup_init(void)
  {
  	int cpu, node;

 +#ifndef CONFIG_SLOB
 +	/*
 +	 * Kmem cache creation is mostly done with the slab_mutex held,
 +	 * so use a special workqueue to avoid stalling all worker
 +	 * threads in case lots of cgroups are created simultaneously.
 +	 */
 +	memcg_kmem_cache_create_wq =
 +		alloc_ordered_workqueue("memcg_kmem_cache_create", 0);
 +	BUG_ON(!memcg_kmem_cache_create_wq);
 +#endif
 +
  	hotcpu_notifier(memcg_cpu_hotplug_callback, 0);

  	for_each_possible_cpu(cpu)
 --
 2.12.0
	From 1019d14540ff92e9719de7ab56769efd3a8d6a6f Mon Sep 17 00:00:00 2001
	From: Vladimir Davydov <vdavydov.dev@gmail.com>
	Date: Mon, 12 Dec 2016 16:41:29 -0800
	Subject: [PATCH] mm: memcontrol: use special workqueue for creating per-memcg
	caches

	commit 13583c3d3224508582ec03d881d0b68dd3ee8e10 upstream.

	Creating a lot of cgroups at the same time might stall all worker
	threads with kmem cache creation works, because kmem cache creation is
	done with the slab_mutex held. The problem was amplified by commits
	801faf0db894 ("mm/slab: lockless decision to grow cache") in case of
	SLAB and 81ae6d03952c ("mm/slub.c: replace kick_all_cpus_sync() with
	synchronize_sched() in kmem_cache_shrink()") in case of SLUB, which
	increased the maximal time the slab_mutex can be held.

	To prevent that from happening, let's use a special ordered single
	threaded workqueue for kmem cache creation. This shouldn't introduce
	any functional changes regarding how kmem caches are created, as the
	work function holds the global slab_mutex during its whole runtime
	anyway, making it impossible to run more than one work at a time. By
	using a single threaded workqueue, we just avoid creating a thread per
	each work. Ordering is required to avoid a situation when a cgroup's
	work is put off indefinitely because there are other cgroups to serve,
	in other words to guarantee fairness.

	Link: https://bugzilla.kernel.org/show_bug.cgi?id=172981
	Link: http://lkml.kernel.org/r/20161004131417.GC1862@esperanza
	Signed-off-by: Vladimir Davydov <vdavydov.dev@gmail.com>
	Reported-by: Doug Smythies <dsmythies@telus.net>
	Acked-by: Michal Hocko <mhocko@suse.com>
	Cc: Christoph Lameter <cl@linux.com>
	Cc: David Rientjes <rientjes@google.com>
	Cc: Johannes Weiner <hannes@cmpxchg.org>
	Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
	Cc: Pekka Enberg <penberg@kernel.org>
	Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
	Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
	Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>

	diff --git a/mm/memcontrol.c b/mm/memcontrol.c
	index face124e24e9..a6da3f1aca60 100644
	--- a/mm/memcontrol.c
	+++ b/mm/memcontrol.c
	@@ -2182,6 +2182,8 @@ struct memcg_kmem_cache_create_work {
	struct work_struct work;
	};

	+static struct workqueue_struct *memcg_kmem_cache_create_wq;
	+
	static void memcg_kmem_cache_create_func(struct work_struct *w)
	{
	struct memcg_kmem_cache_create_work *cw =
	@@ -2213,7 +2215,7 @@ static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
	cw->cachep = cachep;
	INIT_WORK(&cw->work, memcg_kmem_cache_create_func);

	- schedule_work(&cw->work);
	+ queue_work(memcg_kmem_cache_create_wq, &cw->work);
	}

	static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
	@@ -5806,6 +5808,17 @@ static int __init mem_cgroup_init(void)
	{
	int cpu, node;

	+#ifndef CONFIG_SLOB
	+ /*
	+ * Kmem cache creation is mostly done with the slab_mutex held,
	+ * so use a special workqueue to avoid stalling all worker
	+ * threads in case lots of cgroups are created simultaneously.
	+ */
	+ memcg_kmem_cache_create_wq =
	+ alloc_ordered_workqueue("memcg_kmem_cache_create", 0);
	+ BUG_ON(!memcg_kmem_cache_create_wq);
	+#endif
	+
	hotcpu_notifier(memcg_cpu_hotplug_callback, 0);

	for_each_possible_cpu(cpu)
	--
	2.12.0