Merge branch 'for-4.11-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "This contains fixes for two long standing subtle bugs:

   - kthread_bind() on a new kthread binds it to specific CPUs and
     prevents userland from messing with the affinity or cgroup
     membership. Unfortunately, for cgroup membership, there's a window
     between kthread creation and kthread_bind*() invocation where the
     kthread can be moved into a non-root cgroup by userland.

     Depending on what controllers are in effect, this can assign the
     kthread unexpected attributes. For example, in the reported case,
     workqueue workers ended up in a non-root cpuset cgroups and had
     their CPU affinities overridden. This broke workqueue invariants
     and led to workqueue stalls.

     Fixed by closing the window between kthread creation and
     kthread_bind() as suggested by Oleg.

   - There was a bug in cgroup mount path which could allow two
     competing mount attempts to attach the same cgroup_root to two
     different superblocks.

     This was caused by mishandling return value from kernfs_pin_sb().

     Fixed"

* 'for-4.11-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: avoid attaching a cgroup root to two different superblocks
  cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f6b43fb..af9c86e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -570,6 +570,25 @@
 	pr_cont_kernfs_path(cgrp->kn);
 }
 
+static inline void cgroup_init_kthreadd(void)
+{
+	/*
+	 * kthreadd is inherited by all kthreads, keep it in the root so
+	 * that the new kthreads are guaranteed to stay in the root until
+	 * initialization is finished.
+	 */
+	current->no_cgroup_migration = 1;
+}
+
+static inline void cgroup_kthread_ready(void)
+{
+	/*
+	 * This kthread finished initialization.  The creator should have
+	 * set PF_NO_SETAFFINITY if this kthread should stay in the root.
+	 */
+	current->no_cgroup_migration = 0;
+}
+
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
@@ -590,6 +609,8 @@
 
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
+static inline void cgroup_init_kthreadd(void) {}
+static inline void cgroup_kthread_ready(void) {}
 
 static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
 					       struct cgroup *ancestor)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d67eee8..4cf9a59 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -604,6 +604,10 @@
 #ifdef CONFIG_COMPAT_BRK
 	unsigned			brk_randomized:1;
 #endif
+#ifdef CONFIG_CGROUPS
+	/* disallow userland-initiated cgroup migration */
+	unsigned			no_cgroup_migration:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 1dc22f6..12e19f0 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1146,7 +1146,7 @@
 		 * path is super cold.  Let's just sleep a bit and retry.
 		 */
 		pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
-		if (IS_ERR(pinned_sb) ||
+		if (IS_ERR_OR_NULL(pinned_sb) ||
 		    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
 			mutex_unlock(&cgroup_mutex);
 			if (!IS_ERR_OR_NULL(pinned_sb))
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4885132..687f5e0 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2425,11 +2425,12 @@
 		tsk = tsk->group_leader;
 
 	/*
-	 * Workqueue threads may acquire PF_NO_SETAFFINITY and become
-	 * trapped in a cpuset, or RT worker may be born in a cgroup
-	 * with no rt_runtime allocated.  Just say no.
+	 * kthreads may acquire PF_NO_SETAFFINITY during initialization.
+	 * If userland migrates such a kthread to a non-root cgroup, it can
+	 * become trapped in a cpuset, or RT kthread may be born in a
+	 * cgroup with no rt_runtime allocated.  Just say no.
 	 */
-	if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
+	if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
 		ret = -EINVAL;
 		goto out_unlock_rcu;
 	}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2f26ade..26db528 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -20,6 +20,7 @@
 #include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
+#include <linux/cgroup.h>
 #include <trace/events/sched.h>
 
 static DEFINE_SPINLOCK(kthread_create_lock);
@@ -225,6 +226,7 @@
 
 	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+		cgroup_kthread_ready();
 		__kthread_parkme(self);
 		ret = threadfn(data);
 	}
@@ -538,6 +540,7 @@
 	set_mems_allowed(node_states[N_MEMORY]);
 
 	current->flags |= PF_NOFREEZE;
+	cgroup_init_kthreadd();
 
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);