pidns: Don't allow new processes in a dead pid namespace.
By adding a flag to track when a pid namespace is dead, and
by testing that flag just before a process attaches to the
pid namespace, it is possible to guarantee that processes
never enter a dead pid namespace. Currently, sending SIGKILL
to all of the processes in a dead pid namespace gives us this
guarantee, but we need something a little stronger to support
unsharing and joining a pid namespace.
To ensure that this does not slow down the common case, I
tested this code with lat_proc from lmbench and I did
not see any increase in the fork overhead.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 8e0bee8..e9a836d 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -20,6 +20,7 @@
struct kref kref;
struct pidmap pidmap[PIDMAP_ENTRIES];
int last_pid;
+ int dead;
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
unsigned int level;
@@ -53,6 +54,17 @@
kref_put(&ns->kref, free_pid_ns);
}
+static inline bool pid_ns_dead(struct pid *pid)
+{
+ int i;
+
+ for (i = 0; i <= pid->level; i++) {
+ if (pid->numbers[i].ns->dead)
+ return true;
+ }
+ return false;
+}
+
#else /* !CONFIG_PID_NS */
#include <linux/err.h>
@@ -78,6 +90,12 @@
{
BUG();
}
+
+static inline bool pid_ns_dead(struct pid *pid)
+{
+ return false;
+}
+
#endif /* CONFIG_PID_NS */
extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 59e3756..9861982 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1329,6 +1329,13 @@
goto bad_fork_free_pid;
}
+ if (pid_ns_dead(pid)) {
spin_unlock(&current->sighand->siglock);
+ write_unlock_irq(&tasklist_lock);
+ retval = -EPERM;
+ goto bad_fork_free_pid;
+ }
+
if (clone_flags & CLONE_THREAD) {
current->signal->nr_threads++;
atomic_inc(&current->signal->live);
diff --git a/kernel/pid.c b/kernel/pid.c
index 5641aca..8c51c26 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -76,6 +76,7 @@
[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
},
.last_pid = 0,
+ .dead = 0,
.level = 0,
.child_reaper = &init_task,
.proc_inum = PROC_PID_INIT_INO,
@@ -292,6 +293,8 @@
tmp = ns;
for (i = ns->level; i >= 0; i--) {
+ if (ns->dead)
+ goto out_free;
nr = alloc_pidmap(tmp);
if (nr < 0)
goto out_free;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 0190b26..c5da309 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -175,6 +175,7 @@
*
*/
read_lock(&tasklist_lock);
+ pid_ns->dead = 1;
for (nr = next_pidmap(pid_ns, 0); nr > 0; nr = next_pidmap(pid_ns, nr)) {
/*