proc: Prototype for converting proc to work with multiple user namespaces.

- Proc generic still needs work to handle /proc/net
- Proc sysctl still needs work.
- The /proc/<pid> parts are mostly there but they are still of
  prototype quality.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5eb0206..e77d972 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -83,6 +83,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
+#include <linux/user_namespace.h>
 #ifdef CONFIG_HARDWALL
 #include <asm/hardwall.h>
 #endif
@@ -628,6 +629,7 @@
 }
 
 static const struct inode_operations proc_def_inode_operations = {
+	.permission	= proc_base_permission,
 	.setattr	= proc_setattr,
 };
 
@@ -1666,6 +1668,7 @@
 }
 
 static const struct inode_operations proc_pid_link_inode_operations = {
+	.permission	= proc_base_permission,
 	.readlink	= proc_pid_readlink,
 	.follow_link	= proc_pid_follow_link,
 	.setattr	= proc_setattr,
@@ -2188,6 +2191,7 @@
  * proc directories can do almost nothing..
  */
 static const struct inode_operations proc_fd_inode_operations = {
+	.permission	= proc_base_permission,
 	.lookup		= proc_lookupfd,
 	.permission	= proc_fd_permission,
 	.setattr	= proc_setattr,
@@ -2241,6 +2245,7 @@
  * proc directories can do almost nothing..
  */
 static const struct inode_operations proc_fdinfo_inode_operations = {
+	.permission	= proc_base_permission,
 	.lookup		= proc_lookupfdinfo,
 	.setattr	= proc_setattr,
 };
@@ -2477,6 +2482,7 @@
 }
 
 static const struct inode_operations proc_attr_dir_inode_operations = {
+	.permission	= proc_base_permission,
 	.lookup		= proc_attr_dir_lookup,
 	.getattr	= pid_getattr,
 	.setattr	= proc_setattr,
@@ -2885,6 +2891,7 @@
 }
 
 static const struct inode_operations proc_tgid_base_inode_operations = {
+	.permission	= proc_base_permission,
 	.lookup		= proc_tgid_base_lookup,
 	.getattr	= pid_getattr,
 	.setattr	= proc_setattr,
@@ -3227,6 +3234,7 @@
 };
 
 static const struct inode_operations proc_tid_base_inode_operations = {
+	.permission	= proc_base_permission,
 	.lookup		= proc_tid_base_lookup,
 	.getattr	= pid_getattr,
 	.setattr	= proc_setattr,
@@ -3452,6 +3460,7 @@
 }
 
 static const struct inode_operations proc_task_inode_operations = {
+	.permission	= proc_base_permission,
 	.lookup		= proc_task_lookup,
 	.getattr	= proc_task_getattr,
 	.setattr	= proc_setattr,
@@ -3462,3 +3471,91 @@
 	.readdir	= proc_task_readdir,
 	.llseek		= default_llseek,
 };
+
+/*
+ * Classic owner/group/other mode-bit check for a proc inode.  A caller whose
+ * user namespace differs from @inode_user_ns only ever gets the "other"
+ * bits.  Returns 0 when @mask is granted, -EACCES otherwise.
+ */
+static int proc_dac_permission_check(struct inode *inode, int mask,
+	struct user_namespace *inode_user_ns)
+{
+	const int rwx = MAY_READ | MAY_WRITE | MAY_EXEC;
+	unsigned int bits = inode->i_mode;
+	int wanted = mask & (rwx | MAY_NOT_BLOCK);
+
+	if (current_user_ns() == inode_user_ns) {
+		/* Same namespace: pick the owner or group triplet if it applies. */
+		if (likely(current_fsuid() == inode->i_uid))
+			bits >>= 6;
+		else if (in_group_p(inode->i_gid))
+			bits >>= 3;
+	}
+
+	/* Every requested rwx bit must be granted by the selected triplet. */
+	if (wanted & ~bits & rwx)
+		return -EACCES;
+	return 0;
+}
+
+
+/*
+ * Decide access to a proc inode relative to @inode_user_ns: plain mode bits
+ * first, then CAP_DAC_OVERRIDE / CAP_DAC_READ_SEARCH held in that namespace.
+ * Returns 0 when @mask is allowed, -EACCES otherwise.
+ */
+static int proc_userns_permission(struct inode *inode, int mask,
+	struct user_namespace *inode_user_ns)
+{
+	int ret;
+
+	/* Anything other than a DAC denial is final. */
+	ret = proc_dac_permission_check(inode, mask, inode_user_ns);
+	if (ret != -EACCES)
+		return ret;
+
+	if (S_ISDIR(inode->i_mode)) {
+		/* Directories: full override, or read/search when not writing. */
+		if (ns_capable(inode_user_ns, CAP_DAC_OVERRIDE))
+			return 0;
+		if (!(mask & MAY_WRITE) &&
+		    ns_capable(inode_user_ns, CAP_DAC_READ_SEARCH))
+			return 0;
+		return -EACCES;
+	}
+
+	/*
+	 * Non-directories: read/write is always overridable; exec only
+	 * when the inode has at least one exec bit set.
+	 */
+	if ((!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) &&
+	    ns_capable(inode_user_ns, CAP_DAC_OVERRIDE))
+		return 0;
+
+	/* A pure read request can also be satisfied by CAP_DAC_READ_SEARCH. */
+	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
+	if (mask == MAY_READ && ns_capable(inode_user_ns, CAP_DAC_READ_SEARCH))
+		return 0;
+
+	return -EACCES;
+}
+
+/* Check @mask on a /proc/<pid> inode relative to its task's user namespace. */
+int proc_base_permission(struct inode *inode, int mask)
+{
+	struct user_namespace *inode_user_ns;
+	struct task_struct *task;
+	int ret;
+
+	task = get_proc_task(inode);
+	if (!task)
+		return -ESRCH;
+	/* Grab the namespace while RCU still protects the task's cred. */
+	rcu_read_lock();
+	inode_user_ns = get_user_ns(__task_cred(task)->user_ns);
+	rcu_read_unlock();
+	ret = proc_userns_permission(inode, mask, inode_user_ns);
+	put_user_ns(inode_user_ns);
+	put_task_struct(task);
+	return ret;
+}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 7838e5c..d6930a7 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -145,3 +145,5 @@
 extern const struct inode_operations proc_ns_dir_inode_operations;
 extern const struct file_operations proc_ns_dir_operations;
 
+extern int proc_base_permission(struct inode *inode, int mask);
+
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index acc6eaf..10a88a5 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -179,6 +179,7 @@
 }
 
 const struct inode_operations proc_ns_dir_inode_operations = {
+	.permission	= proc_base_permission,
 	.lookup		= proc_ns_dir_lookup,
 	.getattr	= pid_getattr,
 	.setattr	= proc_setattr,
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a8a2b7..19128e0 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,7 @@
 #include <linux/bitops.h>
 #include <linux/mount.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 
 #include "internal.h"
 
@@ -62,6 +63,11 @@
 		}
 
 		sb->s_flags |= MS_ACTIVE;
+		/* Guarantee that we are always in the init_user_ns.
+		 * Individual files are treated as exceptions.
+		 */
+		put_user_ns(sb->s_user_ns);
+		sb->s_user_ns = get_user_ns(&init_user_ns);
 	}
 
 	ei = PROC_I(sb->s_root->d_inode);
@@ -84,6 +90,7 @@
 }
 
 static struct file_system_type proc_fs_type = {
+	.fs_flags	= FS_SAFE,
 	.name		= "proc",
 	.mount		= proc_mount,
 	.kill_sb	= proc_kill_sb,
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index ca84212..077d68c 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -45,8 +45,9 @@
 			       struct nsproxy *nsproxy,
 			       struct ctl_table *table)
 {
+	/* FIXME make this code fully userns aware */
 	/* Allow network administrator to have same access as root. */
-	if (capable(CAP_NET_ADMIN)) {
+	if (ns_capable(nsproxy->net_ns->user_ns, CAP_NET_ADMIN)) {
 		int mode = (table->mode >> 6) & 7;
 		return (mode << 6) | (mode << 3) | mode;
 	}