| From 9cc46516ddf497ea16e8d7cb986ae03a0f6b92f8 Mon Sep 17 00:00:00 2001 |
| From: "Eric W. Biederman" <ebiederm@xmission.com> |
| Date: Tue, 2 Dec 2014 12:27:26 -0600 |
| Subject: userns: Add a knob to disable setgroups on a per user namespace basis |
| |
| From: "Eric W. Biederman" <ebiederm@xmission.com> |
| |
| commit 9cc46516ddf497ea16e8d7cb986ae03a0f6b92f8 upstream. |
| |
| - Expose the knob to user space through a proc file /proc/<pid>/setgroups |
| |
| A value of "deny" means the setgroups system call is disabled in the |
| current process's user namespace and can not be enabled in the |
| future in this user namespace. |
| |
| A value of "allow" means the setgroups system call is enabled. |
| |
| - Descendant user namespaces inherit the value of setgroups from |
| their parents. |
| |
| - A proc file is used (instead of a sysctl) as sysctls currently do |
| not allow checking the permissions at open time. |
| |
| - Writing to the proc file is restricted to before the gid_map |
| for the user namespace is set. |
| |
| This ensures that disabling setgroups at a user namespace |
| level will never remove the ability to call setgroups |
| from a process that already has that ability. |
| |
| A process may opt in to the setgroups disable for itself by |
| creating, entering and configuring a user namespace or by calling |
| setns on an existing user namespace with setgroups disabled. |
| Processes without privileges already can not call setgroups so this |
| is a noop. Processes with privilege become processes without |
| privilege when entering a user namespace and as with any other path |
| to dropping privilege they would not have the ability to call |
| setgroups. So this remains within the bounds of what is possible |
| without a knob to disable setgroups permanently in a user namespace. |
| |
| Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| fs/proc/base.c | 53 +++++++++++++++++++++++++ |
| include/linux/user_namespace.h | 7 +++ |
| kernel/user.c | 1 |
| kernel/user_namespace.c | 85 +++++++++++++++++++++++++++++++++++++++++ |
| 4 files changed, 146 insertions(+) |
| |
| --- a/fs/proc/base.c |
| +++ b/fs/proc/base.c |
| @@ -2612,6 +2612,57 @@ static const struct file_operations proc |
| .llseek = seq_lseek, |
| .release = proc_id_map_release, |
| }; |
| +/* Open handler for /proc/<pid>/setgroups; stores the task's user namespace in the seq_file */ |
| +static int proc_setgroups_open(struct inode *inode, struct file *file) |
| +{ |
| + struct user_namespace *ns = NULL; |
| + struct task_struct *task; |
| + int ret; |
| + |
| + ret = -ESRCH; /* returned if no task or user namespace is found below */ |
| + task = get_proc_task(inode); |
| + if (task) { |
| + rcu_read_lock(); |
| + ns = get_user_ns(task_cred_xxx(task, user_ns)); /* paired with put_user_ns() */ |
| + rcu_read_unlock(); |
| + put_task_struct(task); |
| + } |
| + if (!ns) |
| + goto err; |
| + |
| + if (file->f_mode & FMODE_WRITE) { |
| + ret = -EACCES; /* writers need CAP_SYS_ADMIN in the task's user namespace */ |
| + if (!ns_capable(ns, CAP_SYS_ADMIN)) |
| + goto err_put_ns; |
| + } |
| + |
| + ret = single_open(file, &proc_setgroups_show, ns); /* ns is passed as the seq_file data */ |
| + if (ret) |
| + goto err_put_ns; |
| + |
| + return 0; |
| +err_put_ns: |
| + put_user_ns(ns); |
| +err: |
| + return ret; |
| +} |
| +/* Release handler: finish the seq_file, then drop the user namespace stored in it */ |
| +static int proc_setgroups_release(struct inode *inode, struct file *file) |
| +{ |
| + struct seq_file *seq = file->private_data; |
| + struct user_namespace *ns = seq->private; /* fetched before single_release() runs */ |
| + int ret = single_release(inode, file); |
| + put_user_ns(ns); |
| + return ret; |
| +} |
| +/* File operations used by the "setgroups" REG() entries in the pid entry tables */ |
| +static const struct file_operations proc_setgroups_operations = { |
| + .open = proc_setgroups_open, |
| + .write = proc_setgroups_write, |
| + .read = seq_read, |
| + .llseek = seq_lseek, |
| + .release = proc_setgroups_release, |
| +}; |
| #endif /* CONFIG_USER_NS */ |
| |
| static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, |
| @@ -2720,6 +2771,7 @@ static const struct pid_entry tgid_base_ |
| REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), |
| REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), |
| REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), |
| + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), |
| #endif |
| #ifdef CONFIG_CHECKPOINT_RESTORE |
| REG("timers", S_IRUGO, proc_timers_operations), |
| @@ -3073,6 +3125,7 @@ static const struct pid_entry tid_base_s |
| REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), |
| REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), |
| REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), |
| + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), |
| #endif |
| }; |
| |
| --- a/include/linux/user_namespace.h |
| +++ b/include/linux/user_namespace.h |
| @@ -17,6 +17,10 @@ struct uid_gid_map { /* 64 bytes -- 1 ca |
| } extent[UID_GID_MAP_MAX_EXTENTS]; |
| }; |
| |
| +#define USERNS_SETGROUPS_ALLOWED 1UL /* flag bit checked by userns_may_setgroups() */ |
| +/* Flags value assigned to init_user_ns */ |
| +#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED |
| + |
| struct user_namespace { |
| struct uid_gid_map uid_map; |
| struct uid_gid_map gid_map; |
| @@ -27,6 +31,7 @@ struct user_namespace { |
| kuid_t owner; |
| kgid_t group; |
| unsigned int proc_inum; |
| + unsigned long flags; |
| bool may_mount_sysfs; |
| bool may_mount_proc; |
| }; |
| @@ -59,6 +64,8 @@ extern struct seq_operations proc_projid |
| extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); |
| extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); |
| extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); |
| +extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); |
| +extern int proc_setgroups_show(struct seq_file *m, void *v); |
| extern bool userns_may_setgroups(const struct user_namespace *ns); |
| #else |
| |
| --- a/kernel/user.c |
| +++ b/kernel/user.c |
| @@ -51,6 +51,7 @@ struct user_namespace init_user_ns = { |
| .owner = GLOBAL_ROOT_UID, |
| .group = GLOBAL_ROOT_GID, |
| .proc_inum = PROC_USER_INIT_INO, |
| + .flags = USERNS_INIT_FLAGS, |
| .may_mount_sysfs = true, |
| .may_mount_proc = true, |
| }; |
| --- a/kernel/user_namespace.c |
| +++ b/kernel/user_namespace.c |
| @@ -100,6 +100,11 @@ int create_user_ns(struct cred *new) |
| ns->owner = owner; |
| ns->group = group; |
| |
| + /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ |
| + mutex_lock(&userns_state_mutex); |
| + ns->flags = parent_ns->flags; |
| + mutex_unlock(&userns_state_mutex); |
| + |
| set_cred_user_ns(new, ns); |
| |
| update_mnt_policy(ns); |
| @@ -827,6 +832,84 @@ static bool new_idmap_permitted(const st |
| return false; |
| } |
| |
| +int proc_setgroups_show(struct seq_file *seq, void *v) |
| +{ |
| + struct user_namespace *ns = seq->private; |
| + unsigned long userns_flags = ACCESS_ONCE(ns->flags); /* sampled once; userns_state_mutex is not taken here */ |
| + /* Print "allow" if USERNS_SETGROUPS_ALLOWED is set, "deny" otherwise; always returns 0 */ |
| + seq_printf(seq, "%s\n", |
| + (userns_flags & USERNS_SETGROUPS_ALLOWED) ? |
| + "allow" : "deny"); |
| + return 0; |
| +} |
| +/* Write handler for /proc/<pid>/setgroups: accepts "allow" or "deny", optionally followed by whitespace */ |
| +ssize_t proc_setgroups_write(struct file *file, const char __user *buf, |
| + size_t count, loff_t *ppos) |
| +{ |
| + struct seq_file *seq = file->private_data; |
| + struct user_namespace *ns = seq->private; |
| + char kbuf[8], *pos; |
| + bool setgroups_allowed; |
| + ssize_t ret; |
| + |
| + /* Only allow a very narrow range of strings to be written: offset 0 and shorter than kbuf */ |
| + ret = -EINVAL; |
| + if ((*ppos != 0) || (count >= sizeof(kbuf))) |
| + goto out; |
| + |
| + /* What was written? */ |
| + ret = -EFAULT; |
| + if (copy_from_user(kbuf, buf, count)) |
| + goto out; |
| + kbuf[count] = '\0'; /* in bounds: count < sizeof(kbuf) was checked above */ |
| + pos = kbuf; |
| + |
| + /* What is being requested? */ |
| + ret = -EINVAL; |
| + if (strncmp(pos, "allow", 5) == 0) { |
| + pos += 5; |
| + setgroups_allowed = true; |
| + } |
| + else if (strncmp(pos, "deny", 4) == 0) { |
| + pos += 4; |
| + setgroups_allowed = false; |
| + } |
| + else |
| + goto out; |
| + |
| + /* Verify there is no trailing junk on the line (whitespace is skipped) */ |
| + pos = skip_spaces(pos); |
| + if (*pos != '\0') |
| + goto out; |
| + |
| + ret = -EPERM; /* returned when the requested change is refused below */ |
| + mutex_lock(&userns_state_mutex); |
| + if (setgroups_allowed) { |
| + /* Enabling setgroups after setgroups has been disabled |
| + * is not allowed. |
| + */ |
| + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) |
| + goto out_unlock; |
| + } else { |
| + /* Permanently disabling setgroups after setgroups has |
| + * been enabled by writing the gid_map is not allowed. |
| + */ |
| + if (ns->gid_map.nr_extents != 0) |
| + goto out_unlock; |
| + ns->flags &= ~USERNS_SETGROUPS_ALLOWED; |
| + } |
| + mutex_unlock(&userns_state_mutex); |
| + |
| + /* Report a successful write */ |
| + *ppos = count; |
| + ret = count; |
| +out: |
| + return ret; |
| +out_unlock: |
| + mutex_unlock(&userns_state_mutex); |
| + goto out; |
| +} |
| + |
| bool userns_may_setgroups(const struct user_namespace *ns) |
| { |
| bool allowed; |
| @@ -836,6 +919,8 @@ bool userns_may_setgroups(const struct u |
| * the user namespace has been established. |
| */ |
| allowed = ns->gid_map.nr_extents != 0; |
| + /* Is setgroups allowed? */ |
| + allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); |
| mutex_unlock(&userns_state_mutex); |
| |
| return allowed; |