Change the behavior
Make security.capability implicitly fall back to
security.nscapability for callers not in init_user_ns.
Respect security.capability first (if security.nscapability
is also set).
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
diff --git a/fs/xattr.c b/fs/xattr.c
index 4861322..dec0bc0 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -73,6 +73,9 @@
return inode_permission(inode, mask);
}
+/*
+ * Defined in security/commoncap.c.  Given the security.capability payload
+ * in @value/@size, builds the equivalent security.nscapability payload.
+ * On success *@wvalue points to a kmalloc'd buffer of *@wsize bytes, which
+ * __vfs_setxattr_noperm() below kfree()s once the write is done.  On any
+ * failure *@wvalue is still NULL at that call site, which the caller maps
+ * to -EPERM.
+ */
+extern void cap_setxattr_make_nscap(struct dentry *dentry, const void *value,
+ size_t size, void **wvalue, size_t *wsize);
+
/**
* __vfs_setxattr_noperm - perform setxattr operation without performing
* permission checks.
@@ -94,13 +97,28 @@
{
struct inode *inode = dentry->d_inode;
int error = -EOPNOTSUPP;
+ void *wvalue = NULL;
+ size_t wsize = 0;
int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN);
- if (issec)
+ if (issec) {
inode->i_flags &= ~S_NOSEC;
+ /* if root in a non-init user_ns tries to set
+ * security.capability, write a security.nscapability
+ * in its place */
+ if (!strcmp(name, "security.capability") &&
+ current_user_ns() != &init_user_ns) {
+ cap_setxattr_make_nscap(dentry, value, size, &wvalue, &wsize);
+ if (!wvalue)
+ return -EPERM;
+ value = wvalue;
+ size = wsize;
+ name = "security.nscapability";
+ }
+ }
if (inode->i_op->setxattr) {
- error = inode->i_op->setxattr(dentry, name, value, size, flags);
+ error = inode->i_op->setxattr(dentry, name, wvalue ? wvalue : value, size, flags);
if (!error) {
fsnotify_xattr(dentry);
security_inode_post_setxattr(dentry, name, value,
@@ -114,6 +132,8 @@
fsnotify_xattr(dentry);
}
+ if (wvalue)
+ kfree(wvalue);
return error;
}
diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index f0b4a66..1f7e4c6 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -86,6 +86,7 @@
*/
struct vfs_ns_cap_data {
__le32 magic_etc;
+ __le32 rootid;
struct {
__le32 permitted; /* Little endian */
__le32 inheritable; /* Little endian */
diff --git a/security/commoncap.c b/security/commoncap.c
index 8f3f34a..723f4b2 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -348,6 +348,128 @@
}
/*
+ * getsecurity: We are called if reading of security.capability
+ * failed because that xattr does not exist. Check whether
+ * security.nscapability exists instead. If it does, and its rootid
+ * maps to root in the caller's user namespace or one of its ancestors,
+ * return it converted to the security.capability format.
+ */
+/*
+ * @inode:  inode whose xattr is being read
+ * @name:   xattr name as handed to the hook; it is compared against the
+ *          bare suffixes "capability" and "nscapability"
+ * @buffer: when @alloc is true, receives a kmalloc'd struct vfs_cap_data
+ *          (presumably freed by the caller - confirm against hook users)
+ * @alloc:  true to allocate and fill *@buffer, false to only report the size
+ *
+ * Returns sizeof(struct vfs_cap_data) on success.  Returns -EOPNOTSUPP if
+ * the filesystem has no getxattr, @name is not "capability",
+ * security.nscapability is absent or has an unexpected size, or its rootid
+ * is not root in the caller's user namespace or any ancestor; -EPERM for
+ * "nscapability" read from a non-init user namespace; -EINVAL if the inode
+ * has no dentry alias; -ENOMEM if the result buffer cannot be allocated.
+ */
+static int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc)
+{
+	int ret;
+	struct user_namespace *ns;
+	kuid_t kroot;
+	char *tmpbuf = NULL;
+	bool foundroot = false;
+	struct vfs_ns_cap_data *nscap;
+	struct vfs_cap_data *cap;
+	struct dentry *dentry;
+	__u32 nsmagic, magic;
+
+	if (!inode->i_op->getxattr)
+		return -EOPNOTSUPP;
+
+	/* TODO - do we want to return the capability with the rootid converted
+	 * if this is security.nscapability?  It's not critical, so just say
+	 * no for now. */
+	if (strcmp(name, "nscapability") == 0 && current_user_ns() != &init_user_ns)
+		return -EPERM;
+
+	if (strcmp(name, "capability") != 0)
+		return -EOPNOTSUPP;
+
+	dentry = d_find_alias(inode);
+	if (!dentry)
+		return -EINVAL;
+
+	ret = vfs_getxattr_alloc(dentry, "security.nscapability",
+				 &tmpbuf, 0, GFP_NOFS);
+	/* d_find_alias() returned a referenced dentry; it is not needed
+	 * past this point, so drop the reference on every path. */
+	dput(dentry);
+
+	if (ret != sizeof(struct vfs_ns_cap_data)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* verify the uid maps to an ancestor root uid, if so convert
+	 * this to a valid security.capability */
+	nscap = (struct vfs_ns_cap_data *) tmpbuf;
+	kroot = make_kuid(&init_user_ns, le32_to_cpu(nscap->rootid));
+	for (ns = current_user_ns(); ; ns = ns->parent) {
+		if (from_kuid(ns, kroot) == 0) {
+			foundroot = true;
+			break;
+		}
+		if (ns == &init_user_ns)
+			break;
+	}
+	if (!foundroot) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	ret = sizeof(struct vfs_cap_data);
+	if (!alloc)
+		goto out;
+
+	/* The result is a plain vfs_cap_data.  It has no rootid field, so it
+	 * must not be filled in through a vfs_ns_cap_data pointer: that would
+	 * place ->data one __le32 too far and run past the allocation. */
+	cap = kmalloc(sizeof(*cap), GFP_ATOMIC);
+	if (!cap) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
+	nsmagic = le32_to_cpu(nscap->magic_etc);
+	magic = VFS_CAP_REVISION;
+	if (NS_CAPS_FLAGS(nsmagic) & VFS_NS_CAP_EFFECTIVE)
+		magic |= VFS_CAP_FLAGS_EFFECTIVE;
+	cap->magic_etc = cpu_to_le32(magic);
+	*buffer = cap;
+
+out:
+	kfree(tmpbuf);
+	return ret;
+}
+
+/*
+ * cap_setxattr_make_nscap - build a security.nscapability value
+ * @dentry: dentry of the file whose xattr is being set
+ * @value:  the security.capability payload the user asked to write
+ * @size:   size of @value; must equal sizeof(struct vfs_cap_data)
+ * @wvalue: out: kmalloc'd struct vfs_ns_cap_data on success, NULL otherwise
+ * @wsize:  out: size of *@wvalue on success, 0 otherwise
+ *
+ * The user requested a write of security.capability but is in a non-init
+ * userns.  So we construct the security.nscapability value that the caller
+ * should write in its place, recording the kuid of the current user
+ * namespace's root as rootid.
+ *
+ * If all is ok, *wvalue has an allocated new value which the caller must
+ * kfree.  Otherwise (bad size, missing inode, no CAP_SETFCAP over the
+ * inode, unmapped root uid, or allocation failure) *wvalue is NULL.
+ */
+void cap_setxattr_make_nscap(struct dentry *dentry, const void *value, size_t size,
+		void **wvalue, size_t *wsize)
+{
+	const struct vfs_cap_data *cap = value;
+	struct vfs_ns_cap_data *nscap;
+	struct user_namespace *ns = current_user_ns();
+	struct inode *inode;
+	kuid_t rootid;
+	__u32 magic, nsmagic;
+
+	/* Enforce the documented failure contract here rather than relying
+	 * on every caller to pre-initialise its out-parameters. */
+	*wvalue = NULL;
+	*wsize = 0;
+
+	if (!value || size != sizeof(struct vfs_cap_data))
+		return;
+	inode = dentry->d_inode;
+	if (!inode || !capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
+		return;
+
+	/* TODO - refuse if security.capability exists */
+
+	rootid = make_kuid(ns, 0);
+	if (!uid_valid(rootid))
+		return;
+
+	nscap = kmalloc(sizeof(*nscap), GFP_ATOMIC);
+	if (!nscap)
+		return;
+
+	nscap->rootid = cpu_to_le32(from_kuid(&init_user_ns, rootid));
+	nsmagic = VFS_NS_CAP_REVISION;
+	magic = le32_to_cpu(cap->magic_etc);
+	/* NOTE(review): the reader tests NS_CAPS_FLAGS(x) & VFS_NS_CAP_EFFECTIVE;
+	 * confirm that VFS_NS_CAP_REVISION << 8 really sets that bit, or
+	 * whether VFS_NS_CAP_EFFECTIVE was meant here. */
+	if (magic & VFS_CAP_FLAGS_EFFECTIVE)
+		nsmagic |= VFS_NS_CAP_REVISION << 8;
+	nscap->magic_etc = cpu_to_le32(nsmagic);
+	memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
+
+	*wvalue = nscap;
+	*wsize = sizeof(*nscap);
+}
+
+/*
* Calculate the new process capability sets from the capability sets attached
* to a file.
*/
@@ -456,6 +578,8 @@
ssize_t size;
struct vfs_ns_cap_data nscap;
bool foundroot = false;
+ kuid_t kroot;
+ uid_t root;
struct user_namespace *ns;
memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
@@ -463,18 +587,6 @@
if (!inode || !inode->i_op->getxattr)
return -ENODATA;
- /* verify that current or ancestor userns root owns this file */
- for (ns = current_user_ns(); ; ns = ns->parent) {
- if (from_kuid(ns, dentry->d_inode->i_uid) == 0) {
- foundroot = true;
- break;
- }
- if (ns == &init_user_ns)
- break;
- }
- if (!foundroot)
- return -ENODATA;
-
size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_NS_CAPS,
&nscap, sizeof(nscap));
if (size == -ENODATA || size == -EOPNOTSUPP)
@@ -485,6 +597,19 @@
if (size != sizeof(nscap))
return -EINVAL;
+ root = le32_to_cpu(nscap.rootid);
+ kroot = make_kuid(&init_user_ns, root);
+ for (ns = current_user_ns(); ; ns = ns->parent) {
+ if (from_kuid(ns, kroot) == 0) {
+ foundroot = true;
+ break;
+ }
+ if (ns == &init_user_ns)
+ break;
+ }
+ if (!foundroot)
+ return -ENODATA;
+
magic_etc = le32_to_cpu(nscap.magic_etc);
if (NS_CAPS_VERSION(magic_etc) != VFS_NS_CAP_REVISION)
@@ -525,9 +650,9 @@
if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
return 0;
- rc = get_vfs_ns_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
+ rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
if (rc == -ENODATA)
- rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
+ rc = get_vfs_ns_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
if (rc < 0) {
if (rc == -EINVAL)
printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
@@ -728,13 +853,19 @@
const void *value, size_t size, int flags)
{
if (!strcmp(name, XATTR_NAME_CAPS)) {
- if (!capable(CAP_SETFCAP))
+ if (current_user_ns() == &init_user_ns && !capable(CAP_SETFCAP))
return -EPERM;
+ /* for non-init userns we'll check permission later in
+ * cap_setxattr_make_nscap() */
return 0;
}
if (!strcmp(name, XATTR_NAME_NS_CAPS)) {
- if (!capable_wrt_inode_uidgid(dentry->d_inode, CAP_SETFCAP))
+ /* only initial userns is allowed to set security.nscapability
+ * directly. We could be more flexible, but would need to
+ * convert the rootid to target ns. Defer.
+ */
+ if (!capable(CAP_SETFCAP))
return -EPERM;
return 0;
}
@@ -766,6 +897,8 @@
}
if (!strcmp(name, XATTR_NAME_NS_CAPS)) {
+ /* do allow root to clear this capability out if they really
+ * want to */
if (!capable_wrt_inode_uidgid(dentry->d_inode, CAP_SETFCAP))
return -EPERM;
return 0;
@@ -1161,6 +1294,7 @@
LSM_HOOK_INIT(bprm_secureexec, cap_bprm_secureexec),
LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
+ LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
LSM_HOOK_INIT(mmap_file, cap_mmap_file),
LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),