| /* VFS-based union mounts for Linux |
| * |
| * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH. |
| * Copyright (C) 2007-2009 Novell Inc. |
| * Copyright (C) 2009-2012 Red Hat, Inc. |
| * |
| * Author(s): Jan Blunck (j.blunck@tu-harburg.de) |
| * Valerie Aurora <vaurora@redhat.com> |
| * David Howells <dhowells@redhat.com> |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License |
| * as published by the Free Software Foundation; version 2 |
| * of the License. |
| */ |
| |
| #include <linux/module.h> |
| #include <linux/fs.h> |
| #include <linux/mount.h> |
| #include <linux/fs_struct.h> |
| #include <linux/slab.h> |
| #include <linux/fsnotify.h> |
| #include <linux/xattr.h> |
| #include <linux/file.h> |
| #include <linux/security.h> |
| #include <linux/splice.h> |
| #include <linux/xattr.h> |
| |
| #include "union.h" |
| |
| /** |
| * union_alloc - allocate a union stack |
| * @path: path of topmost directory |
| * |
| * Allocate a union_stack large enough to contain the maximum number |
| * of layers in this union mount. |
| */ |
| static struct union_stack *union_alloc(struct path *topmost) |
| { |
| unsigned int layers = topmost->dentry->d_sb->s_union_count; |
| BUG_ON(!S_ISDIR(topmost->dentry->d_inode->i_mode)); |
| |
| return kcalloc(sizeof(struct path), layers, GFP_KERNEL); |
| } |
| |
| /** |
| * d_free_unions - free all unions for this dentry |
| * @dentry: topmost dentry in the union stack to remove |
| * |
| * This must be called when freeing a dentry. |
| */ |
| void d_free_unions(struct dentry *topmost) |
| { |
| struct path *path; |
| unsigned int i, layers = topmost->d_sb->s_union_count; |
| |
| if (!IS_DIR_UNIONED(topmost)) |
| return; |
| |
| for (i = 0; i < layers; i++) { |
| path = union_find_dir(topmost, i); |
| if (path->mnt) |
| path_put(path); |
| } |
| kfree(topmost->d_union_stack); |
| topmost->d_union_stack = NULL; |
| } |
| |
| /** |
| * union_add_dir - Add another layer to a unioned directory |
| * @topmost: topmost directory |
| * @lower: directory in the current layer |
| * @layer: index of layer to add this at |
| * |
| * @layer counts starting at 0 for the dir below the topmost dir. |
| * |
| * This transfers the caller's references to the constituents of *lower to the |
| * union stack. |
| */ |
| int union_add_dir(struct path *topmost, struct path *lower, unsigned layer) |
| { |
| struct dentry *dentry = topmost->dentry; |
| struct path *path; |
| |
| BUG_ON(layer >= dentry->d_sb->s_union_count); |
| |
| if (!dentry->d_union_stack) |
| dentry->d_union_stack = union_alloc(topmost); |
| if (!dentry->d_union_stack) |
| return -ENOMEM; |
| |
| path = union_find_dir(dentry, layer); |
| *path = *lower; |
| return 0; |
| } |
| |
| /** |
| * union_copyup_xattr |
| * @old: dentry of original file |
| * @new: dentry of new copy |
| * |
| * Copy up extended attributes from the original file to the new one. |
| * |
| * XXX - Permissions? For now, copying up every xattr. |
| */ |
| static int union_copyup_xattr(struct dentry *old, struct dentry *new) |
| { |
| ssize_t list_size, size; |
| char *buf, *name, *value; |
| int error; |
| |
| /* Check for xattr support */ |
| if (!old->d_inode->i_op->getxattr || |
| !new->d_inode->i_op->getxattr) |
| return 0; |
| |
| /* Find out how big the list of xattrs is */ |
| list_size = vfs_listxattr(old, NULL, 0); |
| if (list_size <= 0) |
| return list_size; |
| |
| /* Allocate memory for the list */ |
| buf = kzalloc(list_size, GFP_KERNEL); |
| if (!buf) |
| return -ENOMEM; |
| |
| /* Allocate memory for the xattr's value */ |
| error = -ENOMEM; |
| value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); |
| if (!value) |
| goto out; |
| |
| /* Actually get the list of xattrs */ |
| list_size = vfs_listxattr(old, buf, list_size); |
| if (list_size <= 0) { |
| error = list_size; |
| goto out_free_value; |
| } |
| |
| for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { |
| /* XXX Locking? old is on read-only fs */ |
| size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); |
| if (size <= 0) { |
| error = size; |
| goto out_free_value; |
| } |
| /* XXX do we really need to check for size overflow? */ |
| /* XXX locks new dentry, lock ordering problems? */ |
| error = vfs_setxattr(new, name, value, size, 0); |
| if (error) |
| goto out_free_value; |
| } |
| |
| out_free_value: |
| kfree(value); |
| out: |
| kfree(buf); |
| return error; |
| } |
| |
| /** |
| * union_create_topmost_dir - Create a matching dir in the topmost file system |
| * @parent - parent of target on topmost layer |
| * @name - name of target |
| * @topmost - path of target on topmost layer |
| * @lower - path of source on lower layer |
| * |
| * As we lookup each directory on the lower layer of a union, we create a |
| * matching directory on the topmost layer if it does not already exist. |
| * |
| * We don't use vfs_mkdir() for a few reasons: don't want to do the security |
| * check, don't want to make the dir opaque, don't need to sanitize the mode. |
| * |
| * XXX - owner is wrong, set credentials properly |
| * XXX - rmdir() directory on failure of xattr copyup |
| * XXX - not atomic w/ respect to crash |
| */ |
| int union_create_topmost_dir(struct path *parent, struct qstr *name, |
| struct path *topmost, struct path *lower) |
| { |
| struct inode *dir = parent->dentry->d_inode; |
| int mode = lower->dentry->d_inode->i_mode; |
| int error; |
| |
| BUG_ON(topmost->dentry->d_inode); |
| |
| /* XXX - Do we even need to check this? */ |
| if (!dir->i_op->mkdir) |
| return -EPERM; |
| |
| error = mnt_want_write(parent->mnt); |
| if (error) |
| return error; |
| |
| error = dir->i_op->mkdir(dir, topmost->dentry, mode); |
| if (error) |
| goto out; |
| |
| error = union_copyup_xattr(lower->dentry, topmost->dentry); |
| if (error) |
| goto out_rmdir; |
| |
| fsnotify_mkdir(dir, topmost->dentry); |
| |
| mnt_drop_write(parent->mnt); |
| |
| return 0; |
| out_rmdir: |
| /* XXX rm created dir */ |
| dput(topmost->dentry); |
| out: |
| mnt_drop_write(parent->mnt); |
| return error; |
| } |
| |
/*
 * Context handed to union_copyup_dir_one() through the readdir callback's
 * opaque buffer argument.
 */
struct union_filldir_info {
	/* Directory in the topmost layer that entries are looked up in */
	struct dentry *topmost_dentry;
	/* Set by the callback to the error it is returning, if any */
	int error;
};
| |
| /** |
| * union_copyup_dir_one - copy up a single directory entry |
| * |
| * Individual directory entry copyup function for union_copyup_dir. |
| * We get the entries from higher level layers first. |
| */ |
| static int union_copyup_dir_one(void *buf, const char *name, int namlen, |
| loff_t offset, u64 ino, unsigned int d_type) |
| { |
| struct union_filldir_info *ufi = (struct union_filldir_info *) buf; |
| struct dentry *topmost_dentry = ufi->topmost_dentry; |
| struct dentry *dentry; |
| int err = 0; |
| |
| switch (namlen) { |
| case 2: |
| if (name[1] != '.') |
| break; |
| case 1: |
| if (name[0] != '.') |
| break; |
| return 0; |
| } |
| |
| /* Lookup this entry in the topmost directory */ |
| dentry = lookup_one_len(name, topmost_dentry, namlen); |
| |
| if (IS_ERR(dentry)) { |
| printk(KERN_WARNING "%s: error looking up %s\n", __func__, |
| dentry->d_name.name); |
| err = PTR_ERR(dentry); |
| goto out; |
| } |
| |
| /* XXX do we need to revalidate on readdir anyway? think NFS */ |
| if (dentry->d_op && dentry->d_op->d_revalidate) |
| goto fallthru; |
| |
| /* If the entry already exists, one of the following is true: it was |
| * already copied up (due to an earlier lookup), an entry with the same |
| * name already exists on the topmost file system, it is a whiteout, or |
| * it is a fallthru. In each case, the top level entry masks any |
| * entries from lower file systems, so don't copy up this entry. |
| */ |
| if (dentry->d_inode || d_is_whiteout(dentry) || d_is_fallthru(dentry)) |
| goto out_dput; |
| |
| /* If the entry doesn't exist, create a fallthru entry in the topmost |
| * file system. All possible directory types are used, so each file |
| * system must implement its own way of storing a fallthru entry. |
| */ |
| fallthru: |
| err = topmost_dentry->d_inode->i_op->fallthru(topmost_dentry->d_inode, |
| dentry); |
| |
| /* It's okay if it exists, ultimate responsibility rests with |
| * ->fallthru() */ |
| if (err == -EEXIST) |
| err = 0; |
| out_dput: |
| dput(dentry); |
| out: |
| if (err) |
| ufi->error = err; |
| return err; |
| } |
| |
| /** |
| * union_copyup_dir - copy up low-level directory entries to topmost dir |
| * |
| * readdir() is difficult to support on union file systems for two reasons: We |
| * must eliminate duplicates and apply whiteouts, and we must return something |
| * in f_pos that lets us restart in the same place when we return. Our |
| * solution is to, on first readdir() of the directory, copy up all visible |
| * entries from the low-level file systems and mark the entries that refer to |
| * low-level file system objects as "fallthru" entries. |
| * |
| * Locking strategy: We hold the topmost dir's i_mutex on entry. We grab the |
| * i_mutex on lower directories one by one. So the locking order is: |
| * |
| * Writable/topmost layers > Read-only/lower layers |
| * |
| * So there is no problem with lock ordering for union stacks with |
| * multiple lower layers. E.g.: |
| * |
| * (topmost) A->B->C (bottom) |
| * (topmost) D->C->B (bottom) |
| * |
| */ |
| int union_copyup_dir(struct path *topmost_path) |
| { |
| struct union_filldir_info ufi; |
| struct dentry *topmost_dentry = topmost_path->dentry; |
| unsigned int i, layers = topmost_dentry->d_sb->s_union_count; |
| int error = 0; |
| |
| BUG_ON(IS_OPAQUE(topmost_dentry->d_inode)); |
| |
| if (!topmost_dentry->d_inode->i_op || |
| !topmost_dentry->d_inode->i_op->fallthru) |
| return -EOPNOTSUPP; |
| |
| error = mnt_want_write(topmost_path->mnt); |
| if (error) |
| return error; |
| |
| for (i = 0; i < layers; i++) { |
| struct file * ftmp; |
| struct inode * inode; |
| struct path *path; |
| |
| path = union_find_dir(topmost_dentry, i); |
| if (!path->mnt) |
| continue; |
| |
| ftmp = dentry_open(path, O_RDONLY | O_DIRECTORY | O_NOATIME, |
| current_cred()); |
| if (IS_ERR(ftmp)) { |
| printk (KERN_ERR "unable to open dir %s for " |
| "directory copyup: %ld\n", |
| path->dentry->d_name.name, PTR_ERR(ftmp)); |
| path_put(path); |
| error = PTR_ERR(ftmp); |
| break; |
| } |
| |
| inode = path->dentry->d_inode; |
| mutex_lock(&inode->i_mutex); |
| |
| error = -ENOENT; |
| if (IS_DEADDIR(inode)) |
| goto out_fput; |
| |
| /* Read the whole directory, calling our directory entry copyup |
| * function on each entry. |
| */ |
| ufi.topmost_dentry = topmost_dentry; |
| ufi.error = 0; |
| error = ftmp->f_op->readdir(ftmp, &ufi, union_copyup_dir_one); |
| out_fput: |
| mutex_unlock(&inode->i_mutex); |
| fput(ftmp); |
| |
| if (ufi.error) |
| error = ufi.error; |
| if (error) |
| break; |
| |
| /* XXX Should process directories below an opaque directory in |
| * case there are fallthrus in it |
| */ |
| if (IS_OPAQUE(path->dentry->d_inode)) |
| break; |
| } |
| |
| /* Mark this dir opaque to show that we have already copied up the |
| * lower entries. Be sure to do this AFTER the directory entries have |
| * been copied up so that if we crash in the middle of copyup, we will |
| * try to copyup the dir next time we read it. |
| * |
| * XXX - Could leave directory non-opaque, and force reread/copyup of |
| * directory each time it is read in from disk. That would make it |
| * easy to update lower file systems (when not union mounted) and have |
| * the changes show up when union mounted again. |
| */ |
| if (!error) { |
| topmost_dentry->d_inode->i_flags |= S_OPAQUE; |
| mark_inode_dirty(topmost_dentry->d_inode); |
| } |
| |
| mnt_drop_write(topmost_path->mnt); |
| return error; |
| } |
| |
| /* Relationship between i_mode and the DT_xxx types */ |
| static inline unsigned char dt_type(struct inode *inode) |
| { |
| return (inode->i_mode >> 12) & 15; |
| } |
| |
| /** |
| * generic_readdir_fallthru - Helper to lookup target of a fallthru |
| * @topmost_dentry: dentry for the topmost dentry of the dir being read |
| * @name: name of fallthru dirent |
| * @namelen: length of @name |
| * @ino: return inode number of target, if found |
| * @d_type: return directory type of target, if found |
| * |
| * In readdir(), client file systems need to lookup the target of a |
| * fallthru in a lower layer for three reasons: (1) fill in d_ino, (2) |
| * fill in d_type, (2) make sure there is something to fall through to |
| * (and if not, don't return this dentry). Upon detecting a fallthru |
| * dentry in readdir(), the client file system should call this function. |
| * |
| * Returns 0 on success and -ENOENT if no matching directory entry was |
| * found (which can happen when the topmost file system is unmounted |
| * and remounted over a different file system than). Any other errors |
| * are unexpected. |
| */ |
| int generic_readdir_fallthru(struct dentry *topmost_dentry, const char *name, |
| int namlen, ino_t *ino, unsigned char *d_type) |
| { |
| struct path *parent; |
| struct dentry *dentry; |
| unsigned int i, layers = topmost_dentry->d_sb->s_union_count; |
| |
| BUG_ON(!mutex_is_locked(&topmost_dentry->d_inode->i_mutex)); |
| |
| for (i = 0; i < layers; i++) { |
| parent = union_find_dir(topmost_dentry, i); |
| mutex_lock(&parent->dentry->d_inode->i_mutex); |
| dentry = lookup_one_len(name, parent->dentry, namlen); |
| mutex_unlock(&parent->dentry->d_inode->i_mutex); |
| if (IS_ERR(dentry)) |
| return PTR_ERR(dentry); |
| if (dentry->d_inode) { |
| *ino = dentry->d_inode->i_ino; |
| *d_type = dt_type(dentry->d_inode); |
| dput(dentry); |
| return 0; |
| } |
| dput(dentry); |
| } |
| return -ENOENT; |
| } |
| EXPORT_SYMBOL(generic_readdir_fallthru); |
| |
| /** |
| * union_create_file |
| * @parent: path of the upper parent directory |
| * @lower: path of the source file |
| * @new: path of the new file, negative dentry |
| * |
| * Must already have mnt_want_write() on the mnt and the parent's i_mutex. |
| */ |
| static int union_create_file(struct path *parent, struct path *lower, |
| struct dentry *new) |
| { |
| BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex)); |
| |
| return vfs_create(parent->dentry->d_inode, new, |
| lower->dentry->d_inode->i_mode, true); |
| } |
| |
| /** |
| * union_create_symlink |
| * @parent: Upper parent of the symlink |
| * @lower: Path of the source symlink |
| * @new: Path of the new symlink, negative dentry |
| * |
| * Must already have mnt_want_write() on the mnt and the parent's i_mutex. |
| */ |
| static int union_create_symlink(struct path *parent, struct path *lower, |
| struct dentry *new) |
| { |
| struct inode *inode = lower->dentry->d_inode; |
| char *content; |
| int error; |
| |
| BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex)); |
| |
| content = kmalloc(PATH_MAX + 2, GFP_KERNEL); |
| if (!content) |
| return -ENOMEM; |
| |
| error = inode->i_op->readlink(lower->dentry, content, PATH_MAX + 1); |
| if (error < 0) |
| goto error; |
| content[error] = 0; |
| |
| error = vfs_symlink(parent->dentry->d_inode, new, content); |
| error: |
| kfree(content); |
| return error; |
| } |
| |
| /** |
| * union_copyup_data - Copy up len bytes of old's data to new |
| * @lower: path of source file in lower layer |
| * @new_mnt: vfsmount of target file |
| * @new_dentry: dentry of target file |
| * @len: number of bytes to copy |
| */ |
| static int union_copyup_data(struct path *lower, struct path *new_path, |
| size_t len) |
| { |
| const struct cred *cred = current_cred(); |
| struct file *lower_file; |
| struct file *new_file; |
| loff_t offset = 0; |
| long bytes; |
| int error = 0; |
| |
| if (len == 0) |
| return 0; |
| |
| lower_file = dentry_open(lower, O_RDONLY, cred); |
| if (IS_ERR(lower_file)) |
| return PTR_ERR(lower_file); |
| |
| new_file = dentry_open(new_path, O_WRONLY, cred); |
| if (IS_ERR(new_file)) { |
| error = PTR_ERR(new_file); |
| goto out_fput; |
| } |
| |
| bytes = do_splice_direct(lower_file, &offset, new_file, len, |
| SPLICE_F_MOVE); |
| if (bytes < 0) |
| error = bytes; |
| |
| fput(new_file); |
| out_fput: |
| fput(lower_file); |
| return error; |
| } |
| |
| /** |
| * union_copyup_file - Copy up a regular file, symlink or special file |
| * @parent: Parent dir on upper fs |
| * @lower: path of file to be copied up |
| * @dentry: dentry to copy up to |
| * @len: number of bytes of file data to copy up |
| */ |
| static int union_copyup_file(struct path *parent, struct path *lower, |
| struct dentry *dentry, size_t len) |
| { |
| const struct cred *saved_cred; |
| struct cred *override_cred; |
| struct path to; |
| int error; |
| |
| BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex)); |
| |
| override_cred = prepare_kernel_cred(NULL); |
| if (!override_cred) |
| return -ENOMEM; |
| |
| override_cred->fsuid = lower->dentry->d_inode->i_uid; |
| override_cred->fsgid = lower->dentry->d_inode->i_gid; |
| |
| saved_cred = override_creds(override_cred); |
| |
| if (S_ISREG(lower->dentry->d_inode->i_mode)) { |
| error = union_create_file(parent, lower, dentry); |
| if (error) |
| goto out; |
| to.mnt = parent->mnt; |
| to.dentry = dentry; |
| error = union_copyup_data(lower, &to, len); |
| } else if (S_ISLNK(lower->dentry->d_inode->i_mode)) { |
| error = union_create_symlink(parent, lower, dentry); |
| goto out; |
| } else { |
| /* Don't currently support copyup of special files, though in |
| * theory there's no reason we couldn't at least copy up |
| * blockdev, chrdev and FIFO files |
| */ |
| error = -EXDEV; |
| goto out; |
| } |
| if (error) |
| /* Most likely error: ENOSPC */ |
| vfs_unlink(parent->dentry->d_inode, dentry); |
| |
| out: |
| revert_creds(saved_cred); |
| put_cred(override_cred); |
| return error; |
| } |
| |
| /** |
| * union_copyup - Copy up a file and len bytes of data |
| * @parent: Parent dir on upper fs |
| * @path: Path of file to be copied up from |
| * @copy_all: Copy all the file (if true) or just @len bytes of it |
| * @len: Amount of file data to copy up |
| * |
| * Parent's i_mutex must be held by caller. Newly copied up path is |
| * returned in @path and original is path_put(). |
| * |
| * NOTE! If a copy up takes place, path->mnt will be changed to the same as |
| * the topmost dir, but won't have a ref taken on it. |
| */ |
| int union_copyup(struct path *parent, struct path *path, |
| bool copy_all, size_t len) |
| { |
| struct dentry *top_dentry; |
| int error; |
| |
| pr_devel("-->%s(%s,%s)\n", __func__, |
| parent->dentry->d_name.name, |
| path->dentry->d_name.name); |
| |
| BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex)); |
| |
| if (!IS_DIR_UNIONED(parent->dentry) || parent->mnt == path->mnt) |
| return 0; |
| |
| BUG_ON(!S_ISDIR(parent->dentry->d_inode->i_mode)); |
| if (IS_DEADDIR(parent->dentry->d_inode)) |
| return -ENOENT; |
| |
| if (copy_all && S_ISREG(path->dentry->d_inode->i_mode)) { |
| loff_t filesize = i_size_read(path->dentry->d_inode); |
| /* Check for overflow of file size */ |
| if ((ssize_t)filesize != filesize) |
| return -EFBIG; |
| len = filesize; |
| } |
| |
| top_dentry = lookup_one_len(path->dentry->d_name.name, parent->dentry, |
| path->dentry->d_name.len); |
| if (IS_ERR(top_dentry)) |
| return PTR_ERR(top_dentry); |
| |
| if (top_dentry->d_inode) { |
| /* We raced with someone else and "lost". That's okay, they |
| * did all the work of copying up the file. |
| * |
| * Note that currently data copyup happens under the parent |
| * dir's i_mutex. If we move it outside that, we'll need some |
| * way of waiting for the data copyup to complete here. |
| */ |
| pr_devel("<--%s() = 0 [lost]\n", __func__); |
| return 0; |
| } |
| |
| error = 0; |
| if (!S_ISREG(path->dentry->d_inode->i_mode) && |
| !S_ISLNK(path->dentry->d_inode->i_mode)) |
| goto out_dput; |
| |
| pr_devel("- copy!\n"); |
| error = union_copyup_file(parent, path, top_dentry, len); |
| if (error < 0) |
| goto out_dput; |
| pr_devel("- copied\n"); |
| |
| path_put(path); |
| path->mnt = parent->mnt; |
| path->dentry = top_dentry; |
| return 0; |
| |
| out_dput: |
| dput(top_dentry); |
| pr_devel("<--%s() = %d\n", __func__, error); |
| return error; |
| } |