/* fs/union.c - pub/scm/linux/kernel/git/dhowells/linux-unionmount - Git at Google */

 /* VFS-based union mounts for Linux
  *
  * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH.
  * Copyright (C) 2007-2009 Novell Inc.
  * Copyright (C) 2009-2012 Red Hat, Inc.
  *
  *   Author(s): Jan Blunck (j.blunck@tu-harburg.de)
  *              Valerie Aurora <vaurora@redhat.com>
  *              David Howells <dhowells@redhat.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; version 2
  * of the License.
  */

 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
 #include <linux/fsnotify.h>
 #include <linux/xattr.h>
 #include <linux/file.h>
 #include <linux/security.h>
 #include <linux/splice.h>
 #include <linux/xattr.h>

 #include "union.h"

 /**
  * union_alloc - allocate a union stack
  * @topmost: path of topmost directory
  *
  * Allocate a zeroed array of struct path with one slot for each of the
  * s_union_count layers recorded in the topmost directory's superblock.
  * @topmost must refer to a directory; anything else trips the BUG_ON().
  *
  * Returns the new array, or NULL if the allocation failed.
  */
 static struct union_stack *union_alloc(struct path *topmost)
 {
 	unsigned int layers = topmost->dentry->d_sb->s_union_count;

 	BUG_ON(!S_ISDIR(topmost->dentry->d_inode->i_mode));

 	/* kcalloc() takes the element count first, then the element size */
 	return kcalloc(layers, sizeof(struct path), GFP_KERNEL);
 }

 /**
  * d_free_unions - release the union stack attached to a dentry
  * @topmost: topmost dentry whose union stack is to be torn down
  *
  * Drops the path reference held in every populated slot of the stack, then
  * frees the stack and clears the dentry's pointer to it.  Does nothing if
  * the dentry is not a unioned directory.
  *
  * This must be called when freeing a dentry.
  */
 void d_free_unions(struct dentry *topmost)
 {
 	unsigned int nr_layers = topmost->d_sb->s_union_count;
 	unsigned int layer;

 	if (!IS_DIR_UNIONED(topmost))
 		return;

 	for (layer = 0; layer < nr_layers; layer++) {
 		struct path *slot = union_find_dir(topmost, layer);

 		/* Skip slots that have no vfsmount set */
 		if (!slot->mnt)
 			continue;
 		path_put(slot);
 	}

 	kfree(topmost->d_union_stack);
 	topmost->d_union_stack = NULL;
 }

 /**
  * union_add_dir - record a lower directory in a unioned directory's stack
  * @topmost: topmost directory
  * @lower: directory in the layer being added
  * @layer: index of the slot to store @lower in
  *
  * @layer counts from 0 for the directory directly below the topmost one and
  * must be smaller than the superblock's s_union_count.
  *
  * The union stack is allocated on first use.  *@lower is copied into the
  * slot by value, so the caller's references to its constituents are
  * transferred to the union stack.
  *
  * Returns 0 on success or -ENOMEM if the stack could not be allocated.
  */
 int union_add_dir(struct path *topmost, struct path *lower, unsigned layer)
 {
 	struct dentry *top = topmost->dentry;
 	struct path *slot;

 	BUG_ON(layer >= top->d_sb->s_union_count);

 	/* Allocate the stack lazily, the first time a layer is added */
 	if (!top->d_union_stack) {
 		top->d_union_stack = union_alloc(topmost);
 		if (!top->d_union_stack)
 			return -ENOMEM;
 	}

 	slot = union_find_dir(top, layer);
 	*slot = *lower;
 	return 0;
 }

 /**
  * union_copyup_xattr - copy extended attributes to a copied-up object
  * @old: dentry of original file
  * @new: dentry of new copy
  *
  * Lists every extended attribute on @old and sets each one, with the same
  * name and value, on @new.  If either inode lacks a ->getxattr operation
  * nothing is copied and 0 is returned.
  *
  * Returns 0 on success or a negative errno from the allocation, list, get
  * or set step that failed.  Attributes already set on @new before a failure
  * are left in place.
  *
  * XXX - Permissions?  For now, copying up every xattr.
  */
 static int union_copyup_xattr(struct dentry *old, struct dentry *new)
 {
 	ssize_t list_size, size;
 	char *buf, *name, *value;
 	int error;

 	/* Check for xattr support */
 	if (!old->d_inode->i_op->getxattr ||
 	    !new->d_inode->i_op->getxattr)
 		return 0;

 	/* Find out how big the list of xattrs is */
 	list_size = vfs_listxattr(old, NULL, 0);
 	if (list_size <= 0)
 		return list_size;

 	/* Allocate memory for the list */
 	buf = kzalloc(list_size, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;

 	/* Allocate memory for the xattr's value */
 	error = -ENOMEM;
 	value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
 	if (!value)
 		goto out;

 	/* Actually get the list of xattrs */
 	list_size = vfs_listxattr(old, buf, list_size);
 	if (list_size <= 0) {
 		error = list_size;
 		goto out_free_value;
 	}

 	error = 0;
 	/* The list is a sequence of NUL-terminated names packed back to back */
 	for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
 		/* XXX Locking? old is on read-only fs */
 		size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
 		/* A zero-length value is valid and must still be copied;
 		 * only a negative return is an error.
 		 */
 		if (size < 0) {
 			error = size;
 			goto out_free_value;
 		}
 		/* XXX do we really need to check for size overflow? */
 		/* XXX locks new dentry, lock ordering problems? */
 		error = vfs_setxattr(new, name, value, size, 0);
 		if (error)
 			goto out_free_value;
 	}

 out_free_value:
 	kfree(value);
 out:
 	kfree(buf);
 	return error;
 }

 /**
  * union_create_topmost_dir - Create a matching dir in the topmost file system
  * @parent: parent of target on topmost layer
  * @name: name of target (not used by this function)
  * @topmost: path of target on topmost layer; its dentry must be negative
  * @lower: path of source on lower layer
  *
  * As we lookup each directory on the lower layer of a union, we create a
  * matching directory on the topmost layer if it does not already exist.  The
  * new directory gets the lower directory's mode and a copy of its extended
  * attributes.
  *
  * We don't use vfs_mkdir() for a few reasons: don't want to do the security
  * check, don't want to make the dir opaque, don't need to sanitize the mode.
  *
  * Returns 0 on success, -EPERM if the parent has no ->mkdir operation, or
  * the error from mnt_want_write(), ->mkdir() or the xattr copyup.
  *
  * XXX - owner is wrong, set credentials properly
  * XXX - rmdir() directory on failure of xattr copyup
  * XXX - not atomic w/ respect to crash
  */
 int union_create_topmost_dir(struct path *parent, struct qstr *name,
 			     struct path *topmost, struct path *lower)
 {
 	struct inode *dir = parent->dentry->d_inode;
 	int mode = lower->dentry->d_inode->i_mode;
 	int error;

 	BUG_ON(topmost->dentry->d_inode);

 	/* XXX - Do we even need to check this? */
 	if (!dir->i_op->mkdir)
 		return -EPERM;

 	error = mnt_want_write(parent->mnt);
 	if (error)
 		return error;

 	error = dir->i_op->mkdir(dir, topmost->dentry, mode);
 	if (!error) {
 		error = union_copyup_xattr(lower->dentry, topmost->dentry);
 		if (error) {
 			/* XXX rm created dir */
 			dput(topmost->dentry);
 		} else {
 			fsnotify_mkdir(dir, topmost->dentry);
 		}
 	}

 	/* Write access is dropped on every path once it has been taken */
 	mnt_drop_write(parent->mnt);
 	return error;
 }

 /*
  * Context handed to union_copyup_dir_one() through the opaque buffer pointer
  * of the readdir callback while a lower directory is being read.
  */
 struct union_filldir_info {
 	/* Topmost directory that entries are looked up and created in */
 	struct dentry *topmost_dentry;
 	/* Last non-zero result recorded by union_copyup_dir_one() */
 	int error;
 };

 /**
  * union_copyup_dir_one - copy up a single directory entry
  * @buf: the struct union_filldir_info for this copyup
  * @name: name of the entry read from the lower directory
  * @namlen: length of @name
  * @offset: directory offset of the entry (unused)
  * @ino: inode number of the entry (unused)
  * @d_type: type of the entry (unused)
  *
  * Individual directory entry copyup function for union_copyup_dir.
  * We get the entries from higher level layers first.
  *
  * "." and ".." are skipped.  Any other name is looked up in the topmost
  * directory, and a fallthru entry is created there unless something already
  * occupies the name.
  *
  * Returns 0 to continue reading, or a negative errno, which is also stored
  * in the union_filldir_info.
  */
 static int union_copyup_dir_one(void *buf, const char *name, int namlen,
 				loff_t offset, u64 ino, unsigned int d_type)
 {
 	struct union_filldir_info *ufi = (struct union_filldir_info *) buf;
 	struct dentry *topmost_dentry = ufi->topmost_dentry;
 	struct dentry *dentry;
 	int err = 0;

 	/* Skip "." and "..": a two-character name must have '.' in both
 	 * positions, a one-character name in its only position.
 	 */
 	switch (namlen) {
 	case 2:
 		if (name[1] != '.')
 			break;
 		/* fall through */
 	case 1:
 		if (name[0] != '.')
 			break;
 		return 0;
 	}

 	/* Lookup this entry in the topmost directory */
 	dentry = lookup_one_len(name, topmost_dentry, namlen);

 	if (IS_ERR(dentry)) {
 		/* dentry is an ERR_PTR here and must not be dereferenced, so
 		 * report the name we were asked to look up instead.
 		 */
 		printk(KERN_WARNING "%s: error looking up %.*s\n", __func__,
 		       namlen, name);
 		err = PTR_ERR(dentry);
 		goto out;
 	}

 	/* XXX do we need to revalidate on readdir anyway? think NFS */
 	if (dentry->d_op && dentry->d_op->d_revalidate)
 		goto fallthru;

 	/* If the entry already exists, one of the following is true: it was
 	 * already copied up (due to an earlier lookup), an entry with the same
 	 * name already exists on the topmost file system, it is a whiteout, or
 	 * it is a fallthru.  In each case, the top level entry masks any
 	 * entries from lower file systems, so don't copy up this entry.
 	 */
 	if (dentry->d_inode || d_is_whiteout(dentry) || d_is_fallthru(dentry))
 		goto out_dput;

 	/* If the entry doesn't exist, create a fallthru entry in the topmost
 	 * file system.  All possible directory types are used, so each file
 	 * system must implement its own way of storing a fallthru entry.
 	 */
 fallthru:
 	err = topmost_dentry->d_inode->i_op->fallthru(topmost_dentry->d_inode,
 						      dentry);

 	/* It's okay if it exists, ultimate responsibility rests with
 	 * ->fallthru() */
 	if (err == -EEXIST)
 		err = 0;
 out_dput:
 	dput(dentry);
 out:
 	if (err)
 		ufi->error = err;
 	return err;
 }

 /**
  * union_copyup_dir - copy up low-level directory entries to topmost dir
  *
  * readdir() is difficult to support on union file systems for two reasons: We
  * must eliminate duplicates and apply whiteouts, and we must return something
  * in f_pos that lets us restart in the same place when we return.  Our
  * solution is to, on first readdir() of the directory, copy up all visible
  * entries from the low-level file systems and mark the entries that refer to
  * low-level file system objects as "fallthru" entries.
  *
  * Locking strategy: We hold the topmost dir's i_mutex on entry.  We grab the
  * i_mutex on lower directories one by one.  So the locking order is:
  *
  *	Writable/topmost layers > Read-only/lower layers
  *
  * So there is no problem with lock ordering for union stacks with
  * multiple lower layers.  E.g.:
  *
  *	(topmost) A->B->C (bottom)
  *	(topmost) D->C->B (bottom)
  *
  */
 int union_copyup_dir(struct path *topmost_path)
 {
 	struct union_filldir_info ufi;
 	struct dentry *topmost_dentry = topmost_path->dentry;
 	unsigned int i, layers = topmost_dentry->d_sb->s_union_count;
 	int error = 0;

 	BUG_ON(IS_OPAQUE(topmost_dentry->d_inode));

 	if (!topmost_dentry->d_inode->i_op ||
 	    !topmost_dentry->d_inode->i_op->fallthru)
 		return -EOPNOTSUPP;

 	error = mnt_want_write(topmost_path->mnt);
 	if (error)
 		return error;

 	for (i = 0; i < layers; i++) {
 		struct file * ftmp;
 		struct inode * inode;
 		struct path *path;

 		path = union_find_dir(topmost_dentry, i);
 		if (!path->mnt)
 			continue;

 		ftmp = dentry_open(path, O_RDONLY | O_DIRECTORY | O_NOATIME,
 				   current_cred());
 		if (IS_ERR(ftmp)) {
 			printk (KERN_ERR "unable to open dir %s for "
 				"directory copyup: %ld\n",
 				path->dentry->d_name.name, PTR_ERR(ftmp));
 			path_put(path);
 			error = PTR_ERR(ftmp);
 			break;
 		}

 		inode = path->dentry->d_inode;
 		mutex_lock(&inode->i_mutex);

 		error = -ENOENT;
 		if (IS_DEADDIR(inode))
 			goto out_fput;

 		/* Read the whole directory, calling our directory entry copyup
 		 * function on each entry.
 		 */
 		ufi.topmost_dentry = topmost_dentry;
 		ufi.error = 0;
 		error = ftmp->f_op->readdir(ftmp, &ufi, union_copyup_dir_one);
 out_fput:
 		mutex_unlock(&inode->i_mutex);
 		fput(ftmp);

 		if (ufi.error)
 			error = ufi.error;
 		if (error)
 			break;

 		/* XXX Should process directories below an opaque directory in
 		 * case there are fallthrus in it
 		 */
 		if (IS_OPAQUE(path->dentry->d_inode))
 			break;
 	}

 	/* Mark this dir opaque to show that we have already copied up the
 	 * lower entries.  Be sure to do this AFTER the directory entries have
 	 * been copied up so that if we crash in the middle of copyup, we will
 	 * try to copyup the dir next time we read it.
 	 *
 	 * XXX - Could leave directory non-opaque, and force reread/copyup of
 	 * directory each time it is read in from disk.  That would make it
 	 * easy to update lower file systems (when not union mounted) and have
 	 * the changes show up when union mounted again.
 	 */
 	if (!error) {
 		topmost_dentry->d_inode->i_flags |= S_OPAQUE;
 		mark_inode_dirty(topmost_dentry->d_inode);
 	}

 	mnt_drop_write(topmost_path->mnt);
 	return error;
 }

 /*
  * Relationship between i_mode and the DT_xxx types: the directory entry
  * type is the four-bit field of i_mode that starts at bit 12.
  */
 static inline unsigned char dt_type(struct inode *inode)
 {
 	unsigned int type_bits = inode->i_mode >> 12;

 	return type_bits & 0xf;
 }

 /**
  * generic_readdir_fallthru - Helper to lookup target of a fallthru
  * @topmost_dentry: dentry for the topmost dentry of the dir being read
  * @name: name of fallthru dirent
  * @namlen: length of @name
  * @ino: return inode number of target, if found
  * @d_type: return directory type of target, if found
  *
  * In readdir(), client file systems need to lookup the target of a
  * fallthru in a lower layer for three reasons: (1) fill in d_ino, (2)
  * fill in d_type, (3) make sure there is something to fall through to
  * (and if not, don't return this dentry).  Upon detecting a fallthru
  * dentry in readdir(), the client file system should call this function.
  *
  * The caller must hold the topmost directory's i_mutex.  Each populated
  * lower layer is searched in order and the first positive dentry found
  * supplies *@ino and *@d_type.
  *
  * Returns 0 on success and -ENOENT if no matching directory entry was
  * found (which can happen when the topmost file system is unmounted
  * and remounted over a different file system).  Any other errors
  * are unexpected.
  */
 int generic_readdir_fallthru(struct dentry *topmost_dentry, const char *name,
 			     int namlen, ino_t *ino, unsigned char *d_type)
 {
 	struct path *parent;
 	struct dentry *dentry;
 	unsigned int i, layers = topmost_dentry->d_sb->s_union_count;

 	BUG_ON(!mutex_is_locked(&topmost_dentry->d_inode->i_mutex));

 	for (i = 0; i < layers; i++) {
 		parent = union_find_dir(topmost_dentry, i);
 		/* Skip layers with no directory recorded, as d_free_unions()
 		 * and union_copyup_dir() do; such a slot has no dentry to
 		 * look the name up in.
 		 */
 		if (!parent->mnt)
 			continue;

 		mutex_lock(&parent->dentry->d_inode->i_mutex);
 		dentry = lookup_one_len(name, parent->dentry, namlen);
 		mutex_unlock(&parent->dentry->d_inode->i_mutex);
 		if (IS_ERR(dentry))
 			return PTR_ERR(dentry);
 		if (dentry->d_inode) {
 			*ino = dentry->d_inode->i_ino;
 			*d_type = dt_type(dentry->d_inode);
 			dput(dentry);
 			return 0;
 		}
 		/* Negative dentry: nothing on this layer, try the next one */
 		dput(dentry);
 	}
 	return -ENOENT;
 }
 EXPORT_SYMBOL(generic_readdir_fallthru);

 /**
  * union_create_file
  * @parent: path of the upper parent directory
  * @lower: path of the source file
  * @new: path of the new file, negative dentry
  *
  * Must already have mnt_want_write() on the mnt and the parent's i_mutex.
  */
 static int union_create_file(struct path *parent, struct path *lower,
 			     struct dentry *new)
 {
 	BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));

 	return vfs_create(parent->dentry->d_inode, new,
 			  lower->dentry->d_inode->i_mode, true);
 }

 /**
  * union_create_symlink - recreate a lower symlink on the upper layer
  * @parent: Upper parent of the symlink
  * @lower: Path of the source symlink
  * @new: negative dentry for the new symlink
  *
  * Reads the target of the lower symlink into a temporary buffer and creates
  * a symlink with the same target at @new.
  *
  * Must already have mnt_want_write() on the mnt and the parent's i_mutex.
  *
  * Returns -ENOMEM if the buffer cannot be allocated, the negative result of
  * ->readlink() if reading the target fails, otherwise the result of
  * vfs_symlink().
  *
  * NOTE(review): ->readlink() is handed a kmalloc()ed buffer here; confirm
  * that the operation accepts a kernel pointer in this context.
  */
 static int union_create_symlink(struct path *parent, struct path *lower,
 				struct dentry *new)
 {
 	struct inode *inode = lower->dentry->d_inode;
 	char *target;
 	int ret;

 	BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));

 	target = kmalloc(PATH_MAX + 2, GFP_KERNEL);
 	if (!target)
 		return -ENOMEM;

 	/* On success ->readlink() returns the number of bytes stored, which
 	 * is where the terminating NUL goes.
 	 */
 	ret = inode->i_op->readlink(lower->dentry, target, PATH_MAX + 1);
 	if (ret >= 0) {
 		target[ret] = 0;
 		ret = vfs_symlink(parent->dentry->d_inode, new, target);
 	}

 	kfree(target);
 	return ret;
 }

 /**
  * union_copyup_data - Copy up len bytes of old's data to new
  * @lower: path of source file in lower layer
  * @new_path: path of target file
  * @len: number of bytes to copy
  *
  * Opens the source read-only and the target write-only with the current
  * credentials and splices data from the start of the source into the
  * target.  Splicing is repeated until @len bytes have been moved, the source
  * runs out of data, or an error occurs, so that a short transfer is never
  * mistaken for a complete copy.
  *
  * Returns 0 on success (including @len == 0) or a negative errno from
  * dentry_open() or do_splice_direct().
  */
 static int union_copyup_data(struct path *lower, struct path *new_path,
 			     size_t len)
 {
 	const struct cred *cred = current_cred();
 	struct file *lower_file;
 	struct file *new_file;
 	loff_t offset = 0;
 	long bytes;
 	int error = 0;

 	if (len == 0)
 		return 0;

 	lower_file = dentry_open(lower, O_RDONLY, cred);
 	if (IS_ERR(lower_file))
 		return PTR_ERR(lower_file);

 	new_file = dentry_open(new_path, O_WRONLY, cred);
 	if (IS_ERR(new_file)) {
 		error = PTR_ERR(new_file);
 		goto out_fput;
 	}

 	while (len) {
 		bytes = do_splice_direct(lower_file, &offset, new_file, len,
 					 SPLICE_F_MOVE);
 		if (bytes < 0) {
 			error = bytes;
 			break;
 		}
 		/* Nothing more to read from the source */
 		if (bytes == 0)
 			break;
 		len -= bytes;
 	}

 	fput(new_file);
 out_fput:
 	fput(lower_file);
 	return error;
 }

 /**
  * union_copyup_file - Copy up a regular file or symlink
  * @parent: Parent dir on upper fs
  * @lower: path of file to be copied up
  * @dentry: dentry to copy up to
  * @len: number of bytes of file data to copy up
  *
  * The copy is made under temporary kernel credentials whose fsuid and fsgid
  * are set to the owner and group of the lower inode.  A regular file is
  * created and then filled with @len bytes of data; if the data copy fails
  * the new file is unlinked again.  A symlink is recreated with the same
  * target.  Any other file type is refused with -EXDEV.
  *
  * The parent's i_mutex must be held.
  *
  * Returns 0 on success or a negative errno.
  */
 static int union_copyup_file(struct path *parent, struct path *lower,
 			     struct dentry *dentry, size_t len)
 {
 	const struct cred *saved_cred;
 	struct cred *override_cred;
 	struct path to;
 	int error;

 	BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));

 	override_cred = prepare_kernel_cred(NULL);
 	if (!override_cred)
 		return -ENOMEM;

 	override_cred->fsuid = lower->dentry->d_inode->i_uid;
 	override_cred->fsgid = lower->dentry->d_inode->i_gid;

 	saved_cred = override_creds(override_cred);

 	if (S_ISREG(lower->dentry->d_inode->i_mode)) {
 		error = union_create_file(parent, lower, dentry);
 		if (!error) {
 			to.mnt = parent->mnt;
 			to.dentry = dentry;
 			error = union_copyup_data(lower, &to, len);
 			if (error) {
 				/* Most likely error: ENOSPC */
 				vfs_unlink(parent->dentry->d_inode, dentry);
 			}
 		}
 	} else if (S_ISLNK(lower->dentry->d_inode->i_mode)) {
 		error = union_create_symlink(parent, lower, dentry);
 	} else {
 		/* Don't currently support copyup of special files, though in
 		 * theory there's no reason we couldn't at least copy up
 		 * blockdev, chrdev and FIFO files
 		 */
 		error = -EXDEV;
 	}

 	revert_creds(saved_cred);
 	put_cred(override_cred);
 	return error;
 }

 /**
  * union_copyup - Copy up a file and len bytes of data
  * @parent: Parent dir on upper fs
  * @path: Path of file to be copied up from
  * @copy_all: Copy all the file (if true) or just @len bytes of it
  * @len: Amount of file data to copy up
  *
  * Parent's i_mutex must be held by caller.  Newly copied up path is
  * returned in @path and original is path_put().
  *
  * Nothing is copied, and 0 is returned with @path untouched, when the
  * parent is not a unioned directory, when @path is already on the parent's
  * mount, when the name already exists in the topmost directory, or when
  * @path is neither a regular file nor a symlink.
  *
  * Returns 0 on success, -ENOENT if the parent directory is dead, -EFBIG if
  * the whole file was requested but its size does not fit in ssize_t, or the
  * error from the lookup or the copy.
  *
  * NOTE!  If a copy up takes place, path->mnt will be changed to the same as
  * the topmost dir, but won't have a ref taken on it.
  */
 int union_copyup(struct path *parent, struct path *path,
 		 bool copy_all, size_t len)
 {
 	struct dentry *top_dentry;
 	int error;

 	pr_devel("-->%s(%s,%s)\n", __func__,
 		 parent->dentry->d_name.name,
 		 path->dentry->d_name.name);

 	BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));

 	if (!IS_DIR_UNIONED(parent->dentry) || parent->mnt == path->mnt)
 		return 0;

 	BUG_ON(!S_ISDIR(parent->dentry->d_inode->i_mode));
 	if (IS_DEADDIR(parent->dentry->d_inode))
 		return -ENOENT;

 	if (copy_all && S_ISREG(path->dentry->d_inode->i_mode)) {
 		loff_t filesize = i_size_read(path->dentry->d_inode);
 		/* Check for overflow of file size */
 		if ((ssize_t)filesize != filesize)
 			return -EFBIG;
 		len = filesize;
 	}

 	top_dentry = lookup_one_len(path->dentry->d_name.name, parent->dentry,
 				    path->dentry->d_name.len);
 	if (IS_ERR(top_dentry))
 		return PTR_ERR(top_dentry);

 	if (top_dentry->d_inode) {
 		/* We raced with someone else and "lost".  That's okay, they
 		 * did all the work of copying up the file.
 		 *
 		 * Note that currently data copyup happens under the parent
 		 * dir's i_mutex.  If we move it outside that, we'll need some
 		 * way of waiting for the data copyup to complete here.
 		 */
 		pr_devel("<--%s() = 0 [lost]\n", __func__);
 		/* Drop the reference taken by lookup_one_len(); it is not
 		 * handed on to anyone on this path.
 		 */
 		dput(top_dentry);
 		return 0;
 	}

 	error = 0;
 	if (!S_ISREG(path->dentry->d_inode->i_mode) &&
 	    !S_ISLNK(path->dentry->d_inode->i_mode))
 		goto out_dput;

 	pr_devel("- copy!\n");
 	error = union_copyup_file(parent, path, top_dentry, len);
 	if (error < 0)
 		goto out_dput;
 	pr_devel("- copied\n");

 	/* Hand the lookup's reference on top_dentry over to *path */
 	path_put(path);
 	path->mnt = parent->mnt;
 	path->dentry = top_dentry;
 	return 0;

 out_dput:
 	dput(top_dentry);
 	pr_devel("<--%s() = %d\n", __func__, error);
 	return error;
 }
	/* VFS-based union mounts for Linux
	*
	* Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH.
	* Copyright (C) 2007-2009 Novell Inc.
	* Copyright (C) 2009-2012 Red Hat, Inc.
	*
	* Author(s): Jan Blunck (j.blunck@tu-harburg.de)
	* Valerie Aurora <vaurora@redhat.com>
	* David Howells <dhowells@redhat.com>
	*
	* This program is free software; you can redistribute it and/or
	* modify it under the terms of the GNU General Public License
	* as published by the Free Software Foundation; version 2
	* of the License.
	*/

	#include <linux/module.h>
	#include <linux/fs.h>
	#include <linux/mount.h>
	#include <linux/fs_struct.h>
	#include <linux/slab.h>
	#include <linux/fsnotify.h>
	#include <linux/xattr.h>
	#include <linux/file.h>
	#include <linux/security.h>
	#include <linux/splice.h>
	#include <linux/xattr.h>

	#include "union.h"

	/**
	* union_alloc - allocate a union stack
	* @path: path of topmost directory
	*
	* Allocate a union_stack large enough to contain the maximum number
	* of layers in this union mount.
	*/
	static struct union_stack union_alloc(struct path topmost)
	{
	unsigned int layers = topmost->dentry->d_sb->s_union_count;
	BUG_ON(!S_ISDIR(topmost->dentry->d_inode->i_mode));

	return kcalloc(sizeof(struct path), layers, GFP_KERNEL);
	}

	/**
	 * d_free_unions - release the union stack attached to a dentry
	 * @topmost: topmost dentry whose union stack is to be torn down
	 *
	 * Drops the path reference held in every populated slot of the stack,
	 * then frees the stack and clears the dentry's pointer to it.  Does
	 * nothing if the dentry is not a unioned directory.
	 *
	 * This must be called when freeing a dentry.
	 */
	void d_free_unions(struct dentry *topmost)
	{
		unsigned int nr_layers = topmost->d_sb->s_union_count;
		unsigned int layer;

		if (!IS_DIR_UNIONED(topmost))
			return;

		for (layer = 0; layer < nr_layers; layer++) {
			struct path *slot = union_find_dir(topmost, layer);

			/* Skip slots that have no vfsmount set */
			if (!slot->mnt)
				continue;
			path_put(slot);
		}

		kfree(topmost->d_union_stack);
		topmost->d_union_stack = NULL;
	}

	/**
	* union_add_dir - Add another layer to a unioned directory
	* @topmost: topmost directory
	* @lower: directory in the current layer
	* @layer: index of layer to add this at
	*
	* @layer counts starting at 0 for the dir below the topmost dir.
	*
	* This transfers the caller's references to the constituents of *lower to the
	* union stack.
	*/
	int union_add_dir(struct path topmost, struct path lower, unsigned layer)
	{
	struct dentry *dentry = topmost->dentry;
	struct path *path;

	BUG_ON(layer >= dentry->d_sb->s_union_count);

	if (!dentry->d_union_stack)
	dentry->d_union_stack = union_alloc(topmost);
	if (!dentry->d_union_stack)
	return -ENOMEM;

	path = union_find_dir(dentry, layer);
	path = lower;
	return 0;
	}

	/**
	* union_copyup_xattr
	* @old: dentry of original file
	* @new: dentry of new copy
	*
	* Copy up extended attributes from the original file to the new one.
	*
	* XXX - Permissions? For now, copying up every xattr.
	*/
	static int union_copyup_xattr(struct dentry old, struct dentry new)
	{
	ssize_t list_size, size;
	char buf, name, *value;
	int error;

	/* Check for xattr support */
	if (!old->d_inode->i_op->getxattr \|\|
	!new->d_inode->i_op->getxattr)
	return 0;

	/* Find out how big the list of xattrs is */
	list_size = vfs_listxattr(old, NULL, 0);
	if (list_size <= 0)
	return list_size;

	/* Allocate memory for the list */
	buf = kzalloc(list_size, GFP_KERNEL);
	if (!buf)
	return -ENOMEM;

	/* Allocate memory for the xattr's value */
	error = -ENOMEM;
	value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
	if (!value)
	goto out;

	/* Actually get the list of xattrs */
	list_size = vfs_listxattr(old, buf, list_size);
	if (list_size <= 0) {
	error = list_size;
	goto out_free_value;
	}

	for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
	/* XXX Locking? old is on read-only fs */
	size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
	if (size <= 0) {
	error = size;
	goto out_free_value;
	}
	/* XXX do we really need to check for size overflow? */
	/* XXX locks new dentry, lock ordering problems? */
	error = vfs_setxattr(new, name, value, size, 0);
	if (error)
	goto out_free_value;
	}

	out_free_value:
	kfree(value);
	out:
	kfree(buf);
	return error;
	}

	/**
	* union_create_topmost_dir - Create a matching dir in the topmost file system
	* @parent - parent of target on topmost layer
	* @name - name of target
	* @topmost - path of target on topmost layer
	* @lower - path of source on lower layer
	*
	* As we lookup each directory on the lower layer of a union, we create a
	* matching directory on the topmost layer if it does not already exist.
	*
	* We don't use vfs_mkdir() for a few reasons: don't want to do the security
	* check, don't want to make the dir opaque, don't need to sanitize the mode.
	*
	* XXX - owner is wrong, set credentials properly
	* XXX - rmdir() directory on failure of xattr copyup
	* XXX - not atomic w/ respect to crash
	*/
	int union_create_topmost_dir(struct path parent, struct qstr name,
	struct path topmost, struct path lower)
	{
	struct inode *dir = parent->dentry->d_inode;
	int mode = lower->dentry->d_inode->i_mode;
	int error;

	BUG_ON(topmost->dentry->d_inode);

	/* XXX - Do we even need to check this? */
	if (!dir->i_op->mkdir)
	return -EPERM;

	error = mnt_want_write(parent->mnt);
	if (error)
	return error;

	error = dir->i_op->mkdir(dir, topmost->dentry, mode);
	if (error)
	goto out;

	error = union_copyup_xattr(lower->dentry, topmost->dentry);
	if (error)
	goto out_rmdir;

	fsnotify_mkdir(dir, topmost->dentry);

	mnt_drop_write(parent->mnt);

	return 0;
	out_rmdir:
	/* XXX rm created dir */
	dput(topmost->dentry);
	out:
	mnt_drop_write(parent->mnt);
	return error;
	}

	/*
	 * Context handed to union_copyup_dir_one() through the opaque buffer
	 * pointer of the readdir callback while a lower directory is being read.
	 */
	struct union_filldir_info {
	/* Topmost directory that entries are looked up and created in */
	struct dentry *topmost_dentry;
	/* Last non-zero result recorded by union_copyup_dir_one() */
	int error;
	};

	/**
	 * union_copyup_dir_one - copy up a single directory entry
	 * @buf: the struct union_filldir_info for this copyup
	 * @name: name of the entry read from the lower directory
	 * @namlen: length of @name
	 * @offset: directory offset of the entry (unused)
	 * @ino: inode number of the entry (unused)
	 * @d_type: type of the entry (unused)
	 *
	 * Individual directory entry copyup function for union_copyup_dir.
	 * We get the entries from higher level layers first.
	 *
	 * "." and ".." are skipped.  Any other name is looked up in the topmost
	 * directory, and a fallthru entry is created there unless something
	 * already occupies the name.
	 *
	 * Returns 0 to continue reading, or a negative errno, which is also
	 * stored in the union_filldir_info.
	 */
	static int union_copyup_dir_one(void *buf, const char *name, int namlen,
					loff_t offset, u64 ino, unsigned int d_type)
	{
		struct union_filldir_info *ufi = (struct union_filldir_info *) buf;
		struct dentry *topmost_dentry = ufi->topmost_dentry;
		struct dentry *dentry;
		int err = 0;

		/* Skip "." and "..": a two-character name must have '.' in both
		 * positions, a one-character name in its only position.
		 */
		switch (namlen) {
		case 2:
			if (name[1] != '.')
				break;
			/* fall through */
		case 1:
			if (name[0] != '.')
				break;
			return 0;
		}

		/* Lookup this entry in the topmost directory */
		dentry = lookup_one_len(name, topmost_dentry, namlen);

		if (IS_ERR(dentry)) {
			/* dentry is an ERR_PTR here and must not be
			 * dereferenced, so report the requested name instead.
			 */
			printk(KERN_WARNING "%s: error looking up %.*s\n", __func__,
			       namlen, name);
			err = PTR_ERR(dentry);
			goto out;
		}

		/* XXX do we need to revalidate on readdir anyway? think NFS */
		if (dentry->d_op && dentry->d_op->d_revalidate)
			goto fallthru;

		/* If the entry already exists, one of the following is true: it was
		 * already copied up (due to an earlier lookup), an entry with the same
		 * name already exists on the topmost file system, it is a whiteout, or
		 * it is a fallthru. In each case, the top level entry masks any
		 * entries from lower file systems, so don't copy up this entry.
		 */
		if (dentry->d_inode || d_is_whiteout(dentry) || d_is_fallthru(dentry))
			goto out_dput;

		/* If the entry doesn't exist, create a fallthru entry in the topmost
		 * file system. All possible directory types are used, so each file
		 * system must implement its own way of storing a fallthru entry.
		 */
	fallthru:
		err = topmost_dentry->d_inode->i_op->fallthru(topmost_dentry->d_inode,
							      dentry);

		/* It's okay if it exists, ultimate responsibility rests with
		 * ->fallthru() */
		if (err == -EEXIST)
			err = 0;
	out_dput:
		dput(dentry);
	out:
		if (err)
			ufi->error = err;
		return err;
	}

	/**
	* union_copyup_dir - copy up low-level directory entries to topmost dir
	*
	* readdir() is difficult to support on union file systems for two reasons: We
	* must eliminate duplicates and apply whiteouts, and we must return something
	* in f_pos that lets us restart in the same place when we return. Our
	* solution is to, on first readdir() of the directory, copy up all visible
	* entries from the low-level file systems and mark the entries that refer to
	* low-level file system objects as "fallthru" entries.
	*
	* Locking strategy: We hold the topmost dir's i_mutex on entry. We grab the
	* i_mutex on lower directories one by one. So the locking order is:
	*
	* Writable/topmost layers > Read-only/lower layers
	*
	* So there is no problem with lock ordering for union stacks with
	* multiple lower layers. E.g.:
	*
	* (topmost) A->B->C (bottom)
	* (topmost) D->C->B (bottom)
	*
	*/
	int union_copyup_dir(struct path *topmost_path)
	{
	struct union_filldir_info ufi;
	struct dentry *topmost_dentry = topmost_path->dentry;
	unsigned int i, layers = topmost_dentry->d_sb->s_union_count;
	int error = 0;

	BUG_ON(IS_OPAQUE(topmost_dentry->d_inode));

	if (!topmost_dentry->d_inode->i_op \|\|
	!topmost_dentry->d_inode->i_op->fallthru)
	return -EOPNOTSUPP;

	error = mnt_want_write(topmost_path->mnt);
	if (error)
	return error;

	for (i = 0; i < layers; i++) {
	struct file * ftmp;
	struct inode * inode;
	struct path *path;

	path = union_find_dir(topmost_dentry, i);
	if (!path->mnt)
	continue;

	ftmp = dentry_open(path, O_RDONLY \| O_DIRECTORY \| O_NOATIME,
	current_cred());
	if (IS_ERR(ftmp)) {
	printk (KERN_ERR "unable to open dir %s for "
	"directory copyup: %ld\n",
	path->dentry->d_name.name, PTR_ERR(ftmp));
	path_put(path);
	error = PTR_ERR(ftmp);
	break;
	}

	inode = path->dentry->d_inode;
	mutex_lock(&inode->i_mutex);

	error = -ENOENT;
	if (IS_DEADDIR(inode))
	goto out_fput;

	/* Read the whole directory, calling our directory entry copyup
	* function on each entry.
	*/
	ufi.topmost_dentry = topmost_dentry;
	ufi.error = 0;
	error = ftmp->f_op->readdir(ftmp, &ufi, union_copyup_dir_one);
	out_fput:
	mutex_unlock(&inode->i_mutex);
	fput(ftmp);

	if (ufi.error)
	error = ufi.error;
	if (error)
	break;

	/* XXX Should process directories below an opaque directory in
	* case there are fallthrus in it
	*/
	if (IS_OPAQUE(path->dentry->d_inode))
	break;
	}

	/* Mark this dir opaque to show that we have already copied up the
	* lower entries. Be sure to do this AFTER the directory entries have
	* been copied up so that if we crash in the middle of copyup, we will
	* try to copyup the dir next time we read it.
	*
	* XXX - Could leave directory non-opaque, and force reread/copyup of
	* directory each time it is read in from disk. That would make it
	* easy to update lower file systems (when not union mounted) and have
	* the changes show up when union mounted again.
	*/
	if (!error) {
	topmost_dentry->d_inode->i_flags \|= S_OPAQUE;
	mark_inode_dirty(topmost_dentry->d_inode);
	}

	mnt_drop_write(topmost_path->mnt);
	return error;
	}

	/*
	 * Relationship between i_mode and the DT_xxx types: the directory entry
	 * type is the four-bit field of i_mode that starts at bit 12.
	 */
	static inline unsigned char dt_type(struct inode *inode)
	{
		unsigned int type_bits = inode->i_mode >> 12;

		return type_bits & 0xf;
	}

	/**
	 * generic_readdir_fallthru - Helper to lookup target of a fallthru
	 * @topmost_dentry: dentry for the topmost dentry of the dir being read
	 * @name: name of fallthru dirent
	 * @namlen: length of @name
	 * @ino: return inode number of target, if found
	 * @d_type: return directory type of target, if found
	 *
	 * In readdir(), client file systems need to lookup the target of a
	 * fallthru in a lower layer for three reasons: (1) fill in d_ino, (2)
	 * fill in d_type, (3) make sure there is something to fall through to
	 * (and if not, don't return this dentry). Upon detecting a fallthru
	 * dentry in readdir(), the client file system should call this function.
	 *
	 * The caller must hold the topmost directory's i_mutex.  Each populated
	 * lower layer is searched in order and the first positive dentry found
	 * supplies *@ino and *@d_type.
	 *
	 * Returns 0 on success and -ENOENT if no matching directory entry was
	 * found (which can happen when the topmost file system is unmounted
	 * and remounted over a different file system). Any other errors
	 * are unexpected.
	 */
	int generic_readdir_fallthru(struct dentry *topmost_dentry, const char *name,
				     int namlen, ino_t *ino, unsigned char *d_type)
	{
		struct path *parent;
		struct dentry *dentry;
		unsigned int i, layers = topmost_dentry->d_sb->s_union_count;

		BUG_ON(!mutex_is_locked(&topmost_dentry->d_inode->i_mutex));

		for (i = 0; i < layers; i++) {
			parent = union_find_dir(topmost_dentry, i);
			/* Skip layers with no directory recorded, as
			 * d_free_unions() and union_copyup_dir() do; such a
			 * slot has no dentry to look the name up in.
			 */
			if (!parent->mnt)
				continue;

			mutex_lock(&parent->dentry->d_inode->i_mutex);
			dentry = lookup_one_len(name, parent->dentry, namlen);
			mutex_unlock(&parent->dentry->d_inode->i_mutex);
			if (IS_ERR(dentry))
				return PTR_ERR(dentry);
			if (dentry->d_inode) {
				*ino = dentry->d_inode->i_ino;
				*d_type = dt_type(dentry->d_inode);
				dput(dentry);
				return 0;
			}
			/* Negative dentry: nothing on this layer, try the next */
			dput(dentry);
		}
		return -ENOENT;
	}
	EXPORT_SYMBOL(generic_readdir_fallthru);

	/**
	* union_create_file
	* @parent: path of the upper parent directory
	* @lower: path of the source file
	* @new: path of the new file, negative dentry
	*
	* Must already have mnt_want_write() on the mnt and the parent's i_mutex.
	*/
	static int union_create_file(struct path parent, struct path lower,
	struct dentry *new)
	{
	BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));

	return vfs_create(parent->dentry->d_inode, new,
	lower->dentry->d_inode->i_mode, true);
	}

	/**
	* union_create_symlink
	* @parent: Upper parent of the symlink
	* @lower: Path of the source symlink
	* @new: Path of the new symlink, negative dentry
	*
	* Must already have mnt_want_write() on the mnt and the parent's i_mutex.
	*/
	static int union_create_symlink(struct path parent, struct path lower,
	struct dentry *new)
	{
	struct inode *inode = lower->dentry->d_inode;
	char *content;
	int error;

	BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));

	content = kmalloc(PATH_MAX + 2, GFP_KERNEL);
	if (!content)
	return -ENOMEM;

	error = inode->i_op->readlink(lower->dentry, content, PATH_MAX + 1);
	if (error < 0)
	goto error;
	content[error] = 0;

	error = vfs_symlink(parent->dentry->d_inode, new, content);
	error:
	kfree(content);
	return error;
	}

	/**
	 * union_copyup_data - Copy up len bytes of a lower file's data to a new file
	 * @lower: path of source file in lower layer
	 * @new_path: path (vfsmount and dentry) of the target file
	 * @len: number of bytes to copy
	 *
	 * Opens @lower read-only and @new_path write-only with the current
	 * credentials and splices @len bytes, starting at offset 0, from one to
	 * the other.
	 *
	 * Returns 0 on success (including when @len is 0, in which case nothing
	 * is opened) or a negative error code from dentry_open() or
	 * do_splice_direct().
	 */
	static int union_copyup_data(struct path *lower, struct path *new_path,
				     size_t len)
	{
		const struct cred *cred = current_cred();
		struct file *lower_file;
		struct file *new_file;
		loff_t offset = 0;
		long bytes;
		int error = 0;

		if (len == 0)
			return 0;

		/* O_LARGEFILE: union_copyup() hands us sizes up to the ssize_t
		 * range.  Opens lacking this flag are normally refused with
		 * -EOVERFLOW once the file exceeds the non-LFS limit, and writes
		 * past that limit are rejected as well.
		 */
		lower_file = dentry_open(lower, O_RDONLY | O_LARGEFILE, cred);
		if (IS_ERR(lower_file))
			return PTR_ERR(lower_file);

		new_file = dentry_open(new_path, O_WRONLY | O_LARGEFILE, cred);
		if (IS_ERR(new_file)) {
			error = PTR_ERR(new_file);
			goto out_fput;
		}

		/* A negative return is an error code; a non-negative byte count
		 * is treated as success.
		 */
		bytes = do_splice_direct(lower_file, &offset, new_file, len,
					 SPLICE_F_MOVE);
		if (bytes < 0)
			error = bytes;

		fput(new_file);
	out_fput:
		fput(lower_file);
		return error;
	}

	/**
	 * union_copyup_file - Copy up a regular file, symlink or special file
	 * @parent: Parent dir on upper fs
	 * @lower: path of file to be copied up
	 * @dentry: dentry to copy up to
	 * @len: number of bytes of file data to copy up
	 *
	 * The copy is made under temporary kernel credentials whose fsuid and
	 * fsgid are set to the owner and group of the @lower inode.  The caller's
	 * credentials are restored before returning.
	 *
	 * The caller must hold the i_mutex of @parent's directory inode.
	 *
	 * Returns 0 on success, -ENOMEM if the credentials cannot be allocated,
	 * -EXDEV for anything that is neither a regular file nor a symlink, or
	 * the error from the create/copy helpers.
	 */
	static int union_copyup_file(struct path *parent, struct path *lower,
				     struct dentry *dentry, size_t len)
	{
		struct inode *lower_inode = lower->dentry->d_inode;
		const struct cred *saved_cred;
		struct cred *override_cred;
		struct path to;
		int error;

		BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));

		override_cred = prepare_kernel_cred(NULL);
		if (!override_cred)
			return -ENOMEM;

		/* Act as the owner of the lower file while creating the copy. */
		override_cred->fsuid = lower_inode->i_uid;
		override_cred->fsgid = lower_inode->i_gid;

		saved_cred = override_creds(override_cred);

		if (S_ISREG(lower_inode->i_mode)) {
			error = union_create_file(parent, lower, dentry);
			if (error)
				goto out;

			to.mnt = parent->mnt;
			to.dentry = dentry;
			error = union_copyup_data(lower, &to, len);
			if (error)
				/* Most likely error: ENOSPC.  Best-effort
				 * removal of the partially copied file.
				 */
				vfs_unlink(parent->dentry->d_inode, dentry);
		} else if (S_ISLNK(lower_inode->i_mode)) {
			error = union_create_symlink(parent, lower, dentry);
		} else {
			/* Don't currently support copyup of special files,
			 * though in theory there's no reason we couldn't at
			 * least copy up blockdev, chrdev and FIFO files
			 */
			error = -EXDEV;
		}

	out:
		revert_creds(saved_cred);
		put_cred(override_cred);
		return error;
	}

	/**
	 * union_copyup - Copy up a file and len bytes of data
	 * @parent: Parent dir on upper fs
	 * @path: Path of file to be copied up from
	 * @copy_all: Copy all the file (if true) or just @len bytes of it
	 * @len: Amount of file data to copy up
	 *
	 * Parent's i_mutex must be held by caller.  Newly copied up path is
	 * returned in @path and original is path_put().
	 *
	 * NOTE! If a copy up takes place, path->mnt will be changed to the same as
	 * the topmost dir, but won't have a ref taken on it.
	 *
	 * Returns 0 if the copy up succeeded and also, with @path left untouched,
	 * if @parent is not unioned, if @path is already on @parent's mount, if
	 * the name already exists in @parent, or if the source is neither a
	 * regular file nor a symlink.  Returns -ENOENT if @parent is dead, -EFBIG
	 * if the file size does not fit in ssize_t, or the error from the lookup
	 * or copy.
	 */
	int union_copyup(struct path *parent, struct path *path,
			 bool copy_all, size_t len)
	{
		struct dentry *top_dentry;
		int error;

		pr_devel("-->%s(%s,%s)\n", __func__,
			 parent->dentry->d_name.name,
			 path->dentry->d_name.name);

		BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));

		/* Nothing to do unless the parent is unioned and the file lives
		 * on a different mount than the parent.
		 */
		if (!IS_DIR_UNIONED(parent->dentry) || parent->mnt == path->mnt)
			return 0;

		BUG_ON(!S_ISDIR(parent->dentry->d_inode->i_mode));
		if (IS_DEADDIR(parent->dentry->d_inode))
			return -ENOENT;

		if (copy_all && S_ISREG(path->dentry->d_inode->i_mode)) {
			loff_t filesize = i_size_read(path->dentry->d_inode);
			/* Check for overflow of file size */
			if ((ssize_t)filesize != filesize)
				return -EFBIG;
			len = filesize;
		}

		/* Look the name up in the upper parent; this returns a
		 * referenced dentry that must be dput() on every exit path that
		 * does not hand it over to @path.
		 */
		top_dentry = lookup_one_len(path->dentry->d_name.name,
					    parent->dentry,
					    path->dentry->d_name.len);
		if (IS_ERR(top_dentry))
			return PTR_ERR(top_dentry);

		if (top_dentry->d_inode) {
			/* We raced with someone else and "lost". That's okay,
			 * they did all the work of copying up the file.
			 *
			 * Note that currently data copyup happens under the
			 * parent dir's i_mutex. If we move it outside that,
			 * we'll need some way of waiting for the data copyup to
			 * complete here.
			 *
			 * NOTE(review): @path is left pointing at the lower
			 * file here; confirm that callers cope with that.
			 */
			pr_devel("<--%s() = 0 [lost]\n", __func__);
			/* Drop the lookup reference instead of leaking it. */
			dput(top_dentry);
			return 0;
		}

		/* Only regular files and symlinks are copied up; anything else
		 * is reported as success without touching @path.
		 */
		error = 0;
		if (!S_ISREG(path->dentry->d_inode->i_mode) &&
		    !S_ISLNK(path->dentry->d_inode->i_mode))
			goto out_dput;

		pr_devel("- copy!\n");
		error = union_copyup_file(parent, path, top_dentry, len);
		if (error < 0)
			goto out_dput;
		pr_devel("- copied\n");

		/* Release the lower path and hand the new dentry's reference
		 * over to @path.  No reference is taken on parent->mnt (see
		 * the NOTE in the header comment).
		 */
		path_put(path);
		path->mnt = parent->mnt;
		path->dentry = top_dentry;
		return 0;

	out_dput:
		dput(top_dentry);
		pr_devel("<--%s() = %d\n", __func__, error);
		return error;
	}