fuse2 + sample
usage:
linux/samples/fuse2/loraw -2 -p -t ~/mnt/fuse/
options:
-d: debug
-s: single threaded
-b: FUSE_DEV_IOC_CLONE (v1)
-p: use ioctl for device I/O (v2)
-m: use "map read", transferring an offset into a file instead of the actual data
-1: use regular fuse
-2: use experimental fuse2
-t: use shared memory instead of threads
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
diff --git a/fs/Kconfig b/fs/Kconfig
index bfb1c60..27e98bd 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -118,6 +118,7 @@
source "fs/autofs/Kconfig"
source "fs/fuse/Kconfig"
+source "fs/fuse2/Kconfig"
source "fs/overlayfs/Kconfig"
menu "Caches"
diff --git a/fs/Makefile b/fs/Makefile
index d60089f..f259c3d 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -108,6 +108,7 @@
obj-$(CONFIG_AUTOFS_FS) += autofs/
obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
+obj-$(CONFIG_FUSE2_CORE) += fuse2/
obj-$(CONFIG_OVERLAY_FS) += overlayfs/
obj-$(CONFIG_ORANGEFS_FS) += orangefs/
obj-$(CONFIG_UDF_FS) += udf/
diff --git a/fs/fuse2/Kconfig b/fs/fuse2/Kconfig
new file mode 100644
index 0000000..0b726c1
--- /dev/null
+++ b/fs/fuse2/Kconfig
@@ -0,0 +1,7 @@
+config FUSE2_CORE
+ tristate
+ select FS_POSIX_ACL
+
+config FUSE2_FS
+ tristate "Experimental new fuse driver"
+ select FUSE2_CORE
diff --git a/fs/fuse2/Makefile b/fs/fuse2/Makefile
new file mode 100644
index 0000000..9d805b1
--- /dev/null
+++ b/fs/fuse2/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the experimental FUSE2 filesystem.
+#
+
+obj-$(CONFIG_FUSE2_CORE) += fuse2_core.o
+obj-$(CONFIG_FUSE2_FS) += fuse2.o
+
+fuse2-y := fudev.o
+fuse2_core-y := request.o dir.o file.o inode.o xattr.o acl.o readdir.o map.o
diff --git a/fs/fuse2/acl.c b/fs/fuse2/acl.c
new file mode 100644
index 0000000..e237f8c
--- /dev/null
+++ b/fs/fuse2/acl.c
@@ -0,0 +1,99 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2016 Canonical Ltd. <seth.forshee@canonical.com>
+ *
+ * This program can be distributed under the terms of the GNU GPL.
+ * See the file COPYING.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+
+/* Fetch the @type POSIX ACL of @inode from userspace via its xattr. */
+struct posix_acl *fuse2_get_acl(struct inode *inode, int type)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct posix_acl *acl = NULL;
+	const char *xname;
+	void *buf;
+	int len;
+
+	if (!fc->posix_acl || fc->no_getxattr)
+		return NULL;
+	if (type != ACL_TYPE_ACCESS && type != ACL_TYPE_DEFAULT)
+		return ERR_PTR(-EOPNOTSUPP);
+	xname = type == ACL_TYPE_ACCESS ? XATTR_NAME_POSIX_ACL_ACCESS :
+					  XATTR_NAME_POSIX_ACL_DEFAULT;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	len = fuse2_getxattr(inode, xname, buf, PAGE_SIZE);
+	if (len > 0) {
+		acl = posix_acl_from_xattr(fc->user_ns, buf, len);
+	} else if (len == -ERANGE) {
+		/* Attribute does not fit in the one-page buffer. */
+		acl = ERR_PTR(-E2BIG);
+	} else if (len && len != -ENODATA &&
+		   !(len == -EOPNOTSUPP && fc->no_getxattr)) {
+		acl = ERR_PTR(len);
+	}
+	/* Otherwise there is no ACL: acl keeps its initial NULL. */
+
+	kfree(buf);
+	/* Drop any ACL cached on the inode for this type. */
+	forget_cached_acl(inode, type);
+	return acl;
+}
+
+int fuse2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ const char *name;
+ int ret;
+
+ if (!fc->posix_acl || fc->no_setxattr)
+ return -EOPNOTSUPP;
+
+ if (type == ACL_TYPE_ACCESS)
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
+ else if (type == ACL_TYPE_DEFAULT)
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
+ else
+ return -EINVAL;
+
+ if (acl) {
+ /*
+ * Fuse userspace is responsible for updating access
+ * permissions in the inode, if needed. fuse2_setxattr
+ * invalidates the inode attributes, which will force
+ * them to be refreshed the next time they are used,
+ * and it also updates i_ctime.
+ */
+ size_t size = posix_acl_xattr_size(acl->a_count);
+ void *value;
+
+ if (size > PAGE_SIZE)
+ return -E2BIG;
+
+ value = kmalloc(size, GFP_KERNEL);
+ if (!value)
+ return -ENOMEM;
+
+ ret = posix_acl_to_xattr(fc->user_ns, acl, value, size);
+ if (ret < 0) {
+ kfree(value);
+ return ret;
+ }
+
+ ret = fuse2_setxattr(inode, name, value, size, 0);
+ kfree(value);
+ } else {
+ ret = fuse2_removexattr(inode, name);
+ }
+
+ return ret;
+}
diff --git a/fs/fuse2/dev.h b/fs/fuse2/dev.h
new file mode 100644
index 0000000..487d72c7
--- /dev/null
+++ b/fs/fuse2/dev.h
@@ -0,0 +1,157 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2019 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include <linux/fuse.h>
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/refcount.h>
+#include <linux/mm_types.h>
+
+/*
+ * Request flags: bit numbers in fuse_req.flags, used with test/set/clear_bit()
+ * FR_ISREPLY: set if the request has reply
+ * FR_FORCE: force sending of the request
+ * FR_PENDING: request is not yet in userspace
+ * FR_SENT: request sent to userspace
+ * FR_FINISHED: request is finished
+ * FR_ZEROTAIL: zero tail of returned data
+ * FR_KILLABLE: presumably mirrors fuse_args.killable - TODO confirm
+ */
+enum fuse_req_flag {
+ FR_ISREPLY,
+ FR_FORCE,
+ FR_PENDING,
+ FR_SENT,
+ FR_FINISHED,
+ FR_ZEROTAIL,
+ FR_KILLABLE,
+};
+
+/* FUSE page descriptor */
+struct fuse_page_desc {
+ unsigned int length;
+ unsigned int offset;
+};
+
+/* Number of page pointers embedded in fuse_req */
+#define FUSE_REQ_INLINE_PAGES 1
+
+#define FUSE_REQ_INLINE_DATA 196
+
+struct fuse_req {
+ /* This can be on either pending or processing lists */
+ struct list_head list;
+
+ /* refcount */
+ refcount_t count;
+
+ /* Request flags, updated with test/set/clear_bit() */
+ unsigned long flags;
+
+ union {
+ /* The request header */
+ struct fuse_in_header inh;
+
+ /* The reply header */
+ struct fuse_out_header outh;
+
+ /* Inline data */
+ char inlinedata[FUSE_REQ_INLINE_DATA];
+ };
+
+ /* length of inline in data */
+ unsigned short inline_inlen;
+
+ /* length of inline out data */
+ unsigned short inline_outlen;
+
+ /* mandatory out len */
+ unsigned int mand_outlen;
+
+ /* max out len */
+ unsigned int max_outlen;
+
+ /* size of the 'pages' array */
+ unsigned short max_pages;
+
+ /* number of pages in vector */
+ unsigned short num_pages;
+
+ /* Used to wake up the task waiting for completion of request*/
+ wait_queue_head_t waitq;
+
+ /* page vector */
+ struct page **pages;
+
+ /* page-descriptor vector */
+ struct fuse_page_desc *page_descs;
+
+ /* inline page vector */
+ struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
+
+ /* inline page-descriptor vector */
+ struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
+};
+
+/* One forget request */
+struct fuse_forget {
+ struct fuse_forget_one forget_one;
+ struct list_head list;
+};
+
+/** One input argument of a request */
+struct fuse_in_arg {
+ unsigned size;
+ const void *value;
+};
+
+/** One output argument of a request */
+struct fuse_arg {
+ unsigned size;
+ void *value;
+};
+
+struct fuse_args {
+ bool force:1;
+ bool killable:1;
+ struct {
+ struct {
+ uint32_t opcode;
+ uint64_t nodeid;
+ } h;
+ unsigned numargs;
+ struct fuse_in_arg args[3];
+
+ } in;
+ struct {
+ unsigned argvar:1;
+ unsigned numargs;
+ struct fuse_arg args[2];
+ } out;
+};
+
+struct fuse_dev_operations {
+ void (*put)(void *dev);
+ void (*abort)(void *dev);
+ int (*send)(void *dev, struct fuse_req *req);
+ ssize_t (*simple_send)(void *dev, struct fuse_args *args, uid_t uid,
+ gid_t gid, pid_t pid);
+ void (*forget)(void *dev, struct fuse_forget *forget);
+};
+
+struct dentry *fuse_mount_common(struct file_system_type *fs_type,
+ int flags, void *opts,
+ const struct fuse_dev_operations *dev_ops,
+ void *dev_priv);
+void fuse_kill_sb(struct super_block *sb);
+void fuse2_get_request(struct fuse_req *req);
+void fuse2_put_request(struct fuse_req *req);
+int fuse2_map_open(struct super_block *sb, struct file *file);
+int fuse2_map_close(struct super_block *sb, unsigned long mapfd);
diff --git a/fs/fuse2/dir.c b/fs/fuse2/dir.c
new file mode 100644
index 0000000..7b0f13a
--- /dev/null
+++ b/fs/fuse2/dir.c
@@ -0,0 +1,993 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/user_namespace.h>
+
+static void fuse_lookup_init(struct fuse_args *args, u64 nodeid,
+ const struct qstr *name,
+ struct fuse_entry_out *outarg)
+{
+ memset(outarg, 0, sizeof(struct fuse_entry_out));
+ args->in.h.opcode = FUSE_LOOKUP;
+ args->in.h.nodeid = nodeid;
+ args->in.numargs = 1;
+ args->in.args[0].size = name->len + 1;
+ args->in.args[0].value = name->name;
+ args->out.numargs = 1;
+ args->out.args[0].size = sizeof(struct fuse_entry_out);
+ args->out.args[0].value = outarg;
+}
+
+/*
+ * Check whether the dentry is still valid
+ *
+ * If the entry validity timeout has expired and the dentry is
+ * positive, try to redo the lookup. If the lookup results in a
+ * different inode, then let the VFS invalidate the dentry and redo
+ * the lookup once more. If the lookup results in the same inode,
+ * then refresh the attributes, timeouts and mark the dentry valid.
+ */
+static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
+{
+ struct inode *inode;
+ struct dentry *parent;
+ struct fuse_conn *fc;
+ struct fuse_inode *fi;
+ struct fuse_entry_out outarg;
+ FUSE_ARGS(args);
+ int ret;
+
+ inode = d_inode_rcu(entry);
+ if (inode && is_bad_inode(inode))
+ goto invalid;
+
+
+ /* For negative dentries, always do a fresh lookup */
+ if (!inode)
+ goto invalid;
+
+ ret = -ECHILD;
+ if (flags & LOOKUP_RCU)
+ goto out;
+
+ fc = get_fuse_conn(inode);
+
+ parent = dget_parent(entry);
+ fuse_lookup_init(&args, get_node_id(d_inode(parent)),
+ &entry->d_name, &outarg);
+ ret = fuse2_simple_request(fc, &args);
+ dput(parent);
+ /* Zero nodeid is same as -ENOENT */
+ if (!ret && !outarg.nodeid)
+ ret = -ENOENT;
+ if (!ret) {
+ fi = get_fuse_inode(inode);
+ if (outarg.nodeid != get_node_id(inode)) {
+ fuse2_force_forget(fc, outarg.nodeid);
+ goto invalid;
+ }
+ spin_lock(&fi->lock);
+ fi->nlookup++;
+ spin_unlock(&fi->lock);
+ }
+ if (ret == -ENOMEM)
+ goto out;
+ if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ goto invalid;
+
+ fuse2_change_attributes(inode, &outarg.attr);
+ ret = 1;
+out:
+ return ret;
+
+invalid:
+ ret = 0;
+ goto out;
+}
+
+const struct dentry_operations fuse2_dentry_operations = {
+ .d_revalidate = fuse_dentry_revalidate,
+// .d_delete = always_delete_dentry,
+};
+
+static int fuse_valid_type(int m)
+{
+ return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
+ S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
+}
+
+/* Look up @name under @nodeid; *@inode stays NULL for a negative reply. */
+static int fuse_lookup_name(struct super_block *sb, u64 nodeid,
+			    const struct qstr *name,
+			    struct fuse_entry_out *outarg, struct inode **inode)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+	FUSE_ARGS(args);
+	int err;
+
+	*inode = NULL;
+	if (name->len > FUSE_NAME_MAX)
+		return -ENAMETOOLONG;
+
+	fuse_lookup_init(&args, nodeid, name, outarg);
+	err = fuse2_simple_request(fc, &args);
+	/* Zero nodeid is same as -ENOENT, but with valid timeout */
+	if (err || !outarg->nodeid)
+		return err;
+	/* nodeid is non-zero here, so only the file type is left to validate */
+	if (!fuse_valid_type(outarg->attr.mode))
+		return -EIO;
+
+	*inode = fuse2_iget(sb, outarg->nodeid, outarg->generation,
+			    &outarg->attr);
+	if (!*inode) {
+		fuse2_force_forget(fc, outarg->nodeid);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * ->lookup() for directories: resolve @entry in @dir.
+ *
+ * A -ENOENT reply from userspace is not an error here: it yields a negative
+ * dentry. A reply that names the root node is rejected with -EIO.
+ *
+ * Returns whatever d_splice_alias() returns, or an ERR_PTR() on failure.
+ */
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+				  unsigned int flags)
+{
+	struct fuse_entry_out outarg;
+	struct inode *inode;
+	int err;
+
+	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
+			       &outarg, &inode);
+	/* inode is NULL on -ENOENT, so d_splice_alias() sees a negative entry */
+	if (err && err != -ENOENT)
+		return ERR_PTR(err);
+
+	if (inode && get_node_id(inode) == FUSE_ROOT_ID) {
+		iput(inode);
+		return ERR_PTR(-EIO);
+	}
+
+	return d_splice_alias(inode, entry);
+}
+
+/*
+ * Atomic create+open operation
+ *
+ * If the filesystem doesn't support this, then fall back to separate
+ * 'mknod' + 'open' requests.
+ */
+static int fuse_create_open(struct inode *dir, struct dentry *entry,
+			    struct file *file, unsigned flags,
+			    umode_t mode)
+{
+	int err;
+	struct inode *inode;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	FUSE_ARGS(args);
+	struct fuse_create_in inarg;
+	struct fuse_open_out outopen;
+	struct fuse_entry_out outentry;
+	struct fuse_file *ff;
+
+	/* Userspace expects S_IFREG in create mode */
+	BUG_ON((mode & S_IFMT) != S_IFREG);
+
+	ff = fuse2_file_alloc(fc);
+	if (!ff)
+		return -ENOMEM;
+
+	mode &= ~current_umask();
+
+	flags &= ~O_NOCTTY;
+	memset(&inarg, 0, sizeof(inarg));
+	memset(&outentry, 0, sizeof(outentry));
+	inarg.flags = flags;
+	inarg.mode = mode;
+	inarg.umask = current_umask();
+	args.in.h.opcode = FUSE_CREATE;
+	args.in.h.nodeid = get_node_id(dir);
+	args.in.numargs = 2;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	args.in.args[1].size = entry->d_name.len + 1;
+	args.in.args[1].value = entry->d_name.name;
+	args.out.numargs = 2;
+	args.out.args[0].size = sizeof(outentry);
+	args.out.args[0].value = &outentry;
+	args.out.args[1].size = sizeof(outopen);
+	args.out.args[1].value = &outopen;
+	err = fuse2_simple_request(fc, &args);
+	if (err)
+		goto out_free_ff;
+
+	err = -EIO;
+	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
+		goto out_free_ff;
+
+	ff->fh = outopen.fh;
+	ff->nodeid = outentry.nodeid;
+	ff->open_flags = outopen.open_flags;
+	inode = fuse2_iget(dir->i_sb, outentry.nodeid, outentry.generation,
+			   &outentry.attr);
+	if (!inode) {
+		/* No inode: release the userspace handle and forget the node */
+		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
+		fuse2_release_common(fc, ff, flags, NULL, false);
+		fuse2_force_forget(fc, outentry.nodeid);
+		return -ENOMEM;
+	}
+	d_instantiate(entry, inode);
+	err = finish_open(file, entry, generic_file_open);
+	if (err) {
+		/* The VFS open failed: hand the file handle back to userspace */
+		fuse2_release_common(fc, ff, flags, NULL, false);
+	} else {
+		file->private_data = ff;
+		fuse2_finish_open(inode, file);
+	}
+	return err;
+
+out_free_ff:
+	fuse2_file_free(ff);
+	return err;
+}
+
+static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
+ struct file *file, unsigned flags,
+ umode_t mode)
+{
+ int err;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct dentry *res = NULL;
+
+ if (d_in_lookup(entry)) {
+ res = fuse_lookup(dir, entry, 0);
+ if (IS_ERR(res))
+ return PTR_ERR(res);
+
+ if (res)
+ entry = res;
+ }
+
+ if (!(flags & O_CREAT) || d_really_is_positive(entry))
+ goto no_open;
+
+ /* Only creates */
+ file->f_mode |= FMODE_CREATED;
+
+ if (fc->no_create)
+ goto mknod;
+
+ err = fuse_create_open(dir, entry, file, flags, mode);
+ if (err == -ENOSYS) {
+ fc->no_create = 1;
+ goto mknod;
+ }
+out_dput:
+ dput(res);
+ return err;
+
+mknod:
+ err = fuse_mknod(dir, entry, mode, 0);
+ if (err)
+ goto out_dput;
+no_open:
+ return finish_no_open(file, res);
+}
+
+/*
+ * Code shared between mknod, mkdir, symlink and link
+ */
+static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
+ struct inode *dir, struct dentry *entry,
+ umode_t mode)
+{
+ struct fuse_entry_out outarg;
+ struct inode *inode;
+ struct dentry *d;
+ int err;
+
+ memset(&outarg, 0, sizeof(outarg));
+ args->in.h.nodeid = get_node_id(dir);
+ args->out.numargs = 1;
+ args->out.args[0].size = sizeof(outarg);
+ args->out.args[0].value = &outarg;
+
+ err = fuse2_simple_request(fc, args);
+ if (err)
+ return err;
+
+ if (invalid_nodeid(outarg.nodeid))
+ return -EIO;
+
+ if ((outarg.attr.mode ^ mode) & S_IFMT)
+ return -EIO;
+
+ inode = fuse2_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+ &outarg.attr);
+ if (!inode) {
+ fuse2_force_forget(fc, outarg.nodeid);
+ return -ENOMEM;
+ }
+
+ d_drop(entry);
+ d = d_splice_alias(inode, entry);
+ if (IS_ERR(d))
+ return PTR_ERR(d);
+
+ dput(d);
+
+ return 0;
+}
+
+static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
+ dev_t rdev)
+{
+ struct fuse_mknod_in inarg;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ FUSE_ARGS(args);
+
+ mode &= ~current_umask();
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.mode = mode;
+ inarg.rdev = new_encode_dev(rdev);
+ inarg.umask = current_umask();
+ args.in.h.opcode = FUSE_MKNOD;
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, &args, dir, entry, mode);
+}
+
+static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
+ bool excl)
+{
+ return fuse_mknod(dir, entry, mode, 0);
+}
+
+static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
+{
+ struct fuse_mkdir_in inarg;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ FUSE_ARGS(args);
+
+ mode &= ~current_umask();
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.mode = mode;
+ inarg.umask = current_umask();
+ args.in.h.opcode = FUSE_MKDIR;
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, &args, dir, entry, S_IFDIR);
+}
+
+static int fuse_symlink(struct inode *dir, struct dentry *entry,
+ const char *link)
+{
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ unsigned len = strlen(link) + 1;
+ FUSE_ARGS(args);
+
+ args.in.h.opcode = FUSE_SYMLINK;
+ args.in.numargs = 2;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ args.in.args[1].size = len;
+ args.in.args[1].value = link;
+ return create_new_entry(fc, &args, dir, entry, S_IFLNK);
+}
+
+static int fuse_unlink(struct inode *dir, struct dentry *entry)
+{
+ int err;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ FUSE_ARGS(args);
+
+ args.in.h.opcode = FUSE_UNLINK;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 1;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ err = fuse2_simple_request(fc, &args);
+ if (!err) {
+ struct inode *inode = d_inode(entry);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fi->lock);
+ /*
+ * If i_nlink == 0 then unlink doesn't make sense, yet this can
+ * happen if userspace filesystem is careless. It would be
+ * difficult to enforce correct nlink usage so just ignore this
+ * condition here
+ */
+ if (inode->i_nlink > 0)
+ drop_nlink(inode);
+ spin_unlock(&fi->lock);
+ }
+ return err;
+}
+
+static int fuse_rmdir(struct inode *dir, struct dentry *entry)
+{
+ int err;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ FUSE_ARGS(args);
+
+ args.in.h.opcode = FUSE_RMDIR;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 1;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ err = fuse2_simple_request(fc, &args);
+ if (!err)
+ clear_nlink(d_inode(entry));
+
+ return err;
+}
+
+static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent,
+ unsigned int flags, int opcode, size_t argsize)
+{
+ struct fuse_rename2_in inarg;
+ struct fuse_conn *fc = get_fuse_conn(olddir);
+ FUSE_ARGS(args);
+
+ memset(&inarg, 0, argsize);
+ inarg.newdir = get_node_id(newdir);
+ inarg.flags = flags;
+ args.in.h.opcode = opcode;
+ args.in.h.nodeid = get_node_id(olddir);
+ args.in.numargs = 3;
+ args.in.args[0].size = argsize;
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = oldent->d_name.len + 1;
+ args.in.args[1].value = oldent->d_name.name;
+ args.in.args[2].size = newent->d_name.len + 1;
+ args.in.args[2].value = newent->d_name.name;
+
+ return fuse2_simple_request(fc, &args);
+}
+
+static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent,
+ unsigned int flags)
+{
+ struct fuse_conn *fc = get_fuse_conn(olddir);
+ int err;
+
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ return -EINVAL;
+
+ if (flags) {
+ if (fc->no_rename2)
+ return -EINVAL;
+
+ err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
+ FUSE_RENAME2,
+ sizeof(struct fuse_rename2_in));
+ if (err == -ENOSYS) {
+ fc->no_rename2 = 1;
+ err = -EINVAL;
+ }
+ } else {
+ err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
+ FUSE_RENAME,
+ sizeof(struct fuse_rename_in));
+ }
+
+ return err;
+}
+
+static int fuse_link(struct dentry *entry, struct inode *newdir,
+ struct dentry *newent)
+{
+ int err;
+ struct fuse_link_in inarg;
+ struct inode *inode = d_inode(entry);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.oldnodeid = get_node_id(inode);
+ args.in.h.opcode = FUSE_LINK;
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = newent->d_name.len + 1;
+ args.in.args[1].value = newent->d_name.name;
+ err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
+ if (!err) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fi->lock);
+ inc_nlink(inode);
+ spin_unlock(&fi->lock);
+ }
+ return err;
+}
+
+/* Fill @stat from the attributes in @attr supplied for @inode. */
+static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
+			  struct kstat *stat)
+{
+	unsigned int blkbits;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	stat->dev = inode->i_sb->s_dev;
+	stat->ino = attr->ino;
+	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
+	stat->nlink = attr->nlink;
+	stat->uid = make_kuid(fc->user_ns, attr->uid);
+	stat->gid = make_kgid(fc->user_ns, attr->gid);
+	stat->rdev = inode->i_rdev;
+	stat->atime.tv_sec = attr->atime;
+	stat->atime.tv_nsec = attr->atimensec;
+	stat->mtime.tv_sec = attr->mtime;
+	stat->mtime.tv_nsec = attr->mtimensec;
+	stat->ctime.tv_sec = attr->ctime;
+	stat->ctime.tv_nsec = attr->ctimensec;
+	stat->size = attr->size;
+	stat->blocks = attr->blocks;
+
+	/* Round a non-zero blksize down to a power of two; else use the sb one */
+	blkbits = attr->blksize ? ilog2(attr->blksize) :
+				  inode->i_sb->s_blocksize_bits;
+	/* 1U: blkbits can be 31, which a signed int shift cannot represent */
+	stat->blksize = 1U << blkbits;
+}
+
+static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
+ struct file *file)
+{
+ int err;
+ struct fuse_getattr_in inarg;
+ struct fuse_attr_out outarg;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+
+
+ memset(&inarg, 0, sizeof(inarg));
+ memset(&outarg, 0, sizeof(outarg));
+ /* Directories have separate file-handle space */
+ if (file && S_ISREG(inode->i_mode)) {
+ struct fuse_file *ff = file->private_data;
+
+ inarg.getattr_flags |= FUSE_GETATTR_FH;
+ inarg.fh = ff->fh;
+ }
+ args.in.h.opcode = FUSE_GETATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse2_simple_request(fc, &args);
+ if (!err) {
+ if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ make_bad_inode(inode);
+ err = -EIO;
+ } else {
+ fuse2_change_attributes(inode, &outarg.attr);
+ if (stat)
+ fuse_fillattr(inode, &outarg.attr, stat);
+ }
+ }
+ return err;
+}
+
+int fuse2_update_attributes(struct inode *inode, struct file *file)
+{
+ return fuse_do_getattr(inode, NULL, file);
+}
+
+/*
+ * Calling into a user-controlled filesystem gives the filesystem
+ * daemon ptrace-like capabilities over the current process. This
+ * means that the filesystem daemon is able to record the exact
+ * filesystem operations performed, and can also control the behavior
+ * of the requester process in otherwise impossible ways. For example
+ * it can delay the operation for arbitrary length of time allowing
+ * DoS against the requester.
+ *
+ * NOTE(review): this variant only checks current_in_userns(fc->user_ns);
+ * it does not compare credentials with the mount owner, so the "ptrace
+ * privilege" rule from the original wording is not enforced here - confirm.
+ */
+int fuse2_allow_current_process(struct fuse_conn *fc)
+{
+ return current_in_userns(fc->user_ns);
+}
+
+static int fuse_permission(struct inode *inode, int mask)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ if (mask & MAY_NOT_BLOCK)
+ return -ECHILD;
+
+ if (!fuse2_allow_current_process(fc))
+ return -EACCES;
+
+ err = fuse_do_getattr(inode, NULL, NULL);
+ if (err)
+ return err;
+
+ return generic_permission(inode, mask);
+}
+
+/* Read the symlink target of @inode into @page; -EIO on a bad reply length. */
+static int fuse_readlink_page(struct inode *inode, struct page *page)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	unsigned int count = PAGE_SIZE - 1;
+	int err;
+
+	req = fuse2_get_req(fc, 1);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->inline_inlen = sizeof(struct fuse_in_header);
+	req->mand_outlen = req->inline_outlen = sizeof(struct fuse_out_header);
+	req->max_outlen = req->mand_outlen + count;
+	req->num_pages = 1;
+	get_page(req->pages[0] = page);
+	req->page_descs[0].length = count;
+	req->inh.opcode = FUSE_READLINK;
+	req->inh.nodeid = get_node_id(inode);
+	req->inh.len = req->inline_inlen;
+	__set_bit(FR_ZEROTAIL, &req->flags);
+
+	err = fuse2_request_send(fc, req);
+	if (!err) {
+		char *link = page_address(page);
+		size_t len = req->outh.len - req->mand_outlen;
+
+		/* A short (wrapped) or oversized length must not index past the page */
+		if (WARN_ON(len >= PAGE_SIZE))
+			err = -EIO;
+		else
+			link[len] = '\0';
+	}
+	fuse2_put_request(req);
+	return err;
+}
+
+static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
+{
+ struct page *page;
+ int err;
+
+ err = -EIO;
+ if (is_bad_inode(inode))
+ goto out_err;
+
+ err = -ECHILD;
+ if (!dentry)
+ goto out_err;
+
+ page = alloc_page(GFP_KERNEL);
+ err = -ENOMEM;
+ if (!page)
+ goto out_err;
+
+ err = fuse_readlink_page(inode, page);
+ if (err) {
+ put_page(page);
+ goto out_err;
+ }
+
+ set_delayed_call(callback, page_put_link, page);
+
+ return page_address(page);
+
+out_err:
+ return ERR_PTR(err);
+}
+
+static int fuse_dir_open(struct inode *inode, struct file *file)
+{
+ return fuse2_open_common(inode, file, true);
+}
+
+static int fuse_dir_release(struct inode *inode, struct file *file)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+
+ fuse2_release_common(fc, ff, file->f_flags, (fl_owner_t) file, true);
+
+ return 0;
+}
+
+static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ if (fc->no_fsyncdir)
+ return 0;
+
+ inode_lock(inode);
+ err = fuse2_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
+ if (err == -ENOSYS) {
+ fc->no_fsyncdir = 1;
+ err = 0;
+ }
+ inode_unlock(inode);
+
+ return err;
+}
+
+/*
+ * Decide whether mtime should be sent with a setattr request.
+ *
+ * An explicitly set mtime is always sent.  Otherwise it is sent unless the
+ * change is a truncation done through open(O_TRUNC) or ftruncate().
+ */
+static bool update_mtime(unsigned ivalid)
+{
+	bool mtime_set = ivalid & ATTR_MTIME_SET;
+	bool trunc = (ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE));
+
+	return mtime_set || !trunc;
+}
+
+static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
+ struct fuse_setattr_in *arg)
+{
+ unsigned ivalid = iattr->ia_valid;
+
+ if (ivalid & ATTR_MODE)
+ arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
+ if (ivalid & ATTR_UID)
+ arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
+ if (ivalid & ATTR_GID)
+ arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
+ if (ivalid & ATTR_SIZE)
+ arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
+ if (ivalid & ATTR_ATIME) {
+ arg->valid |= FATTR_ATIME;
+ arg->atime = iattr->ia_atime.tv_sec;
+ arg->atimensec = iattr->ia_atime.tv_nsec;
+ if (!(ivalid & ATTR_ATIME_SET))
+ arg->valid |= FATTR_ATIME_NOW;
+ }
+ if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) {
+ arg->valid |= FATTR_MTIME;
+ arg->mtime = iattr->ia_mtime.tv_sec;
+ arg->mtimensec = iattr->ia_mtime.tv_nsec;
+ if (!(ivalid & ATTR_MTIME_SET))
+ arg->valid |= FATTR_MTIME_NOW;
+ }
+}
+
+static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
+ struct inode *inode,
+ struct fuse_setattr_in *inarg_p,
+ struct fuse_attr_out *outarg_p)
+{
+ args->in.h.opcode = FUSE_SETATTR;
+ args->in.h.nodeid = get_node_id(inode);
+ args->in.numargs = 1;
+ args->in.args[0].size = sizeof(*inarg_p);
+ args->in.args[0].value = inarg_p;
+ args->out.numargs = 1;
+ args->out.args[0].size = sizeof(*outarg_p);
+ args->out.args[0].value = outarg_p;
+}
+
+/*
+ * Set attributes, and at the same time refresh them.
+ *
+ * Truncation is slightly complicated, because the 'truncate' request
+ * may fail, in which case we don't want to touch the mapping.
+ * vmtruncate() doesn't allow for this case, so do the rlimit checking
+ * and the actual truncation by hand.
+ *
+ * @dentry: dentry whose inode is being changed
+ * @attr:   requested changes (ATTR_* bits in ia_valid)
+ * @file:   open file supplying the handle for the request, or NULL
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
+			   struct file *file)
+{
+	struct inode *inode = d_inode(dentry);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	FUSE_ARGS(args);
+	struct fuse_setattr_in inarg;
+	struct fuse_attr_out outarg;
+	loff_t oldsize;
+	int err;
+
+	err = setattr_prepare(dentry, attr);
+	if (err)
+		return err;
+
+	if (attr->ia_valid & ATTR_OPEN) {
+		/* This is coming from open(..., ... | O_TRUNC); */
+		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
+		WARN_ON(attr->ia_size != 0);
+
+		/*
+		 * No need to send request to userspace, since actual
+		 * truncation has already been done by OPEN. But still
+		 * need to truncate page cache.
+		 */
+		truncate_pagecache(inode, 0);
+		return 0;
+	}
+
+	/* ATTR_SIZE is only expected on regular files */
+	if ((attr->ia_valid & ATTR_SIZE) && WARN_ON(!S_ISREG(inode->i_mode)))
+		return -EIO;
+
+	memset(&inarg, 0, sizeof(inarg));
+	memset(&outarg, 0, sizeof(outarg));
+	iattr_to_fattr(fc, attr, &inarg);
+	if (file) {
+		struct fuse_file *ff = file->private_data;
+		inarg.valid |= FATTR_FH;
+		inarg.fh = ff->fh;
+	}
+	if (attr->ia_valid & ATTR_SIZE) {
+		/* For mandatory locking in truncate */
+		inarg.valid |= FATTR_LOCKOWNER;
+		inarg.lock_owner = fuse2_lock_owner_id(fc, current->files);
+	}
+	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
+	err = fuse2_simple_request(fc, &args);
+	if (err)
+		return err;
+
+	/* File type changed behind our back: mark the inode bad */
+	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+		make_bad_inode(inode);
+		return -EIO;
+	}
+
+	spin_lock(&fi->lock);
+	fuse2_change_attributes_common(inode, &outarg.attr);
+	oldsize = inode->i_size;
+	i_size_write(inode, outarg.attr.size);
+	spin_unlock(&fi->lock);
+
+	/*
+	 * If the size changed, truncate the page cache to the new size and
+	 * invalidate the remaining cached pages of the mapping.
+	 */
+	if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
+		truncate_pagecache(inode, outarg.attr.size);
+		invalidate_inode_pages2(inode->i_mapping);
+	}
+
+	return 0;
+}
+
+static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+{
+ struct inode *inode = d_inode(entry);
+ struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
+
+ if (!fuse2_allow_current_process(get_fuse_conn(inode)))
+ return -EACCES;
+
+ if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
+ attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
+ ATTR_MODE);
+ }
+ if (!attr->ia_valid)
+ return 0;
+
+ return fuse_do_setattr(entry, attr, file);
+}
+
+static int fuse_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (!fuse2_allow_current_process(fc))
+ return -EACCES;
+
+ return fuse_do_getattr(inode, stat, NULL);
+}
+
+static const struct inode_operations fuse_dir_inode_operations = {
+ .lookup = fuse_lookup,
+ .mkdir = fuse_mkdir,
+ .symlink = fuse_symlink,
+ .unlink = fuse_unlink,
+ .rmdir = fuse_rmdir,
+ .rename = fuse_rename2,
+ .link = fuse_link,
+ .setattr = fuse_setattr,
+ .create = fuse_create,
+ .atomic_open = fuse_atomic_open,
+ .mknod = fuse_mknod,
+ .permission = fuse_permission,
+ .getattr = fuse_getattr,
+ .listxattr = fuse2_listxattr,
+ .get_acl = fuse2_get_acl,
+ .set_acl = fuse2_set_acl,
+};
+
+static const struct file_operations fuse_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .iterate_shared = fuse2_readdir,
+ .open = fuse_dir_open,
+ .release = fuse_dir_release,
+ .fsync = fuse_dir_fsync,
+};
+
+static const struct inode_operations fuse_common_inode_operations = {
+ .setattr = fuse_setattr,
+ .permission = fuse_permission,
+ .getattr = fuse_getattr,
+ .listxattr = fuse2_listxattr,
+ .get_acl = fuse2_get_acl,
+ .set_acl = fuse2_set_acl,
+};
+
+static const struct inode_operations fuse_symlink_inode_operations = {
+ .setattr = fuse_setattr,
+ .get_link = fuse_get_link,
+ .getattr = fuse_getattr,
+ .listxattr = fuse2_listxattr,
+};
+
+void fuse2_init_common(struct inode *inode)
+{
+ inode->i_op = &fuse_common_inode_operations;
+}
+
+void fuse2_init_dir(struct inode *inode)
+{
+ inode->i_op = &fuse_dir_inode_operations;
+ inode->i_fop = &fuse_dir_operations;
+}
+
+void fuse2_init_symlink(struct inode *inode)
+{
+ inode->i_op = &fuse_symlink_inode_operations;
+ inode_nohighmem(inode);
+}
diff --git a/fs/fuse2/file.c b/fs/fuse2/file.c
new file mode 100644
index 0000000..9324262
--- /dev/null
+++ b/fs/fuse2/file.c
@@ -0,0 +1,991 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/module.h>
+#include <linux/compat.h>
+#include <linux/swap.h>
+#include <linux/falloc.h>
+#include <linux/uio.h>
+
+/*
+ * Send an open-type request (@opcode) for @nodeid and store the reply in
+ * @outargp.  O_CREAT, O_EXCL and O_NOCTTY are masked out of the flags that
+ * are passed along.  Returns the result of fuse2_simple_request().
+ */
+static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+			  int opcode, struct fuse_open_out *outargp)
+{
+	const unsigned int stripped = O_CREAT | O_EXCL | O_NOCTTY;
+	struct fuse_open_in inarg;
+	FUSE_ARGS(args);
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.flags = file->f_flags & ~stripped;
+
+	/* request header */
+	args.in.h.nodeid = nodeid;
+	args.in.h.opcode = opcode;
+
+	/* single input argument: the open flags */
+	args.in.args[0].value = &inarg;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.numargs = 1;
+
+	/* single output argument: filled in for the caller */
+	args.out.args[0].value = outargp;
+	args.out.args[0].size = sizeof(*outargp);
+	args.out.numargs = 1;
+
+	return fuse2_simple_request(fc, &args);
+}
+
+/*
+ * Allocate a zeroed fuse_file bound to @fc and give it the next value of
+ * the connection's khctr counter.  Returns NULL if the allocation fails.
+ */
+struct fuse_file *fuse2_file_alloc(struct fuse_conn *fc)
+{
+	struct fuse_file *ff = kzalloc(sizeof(*ff), GFP_KERNEL);
+
+	if (likely(ff)) {
+		ff->fc = fc;
+		ff->kh = atomic64_inc_return(&fc->khctr);
+	}
+
+	return ff;
+}
+
+/* Free a fuse_file obtained from fuse2_file_alloc(). */
+void fuse2_file_free(struct fuse_file *ff)
+{
+ kfree(ff);
+}
+
+/*
+ * Allocate a fuse_file, send OPEN (or OPENDIR when @isdir) for @nodeid and,
+ * on success, attach the fuse_file to @file->private_data.
+ * Returns 0, -ENOMEM, or the error from the open request.
+ */
+static int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+			bool isdir)
+{
+	struct fuse_open_out outarg;
+	struct fuse_file *ff;
+	int err;
+
+	ff = fuse2_file_alloc(fc);
+	if (!ff)
+		return -ENOMEM;
+
+	err = fuse_send_open(fc, nodeid, file,
+			     isdir ? FUSE_OPENDIR : FUSE_OPEN, &outarg);
+	if (err)
+		goto out_free;
+
+	/* remember what the open reply told us */
+	ff->nodeid = nodeid;
+	ff->fh = outarg.fh;
+	ff->open_flags = outarg.open_flags;
+	file->private_data = ff;
+
+	return 0;
+
+out_free:
+	fuse2_file_free(ff);
+	return err;
+}
+
+/*
+ * Post-open fixup: if the file was opened with O_TRUNC, set the in-core
+ * size to zero under fi->lock.
+ */
+void fuse2_finish_open(struct inode *inode, struct file *file)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	if (!(file->f_flags & O_TRUNC))
+		return;
+
+	spin_lock(&fi->lock);
+	i_size_write(inode, 0);
+	spin_unlock(&fi->lock);
+}
+
+/*
+ * Common ->open() path for files and directories: run generic_file_open(),
+ * send the open request, then apply fuse2_finish_open().
+ * Returns 0 or the first error encountered.
+ */
+int fuse2_open_common(struct inode *inode, struct file *file, bool isdir)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	int ret;
+
+	ret = generic_file_open(inode, file);
+	if (ret)
+		return ret;
+
+	ret = fuse_do_open(fc, get_node_id(inode), file, isdir);
+	if (ret)
+		return ret;
+
+	fuse2_finish_open(inode, file);
+	return 0;
+}
+
+/*
+ * Send RELEASE (or RELEASEDIR when @isdir) for @ff and free it.
+ *
+ * The request is marked ->force and its result is ignored.  If flock was
+ * used on this file (ff->flock), the request also asks for the flock to be
+ * dropped and carries the scrambled lock owner derived from @id.
+ *
+ * @ff must not be used by the caller afterwards.
+ */
+void fuse2_release_common(struct fuse_conn *fc, struct fuse_file *ff,
+			  int flags, fl_owner_t id, bool isdir)
+{
+	struct fuse_release_in inarg;
+	FUSE_ARGS(args);
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.fh = ff->fh;
+	inarg.flags = flags;
+	args.force = true;
+	args.in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
+	args.in.h.nodeid = ff->nodeid;
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+
+	if (ff->flock) {
+		inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
+		inarg.lock_owner = fuse2_lock_owner_id(ff->fc, id);
+	}
+	fuse2_simple_request(fc, &args);
+
+	/* Pair fuse2_file_alloc() with its own destructor, not a bare kfree() */
+	fuse2_file_free(ff);
+}
+
+/* ->open() for regular files: the common open path with isdir == false. */
+static int fuse_open(struct inode *inode, struct file *file)
+{
+ return fuse2_open_common(inode, file, false);
+}
+
+/*
+ * ->release() for regular files: send RELEASE and free the fuse_file.
+ * The struct file pointer itself is passed as the lock owner id.
+ */
+static int fuse_release(struct inode *inode, struct file *file)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+
+ fuse2_release_common(fc, ff, file->f_flags, (fl_owner_t) file, false);
+
+ /* return value is ignored by VFS */
+ return 0;
+}
+
+/*
+ * Scramble the ID space with XTEA, so that the value of the files_struct
+ * pointer is not exposed to userspace.
+ *
+ * The pointer value is split into two 32-bit halves (the upper half is zero
+ * where unsigned long is 32 bits wide) and run through 32 rounds keyed with
+ * the four words of fc->scramble_key.  Returns the two scrambled halves
+ * recombined into a u64.
+ */
+u64 fuse2_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
+{
+ u32 *k = fc->scramble_key;
+ u64 v = (unsigned long) id;
+ u32 v0 = v;
+ u32 v1 = v >> 32;
+ u32 sum = 0;
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
+ sum += 0x9E3779B9;
+ v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
+ }
+
+ return (u64) v0 + ((u64) v1 << 32);
+}
+
+/*
+ * ->flush(): report pending mapping errors, then send FLUSH with the
+ * scrambled lock owner.  A server answering -ENOSYS is remembered in
+ * fc->no_flush and later calls return 0 without sending anything.
+ */
+static int fuse_flush(struct file *file, fl_owner_t id)
+{
+	struct inode *inode = file_inode(file);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_flush_in inarg;
+	FUSE_ARGS(args);
+	int ret;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+	if (fc->no_flush)
+		return 0;
+
+	ret = filemap_check_errors(file->f_mapping);
+	if (ret)
+		return ret;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.lock_owner = fuse2_lock_owner_id(fc, id);
+	inarg.fh = ff->fh;
+
+	args.in.args[0].value = &inarg;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.numargs = 1;
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.h.opcode = FUSE_FLUSH;
+	args.force = true;
+
+	ret = fuse2_simple_request(fc, &args);
+	if (ret != -ENOSYS)
+		return ret;
+
+	/* server has no FLUSH: stop asking */
+	fc->no_flush = 1;
+	return 0;
+}
+
+/*
+ * Send an fsync-type request (@opcode) for @file.  Bit 0 of fsync_flags is
+ * set when @datasync is non-zero.  @start and @end are not used.
+ * Returns the result of fuse2_simple_request().
+ */
+int fuse2_fsync_common(struct file *file, loff_t start, loff_t end,
+		       int datasync, int opcode)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	FUSE_ARGS(args);
+	struct fuse_fsync_in inarg;
+
+	memset(&inarg, 0, sizeof(inarg));
+	if (datasync)
+		inarg.fsync_flags = 1;
+	inarg.fh = ff->fh;
+
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.h.opcode = opcode;
+	args.in.args[0].value = &inarg;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.numargs = 1;
+
+	return fuse2_simple_request(fc, &args);
+}
+
+/*
+ * ->fsync() for regular files: send FSYNC under the inode lock.  A server
+ * answering -ENOSYS is remembered in fc->no_fsync and treated as success.
+ */
+static int fuse_fsync(struct file *file, loff_t start, loff_t end,
+		      int datasync)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	int ret;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+	if (fc->no_fsync)
+		return 0;
+
+	inode_lock(inode);
+	ret = fuse2_fsync_common(file, start, end, datasync, FUSE_FSYNC);
+	if (ret == -ENOSYS) {
+		/* not implemented by the server: remember and succeed */
+		fc->no_fsync = 1;
+		ret = 0;
+	}
+	inode_unlock(inode);
+
+	return ret;
+}
+
+/*
+ * Prepare @req as a read-type request (@opcode) for @count bytes at @pos of
+ * @file.
+ *
+ * The fuse_read_in argument lives in the request's inline data, directly
+ * after the input header; if @p is non-NULL it receives a pointer to it.
+ * Only the output header is mandatory in the reply, which may carry up to
+ * @count additional bytes.
+ *
+ * NOTE(review): only fh, offset, size and flags are written here; the other
+ * fuse_read_in fields rely on the inline area being zeroed when the request
+ * is obtained -- confirm against fuse2_get_req().
+ */
+void fuse2_read_fill(struct fuse_req *req, struct fuse_read_in **p,
+		     struct file *file, loff_t pos, size_t count, int opcode)
+{
+	struct fuse_file *ff = file->private_data;
+	struct fuse_read_in *inarg;
+
+	/*
+	 * Check the constant expression itself rather than the struct field
+	 * it is stored in, so the assertion does not depend on the optimizer
+	 * forwarding the store.
+	 */
+	BUILD_BUG_ON(sizeof(struct fuse_in_header) + sizeof(*inarg) >
+		     FUSE_REQ_INLINE_DATA);
+	req->inline_inlen = sizeof(struct fuse_in_header) + sizeof(*inarg);
+
+	req->mand_outlen = req->inline_outlen = sizeof(struct fuse_out_header);
+	req->max_outlen = req->mand_outlen + count;
+
+	inarg = (void *) req->inlinedata + sizeof(struct fuse_in_header);
+	inarg->fh = ff->fh;
+	inarg->offset = pos;
+	inarg->size = count;
+	inarg->flags = file->f_flags;
+	if (p)
+		*p = inarg;
+
+	req->inh.opcode = opcode;
+	req->inh.nodeid = ff->nodeid;
+	req->inh.len = req->inline_inlen;
+}
+
+/*
+ * Fill @req as a READ of @count bytes at @pos and send it.
+ * Returns the request error if there is one, otherwise the reply length
+ * minus the mandatory (header) part.
+ */
+static ssize_t fuse_send_read(struct fuse_req *req, struct kiocb *iocb,
+			      loff_t pos, size_t count)
+{
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
+	ssize_t err;
+
+	fuse2_read_fill(req, NULL, file, pos, count, FUSE_READ);
+
+	err = fuse2_request_send(ff->fc, req);
+	if (err)
+		return err;
+
+	return req->outh.len - req->mand_outlen;
+}
+
+/*
+ * Read one page worth of data into @page with a single READ request.
+ * FR_ZEROTAIL is set on the request; the page is marked up to date only if
+ * the request succeeds.  The page is not unlocked here.
+ */
+static int fuse_do_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	int ret;
+
+	req = fuse2_get_req(fc, 1);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	/* the request holds its own reference on the page */
+	req->pages[0] = page;
+	get_page(page);
+	req->num_pages = 1;
+	req->page_descs[0].length = PAGE_SIZE;
+	__set_bit(FR_ZEROTAIL, &req->flags);
+
+	fuse2_read_fill(req, NULL, file, page_offset(page), PAGE_SIZE,
+			FUSE_READ);
+	ret = fuse2_request_send(fc, req);
+	if (ret == 0)
+		SetPageUptodate(page);
+
+	fuse2_put_request(req);
+
+	return ret;
+}
+
+/*
+ * ->readpage(): fill @page unless the inode is bad (-EIO), and unlock the
+ * page in either case.
+ */
+static int fuse_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	int ret = -EIO;
+
+	if (!is_bad_inode(inode))
+		ret = fuse_do_readpage(file, page);
+
+	unlock_page(page);
+	return ret;
+}
+
+/*
+ * Send @req as a WRITE of @count bytes at @pos.
+ *
+ * The fuse_write_in argument and the fuse_write_out reply both live in the
+ * request's inline data, after the respective header.  O_DSYNC / O_SYNC are
+ * added to the flags when the iocb asks for them, and FUSE_WRITE_KILL_PRIV
+ * is set when the caller lacks CAP_FSETID.
+ *
+ * Returns the request error if there is one, otherwise the size reported in
+ * the reply.
+ *
+ * NOTE(review): write_flags is only OR-ed into, so this relies on the
+ * inline area being zeroed when the request is obtained -- confirm against
+ * fuse2_get_req().
+ */
+static ssize_t fuse_send_write(struct fuse_req *req, struct kiocb *iocb,
+			       loff_t pos, size_t count)
+{
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+	struct fuse_write_in *inarg;
+	struct fuse_write_out *outarg;
+	ssize_t res;
+
+	/*
+	 * Assert on the constant expressions themselves rather than on the
+	 * struct fields they are stored in.
+	 */
+	BUILD_BUG_ON(sizeof(struct fuse_in_header) + sizeof(*inarg) >
+		     FUSE_REQ_INLINE_DATA);
+	BUILD_BUG_ON(sizeof(struct fuse_out_header) + sizeof(*outarg) >
+		     FUSE_REQ_INLINE_DATA);
+
+	req->inline_inlen = sizeof(struct fuse_in_header) + sizeof(*inarg);
+	req->inline_outlen = sizeof(struct fuse_out_header) + sizeof(*outarg);
+	req->max_outlen = req->mand_outlen = req->inline_outlen;
+
+	inarg = (void *) req->inlinedata + sizeof(struct fuse_in_header);
+	inarg->fh = ff->fh;
+	inarg->offset = pos;
+	inarg->size = count;
+	inarg->flags = file->f_flags;
+	if (iocb->ki_flags & IOCB_DSYNC)
+		inarg->flags |= O_DSYNC;
+	if (iocb->ki_flags & IOCB_SYNC)
+		inarg->flags |= O_SYNC;
+	if (!capable(CAP_FSETID))
+		inarg->write_flags |= FUSE_WRITE_KILL_PRIV;
+	req->inh.opcode = FUSE_WRITE;
+	req->inh.nodeid = ff->nodeid;
+	/* the payload follows the inline part */
+	req->inh.len = req->inline_inlen + count;
+	res = fuse2_request_send(fc, req);
+	outarg = (void *) req->inlinedata + sizeof(struct fuse_out_header);
+
+	return res ? res : outarg->size;
+}
+
+/* Grow the in-core file size to @pos if it is currently smaller. */
+static void fuse_write_update_size(struct inode *inode, loff_t pos)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	spin_lock(&fi->lock);
+	if (inode->i_size < pos)
+		i_size_write(inode, pos);
+	spin_unlock(&fi->lock);
+}
+
+/*
+ * For @nr_pages descriptors starting at @index, set the length to whatever
+ * is left of the page after the descriptor's offset.
+ */
+static inline void fuse_page_descs_length_init(struct fuse_req *req,
+		unsigned index, unsigned nr_pages)
+{
+	const unsigned end = index + nr_pages;
+	unsigned idx = index;
+
+	while (idx < end) {
+		req->page_descs[idx].length =
+			PAGE_SIZE - req->page_descs[idx].offset;
+		idx++;
+	}
+}
+
+/*
+ * Pin pages from @ii into @req until either *@nbytesp bytes are covered or
+ * the request has no page slots left.
+ *
+ * On return *@nbytesp holds the number of bytes actually packed into the
+ * request and the iterator has been advanced by that amount.  Returns 0, or
+ * the negative error from iov_iter_get_pages(); bytes packed before the
+ * failure are still reported through *@nbytesp.  @write is not used.
+ */
+static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
+ size_t *nbytesp, int write)
+{
+ size_t nbytes = 0; /* # bytes already packed in req */
+ ssize_t ret = 0;
+
+ while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
+ unsigned npages;
+ size_t start;
+ ret = iov_iter_get_pages(ii, &req->pages[req->num_pages],
+ *nbytesp - nbytes,
+ req->max_pages - req->num_pages,
+ &start);
+ if (ret < 0)
+ break;
+
+ iov_iter_advance(ii, ret);
+ nbytes += ret;
+
+ /* from here on ret counts from the start of the first page */
+ ret += start;
+ npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
+
+ /* only the first page of the run gets a non-zero offset written */
+ req->page_descs[req->num_pages].offset = start;
+ fuse_page_descs_length_init(req, req->num_pages, npages);
+
+ req->num_pages += npages;
+ /* trim the last page's length to where the data ends */
+ req->page_descs[req->num_pages - 1].length -=
+ (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
+ }
+
+ *nbytesp = nbytes;
+
+ return ret < 0 ? ret : 0;
+}
+
+/* If set, it is WRITE; otherwise - READ */
+#define FUSE_DIO_WRITE (1 << 0)
+
+/*
+ * Transfer the contents of @iter directly to or from the server, starting at
+ * iocb->ki_pos, using one request per batch of pinned pages.
+ *
+ * The loop stops on the first error, on a short transfer, or when @iter is
+ * exhausted.  If any bytes were transferred, iocb->ki_pos is advanced and the
+ * byte count is returned; otherwise the error (or 0) is returned.
+ */
+static ssize_t fuse_direct_io(struct kiocb *iocb, struct iov_iter *iter,
+ int flags)
+{
+ int write = flags & FUSE_DIO_WRITE;
+ struct file *file = iocb->ki_filp;
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+ loff_t pos = iocb->ki_pos;
+ size_t count = iov_iter_count(iter);
+ ssize_t res = 0;
+ struct fuse_req *req;
+ /* pages that a read filled from a user iovec are marked dirty below */
+ bool should_dirty = !write && iter_is_iovec(iter);
+ unsigned int i;
+ int err = 0;
+
+ req = fuse2_get_req(fc, iov_iter_npages(iter, fc->max_pages));
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ while (count) {
+ ssize_t nres;
+ size_t nbytes = count;
+ err = fuse_get_user_pages(req, iter, &nbytes, write);
+ /* a pinning error is only fatal if nothing was packed */
+ if (err && !nbytes)
+ break;
+
+ if (write)
+ nres = fuse_send_write(req, iocb, pos, nbytes);
+ else
+ nres = fuse_send_read(req, iocb, pos, nbytes);
+
+ if (should_dirty) {
+ for (i = 0; i < req->num_pages; i++)
+ set_page_dirty_lock(req->pages[i]);
+ }
+ if (nres < 0) {
+ err = nres;
+ break;
+ } else if (nres > nbytes) {
+ /* the server claims more than was asked for: discard everything */
+ res = 0;
+ err = -EIO;
+ break;
+ }
+ count -= nres;
+ res += nres;
+ pos += nres;
+ /* short transfer: stop here */
+ if (nres != nbytes)
+ break;
+ if (count) {
+ /* a fresh request is used for every batch */
+ fuse2_put_request(req);
+ req = fuse2_get_req(fc, iov_iter_npages(iter, fc->max_pages));
+ if (IS_ERR(req))
+ break;
+ }
+ }
+ if (!IS_ERR(req))
+ fuse2_put_request(req);
+
+ if (res > 0) {
+ iocb->ki_pos = pos;
+ return res;
+ }
+
+ return err;
+}
+
+/*
+ * Issue a READ of up to @len bytes at @pos with @buf as the (variable
+ * length) output argument.  Returns whatever fuse2_simple_request()
+ * returns.
+ */
+static ssize_t fuse_read_buf(struct file *file, loff_t pos,
+			     void *buf, size_t len)
+{
+	struct fuse_conn *fc = get_fuse_conn(file_inode(file));
+	struct fuse_file *ff = file->private_data;
+	struct fuse_read_in inarg = {
+		.fh = ff->fh,
+		.offset = pos,
+		.size = len,
+		.flags = file->f_flags,
+	};
+	FUSE_ARGS(args);
+
+	/* header */
+	args.in.h.nodeid = ff->nodeid;
+	args.in.h.opcode = FUSE_READ;
+
+	/* input: the read parameters */
+	args.in.args[0].value = &inarg;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.numargs = 1;
+
+	/* output: the caller's buffer, may be filled only partially */
+	args.out.args[0].value = buf;
+	args.out.args[0].size = len;
+	args.out.numargs = 1;
+	args.out.argvar = 1;
+
+	return fuse2_simple_request(fc, &args);
+}
+
+/*
+ * State shared between fuse_file_read_iter() and fuse_read_kvec() while a
+ * kvec iterator is being filled.  Members not named by the initializer
+ * start out as zero.
+ */
+struct fuse_kvec_ctx {
+	struct file *file;	/* file being read */
+	size_t res;		/* bytes read so far */
+	loff_t pos;		/* file offset of the next READ */
+	int err;		/* value returned once @done is set */
+	bool done;		/* error, end of file or short read seen */
+};
+
+/*
+ * Fill one kvec segment using READ requests of at most PAGE_SIZE bytes.
+ *
+ * The first error, zero-length reply or short read ends the whole transfer,
+ * not just the current segment: carrying on with the next segment would
+ * leave a hole in the caller's buffers and put later file data at the wrong
+ * place.  The outcome is latched in the context so that calls for any
+ * remaining segments do nothing and keep reporting the same result, whether
+ * or not the iterating helper stops on a non-zero return value.
+ *
+ * Returns 0 or a negative error; the byte count accumulates in ctx->res.
+ */
+static int fuse_read_kvec(struct kvec *vec, void *_ctx)
+{
+	struct fuse_kvec_ctx *ctx = _ctx;
+
+	if (ctx->done)
+		return ctx->err;
+
+	while (vec->iov_len) {
+		size_t len = min(vec->iov_len, PAGE_SIZE);
+		ssize_t res;
+
+		res = fuse_read_buf(ctx->file, ctx->pos, vec->iov_base, len);
+		if (res <= 0) {
+			ctx->done = true;
+			ctx->err = res;
+			return res;
+		}
+
+		ctx->res += res;
+		ctx->pos += res;
+
+		if ((size_t) res < len) {
+			/* short read: nothing more to fetch */
+			ctx->done = true;
+			break;
+		}
+
+		vec->iov_len -= res;
+		vec->iov_base += res;
+	}
+
+	return 0;
+}
+
+/*
+ * Send a MAP request describing a read of @count bytes at iocb->ki_pos and
+ * store the reply in @outarg.  The request reuses the fuse_read_in layout
+ * for its input argument.  Returns the result of fuse2_simple_request().
+ */
+static int fuse_send_map(struct kiocb *iocb, size_t count,
+ struct fuse_map_out *outarg)
+{
+ struct file *file = iocb->ki_filp;
+ struct fuse_conn *fc = get_fuse_conn(file_inode(file));
+ struct fuse_file *ff = file->private_data;
+ struct fuse_read_in inarg = {
+ .fh = ff->fh,
+ .offset = iocb->ki_pos,
+ .size = count,
+ .flags = file->f_flags,
+ };
+ FUSE_ARGS(args);
+
+ args.in.h.opcode = FUSE_MAP;
+ args.in.h.nodeid = ff->nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(*outarg);
+ args.out.args[0].value = outarg;
+
+ return fuse2_simple_request(fc, &args);
+}
+
+/*
+ * Read path for files opened with FOPEN_MAP: instead of data, the server
+ * answers each MAP request with a (mapfd, offset, size) triple and the data
+ * is then read from the file that fuse2_map_get() returns for mapfd.
+ *
+ * Loops until @to is full, the server reports a zero-sized mapping, the
+ * mapped file returns no data, or an error occurs.  iocb->ki_pos is advanced
+ * by the bytes read.  Returns the total number of bytes read if there were
+ * any, otherwise the last error (0 for an empty iterator).
+ */
+static ssize_t fuse_file_map_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct fuse_conn *fc = get_fuse_conn(file_inode(iocb->ki_filp));
+	struct fuse_map_out outarg;
+	struct file *mapfile;
+	/* res must be defined even if the loop body never runs */
+	ssize_t res = 0, total = 0;
+	size_t count, limit;
+	loff_t pos;
+
+	while ((count = iov_iter_count(to))) {
+		res = fuse_send_map(iocb, count, &outarg);
+		if (res || !outarg.size)
+			break;
+
+		res = -EBADF;
+		mapfile = fuse2_map_get(fc, outarg.mapfd);
+		if (!mapfile)
+			break;
+
+		/* confine this read to the extent the server mapped */
+		iov_iter_truncate(to, outarg.size);
+		limit = iov_iter_count(to);
+		pos = outarg.offset;
+		res = vfs_iter_read(mapfile, to, &pos, /* FIXME */ 0);
+		fput(mapfile);
+
+		/*
+		 * Undo the truncation on every path, including errors, so
+		 * the caller never sees a shortened iterator: the remaining
+		 * count is the original one minus what was consumed.
+		 */
+		iov_iter_reexpand(to, count - (limit - iov_iter_count(to)));
+		if (res <= 0)
+			break;
+
+		total += res;
+		iocb->ki_pos += res;
+	}
+
+	return total ?: res;
+}
+
+/*
+ * ->read_iter(): dispatch to one of three read paths.
+ *  - FOPEN_MAP files go through fuse_file_map_iter();
+ *  - kvec iterators are filled segment by segment via fuse_read_kvec();
+ *  - everything else uses fuse_direct_io().
+ */
+static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_kvec_ctx ctx = {
+		.file = file,
+		.pos = iocb->ki_pos
+	};
+	int err;
+
+	if (is_bad_inode(file_inode(file)))
+		return -EIO;
+
+	if (ff->open_flags & FOPEN_MAP)
+		return fuse_file_map_iter(iocb, to);
+
+	if (!iov_iter_is_kvec(to))
+		return fuse_direct_io(iocb, to, 0);
+
+	err = iov_iter_for_each_range(to, iov_iter_count(to),
+				      fuse_read_kvec, &ctx);
+	iocb->ki_pos = ctx.pos;
+
+	/* data read before an error wins over the error */
+	return ctx.res > 0 ? ctx.res : err;
+}
+
+/*
+ * ->write_iter(): writes are serialized with the inode lock and sent
+ * through fuse_direct_io(); after a successful write the in-core size is
+ * grown to the new file position if needed.
+ */
+static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	/* Don't allow parallel writes to the same file */
+	inode_lock(inode);
+	ret = generic_write_checks(iocb, from);
+	if (ret > 0) {
+		ret = fuse_direct_io(iocb, from, FUSE_DIO_WRITE);
+		if (ret > 0)
+			fuse_write_update_size(inode, iocb->ki_pos);
+	}
+	inode_unlock(inode);
+
+	return ret;
+}
+
+/*
+ * ->mmap(): only mappings without VM_MAYSHARE are supported.  The page
+ * cache of the file is invalidated before the generic mmap code is used.
+ */
+static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ /* Can't provide the coherency needed for MAP_SHARED */
+ if (vma->vm_flags & VM_MAYSHARE)
+ return -ENODEV;
+
+ invalidate_inode_pages2(file->f_mapping);
+
+ return generic_file_mmap(file, vma);
+}
+
+/*
+ * Translate a lock description received from the server (@ffl) into @fl.
+ *
+ * For F_UNLCK only the type is copied.  For F_RDLCK/F_WRLCK the range is
+ * validated and copied together with the owner pid.  Any other type, or an
+ * invalid range, yields -EIO and leaves fl->fl_type untouched.
+ */
+static int convert_fuse_file_lock(struct fuse_conn *fc,
+ const struct fuse_file_lock *ffl,
+ struct file_lock *fl)
+{
+ switch (ffl->type) {
+ case F_UNLCK:
+ break;
+
+ case F_RDLCK:
+ case F_WRLCK:
+ if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX ||
+ ffl->end < ffl->start)
+ return -EIO;
+
+ fl->fl_start = ffl->start;
+ fl->fl_end = ffl->end;
+
+ /*
+ * Convert pid into init's pid namespace. The locks API will
+ * translate it into the caller's pid namespace.
+ */
+ rcu_read_lock();
+ fl->fl_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns);
+ rcu_read_unlock();
+ break;
+
+ default:
+ return -EIO;
+ }
+ fl->fl_type = ffl->type;
+ return 0;
+}
+
+/*
+ * Fill @inarg and the input side of @args for a lock request (@opcode)
+ * describing @fl on @file.  @pid is sent as the lock's pid; a non-zero
+ * @flock marks the request with FUSE_LK_FLOCK.  The lock owner is sent in
+ * scrambled form.  The caller keeps ownership of @inarg, which must stay
+ * valid until the request has been sent.
+ */
+static void fuse_lk_fill(struct fuse_args *args, struct file *file,
+ const struct file_lock *fl, int opcode, pid_t pid,
+ int flock, struct fuse_lk_in *inarg)
+{
+ struct inode *inode = file_inode(file);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+
+ memset(inarg, 0, sizeof(*inarg));
+ inarg->fh = ff->fh;
+ inarg->owner = fuse2_lock_owner_id(fc, fl->fl_owner);
+ inarg->lk.start = fl->fl_start;
+ inarg->lk.end = fl->fl_end;
+ inarg->lk.type = fl->fl_type;
+ inarg->lk.pid = pid;
+ if (flock)
+ inarg->lk_flags |= FUSE_LK_FLOCK;
+ args->in.h.opcode = opcode;
+ args->in.h.nodeid = get_node_id(inode);
+ args->in.numargs = 1;
+ args->in.args[0].size = sizeof(*inarg);
+ args->in.args[0].value = inarg;
+}
+
+/*
+ * Ask the server about a conflicting lock (GETLK) and convert its answer
+ * back into @fl.  Returns 0, the request error, or -EIO if the reply does
+ * not describe a valid lock.
+ */
+static int fuse_getlk(struct file *file, struct file_lock *fl)
+{
+	struct fuse_conn *fc = get_fuse_conn(file_inode(file));
+	FUSE_ARGS(args);
+	struct fuse_lk_in inarg;
+	struct fuse_lk_out outarg;
+	int ret;
+
+	fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
+	args.out.args[0].value = &outarg;
+	args.out.args[0].size = sizeof(outarg);
+	args.out.numargs = 1;
+
+	ret = fuse2_simple_request(fc, &args);
+	if (ret)
+		return ret;
+
+	return convert_fuse_file_lock(fc, &outarg.lk, fl);
+}
+
+/*
+ * Send a SETLK (or SETLKW when the lock may sleep) request for @fl.
+ *
+ * The pid sent is the caller's thread group id as seen from the
+ * connection's pid namespace; for unlock requests no pid is looked up.
+ * A non-zero @flock marks the request as a flock emulation.
+ *
+ * Returns 0 or a negative error; -EINTR from the request is turned into
+ * -ERESTARTSYS.
+ */
+static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
+{
+ struct inode *inode = file_inode(file);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+ struct fuse_lk_in inarg;
+ int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
+ struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
+ pid_t pid_nr = pid_nr_ns(pid, fc->pid_ns);
+ int err;
+
+ if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
+ /* NLM needs asynchronous locks, which we don't support yet */
+ return -ENOLCK;
+ }
+
+ /* Unlock on close is handled by the flush method */
+ if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
+ return 0;
+
+ fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
+ err = fuse2_simple_request(fc, &args);
+
+ /* locking is restartable */
+ if (err == -EINTR)
+ err = -ERESTARTSYS;
+
+ return err;
+}
+
+/*
+ * ->lock(): POSIX lock entry point.  F_CANCELLK is a no-op.  When the
+ * connection has no_lock set, locks are handled locally with the generic
+ * POSIX lock helpers; otherwise they are forwarded to the server.
+ */
+static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct fuse_conn *fc = get_fuse_conn(file_inode(file));
+
+	switch (cmd) {
+	case F_CANCELLK:
+		return 0;
+
+	case F_GETLK:
+		if (!fc->no_lock)
+			return fuse_getlk(file, fl);
+
+		posix_test_lock(file, fl);
+		return 0;
+
+	default:
+		if (!fc->no_lock)
+			return fuse_setlk(file, fl, 0);
+
+		return posix_lock_file(file, fl, NULL);
+	}
+}
+
+/*
+ * ->flock(): handled locally when the connection has no_flock set,
+ * otherwise forwarded to the server as a lock request flagged as flock.
+ * In the latter case ff->flock is set so that release knows about it.
+ */
+static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct fuse_conn *fc = get_fuse_conn(file_inode(file));
+	struct fuse_file *ff;
+
+	if (fc->no_flock)
+		return locks_lock_file_wait(file, fl);
+
+	/* emulate flock with POSIX locks */
+	ff = file->private_data;
+	ff->flock = true;
+
+	return fuse_setlk(file, fl, 1);
+}
+
+/*
+ * Ask the server to resolve a seek (used for SEEK_HOLE / SEEK_DATA by
+ * fuse_file_llseek()) and set the file position to the offset it returns.
+ *
+ * If the server does not implement LSEEK (-ENOSYS, remembered in
+ * fc->no_lseek), fall back to refreshing the attributes and using
+ * generic_file_llseek().  Returns the new position or a negative error.
+ */
+static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ FUSE_ARGS(args);
+ struct fuse_lseek_in inarg = {
+ .fh = ff->fh,
+ .offset = offset,
+ .whence = whence
+ };
+ struct fuse_lseek_out outarg;
+ int err;
+
+ if (fc->no_lseek)
+ goto fallback;
+
+ args.in.h.opcode = FUSE_LSEEK;
+ args.in.h.nodeid = ff->nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse2_simple_request(fc, &args);
+ if (err) {
+ if (err == -ENOSYS) {
+ fc->no_lseek = 1;
+ goto fallback;
+ }
+ return err;
+ }
+
+ return vfs_setpos(file, outarg.offset, inode->i_sb->s_maxbytes);
+
+fallback:
+ err = fuse2_update_attributes(inode, file);
+ if (!err)
+ return generic_file_llseek(file, offset, whence);
+ else
+ return err;
+}
+
+/*
+ * ->llseek(): SEEK_SET and SEEK_CUR are purely local; SEEK_END refreshes
+ * the attributes first; SEEK_HOLE and SEEK_DATA are resolved by
+ * fuse_lseek().  The last three run under the inode lock.  Any other
+ * @whence yields -EINVAL.
+ */
+static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file_inode(file);
+	loff_t ret;
+
+	switch (whence) {
+	case SEEK_SET:
+	case SEEK_CUR:
+		/* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
+		return generic_file_llseek(file, offset, whence);
+
+	case SEEK_END:
+		inode_lock(inode);
+		ret = fuse2_update_attributes(inode, file);
+		if (ret == 0)
+			ret = generic_file_llseek(file, offset, whence);
+		inode_unlock(inode);
+		return ret;
+
+	case SEEK_HOLE:
+	case SEEK_DATA:
+		inode_lock(inode);
+		ret = fuse_lseek(file, offset, whence);
+		inode_unlock(inode);
+		return ret;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+/* Round @off up to a multiple of the connection's max_pages, in bytes. */
+static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
+{
+ return round_up(off, fc->max_pages << PAGE_SHIFT);
+}
+
+/*
+ * ->fallocate(): forward the request to the server.
+ *
+ * Only FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE are supported; anything
+ * else, or a server without FALLOCATE (remembered in fc->no_fallocate),
+ * yields -EOPNOTSUPP.
+ *
+ * The inode lock is held whenever the call may change the file size or
+ * punches a hole.  Before punching, dirty pages in the range are written
+ * back; afterwards the page cache for the range is dropped.  After a
+ * successful call that may extend the file, the in-core size is grown to
+ * cover the allocated range.
+ */
+static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
+				loff_t length)
+{
+	struct fuse_file *ff = file->private_data;
+	struct inode *inode = file_inode(file);
+	struct fuse_conn *fc = ff->fc;
+	FUSE_ARGS(args);
+	struct fuse_fallocate_in inarg = {
+		.fh = ff->fh,
+		.offset = offset,
+		.length = length,
+		.mode = mode
+	};
+	int err;
+	bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
+			  (mode & FALLOC_FL_PUNCH_HOLE);
+
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+		return -EOPNOTSUPP;
+
+	if (fc->no_fallocate)
+		return -EOPNOTSUPP;
+
+	if (lock_inode) {
+		inode_lock(inode);
+		if (mode & FALLOC_FL_PUNCH_HOLE) {
+			loff_t endbyte = offset + length - 1;
+
+			err = filemap_write_and_wait_range(inode->i_mapping,
+							   offset, endbyte);
+			if (err)
+				goto out;
+		}
+	}
+
+	args.in.h.opcode = FUSE_FALLOCATE;
+	args.in.h.nodeid = ff->nodeid;
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	err = fuse2_simple_request(fc, &args);
+	if (err == -ENOSYS) {
+		fc->no_fallocate = 1;
+		err = -EOPNOTSUPP;
+	}
+	if (err)
+		goto out;
+
+	/*
+	 * We could have extended the file: keep i_size in step, the same way
+	 * the write path does.  The inode lock is held in this case.
+	 */
+	if (!(mode & FALLOC_FL_KEEP_SIZE))
+		fuse_write_update_size(inode, offset + length);
+
+	if (mode & FALLOC_FL_PUNCH_HOLE)
+		truncate_pagecache_range(inode, offset, offset + length - 1);
+out:
+	if (lock_inode)
+		inode_unlock(inode);
+
+	return err;
+}
+
+/* File operations for regular files; installed by fuse2_init_file_inode(). */
+static const struct file_operations fuse_file_operations = {
+ .llseek = fuse_file_llseek,
+ .read_iter = fuse_file_read_iter,
+ .write_iter = fuse_file_write_iter,
+ .mmap = fuse_file_mmap,
+ .open = fuse_open,
+ .flush = fuse_flush,
+ .release = fuse_release,
+ .fsync = fuse_fsync,
+ .lock = fuse_file_lock,
+ .flock = fuse_file_flock,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
+ .fallocate = fuse_file_fallocate,
+};
+
+/* Address space operations for regular files: only ->readpage is provided. */
+static const struct address_space_operations fuse_file_aops = {
+ .readpage = fuse_readpage,
+};
+
+/* Install the regular-file file and address space operations on @inode. */
+void fuse2_init_file_inode(struct inode *inode)
+{
+ inode->i_fop = &fuse_file_operations;
+ inode->i_data.a_ops = &fuse_file_aops;
+}
diff --git a/fs/fuse2/fudev.c b/fs/fuse2/fudev.c
new file mode 100644
index 0000000..bbf95583
--- /dev/null
+++ b/fs/fuse2/fudev.c
@@ -0,0 +1,1475 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "dev.h"
+
+#include <linux/module.h>
+#include <linux/uio.h>
+#include <linux/miscdevice.h>
+#include <linux/highmem.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/cred.h>
+#include <linux/parser.h>
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Filesystem in Userspace");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS_MISCDEV(FUSE_MINOR);
+MODULE_ALIAS("devname:fuse");
+
+/*
+ * Ordinary requests have even IDs, while interrupt IDs are odd.
+ * The request counter advances in steps of FUSE_REQ_ID_STEP, which is always
+ * even, so the parity of the counter never changes.
+ * NOTE(review): the step also depends on nr_cpumask_bits, presumably to
+ * leave room for per-cpu IDs -- confirm against the users of
+ * fudev_aux.reqctr.
+ */
+#define FUSE_INT_REQ_BIT 1
+#define FUSE_REQ_ID_STEP (2 * (nr_cpumask_bits + 1))
+
+static DEFINE_MUTEX(fudev_mutex);
+
+/*
+ * Input queue: requests and forgets waiting to be read by the server.
+ * fudev_read() accesses it with waitq.lock held.
+ */
+struct fuse_iqueue {
+ /* Connection established */
+ unsigned connected;
+
+ /* Readers of the connection are waiting on this */
+ wait_queue_head_t waitq;
+
+ /* The next unique request id */
+ u64 reqctr;
+
+ /* The list of pending requests */
+ struct list_head pending;
+
+ /* Queue of pending forgets */
+ struct list_head forgets;
+
+ /* Batching of FORGET requests (positive indicates FORGET batch) */
+ int forget_batch;
+};
+
+/* The processing queue is a hash table with 2^FUSE_PQ_HASH_BITS buckets */
+#define FUSE_PQ_HASH_BITS 8
+#define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS)
+
+/* Processing queue: requests that have been handed to the server */
+struct fuse_pqueue {
+ /* Connection established */
+ unsigned connected;
+
+ /* Lock protecting accesses to members of this structure */
+ spinlock_t lock;
+
+ /* Hash table of requests being processed */
+ struct list_head *processing;
+};
+
+/*
+ * Fuse device instance; stored in file->private_data of a file using
+ * fudev_operations (see fudev()).
+ */
+struct fudev {
+ /* refcount */
+ refcount_t count;
+
+ /* Processing queue */
+ struct fuse_pqueue pq;
+
+ /* Input queue */
+ struct fuse_iqueue iq;
+
+ /* Per-cpu mapped buffers */
+ struct fudev_aux * __percpu *aux_devs;
+
+ /* Our super block */
+ struct super_block *sb;
+};
+
+/*
+ * Auxiliary device instance; stored in file->private_data of a file using
+ * fudev_aux_operations (see fudev_aux()).
+ * NOTE(review): the members from @cpu onwards are not used in the visible
+ * part of the file; their descriptions below are guesses from the names and
+ * need confirming.
+ */
+struct fudev_aux {
+ /* Fuse device this is bound to or NULL */
+ struct fudev *dev;
+
+ /* Request (if exists) */
+ struct fuse_req *req;
+
+ /* Memory mapped input buffer */
+ void *ibuf;
+
+ /* Size of input buffer */
+ size_t ibufsize;
+
+ /* Memory mapped output buffer */
+ void *obuf;
+
+ /* Size of output buffer */
+ size_t obufsize;
+
+ /* presumably the cpu this instance is bound to -- TODO confirm */
+ unsigned int cpu;
+
+ /* presumably a private request id counter -- TODO confirm */
+ u64 reqctr;
+
+ /* state flags; meaning not visible here -- TODO confirm */
+ int reserved;
+
+ int ready;
+
+ int done;
+
+ /* wait queue; what sleeps on it is not visible here -- TODO confirm */
+ wait_queue_head_t waitq;
+
+};
+
+static const struct file_operations fudev_operations;
+static const struct file_operations fudev_aux_operations;
+
+/*
+ * Return the fudev behind @file.  BUGs if @file is not using
+ * fudev_operations.
+ */
+static struct fudev *fudev(struct file *file)
+{
+ BUG_ON(file->f_op != &fudev_operations);
+ return file->private_data;
+}
+
+/*
+ * Return the fudev_aux behind @file.  BUGs if @file is not using
+ * fudev_aux_operations.
+ */
+static struct fudev_aux *fudev_aux(struct file *file)
+{
+ BUG_ON(file->f_op != &fudev_aux_operations);
+ return file->private_data;
+}
+
+/*
+ * Advance the queue's request counter by one step and return the new value
+ * as the next unique request ID.
+ */
+static u64 fuse_get_unique(struct fuse_iqueue *fiq)
+{
+	u64 next = fiq->reqctr + FUSE_REQ_ID_STEP;
+
+	fiq->reqctr = next;
+	return next;
+}
+
+/*
+ * Bucket index in the processing hash for a request ID.  The interrupt bit
+ * is masked off first, so a request and its interrupt ID share a bucket.
+ */
+static unsigned int fuse_req_hash(u64 unique)
+{
+ return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
+}
+
+/*
+ * Copy up to @nbytes of the request's page payload into @to, page by page,
+ * honouring each page descriptor's offset and length.
+ * Returns 0, or -EFAULT if a page could not be copied completely.
+ */
+static int fuse_read_pages(struct iov_iter *to, struct fuse_req *req,
+			   unsigned int nbytes)
+{
+	unsigned int left = nbytes;
+	unsigned int idx = 0;
+
+	while (idx < req->num_pages && left) {
+		unsigned int chunk = min(left, req->page_descs[idx].length);
+		size_t copied;
+
+		copied = copy_page_to_iter(req->pages[idx],
+					   req->page_descs[idx].offset,
+					   chunk, to);
+		if (copied != chunk)
+			return -EFAULT;
+
+		left -= chunk;
+		idx++;
+	}
+
+	return 0;
+}
+
+/* Copy @size bytes from @val into @iter; -EFAULT unless all were copied. */
+static int fuse_read_one(struct iov_iter *iter, void *val, unsigned size)
+{
+ return _copy_to_iter(val, size, iter) == size ? 0 : -EFAULT;
+}
+
+/* True if at least one forget is queued on @fiq. */
+static bool forget_pending(struct fuse_iqueue *fiq)
+{
+ return !list_empty(&fiq->forgets);
+}
+
+/* True if @fiq has anything to deliver: a pending request or a forget. */
+static bool request_pending(struct fuse_iqueue *fiq)
+{
+ return !list_empty(&fiq->pending) || forget_pending(fiq);
+}
+
+/* Unlink and free every fuse_forget on @head, leaving the list empty. */
+static void fuse_free_forgets(struct list_head *head)
+{
+	while (!list_empty(head)) {
+		struct fuse_forget *victim;
+
+		victim = list_first_entry(head, struct fuse_forget, list);
+		list_del(&victim->list);
+		kfree(victim);
+	}
+}
+/*
+ * Deliver queued forgets to the reader as one BATCH_FORGET message.
+ *
+ * Called with fiq->waitq.lock held; the lock is dropped before anything is
+ * copied to @to.  As many forgets as fit into the reader's buffer are cut
+ * off the queue; they are freed afterwards whether or not copying them
+ * succeeded.
+ *
+ * Returns the message length, -EINVAL if the buffer cannot even hold the
+ * headers, or the error from copying.
+ */
+static ssize_t fuse_read_forget(struct fuse_iqueue *fiq, struct iov_iter *to)
+__releases(fiq->waitq.lock)
+{
+ int err;
+ const size_t one_len = sizeof(struct fuse_forget_one);
+ unsigned int count, max_forgets;
+ struct fuse_forget *forget;
+ size_t nbytes = iov_iter_count(to);
+ struct list_head head, *last;
+ struct fuse_batch_forget_in arg = { .count = 0 };
+ struct fuse_in_header ih = {
+ .opcode = FUSE_BATCH_FORGET,
+ .unique = fuse_get_unique(fiq),
+ .len = sizeof(ih) + sizeof(arg),
+ };
+
+ if (nbytes < ih.len) {
+ spin_unlock(&fiq->waitq.lock);
+ return -EINVAL;
+ }
+
+ max_forgets = (nbytes - ih.len) / one_len;
+
+ /* walk to the last entry that still fits; the loop body is empty */
+ for (count = 0, last = &fiq->forgets;
+ count < max_forgets && last->next != &fiq->forgets;
+ count++, last = last->next);
+
+ list_cut_position(&head, &fiq->forgets, last);
+ spin_unlock(&fiq->waitq.lock);
+
+ arg.count = count;
+ ih.len += count * one_len;
+ err = fuse_read_one(to, &ih, sizeof(ih));
+ if (!err)
+ err = fuse_read_one(to, &arg, sizeof(arg));
+
+ if (!err) {
+ list_for_each_entry(forget, &head, list) {
+ err = fuse_read_one(to, &forget->forget_one, one_len);
+ if (err)
+ break;
+ }
+ }
+ fuse_free_forgets(&head);
+
+ if (err)
+ return err;
+
+ return ih.len;
+}
+
+/*
+ * This function is called when a request is finished. Either a reply
+ * has arrived or it was aborted (and not yet sent) or some error
+ * occurred during communication with userspace, or the device file
+ * was closed. The request is marked FR_FINISHED and the requester
+ * thread is woken up (if still waiting). Calling it again on an
+ * already finished request does nothing. The caller's reference to
+ * the request is not dropped here.
+ */
+static void request_end(struct fuse_req *req)
+{
+ if (!test_and_set_bit(FR_FINISHED, &req->flags)) {
+ /* Wake up waiter sleeping in request_wait_answer() */
+ wake_up(&req->waitq);
+ }
+}
+
+/*
+ * Finish a request after device I/O on it is over: wake the requester,
+ * unlink the request from the processing queue and drop a reference.
+ *
+ * If @err is non-zero and the processing queue is still connected, the
+ * request is failed with -EIO; an error already stored in the request is
+ * kept when @err is zero.
+ */
+static void fuse_io_end(struct fuse_pqueue *fpq, struct fuse_req *req, int err)
+{
+ if (err) {
+ spin_lock(&fpq->lock);
+ if (fpq->connected)
+ req->outh.error = -EIO;
+ spin_unlock(&fpq->lock);
+ }
+ request_end(req);
+
+ spin_lock(&fpq->lock);
+ list_del_init(&req->list);
+ spin_unlock(&fpq->lock);
+
+ fuse2_put_request(req);
+}
+
+
+/*
+ * Look up request on processing list by unique ID.
+ * Returns NULL if no request with exactly this ID is in its hash bucket.
+ * NOTE(review): the list is walked without taking fpq->lock here, so the
+ * caller presumably has to hold it -- confirm against the callers.
+ */
+static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
+{
+ unsigned int hash = fuse_req_hash(unique);
+ struct fuse_req *req;
+
+ list_for_each_entry(req, &fpq->processing[hash], list) {
+ if (req->inh.unique == unique)
+ return req;
+ }
+ return NULL;
+}
+
+/*
+ * Copy the body of a reply (everything after the output header) from @from
+ * into @req: first into the inline area, then into the request's pages.
+ *
+ * An error reply must not carry a body.  Otherwise the body length has to
+ * lie between the request's mandatory and maximum output length.  With
+ * FR_ZEROTAIL set, pages that are not completely filled by the reply are
+ * cleared first, so whatever the reply does not cover reads as zeroes.
+ *
+ * Returns 0, -EINVAL for a reply of unacceptable length, or -EFAULT if
+ * copying fails.
+ */
+static int copy_out_args(struct iov_iter *from, struct fuse_req *req)
+{
+ bool zeroing = test_bit(FR_ZEROTAIL, &req->flags);
+ unsigned int nbytes = iov_iter_count(from);
+ const unsigned int hlen = sizeof(struct fuse_out_header);
+ unsigned int thislen = req->inline_outlen - hlen;
+ void *arg = req->inlinedata + hlen;
+ unsigned int i;
+ size_t ret;
+
+ if (req->outh.error)
+ return nbytes > 0 ? -EINVAL : 0;
+
+ if (nbytes < req->mand_outlen - hlen || nbytes > req->max_outlen - hlen)
+ return -EINVAL;
+
+ /* inline part: at most what the request reserved for it */
+ thislen = min(thislen, nbytes);
+
+ ret = _copy_from_iter(arg, thislen, from);
+ if (ret != thislen)
+ return -EFAULT;
+
+ nbytes -= thislen;
+ for (i = 0; i < req->num_pages; i++) {
+ unsigned offset = req->page_descs[i].offset;
+ unsigned count = min(nbytes, req->page_descs[i].length);
+ struct page *page = req->pages[i];
+
+ if (zeroing && count < PAGE_SIZE)
+ clear_highpage(page);
+
+ if (!count) {
+ /* reply exhausted: later pages only matter when zeroing */
+ if (!zeroing)
+ break;
+
+ flush_dcache_page(page);
+ continue;
+ }
+ ret = copy_page_from_iter(page, offset, count, from);
+ if (ret != count)
+ return -EFAULT;
+
+ flush_dcache_page(page);
+ nbytes -= count;
+ }
+ /* anything left over did not fit into the request's pages */
+ WARN_ON(nbytes);
+
+ return 0;
+}
+
+/*
+ * Read a single request into the userspace filesystem's buffer.  This
+ * function waits until a request is available, then moves it from the
+ * pending list to the processing hash and copies the request data to the
+ * userspace buffer.  Queued forgets are interleaved with ordinary requests
+ * in batches and delivered by fuse_read_forget().
+ *
+ * If the request does not fit into the buffer it is failed (-E2BIG for
+ * SETXATTR, -EIO otherwise) and the read is restarted with the next
+ * request.  If no reply is needed or there was an error during the copying
+ * then the request is finished by fuse_io_end().  Otherwise it stays on the
+ * processing hash with the 'sent' flag set.
+ *
+ * Every path that finishes a request here goes through fuse_io_end(), so
+ * that the request is unlinked from the processing hash before the
+ * device's reference to it is dropped.
+ *
+ * Returns the size of the request, or a negative error: -EAGAIN for a
+ * non-blocking read with nothing queued, -ENODEV once the queue is
+ * disconnected, the error from an interrupted wait, or the copy error.
+ */
+static ssize_t fudev_read(struct kiocb *iocb, struct iov_iter *to)
+{
+	ssize_t err;
+	struct file *file = iocb->ki_filp;
+	struct fudev *fud = fudev(file);
+	struct fuse_iqueue *fiq = &fud->iq;
+	struct fuse_pqueue *fpq = &fud->pq;
+	struct fuse_req *req;
+	unsigned int reqsize, hash;
+
+restart:
+	spin_lock(&fiq->waitq.lock);
+	err = -EAGAIN;
+	if ((file->f_flags & O_NONBLOCK) && fiq->connected &&
+	    !request_pending(fiq))
+		goto err_unlock;
+
+	err = wait_event_interruptible_exclusive_locked(fiq->waitq,
+				!fiq->connected || request_pending(fiq));
+	if (err)
+		goto err_unlock;
+
+	if (!fiq->connected) {
+		err = -ENODEV;
+		goto err_unlock;
+	}
+
+	if (forget_pending(fiq)) {
+		/* fuse_read_forget() drops fiq->waitq.lock */
+		if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
+			return fuse_read_forget(fiq, to);
+
+		if (fiq->forget_batch <= -8)
+			fiq->forget_batch = 16;
+	}
+
+	req = list_entry(fiq->pending.next, struct fuse_req, list);
+	clear_bit(FR_PENDING, &req->flags);
+
+	hash = fuse_req_hash(req->inh.unique);
+	spin_lock(&fpq->lock);
+	list_move_tail(&req->list, &fpq->processing[hash]);
+	spin_unlock(&fpq->lock);
+
+	spin_unlock(&fiq->waitq.lock);
+
+	reqsize = req->inh.len;
+
+	/* If request is too large, reply with an error and restart the read */
+	if (iov_iter_count(to) < reqsize) {
+		req->outh.error = -EIO;
+		/* SETXATTR is special, since it may contain too large data */
+		if (req->inh.opcode == FUSE_SETXATTR)
+			req->outh.error = -E2BIG;
+		/*
+		 * The request already sits on the processing hash; it must
+		 * be unlinked before our reference goes away.  An @err of 0
+		 * keeps the error set above.
+		 */
+		fuse_io_end(fpq, req, 0);
+		goto restart;
+	}
+
+	err = fuse_read_one(to, req->inlinedata, req->inline_inlen);
+	if (!err && reqsize > req->inline_inlen)
+		err = fuse_read_pages(to, req, reqsize - req->inline_inlen);
+
+	if (!err && test_bit(FR_ISREPLY, &req->flags)) {
+		set_bit(FR_SENT, &req->flags);
+		return reqsize;
+	}
+
+	fuse_io_end(fpq, req, err);
+	if (err)
+		return err;
+	return reqsize;
+
+err_unlock:
+	spin_unlock(&fiq->waitq.lock);
+	return err;
+}
+
+/*
+ * Accept one reply from userspace.  The buffer must hold exactly one
+ * message: a fuse_out_header whose len equals the size of the write,
+ * followed by the reply payload.  The header's unique ID selects a request
+ * on the processing list; only a request that has been completely sent
+ * (FR_SENT) may be answered.  The payload is copied into the request,
+ * which is then completed through fuse_io_end().
+ */
+static ssize_t fudev_write(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct fuse_pqueue *fpq = &fudev(iocb->ki_filp)->pq;
+	unsigned int nbytes = iov_iter_count(from);
+	struct fuse_out_header oh;
+	struct fuse_req *req;
+	int err;
+
+	if (nbytes < sizeof(oh))
+		return -EINVAL;
+
+	if (_copy_from_iter(&oh, sizeof(oh), from) != sizeof(oh))
+		return -EFAULT;
+
+	/*
+	 * Reject a length that disagrees with the write size, a zero unique
+	 * ID (that would be an unsolicited notification, with the code in
+	 * the error field) and an error value outside (-1000, 0].
+	 */
+	if (oh.len != nbytes || !oh.unique ||
+	    oh.error <= -1000 || oh.error > 0)
+		return -EINVAL;
+
+	spin_lock(&fpq->lock);
+	req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
+	if (req && test_bit(FR_SENT, &req->flags)) {
+		clear_bit(FR_SENT, &req->flags);
+		req->outh = oh;
+	} else {
+		req = NULL;
+	}
+	spin_unlock(&fpq->lock);
+
+	if (!req)
+		return -ENOENT;
+
+	err = copy_out_args(from, req);
+	fuse_io_end(fpq, req, err);
+	if (err)
+		return err;
+
+	return nbytes;
+}
+
+/* Set up the lock and empty hash buckets of a processing queue */
+static void fuse_pqueue_init(struct fuse_pqueue *fpq)
+{
+	struct list_head *bucket = fpq->processing;
+	struct list_head *end = bucket + FUSE_PQ_HASH_SIZE;
+
+	spin_lock_init(&fpq->lock);
+	while (bucket < end)
+		INIT_LIST_HEAD(bucket++);
+	fpq->connected = 1;
+}
+
+/* Set up an input queue: wait queue, empty request and forget lists */
+static void fuse_iqueue_init(struct fuse_iqueue *fiq)
+{
+	INIT_LIST_HEAD(&fiq->forgets);
+	INIT_LIST_HEAD(&fiq->pending);
+	init_waitqueue_head(&fiq->waitq);
+	fiq->connected = 1;
+}
+
+/*
+ * Drop a reference on the device.  The final put frees the processing
+ * hash table, the per-cpu aux device table (allocated on demand by
+ * fudev_aux_bind()) and the device itself.
+ */
+static void fudev_put(void *priv)
+{
+	struct fudev *fud = priv;
+
+	if (!refcount_dec_and_test(&fud->count))
+		return;
+
+	/* free_percpu() accepts NULL, so this is safe if no aux ever bound */
+	free_percpu(fud->aux_devs);
+	kfree(fud->pq.processing);
+	kfree(fud);
+}
+
+/*
+ * Open of the main device: allocate a fudev together with its processing
+ * hash table, initialise both queues and attach it to the file.
+ */
+static int fudev_open(struct inode *inode, struct file *file)
+{
+	struct list_head *buckets;
+	struct fudev *fud;
+
+	fud = kzalloc(sizeof(*fud), GFP_KERNEL);
+	if (!fud)
+		return -ENOMEM;
+
+	buckets = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(*buckets), GFP_KERNEL);
+	if (!buckets) {
+		kfree(fud);
+		return -ENOMEM;
+	}
+
+	refcount_set(&fud->count, 1);
+	fud->pq.processing = buckets;
+	fuse_pqueue_init(&fud->pq);
+	fuse_iqueue_init(&fud->iq);
+
+	file->private_data = fud;
+
+	return 0;
+}
+
+/*
+ * Poll support: the device is always writable; it is readable when a
+ * request is pending and reports EPOLLERR alone once disconnected.
+ */
+static __poll_t fudev_poll(struct file *file, poll_table *wait)
+{
+	struct fuse_iqueue *fiq = &fudev(file)->iq;
+	__poll_t mask;
+
+	poll_wait(file, &fiq->waitq, wait);
+
+	spin_lock(&fiq->waitq.lock);
+	if (!fiq->connected) {
+		mask = EPOLLERR;
+	} else {
+		mask = EPOLLOUT | EPOLLWRNORM;
+		if (request_pending(fiq))
+			mask |= EPOLLIN | EPOLLRDNORM;
+	}
+	spin_unlock(&fiq->waitq.lock);
+
+	return mask;
+}
+
+/*
+ * Abort all requests on the given list (pending or processing).
+ *
+ * Must be called with @lock held.  The lock is dropped around each
+ * request_end() call and re-taken afterwards, which is why the list head
+ * is re-examined on every iteration instead of being walked.
+ */
+static void end_requests(struct list_head *head, spinlock_t *lock)
+{
+ struct fuse_req *req;
+
+ while (!list_empty(head)) {
+ req = list_entry(head->next, struct fuse_req, list);
+ list_del_init(&req->list);
+ req->outh.error = -ECONNABORTED;
+ clear_bit(FR_PENDING, &req->flags);
+ /* Hold a reference across request_end() while unlocked */
+ fuse2_get_request(req);
+ spin_unlock(lock);
+ request_end(req);
+ fuse2_put_request(req);
+ spin_lock(lock);
+ }
+}
+
+/*
+ * Disconnect the device: mark both queues as disconnected, fail every
+ * request on the processing and pending lists, drop queued forgets and
+ * wake all readers.
+ */
+static void fudev_abort(void *priv)
+{
+	struct fudev *fud = priv;
+	struct fuse_pqueue *fpq = &fud->pq;
+	struct fuse_iqueue *fiq = &fud->iq;
+	unsigned int bucket;
+
+	/* Requests already handed to userspace */
+	spin_lock(&fpq->lock);
+	fpq->connected = 0;
+	for (bucket = 0; bucket < FUSE_PQ_HASH_SIZE; bucket++)
+		end_requests(&fpq->processing[bucket], &fpq->lock);
+	spin_unlock(&fpq->lock);
+
+	/* Requests and forgets not yet read */
+	spin_lock(&fiq->waitq.lock);
+	fiq->connected = 0;
+
+	end_requests(&fiq->pending, &fiq->waitq.lock);
+	fuse_free_forgets(&fiq->forgets);
+	wake_up_all_locked(&fiq->waitq);
+	spin_unlock(&fiq->waitq.lock);
+}
+
+/* Last close of the main device: abort all requests, drop the file's ref */
+static int fudev_release(struct inode *inode, struct file *file)
+{
+	void *priv = fudev(file);
+
+	fudev_abort(priv);
+	fudev_put(priv);
+
+	return 0;
+}
+
+/*
+ * Handle the map ioctls, shared by the main and the aux device.
+ *
+ * FUSE2_DEV_IOC_MAP_OPEN takes a file descriptor in @arg and passes the
+ * file to fuse2_map_open(); FUSE2_DEV_IOC_MAP_CLOSE passes @arg to
+ * fuse2_map_close().  Any other command yields -ENOTTY.
+ *
+ * @fud is NULL when called for an aux device that has not been bound yet;
+ * that is rejected with -EINVAL instead of being dereferenced.
+ */
+static long fudev_map_ioctl(struct fudev *fud, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct file *file;
+
+	if (!fud)
+		return -EINVAL;
+
+	switch (cmd) {
+	case FUSE2_DEV_IOC_MAP_OPEN:
+		file = fget(arg);
+		if (!file)
+			return -EBADF;
+		/*
+		 * NOTE(review): the reference obtained above is presumably
+		 * consumed by fuse2_map_open() on all paths - confirm.
+		 */
+		return fuse2_map_open(fud->sb, file);
+
+	case FUSE2_DEV_IOC_MAP_CLOSE:
+		return fuse2_map_close(fud->sb, arg);
+
+	default:
+		return -ENOTTY;
+	}
+
+}
+
+/* ioctl entry point of the main device: only the map ioctls exist */
+static long fudev_ioctl(struct file *file, unsigned int cmd,
+			unsigned long arg)
+{
+	struct fudev *fud = fudev(file);
+
+	return fudev_map_ioctl(fud, cmd, arg);
+}
+
+/* File operations of the main device (registered as misc device "fuse2") */
+static const struct file_operations fudev_operations = {
+	.owner		= THIS_MODULE,
+	.open		= fudev_open,
+	.release	= fudev_release,
+	.read_iter	= fudev_read,
+	.write_iter	= fudev_write,
+	.poll		= fudev_poll,
+	.unlocked_ioctl	= fudev_ioctl,
+	.llseek		= no_llseek,
+};
+
+
+/* Open of the aux device: allocate a zeroed, not yet bound fudev_aux */
+static int fudev_aux_open(struct inode *inode, struct file *file)
+{
+	struct fudev_aux *fux = kzalloc(sizeof(*fux), GFP_KERNEL);
+
+	if (!fux)
+		return -ENOMEM;
+
+	file->private_data = fux;
+	init_waitqueue_head(&fux->waitq);
+
+	return 0;
+}
+
+/*
+ * FUSE2_DEV_IOC_BIND: attach this aux device to a main fuse2 device.
+ *
+ * @arg points to user memory holding the file descriptor of the main
+ * device.  That file must use fudev_operations and must have been opened
+ * in the same user namespace as the aux file.  An aux device can be bound
+ * only once.
+ *
+ * If the calling task is allowed to run on exactly one CPU, the aux
+ * device is also published in that CPU's slot of fud->aux_devs, which is
+ * allocated here on first use.
+ *
+ * Returns 0 on success, -EFAULT, -EINVAL or -ENOMEM on failure.
+ */
+static long fudev_aux_bind(struct file *file, unsigned long arg)
+{
+ int err;
+ int oldfd;
+ struct file *old;
+ struct fudev *fud;
+ struct fudev_aux *fux = fudev_aux(file);
+ const struct cpumask *cpumask = current->cpus_ptr;
+
+ if (get_user(oldfd, (__u32 __user *) arg) != 0)
+ return -EFAULT;
+
+ old = fget(oldfd);
+ if (!old)
+ return -EINVAL;
+
+ err = -EINVAL;
+ if (old->f_op != &fudev_operations ||
+ old->f_cred->user_ns != file->f_cred->user_ns)
+ goto out_put_old;
+
+ fud = fudev(old);
+
+ mutex_lock(&fudev_mutex);
+ err = -EINVAL;
+ /* Already bound */
+ if (fux->dev)
+ goto out_unlock;
+
+ if (cpumask_weight(cpumask) == 1) {
+ unsigned int cpu;
+
+ if (!fud->aux_devs) {
+ fud->aux_devs = alloc_percpu(struct fudev_aux *);
+ err = -ENOMEM;
+ if (!fud->aux_devs)
+ goto out_unlock;
+ }
+ cpu = get_cpu();
+ fux->cpu = cpu;
+ /* Request IDs of this aux device start at cpu + 1 */
+ fux->reqctr = cpu + 1;
+ pr_info("...binding to cpu %u\n", cpu);
+ WARN_ON(cpu != cpumask_first(cpumask));
+ /*
+ * NOTE(review): an aux device already published in this
+ * slot is overwritten without any check - confirm intended.
+ */
+ *per_cpu_ptr(fud->aux_devs, cpu) = fux;
+ put_cpu();
+ }
+
+ /* The aux device keeps its own reference on the main device */
+ refcount_inc(&fud->count);
+ fux->dev = fud;
+ err = 0;
+
+out_unlock:
+ mutex_unlock(&fudev_mutex);
+out_put_old:
+ fput(old);
+
+ return err;
+}
+
+/*
+ * Ordering point between updates of the aux device handshake fields
+ * (reserved / ready / done).  Currently only a compiler barrier.
+ */
+static inline void fudev_barrier(void)
+{
+	/* FIXME: ??? */
+	barrier();
+}
+
+/*
+ * Mark the request currently handled through @fux as finished: clear
+ * 'ready', then set 'done' and wake whoever sleeps on the wait queue.
+ */
+static void fudev_req_done(struct fudev_aux *fux)
+{
+	fux->ready = 0;
+	fudev_barrier();	/* keep 'ready' cleared before 'done' is set */
+	fux->done = 1;
+	wake_up(&fux->waitq);
+}
+
+/*
+ * FUSE2_DEV_IOC_PROC / FUSE2_DEV_IOC_READ handler for the mmap based
+ * protocol.  PROC first completes the request currently being handled
+ * (fudev_req_done()); both commands then sleep until the next request is
+ * flagged ready and return its length as found in the header at the start
+ * of the input buffer.
+ *
+ * The caller must be restricted to the single CPU recorded in fux->cpu,
+ * and the input buffer must have been mmapped: without it there is no
+ * header to read the length from, so -EINVAL is returned.
+ *
+ * Returns the request length, 0 if PROC was interrupted while waiting
+ * (the completion itself did succeed), or a negative errno.
+ */
+static long fudev_aux_proc(struct file *file, unsigned int cmd)
+{
+	struct fudev_aux *fux = fudev_aux(file);
+	int ret;
+
+	if (cpumask_weight(current->cpus_ptr) != 1 ||
+	    fux->cpu != smp_processor_id())
+		return -EINVAL;
+
+	/* Avoid dereferencing a NULL input buffer below */
+	if (!fux->ibuf)
+		return -EINVAL;
+
+	if (cmd == FUSE2_DEV_IOC_PROC)
+		fudev_req_done(fux);
+
+	ret = wait_event_interruptible(fux->waitq, fux->ready);
+	if (ret) {
+		/* Must report success of write! */
+		if (cmd == FUSE2_DEV_IOC_PROC)
+			return 0;
+		return ret;
+	}
+
+	return ((struct fuse_in_header *) fux->ibuf)->len;
+}
+
+/* ioctl dispatcher of the aux device; unknown commands yield -ENOTTY */
+static long fudev_aux_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	if (cmd == FUSE2_DEV_IOC_BIND)
+		return fudev_aux_bind(file, arg);
+
+	if (cmd == FUSE2_DEV_IOC_PROC || cmd == FUSE2_DEV_IOC_READ)
+		return fudev_aux_proc(file, cmd);
+
+	if (cmd == FUSE2_DEV_IOC_MAP_OPEN || cmd == FUSE2_DEV_IOC_MAP_CLOSE)
+		return fudev_map_ioctl(fudev_aux(file)->dev, cmd, arg);
+
+	return -ENOTTY;
+}
+
+/*
+ * Map the input or the output buffer of an aux device into userspace.
+ *
+ * The mapping must be exactly 0x2000 bytes long and start at the page
+ * offset of either FUSE2_MMAP_INBUF_OFFSET or FUSE2_MMAP_OUTBUF_OFFSET.
+ * The selected buffer is allocated (zeroed) on first use and its pages
+ * are inserted into the vma one by one.
+ */
+static int fudev_aux_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct fudev_aux *fux = fudev_aux(file);
+	const unsigned long length = vma->vm_end - vma->vm_start;
+	const bool want_in =
+		vma->vm_pgoff == (FUSE2_MMAP_INBUF_OFFSET >> PAGE_SHIFT);
+	const bool want_out =
+		vma->vm_pgoff == (FUSE2_MMAP_OUTBUF_OFFSET >> PAGE_SHIFT);
+	void **bufp = want_in ? &fux->ibuf : &fux->obuf;
+	size_t *bufsizep = want_in ? &fux->ibufsize : &fux->obufsize;
+	unsigned int off = 0;
+	int err;
+
+	if (length != 0x2000 || !(want_in || want_out))
+		return -EINVAL;
+
+	if (*bufp == NULL) {
+		*bufp = (void *)
+			__get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
+					 get_order(length));
+		if (*bufp == NULL)
+			return -ENOMEM;
+		*bufsizep = length;
+	}
+
+	while (off < length) {
+		err = vm_insert_page(vma, vma->vm_start + off,
+				     virt_to_page(*bufp + off));
+		if (err)
+			return err;
+		off += PAGE_SIZE;
+	}
+
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+
+	return 0;
+}
+
+/*
+ * Unpublish an aux device from its CPU's slot in fud->aux_devs.
+ * @info is the struct fudev_aux being released.
+ *
+ * An aux device bound by a task that was not pinned to a single CPU was
+ * never published: the table may then not exist at all, or the slot for
+ * fux->cpu may hold a different aux device.  Only clear the slot if it
+ * really refers to this device.
+ */
+static void fudev_aux_remove(void *info)
+{
+	struct fudev_aux *fux = info;
+	struct fudev *fud = fux->dev;
+	struct fudev_aux **slot;
+
+	if (!fud->aux_devs)
+		return;
+
+	slot = per_cpu_ptr(fud->aux_devs, fux->cpu);
+	if (*slot == fux)
+		*slot = NULL;
+}
+
+/*
+ * Last close of an aux device.  If it was bound, unpublish it (on the CPU
+ * it was bound to, falling back to a local call if the cross call fails)
+ * and drop the reference on the main device.  Then free both shared
+ * buffers and the aux device itself.
+ */
+static int fudev_aux_release(struct inode *inode, struct file *file)
+{
+	struct fudev_aux *fux = fudev_aux(file);
+
+	if (fux->dev) {
+		struct fudev *fud = fux->dev;
+
+		/* FIXME: synchronize with fudev_simple_send() */
+		if (smp_call_function_single(fux->cpu, fudev_aux_remove,
+					     fux, true))
+			fudev_aux_remove(fux);
+
+		fudev_put(fud);
+	}
+
+	free_pages((unsigned long) fux->ibuf, get_order(fux->ibufsize));
+	free_pages((unsigned long) fux->obuf, get_order(fux->obufsize));
+
+	kfree(fux);
+
+	return 0;
+}
+
+/*
+ * read() on an aux device: copy the request currently assigned to this
+ * aux device (fux->req) into the caller's buffer.
+ *
+ * The caller must be restricted to the single CPU recorded in fux->cpu.
+ * Sleeps until a request is flagged ready.  A request that does not fit
+ * the buffer is failed and the wait restarts.
+ *
+ * Returns the request size, or a negative errno.
+ */
+static ssize_t fudev_aux_read(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct file *file = iocb->ki_filp;
+ struct fudev_aux *fux = fudev_aux(file);
+ struct fuse_req *req;
+ unsigned int reqsize;
+ int err;
+
+ if (cpumask_weight(current->cpus_ptr) != 1 ||
+ fux->cpu != smp_processor_id())
+ return -EINVAL;
+
+restart:
+ err = wait_event_interruptible(fux->waitq, fux->ready);
+ if (err)
+ return err;
+
+ /*
+ * NOTE(review): in this file fux->req is only assigned by
+ * fudev_send_fast() when no input buffer is mmapped, and is never
+ * reset - confirm it cannot be stale here.
+ */
+ req = READ_ONCE(fux->req);
+ if (!req)
+ return -EIO;
+
+ reqsize = req->inh.len;
+
+ /* If request is too large, reply with an error and restart the read */
+ if (iov_iter_count(to) < reqsize) {
+ req->outh.error = -EIO;
+ /* SETXATTR is special, since it may contain too large data */
+ if (req->inh.opcode == FUSE_SETXATTR)
+ req->outh.error = -E2BIG;
+ fudev_req_done(fux);
+ goto restart;
+ }
+
+ /* Inline part first, then any page payload beyond it */
+ err = fuse_read_one(to, req->inlinedata, req->inline_inlen);
+ if (!err && reqsize > req->inline_inlen)
+ err = fuse_read_pages(to, req, reqsize - req->inline_inlen);
+
+ /* Copy failed: fail the request and release the sender */
+ if (err) {
+ req->outh.error = -EIO;
+ fudev_req_done(fux);
+ return err;
+ }
+
+ return reqsize;
+}
+
+/*
+ * write() on an aux device: accept the reply to the request currently
+ * assigned to this aux device (fux->req).
+ *
+ * The caller must be restricted to the single CPU recorded in fux->cpu.
+ * The buffer must contain one fuse_out_header whose len matches the write
+ * size and whose unique ID matches the request, followed by the payload.
+ * The request is marked done whether or not copying the payload worked.
+ */
+static ssize_t fudev_aux_write(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct fudev_aux *fux = fudev_aux(iocb->ki_filp);
+	unsigned int nbytes = iov_iter_count(from);
+	struct fuse_out_header oh;
+	struct fuse_req *req;
+	int err;
+
+	if (cpumask_weight(current->cpus_ptr) != 1 ||
+	    fux->cpu != smp_processor_id() ||
+	    nbytes < sizeof(oh))
+		return -EINVAL;
+
+	if (_copy_from_iter(&oh, sizeof(oh), from) != sizeof(oh))
+		return -EFAULT;
+
+	/*
+	 * Reject a length that disagrees with the write size, a zero unique
+	 * ID (that would be an unsolicited notification, with the code in
+	 * the error field) and an error value outside (-1000, 0].
+	 */
+	if (oh.len != nbytes || !oh.unique ||
+	    oh.error <= -1000 || oh.error > 0)
+		return -EINVAL;
+
+	req = READ_ONCE(fux->req);
+	if (!req || req->inh.unique != oh.unique)
+		return -EIO;
+
+	req->outh = oh;
+
+	err = copy_out_args(from, req);
+	if (err)
+		req->outh.error = -EIO;
+
+	fudev_req_done(fux);
+	if (err)
+		return err;
+
+	return nbytes;
+}
+
+/* File operations of the aux device (misc device "fuse2-aux") */
+static const struct file_operations fudev_aux_operations = {
+	.owner		= THIS_MODULE,
+	.open		= fudev_aux_open,
+	.release	= fudev_aux_release,
+	.read_iter	= fudev_aux_read,
+	.write_iter	= fudev_aux_write,
+	.mmap		= fudev_aux_mmap,
+	.unlocked_ioctl	= fudev_aux_ioctl,
+	.llseek		= no_llseek,
+};
+
+/* Main device node, dynamically numbered */
+static struct miscdevice fuse_miscdevice = {
+	.name	= "fuse2",
+	.minor	= MISC_DYNAMIC_MINOR,
+	.fops	= &fudev_operations,
+};
+
+/* Auxiliary device node, dynamically numbered */
+static struct miscdevice fuse_aux_miscdevice = {
+	.name	= "fuse2-aux",
+	.minor	= MISC_DYNAMIC_MINOR,
+	.fops	= &fudev_aux_operations,
+};
+
+/*
+ * Assign a unique ID to @req, append it to the pending list and wake a
+ * reader.  Returns 0, or -ENOTCONN if the device is disconnected.
+ */
+static int fudev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
+{
+	int err = -ENOTCONN;
+
+	spin_lock(&fiq->waitq.lock);
+	if (fiq->connected) {
+		req->inh.unique = fuse_get_unique(fiq);
+		list_add_tail(&req->list, &fiq->pending);
+		wake_up_locked(&fiq->waitq);
+		/*
+		 * acquire extra reference, since request is still needed
+		 * after request_end()
+		 */
+		refcount_inc(&req->count);
+		err = 0;
+	}
+	spin_unlock(&fiq->waitq.lock);
+
+	return err;
+}
+
+/*
+ * Wait until a queued request has been finished (FR_FINISHED).
+ *
+ * Unless the request is forced (FR_FORCE), a fatal signal may interrupt
+ * the wait: a request still on the pending list is unlinked and -EINTR is
+ * returned; a request already read by userspace returns -EINTR only if it
+ * is marked FR_KILLABLE.  In every other case the wait continues
+ * uninterruptibly.
+ *
+ * Returns req->outh.error, or -EINTR.
+ */
+static int fudev_wait_req(struct fuse_iqueue *fiq, struct fuse_req *req)
+{
+ int err;
+
+ if (!test_bit(FR_FORCE, &req->flags)) {
+ /* Only fatal signals may interrupt this */
+ err = wait_event_killable(req->waitq,
+ test_bit(FR_FINISHED, &req->flags));
+ if (!err)
+ return req->outh.error;
+
+ spin_lock(&fiq->waitq.lock);
+ /* Request is not yet in userspace, bail out */
+ if (test_bit(FR_PENDING, &req->flags)) {
+ list_del(&req->list);
+ spin_unlock(&fiq->waitq.lock);
+ /*
+ * Presumably drops the extra reference taken in
+ * fudev_queue_req() - TODO confirm.
+ */
+ fuse2_put_request(req);
+ return -EINTR;
+ }
+ spin_unlock(&fiq->waitq.lock);
+ if (test_bit(FR_KILLABLE, &req->flags))
+ return -EINTR;
+ }
+
+ /*
+ * Either request is already in userspace, or it was forced.
+ * Wait it out.
+ */
+ wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
+
+ return req->outh.error;
+}
+
+/*
+ * Build a request in the aux device's input buffer: a fuse_in_header
+ * followed by the input arguments of @args, packed back to back.
+ * Despite the name this is a plain memcpy() into fux->ibuf.
+ */
+static void fudev_simple_copy_to_user(struct fudev_aux *fux,
+				      struct fuse_args *args,
+				      unsigned int inlen, u64 unique, uid_t uid,
+				      gid_t gid, pid_t pid)
+{
+	struct fuse_in_header *hdr = fux->ibuf;
+	void *dst = hdr + 1;
+	unsigned int n;
+
+	hdr->len = inlen;
+	hdr->opcode = args->in.h.opcode;
+	hdr->unique = unique;
+	hdr->nodeid = args->in.h.nodeid;
+	hdr->uid = uid;
+	hdr->gid = gid;
+	hdr->pid = pid;
+	hdr->padding = 0;
+
+	for (n = 0; n < args->in.numargs; n++) {
+		memcpy(dst, args->in.args[n].value, args->in.args[n].size);
+		dst += args->in.args[n].size;
+	}
+}
+
+/*
+ * Parse the reply found in the aux device's output buffer and copy the
+ * output arguments into @args.  Despite the name this is a plain
+ * memcpy() out of fux->obuf.
+ *
+ * The header fields are read once and validated before use: the unique
+ * ID must match, the length must cover at least the header and at most
+ * @outlen, and the error code must lie in (-1000, 0], the same range
+ * fudev_write() accepts.
+ *
+ * Returns the negative error reported by userspace, -EIO on a malformed
+ * reply, the payload size if the reply is variable sized
+ * (args->out.argvar), and 0 otherwise.
+ */
+static ssize_t fudev_simple_copy_from_user(struct fudev_aux *fux,
+					   struct fuse_args *args,
+					   unsigned int outlen, u64 unique)
+{
+	struct fuse_out_header *outh = fux->obuf;
+	void *ptr = outh + 1;
+	struct fuse_arg *a;
+	unsigned int i;
+	size_t len;
+	int error;
+
+	if (outh->unique != unique)
+		return -EIO;
+
+	/* A length below the header size would underflow the result */
+	len = READ_ONCE(outh->len);
+	if (len < sizeof(*outh) || len > outlen)
+		return -EIO;
+
+	/* Never hand a positive or out-of-range "error" to the caller */
+	error = READ_ONCE(outh->error);
+	if (error <= -1000 || error > 0)
+		return -EIO;
+	if (error)
+		return error;
+
+	for (i = 0; i < args->out.numargs; i++) {
+		a = &args->out.args[i];
+		/* FIXME: truncate to the reply length? */
+		if (a->value)
+			memcpy(a->value, ptr, a->size);
+		ptr += a->size;
+	}
+	if (args->out.argvar)
+		return len - sizeof(*outh);
+
+	return 0;
+}
+
+/*
+ * Wake whoever sleeps on the aux device's wait queue and block until
+ * fux->done is set.  The wait cannot be interrupted; the function always
+ * returns 0.  @args is only referenced by the disabled killable variant.
+ */
+static int fudev_simple_wake_and_wait(struct fudev_aux *fux,
+ struct fuse_args *args)
+{
+ wake_up(&fux->waitq);
+
+ /* FIXME: make aux dev requests killable */
+#if 0
+ if (!args->force && args->killable)
+ return wait_event_killable(fux->waitq, fux->done);
+#endif
+
+ wait_event(fux->waitq, fux->done);
+ return 0;
+}
+
+/* Sum of the sizes of the first @numargs entries of @args */
+static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+{
+	unsigned total = 0;
+
+	while (numargs--)
+		total += args[numargs].size;
+
+	return total;
+}
+
+/*
+ * Send a simple (argument only) request through the aux device bound to
+ * the current CPU, using the mmapped input and output buffers.
+ *
+ * Returns -EPROBE_DEFER when this path cannot be used: no aux device was
+ * ever published, none is published for this CPU, it is busy, or the
+ * request or reply does not fit the shared buffers.  Otherwise returns
+ * the result of fudev_simple_copy_from_user().
+ *
+ * While the request is in flight the task's cpus_mask is narrowed to the
+ * current CPU; the original mask is restored before returning.
+ */
+static ssize_t fudev_simple_send(void *priv, struct fuse_args *args, uid_t uid,
+				 gid_t gid, pid_t pid)
+{
+	unsigned int cpu;
+	struct fudev *fud = priv;
+	struct fudev_aux *fux;
+	unsigned int inlen = sizeof(struct fuse_in_header) +
+		len_args(args->in.numargs, (struct fuse_arg *) args->in.args);
+	unsigned int outlen = sizeof(struct fuse_out_header) +
+		len_args(args->out.numargs, args->out.args);
+	ssize_t ret;
+	u64 unique;
+	cpumask_t orig_mask;
+
+	/* The per-cpu table only exists once an aux device has been bound */
+	if (!fud->aux_devs)
+		return -EPROBE_DEFER;
+
+	cpu = get_cpu();
+	fux = *per_cpu_ptr(fud->aux_devs, cpu);
+	if (!fux || fux->reserved ||
+	    inlen > fux->ibufsize || outlen > fux->obufsize) {
+		put_cpu();
+		return -EPROBE_DEFER;
+	}
+	fux->reserved = 1;
+	fudev_barrier();
+	cpumask_copy(&orig_mask, current->cpus_ptr);
+	cpumask_copy(&current->cpus_mask, cpumask_of(cpu));
+	put_cpu();
+
+	unique = fux->reqctr;
+	fux->reqctr += FUSE_REQ_ID_STEP;
+
+	fudev_simple_copy_to_user(fux, args, inlen, unique, uid, gid, pid);
+	/* Publish the request: clear 'done' before raising 'ready' */
+	fux->done = 0;
+	fudev_barrier();
+	fux->ready = 1;
+
+	ret = fudev_simple_wake_and_wait(fux, args);
+	if (!ret)
+		ret = fudev_simple_copy_from_user(fux, args, outlen, unique);
+
+	fudev_barrier();
+	fux->reserved = 0;
+	cpumask_copy(&current->cpus_mask, &orig_mask);
+
+	return ret;
+}
+
+/* Regular path: queue the request on the main device and wait for it */
+static int fudev_send_slow(struct fudev *fud, struct fuse_req *req)
+{
+	int err = fudev_queue_req(&fud->iq, req);
+
+	if (err)
+		return err;
+
+	return fudev_wait_req(&fud->iq, req);
+}
+
+/*
+ * Try to send @req through the aux device bound to the current CPU.
+ *
+ * Two variants exist.  If the aux device has no mmapped input buffer the
+ * request itself is handed over (fux->req) and userspace uses read() and
+ * write() on the aux device; the result is then req->outh.error.
+ * Otherwise the request is serialised into the shared input buffer and
+ * the reply is parsed from the shared output buffer.
+ *
+ * Returns -EPROBE_DEFER when this path cannot be used (no aux device
+ * table, no aux device for this CPU, device busy, or the request or
+ * reply does not fit the buffers), so the caller can fall back to the
+ * regular queue.  Otherwise returns 0 or a negative error.
+ */
+static int fudev_send_fast(struct fudev *fud, struct fuse_req *req)
+{
+	unsigned int cpu;
+	struct fudev_aux *fux;
+	ssize_t ret;
+	u64 unique;
+	cpumask_t orig_mask;
+	void *ptr, *addr;
+	struct fuse_out_header *outh;
+	struct fuse_page_desc *pd;
+	unsigned int i, thislen, rem;
+	int error;
+
+	/* The per-cpu table only exists once an aux device has been bound */
+	if (!fud->aux_devs)
+		return -EPROBE_DEFER;
+
+	cpu = get_cpu();
+	fux = *per_cpu_ptr(fud->aux_devs, cpu);
+	if (!fux || fux->reserved) {
+		put_cpu();
+		return -EPROBE_DEFER;
+	}
+
+	if (!fux->ibuf) {
+		/* read()/write() variant: hand the request itself over */
+		fux->reserved = 1;
+		fudev_barrier();
+		req->inh.unique = unique = fux->reqctr;
+		fux->reqctr += FUSE_REQ_ID_STEP;
+		fux->req = req;
+		fux->done = 0;
+		fudev_barrier();
+		fux->ready = 1;
+
+		put_cpu();
+		ret = fudev_simple_wake_and_wait(fux, NULL);
+		/* The request is finished: don't leave a stale pointer */
+		WRITE_ONCE(fux->req, NULL);
+		/* Report the reply's error, as the other send paths do */
+		if (!ret)
+			ret = req->outh.error;
+		fudev_barrier();
+		fux->reserved = 0;
+
+		return ret;
+	}
+	if (req->inh.len > fux->ibufsize || req->max_outlen > fux->obufsize) {
+		put_cpu();
+		return -EPROBE_DEFER;
+	}
+
+	fux->reserved = 1;
+	fudev_barrier();
+	cpumask_copy(&orig_mask, current->cpus_ptr);
+	cpumask_copy(&current->cpus_mask, cpumask_of(cpu));
+	put_cpu();
+
+	req->inh.unique = unique = fux->reqctr;
+	fux->reqctr += FUSE_REQ_ID_STEP;
+
+	/* Serialise the request: inline data followed by the pages */
+	ptr = fux->ibuf;
+	memcpy(ptr, req->inlinedata, req->inline_inlen);
+	ptr += req->inline_inlen;
+	for (i = 0; i < req->num_pages; i++) {
+		pd = &req->page_descs[i];
+		addr = kmap_atomic(req->pages[i]);
+		memcpy(ptr, addr + pd->offset, pd->length);
+		kunmap_atomic(addr);
+		ptr += pd->length;
+	}
+
+	/* Publish the request: clear 'done' before raising 'ready' */
+	fux->done = 0;
+	fudev_barrier();
+	fux->ready = 1;
+
+	ret = fudev_simple_wake_and_wait(fux, NULL);
+	if (!ret) {
+		outh = ptr = fux->obuf;
+
+		/* The buffer is shared with userspace: read fields once */
+		rem = READ_ONCE(outh->len);
+		if (outh->unique != unique || rem < sizeof(*outh)) {
+			ret = -EIO;
+			goto out;
+		}
+		/* Same range of error codes that fudev_write() accepts */
+		error = READ_ONCE(outh->error);
+		if (error <= -1000 || error > 0) {
+			ret = -EIO;
+			goto out;
+		}
+		if (error) {
+			ret = error;
+			goto out;
+		}
+		if (rem < req->mand_outlen || rem > req->max_outlen) {
+			ret = -EIO;
+			goto out;
+		}
+
+		/* Never copy more than was sent: 'rem' must not underflow */
+		thislen = min_t(unsigned int, rem, req->inline_outlen);
+		memcpy(req->inlinedata, ptr, thislen);
+		ptr += thislen;
+		rem -= thislen;
+
+		for (i = 0; rem && i < req->num_pages; i++) {
+			pd = &req->page_descs[i];
+			thislen = min(pd->length, rem);
+			addr = kmap_atomic(req->pages[i]);
+			memcpy(addr + pd->offset, ptr, thislen);
+			kunmap_atomic(addr);
+			ptr += thislen;
+			rem -= thislen;
+		}
+	}
+
+out:
+	fudev_barrier();
+	fux->reserved = 0;
+	cpumask_copy(&current->cpus_mask, &orig_mask);
+
+	return ret;
+}
+
+/*
+ * Send a request: try the per-cpu aux device first and fall back to the
+ * regular queue when the fast path reports -EPROBE_DEFER.
+ */
+static int fudev_send(void *priv, struct fuse_req *req)
+{
+	int ret = fudev_send_fast(priv, req);
+
+	if (ret != -EPROBE_DEFER)
+		return ret;
+
+	return fudev_send_slow(priv, req);
+}
+
+/*
+ * Queue a forget for delivery to userspace and wake a reader.  If the
+ * device is already disconnected the forget is freed instead.
+ */
+static void fudev_forget(void *priv, struct fuse_forget *forget)
+{
+	struct fuse_iqueue *fiq = &((struct fudev *) priv)->iq;
+
+	spin_lock(&fiq->waitq.lock);
+	if (!fiq->connected) {
+		kfree(forget);
+	} else {
+		list_add_tail(&forget->list, &fiq->forgets);
+		wake_up_locked(&fiq->waitq);
+	}
+	spin_unlock(&fiq->waitq.lock);
+}
+
+/* Device operations passed to fuse_mount_common() by fudev_mount() */
+static const struct fuse_dev_operations fudev_ops = {
+	.send		= fudev_send,
+	.simple_send	= fudev_simple_send,
+	.forget		= fudev_forget,
+	.abort		= fudev_abort,
+	.put		= fudev_put,
+};
+
+/*
+ * Look up the main device behind file descriptor @fd and take a
+ * reference on it.  Returns ERR_PTR(-EINVAL) if @fd is not an open fuse2
+ * device, or if it was opened in a different user namespace; in the
+ * latter case the device is aborted as well.
+ */
+static struct fudev *fudev_get(int fd)
+{
+	struct file *file = fget(fd);
+	struct fudev *fud;
+
+	if (!file)
+		return ERR_PTR(-EINVAL);
+
+	if (file->f_op != &fudev_operations) {
+		fput(file);
+		return ERR_PTR(-EINVAL);
+	}
+
+	fud = fudev(file);
+	/*
+	 * Require mount to happen from the same user namespace which
+	 * opened /dev/fuse to prevent potential attacks.
+	 */
+	if (file->f_cred->user_ns == current_user_ns()) {
+		refcount_inc(&fud->count);
+	} else {
+		fudev_abort(fud);
+		fud = ERR_PTR(-EINVAL);
+	}
+	fput(file);
+
+	return fud;
+}
+
+/* Mount options extracted by parse_fuse_opt() */
+struct fuse_mount_data {
+ int fd; /* value of the "fd=" option */
+};
+
+/* Token ids of the mount options recognised by parse_fuse_opt() */
+enum {
+ OPT_FD,
+ OPT_ROOTMODE,
+ OPT_ERR
+};
+
+/* Patterns for match_token(); the NULL pattern entry ends the table */
+static const match_table_t tokens = {
+ {OPT_FD, "fd=%u"},
+ {OPT_ROOTMODE, "rootmode=%o"},
+ {OPT_ERR, NULL}
+};
+
+/*
+ * Parse the comma separated mount option string @opt in place.
+ *
+ * "fd=N" is stored in d->fd and removed from the string, "rootmode=" is
+ * rejected, and every other option is moved towards the front of @opt so
+ * that on success the string holds only the options not consumed here.
+ *
+ * Returns 1 on success and 0 on failure (malformed or missing fd,
+ * rootmode given).  On failure the contents of @opt are unspecified.
+ */
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
+{
+ /* t: write position for kept options, e: last separator written */
+ char *p, *t = opt, *e = opt;
+ bool fd_present = false;
+
+ memset(d, 0, sizeof(struct fuse_mount_data));
+
+ while ((p = strsep(&opt, ",")) != NULL) {
+ int token;
+ int value;
+ size_t len;
+ substring_t args[MAX_OPT_ARGS];
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case OPT_FD:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->fd = value;
+ fd_present = true;
+ break;
+
+ case OPT_ROOTMODE:
+ pr_info("fuse: legacy mount mode not supported\n");
+ return 0;
+
+ default:
+ /* Keep the option: move it down and re-add the comma */
+ len = strlen(p);
+ memmove(t, p, len);
+ *(e = t + len) = ',';
+ t = e + 1;
+ }
+ }
+
+ if (!fd_present)
+ return 0;
+
+ /* Replace the trailing comma (or empty the string) with the terminator */
+ *e = '\0';
+
+ return 1;
+}
+
+/*
+ * Mount entry point: parse the options, look up the device given by
+ * "fd=" and hand over to fuse_mount_common().  On success the new super
+ * block is recorded in the device.
+ */
+static struct dentry *fudev_mount(struct file_system_type *fs_type,
+				  int flags, const char *dev_name, void *opts)
+{
+	struct fuse_mount_data d;
+	struct dentry *root;
+	struct fudev *fud;
+
+	if (!parse_fuse_opt(opts, &d))
+		return ERR_PTR(-EINVAL);
+
+	fud = fudev_get(d.fd);
+	if (IS_ERR(fud))
+		return ERR_CAST(fud);
+
+	root = fuse_mount_common(fs_type, flags, opts, &fudev_ops, fud);
+	if (IS_ERR(root))
+		return root;
+
+	fud->sb = root->d_sb;
+
+	return root;
+}
+
+/* The "fuse2" filesystem type; mountable from user namespaces */
+static struct file_system_type fuse_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "fuse2",
+	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
+	.mount		= fudev_mount,
+	.kill_sb	= fuse_kill_sb,
+};
+/*
+ * The alias has to match .name for mount-time module autoloading; "fuse"
+ * would instead claim the alias of the regular fuse driver.
+ */
+MODULE_ALIAS_FS("fuse2");
+
+/*
+ * Module init: register the filesystem type and both misc devices,
+ * undoing the earlier steps if a later one fails.
+ */
+static int __init fudev_init(void)
+{
+	int err;
+
+	err = register_filesystem(&fuse_fs_type);
+	if (err)
+		return err;
+
+	err = misc_register(&fuse_miscdevice);
+	if (err) {
+		unregister_filesystem(&fuse_fs_type);
+		return err;
+	}
+
+	err = misc_register(&fuse_aux_miscdevice);
+	if (err) {
+		misc_deregister(&fuse_miscdevice);
+		unregister_filesystem(&fuse_fs_type);
+		return err;
+	}
+
+	pr_info("fuse2 device initialized\n");
+
+	return 0;
+}
+module_init(fudev_init);
+
+/*
+ * Module exit: unregister the filesystem type and both misc devices.
+ * Marked __exit so the code can be discarded when it can never run.
+ */
+static void __exit fudev_cleanup(void)
+{
+	unregister_filesystem(&fuse_fs_type);
+	misc_deregister(&fuse_miscdevice);
+	misc_deregister(&fuse_aux_miscdevice);
+}
+module_exit(fudev_cleanup);
diff --git a/fs/fuse2/fuse_i.h b/fs/fuse2/fuse_i.h
new file mode 100644
index 0000000..03dbbd2
--- /dev/null
+++ b/fs/fuse2/fuse_i.h
@@ -0,0 +1,321 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#ifndef _FS_FUSE_I_H
+#define _FS_FUSE_I_H
+
+#include "dev.h"
+
+/** Default max number of pages that can be used in a single read request */
+#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
+
+/** Maximum of max_pages received in init_out */
+#define FUSE_MAX_MAX_PAGES 256
+
+/** Bias for fi->writectr, meaning new writepages must not be sent */
+#define FUSE_NOWRITE INT_MIN
+
+/** It could be as large as PATH_MAX, but would that have any uses? */
+#define FUSE_NAME_MAX 1024
+
+/** Number of dentries for each connection in the control filesystem */
+#define FUSE_CTL_NUM_DENTRIES 5
+
+/** FUSE inode */
+struct fuse_inode {
+ /** Inode data */
+ struct inode inode;
+
+ /** Unique ID, which identifies the inode between userspace
+ * and kernel */
+ u64 nodeid;
+
+ /** Number of lookups on this inode */
+ u64 nlookup;
+
+ /** Preallocated forget entry, queued when the inode is evicted */
+ struct fuse_forget *forget;
+
+ /** Miscellaneous bits describing inode state */
+ unsigned long state;
+
+ /** Lock to protect write related fields */
+ spinlock_t lock;
+};
+
+struct fuse_conn;
+
+/** FUSE specific file data */
+struct fuse_file {
+ /** Fuse connection for this file */
+ struct fuse_conn *fc;
+
+ /** Kernel file handle, guaranteed to be unique */
+ u64 kh;
+
+ /** File handle used by userspace */
+ u64 fh;
+
+ /** Node id of this file */
+ u64 nodeid;
+
+ /** FOPEN_* flags returned by open */
+ u32 open_flags;
+
+ /** Has flock been performed on this file? */
+ bool flock:1;
+};
+
+#define FUSE_ARGS(args) struct fuse_args args = {}
+
+/**
+ * A Fuse connection.
+ *
+ * This structure is created, when the filesystem is mounted, and is
+ * destroyed, when the client device is closed and the filesystem is
+ * unmounted.
+ */
+struct fuse_conn {
+ /** Refcount */
+ refcount_t count;
+
+ struct rcu_head rcu;
+
+ /** The pid namespace for this mount */
+ struct pid_namespace *pid_ns;
+
+ /** The user namespace for this mount */
+ struct user_namespace *user_ns;
+
+ /** Maximum number of pages that can be used in a single request */
+ unsigned int max_pages;
+
+ /** The next unique kernel file handle */
+ atomic64_t khctr;
+
+ /*
+ * The following bitfields are only for optimization purposes
+ * and hence races in setting them will not cause malfunction
+ */
+
+ /** Is fsync not implemented by fs? */
+ unsigned no_fsync:1;
+
+ /** Is fsyncdir not implemented by fs? */
+ unsigned no_fsyncdir:1;
+
+ /** Is flush not implemented by fs? */
+ unsigned no_flush:1;
+
+ /** Is setxattr not implemented by fs? */
+ unsigned no_setxattr:1;
+
+ /** Is getxattr not implemented by fs? */
+ unsigned no_getxattr:1;
+
+ /** Is listxattr not implemented by fs? */
+ unsigned no_listxattr:1;
+
+ /** Is removexattr not implemented by fs? */
+ unsigned no_removexattr:1;
+
+ /** Are posix file locking primitives not implemented by fs? */
+ unsigned no_lock:1;
+
+ /** Is create not implemented by fs? */
+ unsigned no_create:1;
+
+ /** Are BSD file locking primitives not implemented by fs? */
+ unsigned no_flock:1;
+
+ /** Is fallocate not implemented by fs? */
+ unsigned no_fallocate:1;
+
+ /** Is rename with flags not implemented by fs? */
+ unsigned no_rename2:1;
+
+ /** Is lseek not implemented by fs? */
+ unsigned no_lseek:1;
+
+ /** Does the filesystem support posix acls? */
+ unsigned posix_acl:1;
+
+ /** Negotiated minor version */
+ unsigned minor;
+
+ /** Key for lock owner ID scrambling */
+ u32 scramble_key[4];
+
+ /* fuse device operations */
+ const struct fuse_dev_operations *dev_ops;
+
+ /* device data */
+ void *dev_priv;
+
+ /* Map file */
+ struct file *mapfile;
+};
+
+static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
+{
+	return (struct fuse_conn *) sb->s_fs_info; /* stored per super block */
+}
+
+static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
+{
+	return inode->i_sb->s_fs_info; /* connection of the inode's sb */
+}
+
+static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
+{
+	return container_of(inode, struct fuse_inode, inode); /* embedded */
+}
+
+static inline u64 get_node_id(struct inode *inode)
+{
+	return container_of(inode, struct fuse_inode, inode)->nodeid;
+}
+
+static inline int invalid_nodeid(u64 nodeid)
+{
+	return nodeid == 0 || nodeid == FUSE_ROOT_ID; /* zero and root */
+}
+
+extern const struct dentry_operations fuse2_dentry_operations;
+
+/**
+ * Inode to nodeid comparison.
+ */
+int fuse2_inode_eq(struct inode *inode, void *_nodeidp);
+
+/**
+ * Get a filled in inode
+ */
+struct inode *fuse2_iget(struct super_block *sb, u64 nodeid,
+ int generation, struct fuse_attr *attr);
+
+/**
+ * Initialize READ or READDIR request
+ */
+void fuse2_read_fill(struct fuse_req *req, struct fuse_read_in **inarg,
+ struct file *file, loff_t pos, size_t count, int opcode);
+
+/**
+ * Send OPEN or OPENDIR request
+ */
+int fuse2_open_common(struct inode *inode, struct file *file, bool isdir);
+
+struct fuse_file *fuse2_file_alloc(struct fuse_conn *fc);
+void fuse2_file_free(struct fuse_file *ff);
+void fuse2_finish_open(struct inode *inode, struct file *file);
+
+/**
+ * Send RELEASE or RELEASEDIR request
+ */
+void fuse2_release_common(struct fuse_conn *fc, struct fuse_file *ff,
+ int flags, fl_owner_t id, bool isdir);
+
+/**
+ * Send FSYNC or FSYNCDIR request
+ */
+int fuse2_fsync_common(struct file *file, loff_t start, loff_t end,
+ int datasync, int opcode);
+
+/**
+ * Notify poll wakeup
+ */
+int fuse_notify_poll_wakeup(struct fuse_conn *fc,
+ struct fuse_notify_poll_wakeup_out *outarg);
+
+/**
+ * Initialize file operations on a regular file
+ */
+void fuse2_init_file_inode(struct inode *inode);
+
+/**
+ * Initialize inode operations on regular files and special files
+ */
+void fuse2_init_common(struct inode *inode);
+
+/**
+ * Initialize inode and file operations on a directory
+ */
+void fuse2_init_dir(struct inode *inode);
+
+/**
+ * Initialize inode operations on a symlink
+ */
+void fuse2_init_symlink(struct inode *inode);
+
+/**
+ * Change attributes of an inode
+ */
+void fuse2_change_attributes(struct inode *inode, struct fuse_attr *attr);
+
+void fuse2_change_attributes_common(struct inode *inode, struct fuse_attr *attr);
+
+int fuse_req_cache_init(void);
+void fuse_req_cache_cleanup(void);
+
+/**
+ * Get a request, may fail with -ENOMEM,
+ * caller should specify # elements in req->pages[] explicitly
+ */
+struct fuse_req *fuse2_get_req(struct fuse_conn *fc, unsigned npages);
+
+struct fuse_forget *fuse2_alloc_forget(void);
+
+/*
+ * Send a request (synchronous)
+ */
+int fuse2_request_send(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Simple request sending that does request allocation and freeing
+ */
+ssize_t fuse2_simple_request(struct fuse_conn *fc, struct fuse_args *args);
+
+/*
+ * Send FORGET command
+ */
+void fuse2_queue_forget(struct fuse_conn *fc, struct fuse_forget *forget,
+ u64 nodeid, u64 nlookup);
+
+void fuse2_force_forget(struct fuse_conn *fc, u64 nodeid);
+
+/**
+ * Is current process allowed to perform filesystem operation?
+ */
+int fuse2_allow_current_process(struct fuse_conn *fc);
+
+u64 fuse2_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
+
+int fuse2_update_attributes(struct inode *inode, struct file *file);
+
+int fuse2_setxattr(struct inode *inode, const char *name, const void *value,
+ size_t size, int flags);
+ssize_t fuse2_getxattr(struct inode *inode, const char *name, void *value,
+ size_t size);
+ssize_t fuse2_listxattr(struct dentry *entry, char *list, size_t size);
+int fuse2_removexattr(struct inode *inode, const char *name);
+extern const struct xattr_handler *fuse2_xattr_handlers[];
+extern const struct xattr_handler *fuse2_acl_xattr_handlers[];
+extern const struct xattr_handler *fuse2_no_acl_xattr_handlers[];
+
+struct posix_acl;
+struct posix_acl *fuse2_get_acl(struct inode *inode, int type);
+int fuse2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+
+
+/* readdir.c */
+int fuse2_readdir(struct file *file, struct dir_context *ctx);
+
+/* map.c */
+struct file *fuse2_map_get(struct fuse_conn *fc, u64 mapfd);
+
+#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse2/inode.c b/fs/fuse2/inode.c
new file mode 100644
index 0000000..1060f07
--- /dev/null
+++ b/fs/fuse2/inode.c
@@ -0,0 +1,635 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/module.h>
+#include <linux/statfs.h>
+#include <linux/random.h>
+#include <linux/backing-dev.h>
+#include <linux/pid_namespace.h>
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Filesystem in Userspace");
+MODULE_LICENSE("GPL");
+
+static struct kmem_cache *fuse_inode_cachep;
+
+#define FUSE_SUPER_MAGIC 0x65735546
+
+#define FUSE_DEFAULT_BLKSIZE 512
+
+static struct inode *fuse_alloc_inode(struct super_block *sb)
+{
+ struct fuse_inode *fi;
+
+ fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
+ if (!fi)
+ return NULL;
+
+ fi->nodeid = 0;
+ fi->nlookup = 0;
+ fi->state = 0;
+ spin_lock_init(&fi->lock);
+ fi->forget = fuse2_alloc_forget();
+ if (!fi->forget) {
+ kmem_cache_free(fuse_inode_cachep, fi);
+ return NULL;
+ }
+
+ return &fi->inode;
+}
+
+/* RCU callback: free the fuse_inode that embeds the inode owning @head. */
+static void fuse_i_callback(struct rcu_head *head)
+{
+	kmem_cache_free(fuse_inode_cachep, get_fuse_inode(container_of(head, struct inode, i_rcu)));
+}
+
+static void fuse_destroy_inode(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ kfree(fi->forget);
+ call_rcu(&inode->i_rcu, fuse_i_callback);
+}
+
+static void fuse_evict_inode(struct inode *inode)
+{
+ truncate_inode_pages_final(&inode->i_data);
+ clear_inode(inode);
+ if (inode->i_sb->s_flags & SB_ACTIVE) {
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ fuse2_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
+ fi->forget = NULL;
+ }
+}
+
+static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+ sync_filesystem(sb);
+ if (*flags & SB_MANDLOCK)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
+ * so that it will fit.
+ */
+static ino_t fuse_squash_ino(u64 ino64)
+{
+ ino_t ino = (ino_t) ino64;
+ if (sizeof(ino_t) < sizeof(u64))
+ ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
+ return ino;
+}
+
+void fuse2_change_attributes_common(struct inode *inode, struct fuse_attr *attr)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ lockdep_assert_held(&fi->lock);
+
+ inode->i_ino = fuse_squash_ino(attr->ino);
+ inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
+ set_nlink(inode, attr->nlink);
+ inode->i_uid = make_kuid(fc->user_ns, attr->uid);
+ inode->i_gid = make_kgid(fc->user_ns, attr->gid);
+ inode->i_blocks = attr->blocks;
+ inode->i_atime.tv_sec = attr->atime;
+ inode->i_atime.tv_nsec = attr->atimensec;
+ inode->i_mtime.tv_sec = attr->mtime;
+ inode->i_mtime.tv_nsec = attr->mtimensec;
+ inode->i_ctime.tv_sec = attr->ctime;
+ inode->i_ctime.tv_nsec = attr->ctimensec;
+
+ if (attr->blksize != 0)
+ inode->i_blkbits = ilog2(attr->blksize);
+ else
+ inode->i_blkbits = inode->i_sb->s_blocksize_bits;
+}
+
+/* Apply @attr to @inode; a changed size also invalidates a regular file's pages. */
+void fuse2_change_attributes(struct inode *inode, struct fuse_attr *attr)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	loff_t oldsize;
+
+	/* before taking spinlock, check if anything has changed */
+	if (i_size_read(inode) == attr->size &&
+	    inode->i_ino == fuse_squash_ino(attr->ino) &&
+	    (inode->i_mode & 07777) == (attr->mode & 07777) &&
+	    inode->i_nlink == attr->nlink &&
+	    uid_eq(inode->i_uid, make_kuid(fc->user_ns, attr->uid)) &&
+	    gid_eq(inode->i_gid, make_kgid(fc->user_ns, attr->gid)) &&
+	    inode->i_blocks == attr->blocks &&
+	    inode->i_atime.tv_sec == attr->atime &&
+	    inode->i_atime.tv_nsec == attr->atimensec &&
+	    inode->i_mtime.tv_sec == attr->mtime &&
+	    inode->i_mtime.tv_nsec == attr->mtimensec &&
+	    inode->i_ctime.tv_sec == attr->ctime &&
+	    inode->i_ctime.tv_nsec == attr->ctimensec &&
+	    ((attr->blksize != 0 && inode->i_blkbits == ilog2(attr->blksize)) ||
+	     (attr->blksize == 0 && inode->i_blkbits == inode->i_sb->s_blocksize_bits)))
+		return;
+
+	/* fi->lock is held across both the attribute and the i_size update */
+	spin_lock(&fi->lock);
+	fuse2_change_attributes_common(inode, attr);
+
+	oldsize = inode->i_size;
+	i_size_write(inode, attr->size);
+	spin_unlock(&fi->lock);
+
+	if (S_ISREG(inode->i_mode)) {
+		if (oldsize != attr->size) {
+			truncate_pagecache(inode, attr->size);
+			invalidate_inode_pages2(inode->i_mapping);
+		}
+	}
+}
+
+static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
+{
+ inode->i_mode = attr->mode & S_IFMT;
+ inode->i_size = attr->size;
+ inode->i_mtime.tv_sec = attr->mtime;
+ inode->i_mtime.tv_nsec = attr->mtimensec;
+ inode->i_ctime.tv_sec = attr->ctime;
+ inode->i_ctime.tv_nsec = attr->ctimensec;
+ if (S_ISREG(inode->i_mode)) {
+ fuse2_init_common(inode);
+ fuse2_init_file_inode(inode);
+ } else if (S_ISDIR(inode->i_mode))
+ fuse2_init_dir(inode);
+ else if (S_ISLNK(inode->i_mode))
+ fuse2_init_symlink(inode);
+ else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ fuse2_init_common(inode);
+ init_special_inode(inode, inode->i_mode,
+ new_decode_dev(attr->rdev));
+ } else
+ BUG();
+}
+
+/* Match callback handed to iget5_locked(): returns 1 when @inode's nodeid
+ * equals the u64 that @_nodeidp points to, 0 otherwise. */
+int fuse2_inode_eq(struct inode *inode, void *_nodeidp)
+{
+	const u64 *wanted = _nodeidp;
+
+	return get_node_id(inode) == *wanted;
+}
+
+/* Set callback handed to iget5_locked(): record the nodeid in the fuse_inode. */
+static int fuse_inode_set(struct inode *inode, void *_nodeidp)
+{
+	get_fuse_inode(inode)->nodeid = *(const u64 *) _nodeidp;
+	return 0;
+}
+
+struct inode *fuse2_iget(struct super_block *sb, u64 nodeid,
+ int generation, struct fuse_attr *attr)
+{
+ struct inode *inode;
+ struct fuse_inode *fi;
+
+ retry:
+ inode = iget5_locked(sb, nodeid, fuse2_inode_eq, fuse_inode_set, &nodeid);
+ if (!inode)
+ return NULL;
+
+ if ((inode->i_state & I_NEW)) {
+ inode->i_flags |= S_NOATIME;
+ inode->i_flags |= S_NOCMTIME;
+ inode->i_generation = generation;
+ fuse_init_inode(inode, attr);
+ unlock_new_inode(inode);
+ } else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
+ /* Inode has changed type, any I/O on the old should fail */
+ make_bad_inode(inode);
+ iput(inode);
+ goto retry;
+ }
+
+ fi = get_fuse_inode(inode);
+ spin_lock(&fi->lock);
+ fi->nlookup++;
+ spin_unlock(&fi->lock);
+ fuse2_change_attributes(inode, attr);
+
+ return inode;
+}
+
+static void fuse_umount_begin(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ fc->dev_ops->abort(fc->dev_priv);
+}
+
+static void fuse_conn_put(struct fuse_conn *fc)
+{
+ if (refcount_dec_and_test(&fc->count)) {
+ put_pid_ns(fc->pid_ns);
+ put_user_ns(fc->user_ns);
+ fc->dev_ops->put(fc->dev_priv);
+ kfree_rcu(fc, rcu);
+ }
+}
+
+static void fuse_abort_and_put(struct fuse_conn *fc)
+{
+ fc->dev_ops->abort(fc->dev_priv);
+ fuse_conn_put(fc);
+}
+
+static void fuse_put_super(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ FUSE_ARGS(args);
+
+ args.force = true;
+ args.in.h.opcode = FUSE_DESTROY;
+ fuse2_simple_request(fc, &args);
+
+ fuse_abort_and_put(fc);
+}
+
+static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
+{
+ stbuf->f_type = FUSE_SUPER_MAGIC;
+ stbuf->f_bsize = attr->bsize;
+ stbuf->f_frsize = attr->frsize;
+ stbuf->f_blocks = attr->blocks;
+ stbuf->f_bfree = attr->bfree;
+ stbuf->f_bavail = attr->bavail;
+ stbuf->f_files = attr->files;
+ stbuf->f_ffree = attr->ffree;
+ stbuf->f_namelen = attr->namelen;
+ /* fsid is left zero */
+}
+
+static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ FUSE_ARGS(args);
+ struct fuse_statfs_out outarg;
+ int err;
+
+ if (!fuse2_allow_current_process(fc)) {
+ buf->f_type = FUSE_SUPER_MAGIC;
+ return 0;
+ }
+
+ memset(&outarg, 0, sizeof(outarg));
+ args.in.h.opcode = FUSE_STATFS;
+ args.in.h.nodeid = get_node_id(d_inode(dentry));
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse2_simple_request(fc, &args);
+ if (!err)
+ convert_fuse_statfs(buf, &outarg.st);
+ return err;
+}
+
+static int fuse_show_options(struct seq_file *m, struct dentry *root)
+{
+ return 0;
+}
+
+/* Allocate a zeroed connection holding one reference, bound to the caller's
+ * pid and user namespaces.  Returns NULL if the allocation fails.
+ */
+static struct fuse_conn *fuse_conn_alloc(void)
+{
+	struct fuse_conn *conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+
+	if (!conn)
+		return NULL;
+
+	refcount_set(&conn->count, 1);
+	atomic64_set(&conn->khctr, 0);
+	get_random_bytes(&conn->scramble_key, sizeof(conn->scramble_key));
+	conn->pid_ns = get_pid_ns(task_active_pid_ns(current));
+	conn->user_ns = get_user_ns(current_user_ns());
+	conn->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
+	return conn;
+}
+
+static struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
+{
+ refcount_inc(&fc->count);
+ return fc;
+}
+
+static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
+{
+ struct fuse_attr attr;
+ memset(&attr, 0, sizeof(attr));
+
+ attr.mode = mode;
+ attr.ino = FUSE_ROOT_ID;
+ attr.nlink = 1;
+ return fuse2_iget(sb, 1, 0, &attr);
+}
+
+static const struct super_operations fuse_super_operations = {
+ .alloc_inode = fuse_alloc_inode,
+ .destroy_inode = fuse_destroy_inode,
+ .evict_inode = fuse_evict_inode,
+ .drop_inode = generic_delete_inode,
+ .remount_fs = fuse_remount_fs,
+ .put_super = fuse_put_super,
+ .umount_begin = fuse_umount_begin,
+ .statfs = fuse_statfs,
+ .show_options = fuse_show_options,
+};
+
+/* Apply the server's INIT reply to @fc and @sb.  Returns 0, or -EINVAL when
+ * the protocol version or a mandatory feature flag is not acceptable.
+ */
+static int process_init_reply(struct fuse_conn *fc, struct super_block *sb,
+			      struct fuse_init_out *arg)
+{
+	if (arg->major != FUSE_KERNEL_VERSION || arg->minor < 23)
+		return -EINVAL;
+
+	if (!(arg->flags & FUSE_POSIX_LOCKS))
+		fc->no_lock = 1;
+	if (!(arg->flags & FUSE_FLOCK_LOCKS))
+		fc->no_flock = 1;
+	if (!(arg->flags & FUSE_ATOMIC_O_TRUNC)) {
+		pr_info("fuse: must support FUSE_ATOMIC_O_TRUNC\n");
+		return -EINVAL;
+	}
+	if (!(arg->flags & FUSE_BIG_WRITES)) {
+		pr_info("fuse: must support FUSE_BIG_WRITES\n");
+		return -EINVAL;
+	}
+	if (!(arg->flags & FUSE_PARALLEL_DIROPS)) {
+		pr_info("fuse: must support FUSE_PARALLEL_DIROPS\n");
+		return -EINVAL;
+	}
+	if (!(arg->flags & FUSE_HANDLE_KILLPRIV)) {
+		pr_info("fuse: must support FUSE_HANDLE_KILLPRIV\n");
+		return -EINVAL;
+	}
+	if (arg->time_gran && arg->time_gran <= 1000000000)
+		sb->s_time_gran = arg->time_gran;
+	if ((arg->flags & FUSE_POSIX_ACL)) {
+		fc->posix_acl = 1;
+		sb->s_xattr = fuse2_acl_xattr_handlers;
+	}
+	if (arg->flags & FUSE_MAX_PAGES) {
+		fc->max_pages =
+			min_t(unsigned int, FUSE_MAX_MAX_PAGES,
+			max_t(unsigned int, arg->max_pages, 1));
+	}
+	return 0;
+}
+
+static int fuse_send_init(struct fuse_conn *fc, struct super_block *sb,
+ char *opts)
+{
+ struct fuse_init_in inarg;
+ struct fuse_init_out outarg;
+ FUSE_ARGS(args);
+ int err;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.major = FUSE_KERNEL_VERSION;
+ inarg.minor = FUSE_KERNEL_MINOR_VERSION;
+ inarg.max_readahead = sb->s_bdi->ra_pages * PAGE_SIZE;
+ inarg.flags = FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
+ FUSE_BIG_WRITES | FUSE_PARALLEL_DIROPS | FUSE_FLOCK_LOCKS |
+ FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
+ FUSE_MAX_PAGES;
+
+ args.killable = true;
+ args.in.h.opcode = FUSE_INIT;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ if (opts) {
+ args.in.numargs++;
+ args.in.args[1].size = strlen(opts) + 1;
+ args.in.args[1].value = opts;
+ }
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse2_simple_request(fc, &args);
+ if (!err)
+ err = process_init_reply(fc, sb, &outarg);
+
+ return err;
+}
+
+static int fuse_fill_super(struct super_block *sb, char *opts)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct inode *root;
+ int err;
+
+ if (sb->s_flags & SB_MANDLOCK)
+ return -EINVAL;
+
+ sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
+ sb->s_magic = FUSE_SUPER_MAGIC;
+ sb->s_op = &fuse_super_operations;
+ sb->s_xattr = fuse2_xattr_handlers;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_time_gran = 1;
+ sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
+ if (sb->s_user_ns != &init_user_ns)
+ sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
+
+ /*
+ * If we are not in the initial user namespace posix
+ * acls must be translated.
+ */
+ if (sb->s_user_ns != &init_user_ns)
+ sb->s_xattr = fuse2_no_acl_xattr_handlers;
+
+ sb->s_flags |= SB_POSIXACL;
+
+ err = fuse_send_init(fc, sb, opts);
+ if (err)
+ return err;
+
+ root = fuse_get_root_inode(sb, S_IFDIR | 0);
+ sb->s_root = d_make_root(root);
+ if (!sb->s_root)
+ return -ENOMEM;
+
+ /* Root dentry doesn't have .d_revalidate */
+ sb->s_d_op = &fuse2_dentry_operations;
+
+ return 0;
+}
+
+/* sget() set callback: give @s an anonymous dev and a reference on the conn. */
+static int fuse_set_super(struct super_block *s, void *data)
+{
+	int err = get_anon_bdev(&s->s_dev);
+
+	if (err)
+		return err;
+	s->s_fs_info = fuse_conn_get(data);
+	return 0;
+}
+
+static int fuse_test_super(struct super_block *s, void *data)
+{
+ struct fuse_conn *fc = data;
+
+ return fc->dev_priv == get_fuse_conn_super(s)->dev_priv;
+}
+
+struct dentry *fuse_mount_common(struct file_system_type *fs_type,
+ int flags, void *opts,
+ const struct fuse_dev_operations *dev_ops,
+ void *dev_priv)
+{
+ struct super_block *s;
+ struct fuse_conn *fc;
+ int err;
+
+ fc = fuse_conn_alloc();
+ if (!fc) {
+ dev_ops->abort(dev_priv);
+ dev_ops->put(dev_priv);
+ return ERR_PTR(-ENOMEM);
+ }
+ fc->dev_ops = dev_ops;
+ fc->dev_priv = dev_priv;
+
+ s = sget(fs_type, fuse_test_super, fuse_set_super, flags, fc);
+ err = PTR_ERR(s);
+ if (IS_ERR(s))
+ goto abort;
+
+ err = -EIO;
+ if (WARN_ON(fc->user_ns != s->s_user_ns))
+ goto deactivate;
+
+ if (s->s_root) {
+ err = -EBUSY;
+ if ((flags ^ s->s_flags) & SB_RDONLY)
+ goto deactivate;
+ } else {
+ err = fuse_fill_super(s, opts);
+ if (err)
+ goto deactivate;
+
+ s->s_flags |= SB_ACTIVE;
+ }
+ fuse_conn_put(fc);
+
+ return dget(s->s_root);
+
+deactivate:
+ deactivate_locked_super(s);
+abort:
+ fuse_abort_and_put(fc);
+
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(fuse_mount_common);
+
+void fuse_kill_sb(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ if (!sb->s_root) {
+ /*
+ * Setup of the sb didn't complete, ->put_super() won't be
+ * called.
+ */
+ sb->s_fs_info = NULL;
+ fuse_abort_and_put(fc);
+ }
+ kill_anon_super(sb);
+}
+EXPORT_SYMBOL(fuse_kill_sb);
+
+
+static void fuse_inode_init_once(void *foo)
+{
+ struct inode *inode = foo;
+
+ inode_init_once(inode);
+}
+
+static int __init fuse_fs_init(void)
+{
+ fuse_inode_cachep = kmem_cache_create("fuse2_inode",
+ sizeof(struct fuse_inode), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
+ fuse_inode_init_once);
+
+ if (!fuse_inode_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void fuse_fs_cleanup(void)
+{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+ kmem_cache_destroy(fuse_inode_cachep);
+}
+
+/*
+ * Module entry point: create the inode cache, then the request cache.
+ * If the second step fails the first is undone before the error is returned.
+ */
+static int __init fuse_init(void)
+{
+	int res = fuse_fs_init();
+
+	if (res)
+		return res;
+
+	res = fuse_req_cache_init();
+	if (res) {
+		fuse_fs_cleanup();
+		return res;
+	}
+
+	pr_info("fuse2 core initialized (API version %i.%i)\n",
+		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
+
+	return 0;
+}
+
+static void __exit fuse_exit(void)
+{
+ printk(KERN_DEBUG "fuse exit\n");
+
+ fuse_req_cache_cleanup();
+ fuse_fs_cleanup();
+}
+
+module_init(fuse_init);
+module_exit(fuse_exit);
diff --git a/fs/fuse2/map.c b/fs/fuse2/map.c
new file mode 100644
index 0000000..184e210
--- /dev/null
+++ b/fs/fuse2/map.c
@@ -0,0 +1,60 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/file.h>
+#include <linux/idr.h>
+
+static DEFINE_SPINLOCK(fuse2_map_lock);
+static DEFINE_IDR(fuse2_map);
+
+/* Add @file to the map IDR: returns its id (>= 0), or -errno after dropping @file. */
+int fuse2_map_open(struct super_block *sb, struct file *file)
+{
+	int res;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fuse2_map_lock);
+	res = idr_alloc(&fuse2_map, file, 0, 0, GFP_ATOMIC);
+	spin_unlock(&fuse2_map_lock);
+	idr_preload_end();
+	if (res < 0)	/* ids > 0 are valid: only drop the ref on failure */
+		fput(file);
+	return res;
+}
+EXPORT_SYMBOL(fuse2_map_open);
+
+int fuse2_map_close(struct super_block *sb, unsigned long mapfd)
+{
+ struct file *file;
+
+ spin_lock(&fuse2_map_lock);
+ file = idr_remove(&fuse2_map, mapfd);
+ spin_unlock(&fuse2_map_lock);
+
+ if (!file)
+ return -EBADF;
+
+ fput(file);
+ return 0;
+}
+EXPORT_SYMBOL(fuse2_map_close);
+
+/* Look up @mapfd and take a reference; NULL if absent or already being freed. */
+struct file *fuse2_map_get(struct fuse_conn *fc, u64 mapfd)
+{
+	struct file *file;
+
+	rcu_read_lock();
+	file = idr_find(&fuse2_map, mapfd);
+	if (file && !get_file_rcu(file))
+		file = NULL;
+	rcu_read_unlock();
+	return file;
+}
diff --git a/fs/fuse2/readdir.c b/fs/fuse2/readdir.c
new file mode 100644
index 0000000..1e9b2f1
--- /dev/null
+++ b/fs/fuse2/readdir.c
@@ -0,0 +1,89 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+
+#include "fuse_i.h"
+#include <linux/iversion.h>
+#include <linux/posix_acl.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+
+static bool fuse_emit(struct file *file, struct dir_context *ctx,
+ struct fuse_dirent *dirent)
+{
+ return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
+ dirent->type);
+}
+
+static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
+ struct dir_context *ctx)
+{
+ while (nbytes >= FUSE_NAME_OFFSET) {
+ struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
+ size_t reclen = FUSE_DIRENT_SIZE(dirent);
+ if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
+ return -EIO;
+ if (reclen > nbytes)
+ break;
+ if (memchr(dirent->name, '/', dirent->namelen) != NULL)
+ return -EIO;
+
+ if (!fuse_emit(file, ctx, dirent))
+ break;
+
+ buf += reclen;
+ nbytes -= reclen;
+ ctx->pos = dirent->off;
+ }
+
+ return 0;
+}
+
+static int fuse2_readdir_uncached(struct file *file, struct dir_context *ctx)
+{
+ int err;
+ size_t nbytes;
+ struct page *page;
+ struct inode *inode = file_inode(file);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_read_in *inarg;
+
+ req = fuse2_get_req(fc, 1);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ fuse2_put_request(req);
+ return -ENOMEM;
+ }
+
+ req->num_pages = 1;
+ req->pages[0] = page;
+ req->page_descs[0].length = PAGE_SIZE;
+ fuse2_read_fill(req, &inarg, file, ctx->pos, PAGE_SIZE, FUSE_READDIR);
+ err = fuse2_request_send(fc, req);
+ nbytes = req->outh.len - req->mand_outlen;
+ if (!err && nbytes)
+ err = parse_dirfile(page_address(page), nbytes, file, ctx);
+
+ fuse2_put_request(req);
+
+ return err;
+}
+
+int fuse2_readdir(struct file *file, struct dir_context *ctx)
+{
+ struct inode *inode = file_inode(file);
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ return fuse2_readdir_uncached(file, ctx);
+}
diff --git a/fs/fuse2/request.c b/fs/fuse2/request.c
new file mode 100644
index 0000000..3665ddc
--- /dev/null
+++ b/fs/fuse2/request.c
@@ -0,0 +1,303 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2019 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/mm.h>
+#include <linux/uio.h>
+#include <linux/cred.h>
+
+static struct kmem_cache *fuse_req_cachep;
+
+int fuse2_request_send(struct fuse_conn *fc, struct fuse_req *req)
+{
+ return fc->dev_ops->send(fc->dev_priv, req);
+}
+
+static void fuse_request_init(struct fuse_req *req, struct page **pages,
+ struct fuse_page_desc *page_descs,
+ unsigned npages)
+{
+ INIT_LIST_HEAD(&req->list);
+ init_waitqueue_head(&req->waitq);
+ refcount_set(&req->count, 1);
+ req->pages = pages;
+ req->page_descs = page_descs;
+ req->max_pages = npages;
+ __set_bit(FR_PENDING, &req->flags);
+ __set_bit(FR_ISREPLY, &req->flags);
+}
+
+static struct page **fuse_req_pages_alloc(unsigned int npages, gfp_t flags,
+ struct fuse_page_desc **desc)
+{
+ struct page **pages;
+
+ pages = kzalloc(npages * (sizeof(struct page *) +
+ sizeof(struct fuse_page_desc)), flags);
+ *desc = (void *) pages + npages * sizeof(struct page *);
+
+ return pages;
+}
+
+static struct fuse_req *fuse_request_alloc(unsigned npages, gfp_t flags)
+{
+ struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
+ if (req) {
+ struct page **pages = NULL;
+ struct fuse_page_desc *page_descs = NULL;
+
+ WARN_ON(npages > FUSE_MAX_MAX_PAGES);
+ if (npages > FUSE_REQ_INLINE_PAGES) {
+ pages = fuse_req_pages_alloc(npages, flags,
+ &page_descs);
+ if (!pages) {
+ kmem_cache_free(fuse_req_cachep, req);
+ return NULL;
+ }
+ } else if (npages) {
+ pages = req->inline_pages;
+ page_descs = req->inline_page_descs;
+ }
+
+ fuse_request_init(req, pages, page_descs, npages);
+ }
+ return req;
+}
+
+static void fuse_request_free(struct fuse_req *req)
+{
+ unsigned int i;
+
+ for (i = 0; i < req->num_pages; i++)
+ put_page(req->pages[i]);
+
+ if (req->pages != req->inline_pages)
+ kfree(req->pages);
+ kmem_cache_free(fuse_req_cachep, req);
+}
+
+/*
+ * Allocate a request with room for @npages pages and stamp it with the
+ * caller's fsuid/fsgid/pid translated through the connection's namespaces.
+ *
+ * Returns the request, ERR_PTR(-ENOMEM) if allocation fails, or
+ * ERR_PTR(-EOVERFLOW) if the translated uid or gid comes out as -1.
+ */
+static struct fuse_req *fuse2_get_req_gfp(struct fuse_conn *fc, unsigned npages,
+					  gfp_t flags)
+{
+	struct fuse_req *req = fuse_request_alloc(npages, flags);
+
+	if (!req)
+		return ERR_PTR(-ENOMEM);
+
+	req->inh.uid = from_kuid(fc->user_ns, current_fsuid());
+	req->inh.gid = from_kgid(fc->user_ns, current_fsgid());
+	req->inh.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
+
+	if (likely(req->inh.uid != ((uid_t)-1) && req->inh.gid != ((gid_t)-1)))
+		return req;
+
+	fuse2_put_request(req);
+	return ERR_PTR(-EOVERFLOW);
+}
+
+struct fuse_req *fuse2_get_req(struct fuse_conn *fc, unsigned npages)
+{
+ return fuse2_get_req_gfp(fc, npages, GFP_KERNEL);
+}
+
+struct fuse_forget *fuse2_alloc_forget(void)
+{
+ return kzalloc(sizeof(struct fuse_forget), GFP_KERNEL);
+}
+
+void fuse2_get_request(struct fuse_req *req)
+{
+ refcount_inc(&req->count);
+}
+EXPORT_SYMBOL(fuse2_get_request);
+
+void fuse2_put_request(struct fuse_req *req)
+{
+ if (refcount_dec_and_test(&req->count))
+ fuse_request_free(req);
+}
+EXPORT_SYMBOL(fuse2_put_request);
+
+/* Sum of the .size fields of the first @numargs entries of @args. */
+static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+{
+	const struct fuse_arg *arg = args;
+	unsigned total = 0;
+
+	while (numargs--)
+		total += (arg++)->size;
+	return total;
+}
+
+static int fuse_req_pages_fill(struct fuse_req *req, gfp_t flags)
+{
+ unsigned int i;
+
+ for (i = 0; i < req->max_pages; i++) {
+ struct page *page = alloc_page(flags);
+ if (!page)
+ return -ENOMEM;
+
+ req->pages[i] = page;
+ req->page_descs[i].length = PAGE_SIZE;
+ req->num_pages++;
+ }
+ return 0;
+}
+
+#define FUSE_SIMPLE_MAX_PAGES 17
+
+/* Marshal @args into a fuse_req, send it and copy the reply back into @args.
+ * Returns -errno, 0, or (with out.argvar) the length of the variable payload.
+ */
+static ssize_t fuse2_simple_send(struct fuse_conn *fc, struct fuse_args *args)
+{
+	struct fuse_req *req;
+	ssize_t ret;
+	gfp_t flags = GFP_KERNEL;
+	unsigned int inlen = sizeof(struct fuse_in_header) +
+		len_args(args->in.numargs, (struct fuse_arg *) args->in.args);
+	unsigned int outlen = sizeof(struct fuse_out_header) +
+		len_args(args->out.numargs, args->out.args);
+	unsigned int maxlen = max(inlen, outlen);
+	unsigned int npages = 0, i;
+	struct kvec vec[FUSE_SIMPLE_MAX_PAGES + 1];
+	struct iov_iter iter;
+
+	if (maxlen > FUSE_REQ_INLINE_DATA) {
+		npages = DIV_ROUND_UP(maxlen - FUSE_REQ_INLINE_DATA, PAGE_SIZE);
+		if (WARN_ON(npages > FUSE_SIMPLE_MAX_PAGES))
+			return -EIO;
+	}
+
+	if (args->force)
+		flags |= __GFP_NOFAIL;
+
+	req = fuse2_get_req_gfp(fc, npages, flags);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	vec[0].iov_base = req->inlinedata;
+	vec[0].iov_len = FUSE_REQ_INLINE_DATA;
+	if (npages) {
+		ret = fuse_req_pages_fill(req, flags);
+		if (ret)
+			goto out_put;
+		for (i = 0; i < npages; i++) {
+			vec[i + 1].iov_base = page_address(req->pages[i]);
+			vec[i + 1].iov_len = req->page_descs[i].length;
+		}
+	}
+	/* Copy the in-args into the buffer, skipping the space of the in-header */
+	iov_iter_kvec(&iter, READ, vec, npages + 1, inlen);
+	iov_iter_advance(&iter, sizeof(struct fuse_in_header));
+	for (i = 0; i < args->in.numargs; i++) {
+		struct fuse_in_arg *a = &args->in.args[i];
+
+		ret = _copy_to_iter(a->value, a->size, &iter);
+		WARN_ON(ret != a->size);
+	}
+	req->inline_inlen = min_t(unsigned int, inlen, FUSE_REQ_INLINE_DATA);
+	req->inh.opcode = args->in.h.opcode;
+	req->inh.nodeid = args->in.h.nodeid;
+
+	req->inline_outlen = min_t(unsigned int, outlen, FUSE_REQ_INLINE_DATA);
+	req->max_outlen = req->mand_outlen = outlen;
+	if (args->out.argvar)
+		req->mand_outlen = sizeof(struct fuse_out_header);
+
+	if (args->force)
+		__set_bit(FR_FORCE, &req->flags);
+	if (args->killable)
+		__set_bit(FR_KILLABLE, &req->flags);
+	req->inh.len = inlen;
+	ret = fuse2_request_send(fc, req);
+	if (ret)
+		goto out_put;
+	/* Copy out the reply data that follows the out-header */
+	iov_iter_kvec(&iter, WRITE, vec, npages + 1, req->outh.len);
+	iov_iter_advance(&iter, sizeof(struct fuse_out_header));
+	for (i = 0; i < args->out.numargs; i++) {
+		struct fuse_arg *a = &args->out.args[i];
+
+		if (a->value)
+			_copy_from_iter(a->value, a->size, &iter);
+		else
+			iov_iter_advance(&iter, a->size);
+	}
+	if (args->out.argvar)
+		ret = req->outh.len - req->mand_outlen;
+
+out_put:
+	fuse2_put_request(req);
+	return ret;
+}
+
+ssize_t fuse2_simple_request(struct fuse_conn *fc, struct fuse_args *args)
+{
+ ssize_t res;
+ uid_t uid = from_kuid(fc->user_ns, current_fsuid());
+ gid_t gid = from_kgid(fc->user_ns, current_fsgid());
+ pid_t pid = pid_nr_ns(task_pid(current), fc->pid_ns);
+
+ if (unlikely(uid == ((uid_t)-1) || gid == ((gid_t)-1)))
+ return -EOVERFLOW;
+
+ if (WARN_ON(args->out.argvar && args->out.numargs != 1))
+ return -EIO;
+
+ if (fc->dev_ops->simple_send) {
+ res = fc->dev_ops->simple_send(fc->dev_priv, args,
+ uid, gid, pid);
+ if (res != -EPROBE_DEFER)
+ return res;
+ }
+
+ return fuse2_simple_send(fc, args);
+}
+
+void fuse2_queue_forget(struct fuse_conn *fc, struct fuse_forget *forget,
+ u64 nodeid, u64 nlookup)
+{
+ forget->forget_one.nodeid = nodeid;
+ forget->forget_one.nlookup = nlookup;
+
+ fc->dev_ops->forget(fc->dev_priv, forget);
+}
+
+void fuse2_force_forget(struct fuse_conn *fc, u64 nodeid)
+{
+ struct fuse_forget *forget;
+
+ forget = kzalloc(sizeof(struct fuse_forget), GFP_KERNEL | __GFP_NOFAIL);
+ fuse2_queue_forget(fc, forget, nodeid, 1);
+}
+
+int __init fuse_req_cache_init(void)
+{
+ fuse_req_cachep = kmem_cache_create("fuse2_request",
+ sizeof(struct fuse_req),
+ 0, 0, NULL);
+ if (!fuse_req_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void fuse_req_cache_cleanup(void)
+{
+ kmem_cache_destroy(fuse_req_cachep);
+}
diff --git a/fs/fuse2/xattr.c b/fs/fuse2/xattr.c
new file mode 100644
index 0000000..47710c9
--- /dev/null
+++ b/fs/fuse2/xattr.c
@@ -0,0 +1,246 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2016 Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * This program can be distributed under the terms of the GNU GPL.
+ * See the file COPYING.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+int fuse2_setxattr(struct inode *inode, const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+ struct fuse_setxattr_in inarg;
+ int err;
+
+ if (fc->no_setxattr)
+ return -EOPNOTSUPP;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ inarg.flags = flags;
+ args.in.h.opcode = FUSE_SETXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 3;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = strlen(name) + 1;
+ args.in.args[1].value = name;
+ args.in.args[2].size = size;
+ args.in.args[2].value = value;
+ err = fuse2_simple_request(fc, &args);
+ if (err == -ENOSYS) {
+ fc->no_setxattr = 1;
+ err = -EOPNOTSUPP;
+ }
+ return err;
+}
+
+ssize_t fuse2_getxattr(struct inode *inode, const char *name, void *value,
+ size_t size)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+ struct fuse_getxattr_in inarg;
+ struct fuse_getxattr_out outarg;
+ ssize_t ret;
+
+ if (fc->no_getxattr)
+ return -EOPNOTSUPP;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ args.in.h.opcode = FUSE_GETXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = strlen(name) + 1;
+ args.in.args[1].value = name;
+ /* This is really two different operations rolled into one */
+ args.out.numargs = 1;
+ if (size) {
+ args.out.argvar = 1;
+ args.out.args[0].size = size;
+ args.out.args[0].value = value;
+ } else {
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ }
+ ret = fuse2_simple_request(fc, &args);
+ if (!ret && !size)
+ ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX);
+ if (ret == -ENOSYS) {
+ fc->no_getxattr = 1;
+ ret = -EOPNOTSUPP;
+ }
+ return ret;
+}
+
+static int fuse_verify_xattr_list(char *list, size_t size)
+{
+ size_t origsize = size;
+
+ while (size) {
+ size_t thislen = strnlen(list, size);
+
+ if (!thislen || thislen == size)
+ return -EIO;
+
+ size -= thislen + 1;
+ list += thislen + 1;
+ }
+
+ return origsize;
+}
+
+ssize_t fuse2_listxattr(struct dentry *entry, char *list, size_t size)
+{
+ struct inode *inode = d_inode(entry);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+ struct fuse_getxattr_in inarg;
+ struct fuse_getxattr_out outarg;
+ ssize_t ret;
+
+ if (!fuse2_allow_current_process(fc))
+ return -EACCES;
+
+ if (fc->no_listxattr)
+ return -EOPNOTSUPP;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ args.in.h.opcode = FUSE_LISTXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ /* This is really two different operations rolled into one */
+ args.out.numargs = 1;
+ if (size) {
+ args.out.argvar = 1;
+ args.out.args[0].size = size;
+ args.out.args[0].value = list;
+ } else {
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ }
+ ret = fuse2_simple_request(fc, &args);
+ if (!ret && !size)
+ ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX);
+ if (ret > 0 && size)
+ ret = fuse_verify_xattr_list(list, ret);
+ if (ret == -ENOSYS) {
+ fc->no_listxattr = 1;
+ ret = -EOPNOTSUPP;
+ }
+ return ret;
+}
+
+int fuse2_removexattr(struct inode *inode, const char *name)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+ int err;
+
+ if (fc->no_removexattr)
+ return -EOPNOTSUPP;
+
+ args.in.h.opcode = FUSE_REMOVEXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = strlen(name) + 1;
+ args.in.args[0].value = name;
+ err = fuse2_simple_request(fc, &args);
+ if (err == -ENOSYS) {
+ fc->no_removexattr = 1;
+ err = -EOPNOTSUPP;
+ }
+ return err;
+}
+
+static int fuse_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *value, size_t size)
+{
+ return fuse2_getxattr(inode, name, value, size);
+}
+
+/* xattr ->set handler for the "" prefix: a NULL @value requests removal of
+ * @name, anything else stores @size bytes from @value. */
+static int fuse_xattr_set(const struct xattr_handler *handler,
+			  struct dentry *dentry, struct inode *inode,
+			  const char *name, const void *value, size_t size,
+			  int flags)
+{
+	return value ? fuse2_setxattr(inode, name, value, size, flags)
+		     : fuse2_removexattr(inode, name);
+}
+
+static bool no_xattr_list(struct dentry *dentry)
+{
+ return false;
+}
+
+static int no_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *value, size_t size)
+{
+ return -EOPNOTSUPP;
+}
+
+static int no_xattr_set(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *nodee,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static const struct xattr_handler fuse_xattr_handler = {
+ .prefix = "",
+ .get = fuse_xattr_get,
+ .set = fuse_xattr_set,
+};
+
+const struct xattr_handler *fuse2_xattr_handlers[] = {
+ &fuse_xattr_handler,
+ NULL
+};
+
+const struct xattr_handler *fuse2_acl_xattr_handlers[] = {
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+ &fuse_xattr_handler,
+ NULL
+};
+
+static const struct xattr_handler fuse_no_acl_access_xattr_handler = {
+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
+ .list = no_xattr_list,
+ .get = no_xattr_get,
+ .set = no_xattr_set,
+};
+
+static const struct xattr_handler fuse_no_acl_default_xattr_handler = {
+	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
+	.flags = ACL_TYPE_DEFAULT,	/* type tag must match the default-ACL name */
+	.list = no_xattr_list,
+	.get = no_xattr_get,
+	.set = no_xattr_set,
+};
+
+const struct xattr_handler *fuse2_no_acl_xattr_handlers[] = {
+ &fuse_no_acl_access_xattr_handler,
+ &fuse_no_acl_default_xattr_handler,
+ &fuse_xattr_handler,
+ NULL
+};
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 2971d29..2c290d2 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -244,6 +244,7 @@ struct fuse_file_lock {
#define FOPEN_NONSEEKABLE (1 << 2)
#define FOPEN_CACHE_DIR (1 << 3)
#define FOPEN_STREAM (1 << 4)
+#define FOPEN_MAP (1 << 5)
/**
* INIT request/reply flags
@@ -422,6 +423,7 @@ enum fuse_opcode {
FUSE_RENAME2 = 45,
FUSE_LSEEK = 46,
FUSE_COPY_FILE_RANGE = 47,
+ FUSE_MAP = 50,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -571,6 +573,12 @@ struct fuse_read_in {
uint32_t padding;
};
+struct fuse_map_out {
+ uint64_t mapfd;
+ uint64_t offset;
+ uint64_t size;
+};
+
#define FUSE_COMPAT_WRITE_IN_SIZE 24
struct fuse_write_in {
@@ -823,6 +831,14 @@ struct fuse_notify_retrieve_in {
/* Device ioctls: */
#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t)
+#define FUSE2_DEV_IOC_BIND		_IOW(229, 1, uint32_t)
+#define FUSE2_DEV_IOC_PROC		_IO(229, 2)
+#define FUSE2_DEV_IOC_READ		_IO(229, 3)
+#define FUSE2_DEV_IOC_MAP_OPEN		_IO(229, 4)
+#define FUSE2_DEV_IOC_MAP_CLOSE		_IO(229, 5) /* same magic as nr 0-4 */
+
+#define FUSE2_MMAP_INBUF_OFFSET 0x00000000UL
+#define FUSE2_MMAP_OUTBUF_OFFSET 0x80000000UL
struct fuse_lseek_in {
uint64_t fh;
diff --git a/samples/Kconfig b/samples/Kconfig
index c8dacb4..a4ea071 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -169,4 +169,8 @@
as mount API and statx(). Note that this is restricted to the x86
arch whilst it accesses system calls that aren't yet in all arches.
+config SAMPLE_FUSE2
+ bool "Build example server for fuse2"
+ select HEADERS_INSTALL
+
endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
index 7d6e4ca..2d6034d 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -20,3 +20,4 @@
obj-$(CONFIG_VIDEO_PCI_SKELETON) += v4l/
obj-y += vfio-mdev/
subdir-$(CONFIG_SAMPLE_VFS) += vfs
+subdir-$(CONFIG_SAMPLE_FUSE2) += fuse2/
diff --git a/samples/fuse2/.gitignore b/samples/fuse2/.gitignore
new file mode 100644
index 0000000..9b64f2d
--- /dev/null
+++ b/samples/fuse2/.gitignore
@@ -0,0 +1 @@
+loraw
diff --git a/samples/fuse2/Makefile b/samples/fuse2/Makefile
new file mode 100644
index 0000000..fe8bf58
--- /dev/null
+++ b/samples/fuse2/Makefile
@@ -0,0 +1,4 @@
+hostprogs-y := loraw
+always := $(hostprogs-y)
+KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
+KBUILD_HOSTLDLIBS += -pthread
diff --git a/samples/fuse2/loraw.c b/samples/fuse2/loraw.c
new file mode 100644
index 0000000..8984d44
--- /dev/null
+++ b/samples/fuse2/loraw.c
@@ -0,0 +1,1155 @@
+#define _GNU_SOURCE
+#define LO_NOTHREAD 1
+
+#include <linux/fuse.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+#include <dirent.h>
+#include <assert.h>
+#include <errno.h>
+#include <err.h>
+#include <inttypes.h>
+#include <sched.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <semaphore.h>
+
+struct lo_inode {
+ struct lo_inode *next; /* protected by lo->mutex */
+ struct lo_inode *prev; /* protected by lo->mutex */
+ int fd; /* O_PATH descriptor; closed by unref_inode() on last forget */
+ dev_t dev; /* st_dev; together with attr.ino the key used by lo_find() */
+ uint64_t refcount; /* protected by lo->mutex */
+ struct fuse_attr attr; /* snapshot written by lo_convert_stat() */
+};
+
+struct lo_file {
+ union {
+ struct lo_file *next; /* free-list link used by the *_nt pool allocator */
+ struct { /* regular file, filled by lo_open() */
+ int fd;
+ int mapfd; /* FUSE2_DEV_IOC_MAP_OPEN result; lo_release() skips -1 */
+ };
+ struct { /* directory, filled by lo_opendir() */
+ DIR *dp;
+ struct dirent *entry; /* entry read but not yet emitted by lo_readdir() */
+ off_t offset; /* d_off of the last entry emitted by lo_readdir() */
+ };
+ };
+};
+
+struct lo_config {
+ int debug; /* -d */
+ int version; /* -1 or -2; 0 makes main() try /dev/fuse2, then /dev/fuse */
+ int single; /* -s: main() does not call lo_start_threads() */
+ int bind; /* -b: lo_start_v1() clones the device with FUSE_DEV_IOC_CLONE */
+ int proc; /* -p: aux channels mmap their buffers and use ioctls */
+ int map; /* -m: lo_open() requests FOPEN_MAP */
+ uint64_t timeout; /* sent as entry_valid/attr_valid; no option sets it */
+ const char *source; /* directory opened as root; main() sets "/" */
+ int nothread; /* -t */
+};
+
+struct lo_data {
+ pthread_mutex_t mutex; /* lock used when c.nothread is 0 */
+#ifdef LO_NOTHREAD
+#define LO_INODE_MAX 65536
+ struct lo_inode inodes[LO_INODE_MAX]; /* fixed pool for c.nothread mode */
+ struct lo_inode *free_inodes; /* free list threaded through ->next */
+#define LO_FILE_MAX 65536
+ struct lo_file files[LO_FILE_MAX]; /* fixed pool for c.nothread mode */
+ struct lo_file *free_files; /* free list threaded through ->next */
+ sem_t sem; /* process-shared lock used when c.nothread is set */
+#endif
+ struct lo_config c;
+ struct lo_inode root; /* FUSE_ROOT_ID; also head of the circular inode list */
+};
+
+struct lo_chan {
+ struct lo_data *lo;
+ enum { FUDEV_V1, FUDEV_V2, FUDEV_AUX } type;
+ int fd; /* fuse device descriptor this channel reads and writes */
+ int filled; /* inbuf already holds a request; set by lo_reply() */
+ int mapped; /* buffers are mmap()ed from fd; I/O goes through ioctls */
+ void *inbuf; /* request: fuse_in_header followed by arguments */
+ void *outbuf; /* reply: fuse_out_header followed by arguments */
+ size_t len; /* length of the request currently in inbuf */
+ size_t bufsize; /* size of inbuf and of outbuf */
+};
+
+
+#ifdef LO_NOTHREAD
+static inline int lo_nothread(struct lo_data *lo)
+{
+ return lo->c.nothread;
+}
+
+static inline void lo_mutex_init_nt(struct lo_data *lo)
+{
+ sem_init(&lo->sem, 1, 1);
+}
+
+static inline void lo_mutex_lock_nt(struct lo_data *lo)
+{
+ sem_wait(&lo->sem);
+}
+
+static inline void lo_mutex_unlock_nt(struct lo_data *lo)
+{
+ sem_post(&lo->sem);
+}
+
+/* Pop a zeroed inode off the shared free list; NULL when the pool is empty. */
+static inline struct lo_inode *lo_alloc_inode_nt(struct lo_data *lo)
+{
+	struct lo_inode *inode;
+
+	lo_mutex_lock_nt(lo);
+	inode = lo->free_inodes;
+	if (inode)
+		lo->free_inodes = inode->next;
+	lo_mutex_unlock_nt(lo);
+	if (inode)	/* memset(NULL, ...) would crash on exhaustion */
+		memset(inode, 0, sizeof(*inode));
+	return inode;
+}
+
+/* Pop a zeroed file slot off the shared free list; NULL when the pool is empty. */
+static inline struct lo_file *lo_alloc_file_nt(struct lo_data *lo)
+{
+	struct lo_file *lf;
+
+	lo_mutex_lock_nt(lo);
+	lf = lo->free_files;
+	if (lf)
+		lo->free_files = lf->next;
+	lo_mutex_unlock_nt(lo);
+
+	if (lf)	/* memset(NULL, ...) would crash on exhaustion */
+		memset(lf, 0, sizeof(*lf));
+	return lf;
+}
+
+static inline void lo_free_inode_locked_nt(struct lo_data *lo,
+ struct lo_inode *inode)
+{
+ inode->next = lo->free_inodes;
+ lo->free_inodes = inode;
+}
+
+static inline void lo_free_file_locked_nt(struct lo_data *lo,
+ struct lo_file *lf)
+{
+ lf->next = lo->free_files;
+ lo->free_files = lf;
+}
+
+static inline void lo_free_inode_nt(struct lo_data *lo, struct lo_inode *inode)
+{
+ lo_mutex_lock_nt(lo);
+ lo_free_inode_locked_nt(lo, inode);
+ lo_mutex_unlock_nt(lo);
+}
+
+static inline void lo_free_file_nt(struct lo_data *lo, struct lo_file *lf)
+{
+ lo_mutex_lock_nt(lo);
+ lo_free_file_locked_nt(lo, lf);
+ lo_mutex_unlock_nt(lo);
+}
+
+static inline struct lo_data *lo_alloc_lo_nt(void)
+{
+ struct lo_data *lo;
+ unsigned int i;
+
+ lo = mmap(NULL, sizeof(struct lo_data), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (lo == MAP_FAILED) {
+ warn("mmap(NULL, %zu, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0)", sizeof(struct lo_data));
+ return NULL;
+ }
+
+ for (i = 0; i < LO_INODE_MAX; i++)
+ lo_free_inode_locked_nt(lo, &lo->inodes[i]);
+ for (i = 0; i < LO_FILE_MAX; i++)
+ lo_free_file_locked_nt(lo, &lo->files[i]);
+
+ return lo;
+}
+#else
+
+#define lo_nothread(lo) ((void) lo, 0)
+#define lo_alloc_inode_nt(lo) NULL
+#define lo_alloc_file_nt(lo) NULL
+#define lo_free_inode_nt(lo, inode) abort()
+#define lo_free_file_nt(lo, lf) abort()
+#define lo_alloc_lo_nt() NULL
+#define lo_mutex_init_nt(lo) abort()
+#define lo_mutex_lock_nt(lo) abort()
+#define lo_mutex_unlock_nt(lo) abort()
+
+#endif
+
+static void lo_mutex_init(struct lo_data *lo)
+{
+ if (!lo_nothread(lo))
+ pthread_mutex_init(&lo->mutex, NULL);
+ else
+ lo_mutex_init_nt(lo);
+}
+
+static void lo_mutex_lock(struct lo_data *lo)
+{
+ if (!lo_nothread(lo))
+ pthread_mutex_lock(&lo->mutex);
+ else
+ lo_mutex_lock_nt(lo);
+}
+
+static void lo_mutex_unlock(struct lo_data *lo)
+{
+ if (!lo_nothread(lo))
+ pthread_mutex_unlock(&lo->mutex);
+ else
+ lo_mutex_unlock_nt(lo);
+}
+
+static struct lo_inode *lo_alloc_inode(struct lo_data *lo)
+{
+ if (!lo_nothread(lo))
+ return calloc(1, sizeof(struct lo_inode));
+ else
+ return lo_alloc_inode_nt(lo);
+}
+
+static struct lo_file *lo_alloc_file(struct lo_data *lo)
+{
+ if (!lo_nothread(lo))
+ return calloc(1, sizeof(struct lo_file));
+ else
+ return lo_alloc_file_nt(lo);
+}
+
+static void lo_free_inode(struct lo_data *lo, struct lo_inode *inode)
+{
+ if (!lo_nothread(lo))
+ free(inode);
+ else
+ lo_free_inode_nt(lo, inode);
+}
+
+static void lo_free_file(struct lo_data *lo, struct lo_file *lf)
+{
+ if (!lo_nothread(lo))
+ free(lf);
+ else
+ lo_free_file_nt(lo, lf);
+}
+
+static struct lo_inode *lo_inode(struct lo_data *lo, uint64_t ino)
+{
+ if (ino == FUSE_ROOT_ID)
+ return &lo->root;
+ else
+ return (struct lo_inode *) (uintptr_t) ino;
+}
+
+static int lo_debug(struct lo_chan *lc)
+{
+ return lc->lo->c.debug;
+}
+
+static void lo_reply(struct lo_chan *lc, int error, size_t argsize)
+{
+
+ struct fuse_in_header *inh = lc->inbuf;
+ struct fuse_out_header *outh = lc->outbuf;
+ int res;
+
+ outh->len = sizeof(struct fuse_out_header) + argsize;
+ outh->error = -error; /* callers pass a positive errno value */
+ outh->unique = inh->unique; /* echo the id of the request in inbuf */
+
+ if (lo_debug(lc)) {
+ fprintf(stderr,
+ "unique: %"PRIu64", opcode: %i, nodeid: %"PRIu64", insize: %zu\n",
+ inh->unique, inh->opcode, inh->nodeid, lc->len);
+
+ fprintf(stderr, " error: %i, outsize: %u\n",
+ error, outh->len);
+ }
+
+ if (lc->mapped) {
+ res = ioctl(lc->fd, FUSE2_DEV_IOC_PROC, 0);
+ if (res == -1)
+ err(1, "writing/reading fuse device");
+
+ if (res > 0) { /* recorded as a request already sitting in inbuf */
+ lc->len = res;
+ lc->filled = 1; /* makes lo_process() skip lo_getreq() */
+ }
+ } else {
+ res = write(lc->fd, lc->outbuf, outh->len);
+ if (res == -1)
+ err(1, "writing fuse device");
+ }
+}
+
+static void *lo_out_arg(struct lo_chan *lc)
+{
+ return ((struct fuse_out_header *) lc->outbuf) + 1;
+}
+
+static void lo_convert_stat(const struct stat *stat, struct fuse_attr *attr)
+{
+ memset(attr, 0, sizeof(*attr));
+
+ attr->ino = stat->st_ino;
+ attr->mode = stat->st_mode;
+ attr->nlink = stat->st_nlink;
+ attr->uid = stat->st_uid;
+ attr->gid = stat->st_gid;
+ attr->rdev = stat->st_rdev;
+ attr->size = stat->st_size;
+ attr->blksize = stat->st_blksize;
+ attr->blocks = stat->st_blocks;
+ attr->atime = stat->st_atime;
+ attr->mtime = stat->st_mtime;
+ attr->ctime = stat->st_ctime;
+ attr->atimensec = stat->st_atim.tv_nsec;
+ attr->mtimensec = stat->st_mtim.tv_nsec;
+ attr->ctimensec = stat->st_ctim.tv_nsec;
+}
+
+static void lo_getattr(struct lo_chan *lc, struct fuse_in_header *inh,
+ struct fuse_getattr_in *inarg)
+{
+ struct lo_data *lo = lc->lo;
+ struct fuse_attr_out *outarg = lo_out_arg(lc);
+
+ (void) inarg;
+
+ if (lo_debug(lc))
+ fprintf(stderr, "lo_getattr(ino=%"PRIu64")\n", inh->nodeid);
+
+ outarg->attr_valid = lo->c.timeout;
+ outarg->attr_valid_nsec = 0;
+ outarg->dummy = 0;
+ outarg->attr = lo_inode(lo, inh->nodeid)->attr;
+ lo_reply(lc, 0, sizeof(*outarg));
+}
+
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
+{
+ struct lo_inode *p;
+ struct lo_inode *ret = NULL;
+
+ lo_mutex_lock(lo);
+ for (p = lo->root.next; p != &lo->root; p = p->next) {
+ if (p->attr.ino == st->st_ino && p->dev == st->st_dev) {
+ assert(p->refcount > 0);
+ ret = p;
+ ret->refcount++;
+ break;
+ }
+ }
+ lo_mutex_unlock(lo);
+ return ret;
+}
+
+/* FUSE_LOOKUP: resolve name under the parent, reuse or create its lo_inode, reply. */
+static void lo_lookup(struct lo_chan *lc, struct fuse_in_header *inh,
+		      char *name)
+{
+	struct lo_data *lo = lc->lo;
+	struct lo_inode *inode, *parent = lo_inode(lo, inh->nodeid);
+	struct fuse_entry_out *outarg = lo_out_arg(lc);
+	struct stat stat;
+	int newfd;
+	int res;
+	int saverr;
+
+	if (lo_debug(lc)) {
+		fprintf(stderr, "lo_lookup(parent=%"PRIu64", name=%s)\n",
+			inh->nodeid, name);
+	}
+
+	newfd = openat(parent->fd, name, O_PATH | O_NOFOLLOW);
+	if (newfd == -1)
+		goto out_err;
+
+	res = fstatat(newfd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+	if (res == -1)
+		goto out_err;
+
+	inode = lo_find(lo, &stat);
+	if (inode) {
+		close(newfd);
+		newfd = -1;
+	} else {
+		struct lo_inode *prev, *next;
+
+		inode = lo_alloc_inode(lo);
+		if (!inode) {
+			errno = ENOMEM;	/* out_err replies with errno */
+			goto out_err;
+		}
+		inode->refcount = 1;
+		inode->fd = newfd;
+		inode->dev = stat.st_dev;
+		lo_convert_stat(&stat, &inode->attr);
+
+		lo_mutex_lock(lo);
+		prev = &lo->root;
+		next = prev->next;
+		next->prev = inode;
+		inode->next = next;
+		inode->prev = prev;
+		prev->next = inode;
+		lo_mutex_unlock(lo);
+	}
+	memset(outarg, 0, sizeof(*outarg));
+	outarg->nodeid = (uintptr_t) inode;
+	outarg->entry_valid = lo->c.timeout;
+	outarg->attr_valid = lo->c.timeout;
+	outarg->attr = inode->attr;
+
+	if (lo_debug(lc)) {
+		fprintf(stderr, " %"PRIu64"/%s -> %"PRIu64"\n",
+			inh->nodeid, name, outarg->nodeid);
+	}
+
+	lo_reply(lc, 0, sizeof(*outarg));
+	return;
+
+out_err:
+	saverr = errno;
+	if (newfd != -1)
+		close(newfd);
+	lo_reply(lc, saverr, 0);
+}
+
+/* FUSE_OPEN: reopen the inode's fd via /proc/self/fd and hand out a lo_file. */
+static void lo_open(struct lo_chan *lc, struct fuse_in_header *inh,
+		    struct fuse_open_in *inarg)
+{
+	struct lo_data *lo = lc->lo;
+	struct lo_inode *inode = lo_inode(lo, inh->nodeid);
+	struct fuse_open_out *outarg = lo_out_arg(lc);
+	struct lo_file *lf;
+	char buf[64];
+	int fd;
+
+	sprintf(buf, "/proc/self/fd/%i", inode->fd);
+	fd = open(buf, inarg->flags & ~O_NOFOLLOW);
+	if (fd == -1) {
+		lo_reply(lc, errno, 0);
+		return;
+	}
+
+	lf = lo_alloc_file(lo);
+	if (lf == NULL)
+		errx(1, "malloc failed");
+	lf->fd = fd;
+	lf->mapfd = -1;	/* zeroed lo_file has 0, which lo_release() would close */
+
+	memset(outarg, 0, sizeof(*outarg));
+	outarg->fh = (uintptr_t) lf;
+	outarg->open_flags = FOPEN_DIRECT_IO;
+	if (lo->c.map) {
+		lf->mapfd = ioctl(lc->fd, FUSE2_DEV_IOC_MAP_OPEN, fd);
+		if (lf->mapfd == -1)
+			warn("FUSE2_DEV_IOC_MAP_OPEN");
+		else
+			outarg->open_flags |= FOPEN_MAP;
+	}
+	lo_reply(lc, 0, sizeof(*outarg));
+}
+
+static struct lo_file *lo_file(uint64_t fh)
+{
+ return (void *) (uintptr_t) fh;
+}
+
+static void lo_release(struct lo_chan *lc, struct fuse_in_header *inh,
+ struct fuse_release_in *inarg)
+{
+ struct lo_file *lf = lo_file(inarg->fh);
+
+ (void) inh;
+
+ close(lf->fd);
+ if (lf->mapfd != -1)
+ ioctl(lc->fd, FUSE2_DEV_IOC_MAP_CLOSE, lf->mapfd);
+
+ lo_free_file(lc->lo, lf);
+ lo_reply(lc, 0, 0);
+}
+
+static void lo_read(struct lo_chan *lc, struct fuse_in_header *inh,
+ struct fuse_read_in *inarg)
+{
+ char *outarg = lo_out_arg(lc);
+ struct lo_file *lf = lo_file(inarg->fh);
+ ssize_t res;
+
+ (void) inh;
+
+ if (inarg->size > lc->bufsize - (outarg - (char *) lc->outbuf)) {
+ lo_reply(lc, EOVERFLOW, 0);
+ return;
+ }
+
+ res = pread(lf->fd, outarg, inarg->size, inarg->offset);
+ if (res == -1) {
+ lo_reply(lc, errno, 0);
+ return;
+ }
+
+ lo_reply(lc, 0, res);
+}
+
+static void lo_map(struct lo_chan *lc, struct fuse_in_header *inh,
+ struct fuse_read_in *inarg)
+{
+ struct lo_file *lf = lo_file(inarg->fh);
+ struct fuse_map_out *outarg = lo_out_arg(lc);
+
+ (void) inh;
+
+ if (lo_debug(lc))
+ fprintf(stderr, "lo_map(offset=%"PRIu64", size=%u)\n",
+ inarg->offset, inarg->size);
+
+ outarg->mapfd = lf->mapfd;
+ outarg->offset = inarg->offset;
+ outarg->size = inarg->size;
+
+ lo_reply(lc, 0, sizeof(*outarg));
+}
+
+
+static void lo_opendir(struct lo_chan *lc, struct fuse_in_header *inh,
+ struct fuse_open_in *inarg)
+{
+ struct lo_data *lo = lc->lo;
+ struct lo_inode *inode = lo_inode(lo, inh->nodeid);
+ struct fuse_open_out *outarg = lo_out_arg(lc);
+ struct lo_file *lf;
+ int fd;
+ DIR *dp;
+
+ (void) inarg;
+
+ fd = openat(inode->fd, ".", O_RDONLY);
+ if (fd == -1) {
+ lo_reply(lc, errno, 0);
+ return;
+ }
+
+ dp = fdopendir(fd);
+ if (dp == NULL) {
+ int saverr = errno;
+
+ close(fd);
+ lo_reply(lc, saverr, 0);
+ return;
+ }
+
+ lf = lo_alloc_file(lo);
+ if (lf == NULL)
+ errx(1, "malloc failed");
+
+ memset(outarg, 0, sizeof(*outarg));
+ outarg->fh = (uintptr_t) lf;
+ outarg->open_flags = 0;
+
+ lf->dp = dp;
+
+ lo_reply(lc, 0, sizeof(*outarg));
+}
+
+static void lo_releasedir(struct lo_chan *lc, struct fuse_in_header *inh,
+ struct fuse_release_in *inarg)
+{
+ struct lo_file *lf = lo_file(inarg->fh);
+
+ (void) inh;
+
+ closedir(lf->dp);
+
+ lo_free_file(lc->lo, lf);
+ lo_reply(lc, 0, 0);
+}
+
+static void lo_readdir(struct lo_chan *lc, struct fuse_in_header *inh,
+ struct fuse_read_in *inarg)
+{
+ void *p = lo_out_arg(lc);
+ struct lo_file *lf = lo_file(inarg->fh);
+ size_t rem = inarg->size;
+ int err = 0;
+ const char *name;
+ size_t namelen, entlen, entlen_padded;
+ struct fuse_dirent *dirent;
+ off_t nextoff;
+
+ (void) inh;
+
+ if (inarg->size > lc->bufsize - (p - lc->outbuf)) {
+ lo_reply(lc, EOVERFLOW, 0);
+ return;
+ }
+
+ if ((off_t) inarg->offset != lf->offset) {
+ seekdir(lf->dp, inarg->offset);
+ lf->entry = NULL;
+ lf->offset = inarg->offset;
+ }
+ while (1) {
+ if (!lf->entry) {
+ errno = 0;
+ lf->entry = readdir(lf->dp);
+ if (!lf->entry) {
+ if (errno) {
+ err = errno;
+ break;
+ } else {
+ break;
+ }
+ }
+ }
+ nextoff = lf->entry->d_off;
+ name = lf->entry->d_name;
+ namelen = strlen(name);
+ entlen = FUSE_NAME_OFFSET + namelen;
+ entlen_padded = FUSE_DIRENT_ALIGN(entlen);
+ if (entlen_padded > rem)
+ break;
+
+ dirent = (struct fuse_dirent *) p;
+
+ dirent->ino = lf->entry->d_ino;
+ dirent->off = nextoff;
+ dirent->namelen = namelen;
+ dirent->type = lf->entry->d_type;
+ memcpy(dirent->name, name, namelen);
+ memset(dirent->name + namelen, 0, entlen_padded - entlen);
+
+ p += entlen_padded;
+ rem -= entlen_padded;
+
+ lf->entry = NULL;
+ lf->offset = nextoff;
+ }
+
+ if (err && rem == inarg->size)
+ lo_reply(lc, err, 0);
+ else
+ lo_reply(lc, 0, inarg->size - rem);
+}
+
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
+{
+ if (!inode)
+ return;
+
+ lo_mutex_lock(lo);
+ assert(inode->refcount >= n);
+ inode->refcount -= n;
+ if (!inode->refcount) {
+ struct lo_inode *prev, *next;
+
+ prev = inode->prev;
+ next = inode->next;
+ next->prev = prev;
+ prev->next = next;
+ lo_mutex_unlock(lo);
+
+ close(inode->fd);
+ lo_free_inode(lo, inode);
+ } else {
+ lo_mutex_unlock(lo);
+ }
+}
+
+static void lo_forget_one(struct lo_data *lo, uint64_t nodeid,
+ uint64_t nlookup)
+{
+ struct lo_inode *inode = lo_inode(lo, nodeid);
+
+ if (lo->c.debug) {
+ fprintf(stderr, " forget %"PRIu64" %"PRIu64" -%"PRIu64"\n",
+ nodeid, inode->refcount, nlookup);
+ }
+
+ unref_inode(lo, inode, nlookup);
+
+}
+
+static void lo_forget(struct lo_data *lo, struct fuse_in_header *inh,
+ struct fuse_forget_in *inarg)
+{
+ lo_forget_one(lo, inh->nodeid, inarg->nlookup);
+}
+
+static void lo_batch_forget(struct lo_data *lo, struct fuse_in_header *inh,
+ struct fuse_batch_forget_in *inarg)
+{
+ struct fuse_forget_one *param = (void *) (inarg + 1);
+ unsigned int i;
+
+ (void) inh;
+
+ for (i = 0; i < inarg->count; i++)
+ lo_forget_one(lo, param[i].nodeid, param[i].nlookup);
+}
+
+static void lo_init(struct lo_chan *lc, struct fuse_in_header *inh,
+ struct fuse_init_in *inarg)
+{
+ struct fuse_init_out *outarg = lo_out_arg(lc);
+
+ (void) inh;
+
+ memset(outarg, 0, sizeof(*outarg));
+ outarg->flags = inarg->flags &
+ (FUSE_ATOMIC_O_TRUNC | FUSE_BIG_WRITES | FUSE_PARALLEL_DIROPS |
+ FUSE_HANDLE_KILLPRIV);
+ outarg->major = FUSE_KERNEL_VERSION;
+ outarg->minor = FUSE_KERNEL_MINOR_VERSION;
+
+ lo_reply(lc, 0, sizeof(*outarg));
+}
+
+static ssize_t lo_getreq(struct lo_chan *lc)
+{
+ ssize_t res;
+
+ if (lc->mapped)
+ res = ioctl(lc->fd, FUSE2_DEV_IOC_READ, 0);
+ else
+ res = read(lc->fd, lc->inbuf, lc->bufsize);
+ if (res != -1)
+ lc->len = res;
+
+ return res;
+}
+
+static void lo_process(struct lo_chan *lc)
+{
+ int res;
+ struct fuse_in_header *inh = lc->inbuf;
+ void *arg = inh + 1;
+
+ if (!lc->filled) {
+ res = lo_getreq(lc);
+ if (res == -1)
+ err(1, "reading from device");
+ }
+
+ lc->filled = 0;
+
+ if (lc->len < sizeof(*inh))
+ errx(1, "short read from fuse device");
+
+ switch (inh->opcode) {
+ case FUSE_INIT:
+ lo_init(lc, inh, arg);
+ break;
+
+ case FUSE_LOOKUP:
+ lo_lookup(lc, inh, arg);
+ break;
+
+ case FUSE_GETATTR:
+ lo_getattr(lc, inh, arg);
+ break;
+
+ case FUSE_OPEN:
+ lo_open(lc, inh, arg);
+ break;
+
+ case FUSE_RELEASE:
+ lo_release(lc, inh, arg);
+ break;
+
+ case FUSE_READ:
+ lo_read(lc, inh, arg);
+ break;
+
+ case FUSE_MAP:
+ lo_map(lc, inh, arg);
+ break;
+
+ case FUSE_OPENDIR:
+ lo_opendir(lc, inh, arg);
+ break;
+
+ case FUSE_RELEASEDIR:
+ lo_releasedir(lc, inh, arg);
+ break;
+
+ case FUSE_READDIR:
+ lo_readdir(lc, inh, arg);
+ break;
+
+ case FUSE_FORGET:
+ lo_forget(lc->lo, inh, arg);
+ break;
+
+ case FUSE_BATCH_FORGET:
+ lo_batch_forget(lc->lo, inh, arg);
+ break;
+
+ default:
+ lo_reply(lc, ENOSYS, 0);
+ }
+
+#if 0
+ {
+ static int slow_ctr, fast_ctr;
+ int *cp;
+
+ cp = lc->type == FUDEV_AUX ? &fast_ctr : &slow_ctr;
+ if (__atomic_add_fetch(cp, 1, __ATOMIC_SEQ_CST) % 1000000 == 0)
+ fprintf(stderr, "slow: %9i fast: %9i total: %9i\r",
+ slow_ctr, fast_ctr, slow_ctr + fast_ctr);
+ }
+#endif
+}
+
+static void lo_alloc_bufs(struct lo_chan *lc)
+{
+ int res;
+
+ res = posix_memalign(&lc->inbuf, 0x1000, lc->bufsize);
+ if (res)
+ errx(1, "allocating aligned buffer: %s", strerror(res));
+
+ res = posix_memalign(&lc->outbuf, 0x1000, lc->bufsize);
+ if (res)
+ errx(1, "allocating aligned buffer: %s", strerror(res));
+}
+
+static void lo_map_bufs(struct lo_chan *lc)
+{
+ lc->inbuf = mmap(NULL, lc->bufsize, PROT_READ, MAP_SHARED, lc->fd,
+ FUSE2_MMAP_INBUF_OFFSET);
+ if (lc->inbuf == MAP_FAILED)
+ err(1, "mmap of inbuf failed");
+
+ lc->outbuf = mmap(NULL, lc->bufsize, PROT_WRITE, MAP_SHARED, lc->fd,
+ FUSE2_MMAP_OUTBUF_OFFSET);
+ if (lc->outbuf == MAP_FAILED)
+ err(1, "mmap of outbuf failed");
+
+ lc->mapped = 1;
+}
+
+static void lo_open_aux(struct lo_chan *lc, int devfd)
+{
+ int res;
+
+ lc->type = FUDEV_AUX;
+ lc->fd = open("/dev/fuse2-aux", O_RDWR);
+ if (lc->fd == -1)
+ err(1, "failed to open /dev/fuse2-aux");
+
+ res = ioctl(lc->fd, FUSE2_DEV_IOC_BIND, &devfd);
+ if (res == -1)
+ err(1, "failed to bind aux device");
+
+ if (lc->lo->c.proc)
+ lo_map_bufs(lc);
+ else
+ lo_alloc_bufs(lc);
+}
+
+struct lo_thread_data {
+ struct lo_chan *def_chan; /* channel set up by main(); supplies lo, fd, bufsize */
+ int cpu; /* CPU that lo_start_aux() pins itself to */
+};
+
+/* Worker body: pin to ltd->cpu, bind an aux device, then serve requests forever. */
+static void lo_start_aux(struct lo_thread_data *ltd)
+{
+	struct lo_chan *def = ltd->def_chan;
+	struct lo_chan lc = {
+		.lo = def->lo,
+		/* must not read lc.lo here: initializer order is unspecified */
+		.bufsize = def->lo->c.proc ? 0x2000 : def->bufsize,
+	};
+	cpu_set_t set;
+	int res;
+
+	CPU_ZERO(&set);
+	CPU_SET(ltd->cpu, &set);
+	res = sched_setaffinity(0, sizeof(set), &set);
+	if (res == -1)
+		err(1, "sched_setaffinity() to cpu %i", ltd->cpu);
+	lo_open_aux(&lc, def->fd);
+	while (1)
+		lo_process(&lc);
+}
+
+static void lo_start_v1(struct lo_thread_data *ltd)
+{
+ int res;
+ int devfd = ltd->def_chan->fd;
+ struct lo_chan lc = {
+ .type = FUDEV_V1,
+ .lo = ltd->def_chan->lo,
+ .bufsize = ltd->def_chan->bufsize,
+ };
+
+ if (lc.lo->c.bind) {
+ lc.fd = open("/dev/fuse", O_RDWR);
+ if (lc.fd == -1)
+ err(1, "/dev/fuse");
+
+ res = ioctl(lc.fd, FUSE_DEV_IOC_CLONE, &devfd);
+ if (res == -1)
+ err(1, "FUSE_DEV_IOC_CLONE");
+
+ } else {
+ lc.fd = devfd;
+ }
+
+ lo_alloc_bufs(&lc);
+
+
+ while (1)
+ lo_process(&lc);
+}
+static void *lo_start_one(void *data)
+{
+ struct lo_thread_data *ltd = data;
+
+ if (ltd->def_chan->type == FUDEV_V2)
+ lo_start_aux(ltd);
+ else
+ lo_start_v1(ltd);
+
+ return NULL;
+}
+
+static int lo_start_one_nt(void *data)
+{
+ lo_start_one(data);
+ return 0;
+}
+
+static void lo_start_threads(struct lo_chan *def_chan)
+{
+ int i, n, res;
+ cpu_set_t set;
+ struct lo_thread_data *ltd;
+
+ res = sched_getaffinity(0, sizeof(set), &set);
+ if (res == -1)
+ err(1, "sched_getaffinity()");
+
+ n = CPU_COUNT(&set);
+ for (i = 0; n && i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, &set)) {
+ ltd = malloc(sizeof(*ltd));
+ if (ltd == NULL)
+ errx(1, "malloc failed");
+
+ ltd->def_chan = def_chan;
+ ltd->cpu = i;
+ if (!lo_nothread(def_chan->lo)) {
+ pthread_t id;
+
+ res = pthread_create(&id, NULL, lo_start_one, ltd);
+ if (res != 0) {
+ errno = res;
+ err(1, "pthread_create");
+ }
+ } else {
+ void *stack, *top;
+ size_t stack_size = 1048576;
+
+ stack = malloc(stack_size);
+ if (stack == NULL)
+ errx(1, "failed to allocate child stack");
+ top = stack + stack_size;
+
+ res = clone(lo_start_one_nt, top, CLONE_FILES, ltd);
+ if (res == -1)
+ err(1, "clone");
+ }
+
+ n--;
+ }
+ }
+}
+
+static void lo_usage(char *argv[])
+{
+	errx(1, "usage: %s [-d] [-s] [-b] [-p] [-m] [-1] [-2] [-t] mountpoint", argv[0]);
+}
+
+/* Parse options, mount on the given mountpoint, answer FUSE_INIT, serve requests. */
+int main(int argc, char *argv[])
+{
+	struct lo_data *lo;
+	struct lo_config c = {};
+	struct lo_chan def_chan = {
+		.bufsize = 0x21000,
+	};
+	char *devname;
+	int res, pid;
+	int status;
+	struct stat stat;
+	int ctr;
+	const char *mnt = NULL;
+
+	c.source = "/";
+	for (ctr = 1; ctr < argc; ctr++) {
+		char *arg = argv[ctr];
+
+		if (arg[0] == '-') {
+			switch (arg[1]) {
+			case 'd':
+				c.debug = 1;
+				break;
+			case 's':
+				c.single = 1;
+				break;
+			case 'b':
+				c.bind = 1;
+				break;
+			case 'p':
+				c.proc = 1;
+				break;
+			case 'm':
+				c.map = 1;
+				break;
+			case '1':
+				c.version = 1;
+				break;
+			case '2':
+				c.version = 2;
+				break;
+#ifdef LO_NOTHREAD
+			case 't':
+				c.nothread = 1;
+				break;
+#endif
+			default:
+				lo_usage(argv);
+			}
+
+		} else if (!mnt) {
+			mnt = arg;
+		} else {
+			lo_usage(argv);
+		}
+	}
+	/* Covers argc < 2 too: without a mountpoint mount(2) would get NULL */
+	if (!mnt)
+		lo_usage(argv);
+
+	if (c.nothread)
+		lo = lo_alloc_lo_nt();
+	else
+		lo = calloc(1, sizeof(struct lo_data));
+	if (lo == NULL)
+		errx(1, "failed to allocate memory");
+
+	lo->c = c;
+	lo_mutex_init(lo);
+
+	/* Don't mask creation mode, kernel already did that */
+	umask(0);
+
+	lo->root.next = lo->root.prev = &lo->root;
+	lo->root.refcount = 2;
+
+	lo->root.fd = open(lo->c.source, O_PATH);
+	if (lo->root.fd == -1)
+		err(1, "open(\"%s\", O_PATH)", lo->c.source);
+
+	res = fstatat(lo->root.fd, "", &stat,
+		      AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+	if (res == -1)
+		err(1, "statting root");
+
+	lo_convert_stat(&stat, &lo->root.attr);
+
+	def_chan.lo = lo;
+	def_chan.fd = -1;
+	if (lo->c.version != 1) {
+		def_chan.type = FUDEV_V2;
+		devname = "/dev/fuse2";
+		def_chan.fd = open(devname, O_RDWR);
+	}
+	if (lo->c.version != 2 && def_chan.fd == -1) {
+		def_chan.type = FUDEV_V1;
+		devname = "/dev/fuse";
+		def_chan.fd = open(devname, O_RDWR);
+	}
+	if (def_chan.fd == -1)
+		err(1, "opening %s", devname);
+
+	lo_alloc_bufs(&def_chan);
+
+	pid = fork();
+	if (pid == -1)
+		err(1, "fork");
+
+	if (pid == 0) {
+		char opts[128];
+
+		if (def_chan.type == FUDEV_V2) {
+			snprintf(opts, sizeof(opts), "fd=%i", def_chan.fd);
+			res = mount("loraw", mnt, "fuse2.loraw", 0, opts);
+		} else {
+			snprintf(opts, sizeof(opts),
+				 "fd=%i,rootmode=40000,user_id=0,group_id=0",
+				 def_chan.fd);
+
+			res = mount("loraw", mnt, "fuse.loraw", 0, opts);
+		}
+		if (res == -1)
+			exit(1);
+
+		exit(0);
+	}
+	res = lo_getreq(&def_chan);
+	if (res > 0) {
+		struct fuse_in_header *inh = def_chan.inbuf;
+
+		if (def_chan.len < sizeof(*inh))
+			errx(1, "short read from fuse device");
+		if (inh->opcode != FUSE_INIT)
+			errx(1, "FUSE_INIT expected");
+		lo_init(&def_chan, inh, (void *) (inh + 1));
+	}
+	res = waitpid(pid, &status, 0);
+	if (res == -1)
+		err(1, "waitpid failed for mount process");
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+		errx(1, "mount failed");
+
+	if (!lo->c.single)
+		lo_start_threads(&def_chan);
+
+	while (1)
+		lo_process(&def_chan);
+
+	return 0;
+}