Merge tag 'fuse-update-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi:
- Fix a page locking bug in write (introduced in 2.6.26)
- Allow sgid bit to be killed in setacl()
- Miscellaneous fixes and cleanups
* tag 'fuse-update-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
cuse: simplify refcount
cuse: prevent clone
virtiofs: fix userns
virtiofs: remove useless function
virtiofs: split requests that exceed virtqueue size
virtiofs: fix memory leak in virtio_fs_probe()
fuse: invalidate attrs when page writeback completes
fuse: add a flag FUSE_SETXATTR_ACL_KILL_SGID to kill SGID
fuse: extend FUSE_SETXATTR request
fuse: fix matching of FUSE_DEV_IOC_CLONE command
fuse: fix a typo
fuse: don't zero pages twice
fuse: fix typo for fuse_conn.max_pages comment
fuse: fix write deadlock
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
index e9c0f91..52b1653 100644
--- a/fs/fuse/acl.c
+++ b/fs/fuse/acl.c
@@ -71,6 +71,7 @@
return -EINVAL;
if (acl) {
+ unsigned int extra_flags = 0;
/*
* Fuse userspace is responsible for updating access
* permissions in the inode, if needed. fuse_setxattr
@@ -94,7 +95,11 @@
return ret;
}
- ret = fuse_setxattr(inode, name, value, size, 0);
+ if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) &&
+ !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID))
+ extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID;
+
+ ret = fuse_setxattr(inode, name, value, size, 0, extra_flags);
kfree(value);
} else {
ret = fuse_removexattr(inode, name);
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 4508226..c7d882a 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -511,20 +511,18 @@
fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns,
&fuse_dev_fiq_ops, NULL);
+ cc->fc.release = cuse_fc_release;
fud = fuse_dev_alloc_install(&cc->fc);
- if (!fud) {
- kfree(cc);
+ fuse_conn_put(&cc->fc);
+ if (!fud)
return -ENOMEM;
- }
INIT_LIST_HEAD(&cc->list);
- cc->fc.release = cuse_fc_release;
cc->fc.initialized = 1;
rc = cuse_send_init(cc);
if (rc) {
fuse_dev_free(fud);
- fuse_conn_put(&cc->fc);
return rc;
}
file->private_data = fud;
@@ -561,8 +559,6 @@
unregister_chrdev_region(cc->cdev->dev, 1);
cdev_del(cc->cdev);
}
- /* Base reference is now owned by "fud" */
- fuse_conn_put(&cc->fc);
rc = fuse_dev_release(inode, file); /* puts the base reference */
@@ -627,6 +623,8 @@
cuse_channel_fops.owner = THIS_MODULE;
cuse_channel_fops.open = cuse_channel_open;
cuse_channel_fops.release = cuse_channel_release;
+ /* CUSE is not prepared for FUSE_DEV_IOC_CLONE */
+ cuse_channel_fops.unlocked_ioctl = NULL;
cuse_class = class_create(THIS_MODULE, "cuse");
if (IS_ERR(cuse_class))
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c0fee83..a5ceccc 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2233,11 +2233,8 @@
int oldfd;
struct fuse_dev *fud = NULL;
- if (_IOC_TYPE(cmd) != FUSE_DEV_IOC_MAGIC)
- return -ENOTTY;
-
- switch (_IOC_NR(cmd)) {
- case _IOC_NR(FUSE_DEV_IOC_CLONE):
+ switch (cmd) {
+ case FUSE_DEV_IOC_CLONE:
res = -EFAULT;
if (!get_user(oldfd, (__u32 __user *)arg)) {
struct file *old = fget(oldfd);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e8aa533..09ef2a4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -802,21 +802,12 @@
{
struct fuse_conn *fc = get_fuse_conn(inode);
- if (fc->writeback_cache) {
- /*
- * A hole in a file. Some data after the hole are in page cache,
- * but have not reached the client fs yet. So, the hole is not
- * present there.
- */
- int i;
- int start_idx = num_read >> PAGE_SHIFT;
- size_t off = num_read & (PAGE_SIZE - 1);
-
- for (i = start_idx; i < ap->num_pages; i++) {
- zero_user_segment(ap->pages[i], off, PAGE_SIZE);
- off = 0;
- }
- } else {
+ /*
+ * If writeback_cache is enabled, a short read means there's a hole in
+ * the file. Some data after the hole is in page cache, but has not
+ * reached the client fs yet. So the hole is not present there.
+ */
+ if (!fc->writeback_cache) {
loff_t pos = page_offset(ap->pages[0]) + num_read;
fuse_read_update_size(inode, pos, attr_ver);
}
@@ -1103,6 +1094,7 @@
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
unsigned int offset, i;
+ bool short_write;
int err;
for (i = 0; i < ap->num_pages; i++)
@@ -1117,32 +1109,38 @@
if (!err && ia->write.out.size > count)
err = -EIO;
+ short_write = ia->write.out.size < count;
offset = ap->descs[0].offset;
count = ia->write.out.size;
for (i = 0; i < ap->num_pages; i++) {
struct page *page = ap->pages[i];
- if (!err && !offset && count >= PAGE_SIZE)
- SetPageUptodate(page);
-
- if (count > PAGE_SIZE - offset)
- count -= PAGE_SIZE - offset;
- else
- count = 0;
- offset = 0;
-
- unlock_page(page);
+ if (err) {
+ ClearPageUptodate(page);
+ } else {
+ if (count >= PAGE_SIZE - offset)
+ count -= PAGE_SIZE - offset;
+ else {
+ if (short_write)
+ ClearPageUptodate(page);
+ count = 0;
+ }
+ offset = 0;
+ }
+ if (ia->write.page_locked && (i == ap->num_pages - 1))
+ unlock_page(page);
put_page(page);
}
return err;
}
-static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap,
+static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos,
unsigned int max_pages)
{
+ struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
unsigned offset = pos & (PAGE_SIZE - 1);
size_t count = 0;
@@ -1195,6 +1193,16 @@
if (offset == PAGE_SIZE)
offset = 0;
+ /* If we copied a full page, mark it uptodate */
+ if (tmp == PAGE_SIZE)
+ SetPageUptodate(page);
+
+ if (PageUptodate(page)) {
+ unlock_page(page);
+ } else {
+ ia->write.page_locked = true;
+ break;
+ }
if (!fc->big_writes)
break;
} while (iov_iter_count(ii) && count < fc->max_write &&
@@ -1238,7 +1246,7 @@
break;
}
- count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages);
+ count = fuse_fill_write_pages(&ia, mapping, ii, pos, nr_pages);
if (count <= 0) {
err = count;
} else {
@@ -1753,8 +1761,17 @@
container_of(args, typeof(*wpa), ia.ap.args);
struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
mapping_set_error(inode->i_mapping, error);
+ /*
+ * A writeback finished, which may have updated mtime/ctime on the
+ * server and made the local mtime/ctime stale, so invalidate attrs.
+ * Do this only if writeback_cache is not enabled; with writeback_cache
+ * enabled, the local ctime/mtime are trusted.
+ */
+ if (!fc->writeback_cache)
+ fuse_invalidate_attr(inode);
spin_lock(&fi->lock);
rb_erase(&wpa->writepages_entry, &fi->writepages);
while (wpa->next) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ca868b7..7e463e2 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -552,9 +552,12 @@
/** Maximum write size */
unsigned max_write;
- /** Maxmum number of pages that can be used in a single request */
+ /** Maximum number of pages that can be used in a single request */
unsigned int max_pages;
+ /** Constrain ->max_pages to this value during feature negotiation */
+ unsigned int max_pages_limit;
+
/** Input queue */
struct fuse_iqueue iq;
@@ -668,6 +671,9 @@
/** Is setxattr not implemented by fs? */
unsigned no_setxattr:1;
+ /** Does the file server support extended setxattr? */
+ unsigned setxattr_ext:1;
+
/** Is getxattr not implemented by fs? */
unsigned no_getxattr:1;
@@ -713,7 +719,7 @@
/** Use enhanced/automatic page cache invalidation. */
unsigned auto_inval_data:1;
- /** Filesystem is fully reponsible for page cache invalidation. */
+ /** Filesystem is fully responsible for page cache invalidation. */
unsigned explicit_inval_data:1;
/** Does the filesystem support readdirplus? */
@@ -934,6 +940,7 @@
struct {
struct fuse_write_in in;
struct fuse_write_out out;
+ bool page_locked;
} write;
};
struct fuse_args_pages ap;
@@ -1193,7 +1200,7 @@
bool fuse_lock_inode(struct inode *inode);
int fuse_setxattr(struct inode *inode, const char *name, const void *value,
- size_t size, int flags);
+ size_t size, int flags, unsigned int extra_flags);
ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
size_t size);
ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index b4b956d..6c99520 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -712,6 +712,7 @@
fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->user_ns = get_user_ns(user_ns);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
+ fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
INIT_LIST_HEAD(&fc->mounts);
list_add(&fm->fc_entry, &fc->mounts);
@@ -1040,7 +1041,7 @@
fc->abort_err = 1;
if (arg->flags & FUSE_MAX_PAGES) {
fc->max_pages =
- min_t(unsigned int, FUSE_MAX_MAX_PAGES,
+ min_t(unsigned int, fc->max_pages_limit,
max_t(unsigned int, arg->max_pages, 1));
}
if (IS_ENABLED(CONFIG_FUSE_DAX) &&
@@ -1052,6 +1053,8 @@
fc->handle_killpriv_v2 = 1;
fm->sb->s_flags |= SB_NOSEC;
}
+ if (arg->flags & FUSE_SETXATTR_EXT)
+ fc->setxattr_ext = 1;
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
@@ -1095,7 +1098,7 @@
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
- FUSE_HANDLE_KILLPRIV_V2;
+ FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT;
#ifdef CONFIG_FUSE_DAX
if (fm->fc->dax)
ia->in.flags |= FUSE_MAP_ALIGNMENT;
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 4ee6f73..bcb8a02 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -18,6 +18,12 @@
#include <linux/uio.h>
#include "fuse_i.h"
+/* Used to help calculate the FUSE connection's max_pages limit for a request's
+ * size. Parts of struct fuse_req are sliced into scatter-gather lists in
+ * addition to the pages used, so this helps account for that overhead.
+ */
+#define FUSE_HEADER_OVERHEAD 4
+
/* List of virtio-fs device instances and a lock for the list. Also provides
* mutual exclusion in device removal and mounting path
*/
@@ -127,11 +133,6 @@
return &fs->vqs[vq->index];
}
-static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
-{
- return &vq_to_fsvq(vq)->fud->pq;
-}
-
/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
@@ -896,6 +897,7 @@
out_vqs:
vdev->config->reset(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
+ kfree(fs->vqs);
out:
vdev->priv = NULL;
@@ -1413,9 +1415,10 @@
{
struct virtio_fs *fs;
struct super_block *sb;
- struct fuse_conn *fc;
+ struct fuse_conn *fc = NULL;
struct fuse_mount *fm;
- int err;
+ unsigned int virtqueue_size;
+ int err = -EIO;
/* This gets a reference on virtio_fs object. This ptr gets installed
* in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
@@ -1427,6 +1430,10 @@
return -EINVAL;
}
+ virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
+ if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
+ goto out_err;
+
err = -ENOMEM;
fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
if (!fc)
@@ -1436,12 +1443,15 @@
if (!fm)
goto out_err;
- fuse_conn_init(fc, fm, get_user_ns(current_user_ns()),
- &virtio_fs_fiq_ops, fs);
+ fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
fc->release = fuse_free_conn;
fc->delete_stale = true;
fc->auto_submounts = true;
+ /* Tell FUSE to split requests that exceed the virtqueue's size */
+ fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
+ virtqueue_size - FUSE_HEADER_OVERHEAD);
+
fsc->s_fs_info = fm;
sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
if (fsc->s_fs_info) {
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index 1a7d7ac..61dfaf7 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -12,7 +12,7 @@
#include <linux/posix_acl_xattr.h>
int fuse_setxattr(struct inode *inode, const char *name, const void *value,
- size_t size, int flags)
+ size_t size, int flags, unsigned int extra_flags)
{
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
@@ -25,10 +25,13 @@
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
inarg.flags = flags;
+ inarg.setxattr_flags = extra_flags;
+
args.opcode = FUSE_SETXATTR;
args.nodeid = get_node_id(inode);
args.in_numargs = 3;
- args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].size = fm->fc->setxattr_ext ?
+ sizeof(inarg) : FUSE_COMPAT_SETXATTR_IN_SIZE;
args.in_args[0].value = &inarg;
args.in_args[1].size = strlen(name) + 1;
args.in_args[1].value = name;
@@ -199,7 +202,7 @@
if (!value)
return fuse_removexattr(inode, name);
- return fuse_setxattr(inode, name, value, size, flags);
+ return fuse_setxattr(inode, name, value, size, flags, 0);
}
static bool no_xattr_list(struct dentry *dentry)
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 54442612..271ae90 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -179,6 +179,8 @@
* 7.33
* - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID
* - add FUSE_OPEN_KILL_SUIDGID
+ * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT
+ * - add FUSE_SETXATTR_ACL_KILL_SGID
*/
#ifndef _LINUX_FUSE_H
@@ -330,6 +332,7 @@
* does not have CAP_FSETID. Additionally upon
* write/truncate sgid is killed only if file has group
* execute permission. (Same as Linux VFS behavior).
+ * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -360,6 +363,7 @@
#define FUSE_MAP_ALIGNMENT (1 << 26)
#define FUSE_SUBMOUNTS (1 << 27)
#define FUSE_HANDLE_KILLPRIV_V2 (1 << 28)
+#define FUSE_SETXATTR_EXT (1 << 29)
/**
* CUSE INIT request/reply flags
@@ -451,6 +455,12 @@
*/
#define FUSE_OPEN_KILL_SUIDGID (1 << 0)
+/**
+ * setxattr flags
+ * FUSE_SETXATTR_ACL_KILL_SGID: Clear SGID when system.posix_acl_access is set
+ */
+#define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0)
+
enum fuse_opcode {
FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */
@@ -681,9 +691,13 @@
uint32_t padding;
};
+#define FUSE_COMPAT_SETXATTR_IN_SIZE 8
+
struct fuse_setxattr_in {
uint32_t size;
uint32_t flags;
+ uint32_t setxattr_flags;
+ uint32_t padding;
};
struct fuse_getxattr_in {