Merge branches 'work.fs_context', 'work.f_path', 'work.qstr', 'work.misc', 'work.nfsctl' and 'work.finish_no_open' into for-next
diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst
index e149b89..c99ab1f 100644
--- a/Documentation/filesystems/mount_api.rst
+++ b/Documentation/filesystems/mount_api.rst
@@ -506,8 +506,16 @@
* ::
+ int vfs_parse_fs_qstr(struct fs_context *fc, const char *key,
+ const struct qstr *value);
+
+ A wrapper around vfs_parse_fs_param() that copies the value string it is
+ passed.
+
+ * ::
+
int vfs_parse_fs_string(struct fs_context *fc, const char *key,
- const char *value, size_t v_size);
+ const char *value);
A wrapper around vfs_parse_fs_param() that copies the value string it is
passed.
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 85f5902..ab48ab3 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1285,3 +1285,15 @@
The vm_area_desc provides the minimum required information for a filesystem
to initialise state upon memory mapping of a file-backed region, and output
parameters for the file system to set this state.
+
+---
+
+**mandatory**
+
+Calling conventions for vfs_parse_fs_string() have changed; it does *not*
+take length anymore (value ? strlen(value) : 0 is used). If you want
+a different length, use
+
+ vfs_parse_fs_qstr(fc, key, &QSTR_LEN(value, len))
+
+instead.
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
index a09e2eb..8f13ec4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -11,11 +11,6 @@
#include "i915_gemfs.h"
#include "i915_utils.h"
-static int add_param(struct fs_context *fc, const char *key, const char *val)
-{
- return vfs_parse_fs_string(fc, key, val, strlen(val));
-}
-
void i915_gemfs_init(struct drm_i915_private *i915)
{
struct file_system_type *type;
@@ -48,9 +43,9 @@ void i915_gemfs_init(struct drm_i915_private *i915)
fc = fs_context_for_mount(type, SB_KERNMOUNT);
if (IS_ERR(fc))
goto err;
- ret = add_param(fc, "source", "tmpfs");
+ ret = vfs_parse_fs_string(fc, "source", "tmpfs");
if (!ret)
- ret = add_param(fc, "huge", "within_size");
+ ret = vfs_parse_fs_string(fc, "huge", "within_size");
if (!ret)
gemfs = fc_mount_longterm(fc);
put_fs_context(fc);
diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c
index 8ec6ed8..c1a3016 100644
--- a/drivers/gpu/drm/v3d/v3d_gemfs.c
+++ b/drivers/gpu/drm/v3d/v3d_gemfs.c
@@ -7,11 +7,6 @@
#include "v3d_drv.h"
-static int add_param(struct fs_context *fc, const char *key, const char *val)
-{
- return vfs_parse_fs_string(fc, key, val, strlen(val));
-}
-
void v3d_gemfs_init(struct v3d_dev *v3d)
{
struct file_system_type *type;
@@ -38,9 +33,9 @@ void v3d_gemfs_init(struct v3d_dev *v3d)
fc = fs_context_for_mount(type, SB_KERNMOUNT);
if (IS_ERR(fc))
goto err;
- ret = add_param(fc, "source", "tmpfs");
+ ret = vfs_parse_fs_string(fc, "source", "tmpfs");
if (!ret)
- ret = add_param(fc, "huge", "within_size");
+ ret = vfs_parse_fs_string(fc, "huge", "within_size");
if (!ret)
gemfs = fc_mount_longterm(fc);
put_fs_context(fc);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 399d455..d0c77ec 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -768,22 +768,18 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct v9fs_inode __maybe_unused *v9inode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
- struct dentry *res = NULL;
struct inode *inode;
int p9_omode;
if (d_in_lookup(dentry)) {
- res = v9fs_vfs_lookup(dir, dentry, 0);
- if (IS_ERR(res))
- return PTR_ERR(res);
-
- if (res)
- dentry = res;
+ struct dentry *res = v9fs_vfs_lookup(dir, dentry, 0);
+ if (res || d_really_is_positive(dentry))
+ return finish_no_open(file, res);
}
/* Only creates */
- if (!(flags & O_CREAT) || d_really_is_positive(dentry))
- return finish_no_open(file, res);
+ if (!(flags & O_CREAT))
+ return finish_no_open(file, NULL);
v9ses = v9fs_inode2v9ses(dir);
perm = unixmode2p9mode(v9ses, mode);
@@ -795,17 +791,17 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
"write-only file with writeback enabled, creating w/ O_RDWR\n");
}
fid = v9fs_create(v9ses, dir, dentry, NULL, perm, p9_omode);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- goto error;
- }
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
v9fs_invalidate_inode_attr(dir);
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
err = finish_open(file, dentry, generic_file_open);
- if (err)
- goto error;
+ if (unlikely(err)) {
+ p9_fid_put(fid);
+ return err;
+ }
file->private_data = fid;
#ifdef CONFIG_9P_FSCACHE
@@ -818,13 +814,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
v9fs_open_fid_add(inode, &fid);
file->f_mode |= FMODE_CREATED;
-out:
- dput(res);
- return err;
-
-error:
- p9_fid_put(fid);
- goto out;
+ return 0;
}
/**
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 5b5fda6..be297e3 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -238,20 +238,16 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
struct p9_fid *dfid = NULL, *ofid = NULL;
struct v9fs_session_info *v9ses;
struct posix_acl *pacl = NULL, *dacl = NULL;
- struct dentry *res = NULL;
if (d_in_lookup(dentry)) {
- res = v9fs_vfs_lookup(dir, dentry, 0);
- if (IS_ERR(res))
- return PTR_ERR(res);
-
- if (res)
- dentry = res;
+ struct dentry *res = v9fs_vfs_lookup(dir, dentry, 0);
+ if (res || d_really_is_positive(dentry))
+ return finish_no_open(file, res);
}
/* Only creates */
- if (!(flags & O_CREAT) || d_really_is_positive(dentry))
- return finish_no_open(file, res);
+ if (!(flags & O_CREAT))
+ return finish_no_open(file, NULL);
v9ses = v9fs_inode2v9ses(dir);
@@ -337,7 +333,6 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
p9_fid_put(ofid);
p9_fid_put(fid);
v9fs_put_acl(dacl, pacl);
- dput(res);
return err;
}
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index 60a549f1..60b0f70 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -239,7 +239,7 @@ static void afs_edit_init_block(union afs_xdr_dir_block *meta,
* The caller must hold the inode locked.
*/
void afs_edit_dir_add(struct afs_vnode *vnode,
- struct qstr *name, struct afs_fid *new_fid,
+ const struct qstr *name, struct afs_fid *new_fid,
enum afs_edit_dir_reason why)
{
union afs_xdr_dir_block *meta, *block;
@@ -391,7 +391,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
* The caller must hold the inode locked.
*/
void afs_edit_dir_remove(struct afs_vnode *vnode,
- struct qstr *name, enum afs_edit_dir_reason why)
+ const struct qstr *name, enum afs_edit_dir_reason why)
{
union afs_xdr_dir_block *meta, *block, *pblock;
union afs_xdr_dirent *de, *pde;
diff --git a/fs/afs/dir_search.c b/fs/afs/dir_search.c
index b25bd89..d2516e5 100644
--- a/fs/afs/dir_search.c
+++ b/fs/afs/dir_search.c
@@ -188,7 +188,7 @@ int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
/*
* Search the appropriate hash chain in the contents of an AFS directory.
*/
-int afs_dir_search(struct afs_vnode *dvnode, struct qstr *name,
+int afs_dir_search(struct afs_vnode *dvnode, const struct qstr *name,
struct afs_fid *_fid, afs_dataversion_t *_dir_version)
{
struct afs_dir_iter iter = { .dvnode = dvnode, };
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1124ea4..1ce5dea 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1097,9 +1097,9 @@ int afs_single_writepages(struct address_space *mapping,
/*
* dir_edit.c
*/
-extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *,
+extern void afs_edit_dir_add(struct afs_vnode *, const struct qstr *, struct afs_fid *,
enum afs_edit_dir_reason);
-extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
+extern void afs_edit_dir_remove(struct afs_vnode *, const struct qstr *, enum afs_edit_dir_reason);
void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode,
enum afs_edit_dir_reason why);
void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_vnode);
@@ -1112,7 +1112,7 @@ bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name);
union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block);
int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
struct afs_fid *_fid);
-int afs_dir_search(struct afs_vnode *dvnode, struct qstr *name,
+int afs_dir_search(struct afs_vnode *dvnode, const struct qstr *name,
struct afs_fid *_fid, afs_dataversion_t *_dir_version);
/*
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 9434a53..1ad048e 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -137,7 +137,8 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
ret = -EINVAL;
if (content[size - 1] == '.')
- ret = vfs_parse_fs_string(fc, "source", content, size - 1);
+ ret = vfs_parse_fs_qstr(fc, "source",
+ &QSTR_LEN(content, size - 1));
do_delayed_call(&cleanup);
if (ret < 0)
return ret;
diff --git a/fs/bpf_fs_kfuncs.c b/fs/bpf_fs_kfuncs.c
index 1e36a12..5ace251 100644
--- a/fs/bpf_fs_kfuncs.c
+++ b/fs/bpf_fs_kfuncs.c
@@ -79,7 +79,7 @@ __bpf_kfunc void bpf_put_file(struct file *file)
* pathname in *buf*, including the NUL termination character. On error, a
* negative integer is returned.
*/
-__bpf_kfunc int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz)
+__bpf_kfunc int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz)
{
int len;
char *ret;
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 69133ec..f3f79c6 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -114,26 +114,21 @@ static int create_link(struct config_item *parent_item,
}
-static int get_target(const char *symname, struct path *path,
- struct config_item **target, struct super_block *sb)
+static int get_target(const char *symname, struct config_item **target,
+ struct super_block *sb)
{
+ struct path path __free(path_put) = {};
int ret;
- ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path);
- if (!ret) {
- if (path->dentry->d_sb == sb) {
- *target = configfs_get_config_item(path->dentry);
- if (!*target) {
- ret = -ENOENT;
- path_put(path);
- }
- } else {
- ret = -EPERM;
- path_put(path);
- }
- }
-
- return ret;
+ ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
+ if (ret)
+ return ret;
+ if (path.dentry->d_sb != sb)
+ return -EPERM;
+ *target = configfs_get_config_item(path.dentry);
+ if (!*target)
+ return -ENOENT;
+ return 0;
}
@@ -141,7 +136,6 @@ int configfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
int ret;
- struct path path;
struct configfs_dirent *sd;
struct config_item *parent_item;
struct config_item *target_item = NULL;
@@ -188,7 +182,7 @@ int configfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
* AV, a thoroughly annoyed bastard.
*/
inode_unlock(dir);
- ret = get_target(symname, &path, &target_item, dentry->d_sb);
+ ret = get_target(symname, &target_item, dentry->d_sb);
inode_lock(dir);
if (ret)
goto out_put;
@@ -210,7 +204,6 @@ int configfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
}
config_item_put(target_item);
- path_put(&path);
out_put:
config_item_put(parent_item);
diff --git a/fs/dcache.c b/fs/dcache.c
index 60046ae..6ff430d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1390,6 +1390,7 @@ struct check_mount {
unsigned int mounted;
};
+/* locks: mount_locked_reader && dentry->d_lock */
static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry)
{
struct check_mount *info = data;
@@ -1416,9 +1417,8 @@ int path_has_submounts(const struct path *parent)
{
struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
- read_seqlock_excl(&mount_lock);
+ guard(mount_locked_reader)();
d_walk(parent->dentry, &data, path_check_mount);
- read_sequnlock_excl(&mount_lock);
return data.mounted;
}
@@ -1717,13 +1717,13 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
dname = dentry->d_shortname.string;
}
- dentry->d_name.len = name->len;
- dentry->d_name.hash = name->hash;
+ dentry->__d_name.len = name->len;
+ dentry->__d_name.hash = name->hash;
memcpy(dname, name->name, name->len);
dname[name->len] = 0;
/* Make sure we always see the terminating NUL character */
- smp_store_release(&dentry->d_name.name, dname); /* ^^^ */
+ smp_store_release(&dentry->__d_name.name, dname); /* ^^^ */
dentry->d_flags = 0;
lockref_init(&dentry->d_lockref);
@@ -2743,15 +2743,15 @@ static void swap_names(struct dentry *dentry, struct dentry *target)
/*
* Both external: swap the pointers
*/
- swap(target->d_name.name, dentry->d_name.name);
+ swap(target->__d_name.name, dentry->__d_name.name);
} else {
/*
* dentry:internal, target:external. Steal target's
* storage and make target internal.
*/
- dentry->d_name.name = target->d_name.name;
+ dentry->__d_name.name = target->__d_name.name;
target->d_shortname = dentry->d_shortname;
- target->d_name.name = target->d_shortname.string;
+ target->__d_name.name = target->d_shortname.string;
}
} else {
if (unlikely(dname_external(dentry))) {
@@ -2759,9 +2759,9 @@ static void swap_names(struct dentry *dentry, struct dentry *target)
* dentry:external, target:internal. Give dentry's
* storage to target and make dentry internal
*/
- target->d_name.name = dentry->d_name.name;
+ target->__d_name.name = dentry->__d_name.name;
dentry->d_shortname = target->d_shortname;
- dentry->d_name.name = dentry->d_shortname.string;
+ dentry->__d_name.name = dentry->d_shortname.string;
} else {
/*
* Both are internal.
@@ -2771,7 +2771,7 @@ static void swap_names(struct dentry *dentry, struct dentry *target)
target->d_shortname.words[i]);
}
}
- swap(dentry->d_name.hash_len, target->d_name.hash_len);
+ swap(dentry->__d_name.hash_len, target->__d_name.hash_len);
}
static void copy_name(struct dentry *dentry, struct dentry *target)
@@ -2781,11 +2781,11 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
old_name = external_name(dentry);
if (unlikely(dname_external(target))) {
atomic_inc(&external_name(target)->count);
- dentry->d_name = target->d_name;
+ dentry->__d_name = target->__d_name;
} else {
dentry->d_shortname = target->d_shortname;
- dentry->d_name.name = dentry->d_shortname.string;
- dentry->d_name.hash_len = target->d_name.hash_len;
+ dentry->__d_name.name = dentry->d_shortname.string;
+ dentry->__d_name.hash_len = target->__d_name.hash_len;
}
if (old_name && likely(atomic_dec_and_test(&old_name->count)))
kfree_rcu(old_name, head);
@@ -3133,7 +3133,7 @@ void d_mark_tmpfile(struct file *file, struct inode *inode)
!d_unlinked(dentry));
spin_lock(&dentry->d_parent->d_lock);
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- dentry->d_name.len = sprintf(dentry->d_shortname.string, "#%llu",
+ dentry->__d_name.len = sprintf(dentry->d_shortname.string, "#%llu",
(unsigned long long)inode->i_ino);
spin_unlock(&dentry->d_lock);
spin_unlock(&dentry->d_parent->d_lock);
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 1dfd5b8..6648a92 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -59,14 +59,6 @@ static int ecryptfs_d_revalidate(struct inode *dir, const struct qstr *name,
return rc;
}
-struct kmem_cache *ecryptfs_dentry_info_cache;
-
-static void ecryptfs_dentry_free_rcu(struct rcu_head *head)
-{
- kmem_cache_free(ecryptfs_dentry_info_cache,
- container_of(head, struct ecryptfs_dentry_info, rcu));
-}
-
/**
* ecryptfs_d_release
* @dentry: The ecryptfs dentry
@@ -75,11 +67,7 @@ static void ecryptfs_dentry_free_rcu(struct rcu_head *head)
*/
static void ecryptfs_d_release(struct dentry *dentry)
{
- struct ecryptfs_dentry_info *p = dentry->d_fsdata;
- if (p) {
- path_put(&p->lower_path);
- call_rcu(&p->rcu, ecryptfs_dentry_free_rcu);
- }
+ dput(dentry->d_fsdata);
}
const struct dentry_operations ecryptfs_dops = {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 1f562e7..9e6ab0b 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -258,13 +258,6 @@ struct ecryptfs_inode_info {
struct ecryptfs_crypt_stat crypt_stat;
};
-/* dentry private data. Each dentry must keep track of a lower
- * vfsmount too. */
-struct ecryptfs_dentry_info {
- struct path lower_path;
- struct rcu_head rcu;
-};
-
/**
* ecryptfs_global_auth_tok - A key used to encrypt all new files under the mountpoint
* @flags: Status flags
@@ -348,6 +341,7 @@ struct ecryptfs_mount_crypt_stat {
/* superblock private data. */
struct ecryptfs_sb_info {
struct super_block *wsi_sb;
+ struct vfsmount *lower_mnt;
struct ecryptfs_mount_crypt_stat mount_crypt_stat;
};
@@ -494,22 +488,25 @@ ecryptfs_set_superblock_lower(struct super_block *sb,
}
static inline void
-ecryptfs_set_dentry_private(struct dentry *dentry,
- struct ecryptfs_dentry_info *dentry_info)
+ecryptfs_set_dentry_lower(struct dentry *dentry,
+ struct dentry *lower_dentry)
{
- dentry->d_fsdata = dentry_info;
+ dentry->d_fsdata = lower_dentry;
}
static inline struct dentry *
ecryptfs_dentry_to_lower(struct dentry *dentry)
{
- return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry;
+ return dentry->d_fsdata;
}
-static inline const struct path *
-ecryptfs_dentry_to_lower_path(struct dentry *dentry)
+static inline struct path
+ecryptfs_lower_path(struct dentry *dentry)
{
- return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
+ return (struct path){
+ .mnt = ecryptfs_superblock_to_private(dentry->d_sb)->lower_mnt,
+ .dentry = ecryptfs_dentry_to_lower(dentry)
+ };
}
#define ecryptfs_printk(type, fmt, arg...) \
@@ -532,7 +529,6 @@ extern unsigned int ecryptfs_number_of_users;
extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
extern struct kmem_cache *ecryptfs_file_info_cache;
-extern struct kmem_cache *ecryptfs_dentry_info_cache;
extern struct kmem_cache *ecryptfs_inode_info_cache;
extern struct kmem_cache *ecryptfs_sb_info_cache;
extern struct kmem_cache *ecryptfs_header_cache;
@@ -557,7 +553,6 @@ int ecryptfs_encrypt_and_encode_filename(
size_t *encoded_name_size,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
const char *name, size_t name_size);
-struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
void ecryptfs_dump_hex(char *data, int bytes);
int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
int sg_size);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 5f8f96d..7929411 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -33,13 +33,12 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
struct iov_iter *to)
{
ssize_t rc;
- const struct path *path;
struct file *file = iocb->ki_filp;
rc = generic_file_read_iter(iocb, to);
if (rc >= 0) {
- path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
- touch_atime(path);
+ struct path path = ecryptfs_lower_path(file->f_path.dentry);
+ touch_atime(&path);
}
return rc;
}
@@ -59,12 +58,11 @@ static ssize_t ecryptfs_splice_read_update_atime(struct file *in, loff_t *ppos,
size_t len, unsigned int flags)
{
ssize_t rc;
- const struct path *path;
rc = filemap_splice_read(in, ppos, pipe, len, flags);
if (rc >= 0) {
- path = ecryptfs_dentry_to_lower_path(in->f_path.dentry);
- touch_atime(path);
+ struct path path = ecryptfs_lower_path(in->f_path.dentry);
+ touch_atime(&path);
}
return rc;
}
@@ -283,6 +281,7 @@ static int ecryptfs_dir_open(struct inode *inode, struct file *file)
* ecryptfs_lookup() */
struct ecryptfs_file_info *file_info;
struct file *lower_file;
+ struct path path;
/* Released in ecryptfs_release or end of function if failure */
file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
@@ -292,8 +291,8 @@ static int ecryptfs_dir_open(struct inode *inode, struct file *file)
"Error attempting to allocate memory\n");
return -ENOMEM;
}
- lower_file = dentry_open(ecryptfs_dentry_to_lower_path(ecryptfs_dentry),
- file->f_flags, current_cred());
+ path = ecryptfs_lower_path(ecryptfs_dentry);
+ lower_file = dentry_open(&path, file->f_flags, current_cred());
if (IS_ERR(lower_file)) {
printk(KERN_ERR "%s: Error attempting to initialize "
"the lower file for the dentry with name "
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 72fbe13..d2b262d 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -327,24 +327,15 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode)
static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
struct dentry *lower_dentry)
{
- const struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent);
+ struct dentry *lower_parent = ecryptfs_dentry_to_lower(dentry->d_parent);
struct inode *inode, *lower_inode;
- struct ecryptfs_dentry_info *dentry_info;
int rc = 0;
- dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
- if (!dentry_info) {
- dput(lower_dentry);
- return ERR_PTR(-ENOMEM);
- }
-
fsstack_copy_attr_atime(d_inode(dentry->d_parent),
- d_inode(path->dentry));
+ d_inode(lower_parent));
BUG_ON(!d_count(lower_dentry));
- ecryptfs_set_dentry_private(dentry, dentry_info);
- dentry_info->lower_path.mnt = mntget(path->mnt);
- dentry_info->lower_path.dentry = lower_dentry;
+ ecryptfs_set_dentry_lower(dentry, lower_dentry);
/*
* negative dentry can go positive under us here - its parent is not
@@ -1022,10 +1013,10 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap,
{
struct dentry *dentry = path->dentry;
struct kstat lower_stat;
+ struct path lower_path = ecryptfs_lower_path(dentry);
int rc;
- rc = vfs_getattr_nosec(ecryptfs_dentry_to_lower_path(dentry),
- &lower_stat, request_mask, flags);
+ rc = vfs_getattr_nosec(&lower_path, &lower_stat, request_mask, flags);
if (!rc) {
fsstack_copy_attr_all(d_inode(dentry),
ecryptfs_inode_to_lower(d_inode(dentry)));
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index eab1beb..16ea14dd 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -106,15 +106,14 @@ static int ecryptfs_init_lower_file(struct dentry *dentry,
struct file **lower_file)
{
const struct cred *cred = current_cred();
- const struct path *path = ecryptfs_dentry_to_lower_path(dentry);
+ struct path path = ecryptfs_lower_path(dentry);
int rc;
- rc = ecryptfs_privileged_open(lower_file, path->dentry, path->mnt,
- cred);
+ rc = ecryptfs_privileged_open(lower_file, path.dentry, path.mnt, cred);
if (rc) {
printk(KERN_ERR "Error opening lower file "
"for lower_dentry [0x%p] and lower_mnt [0x%p]; "
- "rc = [%d]\n", path->dentry, path->mnt, rc);
+ "rc = [%d]\n", path.dentry, path.mnt, rc);
(*lower_file) = NULL;
}
return rc;
@@ -437,7 +436,6 @@ static int ecryptfs_get_tree(struct fs_context *fc)
struct ecryptfs_fs_context *ctx = fc->fs_private;
struct ecryptfs_sb_info *sbi = fc->s_fs_info;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
- struct ecryptfs_dentry_info *root_info;
const char *err = "Getting sb failed";
struct inode *inode;
struct path path;
@@ -543,14 +541,8 @@ static int ecryptfs_get_tree(struct fs_context *fc)
goto out_free;
}
- rc = -ENOMEM;
- root_info = kmem_cache_zalloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
- if (!root_info)
- goto out_free;
-
- /* ->kill_sb() will take care of root_info */
- ecryptfs_set_dentry_private(s->s_root, root_info);
- root_info->lower_path = path;
+ ecryptfs_set_dentry_lower(s->s_root, path.dentry);
+ ecryptfs_superblock_to_private(s)->lower_mnt = path.mnt;
s->s_flags |= SB_ACTIVE;
fc->root = dget(s->s_root);
@@ -580,6 +572,7 @@ static void ecryptfs_kill_block_super(struct super_block *sb)
kill_anon_super(sb);
if (!sb_info)
return;
+ mntput(sb_info->lower_mnt);
ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
}
@@ -668,11 +661,6 @@ static struct ecryptfs_cache_info {
.size = sizeof(struct ecryptfs_file_info),
},
{
- .cache = &ecryptfs_dentry_info_cache,
- .name = "ecryptfs_dentry_info_cache",
- .size = sizeof(struct ecryptfs_dentry_info),
- },
- {
.cache = &ecryptfs_inode_info_cache,
.name = "ecryptfs_inode_cache",
.size = sizeof(struct ecryptfs_inode_info),
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index f5f1c4e..c8388a2 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -587,7 +587,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir,
}
/* lookup a file */
-static int exfat_find(struct inode *dir, struct qstr *qname,
+static int exfat_find(struct inode *dir, const struct qstr *qname,
struct exfat_dir_entry *info)
{
int ret, dentry, count;
diff --git a/fs/file_table.c b/fs/file_table.c
index 81c7257..b223d87 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -54,7 +54,7 @@ struct backing_file {
#define backing_file(f) container_of(f, struct backing_file, file)
-struct path *backing_file_user_path(const struct file *f)
+const struct path *backing_file_user_path(const struct file *f)
{
return &backing_file(f)->user_path;
}
@@ -171,7 +171,7 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
* the respective member when opening the file.
*/
mutex_init(&f->f_pos_lock);
- memset(&f->f_path, 0, sizeof(f->f_path));
+ memset(&f->__f_path, 0, sizeof(f->f_path));
memset(&f->f_ra, 0, sizeof(f->f_ra));
f->f_flags = flags;
@@ -319,7 +319,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
static void file_init_path(struct file *file, const struct path *path,
const struct file_operations *fop)
{
- file->f_path = *path;
+ file->__f_path = *path;
file->f_inode = path->dentry->d_inode;
file->f_mapping = path->dentry->d_inode->i_mapping;
file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 666e617..93b7ebf 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -161,25 +161,24 @@ int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param)
EXPORT_SYMBOL(vfs_parse_fs_param);
/**
- * vfs_parse_fs_string - Convenience function to just parse a string.
+ * vfs_parse_fs_qstr - Convenience function to just parse a string.
* @fc: Filesystem context.
* @key: Parameter name.
* @value: Default value.
- * @v_size: Maximum number of bytes in the value.
*/
-int vfs_parse_fs_string(struct fs_context *fc, const char *key,
- const char *value, size_t v_size)
+int vfs_parse_fs_qstr(struct fs_context *fc, const char *key,
+ const struct qstr *value)
{
int ret;
struct fs_parameter param = {
.key = key,
.type = fs_value_is_flag,
- .size = v_size,
+ .size = value ? value->len : 0,
};
if (value) {
- param.string = kmemdup_nul(value, v_size, GFP_KERNEL);
+ param.string = kmemdup_nul(value->name, value->len, GFP_KERNEL);
if (!param.string)
return -ENOMEM;
param.type = fs_value_is_string;
@@ -189,7 +188,7 @@ int vfs_parse_fs_string(struct fs_context *fc, const char *key,
kfree(param.string);
return ret;
}
-EXPORT_SYMBOL(vfs_parse_fs_string);
+EXPORT_SYMBOL(vfs_parse_fs_qstr);
/**
* vfs_parse_monolithic_sep - Parse key[=val][,key[=val]]* mount data
@@ -218,16 +217,14 @@ int vfs_parse_monolithic_sep(struct fs_context *fc, void *data,
while ((key = sep(&options)) != NULL) {
if (*key) {
- size_t v_len = 0;
char *value = strchr(key, '=');
if (value) {
if (unlikely(value == key))
continue;
*value++ = 0;
- v_len = strlen(value);
}
- ret = vfs_parse_fs_string(fc, key, value, v_len);
+ ret = vfs_parse_fs_string(fc, key, value);
if (ret < 0)
break;
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2d817d7..d3076bf 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -739,22 +739,18 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
int err;
struct mnt_idmap *idmap = file_mnt_idmap(file);
struct fuse_conn *fc = get_fuse_conn(dir);
- struct dentry *res = NULL;
if (fuse_is_bad(dir))
return -EIO;
if (d_in_lookup(entry)) {
- res = fuse_lookup(dir, entry, 0);
- if (IS_ERR(res))
- return PTR_ERR(res);
-
- if (res)
- entry = res;
+ struct dentry *res = fuse_lookup(dir, entry, 0);
+ if (res || d_really_is_positive(entry))
+ return finish_no_open(file, res);
}
- if (!(flags & O_CREAT) || d_really_is_positive(entry))
- goto no_open;
+ if (!(flags & O_CREAT))
+ return finish_no_open(file, NULL);
/* Only creates */
file->f_mode |= FMODE_CREATED;
@@ -768,16 +764,13 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
goto mknod;
} else if (err == -EEXIST)
fuse_invalidate_entry(entry);
-out_dput:
- dput(res);
return err;
mknod:
err = fuse_mknod(idmap, dir, entry, mode, 0);
if (err)
- goto out_dput;
-no_open:
- return finish_no_open(file, res);
+ return err;
+ return finish_no_open(file, NULL);
}
/*
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8760e7e..8a7ed80 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1368,27 +1368,19 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags,
umode_t mode)
{
- struct dentry *d;
bool excl = !!(flags & O_EXCL);
- if (!d_in_lookup(dentry))
- goto skip_lookup;
-
- d = __gfs2_lookup(dir, dentry, file);
- if (IS_ERR(d))
- return PTR_ERR(d);
- if (d != NULL)
- dentry = d;
- if (d_really_is_positive(dentry)) {
- if (!(file->f_mode & FMODE_OPENED))
+ if (d_in_lookup(dentry)) {
+ struct dentry *d = __gfs2_lookup(dir, dentry, file);
+ if (file->f_mode & FMODE_OPENED) {
+ if (IS_ERR(d))
+ return PTR_ERR(d);
+ dput(d);
+ return excl && (flags & O_CREAT) ? -EEXIST : 0;
+ }
+ if (d || d_really_is_positive(dentry))
return finish_no_open(file, d);
- dput(d);
- return excl && (flags & O_CREAT) ? -EEXIST : 0;
}
-
- BUG_ON(d != NULL);
-
-skip_lookup:
if (!(flags & O_CREAT))
return -ENOENT;
diff --git a/fs/internal.h b/fs/internal.h
index 38e8aab..72a7e944 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -53,7 +53,7 @@ extern int finish_clean_context(struct fs_context *fc);
* namei.c
*/
extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
- struct path *path, struct path *root);
+ struct path *path, const struct path *root);
int do_rmdir(int dfd, struct filename *name);
int do_unlinkat(int dfd, struct filename *name);
int may_linkat(struct mnt_idmap *idmap, const struct path *link);
@@ -84,9 +84,9 @@ void mnt_put_write_access_file(struct file *file);
extern void dissolve_on_fput(struct vfsmount *);
extern bool may_mount(void);
-int path_mount(const char *dev_name, struct path *path,
+int path_mount(const char *dev_name, const struct path *path,
const char *type_page, unsigned long flags, void *data_page);
-int path_umount(struct path *path, int flags);
+int path_umount(const struct path *path, int flags);
int show_path(struct seq_file *m, struct dentry *root);
diff --git a/fs/mount.h b/fs/mount.h
index 9773705..40cf165 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -64,7 +64,10 @@ struct mount {
#endif
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */
- struct list_head mnt_instance; /* mount instance on sb->s_mounts */
+ struct mount *mnt_next_for_sb; /* the next two fields are hlist_node, */
+ struct mount * __aligned(1) *mnt_pprev_for_sb;
+ /* except that LSB of pprev is stolen */
+#define WRITE_HOLD 1 /* ... for use by mnt_hold_writers() */
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */
@@ -154,6 +157,11 @@ static inline void get_mnt_ns(struct mnt_namespace *ns)
extern seqlock_t mount_lock;
+DEFINE_LOCK_GUARD_0(mount_writer, write_seqlock(&mount_lock),
+ write_sequnlock(&mount_lock))
+DEFINE_LOCK_GUARD_0(mount_locked_reader, read_seqlock_excl(&mount_lock),
+ read_sequnlock_excl(&mount_lock))
+
struct proc_mounts {
struct mnt_namespace *ns;
struct path root;
@@ -230,4 +238,33 @@ static inline void mnt_notify_add(struct mount *m)
}
#endif
+static inline struct mount *topmost_overmount(struct mount *m)
+{
+ while (m->overmount)
+ m = m->overmount;
+ return m;
+}
+
+static inline bool __test_write_hold(struct mount * __aligned(1) *val)
+{
+ return (unsigned long)val & WRITE_HOLD;
+}
+
+static inline bool test_write_hold(const struct mount *m)
+{
+ return __test_write_hold(m->mnt_pprev_for_sb);
+}
+
+static inline void set_write_hold(struct mount *m)
+{
+ m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb
+ | WRITE_HOLD);
+}
+
+static inline void clear_write_hold(struct mount *m)
+{
+ m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb
+ & ~WRITE_HOLD);
+}
+
struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
diff --git a/fs/namei.c b/fs/namei.c
index cd43ff8..ba8bf73 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2673,7 +2673,7 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
}
int filename_lookup(int dfd, struct filename *name, unsigned flags,
- struct path *path, struct path *root)
+ struct path *path, const struct path *root)
{
int retval;
struct nameidata nd;
@@ -3563,8 +3563,8 @@ static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
if (nd->flags & LOOKUP_DIRECTORY)
open_flag |= O_DIRECTORY;
- file->f_path.dentry = DENTRY_NOT_SET;
- file->f_path.mnt = nd->path.mnt;
+ file->__f_path.dentry = DENTRY_NOT_SET;
+ file->__f_path.mnt = nd->path.mnt;
error = dir->i_op->atomic_open(dir, dentry, file,
open_to_namei_flags(open_flag), mode);
d_lookup_done(dentry);
@@ -3932,8 +3932,8 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
child = d_alloc(parentpath->dentry, &slash_name);
if (unlikely(!child))
return -ENOMEM;
- file->f_path.mnt = parentpath->mnt;
- file->f_path.dentry = child;
+ file->__f_path.mnt = parentpath->mnt;
+ file->__f_path.dentry = child;
mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
error = dir->i_op->tmpfile(idmap, dir, file, mode);
dput(child);
@@ -4170,7 +4170,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
}
EXPORT_SYMBOL(kern_path_create);
-void done_path_create(struct path *path, struct dentry *dentry)
+void done_path_create(const struct path *path, struct dentry *dentry)
{
if (!IS_ERR(dentry))
dput(dentry);
diff --git a/fs/namespace.c b/fs/namespace.c
index ae6d131..b9430a5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -82,6 +82,14 @@ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */
static DEFINE_SEQLOCK(mnt_ns_tree_lock);
+static inline void namespace_lock(void);
+static void namespace_unlock(void);
+DEFINE_LOCK_GUARD_0(namespace_excl, namespace_lock(), namespace_unlock())
+DEFINE_LOCK_GUARD_0(namespace_shared, down_read(&namespace_sem),
+ up_read(&namespace_sem))
+
+DEFINE_FREE(mntput, struct vfsmount *, if (!IS_ERR(_T)) mntput(_T))
+
#ifdef CONFIG_FSNOTIFY
LIST_HEAD(notify_list); /* protected by namespace_sem */
#endif
@@ -187,7 +195,7 @@ static void mnt_ns_release_rcu(struct rcu_head *rcu)
static void mnt_ns_tree_remove(struct mnt_namespace *ns)
{
/* remove from global mount namespace list */
- if (!is_anon_ns(ns)) {
+ if (!RB_EMPTY_NODE(&ns->mnt_ns_tree_node)) {
mnt_ns_tree_write_lock();
rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
list_bidir_del_rcu(&ns->mnt_ns_list);
@@ -420,7 +428,7 @@ static struct mount *alloc_vfsmnt(const char *name)
* mnt_want/drop_write() will _keep_ the filesystem
* r/w.
*/
-bool __mnt_is_readonly(struct vfsmount *mnt)
+bool __mnt_is_readonly(const struct vfsmount *mnt)
{
return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
}
@@ -460,7 +468,7 @@ static unsigned int mnt_get_writers(struct mount *mnt)
#endif
}
-static int mnt_is_readonly(struct vfsmount *mnt)
+static int mnt_is_readonly(const struct vfsmount *mnt)
{
if (READ_ONCE(mnt->mnt_sb->s_readonly_remount))
return 1;
@@ -501,31 +509,31 @@ int mnt_get_write_access(struct vfsmount *m)
mnt_inc_writers(mnt);
/*
* The store to mnt_inc_writers must be visible before we pass
- * MNT_WRITE_HOLD loop below, so that the slowpath can see our
- * incremented count after it has set MNT_WRITE_HOLD.
+ * WRITE_HOLD loop below, so that the slowpath can see our
+ * incremented count after it has set WRITE_HOLD.
*/
smp_mb();
might_lock(&mount_lock.lock);
- while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+ while (__test_write_hold(READ_ONCE(mnt->mnt_pprev_for_sb))) {
if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
cpu_relax();
} else {
/*
* This prevents priority inversion, if the task
- * setting MNT_WRITE_HOLD got preempted on a remote
+ * setting WRITE_HOLD got preempted on a remote
* CPU, and it prevents life lock if the task setting
- * MNT_WRITE_HOLD has a lower priority and is bound to
+ * WRITE_HOLD has a lower priority and is bound to
* the same CPU as the task that is spinning here.
*/
preempt_enable();
- lock_mount_hash();
- unlock_mount_hash();
+ read_seqlock_excl(&mount_lock);
+ read_sequnlock_excl(&mount_lock);
preempt_disable();
}
}
/*
* The barrier pairs with the barrier sb_start_ro_state_change() making
- * sure that if we see MNT_WRITE_HOLD cleared, we will also see
+ * sure that if we see WRITE_HOLD cleared, we will also see
* s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in
* mnt_is_readonly() and bail in case we are racing with remount
* read-only.
@@ -663,16 +671,16 @@ EXPORT_SYMBOL(mnt_drop_write_file);
* a call to mnt_unhold_writers() in order to stop preventing write access to
* @mnt.
*
- * Context: This function expects lock_mount_hash() to be held serializing
- * setting MNT_WRITE_HOLD.
+ * Context: This function expects to be in mount_locked_reader scope serializing
+ * setting WRITE_HOLD.
* Return: On success 0 is returned.
* On error, -EBUSY is returned.
*/
static inline int mnt_hold_writers(struct mount *mnt)
{
- mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
+ set_write_hold(mnt);
/*
- * After storing MNT_WRITE_HOLD, we'll read the counters. This store
+ * After storing WRITE_HOLD, we'll read the counters. This store
* should be visible before we do.
*/
smp_mb();
@@ -688,9 +696,9 @@ static inline int mnt_hold_writers(struct mount *mnt)
* sum up each counter, if we read a counter before it is incremented,
* but then read another CPU's count which it has been subsequently
* decremented from -- we would see more decrements than we should.
- * MNT_WRITE_HOLD protects against this scenario, because
+ * WRITE_HOLD protects against this scenario, because
* mnt_want_write first increments count, then smp_mb, then spins on
- * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
+ * WRITE_HOLD, so it can't be decremented by another CPU while
* we're counting up here.
*/
if (mnt_get_writers(mnt) > 0)
@@ -706,19 +714,42 @@ static inline int mnt_hold_writers(struct mount *mnt)
* Stop preventing write access to @mnt allowing callers to gain write access
* to @mnt again.
*
- * This function can only be called after a successful call to
- * mnt_hold_writers().
+ * This function can only be called after a call to mnt_hold_writers().
*
- * Context: This function expects lock_mount_hash() to be held.
+ * Context: This function expects to be in the same mount_locked_reader scope
+ * as the matching mnt_hold_writers().
*/
static inline void mnt_unhold_writers(struct mount *mnt)
{
+ if (!test_write_hold(mnt))
+ return;
/*
- * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
+ * MNT_READONLY must become visible before ~WRITE_HOLD, so writers
* that become unheld will see MNT_READONLY.
*/
smp_wmb();
- mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
+ clear_write_hold(mnt);
+}
+
+static inline void mnt_del_instance(struct mount *m)
+{
+ struct mount **p = m->mnt_pprev_for_sb;
+ struct mount *next = m->mnt_next_for_sb;
+
+ if (next)
+ next->mnt_pprev_for_sb = p;
+ *p = next;
+}
+
+static inline void mnt_add_instance(struct mount *m, struct super_block *s)
+{
+ struct mount *first = s->s_mounts;
+
+ if (first)
+ first->mnt_pprev_for_sb = &m->mnt_next_for_sb;
+ m->mnt_next_for_sb = first;
+ m->mnt_pprev_for_sb = &s->s_mounts;
+ s->s_mounts = m;
}
static int mnt_make_readonly(struct mount *mnt)
@@ -734,17 +765,17 @@ static int mnt_make_readonly(struct mount *mnt)
int sb_prepare_remount_readonly(struct super_block *sb)
{
- struct mount *mnt;
int err = 0;
- /* Racy optimization. Recheck the counter under MNT_WRITE_HOLD */
+ /* Racy optimization. Recheck the counter under WRITE_HOLD */
if (atomic_long_read(&sb->s_remove_count))
return -EBUSY;
- lock_mount_hash();
- list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
- if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
- err = mnt_hold_writers(mnt);
+ guard(mount_locked_reader)();
+
+ for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) {
+ if (!(m->mnt.mnt_flags & MNT_READONLY)) {
+ err = mnt_hold_writers(m);
if (err)
break;
}
@@ -754,11 +785,10 @@ int sb_prepare_remount_readonly(struct super_block *sb)
if (!err)
sb_start_ro_state_change(sb);
- list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
- if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
- mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
+ for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) {
+ if (test_write_hold(m))
+ clear_write_hold(m);
}
- unlock_mount_hash();
return err;
}
@@ -817,24 +847,16 @@ static bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
}
/**
- * __lookup_mnt - find first child mount
+ * __lookup_mnt - mount hash lookup
* @mnt: parent mount
- * @dentry: mountpoint
+ * @dentry: dentry of mountpoint
*
- * If @mnt has a child mount @c mounted @dentry find and return it.
+ * If @mnt has a child mount @c mounted on @dentry find and return it.
+ * Caller must either hold the spinlock component of @mount_lock or
+ * hold rcu_read_lock(), sample the seqcount component before the call
+ * and recheck it afterwards.
*
- * Note that the child mount @c need not be unique. There are cases
- * where shadow mounts are created. For example, during mount
- * propagation when a source mount @mnt whose root got overmounted by a
- * mount @o after path lookup but before @namespace_sem could be
- * acquired gets copied and propagated. So @mnt gets copied including
- * @o. When @mnt is propagated to a destination mount @d that already
- * has another mount @n mounted at the same mountpoint then the source
- * mount @mnt will be tucked beneath @n, i.e., @n will be mounted on
- * @mnt and @mnt mounted on @d. Now both @n and @o are mounted at @mnt
- * on @dentry.
- *
- * Return: The first child of @mnt mounted @dentry or NULL.
+ * Return: The child of @mnt mounted on @dentry or %NULL.
*/
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
@@ -847,21 +869,12 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
return NULL;
}
-/*
- * lookup_mnt - Return the first child mount mounted at path
+/**
+ * lookup_mnt - Return the child mount mounted at given location
+ * @path: location in the namespace
*
- * "First" means first mounted chronologically. If you create the
- * following mounts:
- *
- * mount /dev/sda1 /mnt
- * mount /dev/sda2 /mnt
- * mount /dev/sda3 /mnt
- *
- * Then lookup_mnt() on the base /mnt dentry in the root mount will
- * return successively the root dentry and vfsmount of /dev/sda1, then
- * /dev/sda2, then /dev/sda3, then NULL.
- *
- * lookup_mnt takes a reference to the found vfsmount.
+ * Acquires and returns a new reference to mount at given location
+ * or %NULL if nothing is mounted there.
*/
struct vfsmount *lookup_mnt(const struct path *path)
{
@@ -898,22 +911,20 @@ bool __is_local_mountpoint(const struct dentry *dentry)
{
struct mnt_namespace *ns = current->nsproxy->mnt_ns;
struct mount *mnt, *n;
- bool is_covered = false;
- down_read(&namespace_sem);
- rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) {
- is_covered = (mnt->mnt_mountpoint == dentry);
- if (is_covered)
- break;
- }
- up_read(&namespace_sem);
+ guard(namespace_shared)();
- return is_covered;
+ rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node)
+ if (mnt->mnt_mountpoint == dentry)
+ return true;
+
+ return false;
}
struct pinned_mountpoint {
struct hlist_node node;
struct mountpoint *mp;
+ struct mount *parent;
};
static bool lookup_mountpoint(struct dentry *dentry, struct pinned_mountpoint *m)
@@ -1004,7 +1015,7 @@ static void unpin_mountpoint(struct pinned_mountpoint *m)
}
}
-static inline int check_mnt(struct mount *mnt)
+static inline int check_mnt(const struct mount *mnt)
{
return mnt->mnt_ns == current->nsproxy->mnt_ns;
}
@@ -1206,6 +1217,20 @@ static void commit_tree(struct mount *mnt)
touch_mnt_namespace(n);
}
+static void setup_mnt(struct mount *m, struct dentry *root)
+{
+ struct super_block *s = root->d_sb;
+
+ atomic_inc(&s->s_active);
+ m->mnt.mnt_sb = s;
+ m->mnt.mnt_root = dget(root);
+ m->mnt_mountpoint = m->mnt.mnt_root;
+ m->mnt_parent = m;
+
+ guard(mount_locked_reader)();
+ mnt_add_instance(m, s);
+}
+
/**
* vfs_create_mount - Create a mount for a configured superblock
* @fc: The configuration context with the superblock attached
@@ -1229,15 +1254,8 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc)
if (fc->sb_flags & SB_KERNMOUNT)
mnt->mnt.mnt_flags = MNT_INTERNAL;
- atomic_inc(&fc->root->d_sb->s_active);
- mnt->mnt.mnt_sb = fc->root->d_sb;
- mnt->mnt.mnt_root = dget(fc->root);
- mnt->mnt_mountpoint = mnt->mnt.mnt_root;
- mnt->mnt_parent = mnt;
+ setup_mnt(mnt, fc->root);
- lock_mount_hash();
- list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
- unlock_mount_hash();
return &mnt->mnt;
}
EXPORT_SYMBOL(vfs_create_mount);
@@ -1278,8 +1296,7 @@ struct vfsmount *vfs_kern_mount(struct file_system_type *type,
return ERR_CAST(fc);
if (name)
- ret = vfs_parse_fs_string(fc, "source",
- name, strlen(name));
+ ret = vfs_parse_fs_string(fc, "source", name);
if (!ret)
ret = parse_monolithic_mount_data(fc, data);
if (!ret)
@@ -1295,7 +1312,6 @@ EXPORT_SYMBOL_GPL(vfs_kern_mount);
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
int flag)
{
- struct super_block *sb = old->mnt.mnt_sb;
struct mount *mnt;
int err;
@@ -1320,16 +1336,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if (mnt->mnt_group_id)
set_mnt_shared(mnt);
- atomic_inc(&sb->s_active);
mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
- mnt->mnt.mnt_sb = sb;
- mnt->mnt.mnt_root = dget(root);
- mnt->mnt_mountpoint = mnt->mnt.mnt_root;
- mnt->mnt_parent = mnt;
- lock_mount_hash();
- list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
- unlock_mount_hash();
+ setup_mnt(mnt, root);
if (flag & CL_PRIVATE) // we are done with it
return mnt;
@@ -1435,7 +1444,7 @@ static void mntput_no_expire(struct mount *mnt)
mnt->mnt.mnt_flags |= MNT_DOOMED;
rcu_read_unlock();
- list_del(&mnt->mnt_instance);
+ mnt_del_instance(mnt);
if (unlikely(!list_empty(&mnt->mnt_expire)))
list_del(&mnt->mnt_expire);
@@ -1776,8 +1785,6 @@ static inline void namespace_lock(void)
down_write(&namespace_sem);
}
-DEFINE_GUARD(namespace_lock, struct rw_semaphore *, namespace_lock(), namespace_unlock())
-
enum umount_tree_flags {
UMOUNT_SYNC = 1,
UMOUNT_PROPAGATE = 2,
@@ -1842,6 +1849,8 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
if (how & UMOUNT_PROPAGATE)
propagate_umount(&tmp_list);
+ bulk_make_private(&tmp_list);
+
while (!list_empty(&tmp_list)) {
struct mnt_namespace *ns;
bool disconnect;
@@ -1866,7 +1875,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
umount_mnt(p);
}
}
- change_mnt_propagation(p, MS_PRIVATE);
if (disconnect)
hlist_add_head(&p->mnt_umount, &unmounted);
@@ -2026,10 +2034,11 @@ void __detach_mounts(struct dentry *dentry)
struct pinned_mountpoint mp = {};
struct mount *mnt;
- namespace_lock();
- lock_mount_hash();
+ guard(namespace_excl)();
+ guard(mount_writer)();
+
if (!lookup_mountpoint(dentry, &mp))
- goto out_unlock;
+ return;
event++;
while (mp.node.next) {
@@ -2041,9 +2050,6 @@ void __detach_mounts(struct dentry *dentry)
else umount_tree(mnt, UMOUNT_CONNECTED);
}
unpin_mountpoint(&mp);
-out_unlock:
- unlock_mount_hash();
- namespace_unlock();
}
/*
@@ -2082,7 +2088,7 @@ static int can_umount(const struct path *path, int flags)
}
// caller is responsible for flags being sane
-int path_umount(struct path *path, int flags)
+int path_umount(const struct path *path, int flags)
{
struct mount *mnt = real_mount(path->mnt);
int ret;
@@ -2298,7 +2304,7 @@ static inline bool extend_array(struct path **res, struct path **to_free,
return p;
}
-struct path *collect_paths(const struct path *path,
+const struct path *collect_paths(const struct path *path,
struct path *prealloc, unsigned count)
{
struct mount *root = real_mount(path->mnt);
@@ -2306,7 +2312,7 @@ struct path *collect_paths(const struct path *path,
struct path *res = prealloc, *to_free = NULL;
unsigned n = 0;
- guard(rwsem_read)(&namespace_sem);
+ guard(namespace_shared)();
if (!check_mnt(root))
return ERR_PTR(-EINVAL);
@@ -2332,9 +2338,9 @@ struct path *collect_paths(const struct path *path,
return res;
}
-void drop_collected_paths(struct path *paths, struct path *prealloc)
+void drop_collected_paths(const struct path *paths, const struct path *prealloc)
{
- for (struct path *p = paths; p->mnt; p++)
+ for (const struct path *p = paths; p->mnt; p++)
path_put(p);
if (paths != prealloc)
kfree(paths);
@@ -2361,7 +2367,7 @@ void dissolve_on_fput(struct vfsmount *mnt)
return;
}
- scoped_guard(namespace_lock, &namespace_sem) {
+ scoped_guard(namespace_excl) {
if (!anon_ns_root(m))
return;
@@ -2372,6 +2378,7 @@ void dissolve_on_fput(struct vfsmount *mnt)
}
}
+/* locks: namespace_shared && pinned(mnt) || mount_locked_reader */
static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
{
struct mount *child;
@@ -2388,12 +2395,8 @@ static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
- bool res;
-
- read_seqlock_excl(&mount_lock);
- res = __has_locked_children(mnt, dentry);
- read_sequnlock_excl(&mount_lock);
- return res;
+ guard(mount_locked_reader)();
+ return __has_locked_children(mnt, dentry);
}
/*
@@ -2401,21 +2404,15 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
* specified subtree. Such references can act as pins for mount namespaces
* that aren't checked by the mount-cycle checking code, thereby allowing
* cycles to be made.
+ *
+ * locks: mount_locked_reader || namespace_shared && pinned(subtree)
*/
static bool check_for_nsfs_mounts(struct mount *subtree)
{
- struct mount *p;
- bool ret = false;
-
- lock_mount_hash();
- for (p = subtree; p; p = next_mnt(p, subtree))
+ for (struct mount *p = subtree; p; p = next_mnt(p, subtree))
if (mnt_ns_loop(p->mnt.mnt_root))
- goto out;
-
- ret = true;
-out:
- unlock_mount_hash();
- return ret;
+ return false;
+ return true;
}
/**
@@ -2435,7 +2432,7 @@ struct vfsmount *clone_private_mount(const struct path *path)
struct mount *old_mnt = real_mount(path->mnt);
struct mount *new_mnt;
- guard(rwsem_read)(&namespace_sem);
+ guard(namespace_shared)();
if (IS_MNT_UNBINDABLE(old_mnt))
return ERR_PTR(-EINVAL);
@@ -2556,8 +2553,7 @@ enum mnt_tree_flags_t {
/**
* attach_recursive_mnt - attach a source mount tree
* @source_mnt: mount tree to be attached
- * @dest_mnt: mount that @source_mnt will be mounted on
- * @dest_mp: the mountpoint @source_mnt will be mounted at
+ * @dest: the context for mounting at the place where the tree should go
*
* NOTE: in the table below explains the semantics when a source mount
* of a given type is attached to a destination mount of a given type.
@@ -2620,10 +2616,11 @@ enum mnt_tree_flags_t {
* Otherwise a negative error code is returned.
*/
static int attach_recursive_mnt(struct mount *source_mnt,
- struct mount *dest_mnt,
- struct mountpoint *dest_mp)
+ const struct pinned_mountpoint *dest)
{
struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
+ struct mount *dest_mnt = dest->parent;
+ struct mountpoint *dest_mp = dest->mp;
HLIST_HEAD(tree_list);
struct mnt_namespace *ns = dest_mnt->mnt_ns;
struct pinned_mountpoint root = {};
@@ -2703,10 +2700,9 @@ static int attach_recursive_mnt(struct mount *source_mnt,
child->mnt_mountpoint);
commit_tree(child);
if (q) {
+ struct mount *r = topmost_overmount(child);
struct mountpoint *mp = root.mp;
- struct mount *r = child;
- while (unlikely(r->overmount))
- r = r->overmount;
+
if (unlikely(shorter) && child != source_mnt)
mp = shorter;
mnt_change_mountpoint(r, mp, q);
@@ -2735,110 +2731,120 @@ static int attach_recursive_mnt(struct mount *source_mnt,
return err;
}
-/**
- * do_lock_mount - lock mount and mountpoint
- * @path: target path
- * @beneath: whether the intention is to mount beneath @path
- *
- * Follow the mount stack on @path until the top mount @mnt is found. If
- * the initial @path->{mnt,dentry} is a mountpoint lookup the first
- * mount stacked on top of it. Then simply follow @{mnt,mnt->mnt_root}
- * until nothing is stacked on top of it anymore.
- *
- * Acquire the inode_lock() on the top mount's ->mnt_root to protect
- * against concurrent removal of the new mountpoint from another mount
- * namespace.
- *
- * If @beneath is requested, acquire inode_lock() on @mnt's mountpoint
- * @mp on @mnt->mnt_parent must be acquired. This protects against a
- * concurrent unlink of @mp->mnt_dentry from another mount namespace
- * where @mnt doesn't have a child mount mounted @mp. A concurrent
- * removal of @mnt->mnt_root doesn't matter as nothing will be mounted
- * on top of it for @beneath.
- *
- * In addition, @beneath needs to make sure that @mnt hasn't been
- * unmounted or moved from its current mountpoint in between dropping
- * @mount_lock and acquiring @namespace_sem. For the !@beneath case @mnt
- * being unmounted would be detected later by e.g., calling
- * check_mnt(mnt) in the function it's called from. For the @beneath
- * case however, it's useful to detect it directly in do_lock_mount().
- * If @mnt hasn't been unmounted then @mnt->mnt_mountpoint still points
- * to @mnt->mnt_mp->m_dentry. But if @mnt has been unmounted it will
- * point to @mnt->mnt_root and @mnt->mnt_mp will be NULL.
- *
- * Return: Either the target mountpoint on the top mount or the top
- * mount's mountpoint.
- */
-static int do_lock_mount(struct path *path, struct pinned_mountpoint *pinned, bool beneath)
+static inline struct mount *where_to_mount(const struct path *path,
+ struct dentry **dentry,
+ bool beneath)
{
- struct vfsmount *mnt = path->mnt;
- struct dentry *dentry;
- struct path under = {};
- int err = -ENOENT;
+ struct mount *m;
- for (;;) {
- struct mount *m = real_mount(mnt);
+ if (unlikely(beneath)) {
+ m = topmost_overmount(real_mount(path->mnt));
+ *dentry = m->mnt_mountpoint;
+ return m->mnt_parent;
+ }
+ m = __lookup_mnt(path->mnt, path->dentry);
+ if (unlikely(m)) {
+ m = topmost_overmount(m);
+ *dentry = m->mnt.mnt_root;
+ return m;
+ }
+ *dentry = path->dentry;
+ return real_mount(path->mnt);
+}
- if (beneath) {
- path_put(&under);
- read_seqlock_excl(&mount_lock);
- under.mnt = mntget(&m->mnt_parent->mnt);
- under.dentry = dget(m->mnt_mountpoint);
- read_sequnlock_excl(&mount_lock);
- dentry = under.dentry;
- } else {
- dentry = path->dentry;
+/**
+ * do_lock_mount - acquire environment for mounting
+ * @path: target path
+ * @res: context to set up
+ * @beneath: whether the intention is to mount beneath @path
+ *
+ * To mount something at given location, we need
+ * namespace_sem locked exclusive
+ * inode of dentry we are mounting on locked exclusive
+ * struct mountpoint for that dentry
+ * struct mount we are mounting on
+ *
+ * Results are stored in caller-supplied context (pinned_mountpoint);
+ * on success we have res->parent and res->mp pointing to parent and
+ * mountpoint respectively and res->node inserted into the ->m_list
+ * of the mountpoint, making sure the mountpoint won't disappear.
+ * On failure we have res->parent set to ERR_PTR(-E...), res->mp
+ * left NULL, res->node - empty.
+ * In case of success do_lock_mount returns with locks acquired (in
+ * proper order - inode lock nests outside of namespace_sem).
+ *
+ * Request to mount on overmounted location is treated as "mount on
+ * top of whatever's overmounting it"; request to mount beneath
+ * a location - "mount immediately beneath the topmost mount at that
+ * place".
+ *
+ * In all cases the location must not have been unmounted and the
+ * chosen mountpoint must be allowed to be mounted on. For "beneath"
+ * case we also require the location to be at the root of a mount
+ * that has a parent (i.e. is not a root of some namespace).
+ */
+static void do_lock_mount(const struct path *path,
+ struct pinned_mountpoint *res,
+ bool beneath)
+{
+ int err;
+
+ if (unlikely(beneath) && !path_mounted(path)) {
+ res->parent = ERR_PTR(-EINVAL);
+ return;
+ }
+
+ do {
+ struct dentry *dentry, *d;
+ struct mount *m, *n;
+
+ scoped_guard(mount_locked_reader) {
+ m = where_to_mount(path, &dentry, beneath);
+ if (&m->mnt != path->mnt) {
+ mntget(&m->mnt);
+ dget(dentry);
+ }
}
inode_lock(dentry->d_inode);
namespace_lock();
- if (unlikely(cant_mount(dentry) || !is_mounted(mnt)))
- break; // not to be mounted on
+ // check if the chain of mounts (if any) has changed.
+ scoped_guard(mount_locked_reader)
+ n = where_to_mount(path, &d, beneath);
- if (beneath && unlikely(m->mnt_mountpoint != dentry ||
- &m->mnt_parent->mnt != under.mnt)) {
+ if (unlikely(n != m || dentry != d))
+ err = -EAGAIN; // something moved, retry
+ else if (unlikely(cant_mount(dentry) || !is_mounted(path->mnt)))
+ err = -ENOENT; // not to be mounted on
+ else if (beneath && &m->mnt == path->mnt && !m->overmount)
+ err = -EINVAL;
+ else
+ err = get_mountpoint(dentry, res);
+
+ if (unlikely(err)) {
+ res->parent = ERR_PTR(err);
namespace_unlock();
inode_unlock(dentry->d_inode);
- continue; // got moved
+ } else {
+ res->parent = m;
}
-
- mnt = lookup_mnt(path);
- if (unlikely(mnt)) {
- namespace_unlock();
- inode_unlock(dentry->d_inode);
- path_put(path);
- path->mnt = mnt;
- path->dentry = dget(mnt->mnt_root);
- continue; // got overmounted
+ /*
+ * Drop the temporary references. This is subtle - on success
+ * we are doing that under namespace_sem, which would normally
+ * be forbidden. However, in that case we are guaranteed that
+ * refcounts won't reach zero, since we know that path->mnt
+ * is mounted and thus all mounts reachable from it are pinned
+ * and stable, along with their mountpoints and roots.
+ */
+ if (&m->mnt != path->mnt) {
+ dput(dentry);
+ mntput(&m->mnt);
}
- err = get_mountpoint(dentry, pinned);
- if (err)
- break;
- if (beneath) {
- /*
- * @under duplicates the references that will stay
- * at least until namespace_unlock(), so the path_put()
- * below is safe (and OK to do under namespace_lock -
- * we are not dropping the final references here).
- */
- path_put(&under);
- }
- return 0;
- }
- namespace_unlock();
- inode_unlock(dentry->d_inode);
- if (beneath)
- path_put(&under);
- return err;
+ } while (err == -EAGAIN);
}
-static inline int lock_mount(struct path *path, struct pinned_mountpoint *m)
-{
- return do_lock_mount(path, m, false);
-}
-
-static void unlock_mount(struct pinned_mountpoint *m)
+static void __unlock_mount(struct pinned_mountpoint *m)
{
inode_unlock(m->mp->m_dentry->d_inode);
read_seqlock_excl(&mount_lock);
@@ -2847,16 +2853,30 @@ static void unlock_mount(struct pinned_mountpoint *m)
namespace_unlock();
}
-static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
+static inline void unlock_mount(struct pinned_mountpoint *m)
+{
+ if (!IS_ERR(m->parent))
+ __unlock_mount(m);
+}
+
+#define LOCK_MOUNT_MAYBE_BENEATH(mp, path, beneath) \
+ struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \
+ do_lock_mount((path), &mp, (beneath))
+#define LOCK_MOUNT(mp, path) LOCK_MOUNT_MAYBE_BENEATH(mp, (path), false)
+#define LOCK_MOUNT_EXACT(mp, path) \
+ struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \
+ lock_mount_exact((path), &mp)
+
+static int graft_tree(struct mount *mnt, const struct pinned_mountpoint *mp)
{
if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
return -EINVAL;
- if (d_is_dir(mp->m_dentry) !=
+ if (d_is_dir(mp->mp->m_dentry) !=
d_is_dir(mnt->mnt.mnt_root))
return -ENOTDIR;
- return attach_recursive_mnt(mnt, p, mp);
+ return attach_recursive_mnt(mnt, mp);
}
static int may_change_propagation(const struct mount *m)
@@ -2892,13 +2912,13 @@ static int flags_to_propagation_type(int ms_flags)
/*
* recursively change the type of the mountpoint.
*/
-static int do_change_type(struct path *path, int ms_flags)
+static int do_change_type(const struct path *path, int ms_flags)
{
struct mount *m;
struct mount *mnt = real_mount(path->mnt);
int recurse = ms_flags & MS_REC;
int type;
- int err = 0;
+ int err;
if (!path_mounted(path))
return -EINVAL;
@@ -2907,23 +2927,22 @@ static int do_change_type(struct path *path, int ms_flags)
if (!type)
return -EINVAL;
- namespace_lock();
+ guard(namespace_excl)();
+
err = may_change_propagation(mnt);
if (err)
- goto out_unlock;
+ return err;
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
- goto out_unlock;
+ return err;
}
for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
change_mnt_propagation(m, type);
- out_unlock:
- namespace_unlock();
- return err;
+ return 0;
}
/* may_copy_tree() - check if a mount tree can be copied
@@ -2969,7 +2988,7 @@ static int do_change_type(struct path *path, int ms_flags)
*
* Returns true if the mount tree can be copied, false otherwise.
*/
-static inline bool may_copy_tree(struct path *path)
+static inline bool may_copy_tree(const struct path *path)
{
struct mount *mnt = real_mount(path->mnt);
const struct dentry_operations *d_op;
@@ -2991,7 +3010,7 @@ static inline bool may_copy_tree(struct path *path)
}
-static struct mount *__do_loopback(struct path *old_path, int recurse)
+static struct mount *__do_loopback(const struct path *old_path, int recurse)
{
struct mount *old = real_mount(old_path->mnt);
@@ -3013,12 +3032,11 @@ static struct mount *__do_loopback(struct path *old_path, int recurse)
/*
* do loopback mount.
*/
-static int do_loopback(struct path *path, const char *old_name,
- int recurse)
+static int do_loopback(const struct path *path, const char *old_name,
+ int recurse)
{
- struct path old_path;
- struct mount *mnt = NULL, *parent;
- struct pinned_mountpoint mp = {};
+ struct path old_path __free(path_put) = {};
+ struct mount *mnt = NULL;
int err;
if (!old_name || !*old_name)
return -EINVAL;
@@ -3026,49 +3044,40 @@ static int do_loopback(struct path *path, const char *old_name,
if (err)
return err;
- err = -EINVAL;
if (mnt_ns_loop(old_path.dentry))
- goto out;
+ return -EINVAL;
- err = lock_mount(path, &mp);
- if (err)
- goto out;
+ LOCK_MOUNT(mp, path);
+ if (IS_ERR(mp.parent))
+ return PTR_ERR(mp.parent);
- parent = real_mount(path->mnt);
- if (!check_mnt(parent))
- goto out2;
+ if (!check_mnt(mp.parent))
+ return -EINVAL;
mnt = __do_loopback(&old_path, recurse);
- if (IS_ERR(mnt)) {
- err = PTR_ERR(mnt);
- goto out2;
- }
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
- err = graft_tree(mnt, parent, mp.mp);
+ err = graft_tree(mnt, &mp);
if (err) {
lock_mount_hash();
umount_tree(mnt, UMOUNT_SYNC);
unlock_mount_hash();
}
-out2:
- unlock_mount(&mp);
-out:
- path_put(&old_path);
return err;
}
-static struct file *open_detached_copy(struct path *path, bool recursive)
+static struct mnt_namespace *get_detached_copy(const struct path *path, bool recursive)
{
struct mnt_namespace *ns, *mnt_ns = current->nsproxy->mnt_ns, *src_mnt_ns;
struct user_namespace *user_ns = mnt_ns->user_ns;
struct mount *mnt, *p;
- struct file *file;
ns = alloc_mnt_ns(user_ns, true);
if (IS_ERR(ns))
- return ERR_CAST(ns);
+ return ns;
- namespace_lock();
+ guard(namespace_excl)();
/*
* Record the sequence number of the source mount namespace.
@@ -3085,23 +3094,28 @@ static struct file *open_detached_copy(struct path *path, bool recursive)
mnt = __do_loopback(path, recursive);
if (IS_ERR(mnt)) {
- namespace_unlock();
- free_mnt_ns(ns);
+ emptied_ns = ns;
return ERR_CAST(mnt);
}
- lock_mount_hash();
for (p = mnt; p; p = next_mnt(p, mnt)) {
mnt_add_to_ns(ns, p);
ns->nr_mounts++;
}
ns->root = mnt;
- mntget(&mnt->mnt);
- unlock_mount_hash();
- namespace_unlock();
+ return ns;
+}
+
+static struct file *open_detached_copy(struct path *path, bool recursive)
+{
+ struct mnt_namespace *ns = get_detached_copy(path, recursive);
+ struct file *file;
+
+ if (IS_ERR(ns))
+ return ERR_CAST(ns);
mntput(path->mnt);
- path->mnt = &mnt->mnt;
+ path->mnt = mntget(&ns->root->mnt);
file = dentry_open(path, O_PATH, current_cred());
if (IS_ERR(file))
dissolve_on_fput(path->mnt);
@@ -3218,7 +3232,8 @@ static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
touch_mnt_namespace(mnt->mnt_ns);
}
-static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
+static void mnt_warn_timestamp_expiry(const struct path *mountpoint,
+ struct vfsmount *mnt)
{
struct super_block *sb = mnt->mnt_sb;
@@ -3252,7 +3267,7 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
* superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND
* to mount(2).
*/
-static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
+static int do_reconfigure_mnt(const struct path *path, unsigned int mnt_flags)
{
struct super_block *sb = path->mnt->mnt_sb;
struct mount *mnt = real_mount(path->mnt);
@@ -3289,7 +3304,7 @@ static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
* If you've mounted a non-root directory somewhere and want to do remount
* on it - tough luck.
*/
-static int do_remount(struct path *path, int ms_flags, int sb_flags,
+static int do_remount(const struct path *path, int ms_flags, int sb_flags,
int mnt_flags, void *data)
{
int err;
@@ -3347,49 +3362,46 @@ static inline int tree_contains_unbindable(struct mount *mnt)
return 0;
}
-static int do_set_group(struct path *from_path, struct path *to_path)
+static int do_set_group(const struct path *from_path, const struct path *to_path)
{
- struct mount *from, *to;
+ struct mount *from = real_mount(from_path->mnt);
+ struct mount *to = real_mount(to_path->mnt);
int err;
- from = real_mount(from_path->mnt);
- to = real_mount(to_path->mnt);
-
- namespace_lock();
+ guard(namespace_excl)();
err = may_change_propagation(from);
if (err)
- goto out;
+ return err;
err = may_change_propagation(to);
if (err)
- goto out;
+ return err;
- err = -EINVAL;
/* To and From paths should be mount roots */
if (!path_mounted(from_path))
- goto out;
+ return -EINVAL;
if (!path_mounted(to_path))
- goto out;
+ return -EINVAL;
/* Setting sharing groups is only allowed across same superblock */
if (from->mnt.mnt_sb != to->mnt.mnt_sb)
- goto out;
+ return -EINVAL;
/* From mount root should be wider than To mount root */
if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
- goto out;
+ return -EINVAL;
/* From mount should not have locked children in place of To's root */
if (__has_locked_children(from, to->mnt.mnt_root))
- goto out;
+ return -EINVAL;
/* Setting sharing groups is only allowed on private mounts */
if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
- goto out;
+ return -EINVAL;
/* From should not be private */
if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
- goto out;
+ return -EINVAL;
if (IS_MNT_SLAVE(from)) {
hlist_add_behind(&to->mnt_slave, &from->mnt_slave);
@@ -3401,11 +3413,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
list_add(&to->mnt_share, &from->mnt_share);
set_mnt_shared(to);
}
-
- err = 0;
-out:
- namespace_unlock();
- return err;
+ return 0;
}
/**
@@ -3449,17 +3457,15 @@ static bool mount_is_ancestor(const struct mount *p1, const struct mount *p2)
/**
* can_move_mount_beneath - check that we can mount beneath the top mount
- * @from: mount to mount beneath
- * @to: mount under which to mount
- * @mp: mountpoint of @to
+ * @mnt_from: mount we are trying to move
+ * @mnt_to: mount under which to mount
+ * @mp: mountpoint of @mnt_to
*
- * - Make sure that @to->dentry is actually the root of a mount under
- * which we can mount another mount.
* - Make sure that nothing can be mounted beneath the caller's current
* root or the rootfs of the namespace.
* - Make sure that the caller can unmount the topmost mount ensuring
* that the caller could reveal the underlying mountpoint.
- * - Ensure that nothing has been mounted on top of @from before we
+ * - Ensure that nothing has been mounted on top of @mnt_from before we
* grabbed @namespace_sem to avoid creating pointless shadow mounts.
* - Prevent mounting beneath a mount if the propagation relationship
* between the source mount, parent mount, and top mount would lead to
@@ -3468,25 +3474,17 @@ static bool mount_is_ancestor(const struct mount *p1, const struct mount *p2)
* Context: This function expects namespace_lock() to be held.
* Return: On success 0, and on error a negative error code is returned.
*/
-static int can_move_mount_beneath(const struct path *from,
- const struct path *to,
+static int can_move_mount_beneath(const struct mount *mnt_from,
+ const struct mount *mnt_to,
const struct mountpoint *mp)
{
- struct mount *mnt_from = real_mount(from->mnt),
- *mnt_to = real_mount(to->mnt),
- *parent_mnt_to = mnt_to->mnt_parent;
-
- if (!mnt_has_parent(mnt_to))
- return -EINVAL;
-
- if (!path_mounted(to))
- return -EINVAL;
+ struct mount *parent_mnt_to = mnt_to->mnt_parent;
if (IS_MNT_LOCKED(mnt_to))
return -EINVAL;
/* Avoid creating shadow mounts during mount propagation. */
- if (path_overmounted(from))
+ if (mnt_from->overmount)
return -EINVAL;
/*
@@ -3577,97 +3575,83 @@ static inline bool may_use_mount(struct mount *mnt)
return check_anonymous_mnt(mnt);
}
-static int do_move_mount(struct path *old_path,
- struct path *new_path, enum mnt_tree_flags_t flags)
+static int do_move_mount(const struct path *old_path,
+ const struct path *new_path,
+ enum mnt_tree_flags_t flags)
{
- struct mnt_namespace *ns;
- struct mount *p;
- struct mount *old;
- struct mount *parent;
- struct pinned_mountpoint mp;
+ struct mount *old = real_mount(old_path->mnt);
int err;
bool beneath = flags & MNT_TREE_BENEATH;
- err = do_lock_mount(new_path, &mp, beneath);
- if (err)
- return err;
+ if (!path_mounted(old_path))
+ return -EINVAL;
- old = real_mount(old_path->mnt);
- p = real_mount(new_path->mnt);
- parent = old->mnt_parent;
- ns = old->mnt_ns;
+ if (d_is_dir(new_path->dentry) != d_is_dir(old_path->dentry))
+ return -EINVAL;
- err = -EINVAL;
+ LOCK_MOUNT_MAYBE_BENEATH(mp, new_path, beneath);
+ if (IS_ERR(mp.parent))
+ return PTR_ERR(mp.parent);
if (check_mnt(old)) {
/* if the source is in our namespace... */
/* ... it should be detachable from parent */
if (!mnt_has_parent(old) || IS_MNT_LOCKED(old))
- goto out;
+ return -EINVAL;
+ /* ... which should not be shared */
+ if (IS_MNT_SHARED(old->mnt_parent))
+ return -EINVAL;
/* ... and the target should be in our namespace */
- if (!check_mnt(p))
- goto out;
- /* parent of the source should not be shared */
- if (IS_MNT_SHARED(parent))
- goto out;
+ if (!check_mnt(mp.parent))
+ return -EINVAL;
} else {
/*
* otherwise the source must be the root of some anon namespace.
*/
if (!anon_ns_root(old))
- goto out;
+ return -EINVAL;
/*
* Bail out early if the target is within the same namespace -
* subsequent checks would've rejected that, but they lose
* some corner cases if we check it early.
*/
- if (ns == p->mnt_ns)
- goto out;
+ if (old->mnt_ns == mp.parent->mnt_ns)
+ return -EINVAL;
/*
* Target should be either in our namespace or in an acceptable
* anon namespace, sensu check_anonymous_mnt().
*/
- if (!may_use_mount(p))
- goto out;
+ if (!may_use_mount(mp.parent))
+ return -EINVAL;
}
- if (!path_mounted(old_path))
- goto out;
-
- if (d_is_dir(new_path->dentry) !=
- d_is_dir(old_path->dentry))
- goto out;
-
if (beneath) {
- err = can_move_mount_beneath(old_path, new_path, mp.mp);
- if (err)
- goto out;
+ struct mount *over = real_mount(new_path->mnt);
- err = -EINVAL;
- p = p->mnt_parent;
+ if (mp.parent != over->mnt_parent)
+ over = mp.parent->overmount;
+ err = can_move_mount_beneath(old, over, mp.mp);
+ if (err)
+ return err;
}
/*
* Don't move a mount tree containing unbindable mounts to a destination
* mount which is shared.
*/
- if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
- goto out;
- err = -ELOOP;
+ if (IS_MNT_SHARED(mp.parent) && tree_contains_unbindable(old))
+ return -EINVAL;
if (!check_for_nsfs_mounts(old))
- goto out;
- if (mount_is_ancestor(old, p))
- goto out;
+ return -ELOOP;
+ if (mount_is_ancestor(old, mp.parent))
+ return -ELOOP;
- err = attach_recursive_mnt(old, p, mp.mp);
-out:
- unlock_mount(&mp);
- return err;
+ return attach_recursive_mnt(old, &mp);
}
-static int do_move_mount_old(struct path *path, const char *old_name)
+static int do_move_mount_old(const struct path *path, const char *old_name)
{
- struct path old_path;
+ struct path old_path __free(path_put) = {};
int err;
if (!old_name || !*old_name)
@@ -3677,18 +3661,19 @@ static int do_move_mount_old(struct path *path, const char *old_name)
if (err)
return err;
- err = do_move_mount(&old_path, path, 0);
- path_put(&old_path);
- return err;
+ return do_move_mount(&old_path, path, 0);
}
/*
* add a mount into a namespace's mount tree
*/
-static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
- const struct path *path, int mnt_flags)
+static int do_add_mount(struct mount *newmnt, const struct pinned_mountpoint *mp,
+ int mnt_flags)
{
- struct mount *parent = real_mount(path->mnt);
+ struct mount *parent = mp->parent;
+
+ if (IS_ERR(parent))
+ return PTR_ERR(parent);
mnt_flags &= ~MNT_INTERNAL_FLAGS;
@@ -3702,14 +3687,15 @@ static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
}
/* Refuse the same filesystem on the same mount point */
- if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && path_mounted(path))
+ if (parent->mnt.mnt_sb == newmnt->mnt.mnt_sb &&
+ parent->mnt.mnt_root == mp->mp->m_dentry)
return -EBUSY;
if (d_is_symlink(newmnt->mnt.mnt_root))
return -EINVAL;
newmnt->mnt.mnt_flags = mnt_flags;
- return graft_tree(newmnt, parent, mp);
+ return graft_tree(newmnt, mp);
}
static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
@@ -3718,39 +3704,30 @@ static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags
* Create a new mount using a superblock configuration and request it
* be added to the namespace tree.
*/
-static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
+static int do_new_mount_fc(struct fs_context *fc, const struct path *mountpoint,
unsigned int mnt_flags)
{
- struct vfsmount *mnt;
- struct pinned_mountpoint mp = {};
- struct super_block *sb = fc->root->d_sb;
+ struct super_block *sb;
+ struct vfsmount *mnt __free(mntput) = fc_mount(fc);
int error;
- error = security_sb_kern_mount(sb);
- if (!error && mount_too_revealing(sb, &mnt_flags))
- error = -EPERM;
-
- if (unlikely(error)) {
- fc_drop_locked(fc);
- return error;
- }
-
- up_write(&sb->s_umount);
-
- mnt = vfs_create_mount(fc);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
+ sb = fc->root->d_sb;
+ error = security_sb_kern_mount(sb);
+ if (unlikely(error))
+ return error;
+
+ if (unlikely(mount_too_revealing(sb, &mnt_flags)))
+ return -EPERM;
+
mnt_warn_timestamp_expiry(mountpoint, mnt);
- error = lock_mount(mountpoint, &mp);
- if (!error) {
- error = do_add_mount(real_mount(mnt), mp.mp,
- mountpoint, mnt_flags);
- unlock_mount(&mp);
- }
- if (error < 0)
- mntput(mnt);
+ LOCK_MOUNT(mp, mountpoint);
+ error = do_add_mount(real_mount(mnt), &mp, mnt_flags);
+ if (!error)
+ retain_and_null_ptr(mnt); // consumed on success
return error;
}
@@ -3758,8 +3735,9 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
* create a new mount for userspace and request it to be added into the
* namespace's tree
*/
-static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
- int mnt_flags, const char *name, void *data)
+static int do_new_mount(const struct path *path, const char *fstype,
+ int sb_flags, int mnt_flags,
+ const char *name, void *data)
{
struct file_system_type *type;
struct fs_context *fc;
@@ -3796,27 +3774,46 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
fc->oldapi = true;
if (subtype)
- err = vfs_parse_fs_string(fc, "subtype",
- subtype, strlen(subtype));
+ err = vfs_parse_fs_string(fc, "subtype", subtype);
if (!err && name)
- err = vfs_parse_fs_string(fc, "source", name, strlen(name));
+ err = vfs_parse_fs_string(fc, "source", name);
if (!err)
err = parse_monolithic_mount_data(fc, data);
if (!err && !mount_capable(fc))
err = -EPERM;
if (!err)
- err = vfs_get_tree(fc);
- if (!err)
err = do_new_mount_fc(fc, path, mnt_flags);
put_fs_context(fc);
return err;
}
-int finish_automount(struct vfsmount *m, const struct path *path)
+static void lock_mount_exact(const struct path *path,
+ struct pinned_mountpoint *mp)
{
struct dentry *dentry = path->dentry;
- struct pinned_mountpoint mp = {};
+ int err;
+
+ inode_lock(dentry->d_inode);
+ namespace_lock();
+ if (unlikely(cant_mount(dentry)))
+ err = -ENOENT;
+ else if (path_overmounted(path))
+ err = -EBUSY;
+ else
+ err = get_mountpoint(dentry, mp);
+ if (unlikely(err)) {
+ namespace_unlock();
+ inode_unlock(dentry->d_inode);
+ mp->parent = ERR_PTR(err);
+ } else {
+ mp->parent = real_mount(path->mnt);
+ }
+}
+
+int finish_automount(struct vfsmount *__m, const struct path *path)
+{
+ struct vfsmount *m __free(mntput) = __m;
struct mount *mnt;
int err;
@@ -3827,43 +3824,21 @@ int finish_automount(struct vfsmount *m, const struct path *path)
mnt = real_mount(m);
- if (m->mnt_sb == path->mnt->mnt_sb &&
- m->mnt_root == dentry) {
- err = -ELOOP;
- goto discard;
- }
+ if (m->mnt_root == path->dentry)
+ return -ELOOP;
/*
- * we don't want to use lock_mount() - in this case finding something
+ * we don't want to use LOCK_MOUNT() - in this case finding something
* that overmounts our mountpoint to be means "quitely drop what we've
* got", not "try to mount it on top".
*/
- inode_lock(dentry->d_inode);
- namespace_lock();
- if (unlikely(cant_mount(dentry))) {
- err = -ENOENT;
- goto discard_locked;
- }
- if (path_overmounted(path)) {
- err = 0;
- goto discard_locked;
- }
- err = get_mountpoint(dentry, &mp);
- if (err)
- goto discard_locked;
+ LOCK_MOUNT_EXACT(mp, path);
+ if (mp.parent == ERR_PTR(-EBUSY))
+ return 0;
- err = do_add_mount(mnt, mp.mp, path,
- path->mnt->mnt_flags | MNT_SHRINKABLE);
- unlock_mount(&mp);
- if (unlikely(err))
- goto discard;
- return 0;
-
-discard_locked:
- namespace_unlock();
- inode_unlock(dentry->d_inode);
-discard:
- mntput(m);
+ err = do_add_mount(mnt, &mp, path->mnt->mnt_flags | MNT_SHRINKABLE);
+ if (likely(!err))
+ retain_and_null_ptr(m);
return err;
}
@@ -3874,9 +3849,8 @@ int finish_automount(struct vfsmount *m, const struct path *path)
*/
void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
{
- read_seqlock_excl(&mount_lock);
+ guard(mount_locked_reader)();
list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
- read_sequnlock_excl(&mount_lock);
}
EXPORT_SYMBOL(mnt_set_expiry);
@@ -3893,8 +3867,8 @@ void mark_mounts_for_expiry(struct list_head *mounts)
if (list_empty(mounts))
return;
- namespace_lock();
- lock_mount_hash();
+ guard(namespace_excl)();
+ guard(mount_writer)();
/* extract from the expiration list every vfsmount that matches the
* following criteria:
@@ -3916,8 +3890,6 @@ void mark_mounts_for_expiry(struct list_head *mounts)
touch_mnt_namespace(mnt->mnt_ns);
umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
}
- unlock_mount_hash();
- namespace_unlock();
}
EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
@@ -4045,7 +4017,7 @@ static char *copy_mount_string(const void __user *data)
* Therefore, if this magic number is present, it carries no information
* and must be discarded.
*/
-int path_mount(const char *dev_name, struct path *path,
+int path_mount(const char *dev_name, const struct path *path,
const char *type_page, unsigned long flags, void *data_page)
{
unsigned int mnt_flags = 0, sb_flags;
@@ -4127,15 +4099,13 @@ int path_mount(const char *dev_name, struct path *path,
int do_mount(const char *dev_name, const char __user *dir_name,
const char *type_page, unsigned long flags, void *data_page)
{
- struct path path;
+ struct path path __free(path_put) = {};
int ret;
ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
if (ret)
return ret;
- ret = path_mount(dev_name, &path, type_page, flags, data_page);
- path_put(&path);
- return ret;
+ return path_mount(dev_name, &path, type_page, flags, data_page);
}
static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
@@ -4207,7 +4177,8 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
struct user_namespace *user_ns, struct fs_struct *new_fs)
{
struct mnt_namespace *new_ns;
- struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
+ struct vfsmount *rootmnt __free(mntput) = NULL;
+ struct vfsmount *pwdmnt __free(mntput) = NULL;
struct mount *p, *q;
struct mount *old;
struct mount *new;
@@ -4226,23 +4197,19 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
if (IS_ERR(new_ns))
return new_ns;
- namespace_lock();
+ guard(namespace_excl)();
/* First pass: copy the tree topology */
copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
if (user_ns != ns->user_ns)
copy_flags |= CL_SLAVE;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
- namespace_unlock();
- ns_free_inum(&new_ns->ns);
- dec_mnt_namespaces(new_ns->ucounts);
- mnt_ns_release(new_ns);
+ emptied_ns = new_ns;
return ERR_CAST(new);
}
if (user_ns != ns->user_ns) {
- lock_mount_hash();
+ guard(mount_writer)();
lock_mnt_tree(new);
- unlock_mount_hash();
}
new_ns->root = new;
@@ -4274,13 +4241,6 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(skip_mnt_tree(p), old);
}
- namespace_unlock();
-
- if (rootmnt)
- mntput(rootmnt);
- if (pwdmnt)
- mntput(pwdmnt);
-
mnt_ns_tree_add(new_ns);
return new_ns;
}
@@ -4505,7 +4465,8 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
return ret;
}
-static inline int vfs_move_mount(struct path *from_path, struct path *to_path,
+static inline int vfs_move_mount(const struct path *from_path,
+ const struct path *to_path,
enum mnt_tree_flags_t mflags)
{
int ret;
@@ -4611,7 +4572,7 @@ SYSCALL_DEFINE5(move_mount,
/*
* Return true if path is reachable from root
*
- * namespace_sem or mount_lock is held
+ * locks: mount_locked_reader || namespace_shared && is_mounted(mnt)
*/
bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
const struct path *root)
@@ -4625,11 +4586,8 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
bool path_is_under(const struct path *path1, const struct path *path2)
{
- bool res;
- read_seqlock_excl(&mount_lock);
- res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
- read_sequnlock_excl(&mount_lock);
- return res;
+ guard(mount_locked_reader)();
+ return is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
}
EXPORT_SYMBOL(path_is_under);
@@ -4661,9 +4619,10 @@ EXPORT_SYMBOL(path_is_under);
SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
const char __user *, put_old)
{
- struct path new, old, root;
+ struct path new __free(path_put) = {};
+ struct path old __free(path_put) = {};
+ struct path root __free(path_put) = {};
struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
- struct pinned_mountpoint old_mp = {};
int error;
if (!may_mount())
@@ -4672,57 +4631,54 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
error = user_path_at(AT_FDCWD, new_root,
LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new);
if (error)
- goto out0;
+ return error;
error = user_path_at(AT_FDCWD, put_old,
LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old);
if (error)
- goto out1;
+ return error;
error = security_sb_pivotroot(&old, &new);
if (error)
- goto out2;
+ return error;
get_fs_root(current->fs, &root);
- error = lock_mount(&old, &old_mp);
- if (error)
- goto out3;
- error = -EINVAL;
+ LOCK_MOUNT(old_mp, &old);
+ old_mnt = old_mp.parent;
+ if (IS_ERR(old_mnt))
+ return PTR_ERR(old_mnt);
+
new_mnt = real_mount(new.mnt);
root_mnt = real_mount(root.mnt);
- old_mnt = real_mount(old.mnt);
ex_parent = new_mnt->mnt_parent;
root_parent = root_mnt->mnt_parent;
if (IS_MNT_SHARED(old_mnt) ||
IS_MNT_SHARED(ex_parent) ||
IS_MNT_SHARED(root_parent))
- goto out4;
+ return -EINVAL;
if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
- goto out4;
+ return -EINVAL;
if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
- goto out4;
- error = -ENOENT;
+ return -EINVAL;
if (d_unlinked(new.dentry))
- goto out4;
- error = -EBUSY;
+ return -ENOENT;
if (new_mnt == root_mnt || old_mnt == root_mnt)
- goto out4; /* loop, on the same file system */
- error = -EINVAL;
+ return -EBUSY; /* loop, on the same file system */
if (!path_mounted(&root))
- goto out4; /* not a mountpoint */
+ return -EINVAL; /* not a mountpoint */
if (!mnt_has_parent(root_mnt))
- goto out4; /* absolute root */
+ return -EINVAL; /* absolute root */
if (!path_mounted(&new))
- goto out4; /* not a mountpoint */
+ return -EINVAL; /* not a mountpoint */
if (!mnt_has_parent(new_mnt))
- goto out4; /* absolute root */
+ return -EINVAL; /* absolute root */
/* make sure we can reach put_old from new_root */
- if (!is_path_reachable(old_mnt, old.dentry, &new))
- goto out4;
+ if (!is_path_reachable(old_mnt, old_mp.mp->m_dentry, &new))
+ return -EINVAL;
/* make certain new is below the root */
if (!is_path_reachable(new_mnt, new.dentry, &root))
- goto out4;
+ return -EINVAL;
lock_mount_hash();
umount_mnt(new_mnt);
if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
@@ -4741,17 +4697,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
mnt_notify_add(root_mnt);
mnt_notify_add(new_mnt);
chroot_fs_refs(&root, &new);
- error = 0;
-out4:
- unlock_mount(&old_mp);
-out3:
- path_put(&root);
-out2:
- path_put(&old);
-out1:
- path_put(&new);
-out0:
- return error;
+ return 0;
}
static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
@@ -4841,8 +4787,10 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
if (!mnt_allow_writers(kattr, m)) {
err = mnt_hold_writers(m);
- if (err)
+ if (err) {
+ m = next_mnt(m, mnt);
break;
+ }
}
if (!(kattr->kflags & MOUNT_KATTR_RECURSE))
@@ -4850,25 +4798,9 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
}
if (err) {
- struct mount *p;
-
- /*
- * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will
- * be set in @mnt_flags. The loop unsets MNT_WRITE_HOLD for all
- * mounts and needs to take care to include the first mount.
- */
- for (p = mnt; p; p = next_mnt(p, mnt)) {
- /* If we had to hold writers unblock them. */
- if (p->mnt.mnt_flags & MNT_WRITE_HOLD)
- mnt_unhold_writers(p);
-
- /*
- * We're done once the first mount we changed got
- * MNT_WRITE_HOLD unset.
- */
- if (p == m)
- break;
- }
+ /* undo all mnt_hold_writers() we'd done */
+ for (struct mount *p = mnt; p != m; p = next_mnt(p, mnt))
+ mnt_unhold_writers(p);
}
return err;
}
@@ -4899,8 +4831,7 @@ static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt)
WRITE_ONCE(m->mnt.mnt_flags, flags);
/* If we had to hold writers unblock them. */
- if (m->mnt.mnt_flags & MNT_WRITE_HOLD)
- mnt_unhold_writers(m);
+ mnt_unhold_writers(m);
if (kattr->propagation)
change_mnt_propagation(m, kattr->propagation);
@@ -4910,7 +4841,7 @@ static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt)
touch_mnt_namespace(mnt->mnt_ns);
}
-static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
+static int do_mount_setattr(const struct path *path, struct mount_kattr *kattr)
{
struct mount *mnt = real_mount(path->mnt);
int err = 0;
@@ -5708,6 +5639,7 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
STATMOUNT_MNT_UIDMAP | \
STATMOUNT_MNT_GIDMAP)
+/* locks: namespace_shared */
static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
struct mnt_namespace *ns)
{
@@ -5957,7 +5889,7 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
if (ret)
return ret;
- scoped_guard(rwsem_read, &namespace_sem)
+ scoped_guard(namespace_shared)
ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns);
if (!ret)
@@ -5968,6 +5900,7 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
return ret;
}
+/* locks: namespace_shared */
static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids,
bool reverse)
@@ -6079,7 +6012,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
* We only need to guard against mount topology changes as
* listmount() doesn't care about any mount properties.
*/
- scoped_guard(rwsem_read, &namespace_sem)
+ scoped_guard(namespace_shared)
ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
nr_mnt_ids, (flags & LISTMOUNT_REVERSE));
if (ret <= 0)
@@ -6162,12 +6095,10 @@ void put_mnt_ns(struct mnt_namespace *ns)
{
if (!refcount_dec_and_test(&ns->ns.count))
return;
- namespace_lock();
+ guard(namespace_excl)();
emptied_ns = ns;
- lock_mount_hash();
+ guard(mount_writer)();
umount_tree(ns->root, 0);
- unlock_mount_hash();
- namespace_unlock();
}
struct vfsmount *kern_mount(struct file_system_type *type)
@@ -6216,25 +6147,18 @@ bool our_mnt(struct vfsmount *mnt)
bool current_chrooted(void)
{
/* Does the current process have a non-standard root */
- struct path ns_root;
- struct path fs_root;
- bool chrooted;
-
- /* Find the namespace root */
- ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt;
- ns_root.dentry = ns_root.mnt->mnt_root;
- path_get(&ns_root);
- while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
- ;
+ struct path fs_root __free(path_put) = {};
+ struct mount *root;
get_fs_root(current->fs, &fs_root);
- chrooted = !path_equal(&fs_root, &ns_root);
+ /* Find the namespace root */
- path_put(&fs_root);
- path_put(&ns_root);
+ guard(mount_locked_reader)();
- return chrooted;
+ root = topmost_overmount(current->nsproxy->mnt_ns->root);
+
+ return fs_root.mnt != &root->mnt || !path_mounted(&fs_root);
}
static bool mnt_already_visible(struct mnt_namespace *ns,
@@ -6243,9 +6167,8 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
{
int new_flags = *new_mnt_flags;
struct mount *mnt, *n;
- bool visible = false;
- down_read(&namespace_sem);
+ guard(namespace_shared)();
rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) {
struct mount *child;
int mnt_flags;
@@ -6292,13 +6215,10 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
/* Preserve the locked attributes */
*new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
MNT_LOCK_ATIME);
- visible = true;
- goto found;
+ return true;
next: ;
}
-found:
- up_read(&namespace_sem);
- return visible;
+ return false;
}
static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d8121792..5f7d9be 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2198,8 +2198,6 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
else
dput(dentry);
}
- if (IS_ERR(res))
- return PTR_ERR(res);
return finish_no_open(file, res);
}
EXPORT_SYMBOL_GPL(nfs_atomic_open);
@@ -2260,7 +2258,7 @@ int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned int open_flags,
umode_t mode)
{
-
+ struct dentry *res = NULL;
/* Same as look+open from lookup_open(), but with different O_TRUNC
* handling.
*/
@@ -2275,21 +2273,15 @@ int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry,
if (error)
return error;
return finish_open(file, dentry, NULL);
- } else if (d_in_lookup(dentry)) {
+ }
+ if (d_in_lookup(dentry)) {
/* The only flags nfs_lookup considers are
* LOOKUP_EXCL and LOOKUP_RENAME_TARGET, and
* we want those to be zero so the lookup isn't skipped.
*/
- struct dentry *res = nfs_lookup(dir, dentry, 0);
-
- d_lookup_done(dentry);
- if (unlikely(res)) {
- if (IS_ERR(res))
- return PTR_ERR(res);
- return finish_no_open(file, res);
- }
+ res = nfs_lookup(dir, dentry, 0);
}
- return finish_no_open(file, NULL);
+ return finish_no_open(file, res);
}
EXPORT_SYMBOL_GPL(nfs_atomic_open_v23);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 9e94d18..b4679b7 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -1269,8 +1269,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
int ret;
data->context[NFS_MAX_CONTEXT_LEN] = '\0';
- ret = vfs_parse_fs_string(fc, "context",
- data->context, strlen(data->context));
+ ret = vfs_parse_fs_string(fc, "context", data->context);
if (ret < 0)
return ret;
#else
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index bd5fca2..1f5d8c5 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -529,7 +529,7 @@ nfs_set_local_verifier(struct inode *inode,
}
/* Factored out from fs/nfsd/vfs.h:fh_getattr() */
-static int __vfs_getattr(struct path *p, struct kstat *stat, int version)
+static int __vfs_getattr(const struct path *p, struct kstat *stat, int version)
{
u32 request_mask = STATX_BASIC_STATS;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 7f1ec9c..5735c04 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -290,7 +290,8 @@ int nfs_do_submount(struct fs_context *fc)
nfs_errorf(fc, "NFS: Couldn't determine submount pathname");
ret = PTR_ERR(p);
} else {
- ret = vfs_parse_fs_string(fc, "source", p, buffer + 4096 - p);
+ ret = vfs_parse_fs_qstr(fc, "source",
+ &QSTR_LEN(p, buffer + 4096 - p));
if (!ret)
ret = vfs_get_tree(fc);
}
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index b29a26923..5ec9c83 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -149,21 +149,9 @@ static int do_nfs4_mount(struct nfs_server *server,
struct fs_context *root_fc;
struct vfsmount *root_mnt;
struct dentry *dentry;
- size_t len;
+ char *source;
int ret;
- struct fs_parameter param = {
- .key = "source",
- .type = fs_value_is_string,
- .dirfd = -1,
- };
-
- struct fs_parameter param_fsc = {
- .key = "fsc",
- .type = fs_value_is_string,
- .dirfd = -1,
- };
-
if (IS_ERR(server))
return PTR_ERR(server);
@@ -181,15 +169,7 @@ static int do_nfs4_mount(struct nfs_server *server,
root_ctx->server = server;
if (ctx->fscache_uniq) {
- len = strlen(ctx->fscache_uniq);
- param_fsc.size = len;
- param_fsc.string = kmemdup_nul(ctx->fscache_uniq, len, GFP_KERNEL);
- if (param_fsc.string == NULL) {
- put_fs_context(root_fc);
- return -ENOMEM;
- }
- ret = vfs_parse_fs_param(root_fc, ¶m_fsc);
- kfree(param_fsc.string);
+ ret = vfs_parse_fs_string(root_fc, "fsc", ctx->fscache_uniq);
if (ret < 0) {
put_fs_context(root_fc);
return ret;
@@ -197,20 +177,18 @@ static int do_nfs4_mount(struct nfs_server *server,
}
/* We leave export_path unset as it's not used to find the root. */
- len = strlen(hostname) + 5;
- param.string = kmalloc(len, GFP_KERNEL);
- if (param.string == NULL) {
+ /* Does hostname needs to be enclosed in brackets? */
+ if (strchr(hostname, ':'))
+ source = kasprintf(GFP_KERNEL, "[%s]:/", hostname);
+ else
+ source = kasprintf(GFP_KERNEL, "%s:/", hostname);
+
+ if (!source) {
put_fs_context(root_fc);
return -ENOMEM;
}
-
- /* Does hostname needs to be enclosed in brackets? */
- if (strchr(hostname, ':'))
- param.size = snprintf(param.string, len, "[%s]:/", hostname);
- else
- param.size = snprintf(param.string, len, "%s:/", hostname);
- ret = vfs_parse_fs_param(root_fc, ¶m);
- kfree(param.string);
+ ret = vfs_parse_fs_string(root_fc, "source", source);
+ kfree(source);
if (ret < 0) {
put_fs_context(root_fc);
return ret;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cadfc2b..caa695c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -402,7 +402,7 @@ static struct svc_export *svc_export_update(struct svc_export *new,
struct svc_export *old);
static struct svc_export *svc_export_lookup(struct svc_export *);
-static int check_export(struct path *path, int *flags, unsigned char *uuid)
+static int check_export(const struct path *path, int *flags, unsigned char *uuid)
{
struct inode *inode = d_inode(path->dentry);
@@ -1181,7 +1181,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
* use exp_get_by_name() or exp_find().
*/
struct svc_export *
-rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
+rqst_exp_get_by_name(struct svc_rqst *rqstp, const struct path *path)
{
struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index b9c0adb..cb36e6c 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -111,7 +111,7 @@ int nfsd_export_init(struct net *);
void nfsd_export_shutdown(struct net *);
void nfsd_export_flush(struct net *);
struct svc_export * rqst_exp_get_by_name(struct svc_rqst *,
- struct path *);
+ const struct path *);
struct svc_export * rqst_exp_parent(struct svc_rqst *,
struct path *);
struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index bc6b776f..c19a74a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1103,89 +1103,48 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
* populating the filesystem.
*/
-/* Basically copying rpc_get_inode. */
static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
{
struct inode *inode = new_inode(sb);
- if (!inode)
- return NULL;
- /* Following advice from simple_fill_super documentation: */
- inode->i_ino = iunique(sb, NFSD_MaxReserved);
- inode->i_mode = mode;
- simple_inode_init_ts(inode);
- switch (mode & S_IFMT) {
- case S_IFDIR:
- inode->i_fop = &simple_dir_operations;
- inode->i_op = &simple_dir_inode_operations;
- inc_nlink(inode);
- break;
- case S_IFLNK:
- inode->i_op = &simple_symlink_inode_operations;
- break;
- default:
- break;
+ if (inode) {
+ /* Following advice from simple_fill_super documentation: */
+ inode->i_ino = iunique(sb, NFSD_MaxReserved);
+ inode->i_mode = mode;
+ simple_inode_init_ts(inode);
}
return inode;
}
-static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, struct nfsdfs_client *ncl)
-{
- struct inode *inode;
-
- inode = nfsd_get_inode(dir->i_sb, mode);
- if (!inode)
- return -ENOMEM;
- if (ncl) {
- inode->i_private = ncl;
- kref_get(&ncl->cl_ref);
- }
- d_add(dentry, inode);
- inc_nlink(dir);
- fsnotify_mkdir(dir, dentry);
- return 0;
-}
-
static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name)
{
struct inode *dir = parent->d_inode;
struct dentry *dentry;
- int ret = -ENOMEM;
+ struct inode *inode;
- inode_lock(dir);
- dentry = d_alloc_name(parent, name);
- if (!dentry)
- goto out_err;
- ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600, ncl);
- if (ret)
- goto out_err;
-out:
+ inode = nfsd_get_inode(parent->d_sb, S_IFDIR | 0600);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry)) {
+ iput(inode);
+ return dentry;
+ }
+ inode->i_fop = &simple_dir_operations;
+ inode->i_op = &simple_dir_inode_operations;
+ inc_nlink(inode);
+ if (ncl) {
+ inode->i_private = ncl;
+ kref_get(&ncl->cl_ref);
+ }
+ d_instantiate(dentry, inode);
+ inc_nlink(dir);
+ fsnotify_mkdir(dir, dentry);
inode_unlock(dir);
return dentry;
-out_err:
- dput(dentry);
- dentry = ERR_PTR(ret);
- goto out;
}
#if IS_ENABLED(CONFIG_SUNRPC_GSS)
-static int __nfsd_symlink(struct inode *dir, struct dentry *dentry,
- umode_t mode, const char *content)
-{
- struct inode *inode;
-
- inode = nfsd_get_inode(dir->i_sb, mode);
- if (!inode)
- return -ENOMEM;
-
- inode->i_link = (char *)content;
- inode->i_size = strlen(content);
-
- d_add(dentry, inode);
- inc_nlink(dir);
- fsnotify_create(dir, dentry);
- return 0;
-}
-
/*
* @content is assumed to be a NUL-terminated string that lives
* longer than the symlink itself.
@@ -1194,17 +1153,25 @@ static void _nfsd_symlink(struct dentry *parent, const char *name,
const char *content)
{
struct inode *dir = parent->d_inode;
+ struct inode *inode;
struct dentry *dentry;
- int ret;
- inode_lock(dir);
- dentry = d_alloc_name(parent, name);
- if (!dentry)
- goto out;
- ret = __nfsd_symlink(d_inode(parent), dentry, S_IFLNK | 0777, content);
- if (ret)
- dput(dentry);
-out:
+ inode = nfsd_get_inode(dir->i_sb, S_IFLNK | 0777);
+ if (!inode)
+ return;
+
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry)) {
+ iput(inode);
+ return;
+ }
+
+ inode->i_op = &simple_symlink_inode_operations;
+ inode->i_link = (char *)content;
+ inode->i_size = strlen(content);
+
+ d_instantiate(dentry, inode);
+ fsnotify_create(dir, dentry);
inode_unlock(dir);
}
#else
@@ -1240,40 +1207,34 @@ struct nfsdfs_client *get_nfsdfs_client(struct inode *inode)
/* XXX: cut'n'paste from simple_fill_super; figure out if we could share
* code instead. */
-static int nfsdfs_create_files(struct dentry *root,
+static int nfsdfs_create_files(struct dentry *root,
const struct tree_descr *files,
struct nfsdfs_client *ncl,
struct dentry **fdentries)
{
struct inode *dir = d_inode(root);
- struct inode *inode;
struct dentry *dentry;
- int i;
- inode_lock(dir);
- for (i = 0; files->name && files->name[0]; i++, files++) {
- dentry = d_alloc_name(root, files->name);
- if (!dentry)
- goto out;
- inode = nfsd_get_inode(d_inode(root)->i_sb,
- S_IFREG | files->mode);
- if (!inode) {
- dput(dentry);
- goto out;
+ for (int i = 0; files->name && files->name[0]; i++, files++) {
+ struct inode *inode = nfsd_get_inode(root->d_sb,
+ S_IFREG | files->mode);
+ if (!inode)
+ return -ENOMEM;
+ dentry = simple_start_creating(root, files->name);
+ if (IS_ERR(dentry)) {
+ iput(inode);
+ return PTR_ERR(dentry);
}
kref_get(&ncl->cl_ref);
inode->i_fop = files->ops;
inode->i_private = ncl;
- d_add(dentry, inode);
+ d_instantiate(dentry, inode);
fsnotify_create(dir, dentry);
if (fdentries)
fdentries[i] = dentry;
+ inode_unlock(dir);
}
- inode_unlock(dir);
return 0;
-out:
- inode_unlock(dir);
- return -ENOMEM;
}
/* on success, returns positive number unique to that client. */
diff --git a/fs/open.c b/fs/open.c
index 9655158..3d64372 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1022,8 +1022,8 @@ static int do_dentry_open(struct file *f,
put_file_access(f);
cleanup_file:
path_put(&f->f_path);
- f->f_path.mnt = NULL;
- f->f_path.dentry = NULL;
+ f->__f_path.mnt = NULL;
+ f->__f_path.dentry = NULL;
f->f_inode = NULL;
return error;
}
@@ -1050,7 +1050,7 @@ int finish_open(struct file *file, struct dentry *dentry,
{
BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
- file->f_path.dentry = dentry;
+ file->__f_path.dentry = dentry;
return do_dentry_open(file, open);
}
EXPORT_SYMBOL(finish_open);
@@ -1059,19 +1059,21 @@ EXPORT_SYMBOL(finish_open);
* finish_no_open - finish ->atomic_open() without opening the file
*
* @file: file pointer
- * @dentry: dentry or NULL (as returned from ->lookup())
+ * @dentry: dentry, ERR_PTR(-E...) or NULL (as returned from ->lookup())
*
- * This can be used to set the result of a successful lookup in ->atomic_open().
+ * This can be used to set the result of a lookup in ->atomic_open().
*
* NB: unlike finish_open() this function does consume the dentry reference and
* the caller need not dput() it.
*
- * Returns "0" which must be the return value of ->atomic_open() after having
- * called this function.
+ * Returns 0 or -E..., which must be the return value of ->atomic_open() after
+ * having called this function.
*/
int finish_no_open(struct file *file, struct dentry *dentry)
{
- file->f_path.dentry = dentry;
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ file->__f_path.dentry = dentry;
return 0;
}
EXPORT_SYMBOL(finish_no_open);
@@ -1091,7 +1093,7 @@ int vfs_open(const struct path *path, struct file *file)
{
int ret;
- file->f_path = *path;
+ file->__f_path = *path;
ret = do_dentry_open(file, NULL);
if (!ret) {
/*
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 27396fe..59630b8 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -242,7 +242,7 @@ static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
return 0;
}
-static int ovl_sync_file(struct path *path)
+static int ovl_sync_file(const struct path *path)
{
struct file *new_file;
int err;
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index f5b8877..fc52c79 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -120,7 +120,7 @@ static bool ovl_is_real_file(const struct file *realfile,
}
static struct file *ovl_real_file_path(const struct file *file,
- struct path *realpath)
+ const struct path *realpath)
{
struct ovl_file *of = file->private_data;
struct file *realfile = of->realfile;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index bb0d7de..e3a7492 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -563,11 +563,11 @@ int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d,
struct ovl_metacopy *metacopy);
bool ovl_is_metacopy_dentry(struct dentry *dentry);
char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding);
-int ovl_ensure_verity_loaded(struct path *path);
+int ovl_ensure_verity_loaded(const struct path *path);
int ovl_validate_verity(struct ovl_fs *ofs,
- struct path *metapath,
- struct path *datapath);
-int ovl_get_verity_digest(struct ovl_fs *ofs, struct path *src,
+ const struct path *metapath,
+ const struct path *datapath);
+int ovl_get_verity_digest(struct ovl_fs *ofs, const struct path *src,
struct ovl_metacopy *metacopy);
int ovl_sync_status(struct ovl_fs *ofs);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index df85a76..e3d0e86 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -394,7 +394,7 @@ static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
return err;
}
-static int ovl_lower_dir(const char *name, struct path *path,
+static int ovl_lower_dir(const char *name, const struct path *path,
struct ovl_fs *ofs, int *stack_depth)
{
int fh_type;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 41033ba..14f1c2a 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -1381,7 +1381,7 @@ char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int pa
}
/* Call with mounter creds as it may open the file */
-int ovl_ensure_verity_loaded(struct path *datapath)
+int ovl_ensure_verity_loaded(const struct path *datapath)
{
struct inode *inode = d_inode(datapath->dentry);
struct file *filp;
@@ -1401,8 +1401,8 @@ int ovl_ensure_verity_loaded(struct path *datapath)
}
int ovl_validate_verity(struct ovl_fs *ofs,
- struct path *metapath,
- struct path *datapath)
+ const struct path *metapath,
+ const struct path *datapath)
{
struct ovl_metacopy metacopy_data;
u8 actual_digest[FS_VERITY_MAX_DIGEST_SIZE];
@@ -1455,7 +1455,7 @@ int ovl_validate_verity(struct ovl_fs *ofs,
return 0;
}
-int ovl_get_verity_digest(struct ovl_fs *ofs, struct path *src,
+int ovl_get_verity_digest(struct ovl_fs *ofs, const struct path *src,
struct ovl_metacopy *metacopy)
{
int err, digest_size;
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 108e7527..5af4fee 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -847,7 +847,7 @@ static int pidfs_export_permission(struct handle_to_path_ctx *ctx,
return 0;
}
-static struct file *pidfs_export_open(struct path *path, unsigned int oflags)
+static struct file *pidfs_export_open(const struct path *path, unsigned int oflags)
{
/*
* Clear O_LARGEFILE as open_by_handle_at() forces it and raise
diff --git a/fs/pnode.c b/fs/pnode.c
index 6f7d02f..5d91c3e 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -29,6 +29,7 @@ static inline struct mount *next_slave(struct mount *p)
return hlist_entry(p->mnt_slave.next, struct mount, mnt_slave);
}
+/* locks: namespace_shared && is_mounted(mnt) */
static struct mount *get_peer_under_root(struct mount *mnt,
struct mnt_namespace *ns,
const struct path *root)
@@ -50,7 +51,7 @@ static struct mount *get_peer_under_root(struct mount *mnt,
* Get ID of closest dominating peer group having a representative
* under the given root.
*
- * Caller must hold namespace_sem
+ * locks: namespace_shared
*/
int get_dominating_id(struct mount *mnt, const struct path *root)
{
@@ -70,19 +71,6 @@ static inline bool will_be_unmounted(struct mount *m)
return m->mnt.mnt_flags & MNT_UMOUNT;
}
-static struct mount *propagation_source(struct mount *mnt)
-{
- do {
- struct mount *m;
- for (m = next_peer(mnt); m != mnt; m = next_peer(m)) {
- if (!will_be_unmounted(m))
- return m;
- }
- mnt = mnt->mnt_master;
- } while (mnt && will_be_unmounted(mnt));
- return mnt;
-}
-
static void transfer_propagation(struct mount *mnt, struct mount *to)
{
struct hlist_node *p = NULL, *n;
@@ -111,11 +99,10 @@ void change_mnt_propagation(struct mount *mnt, int type)
return;
}
if (IS_MNT_SHARED(mnt)) {
- if (type == MS_SLAVE || !hlist_empty(&mnt->mnt_slave_list))
- m = propagation_source(mnt);
if (list_empty(&mnt->mnt_share)) {
mnt_release_group_id(mnt);
} else {
+ m = next_peer(mnt);
list_del_init(&mnt->mnt_share);
mnt->mnt_group_id = 0;
}
@@ -136,6 +123,57 @@ void change_mnt_propagation(struct mount *mnt, int type)
}
}
+static struct mount *trace_transfers(struct mount *m)
+{
+ while (1) {
+ struct mount *next = next_peer(m);
+
+ if (next != m) {
+ list_del_init(&m->mnt_share);
+ m->mnt_group_id = 0;
+ m->mnt_master = next;
+ } else {
+ if (IS_MNT_SHARED(m))
+ mnt_release_group_id(m);
+ next = m->mnt_master;
+ }
+ hlist_del_init(&m->mnt_slave);
+ CLEAR_MNT_SHARED(m);
+ SET_MNT_MARK(m);
+
+ if (!next || !will_be_unmounted(next))
+ return next;
+ if (IS_MNT_MARKED(next))
+ return next->mnt_master;
+ m = next;
+ }
+}
+
+static void set_destinations(struct mount *m, struct mount *master)
+{
+ struct mount *next;
+
+ while ((next = m->mnt_master) != master) {
+ m->mnt_master = master;
+ m = next;
+ }
+}
+
+void bulk_make_private(struct list_head *set)
+{
+ struct mount *m;
+
+ list_for_each_entry(m, set, mnt_list)
+ if (!IS_MNT_MARKED(m))
+ set_destinations(m, trace_transfers(m));
+
+ list_for_each_entry(m, set, mnt_list) {
+ transfer_propagation(m, m->mnt_master);
+ m->mnt_master = NULL;
+ CLEAR_MNT_MARK(m);
+ }
+}
+
static struct mount *__propagation_next(struct mount *m,
struct mount *origin)
{
@@ -304,9 +342,8 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
err = PTR_ERR(this);
break;
}
- read_seqlock_excl(&mount_lock);
- mnt_set_mountpoint(n, dest_mp, this);
- read_sequnlock_excl(&mount_lock);
+ scoped_guard(mount_locked_reader)
+ mnt_set_mountpoint(n, dest_mp, this);
if (n->mnt_master)
SET_MNT_MARK(n->mnt_master);
copy = this;
diff --git a/fs/pnode.h b/fs/pnode.h
index 00ab153..b029db2 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -42,6 +42,7 @@ static inline bool peers(const struct mount *m1, const struct mount *m2)
}
void change_mnt_propagation(struct mount *, int);
+void bulk_make_private(struct list_head *);
int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
struct hlist_head *);
void propagate_umount(struct list_head *);
diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c
index 5223edf..47710aa 100644
--- a/fs/smb/client/dir.c
+++ b/fs/smb/client/dir.c
@@ -484,8 +484,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
* in network traffic in the other paths.
*/
if (!(oflags & O_CREAT)) {
- struct dentry *res;
-
/*
* Check for hashed negative dentry. We have already revalidated
* the dentry and it is fine. No need to perform another lookup.
@@ -493,11 +491,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
if (!d_in_lookup(direntry))
return -ENOENT;
- res = cifs_lookup(inode, direntry, 0);
- if (IS_ERR(res))
- return PTR_ERR(res);
-
- return finish_no_open(file, res);
+ return finish_no_open(file, cifs_lookup(inode, direntry, 0));
}
xid = get_xid();
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 0723838..306b76e 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -773,16 +773,14 @@ static int smb3_fs_context_parse_monolithic(struct fs_context *fc,
}
- len = 0;
value = strchr(key, '=');
if (value) {
if (value == key)
continue;
*value++ = 0;
- len = strlen(value);
}
- ret = vfs_parse_fs_string(fc, key, value, len);
+ ret = vfs_parse_fs_string(fc, key, value);
if (ret < 0)
break;
}
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 0453903..a33b088a 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -72,7 +72,7 @@ static int ksmbd_vfs_path_lookup(struct ksmbd_share_config *share_conf,
{
struct qstr last;
struct filename *filename __free(putname) = NULL;
- struct path *root_share_path = &share_conf->vfs_path;
+ const struct path *root_share_path = &share_conf->vfs_path;
int err, type;
struct dentry *d;
@@ -1306,7 +1306,7 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *filepath,
caseless, true);
}
-void ksmbd_vfs_kern_path_unlock(struct path *path)
+void ksmbd_vfs_kern_path_unlock(const struct path *path)
{
/* While lock is still held, ->d_parent is safe */
inode_unlock(d_inode(path->dentry->d_parent));
@@ -1856,7 +1856,7 @@ void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock)
}
int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
- struct path *path)
+ const struct path *path)
{
struct posix_acl_state acl_state;
struct posix_acl *acls;
@@ -1909,7 +1909,7 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
}
int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
- struct path *path, struct inode *parent_inode)
+ const struct path *path, struct inode *parent_inode)
{
struct posix_acl *acls;
struct posix_acl_entry *pace;
diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h
index d47472f..df6421b 100644
--- a/fs/smb/server/vfs.h
+++ b/fs/smb/server/vfs.h
@@ -123,7 +123,7 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name,
int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
unsigned int flags,
struct path *path, bool caseless);
-void ksmbd_vfs_kern_path_unlock(struct path *path);
+void ksmbd_vfs_kern_path_unlock(const struct path *path);
struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
const char *name,
unsigned int flags,
@@ -164,8 +164,8 @@ int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct xattr_dos_attrib *da);
int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
- struct path *path);
+ const struct path *path);
int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
- struct path *path,
+ const struct path *path,
struct inode *parent_inode);
#endif /* __KSMBD_VFS_H__ */
diff --git a/fs/stat.c b/fs/stat.c
index f95c1dc..6c79661 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -293,7 +293,7 @@ static int statx_lookup_flags(int flags)
return lookup_flags;
}
-static int vfs_statx_path(struct path *path, int flags, struct kstat *stat,
+static int vfs_statx_path(const struct path *path, int flags, struct kstat *stat,
u32 request_mask)
{
int error = vfs_getattr(path, stat, request_mask, flags);
diff --git a/fs/super.c b/fs/super.c
index 7f876f3..3b0f49e1 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -323,7 +323,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
if (!s)
return NULL;
- INIT_LIST_HEAD(&s->s_mounts);
s->s_user_ns = get_user_ns(user_ns);
init_rwsem(&s->s_umount);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -408,7 +407,7 @@ static void __put_super(struct super_block *s)
list_del_init(&s->s_list);
WARN_ON(s->s_dentry_lru.node);
WARN_ON(s->s_inode_lru.node);
- WARN_ON(!list_empty(&s->s_mounts));
+ WARN_ON(s->s_mounts);
call_rcu(&s->rcu, destroy_super_rcu);
}
}
diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c
index 770e29e..42bedc4 100644
--- a/fs/vboxsf/dir.c
+++ b/fs/vboxsf/dir.c
@@ -315,46 +315,39 @@ static int vboxsf_dir_atomic_open(struct inode *parent, struct dentry *dentry,
{
struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
struct vboxsf_handle *sf_handle;
- struct dentry *res = NULL;
u64 handle;
int err;
if (d_in_lookup(dentry)) {
- res = vboxsf_dir_lookup(parent, dentry, 0);
- if (IS_ERR(res))
- return PTR_ERR(res);
-
- if (res)
- dentry = res;
+ struct dentry *res = vboxsf_dir_lookup(parent, dentry, 0);
+ if (res || d_really_is_positive(dentry))
+ return finish_no_open(file, res);
}
/* Only creates */
- if (!(flags & O_CREAT) || d_really_is_positive(dentry))
- return finish_no_open(file, res);
+ if (!(flags & O_CREAT))
+ return finish_no_open(file, NULL);
err = vboxsf_dir_create(parent, dentry, mode, false, flags & O_EXCL, &handle);
if (err)
- goto out;
+ return err;
sf_handle = vboxsf_create_sf_handle(d_inode(dentry), handle, SHFL_CF_ACCESS_READWRITE);
if (IS_ERR(sf_handle)) {
vboxsf_close(sbi->root, handle);
- err = PTR_ERR(sf_handle);
- goto out;
+ return PTR_ERR(sf_handle);
}
err = finish_open(file, dentry, generic_file_open);
if (err) {
/* This also closes the handle passed to vboxsf_create_sf_handle() */
vboxsf_release_sf_handle(d_inode(dentry), sf_handle);
- goto out;
+ return err;
}
file->private_data = sf_handle;
file->f_mode |= FMODE_CREATED;
-out:
- dput(res);
- return err;
+ return 0;
}
static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry)
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index cc3e1c1..c83e02b 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -95,7 +95,10 @@ struct dentry {
seqcount_spinlock_t d_seq; /* per dentry seqlock */
struct hlist_bl_node d_hash; /* lookup hash list */
struct dentry *d_parent; /* parent directory */
- struct qstr d_name;
+ union {
+ struct qstr __d_name; /* for use ONLY in fs/dcache.c */
+ const struct qstr d_name;
+ };
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
union shortname_store d_shortname;
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index cfb0dd1..f43c83e 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -270,7 +270,7 @@ struct export_operations {
int (*commit_blocks)(struct inode *inode, struct iomap *iomaps,
int nr_iomaps, struct iattr *iattr);
int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags);
- struct file * (*open)(struct path *path, unsigned int oflags);
+ struct file * (*open)(const struct path *path, unsigned int oflags);
#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */
#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */
#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d7ab4f9..810c262 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1082,6 +1082,8 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_cred: stashed credentials of creator/opener
* @f_owner: file owner
* @f_path: path of the file
+ * @__f_path: writable alias for @f_path; *ONLY* for core VFS and only before
+ * the file gets open
* @f_pos_lock: lock protecting file position
* @f_pipe: specific to pipes
* @f_pos: file position
@@ -1107,7 +1109,10 @@ struct file {
const struct cred *f_cred;
struct fown_struct *f_owner;
/* --- cacheline 1 boundary (64 bytes) --- */
- struct path f_path;
+ union {
+ const struct path f_path;
+ struct path __f_path;
+ };
union {
/* regular files (with FMODE_ATOMIC_POS) and directories */
struct mutex f_pos_lock;
@@ -1324,6 +1329,8 @@ struct sb_writers {
struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
+struct mount;
+
struct super_block {
struct list_head s_list; /* Keep this first */
dev_t s_dev; /* search index; _not_ kdev_t */
@@ -1358,7 +1365,7 @@ struct super_block {
__u16 s_encoding_flags;
#endif
struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
- struct list_head s_mounts; /* list of mounts; _not_ for fs use */
+ struct mount *s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */
struct file *s_bdev_file;
struct backing_dev_info *s_bdi;
@@ -2879,7 +2886,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags,
const struct cred *cred);
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
const struct cred *cred);
-struct path *backing_file_user_path(const struct file *f);
+const struct path *backing_file_user_path(const struct file *f);
/*
* When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
@@ -3719,7 +3726,8 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
* happens when a directory is casefolded and the filesystem is strict
* about its encoding.
*/
-static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name)
+static inline bool generic_ci_validate_strict_name(struct inode *dir,
+ const struct qstr *name)
{
if (!IS_CASEFOLDED(dir) || !sb_has_strict_encoding(dir->i_sb))
return true;
@@ -3734,7 +3742,8 @@ static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qst
return !utf8_validate(dir->i_sb->s_encoding, name);
}
#else
-static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name)
+static inline bool generic_ci_validate_strict_name(struct inode *dir,
+ const struct qstr *name)
{
return true;
}
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 7773eb8..97b514a 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -134,8 +134,13 @@ extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_ty
extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc);
extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param);
-extern int vfs_parse_fs_string(struct fs_context *fc, const char *key,
- const char *value, size_t v_size);
+extern int vfs_parse_fs_qstr(struct fs_context *fc, const char *key,
+ const struct qstr *value);
+static inline int vfs_parse_fs_string(struct fs_context *fc, const char *key,
+ const char *value)
+{
+ return vfs_parse_fs_qstr(fc, key, value ? &QSTR(value) : NULL);
+}
int vfs_parse_monolithic_sep(struct fs_context *fc, void *data,
char *(*sep)(char **));
extern int generic_parse_monolithic(struct fs_context *fc, void *data);
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index fd11fff..aa4d6ec 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -85,7 +85,7 @@ LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry,
int mode, const struct qstr *name, const char **xattr_name,
struct lsm_context *cp)
LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode,
- struct qstr *name, const struct cred *old, struct cred *new)
+ const struct qstr *name, const struct cred *old, struct cred *new)
#ifdef CONFIG_SECURITY_PATH
LSM_HOOK(int, 0, path_unlink, const struct path *dir, struct dentry *dentry)
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5f9c053..acfe7ef 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -33,7 +33,6 @@ enum mount_flags {
MNT_NOSYMFOLLOW = 0x80,
MNT_SHRINKABLE = 0x100,
- MNT_WRITE_HOLD = 0x200,
MNT_INTERNAL = 0x4000,
@@ -52,7 +51,7 @@ enum mount_flags {
| MNT_READONLY | MNT_NOSYMFOLLOW,
MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
- MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED |
+ MNT_INTERNAL_FLAGS = MNT_INTERNAL | MNT_DOOMED |
MNT_SYNC_UMOUNT | MNT_LOCKED
};
@@ -77,7 +76,7 @@ extern void mntput(struct vfsmount *mnt);
extern struct vfsmount *mntget(struct vfsmount *mnt);
extern void mnt_make_shortterm(struct vfsmount *mnt);
extern struct vfsmount *mnt_clone_internal(const struct path *path);
-extern bool __mnt_is_readonly(struct vfsmount *mnt);
+extern bool __mnt_is_readonly(const struct vfsmount *mnt);
extern bool mnt_may_suid(struct vfsmount *mnt);
extern struct vfsmount *clone_private_mount(const struct path *path);
@@ -104,8 +103,8 @@ extern int may_umount_tree(struct vfsmount *);
extern int may_umount(struct vfsmount *);
int do_mount(const char *, const char __user *,
const char *, unsigned long, void *);
-extern struct path *collect_paths(const struct path *, struct path *, unsigned);
-extern void drop_collected_paths(struct path *, struct path *);
+extern const struct path *collect_paths(const struct path *, struct path *, unsigned);
+extern void drop_collected_paths(const struct path *, const struct path *);
extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
extern int cifs_root_data(char **dev, char **opts);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 5d08542..75c0b66 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -60,7 +60,7 @@ extern int kern_path(const char *, unsigned, struct path *);
extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int);
extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
-extern void done_path_create(struct path *, struct dentry *);
+extern void done_path_create(const struct path *, struct dentry *);
extern struct dentry *kern_path_locked(const char *, struct path *);
extern struct dentry *kern_path_locked_negative(const char *, struct path *);
extern struct dentry *user_path_locked_at(int , const char __user *, struct path *);
diff --git a/include/linux/security.h b/include/linux/security.h
index 521bcb5..3f694d3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -391,7 +391,7 @@ int security_dentry_init_security(struct dentry *dentry, int mode,
const char **xattr_name,
struct lsm_context *lsmcxt);
int security_dentry_create_files_as(struct dentry *dentry, int mode,
- struct qstr *name,
+ const struct qstr *name,
const struct cred *old,
struct cred *new);
int security_path_notify(const struct path *path, u64 mask,
@@ -871,7 +871,7 @@ static inline int security_dentry_init_security(struct dentry *dentry,
}
static inline int security_dentry_create_files_as(struct dentry *dentry,
- int mode, struct qstr *name,
+ int mode, const struct qstr *name,
const struct cred *old,
struct cred *new)
{
diff --git a/kernel/acct.c b/kernel/acct.c
index 6520baa..6163011 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -44,19 +44,14 @@
* a struct file opened for write. Fixed. 2/6/2000, AV.
*/
-#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/acct.h>
#include <linux/capability.h>
-#include <linux/file.h>
#include <linux/tty.h>
-#include <linux/security.h>
-#include <linux/vfs.h>
+#include <linux/statfs.h>
#include <linux/jiffies.h>
-#include <linux/times.h>
#include <linux/syscalls.h>
-#include <linux/mount.h>
-#include <linux/uaccess.h>
+#include <linux/namei.h>
#include <linux/sched/cputime.h>
#include <asm/div64.h>
@@ -217,84 +212,70 @@ static void close_work(struct work_struct *work)
complete(&acct->done);
}
-static int acct_on(struct filename *pathname)
+DEFINE_FREE(fput_sync, struct file *, if (!IS_ERR_OR_NULL(_T)) __fput_sync(_T))
+static int acct_on(const char __user *name)
{
- struct file *file;
- struct vfsmount *mnt, *internal;
+ /* Difference from BSD - they don't do O_APPEND */
+ const int open_flags = O_WRONLY|O_APPEND|O_LARGEFILE;
struct pid_namespace *ns = task_active_pid_ns(current);
+ struct filename *pathname __free(putname) = getname(name);
+ struct file *original_file __free(fput) = NULL; // in that order
+ struct path internal __free(path_put) = {}; // in that order
+ struct file *file __free(fput_sync) = NULL; // in that order
struct bsd_acct_struct *acct;
+ struct vfsmount *mnt;
struct fs_pin *old;
- int err;
+
+ if (IS_ERR(pathname))
+ return PTR_ERR(pathname);
+ original_file = file_open_name(pathname, open_flags, 0);
+ if (IS_ERR(original_file))
+ return PTR_ERR(original_file);
+
+ mnt = mnt_clone_internal(&original_file->f_path);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+
+ internal.mnt = mnt;
+ internal.dentry = dget(mnt->mnt_root);
+
+ file = dentry_open(&internal, open_flags, current_cred());
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return -EACCES;
+
+ /* Exclude kernel kernel internal filesystems. */
+ if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT))
+ return -EINVAL;
+
+ /* Exclude procfs and sysfs. */
+ if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE)
+ return -EINVAL;
+
+ if (!(file->f_mode & FMODE_CAN_WRITE))
+ return -EIO;
acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
if (!acct)
return -ENOMEM;
- /* Difference from BSD - they don't do O_APPEND */
- file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
- if (IS_ERR(file)) {
- kfree(acct);
- return PTR_ERR(file);
- }
-
- if (!S_ISREG(file_inode(file)->i_mode)) {
- kfree(acct);
- filp_close(file, NULL);
- return -EACCES;
- }
-
- /* Exclude kernel kernel internal filesystems. */
- if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) {
- kfree(acct);
- filp_close(file, NULL);
- return -EINVAL;
- }
-
- /* Exclude procfs and sysfs. */
- if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) {
- kfree(acct);
- filp_close(file, NULL);
- return -EINVAL;
- }
-
- if (!(file->f_mode & FMODE_CAN_WRITE)) {
- kfree(acct);
- filp_close(file, NULL);
- return -EIO;
- }
- internal = mnt_clone_internal(&file->f_path);
- if (IS_ERR(internal)) {
- kfree(acct);
- filp_close(file, NULL);
- return PTR_ERR(internal);
- }
- err = mnt_get_write_access(internal);
- if (err) {
- mntput(internal);
- kfree(acct);
- filp_close(file, NULL);
- return err;
- }
- mnt = file->f_path.mnt;
- file->f_path.mnt = internal;
-
atomic_long_set(&acct->count, 1);
init_fs_pin(&acct->pin, acct_pin_kill);
- acct->file = file;
+ acct->file = no_free_ptr(file);
acct->needcheck = jiffies;
acct->ns = ns;
mutex_init(&acct->lock);
INIT_WORK(&acct->work, close_work);
init_completion(&acct->done);
mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
- pin_insert(&acct->pin, mnt);
+ pin_insert(&acct->pin, original_file->f_path.mnt);
rcu_read_lock();
old = xchg(&ns->bacct, &acct->pin);
mutex_unlock(&acct->lock);
pin_kill(old);
- mnt_put_write_access(mnt);
- mntput(mnt);
return 0;
}
@@ -319,14 +300,9 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
return -EPERM;
if (name) {
- struct filename *tmp = getname(name);
-
- if (IS_ERR(tmp))
- return PTR_ERR(tmp);
mutex_lock(&acct_on_mutex);
- error = acct_on(tmp);
+ error = acct_on(name);
mutex_unlock(&acct_on_mutex);
- putname(tmp);
} else {
rcu_read_lock();
pin_kill(task_active_pid_ns(current)->bacct);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index b0eae2a..32007ed 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -678,7 +678,7 @@ void audit_trim_trees(void)
struct audit_tree *tree;
struct path path;
struct audit_node *node;
- struct path *paths;
+ const struct path *paths;
struct path array[16];
int err;
@@ -701,7 +701,7 @@ void audit_trim_trees(void)
struct audit_chunk *chunk = find_chunk(node);
/* this could be NULL if the watch is dying else where... */
node->index |= 1U<<31;
- for (struct path *p = paths; p->dentry; p++) {
+ for (const struct path *p = paths; p->dentry; p++) {
struct inode *inode = p->dentry->d_inode;
if (inode_to_key(inode) == chunk->key) {
node->index &= ~(1U<<31);
@@ -740,9 +740,9 @@ void audit_put_tree(struct audit_tree *tree)
put_tree(tree);
}
-static int tag_mounts(struct path *paths, struct audit_tree *tree)
+static int tag_mounts(const struct path *paths, struct audit_tree *tree)
{
- for (struct path *p = paths; p->dentry; p++) {
+ for (const struct path *p = paths; p->dentry; p++) {
int err = tag_chunk(p->dentry->d_inode, tree);
if (err)
return err;
@@ -805,7 +805,7 @@ int audit_add_tree_rule(struct audit_krule *rule)
struct audit_tree *seed = rule->tree, *tree;
struct path path;
struct path array[16];
- struct path *paths;
+ const struct path *paths;
int err;
rule->tree = NULL;
@@ -877,7 +877,7 @@ int audit_tag_tree(char *old, char *new)
int failed = 0;
struct path path1, path2;
struct path array[16];
- struct path *paths;
+ const struct path *paths;
int err;
err = kern_path(new, 0, &path2);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3ae5297..a8bd6a7 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -900,7 +900,7 @@ const struct bpf_func_proto bpf_send_signal_thread_proto = {
.arg1_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
+BPF_CALL_3(bpf_d_path, const struct path *, path, char *, buf, u32, sz)
{
struct path copy;
long len;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1b7db73..033af10 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10207,8 +10207,7 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
- ret = vfs_parse_fs_string(fc, "source",
- "tracefs", strlen("tracefs"));
+ ret = vfs_parse_fs_string(fc, "source", "tracefs");
if (!ret)
mnt = fc_mount(fc);
else
diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c
index 9129766..ac0f4be 100644
--- a/security/apparmor/af_unix.c
+++ b/security/apparmor/af_unix.c
@@ -31,7 +31,7 @@ static inline struct sock *aa_unix_sk(struct unix_sock *u)
}
static int unix_fs_perm(const char *op, u32 mask, const struct cred *subj_cred,
- struct aa_label *label, struct path *path)
+ struct aa_label *label, const struct path *path)
{
AA_BUG(!label);
AA_BUG(!path);
@@ -224,7 +224,7 @@ static int profile_create_perm(struct aa_profile *profile, int family,
static int profile_sk_perm(struct aa_profile *profile,
struct apparmor_audit_data *ad,
- u32 request, struct sock *sk, struct path *path)
+ u32 request, struct sock *sk, const struct path *path)
{
struct aa_ruleset *rules = profile->label.rules[0];
struct aa_perms *p = NULL;
@@ -386,9 +386,9 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request,
/* null peer_label is allowed, in which case the peer_sk label is used */
static int profile_peer_perm(struct aa_profile *profile, u32 request,
- struct sock *sk, struct path *path,
+ struct sock *sk, const struct path *path,
struct sockaddr_un *peer_addr,
- int peer_addrlen, struct path *peer_path,
+ int peer_addrlen, const struct path *peer_path,
struct aa_label *peer_label,
struct apparmor_audit_data *ad)
{
@@ -445,7 +445,7 @@ int aa_unix_create_perm(struct aa_label *label, int family, int type,
static int aa_unix_label_sk_perm(const struct cred *subj_cred,
struct aa_label *label,
const char *op, u32 request, struct sock *sk,
- struct path *path)
+ const struct path *path)
{
if (!unconfined(label)) {
struct aa_profile *profile;
@@ -599,9 +599,9 @@ int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock,
static int unix_peer_perm(const struct cred *subj_cred,
struct aa_label *label, const char *op, u32 request,
- struct sock *sk, struct path *path,
+ struct sock *sk, const struct path *path,
struct sockaddr_un *peer_addr, int peer_addrlen,
- struct path *peer_path, struct aa_label *peer_label)
+ const struct path *peer_path, struct aa_label *peer_label)
{
struct aa_profile *profile;
DEFINE_AUDIT_SK(ad, op, subj_cred, sk);
diff --git a/security/security.c b/security/security.c
index ad163f0..db2d75b 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1775,7 +1775,7 @@ EXPORT_SYMBOL(security_dentry_init_security);
* Return: Returns 0 on success, error on failure.
*/
int security_dentry_create_files_as(struct dentry *dentry, int mode,
- struct qstr *name,
+ const struct qstr *name,
const struct cred *old, struct cred *new)
{
return call_int_hook(dentry_create_files_as, dentry, mode,
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c95a587..58ce499 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2901,7 +2901,7 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode,
}
static int selinux_dentry_create_files_as(struct dentry *dentry, int mode,
- struct qstr *name,
+ const struct qstr *name,
const struct cred *old,
struct cred *new)
{
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index fc340a6..5caa372 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4908,7 +4908,7 @@ static int smack_inode_copy_up_xattr(struct dentry *src, const char *name)
}
static int smack_dentry_create_files_as(struct dentry *dentry, int mode,
- struct qstr *name,
+ const struct qstr *name,
const struct cred *old,
struct cred *new)
{
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index da7e230..c157976 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -219,7 +219,7 @@ extern void bpf_put_file(struct file *file) __ksym;
* including the NULL termination character, stored in the supplied
* buffer. On error, a negative integer is returned.
*/
-extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __ksym;
+extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym;
/* This macro must be used to mark the exception callback corresponding to the
* main program. For example: