| From 1758e8a2bf315a5742b8ffc227203c68a0144a95 Mon Sep 17 00:00:00 2001 |
| From: David Howells <dhowells@redhat.com> |
| Date: Fri, 10 Apr 2020 15:23:27 +0100 |
| Subject: [PATCH] afs: Fix race between post-modification dir edit and |
| readdir/d_revalidate |
| |
| commit 2105c2820d366b76f38e6ad61c75771881ecc532 upstream. |
| |
| AFS directories are retained locally as a structured file, with lookup |
| being effected by a local search of the file contents. When a modification |
| (such as mkdir) happens, the dir file content is modified locally rather |
| than redownloading the directory. |
| |
| The directory contents are accessed in a number of ways, with a number of |
| different locks schemes: |
| |
| (1) Download of contents - dvnode->validate_lock/write in afs_read_dir(). |
| |
| (2) Lookup and readdir - dvnode->validate_lock/read in afs_dir_iterate(), |
| downgrading from (1) if necessary. |
| |
| (3) d_revalidate of child dentry - dvnode->validate_lock/read in |
| afs_do_lookup_one() downgrading from (1) if necessary. |
| |
| (4) Edit of dir after modification - page locks on individual dir pages. |
| |
| Unfortunately, because (4) uses different locking scheme to (1) - (3), |
| nothing protects against the page being scanned whilst the edit is |
| underway. Even download is not safe as it doesn't lock the pages - relying |
| instead on the validate_lock to serialise as a whole (the theory being that |
| directory contents are treated as a block and always downloaded as a |
| block). |
| |
| Fix this by write-locking dvnode->validate_lock around the edits. Care |
| must be taken in the rename case as there may be two different dirs - but |
| they need not be locked at the same time. In any case, once the lock is |
| taken, the directory version must be rechecked, and the edit skipped if a |
| later version has been downloaded by revalidation (there can't have been |
| any local changes because the VFS holds the inode lock, but there can have |
| been remote changes). |
| |
| Fixes: 63a4681ff39c ("afs: Locally edit directory data for mkdir/create/unlink/...") |
| Signed-off-by: David Howells <dhowells@redhat.com> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/fs/afs/dir.c b/fs/afs/dir.c |
| index 4e2bc3d78fc5..02052f5269ae 100644 |
| --- a/fs/afs/dir.c |
| +++ b/fs/afs/dir.c |
| @@ -1224,6 +1224,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
| struct afs_fs_cursor fc; |
| struct afs_vnode *dvnode = AFS_FS_I(dir); |
| struct key *key; |
| + afs_dataversion_t data_version; |
| int ret; |
| |
| mode |= S_IFDIR; |
| @@ -1244,7 +1245,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
| |
| ret = -ERESTARTSYS; |
| if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { |
| - afs_dataversion_t data_version = dvnode->status.data_version + 1; |
| + data_version = dvnode->status.data_version + 1; |
| |
| while (afs_select_fileserver(&fc)) { |
| fc.cb_break = afs_calc_vnode_cb_break(dvnode); |
| @@ -1265,10 +1266,14 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
| goto error_key; |
| } |
| |
| - if (ret == 0 && |
| - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) |
| - afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, |
| - afs_edit_dir_for_create); |
| + if (ret == 0) { |
| + down_write(&dvnode->validate_lock); |
| + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && |
| + dvnode->status.data_version == data_version) |
| + afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, |
| + afs_edit_dir_for_create); |
| + up_write(&dvnode->validate_lock); |
| + } |
| |
| key_put(key); |
| kfree(scb); |
| @@ -1309,6 +1314,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) |
| struct afs_fs_cursor fc; |
| struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; |
| struct key *key; |
| + afs_dataversion_t data_version; |
| int ret; |
| |
| _enter("{%llx:%llu},{%pd}", |
| @@ -1340,7 +1346,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) |
| |
| ret = -ERESTARTSYS; |
| if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { |
| - afs_dataversion_t data_version = dvnode->status.data_version + 1; |
| + data_version = dvnode->status.data_version + 1; |
| |
| while (afs_select_fileserver(&fc)) { |
| fc.cb_break = afs_calc_vnode_cb_break(dvnode); |
| @@ -1353,9 +1359,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) |
| ret = afs_end_vnode_operation(&fc); |
| if (ret == 0) { |
| afs_dir_remove_subdir(dentry); |
| - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) |
| + down_write(&dvnode->validate_lock); |
| + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && |
| + dvnode->status.data_version == data_version) |
| afs_edit_dir_remove(dvnode, &dentry->d_name, |
| afs_edit_dir_for_rmdir); |
| + up_write(&dvnode->validate_lock); |
| } |
| } |
| |
| @@ -1495,10 +1504,15 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) |
| ret = afs_end_vnode_operation(&fc); |
| if (ret == 0 && !(scb[1].have_status || scb[1].have_error)) |
| ret = afs_dir_remove_link(dvnode, dentry, key); |
| - if (ret == 0 && |
| - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) |
| - afs_edit_dir_remove(dvnode, &dentry->d_name, |
| - afs_edit_dir_for_unlink); |
| + |
| + if (ret == 0) { |
| + down_write(&dvnode->validate_lock); |
| + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && |
| + dvnode->status.data_version == data_version) |
| + afs_edit_dir_remove(dvnode, &dentry->d_name, |
| + afs_edit_dir_for_unlink); |
| + up_write(&dvnode->validate_lock); |
| + } |
| } |
| |
| if (need_rehash && ret < 0 && ret != -ENOENT) |
| @@ -1524,6 +1538,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, |
| struct afs_status_cb *scb; |
| struct afs_vnode *dvnode = AFS_FS_I(dir); |
| struct key *key; |
| + afs_dataversion_t data_version; |
| int ret; |
| |
| mode |= S_IFREG; |
| @@ -1548,7 +1563,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, |
| |
| ret = -ERESTARTSYS; |
| if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { |
| - afs_dataversion_t data_version = dvnode->status.data_version + 1; |
| + data_version = dvnode->status.data_version + 1; |
| |
| while (afs_select_fileserver(&fc)) { |
| fc.cb_break = afs_calc_vnode_cb_break(dvnode); |
| @@ -1569,9 +1584,12 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, |
| goto error_key; |
| } |
| |
| - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) |
| + down_write(&dvnode->validate_lock); |
| + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && |
| + dvnode->status.data_version == data_version) |
| afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, |
| afs_edit_dir_for_create); |
| + up_write(&dvnode->validate_lock); |
| |
| kfree(scb); |
| key_put(key); |
| @@ -1599,6 +1617,7 @@ static int afs_link(struct dentry *from, struct inode *dir, |
| struct afs_vnode *dvnode = AFS_FS_I(dir); |
| struct afs_vnode *vnode = AFS_FS_I(d_inode(from)); |
| struct key *key; |
| + afs_dataversion_t data_version; |
| int ret; |
| |
| _enter("{%llx:%llu},{%llx:%llu},{%pd}", |
| @@ -1623,7 +1642,7 @@ static int afs_link(struct dentry *from, struct inode *dir, |
| |
| ret = -ERESTARTSYS; |
| if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { |
| - afs_dataversion_t data_version = dvnode->status.data_version + 1; |
| + data_version = dvnode->status.data_version + 1; |
| |
| if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { |
| afs_end_vnode_operation(&fc); |
| @@ -1653,9 +1672,12 @@ static int afs_link(struct dentry *from, struct inode *dir, |
| goto error_key; |
| } |
| |
| - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) |
| + down_write(&dvnode->validate_lock); |
| + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && |
| + dvnode->status.data_version == data_version) |
| afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid, |
| afs_edit_dir_for_link); |
| + up_write(&dvnode->validate_lock); |
| |
| key_put(key); |
| kfree(scb); |
| @@ -1683,6 +1705,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, |
| struct afs_status_cb *scb; |
| struct afs_vnode *dvnode = AFS_FS_I(dir); |
| struct key *key; |
| + afs_dataversion_t data_version; |
| int ret; |
| |
| _enter("{%llx:%llu},{%pd},%s", |
| @@ -1710,7 +1733,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, |
| |
| ret = -ERESTARTSYS; |
| if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { |
| - afs_dataversion_t data_version = dvnode->status.data_version + 1; |
| + data_version = dvnode->status.data_version + 1; |
| |
| while (afs_select_fileserver(&fc)) { |
| fc.cb_break = afs_calc_vnode_cb_break(dvnode); |
| @@ -1731,9 +1754,12 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, |
| goto error_key; |
| } |
| |
| - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) |
| + down_write(&dvnode->validate_lock); |
| + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && |
| + dvnode->status.data_version == data_version) |
| afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, |
| afs_edit_dir_for_symlink); |
| + up_write(&dvnode->validate_lock); |
| |
| key_put(key); |
| kfree(scb); |
| @@ -1763,6 +1789,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, |
| struct dentry *tmp = NULL, *rehash = NULL; |
| struct inode *new_inode; |
| struct key *key; |
| + afs_dataversion_t orig_data_version; |
| + afs_dataversion_t new_data_version; |
| bool new_negative = d_is_negative(new_dentry); |
| int ret; |
| |
| @@ -1841,9 +1869,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, |
| |
| ret = -ERESTARTSYS; |
| if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { |
| - afs_dataversion_t orig_data_version; |
| - afs_dataversion_t new_data_version; |
| - |
| orig_data_version = orig_dvnode->status.data_version + 1; |
| |
| if (orig_dvnode != new_dvnode) { |
| @@ -1879,18 +1904,25 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, |
| if (ret == 0) { |
| if (rehash) |
| d_rehash(rehash); |
| - if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags)) |
| - afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, |
| - afs_edit_dir_for_rename_0); |
| + down_write(&orig_dvnode->validate_lock); |
| + if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) && |
| + orig_dvnode->status.data_version == orig_data_version) |
| + afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, |
| + afs_edit_dir_for_rename_0); |
| + if (orig_dvnode != new_dvnode) { |
| + up_write(&orig_dvnode->validate_lock); |
| |
| - if (!new_negative && |
| - test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) |
| - afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, |
| - afs_edit_dir_for_rename_1); |
| + down_write(&new_dvnode->validate_lock); |
| + } |
| + if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) && |
| + orig_dvnode->status.data_version == new_data_version) { |
| + if (!new_negative) |
| + afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, |
| + afs_edit_dir_for_rename_1); |
| |
| - if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) |
| afs_edit_dir_add(new_dvnode, &new_dentry->d_name, |
| &vnode->fid, afs_edit_dir_for_rename_2); |
| + } |
| |
| new_inode = d_inode(new_dentry); |
| if (new_inode) { |
| @@ -1909,6 +1941,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, |
| afs_update_dentry_version(&fc, old_dentry, &scb[1]); |
| afs_update_dentry_version(&fc, new_dentry, &scb[1]); |
| d_move(old_dentry, new_dentry); |
| + up_write(&new_dvnode->validate_lock); |
| goto error_tmp; |
| } |
| |
| diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c |
| index 057b8d322422..bb3fcbfd3afc 100644 |
| --- a/fs/afs/dir_silly.c |
| +++ b/fs/afs/dir_silly.c |
| @@ -21,6 +21,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode |
| { |
| struct afs_fs_cursor fc; |
| struct afs_status_cb *scb; |
| + afs_dataversion_t dir_data_version; |
| int ret = -ERESTARTSYS; |
| |
| _enter("%pd,%pd", old, new); |
| @@ -31,7 +32,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode |
| |
| trace_afs_silly_rename(vnode, false); |
| if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { |
| - afs_dataversion_t dir_data_version = dvnode->status.data_version + 1; |
| + dir_data_version = dvnode->status.data_version + 1; |
| |
| while (afs_select_fileserver(&fc)) { |
| fc.cb_break = afs_calc_vnode_cb_break(dvnode); |
| @@ -54,13 +55,17 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode |
| dvnode->silly_key = key_get(key); |
| } |
| |
| - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) |
| + down_write(&dvnode->validate_lock); |
| + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && |
| + dvnode->status.data_version == dir_data_version) { |
| afs_edit_dir_remove(dvnode, &old->d_name, |
| afs_edit_dir_for_silly_0); |
| - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) |
| afs_edit_dir_add(dvnode, &new->d_name, |
| &vnode->fid, afs_edit_dir_for_silly_1); |
| |
| + } |
| + up_write(&dvnode->validate_lock); |
| + |
| /* vfs_unlink and the like do not issue this when a file is |
| * sillyrenamed, so do it here. |
| */ |
| @@ -186,10 +191,14 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode |
| clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); |
| } |
| } |
| - if (ret == 0 && |
| - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) |
| - afs_edit_dir_remove(dvnode, &dentry->d_name, |
| - afs_edit_dir_for_unlink); |
| + if (ret == 0) { |
| + down_write(&dvnode->validate_lock); |
| + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && |
| + dvnode->status.data_version == dir_data_version) |
| + afs_edit_dir_remove(dvnode, &dentry->d_name, |
| + afs_edit_dir_for_unlink); |
| + up_write(&dvnode->validate_lock); |
| + } |
| } |
| |
| kfree(scb); |
| -- |
| 2.7.4 |
| |