| From f30d500f809eca67a21704347ab14bb35877b5ee Mon Sep 17 00:00:00 2001 |
| From: Dave Chinner <dchinner@redhat.com> |
| Date: Wed, 7 Mar 2012 04:50:25 +0000 |
| Subject: xfs: fix inode lookup race |
| |
| From: Dave Chinner <dchinner@redhat.com> |
| |
| commit f30d500f809eca67a21704347ab14bb35877b5ee upstream. |
| |
| When we get concurrent lookups of the same inode that is not in the |
| per-AG inode cache, there is a race condition that triggers warnings |
| in unlock_new_inode() indicating that we are initialising an inode |
| that isn't in a the correct state for a new inode. |
| |
| When we do an inode lookup via a file handle or a bulkstat, we don't |
| serialise lookups at a higher level through the dentry cache (i.e. |
| pathless lookup), and so we can get concurrent lookups of the same |
| inode. |
| |
| The race condition is between the insertion of the inode into the |
| cache in the case of a cache miss and a concurrently lookup: |
| |
| Thread 1 Thread 2 |
| xfs_iget() |
| xfs_iget_cache_miss() |
| xfs_iread() |
| lock radix tree |
| radix_tree_insert() |
| rcu_read_lock |
| radix_tree_lookup |
| lock inode flags |
| XFS_INEW not set |
| igrab() |
| unlock inode flags |
| rcu_read_unlock |
| use uninitialised inode |
| ..... |
| lock inode flags |
| set XFS_INEW |
| unlock inode flags |
| unlock radix tree |
| xfs_setup_inode() |
| inode flags = I_NEW |
| unlock_new_inode() |
| WARNING as inode flags != I_NEW |
| |
| This can lead to inode corruption, inode list corruption, etc, and |
| is generally a bad thing to occur. |
| |
| Fix this by setting XFS_INEW before inserting the inode into the |
| radix tree. This will ensure any concurrent lookup will find the new |
| inode with XFS_INEW set and that forces the lookup to wait until the |
| XFS_INEW flag is removed before allowing the lookup to succeed. |
| |
| Signed-off-by: Dave Chinner <dchinner@redhat.com> |
| Reviewed-by: Christoph Hellwig <hch@lst.de> |
| Signed-off-by: Ben Myers <bpm@sgi.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| fs/xfs/xfs_iget.c | 18 ++++++++++++------ |
| 1 file changed, 12 insertions(+), 6 deletions(-) |
| |
| --- a/fs/xfs/xfs_iget.c |
| +++ b/fs/xfs/xfs_iget.c |
| @@ -353,9 +353,20 @@ xfs_iget_cache_miss( |
| BUG(); |
| } |
| |
| - spin_lock(&pag->pag_ici_lock); |
| + /* |
| + * These values must be set before inserting the inode into the radix |
| + * tree as the moment it is inserted a concurrent lookup (allowed by the |
| + * RCU locking mechanism) can find it and that lookup must see that this |
| + * is an inode currently under construction (i.e. that XFS_INEW is set). |
| + * The ip->i_flags_lock that protects the XFS_INEW flag forms the |
| + * memory barrier that ensures this detection works correctly at lookup |
| + * time. |
| + */ |
| + ip->i_udquot = ip->i_gdquot = NULL; |
| + xfs_iflags_set(ip, XFS_INEW); |
| |
| /* insert the new inode */ |
| + spin_lock(&pag->pag_ici_lock); |
| error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
| if (unlikely(error)) { |
| WARN_ON(error != -EEXIST); |
| @@ -363,11 +374,6 @@ xfs_iget_cache_miss( |
| error = EAGAIN; |
| goto out_preload_end; |
| } |
| - |
| - /* These values _must_ be set before releasing the radix tree lock! */ |
| - ip->i_udquot = ip->i_gdquot = NULL; |
| - xfs_iflags_set(ip, XFS_INEW); |
| - |
| spin_unlock(&pag->pag_ici_lock); |
| radix_tree_preload_end(); |
| |