| From b8b8f0a0ba7a6bfd266a1498ee8cfb72e864a453 Mon Sep 17 00:00:00 2001 |
| From: Theodore Ts'o <tytso@mit.edu> |
| Date: Fri, 18 Sep 2009 13:34:02 -0400 |
| Subject: [PATCH 07/85] ext4: Avoid group preallocation for closed files |
| |
| (cherry picked from commit 50797481a7bdee548589506d7d7b48b08bc14dcd) |
| |
| Currently the group preallocation code tries to find a large (512) |
| free block from which to do per-cpu group allocation for small files. |
| The problem with this scheme is that it leaves the filesystem horribly |
| fragmented. In the worst case, if the filesystem is unmounted and |
| remounted (after a system shutdown, for example) we forget the fact |
| that we were using a particular (now-partially filled) 512 block |
| extent. So the next time we try to allocate space for a small file, |
| we will find *another* completely free 512 block chunk to allocate |
| small files. Given that there are 32,768 blocks in a block group, |
| after 64 iterations of "mount, write one 4k file in a directory, |
| unmount", the block group will have 64 files, each separated by 511 |
| blocks, and the block group will no longer have any completely free |
| 512-block chunks left for group preallocation space. |
| |
| So if we try to allocate blocks for a file that has been closed, such |
| that we know the final size of the file, and the filesystem is not |
| busy, avoid using group preallocation. |
| |
| Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| --- |
| fs/ext4/ext4.h | 30 +++++++++++++++++++++++++++++- |
| fs/ext4/mballoc.c | 10 +++++++++- |
| 2 files changed, 38 insertions(+), 2 deletions(-) |
| |
| --- a/fs/ext4/ext4.h |
| +++ b/fs/ext4/ext4.h |
| @@ -952,6 +952,7 @@ struct ext4_sb_info { |
| atomic_t s_mb_lost_chunks; |
| atomic_t s_mb_preallocated; |
| atomic_t s_mb_discarded; |
| + atomic_t s_lock_busy; |
| |
| /* locality groups */ |
| struct ext4_locality_group *s_locality_groups; |
| @@ -1593,15 +1594,42 @@ struct ext4_group_info { |
| #define EXT4_MB_GRP_NEED_INIT(grp) \ |
| (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
| |
| +#define EXT4_MAX_CONTENTION 8 |
| +#define EXT4_CONTENTION_THRESHOLD 2 |
| + |
| static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, |
| ext4_group_t group) |
| { |
| return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); |
| } |
| |
| +/*
| + * Returns nonzero when s_lock_busy exceeds EXT4_CONTENTION_THRESHOLD,
| + * i.e. attempts to take the block group locks have hit contention.
| + */
| +static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
| +{
| + return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
| +}
| + |
| static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
| {
| - spin_lock(ext4_group_lock_ptr(sb, group));
| + spinlock_t *lock = ext4_group_lock_ptr(sb, group);
| + if (spin_trylock(lock))
| + /*
| + * Got the lock without waiting: decrement the contention
| + * counter by one, unless it is already zero.
| + */
| + atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
| + else {
| + /*
| + * The trylock failed: bump the contention counter (unless
| + * it is already EXT4_MAX_CONTENTION), then wait for the lock.
| + */
| + atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
| + EXT4_MAX_CONTENTION);
| + spin_lock(lock);
| + }
| }
| |
| static inline void ext4_unlock_group(struct super_block *sb, |
| --- a/fs/ext4/mballoc.c |
| +++ b/fs/ext4/mballoc.c |
| @@ -4154,9 +4154,17 @@ static void ext4_mb_group_or_file(struct |
| return; |
| |
| size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; |
| - isize = i_size_read(ac->ac_inode) >> bsbits; |
| + isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) |
| + >> bsbits; |
| size = max(size, isize); |
| |
| + if ((size == isize) && |
| + !ext4_fs_is_busy(sbi) && |
| + (atomic_read(&ac->ac_inode->i_writecount) == 0)) { |
| + ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; |
| + return; |
| + } |
| + |
| /* don't use group allocation for large files */ |
| if (size >= sbi->s_mb_stream_request) { |
| ac->ac_flags |= EXT4_MB_STREAM_ALLOC; |