| /* |
| * Copyright (C) International Business Machines Corp., 2000-2004 |
| * Portions Copyright (C) Christoph Hellwig, 2001-2002 |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
| * the GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, write to the Free Software |
| * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| */ |
| |
| #include <linux/fs.h> |
| #include <linux/init.h> |
| #include "jfs_incore.h" |
| #include "jfs_superblock.h" |
| #include "jfs_filsys.h" |
| #include "jfs_metapage.h" |
| #include "jfs_txnmgr.h" |
| #include "jfs_debug.h" |
| |
| extern struct task_struct *jfsCommitTask; |
| static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED; |
| static wait_queue_head_t meta_wait; |
| |
| #ifdef CONFIG_JFS_STATISTICS |
| struct { |
| uint pagealloc; /* # of page allocations */ |
| uint pagefree; /* # of page frees */ |
| uint lockwait; /* # of sleeping lock_metapage() calls */ |
| uint allocwait; /* # of sleeping alloc_metapage() calls */ |
| } mpStat; |
| #endif |
| |
| #define HASH_BITS 10 /* 1024 buckets: one 4K page of pointers on 32-bit */ |
| #define HASH_SIZE (1 << HASH_BITS) |
| static struct metapage **hash_table = NULL; |
| static unsigned long hash_order; |
| |
| static inline int metapage_locked(struct metapage *mp) |
| { |
| return test_bit(META_locked, &mp->flag); |
| } |
| |
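| /* |
|  * Like test_and_set_bit(), this returns zero if we got the lock and |
|  * non-zero if it was already held - the opposite of the usual trylock |
|  * convention. |
|  */ |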
| static inline int trylock_metapage(struct metapage *mp) |
| { |
| return test_and_set_bit(META_locked, &mp->flag); |
| } |
| |
| static inline void unlock_metapage(struct metapage *mp) |
| { |
| clear_bit(META_locked, &mp->flag); |
| wake_up(&mp->wait); |
| } |
| |
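| /* |
|  * Slow path for lock_metapage().  Called with meta_lock held; drops |
|  * and retakes it around schedule() until the holder wakes us via |
|  * unlock_metapage(). |
|  */ |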
| static void __lock_metapage(struct metapage *mp) |
| { |
| DECLARE_WAITQUEUE(wait, current); |
| |
| INCREMENT(mpStat.lockwait); |
| |
| add_wait_queue_exclusive(&mp->wait, &wait); |
| do { |
| set_current_state(TASK_UNINTERRUPTIBLE); |
| if (metapage_locked(mp)) { |
| spin_unlock(&meta_lock); |
| schedule(); |
| spin_lock(&meta_lock); |
| } |
| } while (trylock_metapage(mp)); |
| __set_current_state(TASK_RUNNING); |
| remove_wait_queue(&mp->wait, &wait); |
| } |
| |
| /* needs meta_lock */ |
| static inline void lock_metapage(struct metapage *mp) |
| { |
| if (trylock_metapage(mp)) |
| __lock_metapage(mp); |
| } |
| |
| /* |
| * metapage pool is based on Linux 2.5's mempool |
| * |
| * Tap into reserved structures in critical paths where waiting on a |
| * memory allocation could cause deadlock |
| */ |
| #define METAPOOL_MIN_PAGES 32 |
| static struct metapage *reserved_metapages[METAPOOL_MIN_PAGES]; |
| static int num_reserved = 0; |
| kmem_cache_t *metapage_cache; |
| |
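| /* |
|  * Slab constructor: runs once when each metapage's slab memory is |
|  * first allocated, so objects leave kmem_cache_alloc() already |
|  * initialized and flagged META_free. |
|  */ |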
| static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) |
| { |
| struct metapage *mp = (struct metapage *)foo; |
| |
| if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == |
| SLAB_CTOR_CONSTRUCTOR) { |
| mp->lid = 0; |
| mp->lsn = 0; |
| mp->flag = 0; |
| mp->data = NULL; |
| mp->clsn = 0; |
| mp->log = NULL; |
| set_bit(META_free, &mp->flag); |
| init_waitqueue_head(&mp->wait); |
| } |
| } |
| |
| static void empty_reserved(void) |
| { |
| while (num_reserved > 0) |
| kmem_cache_free(metapage_cache, |
| reserved_metapages[--num_reserved]); |
| } |
| |
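| /* |
|  * Called with meta_lock held.  Tries an atomic allocation first; if |
|  * that fails, either taps the reserve pool (no_wait) or drops the |
|  * lock for a blocking allocation.  *dropped_lock tells the caller the |
|  * lock was released, so any state derived under it must be |
|  * revalidated. |
|  */ |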
| static struct metapage *alloc_metapage(int *dropped_lock, int no_wait) |
| { |
| struct metapage *new; |
| |
| *dropped_lock = 0; |
| |
| /* |
| * Always try an atomic alloc first, to avoid dropping the |
| * spinlock |
| */ |
| new = kmem_cache_alloc(metapage_cache, GFP_ATOMIC); |
| if (new) |
| return new; |
| |
| if (no_wait && num_reserved) |
| return reserved_metapages[--num_reserved]; |
| |
| *dropped_lock = 1; |
| spin_unlock(&meta_lock); |
| new = kmem_cache_alloc(metapage_cache, GFP_NOFS); |
| spin_lock(&meta_lock); |
| return new; |
| } |
| |
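| /* needs meta_lock; refill the reserve pool before freeing to the slab */ |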
| static void __free_metapage(struct metapage *mp) |
| { |
| mp->flag = 0; |
| set_bit(META_free, &mp->flag); |
| |
| if (num_reserved < METAPOOL_MIN_PAGES) |
| reserved_metapages[num_reserved++] = mp; |
| else |
| kmem_cache_free(metapage_cache, mp); |
| } |
| |
| static inline void free_metapage(struct metapage * mp) |
| { |
| spin_lock(&meta_lock); |
| __free_metapage(mp); |
| spin_unlock(&meta_lock); |
| } |
| |
| int __init metapage_init(void) |
| { |
| struct metapage *mp; |
| |
| /* |
| * Initialize wait queue |
| */ |
| init_waitqueue_head(&meta_wait); |
| |
| /* |
| * Allocate the metapage structures |
| */ |
| metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage), |
| 0, 0, init_once, NULL); |
| if (metapage_cache == NULL) |
| return -ENOMEM; |
| |
| while (num_reserved < METAPOOL_MIN_PAGES) { |
| mp = kmem_cache_alloc(metapage_cache, GFP_NOFS); |
| if (mp) |
| reserved_metapages[num_reserved++] = mp; |
| else { |
| empty_reserved(); |
| kmem_cache_destroy(metapage_cache); |
| return -ENOMEM; |
| } |
| } |
| /* |
| * Now the hash list |
| */ |
| for (hash_order = 0; |
| ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE; |
| hash_order++); |
| hash_table = |
| (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order); |
| assert(hash_table); |
| memset(hash_table, 0, PAGE_SIZE << hash_order); |
| |
| return 0; |
| } |
| |
| void metapage_exit(void) |
| { |
| empty_reserved(); |
| kmem_cache_destroy(metapage_cache); |
| } |
| |
| /* |
|  * Same hash as _page_hashfn() in pagemap.h, but indexing our own table. |
|  * Dividing the mapping pointer by the largest power of two that divides |
|  * sizeof(struct inode) strips its always-zero low bits before folding |
|  * in the block index. |
|  */ |
| static struct metapage **meta_hash(struct address_space *mapping, |
| unsigned long index) |
| { |
| #define i (((unsigned long)mapping) / \ |
| (sizeof(struct inode) & ~(sizeof(struct inode) - 1))) |
| #define s(x) ((x) + ((x) >> HASH_BITS)) |
| return hash_table + (s(i + index) & (HASH_SIZE - 1)); |
| #undef i |
| #undef s |
| } |
| |
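| /* |
|  * The hash chain helpers below (search_hash, add_to_hash, |
|  * remove_from_hash) all assume the caller holds meta_lock. |
|  */ |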
| static struct metapage *search_hash(struct metapage ** hash_ptr, |
| struct address_space *mapping, |
| unsigned long index) |
| { |
| struct metapage *ptr; |
| |
| for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) { |
| if ((ptr->mapping == mapping) && (ptr->index == index)) |
| return ptr; |
| } |
| |
| return NULL; |
| } |
| |
| static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr) |
| { |
| if (*hash_ptr) |
| (*hash_ptr)->hash_prev = mp; |
| |
| mp->hash_prev = NULL; |
| mp->hash_next = *hash_ptr; |
| *hash_ptr = mp; |
| } |
| |
| static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr) |
| { |
| if (mp->hash_prev) |
| mp->hash_prev->hash_next = mp->hash_next; |
| else { |
| assert(*hash_ptr == mp); |
| *hash_ptr = mp->hash_next; |
| } |
| |
| if (mp->hash_next) |
| mp->hash_next->hash_prev = mp->hash_prev; |
| } |
| |
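| /* |
|  * Find or create the metapage mapping lblock.  'absolute' selects the |
|  * block device's mapping instead of the inode's own; 'new' means the |
|  * caller will initialize the page, so it is grabbed rather than read |
|  * and its data is zero-filled.  Returns the metapage locked with its |
|  * reference count raised, or NULL on failure. |
|  */ |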
| struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, |
| unsigned int size, int absolute, |
| unsigned long new) |
| { |
| int dropped_lock; |
| struct metapage **hash_ptr; |
| int l2BlocksPerPage; |
| int l2bsize; |
| int no_wait; |
| struct address_space *mapping; |
| struct metapage *mp; |
| unsigned long page_index; |
| unsigned long page_offset; |
| |
| jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock); |
| |
| if (absolute) |
| mapping = inode->i_sb->s_bdev->bd_inode->i_mapping; |
| else { |
| /* |
|  * If an NFS client asks for an inode number beyond the highest |
|  * one allocated, we could try to read past the end of the |
|  * inode map |
|  */ |
| if ((lblock << inode->i_blkbits) >= inode->i_size) |
| return NULL; |
| mapping = inode->i_mapping; |
| } |
| |
| hash_ptr = meta_hash(mapping, lblock); |
| again: |
| spin_lock(&meta_lock); |
| mp = search_hash(hash_ptr, mapping, lblock); |
| if (mp) { |
| page_found: |
| if (test_bit(META_stale, &mp->flag)) { |
| spin_unlock(&meta_lock); |
| yield(); |
| goto again; |
| } |
| mp->count++; |
| lock_metapage(mp); |
| spin_unlock(&meta_lock); |
| if (test_bit(META_discard, &mp->flag)) { |
| if (!new) { |
| jfs_error(inode->i_sb, |
| "__get_metapage: using a " |
| "discarded metapage"); |
| release_metapage(mp); |
| return NULL; |
| } |
| clear_bit(META_discard, &mp->flag); |
| } |
| jfs_info("__get_metapage: found 0x%p, in hash", mp); |
| if (mp->logical_size != size) { |
| jfs_error(inode->i_sb, |
| "__get_metapage: mp->logical_size != size"); |
| release_metapage(mp); |
| return NULL; |
| } |
| } else { |
| l2bsize = inode->i_blkbits; |
| l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; |
| page_index = lblock >> l2BlocksPerPage; |
| page_offset = (lblock - (page_index << l2BlocksPerPage)) << |
| l2bsize; |
| if ((page_offset + size) > PAGE_CACHE_SIZE) { |
| spin_unlock(&meta_lock); |
| jfs_err("MetaData crosses page boundary!!"); |
| return NULL; |
| } |
| |
| /* |
| * Locks held on aggregate inode pages are usually |
| * not held long, and they are taken in critical code |
| * paths (committing dirty inodes, txCommit thread) |
| * |
| * Attempt to get metapage without blocking, tapping into |
| * reserves if necessary. |
| */ |
| if (JFS_IP(inode)->fileset == AGGREGATE_I) |
| no_wait = 1; |
| else |
| no_wait = 0; |
| |
| mp = alloc_metapage(&dropped_lock, no_wait); |
| if (!mp) { |
| spin_unlock(&meta_lock); |
| return NULL; |
| } |
| if (dropped_lock) { |
| /* alloc_metapage dropped meta_lock, so another thread may |
| * have added this metapage; search the hash again. |
| */ |
| struct metapage *mp2; |
| mp2 = search_hash(hash_ptr, mapping, lblock); |
| if (mp2) { |
| __free_metapage(mp); |
| mp = mp2; |
| goto page_found; |
| } |
| } |
| mp->flag = 0; |
| lock_metapage(mp); |
| if (absolute) |
| set_bit(META_absolute, &mp->flag); |
| mp->xflag = COMMIT_PAGE; |
| mp->count = 1; |
| atomic_set(&mp->nohomeok, 0); |
| mp->mapping = mapping; |
| mp->index = lblock; |
| mp->page = NULL; |
| mp->logical_size = size; |
| add_to_hash(mp, hash_ptr); |
| spin_unlock(&meta_lock); |
| |
| if (new) { |
| jfs_info("__get_metapage: Calling grab_cache_page"); |
| mp->page = grab_cache_page(mapping, page_index); |
| if (!mp->page) { |
| jfs_err("grab_cache_page failed!"); |
| goto freeit; |
| } else { |
| INCREMENT(mpStat.pagealloc); |
| UnlockPage(mp->page); |
| } |
| } else { |
| jfs_info("__get_metapage: Calling read_cache_page"); |
| mp->page = read_cache_page(mapping, page_index, |
| (filler_t *)mapping->a_ops->readpage, NULL); |
| if (IS_ERR(mp->page)) { |
| jfs_err("read_cache_page failed!"); |
| goto freeit; |
| } else |
| INCREMENT(mpStat.pagealloc); |
| } |
| mp->data = kmap(mp->page) + page_offset; |
| } |
| |
| if (new) |
| memset(mp->data, 0, PSIZE); |
| |
| jfs_info("__get_metapage: returning = 0x%p", mp); |
| return mp; |
| |
| freeit: |
| spin_lock(&meta_lock); |
| remove_from_hash(mp, hash_ptr); |
| __free_metapage(mp); |
| spin_unlock(&meta_lock); |
| return NULL; |
| } |
| |
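| /* |
|  * Take an extra reference to the metapage and lock it.  With 'force', |
|  * do not sleep on a metapage locked by another thread: flag it |
|  * META_forced instead, so the matching release_metapage() only drops |
|  * the reference. |
|  */ |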
| void hold_metapage(struct metapage * mp, int force) |
| { |
| spin_lock(&meta_lock); |
| |
| mp->count++; |
| |
| if (force) { |
| ASSERT (!(test_bit(META_forced, &mp->flag))); |
| if (trylock_metapage(mp)) |
| set_bit(META_forced, &mp->flag); |
| } else |
| lock_metapage(mp); |
| |
| spin_unlock(&meta_lock); |
| } |
| |
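| /* |
|  * Write the metapage contents back through the mapping's |
|  * prepare_write/commit_write address_space operations.  A metapage |
|  * marked META_discard is no longer valid and is simply marked clean. |
|  */ |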
| static void __write_metapage(struct metapage * mp) |
| { |
| int l2bsize = mp->mapping->host->i_blkbits; |
| int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; |
| unsigned long page_index; |
| unsigned long page_offset; |
| int rc; |
| |
| jfs_info("__write_metapage: mp = 0x%p", mp); |
| |
| if (test_bit(META_discard, &mp->flag)) { |
| /* |
| * This metadata is no longer valid |
| */ |
| clear_bit(META_dirty, &mp->flag); |
| return; |
| } |
| |
| page_index = mp->page->index; |
| page_offset = |
| (mp->index - (page_index << l2BlocksPerPage)) << l2bsize; |
| |
| lock_page(mp->page); |
| rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset, |
| page_offset + |
| mp->logical_size); |
| if (rc) { |
| jfs_err("prepare_write return %d!", rc); |
| ClearPageUptodate(mp->page); |
| UnlockPage(mp->page); |
| kunmap(mp->page); |
| clear_bit(META_dirty, &mp->flag); |
| return; |
| } |
| rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset, |
| page_offset + |
| mp->logical_size); |
| if (rc) { |
| jfs_err("commit_write returned %d", rc); |
| } |
| |
| UnlockPage(mp->page); |
| clear_bit(META_dirty, &mp->flag); |
| |
| jfs_info("__write_metapage done"); |
| } |
| |
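| /* Push the metapage's buffers to disk and wait for the I/O to complete */ |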
| static inline void sync_metapage(struct metapage *mp) |
| { |
| struct page *page = mp->page; |
| |
| page_cache_get(page); |
| lock_page(page); |
| |
| /* we're done with this page - no need to check for errors */ |
| if (page->buffers) { |
| writeout_one_page(page); |
| waitfor_one_page(page); |
| } |
| |
| UnlockPage(page); |
| page_cache_release(page); |
| } |
| |
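| /* |
|  * Drop a reference to the metapage.  On the final release, write the |
|  * page back if dirty, sync it if requested, unhook it from the log |
|  * sync list and the hash chain, and free it.  META_stale is set while |
|  * meta_lock is dropped so that concurrent lookups retry rather than |
|  * grab a metapage that is being torn down. |
|  */ |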
| void release_metapage(struct metapage * mp) |
| { |
| struct jfs_log *log; |
| |
| jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag); |
| |
| spin_lock(&meta_lock); |
| if (test_bit(META_forced, &mp->flag)) { |
| clear_bit(META_forced, &mp->flag); |
| mp->count--; |
| spin_unlock(&meta_lock); |
| return; |
| } |
| |
| assert(mp->count); |
| if (--mp->count || atomic_read(&mp->nohomeok)) { |
| unlock_metapage(mp); |
| spin_unlock(&meta_lock); |
| return; |
| } |
| |
| if (mp->page) { |
| set_bit(META_stale, &mp->flag); |
| spin_unlock(&meta_lock); |
| kunmap(mp->page); |
| mp->data = NULL; |
| if (test_bit(META_dirty, &mp->flag)) |
| __write_metapage(mp); |
| if (test_bit(META_sync, &mp->flag)) { |
| sync_metapage(mp); |
| clear_bit(META_sync, &mp->flag); |
| } |
| |
| if (test_bit(META_discard, &mp->flag)) { |
| lock_page(mp->page); |
| block_flushpage(mp->page, 0); |
| UnlockPage(mp->page); |
| } |
| |
| page_cache_release(mp->page); |
| mp->page = NULL; |
| INCREMENT(mpStat.pagefree); |
| spin_lock(&meta_lock); |
| } |
| |
| if (mp->lsn) { |
| /* |
| * Remove metapage from logsynclist. |
| */ |
| log = mp->log; |
| LOGSYNC_LOCK(log); |
| mp->log = NULL; |
| mp->lsn = 0; |
| mp->clsn = 0; |
| log->count--; |
| list_del(&mp->synclist); |
| LOGSYNC_UNLOCK(log); |
| } |
| remove_from_hash(mp, meta_hash(mp->mapping, mp->index)); |
| spin_unlock(&meta_lock); |
| |
| free_metapage(mp); |
| } |
| |
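| /* |
|  * Mark the metapages covering [addr, addr + len) as discarded so they |
|  * are never written back; cached pages with no metapage are flushed |
|  * directly. |
|  */ |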
| void __invalidate_metapages(struct inode *ip, s64 addr, int len) |
| { |
| struct metapage **hash_ptr; |
| unsigned long lblock; |
| int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits; |
| /* All callers are interested in the block device's mapping */ |
| struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping; |
| struct metapage *mp; |
| struct page *page; |
| |
| /* |
| * First, mark metapages to discard. They will eventually be |
| * released, but should not be written. |
| */ |
| for (lblock = addr; lblock < addr + len; |
| lblock += 1 << l2BlocksPerPage) { |
| hash_ptr = meta_hash(mapping, lblock); |
| again: |
| spin_lock(&meta_lock); |
| mp = search_hash(hash_ptr, mapping, lblock); |
| if (mp) { |
| if (test_bit(META_stale, &mp->flag)) { |
| spin_unlock(&meta_lock); |
| yield(); |
| goto again; |
| } |
| |
| set_bit(META_discard, &mp->flag); |
| spin_unlock(&meta_lock); |
| } else { |
| spin_unlock(&meta_lock); |
| page = find_lock_page(mapping, lblock >> l2BlocksPerPage); |
| if (page) { |
| block_flushpage(page, 0); |
| UnlockPage(page); |
| page_cache_release(page); |
| } |
| } |
| } |
| } |
| |
| #ifdef CONFIG_JFS_STATISTICS |
| int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, |
| int *eof, void *data) |
| { |
| int len = 0; |
| off_t begin; |
| |
| len += sprintf(buffer, |
| "JFS Metapage statistics\n" |
| "=======================\n" |
| "page allocations = %d\n" |
| "page frees = %d\n" |
| "lock waits = %d\n" |
| "allocation waits = %d\n", |
| mpStat.pagealloc, |
| mpStat.pagefree, |
| mpStat.lockwait, |
| mpStat.allocwait); |
| |
| begin = offset; |
| *start = buffer + begin; |
| len -= begin; |
| |
| if (len > length) |
| len = length; |
| else |
| *eof = 1; |
| |
| if (len < 0) |
| len = 0; |
| |
| return len; |
| } |
| #endif |