/* fs/jfs/jfs_metapage.c - pub/scm/linux/kernel/git/wtarreau/linux-2.4 - Git at Google */

 /*
  *   Copyright (C) International Business Machines Corp., 2000-2004
  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation; either version 2 of the License, or
  *   (at your option) any later version.
  *
  *   This program is distributed in the hope that it will be useful,
  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  *   the GNU General Public License for more details.
  *
  *   You should have received a copy of the GNU General Public License
  *   along with this program;  if not, write to the Free Software
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */

 #include <linux/fs.h>
 #include <linux/init.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
 #include "jfs_txnmgr.h"
 #include "jfs_debug.h"

 /* Task pointer defined outside this file (presumably the JFS commit thread). */
 extern struct task_struct *jfsCommitTask;
 /*
  * Spinlock held around hash table lookups/updates, mp->count changes and
  * the reserve pool in this file.
  */
 static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
 /* Wait queue head; initialized in metapage_init(). */
 static wait_queue_head_t meta_wait;

 #ifdef CONFIG_JFS_STATISTICS
 /*
  * Event counters printed by jfs_mpstat_read().  Only compiled when
  * CONFIG_JFS_STATISTICS is defined.
  */
 struct {
 	uint	pagealloc;	/* # of page allocations */
 	uint	pagefree;	/* # of page frees */
 	uint	lockwait;	/* # of sleeping lock_metapage() calls */
 	uint	allocwait;	/* # of sleeping alloc_metapage() calls */
 } mpStat;
 #endif


 /*
  * Metapage hash table: HASH_SIZE bucket heads, each the start of a
  * doubly linked chain (hash_next/hash_prev).
  */
 #define HASH_BITS 10		/* 1024 buckets: one 4K page with 4-byte pointers */
 #define HASH_SIZE (1 << HASH_BITS)
 /* Bucket array, allocated in metapage_init(). */
 static struct metapage **hash_table = NULL;
 /* Page order passed to __get_free_pages() for hash_table. */
 static unsigned long hash_order;


 static inline int metapage_locked(struct metapage *mp)
 {
 	return test_bit(META_locked, &mp->flag);
 }

 /*
  * Atomically set META_locked and return its previous value.
  * A zero return means the caller now owns the lock; nonzero means the
  * bit was already set and the lock was NOT acquired.
  */
 static inline int trylock_metapage(struct metapage *mp)
 {
 	int was_locked = test_and_set_bit(META_locked, &mp->flag);

 	return was_locked;
 }

 /*
  * Release the metapage lock: clear META_locked, then wake a task
  * sleeping on mp->wait (see __lock_metapage()).
  */
 static inline void unlock_metapage(struct metapage *mp)
 {
 	/* Clear the bit first so a woken waiter's trylock can succeed. */
 	clear_bit(META_locked, &mp->flag);
 	wake_up(&mp->wait);
 }

 /*
  * Slow path of lock_metapage(): sleep until META_locked can be taken.
  *
  * Called with meta_lock held.  The spinlock is dropped around schedule()
  * and re-taken before the lock bit is tried again, so it is held again
  * on return.
  */
 static void __lock_metapage(struct metapage *mp)
 {
 	DECLARE_WAITQUEUE(wait, current);

 	INCREMENT(mpStat.lockwait);

 	/* Queue on mp->wait, which unlock_metapage() wakes. */
 	add_wait_queue_exclusive(&mp->wait, &wait);
 	do {
 		/* Set the sleep state before re-checking the lock bit. */
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (metapage_locked(mp)) {
 			spin_unlock(&meta_lock);
 			schedule();
 			spin_lock(&meta_lock);
 		}
 		/* trylock returns nonzero while someone else holds the bit */
 	} while (trylock_metapage(mp));
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&mp->wait, &wait);
 }

 /*
  * Take the metapage lock, sleeping in __lock_metapage() when it is
  * already held.  Caller must hold meta_lock.
  */
 static inline void lock_metapage(struct metapage *mp)
 {
 	if (!trylock_metapage(mp))
 		return;		/* bit was clear: the lock is ours */

 	__lock_metapage(mp);
 }

 /*
  * metapage pool is based on Linux 2.5's mempool
  *
  * Tap into reserved structures in critical paths where waiting on a
  * memory allocation could cause deadlock
  */
 #define METAPOOL_MIN_PAGES 32	/* capacity of the reserve array below */
 /* Stack of spare metapage structures; num_reserved is the fill level. */
 static struct metapage *reserved_metapages[METAPOOL_MIN_PAGES];
 /* Number of entries currently stored in reserved_metapages[]. */
 static int num_reserved = 0;
 /* Slab cache that every struct metapage in this file is allocated from. */
 kmem_cache_t *metapage_cache;

 /*
  * Slab constructor for metapage_cache.
  *
  * Initializes the object only when flags carries SLAB_CTOR_CONSTRUCTOR
  * without SLAB_CTOR_VERIFY: the bookkeeping fields are cleared, the
  * flag word is set to just META_free and the wait queue is initialized.
  */
 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 {
 	const unsigned long ctor_mask =
 	    SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR;
 	struct metapage *mp = foo;

 	if ((flags & ctor_mask) != SLAB_CTOR_CONSTRUCTOR)
 		return;

 	mp->lid = 0;
 	mp->lsn = 0;
 	mp->clsn = 0;
 	mp->data = NULL;
 	mp->log = NULL;

 	mp->flag = 0;
 	set_bit(META_free, &mp->flag);
 	init_waitqueue_head(&mp->wait);
 }

 /*
  * Return every metapage held in the reserve pool to the slab cache.
  *
  * On return num_reserved is exactly 0.  (A post-decrement in the loop
  * condition would leave it at -1, and a later __free_metapage() would
  * then store to reserved_metapages[-1].)
  */
 static void empty_reserved(void)
 {
 	while (num_reserved > 0) {
 		num_reserved--;
 		kmem_cache_free(metapage_cache,
 				reserved_metapages[num_reserved]);
 	}
 }

 static struct metapage *alloc_metapage(int *dropped_lock, int no_wait)
 {
 	struct metapage *new;

 	*dropped_lock = 0;

 	/*
 	 * Always try an atomic alloc first, to avoid dropping the
 	 * spinlock
 	 */
 	new = kmem_cache_alloc(metapage_cache, GFP_ATOMIC);
 	if (new)
 		return new;

 	if (no_wait && num_reserved)
 		return reserved_metapages[--num_reserved];

 	*dropped_lock = 1;
 	spin_unlock(&meta_lock);
 	new = kmem_cache_alloc(metapage_cache, GFP_NOFS);
 	spin_lock(&meta_lock);
 	return new;
 }

 /*
  * Give up a metapage structure.  The flag word is reset to just
  * META_free; the structure goes back into the reserve pool while the
  * pool has room, otherwise it is returned to the slab cache.
  * Touches num_reserved, so callers in this file hold meta_lock.
  */
 static void __free_metapage(struct metapage *mp)
 {
 	mp->flag = 0;
 	set_bit(META_free, &mp->flag);

 	if (num_reserved >= METAPOOL_MIN_PAGES) {
 		kmem_cache_free(metapage_cache, mp);
 		return;
 	}

 	reserved_metapages[num_reserved] = mp;
 	num_reserved++;
 }

 /*
  * Locked wrapper around __free_metapage(): takes meta_lock itself, so
  * the caller must not already hold it.
  */
 static inline void free_metapage(struct metapage * mp)
 {
 	spin_lock(&meta_lock);
 	__free_metapage(mp);
 	spin_unlock(&meta_lock);
 }

 /*
  * metapage_init - set up the metapage slab cache, reserve pool and hash
  * table
  *
  * Returns 0 on success or -ENOMEM if any allocation fails.  On failure
  * everything allocated so far is released again; in particular a failed
  * hash table allocation is reported to the caller instead of tripping
  * an assertion.
  */
 int __init metapage_init(void)
 {
 	struct metapage *mp;

 	/*
 	 * Initialize wait queue
 	 */
 	init_waitqueue_head(&meta_wait);

 	/*
 	 * Allocate the metapage structures
 	 */
 	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
 					   0, 0, init_once, NULL);
 	if (metapage_cache == NULL)
 		return -ENOMEM;

 	/* Pre-fill the reserve pool used by alloc_metapage(). */
 	while (num_reserved < METAPOOL_MIN_PAGES) {
 		mp = kmem_cache_alloc(metapage_cache, GFP_NOFS);
 		if (!mp)
 			goto out_free_pool;
 		reserved_metapages[num_reserved++] = mp;
 	}

 	/*
 	 * Now the hash list: find the smallest page order that holds
 	 * HASH_SIZE bucket pointers.
 	 */
 	for (hash_order = 0;
 	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
 	     hash_order++)
 		;
 	hash_table =
 	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
 	if (!hash_table)
 		goto out_free_pool;
 	memset(hash_table, 0, PAGE_SIZE << hash_order);

 	return 0;

 out_free_pool:
 	empty_reserved();
 	kmem_cache_destroy(metapage_cache);
 	return -ENOMEM;
 }

 void metapage_exit(void)
 {
 	empty_reserved();
 	kmem_cache_destroy(metapage_cache);
 }

 /*
  * Basically same hash as in pagemap.h, but using our hash table.
  *
  * The mapping address is divided by the lowest set bit of
  * sizeof(struct inode), the index is added, the sum is folded with its
  * own value shifted right by HASH_BITS, and the result is masked down
  * to a bucket number.  Returns a pointer to that bucket's head.
  */
 static struct metapage **meta_hash(struct address_space *mapping,
 				   unsigned long index)
 {
 	unsigned long key;
 	unsigned long folded;

 	key = ((unsigned long)mapping) /
 	    (sizeof(struct inode) & ~(sizeof(struct inode) - 1));
 	key += index;
 	folded = key + (key >> HASH_BITS);

 	return hash_table + (folded & (HASH_SIZE - 1));
 }

 /*
  * Walk one hash chain looking for the metapage whose mapping and index
  * both match.  Returns the match, or NULL when the chain has none.
  */
 static struct metapage *search_hash(struct metapage ** hash_ptr,
 				    struct address_space *mapping,
 			       unsigned long index)
 {
 	struct metapage *cur = *hash_ptr;

 	while (cur) {
 		if (cur->mapping == mapping && cur->index == index)
 			break;
 		cur = cur->hash_next;
 	}

 	return cur;
 }

 /*
  * Insert mp at the head of the chain whose bucket is *hash_ptr.
  */
 static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
 {
 	struct metapage *old_head = *hash_ptr;

 	if (old_head)
 		old_head->hash_prev = mp;

 	mp->hash_prev = NULL;
 	mp->hash_next = old_head;
 	*hash_ptr = mp;
 }

 /*
  * Unlink mp from the chain whose bucket is *hash_ptr.  When mp has no
  * predecessor it must be the bucket head (asserted), and the bucket is
  * advanced to mp's successor.
  */
 static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
 {
 	struct metapage *before = mp->hash_prev;
 	struct metapage *after = mp->hash_next;

 	if (!before) {
 		assert(*hash_ptr == mp);
 		*hash_ptr = after;
 	} else {
 		before->hash_next = after;
 	}

 	if (after)
 		after->hash_prev = before;
 }

 /*
  * __get_metapage - look up, or create, the metapage for a logical block
  *
  * inode:	inode the metadata belongs to
  * lblock:	block number; together with the mapping it is the hash key
  * size:	size of the metadata in bytes; must fit inside one page
  * absolute:	nonzero to use the block device's mapping instead of
  *		inode->i_mapping
  * new:	nonzero when the caller will initialize the contents: the
  *		page is grabbed without being read and PSIZE bytes of
  *		data are zeroed
  *
  * Returns the metapage with its count raised and META_locked held, or
  * NULL on failure: block at or beyond i_size (non-absolute only), a
  * cached metapage that is discarded or has a different logical size,
  * data that would cross a page boundary, or an allocation / page cache
  * failure.
  */
 struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 				unsigned int size, int absolute,
 				unsigned long new)
 {
 	int dropped_lock;
 	struct metapage **hash_ptr;
 	int l2BlocksPerPage;
 	int l2bsize;
 	int no_wait;
 	struct address_space *mapping;
 	struct metapage *mp;
 	unsigned long page_index;
 	unsigned long page_offset;

 	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);

 	if (absolute)
 		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
 	else {
 		/*
 		 * If an nfs client tries to read an inode that is larger
 		 * than any existing inodes, we may try to read past the
 		 * end of the inode map
 		 */
 		if ((lblock << inode->i_blkbits) >= inode->i_size)
 			return NULL;
 		mapping = inode->i_mapping;
 	}

 	hash_ptr = meta_hash(mapping, lblock);
again:
 	spin_lock(&meta_lock);
 	mp = search_hash(hash_ptr, mapping, lblock);
 	if (mp) {
 	      page_found:
 		/*
 		 * release_metapage() is tearing this one down with
 		 * meta_lock dropped; back off and look again.
 		 */
 		if (test_bit(META_stale, &mp->flag)) {
 			spin_unlock(&meta_lock);
 			yield();
 			goto again;
 		}
 		mp->count++;
 		lock_metapage(mp);
 		spin_unlock(&meta_lock);
 		if (test_bit(META_discard, &mp->flag)) {
 			if (!new) {
 				jfs_error(inode->i_sb,
 					  "__get_metapage: using a "
 					  "discarded metapage");
 				release_metapage(mp);
 				return NULL;
 			}
 			/* Caller is re-creating the block: revive it. */
 			clear_bit(META_discard, &mp->flag);
 		}
 		jfs_info("__get_metapage: found 0x%p, in hash", mp);
 		if (mp->logical_size != size) {
 			jfs_error(inode->i_sb,
 				  "__get_metapage: mp->logical_size != size");
 			release_metapage(mp);
 			return NULL;
 		}
 	} else {
 		l2bsize = inode->i_blkbits;
 		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
 		page_index = lblock >> l2BlocksPerPage;
 		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
 		    l2bsize;
 		if ((page_offset + size) > PAGE_CACHE_SIZE) {
 			spin_unlock(&meta_lock);
 			jfs_err("MetaData crosses page boundary!!");
 			return NULL;
 		}

 		/*
 		 * Locks held on aggregate inode pages are usually
 		 * not held long, and they are taken in critical code
 		 * paths (committing dirty inodes, txCommit thread)
 		 *
 		 * Attempt to get metapage without blocking, tapping into
 		 * reserves if necessary.
 		 */
 		if (JFS_IP(inode)->fileset == AGGREGATE_I)
 			no_wait = 1;
 		else
 			no_wait = 0;

 		mp = alloc_metapage(&dropped_lock, no_wait);
 		if (!mp) {
 			spin_unlock(&meta_lock);
 			return NULL;
 		}
 		if (dropped_lock) {
 			/* alloc_metapage blocked, we need to search the hash
 			 * again.
 			 */
 			struct metapage *mp2;
 			mp2 = search_hash(hash_ptr, mapping, lblock);
 			if (mp2) {
 				__free_metapage(mp);
 				mp = mp2;
 				goto page_found;
 			}
 		}
 		mp->flag = 0;
 		lock_metapage(mp);
 		if (absolute)
 			set_bit(META_absolute, &mp->flag);
 		mp->xflag = COMMIT_PAGE;
 		mp->count = 1;
 		atomic_set(&mp->nohomeok,0);
 		mp->mapping = mapping;
 		mp->index = lblock;
 		mp->page = 0;
 		mp->logical_size = size;
 		add_to_hash(mp, hash_ptr);
 		spin_unlock(&meta_lock);

 		if (new) {
 			jfs_info("__get_metapage: Calling grab_cache_page");
 			mp->page = grab_cache_page(mapping, page_index);
 			if (!mp->page) {
 				jfs_err("grab_cache_page failed!");
 				goto freeit;
 			} else {
 				INCREMENT(mpStat.pagealloc);
 				UnlockPage(mp->page);
 			}
 		} else {
 			jfs_info("__get_metapage: Calling read_cache_page");
 			/*
 			 * The page cache is indexed by page, not by block:
 			 * pass page_index, as the grab_cache_page() branch
 			 * does.  The two only coincide when a page holds
 			 * exactly one block.
 			 */
 			mp->page = read_cache_page(mapping, page_index,
 				    (filler_t *)mapping->a_ops->readpage, NULL);
 			if (IS_ERR(mp->page)) {
 				jfs_err("read_cache_page failed!");
 				goto freeit;
 			} else
 				INCREMENT(mpStat.pagealloc);
 		}
 		/* Mapping is dropped again by kunmap() in release_metapage(). */
 		mp->data = kmap(mp->page) + page_offset;
 	}

 	if (new)
 		memset(mp->data, 0, PSIZE);

 	jfs_info("__get_metapage: returning = 0x%p", mp);
 	return mp;

freeit:
 	/* Page lookup failed: take the new metapage back out of the hash. */
 	spin_lock(&meta_lock);
 	remove_from_hash(mp, hash_ptr);
 	__free_metapage(mp);
 	spin_unlock(&meta_lock);
 	return NULL;
 }

 /*
  * hold_metapage - take an extra reference on a metapage
  *
  * Without force the caller also waits for and takes the metapage lock.
  * With force the lock is only tried; if somebody else already holds
  * it, META_forced is set instead so that release_metapage() drops just
  * the reference.
  */
 void hold_metapage(struct metapage * mp, int force)
 {
 	spin_lock(&meta_lock);

 	mp->count++;

 	if (!force) {
 		lock_metapage(mp);
 	} else {
 		ASSERT (!(test_bit(META_forced, &mp->flag)));
 		if (trylock_metapage(mp))
 			set_bit(META_forced, &mp->flag);
 	}

 	spin_unlock(&meta_lock);
 }

 /*
  * __write_metapage - push a dirty metapage through the mapping's
  * prepare_write/commit_write operations
  *
  * A metapage marked META_discard is not written.  META_dirty is cleared
  * on every path, including the error paths.  Called from
  * release_metapage() after the page has already been kunmap()ed.
  */
 static void __write_metapage(struct metapage * mp)
 {
 	int l2bsize = mp->mapping->host->i_blkbits;
 	int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
 	unsigned long page_index;
 	unsigned long page_offset;
 	int rc;

 	jfs_info("__write_metapage: mp = 0x%p", mp);

 	if (test_bit(META_discard, &mp->flag)) {
 		/*
 		 * This metadata is no longer valid
 		 */
 		clear_bit(META_dirty, &mp->flag);
 		return;
 	}

 	/* Byte offset of this metapage's block inside its page. */
 	page_index = mp->page->index;
 	page_offset =
 	    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;

 	lock_page(mp->page);
 	rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
 					       page_offset +
 					       mp->logical_size);
 	if (rc) {
 		jfs_err("prepare_write return %d!", rc);
 		ClearPageUptodate(mp->page);
 		UnlockPage(mp->page);
 		/*
 		 * No kunmap() here: release_metapage(), the only caller,
 		 * has already dropped the mapping taken in
 		 * __get_metapage().  Unmapping again would unbalance the
 		 * page's kmap count.
 		 */
 		clear_bit(META_dirty, &mp->flag);
 		return;
 	}
 	rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
 					      page_offset +
 					      mp->logical_size);
 	if (rc) {
 		jfs_err("commit_write returned %d", rc);
 	}

 	UnlockPage(mp->page);
 	clear_bit(META_dirty, &mp->flag);

 	jfs_info("__write_metapage done");
 }

 /*
  * Write out the metapage's page and wait for it, if the page has
  * buffers attached.  The page is pinned and locked for the duration.
  */
 static inline void sync_metapage(struct metapage *mp)
 {
 	struct page *pg = mp->page;

 	page_cache_get(pg);
 	lock_page(pg);

 	/* we're done with this page - no need to check for errors */
 	if (pg->buffers) {
 		writeout_one_page(pg);
 		waitfor_one_page(pg);
 	}

 	UnlockPage(pg);
 	page_cache_release(pg);
 }

 /*
  * release_metapage - drop a reference taken by __get_metapage() or
  * hold_metapage()
  *
  * A META_forced reference only lowers the count.  Otherwise the count
  * is lowered and, while other references remain or nohomeok is nonzero,
  * the metapage lock is simply released.  On the last reference the page
  * is unmapped, written if dirty, synced if META_sync is set, flushed if
  * META_discard is set, and released; the metapage is then taken off the
  * log sync list and the hash chain and freed.
  */
 void release_metapage(struct metapage * mp)
 {
 	struct jfs_log *log;

 	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

 	spin_lock(&meta_lock);
 	/*
 	 * hold_metapage(mp, 1) found the lock held by someone else: this
 	 * reference never owned the lock, so leave the lock bit alone.
 	 */
 	if (test_bit(META_forced, &mp->flag)) {
 		clear_bit(META_forced, &mp->flag);
 		mp->count--;
 		spin_unlock(&meta_lock);
 		return;
 	}

 	assert(mp->count);
 	if (--mp->count || atomic_read(&mp->nohomeok)) {
 		unlock_metapage(mp);
 		spin_unlock(&meta_lock);
 		return;
 	}

 	if (mp->page) {
 		/*
 		 * META_stale makes __get_metapage() and
 		 * __invalidate_metapages() back off and retry while
 		 * meta_lock is dropped for the page work below.
 		 */
 		set_bit(META_stale, &mp->flag);
 		spin_unlock(&meta_lock);
 		/* Undo the kmap() done in __get_metapage(). */
 		kunmap(mp->page);
 		mp->data = 0;
 		if (test_bit(META_dirty, &mp->flag))
 			__write_metapage(mp);
 		if (test_bit(META_sync, &mp->flag)) {
 			sync_metapage(mp);
 			clear_bit(META_sync, &mp->flag);
 		}

 		if (test_bit(META_discard, &mp->flag)) {
 			lock_page(mp->page);
 			block_flushpage(mp->page, 0);
 			UnlockPage(mp->page);
 		}

 		page_cache_release(mp->page);
 		mp->page = NULL;
 		INCREMENT(mpStat.pagefree);
 		spin_lock(&meta_lock);
 	}

 	if (mp->lsn) {
 		/*
 		 * Remove metapage from logsynclist.
 		 */
 		log = mp->log;
 		LOGSYNC_LOCK(log);
 		mp->log = 0;
 		mp->lsn = 0;
 		mp->clsn = 0;
 		log->count--;
 		list_del(&mp->synclist);
 		LOGSYNC_UNLOCK(log);
 	}
 	remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
 	spin_unlock(&meta_lock);

 	/* free_metapage() takes meta_lock itself, hence the unlock above. */
 	free_metapage(mp);
 }

 /*
  * __invalidate_metapages - discard cached metadata for a block range
  *
  * Steps through [addr, addr + len) one page worth of blocks at a time,
  * always against the block device's mapping.  A block that has a
  * metapage is marked META_discard so that it is not written when it is
  * eventually released; otherwise any page cache page for the block is
  * flushed directly.
  */
 void __invalidate_metapages(struct inode *ip, s64 addr, int len)
 {
 	/* All callers are interested in block device's mapping */
 	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
 	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
 	struct metapage **hash_ptr;
 	struct metapage *mp;
 	struct page *page;
 	unsigned long lblock;

 	for (lblock = addr; lblock < addr + len;
 	     lblock += 1 << l2BlocksPerPage) {
 		hash_ptr = meta_hash(mapping, lblock);

 		/* Look the block up, retrying while it is marked stale. */
 		for (;;) {
 			spin_lock(&meta_lock);
 			mp = search_hash(hash_ptr, mapping, lblock);
 			if (!mp || !test_bit(META_stale, &mp->flag))
 				break;
 			spin_unlock(&meta_lock);
 			yield();
 		}

 		if (mp) {
 			set_bit(META_discard, &mp->flag);
 			spin_unlock(&meta_lock);
 			continue;
 		}

 		spin_unlock(&meta_lock);
 		page = find_lock_page(mapping, lblock>>l2BlocksPerPage);
 		if (!page)
 			continue;

 		block_flushpage(page, 0);
 		UnlockPage(page);
 		page_cache_release(page);
 	}
 }

 #ifdef CONFIG_JFS_STATISTICS
 /*
  * jfs_mpstat_read - format the mpStat counters into a caller buffer
  *
  * buffer:	destination for the formatted text
  * start:	set to buffer + offset
  * offset:	position within the text the reader has already consumed
  * length:	maximum number of bytes the reader accepts
  * eof:	set to 1 when the remaining text fits within length
  * data:	unused
  *
  * Returns the number of bytes available at *start, clamped to
  * [0, length].  The counters are unsigned, so they are printed with
  * %u rather than %d.
  */
 int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
 		    int *eof, void *data)
 {
 	int len = 0;
 	off_t begin;

 	len += sprintf(buffer,
 		       "JFS Metapage statistics\n"
 		       "=======================\n"
 		       "page allocations = %u\n"
 		       "page frees = %u\n"
 		       "lock waits = %u\n"
 		       "allocation waits = %u\n",
 		       mpStat.pagealloc,
 		       mpStat.pagefree,
 		       mpStat.lockwait,
 		       mpStat.allocwait);

 	begin = offset;
 	*start = buffer + begin;
 	len -= begin;

 	if (len > length)
 		len = length;
 	else
 		*eof = 1;

 	/* offset beyond the end of the text: nothing left to return */
 	if (len < 0)
 		len = 0;

 	return len;
 }
 #endif
	/*
	* Copyright (C) International Business Machines Corp., 2000-2004
	* Portions Copyright (C) Christoph Hellwig, 2001-2002
	*
	* This program is free software; you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation; either version 2 of the License, or
	* (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
	* the GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with this program; if not, write to the Free Software
	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	*/

	#include <linux/fs.h>
	#include <linux/init.h>
	#include "jfs_incore.h"
	#include "jfs_superblock.h"
	#include "jfs_filsys.h"
	#include "jfs_metapage.h"
	#include "jfs_txnmgr.h"
	#include "jfs_debug.h"

	/* Task pointer defined outside this file (presumably the JFS commit thread). */
	extern struct task_struct *jfsCommitTask;
	/* Spinlock held around hash table, mp->count and reserve pool accesses in this file. */
	static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
	/* Wait queue head; initialized in metapage_init(). */
	static wait_queue_head_t meta_wait;

	#ifdef CONFIG_JFS_STATISTICS
	/* Event counters printed by jfs_mpstat_read(); only compiled with CONFIG_JFS_STATISTICS. */
	struct {
	uint pagealloc; /* # of page allocations */
	uint pagefree; /* # of page frees */
	uint lockwait; /* # of sleeping lock_metapage() calls */
	uint allocwait; /* # of sleeping alloc_metapage() calls */
	} mpStat;
	#endif


	/* Metapage hash table: HASH_SIZE bucket heads of doubly linked chains. */
	#define HASH_BITS 10 /* 1024 buckets: one 4K page with 4-byte pointers */
	#define HASH_SIZE (1 << HASH_BITS)
	/* Bucket array, allocated in metapage_init(). */
	static struct metapage **hash_table = NULL;
	/* Page order passed to __get_free_pages() for hash_table. */
	static unsigned long hash_order;


	static inline int metapage_locked(struct metapage *mp)
	{
	return test_bit(META_locked, &mp->flag);
	}

	/*
	 * Atomically set META_locked and return its previous value.
	 * A zero return means the caller now owns the lock; nonzero means the
	 * bit was already set and the lock was NOT acquired.
	 */
	static inline int trylock_metapage(struct metapage *mp)
	{
		int was_locked = test_and_set_bit(META_locked, &mp->flag);

		return was_locked;
	}

	/*
	 * Release the metapage lock: clear META_locked, then wake a task
	 * sleeping on mp->wait (see __lock_metapage()).
	 */
	static inline void unlock_metapage(struct metapage *mp)
	{
	/* Clear the bit first so a woken waiter's trylock can succeed. */
	clear_bit(META_locked, &mp->flag);
	wake_up(&mp->wait);
	}

	/*
	 * Slow path of lock_metapage(): sleep until META_locked can be taken.
	 *
	 * Called with meta_lock held.  The spinlock is dropped around schedule()
	 * and re-taken before the lock bit is tried again, so it is held again
	 * on return.
	 */
	static void __lock_metapage(struct metapage *mp)
	{
	DECLARE_WAITQUEUE(wait, current);

	INCREMENT(mpStat.lockwait);

	/* Queue on mp->wait, which unlock_metapage() wakes. */
	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
	/* Set the sleep state before re-checking the lock bit. */
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (metapage_locked(mp)) {
	spin_unlock(&meta_lock);
	schedule();
	spin_lock(&meta_lock);
	}
	/* trylock returns nonzero while someone else holds the bit */
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
	}

	/*
	 * Take the metapage lock, sleeping in __lock_metapage() when it is
	 * already held.  Caller must hold meta_lock.
	 */
	static inline void lock_metapage(struct metapage *mp)
	{
		if (!trylock_metapage(mp))
			return;		/* bit was clear: the lock is ours */

		__lock_metapage(mp);
	}

	/*
	* metapage pool is based on Linux 2.5's mempool
	*
	* Tap into reserved structures in critical paths where waiting on a
	* memory allocation could cause deadlock
	*/
	#define METAPOOL_MIN_PAGES 32 /* capacity of the reserve array below */
	/* Stack of spare metapage structures; num_reserved is the fill level. */
	static struct metapage *reserved_metapages[METAPOOL_MIN_PAGES];
	/* Number of entries currently stored in reserved_metapages[]. */
	static int num_reserved = 0;
	/* Slab cache that every struct metapage in this file is allocated from. */
	kmem_cache_t *metapage_cache;

	/*
	 * Slab constructor for metapage_cache.
	 *
	 * Initializes the object only when flags carries SLAB_CTOR_CONSTRUCTOR
	 * without SLAB_CTOR_VERIFY: the bookkeeping fields are cleared, the
	 * flag word is set to just META_free and the wait queue is initialized.
	 *
	 * The pointer declarators and the '|' operator had been mangled in
	 * this copy of the function and are restored here.
	 */
	static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
	{
		struct metapage *mp = (struct metapage *)foo;

		if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
		    SLAB_CTOR_CONSTRUCTOR) {
			mp->lid = 0;
			mp->lsn = 0;
			mp->flag = 0;
			mp->data = NULL;
			mp->clsn = 0;
			mp->log = NULL;
			set_bit(META_free, &mp->flag);
			init_waitqueue_head(&mp->wait);
		}
	}

	/*
	 * Return every metapage held in the reserve pool to the slab cache.
	 *
	 * On return num_reserved is exactly 0.  (A post-decrement in the loop
	 * condition would leave it at -1, and a later __free_metapage() would
	 * then store to reserved_metapages[-1].)
	 */
	static void empty_reserved(void)
	{
		while (num_reserved > 0) {
			num_reserved--;
			kmem_cache_free(metapage_cache,
					reserved_metapages[num_reserved]);
		}
	}

	static struct metapage alloc_metapage(int dropped_lock, int no_wait)
	{
	struct metapage *new;

	*dropped_lock = 0;

	/*
	* Always try an atomic alloc first, to avoid dropping the
	* spinlock
	*/
	new = kmem_cache_alloc(metapage_cache, GFP_ATOMIC);
	if (new)
	return new;

	if (no_wait && num_reserved)
	return reserved_metapages[--num_reserved];

	*dropped_lock = 1;
	spin_unlock(&meta_lock);
	new = kmem_cache_alloc(metapage_cache, GFP_NOFS);
	spin_lock(&meta_lock);
	return new;
	}

	/*
	 * Give up a metapage structure.  The flag word is reset to just
	 * META_free; the structure goes back into the reserve pool while the
	 * pool has room, otherwise it is returned to the slab cache.
	 * Touches num_reserved, so callers in this file hold meta_lock.
	 */
	static void __free_metapage(struct metapage *mp)
	{
		mp->flag = 0;
		set_bit(META_free, &mp->flag);

		if (num_reserved >= METAPOOL_MIN_PAGES) {
			kmem_cache_free(metapage_cache, mp);
			return;
		}

		reserved_metapages[num_reserved] = mp;
		num_reserved++;
	}

	/*
	 * Locked wrapper around __free_metapage(): takes meta_lock itself, so
	 * the caller must not already hold it.
	 */
	static inline void free_metapage(struct metapage * mp)
	{
	spin_lock(&meta_lock);
	__free_metapage(mp);
	spin_unlock(&meta_lock);
	}

	/*
	 * metapage_init - set up the metapage slab cache, reserve pool and hash
	 * table
	 *
	 * Returns 0 on success or -ENOMEM if any allocation fails.  On failure
	 * everything allocated so far is released again; in particular a failed
	 * hash table allocation is reported to the caller instead of tripping
	 * an assertion.
	 */
	int __init metapage_init(void)
	{
		struct metapage *mp;

		/*
		 * Initialize wait queue
		 */
		init_waitqueue_head(&meta_wait);

		/*
		 * Allocate the metapage structures
		 */
		metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
						   0, 0, init_once, NULL);
		if (metapage_cache == NULL)
			return -ENOMEM;

		/* Pre-fill the reserve pool used by alloc_metapage(). */
		while (num_reserved < METAPOOL_MIN_PAGES) {
			mp = kmem_cache_alloc(metapage_cache, GFP_NOFS);
			if (!mp)
				goto out_free_pool;
			reserved_metapages[num_reserved++] = mp;
		}

		/*
		 * Now the hash list: find the smallest page order that holds
		 * HASH_SIZE bucket pointers.
		 */
		for (hash_order = 0;
		     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
		     hash_order++)
			;
		hash_table =
		    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
		if (!hash_table)
			goto out_free_pool;
		memset(hash_table, 0, PAGE_SIZE << hash_order);

		return 0;

	out_free_pool:
		empty_reserved();
		kmem_cache_destroy(metapage_cache);
		return -ENOMEM;
	}

	void metapage_exit(void)
	{
	empty_reserved();
	kmem_cache_destroy(metapage_cache);
	}

	/*
	 * Basically same hash as in pagemap.h, but using our hash table.
	 *
	 * The mapping address is divided by the lowest set bit of
	 * sizeof(struct inode), the index is added, the sum is folded with its
	 * own value shifted right by HASH_BITS, and the result is masked down
	 * to a bucket number.  Returns a pointer to that bucket's head.
	 *
	 * The return type and the mapping parameter had lost a '*' each in
	 * this copy; both are restored (the body returns hash_table + n and
	 * casts mapping to an integer).
	 */
	static struct metapage **meta_hash(struct address_space *mapping,
					   unsigned long index)
	{
	#define i (((unsigned long)mapping)/ \
		   (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
	#define s(x) ((x) + ((x) >> HASH_BITS))
		return hash_table + (s(i + index) & (HASH_SIZE - 1));
	#undef i
	#undef s
	}

	/*
	 * Walk one hash chain looking for the metapage whose mapping and index
	 * both match.  Returns the match, or NULL when the chain has none.
	 *
	 * The return type and hash_ptr had lost a '*' each in this copy; both
	 * are restored (the body dereferences hash_ptr to get a metapage
	 * pointer and returns a metapage pointer).
	 */
	static struct metapage *search_hash(struct metapage ** hash_ptr,
					    struct address_space *mapping,
					    unsigned long index)
	{
		struct metapage *ptr;

		for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
			if ((ptr->mapping == mapping) && (ptr->index == index))
				return ptr;
		}

		return NULL;
	}

	/*
	 * Insert mp at the head of the chain whose bucket is *hash_ptr.
	 */
	static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
	{
		struct metapage *old_head = *hash_ptr;

		if (old_head)
			old_head->hash_prev = mp;

		mp->hash_prev = NULL;
		mp->hash_next = old_head;
		*hash_ptr = mp;
	}

	/*
	 * Unlink mp from the chain whose bucket is *hash_ptr.  When mp has no
	 * predecessor it must be the bucket head (asserted), and the bucket is
	 * advanced to mp's successor.
	 */
	static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
	{
		struct metapage *before = mp->hash_prev;
		struct metapage *after = mp->hash_next;

		if (!before) {
			assert(*hash_ptr == mp);
			*hash_ptr = after;
		} else {
			before->hash_next = after;
		}

		if (after)
			after->hash_prev = before;
	}

	struct metapage __get_metapage(struct inode inode, unsigned long lblock,
	unsigned int size, int absolute,
	unsigned long new)
	{
	int dropped_lock;
	struct metapage **hash_ptr;
	int l2BlocksPerPage;
	int l2bsize;
	int no_wait;
	struct address_space *mapping;
	struct metapage *mp;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);

	if (absolute)
	mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
	else {
	/*
	* If an nfs client tries to read an inode that is larger
	* than any existing inodes, we may try to read past the
	* end of the inode map
	*/
	if ((lblock << inode->i_blkbits) >= inode->i_size)
	return NULL;
	mapping = inode->i_mapping;
	}

	hash_ptr = meta_hash(mapping, lblock);
	again:
	spin_lock(&meta_lock);
	mp = search_hash(hash_ptr, mapping, lblock);
	if (mp) {
	page_found:
	if (test_bit(META_stale, &mp->flag)) {
	spin_unlock(&meta_lock);
	yield();
	goto again;
	}
	mp->count++;
	lock_metapage(mp);
	spin_unlock(&meta_lock);
	if (test_bit(META_discard, &mp->flag)) {
	if (!new) {
	jfs_error(inode->i_sb,
	"__get_metapage: using a "
	"discarded metapage");
	release_metapage(mp);
	return NULL;
	}
	clear_bit(META_discard, &mp->flag);
	}
	jfs_info("__get_metapage: found 0x%p, in hash", mp);
	if (mp->logical_size != size) {
	jfs_error(inode->i_sb,
	"__get_metapage: mp->logical_size != size");
	release_metapage(mp);
	return NULL;
	}
	} else {
	l2bsize = inode->i_blkbits;
	l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	page_index = lblock >> l2BlocksPerPage;
	page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
	l2bsize;
	if ((page_offset + size) > PAGE_CACHE_SIZE) {
	spin_unlock(&meta_lock);
	jfs_err("MetaData crosses page boundary!!");
	return NULL;
	}

	/*
	* Locks held on aggregate inode pages are usually
	* not held long, and they are taken in critical code
	* paths (committing dirty inodes, txCommit thread)
	*
	* Attempt to get metapage without blocking, tapping into
	* reserves if necessary.
	*/
	if (JFS_IP(inode)->fileset == AGGREGATE_I)
	no_wait = 1;
	else
	no_wait = 0;

	mp = alloc_metapage(&dropped_lock, no_wait);
	if (!mp) {
	spin_unlock(&meta_lock);
	return NULL;
	}
	if (dropped_lock) {
	/* alloc_metapage blocked, we need to search the hash
	* again.
	*/
	struct metapage *mp2;
	mp2 = search_hash(hash_ptr, mapping, lblock);
	if (mp2) {
	__free_metapage(mp);
	mp = mp2;
	goto page_found;
	}
	}
	mp->flag = 0;
	lock_metapage(mp);
	if (absolute)
	set_bit(META_absolute, &mp->flag);
	mp->xflag = COMMIT_PAGE;
	mp->count = 1;
	atomic_set(&mp->nohomeok,0);
	mp->mapping = mapping;
	mp->index = lblock;
	mp->page = 0;
	mp->logical_size = size;
	add_to_hash(mp, hash_ptr);
	spin_unlock(&meta_lock);

	if (new) {
	jfs_info("__get_metapage: Calling grab_cache_page");
	mp->page = grab_cache_page(mapping, page_index);
	if (!mp->page) {
	jfs_err("grab_cache_page failed!");
	goto freeit;
	} else {
	INCREMENT(mpStat.pagealloc);
	UnlockPage(mp->page);
	}
	} else {
	jfs_info("__get_metapage: Calling read_cache_page");
	mp->page = read_cache_page(mapping, lblock,
	(filler_t *)mapping->a_ops->readpage, NULL);
	if (IS_ERR(mp->page)) {
	jfs_err("read_cache_page failed!");
	goto freeit;
	} else
	INCREMENT(mpStat.pagealloc);
	}
	mp->data = kmap(mp->page) + page_offset;
	}

	if (new)
	memset(mp->data, 0, PSIZE);

	jfs_info("__get_metapage: returning = 0x%p", mp);
	return mp;

	freeit:
	spin_lock(&meta_lock);
	remove_from_hash(mp, hash_ptr);
	__free_metapage(mp);
	spin_unlock(&meta_lock);
	return NULL;
	}

	/*
	 * hold_metapage - take an extra reference on a metapage
	 *
	 * Without force the caller also waits for and takes the metapage lock.
	 * With force the lock is only tried; if somebody else already holds
	 * it, META_forced is set instead so that release_metapage() drops just
	 * the reference.
	 */
	void hold_metapage(struct metapage * mp, int force)
	{
		spin_lock(&meta_lock);

		mp->count++;

		if (!force) {
			lock_metapage(mp);
		} else {
			ASSERT (!(test_bit(META_forced, &mp->flag)));
			if (trylock_metapage(mp))
				set_bit(META_forced, &mp->flag);
		}

		spin_unlock(&meta_lock);
	}

	/*
	 * __write_metapage - push a dirty metapage through the mapping's
	 * prepare_write/commit_write operations
	 *
	 * A metapage marked META_discard is not written.  META_dirty is cleared
	 * on every path, including the error paths.  Called from
	 * release_metapage() after the page has already been kunmap()ed.
	 */
	static void __write_metapage(struct metapage * mp)
	{
		int l2bsize = mp->mapping->host->i_blkbits;
		int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
		unsigned long page_index;
		unsigned long page_offset;
		int rc;

		jfs_info("__write_metapage: mp = 0x%p", mp);

		if (test_bit(META_discard, &mp->flag)) {
			/*
			 * This metadata is no longer valid
			 */
			clear_bit(META_dirty, &mp->flag);
			return;
		}

		/* Byte offset of this metapage's block inside its page. */
		page_index = mp->page->index;
		page_offset =
		    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;

		lock_page(mp->page);
		rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
						       page_offset +
						       mp->logical_size);
		if (rc) {
			jfs_err("prepare_write return %d!", rc);
			ClearPageUptodate(mp->page);
			UnlockPage(mp->page);
			/*
			 * No kunmap() here: release_metapage(), the only
			 * caller, has already dropped the mapping taken in
			 * __get_metapage().  Unmapping again would unbalance
			 * the page's kmap count.
			 */
			clear_bit(META_dirty, &mp->flag);
			return;
		}
		rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
						      page_offset +
						      mp->logical_size);
		if (rc) {
			jfs_err("commit_write returned %d", rc);
		}

		UnlockPage(mp->page);
		clear_bit(META_dirty, &mp->flag);

		jfs_info("__write_metapage done");
	}

	/*
	 * Write out the metapage's page and wait for it, if the page has
	 * buffers attached.  The page is pinned and locked for the duration.
	 */
	static inline void sync_metapage(struct metapage *mp)
	{
		struct page *pg = mp->page;

		page_cache_get(pg);
		lock_page(pg);

		/* we're done with this page - no need to check for errors */
		if (pg->buffers) {
			writeout_one_page(pg);
			waitfor_one_page(pg);
		}

		UnlockPage(pg);
		page_cache_release(pg);
	}

	/*
	 * release_metapage - drop a reference taken by __get_metapage() or
	 * hold_metapage()
	 *
	 * A META_forced reference only lowers the count.  Otherwise the count
	 * is lowered and, while other references remain or nohomeok is nonzero,
	 * the metapage lock is simply released.  On the last reference the page
	 * is unmapped, written if dirty, synced if META_sync is set, flushed if
	 * META_discard is set, and released; the metapage is then taken off the
	 * log sync list and the hash chain and freed.
	 *
	 * The '||' operator had been emitted with backslash escapes in this
	 * copy and is restored; the logic is otherwise unchanged.
	 */
	void release_metapage(struct metapage * mp)
	{
		struct jfs_log *log;

		jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

		spin_lock(&meta_lock);
		/*
		 * hold_metapage(mp, 1) found the lock held by someone else: this
		 * reference never owned the lock, so leave the lock bit alone.
		 */
		if (test_bit(META_forced, &mp->flag)) {
			clear_bit(META_forced, &mp->flag);
			mp->count--;
			spin_unlock(&meta_lock);
			return;
		}

		assert(mp->count);
		if (--mp->count || atomic_read(&mp->nohomeok)) {
			unlock_metapage(mp);
			spin_unlock(&meta_lock);
			return;
		}

		if (mp->page) {
			/*
			 * META_stale makes __get_metapage() and
			 * __invalidate_metapages() back off and retry while
			 * meta_lock is dropped for the page work below.
			 */
			set_bit(META_stale, &mp->flag);
			spin_unlock(&meta_lock);
			/* Undo the kmap() done in __get_metapage(). */
			kunmap(mp->page);
			mp->data = 0;
			if (test_bit(META_dirty, &mp->flag))
				__write_metapage(mp);
			if (test_bit(META_sync, &mp->flag)) {
				sync_metapage(mp);
				clear_bit(META_sync, &mp->flag);
			}

			if (test_bit(META_discard, &mp->flag)) {
				lock_page(mp->page);
				block_flushpage(mp->page, 0);
				UnlockPage(mp->page);
			}

			page_cache_release(mp->page);
			mp->page = NULL;
			INCREMENT(mpStat.pagefree);
			spin_lock(&meta_lock);
		}

		if (mp->lsn) {
			/*
			 * Remove metapage from logsynclist.
			 */
			log = mp->log;
			LOGSYNC_LOCK(log);
			mp->log = 0;
			mp->lsn = 0;
			mp->clsn = 0;
			log->count--;
			list_del(&mp->synclist);
			LOGSYNC_UNLOCK(log);
		}
		remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
		spin_unlock(&meta_lock);

		/* free_metapage() takes meta_lock itself, hence the unlock above. */
		free_metapage(mp);
	}

	/*
	 * __invalidate_metapages - discard cached metadata for a block range
	 *
	 * Steps through [addr, addr + len) one page worth of blocks at a time,
	 * always against the block device's mapping.  A block that has a
	 * metapage is marked META_discard so that it is not written when it is
	 * eventually released; otherwise any page cache page for the block is
	 * flushed directly.
	 */
	void __invalidate_metapages(struct inode *ip, s64 addr, int len)
	{
		/* All callers are interested in block device's mapping */
		struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
		int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
		struct metapage **hash_ptr;
		struct metapage *mp;
		struct page *page;
		unsigned long lblock;

		for (lblock = addr; lblock < addr + len;
		     lblock += 1 << l2BlocksPerPage) {
			hash_ptr = meta_hash(mapping, lblock);

			/* Look the block up, retrying while it is marked stale. */
			for (;;) {
				spin_lock(&meta_lock);
				mp = search_hash(hash_ptr, mapping, lblock);
				if (!mp || !test_bit(META_stale, &mp->flag))
					break;
				spin_unlock(&meta_lock);
				yield();
			}

			if (mp) {
				set_bit(META_discard, &mp->flag);
				spin_unlock(&meta_lock);
				continue;
			}

			spin_unlock(&meta_lock);
			page = find_lock_page(mapping, lblock>>l2BlocksPerPage);
			if (!page)
				continue;

			block_flushpage(page, 0);
			UnlockPage(page);
			page_cache_release(page);
		}
	}

	#ifdef CONFIG_JFS_STATISTICS
	/*
	 * jfs_mpstat_read - format the mpStat counters into a caller buffer
	 *
	 * buffer:	destination for the formatted text
	 * start:	set to buffer + offset
	 * offset:	position within the text the reader has already consumed
	 * length:	maximum number of bytes the reader accepts
	 * eof:		set to 1 when the remaining text fits within length
	 * data:	unused
	 *
	 * Returns the number of bytes available at *start, clamped to
	 * [0, length].  The counters are unsigned, so they are printed with
	 * %u rather than %d.
	 *
	 * The pointer declarators on buffer, start, eof and data had been
	 * lost in this copy and are restored to match how the body uses them.
	 */
	int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
			    int *eof, void *data)
	{
		int len = 0;
		off_t begin;

		len += sprintf(buffer,
			       "JFS Metapage statistics\n"
			       "=======================\n"
			       "page allocations = %u\n"
			       "page frees = %u\n"
			       "lock waits = %u\n"
			       "allocation waits = %u\n",
			       mpStat.pagealloc,
			       mpStat.pagefree,
			       mpStat.lockwait,
			       mpStat.allocwait);

		begin = offset;
		*start = buffer + begin;
		len -= begin;

		if (len > length)
			len = length;
		else
			*eof = 1;

		/* offset beyond the end of the text: nothing left to return */
		if (len < 0)
			len = 0;

		return len;
	}
	#endif