/* fs/buffer.c - pub/scm/linux/kernel/git/nico/archive - Git at Google */

 /*
  *  linux/fs/buffer.c
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  */

 /*
  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
  * been avoided by NEVER letting an interrupt change a buffer (except for the
  * data, of course), but instead letting the caller do it.
  */

 /*
  * NOTE! There is one discordant note here: checking floppies for
  * disk change. This is where it fits best, I think, as it should
  * invalidate changed floppy-disk-caches.
  */

 #include <stdarg.h>

 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/major.h>
 #include <linux/string.h>
 #include <linux/locks.h>
 #include <linux/errno.h>

 #include <asm/system.h>
 #include <asm/io.h>

 /*
  * Prototypes for media-change hooks that are defined outside this file.
  * Each one is declared only when the matching CONFIG_* option is set;
  * check_disk_change() calls them under the same configuration guards.
  */
 #ifdef CONFIG_SCSI
 #ifdef CONFIG_BLK_DEV_SR
 extern int check_cdrom_media_change(int, int);
 #endif
 #ifdef CONFIG_BLK_DEV_SD
 extern int check_scsidisk_media_change(int, int);
 extern int revalidate_scsidisk(int, int);
 #endif
 #endif
 #ifdef CONFIG_CDU31A
 extern int check_cdu31a_media_change(int, int);
 #endif
 #ifdef CONFIG_MCD
 extern int check_mcd_media_change(int, int);
 #endif

 /* Forward declaration: grow_buffers() is defined near the end of this
    file but is already called from getblk(). */
 static int grow_buffers(int pri, int size);

 /* Hash buckets, addressed through the hash() macro; the buffers in one
    bucket are chained through b_next/b_prev. */
 static struct buffer_head * hash_table[NR_HASH];
 /* Head of the circular, doubly linked list (b_next_free/b_prev_free) of
    buffers that own a data area. */
 static struct buffer_head * free_list = NULL;
 /* Singly linked stack (through b_next_free) of spare buffer heads. */
 static struct buffer_head * unused_list = NULL;
 /* getblk() sleeps here when it finds no buffer; brelse() wakes it up. */
 static struct wait_queue * buffer_wait = NULL;

 /* Incremented for every buffer linked onto free_list, decremented in
    try_to_free(). */
 int nr_buffers = 0;
 /* Grows/shrinks by PAGE_SIZE for every page given to/taken from buffers. */
 int buffermem = 0;
 /* Number of buffer heads carved out of pages by get_more_buffer_heads(). */
 int nr_buffer_heads = 0;
 static int min_free_pages = 20;	/* nr free pages needed before buffer grows */
 /* Per-device block sizes, indexed as blksize_size[MAJOR(dev)][MINOR(dev)];
    a NULL row or a zero entry means "not set". */
 extern int *blksize_size[];

 /*
  * Rewrote the wait-routines to use the "new" wait-queue functionality,
  * and getting rid of the cli-sti pairs. The wait-queue routines still
  * need cli-sti, but now it's just a couple of 386 instructions or so.
  *
  * Note that the real wait_on_buffer() is an inline function that checks
  * if 'b_wait' is set before calling this, so that the queues aren't set
  * up unnecessarily.
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
 	struct wait_queue wait = { current, NULL };

 	bh->b_count++;
 	add_wait_queue(&bh->b_wait, &wait);
 repeat:
 	current->state = TASK_UNINTERRUPTIBLE;
 	if (bh->b_lock) {
 		schedule();
 		goto repeat;
 	}
 	remove_wait_queue(&bh->b_wait, &wait);
 	bh->b_count--;
 	current->state = TASK_RUNNING;
 }

 /*
  * Write out the dirty buffers of a device.
  *
  * dev:  device to sync; 0 means every device.
  * wait: zero     - start the writes and return (one pass only);
  *       non-zero - do not return until the writes have completed.
  *       sync() may return before the writes have finished; fsync() may not.
  *
  * Returns 1 if, while waiting, a requested buffer was found unlocked,
  * clean and still not up to date (treated as an I/O error), else 0.
  */
 static int sync_buffers(dev_t dev, int wait)
 {
 	int i, retry, pass = 0, err = 0;
 	struct buffer_head * bh;

 	/* One pass for no-wait, three for wait:
 	   0) write out all dirty, unlocked buffers;
 	   1) write out all dirty buffers, waiting if locked;
 	   2) wait for completion by waiting for all buffers to unlock.
 	 */
 repeat:
 	retry = 0;
 	bh = free_list;
 	/* The free list is circular; the walk is bounded by a counter
 	   rather than by looking for the head again. */
 	for (i = nr_buffers*2 ; i-- > 0 ; bh = bh->b_next_free) {
 		if (dev && bh->b_dev != dev)
 			continue;
 #if 0 /* Disable bad-block debugging code */
 		if (bh->b_req && !bh->b_lock &&
 		    !bh->b_dirt && !bh->b_uptodate)
 			printk ("Warning (IO error) - orphaned block %08x on %04x\n",
 				bh->b_blocknr, bh->b_dev);
 #endif
 		if (bh->b_lock)
 		{
 			/* Buffer is locked; skip it unless wait is
 			   requested AND pass > 0. */
 			if (!wait || !pass) {
 				retry = 1;
 				continue;
 			}
 			wait_on_buffer (bh);
 		}
 		/* If an unlocked buffer is not uptodate, there has been
 		   an IO error. Skip it. */
 		if (wait && bh->b_req && !bh->b_lock &&
 		    !bh->b_dirt && !bh->b_uptodate)
 		{
 			err = 1;
 			continue;
 		}
 		/* Don't write clean buffers.  Don't write ANY buffers
 		   on the third pass. */
 		if (!bh->b_dirt || pass>=2)
 			continue;
 		/* Hold a reference across the write request. */
 		bh->b_count++;
 		ll_rw_block(WRITE, 1, &bh);
 		bh->b_count--;
 		retry = 1;
 	}
 	/* If we are waiting for the sync to succeed, and if any dirty
 	   blocks were written (or any locked buffer was skipped), then
 	   repeat; the last pass only waits for buffers being written and
 	   starts no new writes. */
 	if (wait && retry && ++pass<=2)
 		goto repeat;
 	return err;
 }

 /*
  * Start write-out of everything cached for a device without waiting for
  * completion.  Buffers are flushed both before and after the superblocks
  * and inodes are synced, so buffers dirtied by those two steps are
  * submitted as well.
  */
 void sync_dev(dev_t dev)
 {
 	(void) sync_buffers(dev, 0);	/* first round of buffer writes */
 	sync_supers(dev);
 	sync_inodes(dev);
 	(void) sync_buffers(dev, 0);	/* pick up what the above dirtied */
 }

 /*
  * Like sync_dev(), but the final buffer flush waits for the writes to
  * complete.  Returns the result of that waiting sync_buffers() call
  * (non-zero when an I/O error was noticed).
  */
 int fsync_dev(dev_t dev)
 {
 	int err;

 	(void) sync_buffers(dev, 0);
 	sync_supers(dev);
 	sync_inodes(dev);
 	err = sync_buffers(dev, 1);
 	return err;
 }

 asmlinkage int sys_sync(void)
 {
 	sync_dev(0);
 	return 0;
 }

 int file_fsync (struct inode *inode, struct file *filp)
 {
 	return fsync_dev(inode->i_dev);
 }

 /*
  * fsync system call.
  *
  * Returns -EBADF if fd is out of range, not open, or has no inode;
  * -EINVAL if the file has no fsync operation; -EIO if that operation
  * reports a failure; 0 on success.
  */
 asmlinkage int sys_fsync(unsigned int fd)
 {
 	struct file * file;
 	struct inode * inode;

 	if (fd >= NR_OPEN)
 		return -EBADF;
 	file = current->filp[fd];
 	if (!file)
 		return -EBADF;
 	inode = file->f_inode;
 	if (!inode)
 		return -EBADF;

 	if (!file->f_op || !file->f_op->fsync)
 		return -EINVAL;

 	return file->f_op->fsync(inode,file) ? -EIO : 0;
 }

 /*
  * Forget the cached contents of every buffer belonging to dev: the
  * uptodate, dirty and requested flags are cleared, so dirty data is
  * dropped rather than written.
  */
 void invalidate_buffers(dev_t dev)
 {
 	int remaining = nr_buffers*2;
 	struct buffer_head * bh = free_list;

 	while (--remaining > 0) {
 		if (bh->b_dev == dev) {
 			wait_on_buffer(bh);
 			/* We may have slept: re-check that the buffer
 			   still belongs to this device. */
 			if (bh->b_dev == dev)
 				bh->b_uptodate = bh->b_dirt = bh->b_req = 0;
 		}
 		bh = bh->b_next_free;
 	}
 }

 /*
  * Check whether the medium in a removable device has been changed and,
  * if so, throw away everything cached for it (superblock, inodes and
  * buffers).
  *
  * This is relatively slow, so the intent is to call it only on 'mount'
  * or 'open'.  People changing disks in the middle of an operation
  * deserve to lose :-)
  *
  * The per-driver test is selected by the device's major number, so that
  * mount/open need not know which devices are removable.  Majors without
  * a configured handler are ignored.
  */
 void check_disk_change(dev_t dev)
 {
 	int changed;
 	int slot;
 	struct buffer_head * bh;

 	switch (MAJOR(dev)) {
 	case FLOPPY_MAJOR:
 		bh = getblk(dev,0,1024);
 		if (!bh)
 			return;
 		changed = floppy_change(bh);
 		brelse(bh);
 		break;

 #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
 	case SCSI_DISK_MAJOR:
 		changed = check_scsidisk_media_change(dev, 0);
 		break;
 #endif

 #if defined(CONFIG_BLK_DEV_SR) && defined(CONFIG_SCSI)
 	case SCSI_CDROM_MAJOR:
 		changed = check_cdrom_media_change(dev, 0);
 		break;
 #endif

 #if defined(CONFIG_CDU31A)
 	case CDU31A_CDROM_MAJOR:
 		changed = check_cdu31a_media_change(dev, 0);
 		break;
 #endif

 #if defined(CONFIG_MCD)
 	case MITSUMI_CDROM_MAJOR:
 		changed = check_mcd_media_change(dev, 0);
 		break;
 #endif

 	default:
 		return;
 	}

 	if (!changed)
 		return;

 	printk("VFS: Disk change detected on device %d/%d\n",
 					MAJOR(dev), MINOR(dev));
 	for (slot = 0 ; slot < NR_SUPER ; slot++) {
 		if (super_blocks[slot].s_dev == dev)
 			put_super(super_blocks[slot].s_dev);
 	}
 	invalidate_inodes(dev);
 	invalidate_buffers(dev);

 #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
 	/* This is trickier for a removable hard disk, because all of the
 	   partitions that lie on the disk have to be invalidated. */
 	if (MAJOR(dev) == SCSI_DISK_MAJOR)
 		revalidate_scsidisk(dev, 0);
 #endif
 }

 /*
  * _hashfn() maps a (device, block number) pair to a bucket index in
  * [0, NR_HASH); hash() yields the corresponding hash_table slot as an
  * lvalue.  The arguments are parenthesised so that expression arguments
  * are combined as a whole and not re-associated by operator precedence.
  */
 #define _hashfn(dev,block) (((unsigned)((dev)^(block)))%NR_HASH)
 #define hash(dev,block) hash_table[_hashfn(dev,block)]

 /*
  * Unlink a buffer from its hash chain (if it is on one) and clear its
  * b_next/b_prev links.  If the buffer was the first entry of its bucket,
  * the bucket is advanced to the buffer's successor.
  */
 static inline void remove_from_hash_queue(struct buffer_head * bh)
 {
 	struct buffer_head * succ = bh->b_next;
 	struct buffer_head * pred = bh->b_prev;

 	if (succ)
 		succ->b_prev = pred;
 	if (pred)
 		pred->b_next = succ;
 	if (hash(bh->b_dev,bh->b_blocknr) == bh)
 		hash(bh->b_dev,bh->b_blocknr) = succ;
 	bh->b_next = bh->b_prev = NULL;
 }

 /*
  * Unlink a buffer from the circular free list and clear its free-list
  * links.  Panics if either link is NULL, since every member of a
  * circular list must have both neighbours.
  */
 static inline void remove_from_free_list(struct buffer_head * bh)
 {
 	struct buffer_head * before = bh->b_prev_free;
 	struct buffer_head * after = bh->b_next_free;

 	if (!before || !after)
 		panic("VFS: Free block list corrupted");
 	before->b_next_free = after;
 	after->b_prev_free = before;
 	/* Keep the list head valid if we just removed it. */
 	if (free_list == bh)
 		free_list = after;
 	bh->b_next_free = bh->b_prev_free = NULL;
 }

 /*
  * Take a buffer off both lists it can be on: first its hash chain, then
  * the free list.
  */
 static inline void remove_from_queues(struct buffer_head * bh)
 {
 	remove_from_hash_queue(bh);	/* hash chain first ... */
 	remove_from_free_list(bh);	/* ... then the free list */
 }

 /*
  * Move a buffer to the front of the free list.  Nothing is done for a
  * NULL pointer or for the buffer that already is the list head.
  */
 static inline void put_first_free(struct buffer_head * bh)
 {
 	struct buffer_head * tail;

 	if (!bh || (bh == free_list))
 		return;
 	remove_from_free_list(bh);

 	/* Splice in between the current tail and head, then make the
 	   buffer the new head. */
 	bh->b_next_free = free_list;
 	tail = free_list->b_prev_free;
 	bh->b_prev_free = tail;
 	tail->b_next_free = bh;
 	free_list->b_prev_free = bh;
 	free_list = bh;
 }

 /*
  * Move a buffer to the back of the free list.  Because the list is
  * circular, the head is moved to the back simply by advancing free_list.
  */
 static inline void put_last_free(struct buffer_head * bh)
 {
 	struct buffer_head * tail;

 	if (!bh)
 		return;
 	if (bh == free_list) {
 		free_list = bh->b_next_free;
 		return;
 	}
 	remove_from_free_list(bh);

 	/* Splice in between the current tail and head; the head stays. */
 	bh->b_next_free = free_list;
 	tail = free_list->b_prev_free;
 	bh->b_prev_free = tail;
 	tail->b_next_free = bh;
 	free_list->b_prev_free = bh;
 }

 /*
  * Link a buffer at the end of the free list and, if it has a device,
  * at the head of the matching hash chain.  A buffer with b_dev == 0 is
  * left off the hash table with NULL hash links.
  */
 static inline void insert_into_queues(struct buffer_head * bh)
 {
 	struct buffer_head * tail;
 	struct buffer_head ** bucket;

 	/* free list: append just before the head */
 	bh->b_next_free = free_list;
 	tail = free_list->b_prev_free;
 	bh->b_prev_free = tail;
 	tail->b_next_free = bh;
 	free_list->b_prev_free = bh;

 	/* hash chain: push at the front of the bucket */
 	bh->b_prev = NULL;
 	bh->b_next = NULL;
 	if (bh->b_dev) {
 		bucket = &hash(bh->b_dev,bh->b_blocknr);
 		bh->b_next = *bucket;
 		*bucket = bh;
 		if (bh->b_next)
 			bh->b_next->b_prev = bh;
 	}
 }

 /*
  * Look up (dev, block) in the hash table without taking a reference.
  *
  * Returns the buffer if one with the requested size exists.  If the
  * block is cached with a different size, a warning is printed and NULL
  * is returned; NULL is also returned when the block is not cached.
  */
 static struct buffer_head * find_buffer(dev_t dev, int block, int size)
 {
 	struct buffer_head * cand;

 	for (cand = hash(dev,block) ; cand != NULL ; cand = cand->b_next) {
 		if (cand->b_dev != dev || cand->b_blocknr != block)
 			continue;
 		if (cand->b_size == size)
 			return cand;
 		printk("VFS: Wrong blocksize on device %d/%d\n",
 					MAJOR(dev), MINOR(dev));
 		return NULL;
 	}
 	return NULL;
 }

 /*
  * Find a cached buffer and return it with its reference count raised,
  * or NULL if the block is not cached.
  *
  * Why the loop?  Race conditions.  Buffers are not locked (unless they
  * are being read), so something may happen to the buffer while we sleep
  * in wait_on_buffer() (e.g. a read error could force it bad).  After
  * waking up, the identity of the buffer is therefore checked again and
  * the lookup is repeated if it no longer matches.
  */
 struct buffer_head * get_hash_table(dev_t dev, int block, int size)
 {
 	struct buffer_head * bh;

 	while ((bh = find_buffer(dev,block,size)) != NULL) {
 		bh->b_count++;
 		wait_on_buffer(bh);
 		if (bh->b_dev == dev && bh->b_blocknr == block && bh->b_size == size)
 			return bh;
 		/* Changed under us: drop the reference and look again. */
 		bh->b_count--;
 	}
 	return NULL;
 }

 /*
  * Set the block size used for a device.
  *
  * Does nothing if the device's major has no block-size table.  Panics
  * on any size other than 512, 1024, 2048 or 4096.  When the size really
  * changes, the device is synced first and every cached buffer of the old
  * size is marked stale and removed from the hash table.
  */
 void set_blocksize(dev_t dev, int size)
 {
 	int remaining;
 	struct buffer_head * bh, * next;

 	if (!blksize_size[MAJOR(dev)])
 		return;

 	if (size != 512 && size != 1024 && size != 2048 && size != 4096)
 		panic("Invalid blocksize passed to set_blocksize");

 	/* First use with the default size: just record it. */
 	if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 		blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 		return;
 	}
 	if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 		return;

 	sync_buffers(dev, 2);
 	blksize_size[MAJOR(dev)][MINOR(dev)] = size;

 	/* Walk the free list with care: the successor is fetched before
 	   the buffer is touched, so the walk does not depend on the links
 	   of a buffer that has just been processed. */
 	remaining = nr_buffers*2;
 	bh = free_list;
 	while (--remaining > 0) {
 		next = bh->b_next_free;
 		if (bh->b_dev == dev && bh->b_size != size) {
 			wait_on_buffer(bh);
 			/* Re-check after a possible sleep. */
 			if (bh->b_dev == dev && bh->b_size != size)
 				bh->b_uptodate = bh->b_dirt = 0;
 			remove_from_hash_queue(bh);
 		}
 		bh = next;
 	}
 }

 /*
  * Ok, this is getblk, and it isn't very clear, again to hinder
  * race-conditions. Most of the code is seldom used, (ie repeating),
  * so it should be much more efficient than it looks.
  *
  * The algorithm is changed: hopefully better, and an elusive bug removed.
  *
  * 14.02.92: changed it to sync dirty buffers a bit: better performance
  * when the filesystem starts to get full of dirty blocks (I hope).
  *
  * Returns the buffer for (dev, block, size) with a reference held.  If
  * the block is already cached that buffer is returned; otherwise an
  * unused buffer of the right size is recycled and returned with
  * b_uptodate clear, so the caller has to read or fill it.  Every path
  * that cannot complete jumps back to 'repeat', so as written the
  * function only ever returns a buffer.
  */
 /* Cost of reusing a buffer: dirty counts 2, locked counts 1, so 0 means
    the buffer is both clean and unlocked. */
 #define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
 struct buffer_head * getblk(dev_t dev, int block, int size)
 {
 	struct buffer_head * bh, * tmp;
 	int buffers;
 	/* Byte budget: counts down by 'size' on every cache miss and is
 	   reset to PAGE_SIZE after a successful grow. */
 	static int grow_size = 0;

 repeat:
 	/* Fast path: the block is already in the cache. */
 	bh = get_hash_table(dev, block, size);
 	if (bh) {
 		if (bh->b_uptodate && !bh->b_dirt)
 			put_last_free(bh);
 		return bh;
 	}
 	grow_size -= size;
 	if (nr_free_pages > min_free_pages && grow_size <= 0) {
 		if (grow_buffers(GFP_BUFFER, size))
 			grow_size = PAGE_SIZE;
 	}
 	buffers = nr_buffers;
 	bh = NULL;

 	/* Scan the free list for the least "bad" unused buffer of the
 	   right size; stop at the first clean, unlocked one. */
 	for (tmp = free_list; buffers-- > 0 ; tmp = tmp->b_next_free) {
 		if (tmp->b_count || tmp->b_size != size)
 			continue;
 		/* Skip buffers whose page has a mem_map count other than 1
 		   (presumably pages shared with another user - confirm). */
 		if (mem_map[MAP_NR((unsigned long) tmp->b_data)] != 1)
 			continue;
 		if (!bh || BADNESS(tmp)<BADNESS(bh)) {
 			bh = tmp;
 			if (!BADNESS(tmp))
 				break;
 		}
 #if 0
 		if (tmp->b_dirt) {
 			tmp->b_count++;
 			ll_rw_block(WRITEA, 1, &tmp);
 			tmp->b_count--;
 		}
 #endif
 	}

 	/* Nothing reusable: try to grow the cache, if memory allows. */
 	if (!bh && nr_free_pages > 5) {
 		if (grow_buffers(GFP_BUFFER, size))
 			goto repeat;
 	}

 /* and repeat until we find something good */
 	if (!bh) {
 		/* Last resort: an atomic allocation, else sleep until
 		   brelse() wakes buffer_wait. */
 		if (!grow_buffers(GFP_ATOMIC, size))
 			sleep_on(&buffer_wait);
 		goto repeat;
 	}
 	wait_on_buffer(bh);
 	/* We may have slept: the candidate may have been taken or resized. */
 	if (bh->b_count || bh->b_size != size)
 		goto repeat;
 	if (bh->b_dirt) {
 		sync_buffers(0,0);
 		goto repeat;
 	}
 /* NOTE!! While we slept waiting for this block, somebody else might */
 /* already have added "this" block to the cache. check it */
 	if (find_buffer(dev,block,size))
 		goto repeat;
 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 /* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
 	bh->b_count=1;
 	bh->b_dirt=0;
 	bh->b_uptodate=0;
 	bh->b_req=0;
 	/* Re-hash under the new identity: unlink using the old dev/block,
 	   then insert using the new ones. */
 	remove_from_queues(bh);
 	bh->b_dev=dev;
 	bh->b_blocknr=block;
 	insert_into_queues(bh);
 	return bh;
 }

 /*
  * Release a reference obtained from getblk()/bread().  NULL is accepted
  * and ignored.  Waits for the buffer to be unlocked first; when the last
  * reference goes away, anybody sleeping on buffer_wait is woken up.
  * Releasing a buffer whose count is already zero only prints a warning.
  */
 void brelse(struct buffer_head * buf)
 {
 	if (!buf)
 		return;
 	wait_on_buffer(buf);
 	if (!buf->b_count) {
 		printk("VFS: brelse: Trying to free free buffer\n");
 		return;
 	}
 	if (--buf->b_count == 0)
 		wake_up(&buffer_wait);
 }

 /*
  * bread() returns the buffer holding the given block, reading it from
  * the device if the cached copy is not up to date.  The caller owns a
  * reference and must brelse() it.  Returns NULL if no buffer could be
  * obtained or if the block is still not up to date after the read.
  */
 struct buffer_head * bread(dev_t dev, int block, int size)
 {
 	struct buffer_head * bh = getblk(dev, block, size);

 	if (!bh) {
 		printk("VFS: bread: READ error on device %d/%d\n",
 						MAJOR(dev), MINOR(dev));
 		return NULL;
 	}
 	if (!bh->b_uptodate) {
 		ll_rw_block(READ, 1, &bh);
 		wait_on_buffer(bh);
 		if (!bh->b_uptodate) {
 			/* the read failed: give the buffer back */
 			brelse(bh);
 			return NULL;
 		}
 	}
 	return bh;
 }

 /*
  * breada() can be used like bread(), but additionally starts read-ahead
  * on further blocks.  The extra block numbers are passed as int
  * arguments after 'first'; the list MUST be terminated by a negative
  * number.
  *
  * The block size is taken from blksize_size[] for the device, falling
  * back to BLOCK_SIZE when none is recorded.
  *
  * Returns the buffer for 'first' (with a reference held) or NULL if it
  * could not be obtained or read.  No reference is kept on the
  * read-ahead buffers.
  */
 struct buffer_head * breada(dev_t dev,int first, ...)
 {
 	va_list args;
 	unsigned int blocksize;
 	struct buffer_head * bh, *tmp;

 	blocksize = BLOCK_SIZE;
 	if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
 		blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];

 	if (!(bh = getblk(dev, first, blocksize))) {
 		printk("VFS: breada: READ error on device %d/%d\n",
 						MAJOR(dev), MINOR(dev));
 		return NULL;
 	}
 	if (!bh->b_uptodate)
 		ll_rw_block(READ, 1, &bh);

 	/* The argument list is opened only here, after the early return
 	   above, so that every va_start() is matched by the va_end() below. */
 	va_start(args,first);
 	while ((first=va_arg(args,int))>=0) {
 		tmp = getblk(dev, first, blocksize);
 		if (tmp) {
 			if (!tmp->b_uptodate)
 				ll_rw_block(READA, 1, &tmp);
 			/* Drop our reference directly instead of calling
 			   brelse(), which would wait for the read-ahead
 			   to finish. */
 			tmp->b_count--;
 		}
 	}
 	va_end(args);

 	wait_on_buffer(bh);
 	if (bh->b_uptodate)
 		return bh;
 	brelse(bh);
 	return (NULL);
 }

 /*
  * Return a buffer head to the unused list.  The head is zeroed except
  * for its b_wait pointer, which is saved and restored around the
  * memset.  The accesses go through a volatile-qualified pointer; see
  * fs/inode.c for the reasoning behind that.
  */
 static void put_unused_buffer_head(struct buffer_head * bh)
 {
 	struct wait_queue * saved_wait;

 	saved_wait = ((volatile struct buffer_head *) bh)->b_wait;
 	memset((void *) bh,0,sizeof(*bh));
 	((volatile struct buffer_head *) bh)->b_wait = saved_wait;

 	/* push onto the unused stack */
 	bh->b_next_free = unused_list;
 	unused_list = bh;
 }

 /*
  * Refill the unused buffer-head list when it is empty: one page is
  * obtained with get_free_page() and carved into as many buffer heads as
  * fit.  Does nothing if heads are still available or if no page could
  * be had.
  */
 static void get_more_buffer_heads(void)
 {
 	int count;
 	struct buffer_head * bh;

 	if (unused_list)
 		return;

 	bh = (struct buffer_head*) get_free_page(GFP_BUFFER);
 	if (!bh)
 		return;

 	count = PAGE_SIZE / sizeof(*bh);
 	nr_buffer_heads += count;
 	while (count > 0) {
 		bh->b_next_free = unused_list;	/* only make link */
 		unused_list = bh++;
 		count--;
 	}
 }

 /*
  * Pop one buffer head off the unused list, refilling the list first if
  * necessary.  The returned head has no data area, size 0 and a cleared
  * b_req flag.  Returns NULL when no head is available.
  */
 static struct buffer_head * get_unused_buffer_head(void)
 {
 	struct buffer_head * head;

 	get_more_buffer_heads();
 	head = unused_list;
 	if (!head)
 		return NULL;

 	unused_list = head->b_next_free;
 	head->b_next_free = NULL;
 	head->b_data = NULL;
 	head->b_size = 0;
 	head->b_req = 0;
 	return head;
 }

 /*
  * Cover a page with buffer heads of the given size.  The heads are
  * chained through b_this_page (NULL-terminated here), the first head of
  * the chain describing the start of the page.  Returns the chain, or
  * NULL - with all heads given back - if not enough heads were available.
  */
 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
 {
 	struct buffer_head * chain = NULL;
 	struct buffer_head * bh;
 	unsigned long offset = PAGE_SIZE;

 	for (;;) {
 		/* Step down from the end of the page.  offset is unsigned,
 		   so stepping below zero wraps to a huge value, which is
 		   what ends the loop. */
 		offset -= size;
 		if (offset >= PAGE_SIZE)
 			break;
 		bh = get_unused_buffer_head();
 		if (!bh)
 			goto no_grow;
 		bh->b_this_page = chain;
 		chain = bh;
 		bh->b_data = (char *) (page+offset);
 		bh->b_size = size;
 	}
 	return chain;

 no_grow:
 	/* Something failed: hand back every head taken so far.  The link
 	   is read before the head is released, as releasing clears it. */
 	while (chain) {
 		bh = chain;
 		chain = chain->b_this_page;
 		put_unused_buffer_head(bh);
 	}
 	return NULL;
 }

 /*
  * Bring a set of buffers up to date: one read request is issued for all
  * entries that are not yet up to date, then every non-NULL entry is
  * waited for.  NULL entries are skipped.  The local request array holds
  * 8 pointers, so nrbuf must not exceed 8.
  */
 static void read_buffers(struct buffer_head * bh[], int nrbuf)
 {
 	struct buffer_head * pending[8];
 	int npending = 0;
 	int idx;

 	for (idx = 0 ; idx < nrbuf ; idx++) {
 		if (!bh[idx] || bh[idx]->b_uptodate)
 			continue;
 		pending[npending++] = bh[idx];
 	}
 	if (npending)
 		ll_rw_block(READ, npending, pending);

 	for (idx = 0 ; idx < nrbuf ; idx++)
 		if (bh[idx])
 			wait_on_buffer(bh[idx]);
 }

 /*
  * The first block of a page is already cached in 'first' (reference held
  * by the caller and consumed here).  Check whether the remaining blocks
  * listed in b[] are cached too and lie back to back in the same page as
  * 'first'.
  *
  * On success the buffers are read if necessary, the caller's page
  * 'address' is freed and the address of the buffer page is returned,
  * with its mem_map count raised.  Otherwise 0 is returned, every
  * reference taken here is dropped and the extra mem_map count is given
  * back via free_page().
  */
 static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
 	dev_t dev, int *b, int size)
 {
 	struct buffer_head * held[8];
 	unsigned long page;
 	unsigned long offset;
 	int blk;
 	int nheld;
 	int idx;

 	page = (unsigned long) first->b_data;
 	if (page & ~PAGE_MASK) {
 		/* first buffer does not start on a page boundary */
 		brelse(first);
 		return 0;
 	}
 	mem_map[MAP_NR(page)]++;
 	held[0] = first;
 	nheld = 1;
 	offset = size;
 	while (offset < PAGE_SIZE) {
 		blk = *++b;
 		if (!blk)
 			goto no_go;
 		first = get_hash_table(dev, blk, size);
 		if (!first)
 			goto no_go;
 		/* Record it before the position test so that the cleanup
 		   below releases it as well. */
 		held[nheld++] = first;
 		if ((unsigned long) first->b_data != page+offset)
 			goto no_go;
 		offset += size;
 	}
 	read_buffers(held,nheld);	/* make sure they are actually read correctly */
 	for (idx = nheld - 1 ; idx >= 0 ; idx--)
 		brelse(held[idx]);
 	free_page(address);
 	++current->min_flt;
 	return page;

 no_go:
 	for (idx = nheld - 1 ; idx >= 0 ; idx--)
 		brelse(held[idx]);
 	free_page(page);
 	return 0;
 }

 /*
  * None of the page's first block is cached: try to turn the caller's
  * page 'address' itself into buffer memory.  This only goes ahead when
  * every block number in b[] is non-zero and none of the blocks is cached
  * yet.
  *
  * On success the new buffers are queued, read in and released, the
  * page's mem_map count is raised and 'address' is returned.  On failure
  * the buffer heads are given back and 0 is returned.
  */
 static unsigned long try_to_load_aligned(unsigned long address,
 	dev_t dev, int b[], int size)
 {
 	struct buffer_head * bh, * tmp, * started[8];
 	struct buffer_head * first;
 	unsigned long offset;
 	int * p;
 	int blk;
 	int count;

 	bh = create_buffers(address, size);
 	if (!bh)
 		return 0;

 	/* Pass 1: verify that all blocks exist and none is cached. */
 	p = b;
 	for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
 		blk = *(p++);
 		if (!blk)
 			goto not_aligned;
 		tmp = get_hash_table(dev, blk, size);
 		if (tmp) {
 			brelse(tmp);
 			goto not_aligned;
 		}
 	}

 	/* Pass 2: give every head its identity and queue it. */
 	first = bh;
 	p = b;
 	count = 0;
 	for (;;) {
 		started[count++] = bh;
 		bh->b_count = 1;
 		bh->b_dirt = 0;
 		bh->b_uptodate = 0;
 		bh->b_dev = dev;
 		bh->b_blocknr = *(p++);
 		nr_buffers++;
 		insert_into_queues(bh);
 		if (!bh->b_this_page)
 			break;
 		bh = bh->b_this_page;
 	}
 	buffermem += PAGE_SIZE;
 	bh->b_this_page = first;	/* close the per-page ring */
 	mem_map[MAP_NR(address)]++;
 	read_buffers(started,count);
 	while (count-- > 0)
 		brelse(started[count]);
 	++current->maj_flt;
 	return address;

 not_aligned:
 	while ((tmp = bh) != NULL) {
 		bh = bh->b_this_page;
 		put_unused_buffer_head(tmp);
 	}
 	return 0;
 }

 /*
  * Try-to-share-buffers tries to minimize memory use by keeping code
  * pages and the buffer area in the same page.  Two cases are handled:
  * (a) the first block is already cached: check whether all the buffers
  *     happen to be aligned correctly in memory (check_aligned);
  * (b) the first block is not cached: try to load the blocks straight
  *     into the page the way we want them (try_to_load_aligned).
  *
  * Sharing is not guaranteed, but should work very well under most
  * circumstances (ie >90% sharing of code pages on demand-loadable
  * executables).
  *
  * Returns the address of the page to use, or 0 if sharing failed or the
  * first block number is 0.
  */
 static inline unsigned long try_to_share_buffers(unsigned long address,
 	dev_t dev, int *b, int size)
 {
 	struct buffer_head * cached;
 	int first_block = b[0];

 	if (!first_block)
 		return 0;
 	cached = get_hash_table(dev, first_block, size);
 	if (!cached)
 		return try_to_load_aligned(address, dev, b, size);
 	return check_aligned(cached, address, dev, b, size);
 }

 /*
  * Copy 'size' bytes from 'from' to 'to' with the x86 "rep ; movsl"
  * string instruction.  The count register is loaded with size >> 2, so
  * the copy is done in 32-bit words and size is expected to be a multiple
  * of 4.  The source and destination are loaded into the "S" and "D"
  * registers; the count, source and destination registers are listed as
  * clobbered.
  */
 #define COPYBLK(size,from,to) \
 __asm__ __volatile__("rep ; movsl": \
 	:"c" (((unsigned long) size) >> 2),"S" (from),"D" (to) \
 	:"cx","di","si")

 /*
  * bread_page fills one page at 'address' with the blocks listed in b[]
  * (PAGE_SIZE/size of them).  It is a function of its own because there
  * is speed to be gained by requesting all the blocks at once instead of
  * reading them one after another.  For pages that are not writable
  * (PAGE_RW clear in prot) it first tries to share the page with the
  * buffer cache instead of copying.
  *
  * A zero block number leaves the corresponding part of the page alone,
  * as does a block that could not be read.
  *
  * Returns the address of the page holding the data: either 'address' or
  * a shared buffer page.
  */
 unsigned long bread_page(unsigned long address, dev_t dev, int b[], int size, int prot)
 {
 	struct buffer_head * bh[8];
 	unsigned long where;
 	int nr, done;

 	if (!(prot & PAGE_RW)) {
 		where = try_to_share_buffers(address,dev,b,size);
 		if (where)
 			return where;
 	}
 	++current->maj_flt;

 	/* Get a buffer for every block that exists ... */
 	nr = 0;
 	for (done = 0 ; done < PAGE_SIZE ; done += size) {
 		bh[nr] = NULL;
 		if (b[nr])
 			bh[nr] = getblk(dev, b[nr], size);
 		nr++;
 	}
 	/* ... read them all in one go ... */
 	read_buffers(bh,nr);

 	/* ... and copy whatever arrived into the page. */
 	where = address;
 	nr = 0;
 	for (done = 0 ; done < PAGE_SIZE ; done += size) {
 		if (bh[nr]) {
 			if (bh[nr]->b_uptodate)
 				COPYBLK(size, (unsigned long) bh[nr]->b_data,address);
 			brelse(bh[nr]);
 		}
 		nr++;
 		address += size;
 	}
 	return where;
 }

 /*
  * Try to increase the number of buffers available by one page's worth.
  *
  * pri:  allocation priority handed to __get_free_page().
  * size: size of the buffers wanted; must be a multiple of 512 and not
  *       larger than PAGE_SIZE.
  *
  * Returns 1 if a page of buffers was added to the front of the free
  * list, 0 on a bad size or when no page or buffer heads could be had.
  */
 static int grow_buffers(int pri, int size)
 {
 	unsigned long page;
 	struct buffer_head * first, * cur;

 	if ((size & 511) || (size > PAGE_SIZE)) {
 		printk("VFS: grow_buffers: size = %d\n",size);
 		return 0;
 	}
 	page = __get_free_page(pri);
 	if (!page)
 		return 0;
 	first = create_buffers(page, size);
 	if (!first) {
 		free_page(page);
 		return 0;
 	}

 	cur = first;
 	for (;;) {
 		if (!free_list) {
 			/* very first buffer: a ring of one */
 			cur->b_prev_free = cur;
 			cur->b_next_free = cur;
 		} else {
 			cur->b_next_free = free_list;
 			cur->b_prev_free = free_list->b_prev_free;
 			free_list->b_prev_free->b_next_free = cur;
 			free_list->b_prev_free = cur;
 		}
 		free_list = cur;
 		++nr_buffers;
 		if (!cur->b_this_page)
 			break;
 		cur = cur->b_this_page;
 	}
 	cur->b_this_page = first;	/* close the per-page ring */
 	buffermem += PAGE_SIZE;
 	return 1;
 }

 /*
  * try_to_free() checks whether all the buffers on the page of 'bh' are
  * unused, clean and unlocked, and if so releases them all and frees the
  * page.
  *
  * *bhp is set to bh on entry; if bh itself gets released, *bhp is moved
  * to its predecessor on the free list so that the caller can carry on
  * walking from there.
  *
  * Returns 0 if the page could not be released.  Otherwise returns
  * non-zero only if the page's mem_map entry is zero after free_page().
  */
 static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp)
 {
 	unsigned long page;
 	struct buffer_head * cur, * victim;

 	*bhp = bh;
 	page = (unsigned long) bh->b_data;
 	page &= PAGE_MASK;

 	/* Pass 1: every buffer in the page's ring must be idle. */
 	cur = bh;
 	do {
 		if (!cur || cur->b_count || cur->b_dirt || cur->b_lock)
 			return 0;
 		cur = cur->b_this_page;
 	} while (cur != bh);

 	/* Pass 2: tear the ring down.  The successor is fetched before
 	   the head is released, as releasing clears it. */
 	cur = bh;
 	do {
 		victim = cur;
 		cur = cur->b_this_page;
 		nr_buffers--;
 		if (victim == *bhp)
 			*bhp = victim->b_prev_free;
 		remove_from_queues(victim);
 		put_unused_buffer_head(victim);
 	} while (cur != bh);

 	buffermem -= PAGE_SIZE;
 	free_page(page);
 	return !mem_map[MAP_NR(page)];
 }

 /*
  * Try to free up some pages by shrinking the buffer-cache
  *
  * Priority tells the routine how hard to try to shrink the
  * buffers: 3 means "don't bother too much", while a value
  * of 0 means "we'd better get some free pages now".
  *
  * Concretely: below priority 2 all dirty buffers are submitted for
  * writing first; nr_buffers >> priority buffers are examined; locked
  * buffers are waited for only at priority 0.
  *
  * Returns 1 as soon as try_to_free() reports success, 0 otherwise.
  */
 int shrink_buffers(unsigned int priority)
 {
 	struct buffer_head *bh;
 	int budget;

 	if (priority < 2)
 		sync_buffers(0,0);
 	bh = free_list;
 	budget = nr_buffers >> priority;
 	for ( ; budget-- > 0 ; bh = bh->b_next_free) {
 		if (bh->b_count || !bh->b_this_page)
 			continue;
 		if (bh->b_lock) {
 			if (priority)
 				continue;
 			wait_on_buffer(bh);
 		}
 		if (bh->b_dirt) {
 			/* Start a write-ahead and move on to the next one. */
 			bh->b_count++;
 			ll_rw_block(WRITEA, 1, &bh);
 			bh->b_count--;
 			continue;
 		}
 		/* try_to_free() repositions bh if it released it. */
 		if (try_to_free(bh, &bh))
 			return 1;
 	}
 	return 0;
 }

 /*
  * Set up the buffer cache: choose min_free_pages from the amount of
  * memory (200 when high_memory is at least 4MB, else 20), clear the
  * hash table and create the initial free list with one page of
  * BLOCK_SIZE buffers.  Panics if that first page cannot be set up.
  *
  * Historical note (TYT 8/30/92): older code left nr_buffers one less
  * than the real number of buffers, and programs in the ps package were
  * said to rely on that.
  */
 void buffer_init(void)
 {
 	int bucket;

 	min_free_pages = (high_memory >= 4*1024*1024) ? 200 : 20;

 	for (bucket = 0 ; bucket < NR_HASH ; bucket++)
 		hash_table[bucket] = NULL;

 	free_list = NULL;
 	grow_buffers(GFP_KERNEL, BLOCK_SIZE);
 	if (!free_list)
 		panic("VFS: Unable to initialize buffer free list!");
 }