| /* |
| * linux/fs/buffer.c |
| * |
| * Copyright (C) 1991, 1992 Linus Torvalds |
| */ |
| |
| /* |
| * 'buffer.c' implements the buffer-cache functions. Race-conditions have |
| * been avoided by NEVER letting an interrupt change a buffer (except for the |
| * data, of course), but instead letting the caller do it. |
| */ |
| |
| /* |
| * NOTE! There is one discordant note here: checking floppies for |
| * disk change. This is where it fits best, I think, as it should |
| * invalidate changed floppy-disk-caches. |
| */ |
| |
| #include <stdarg.h> |
| |
| #include <linux/config.h> |
| #include <linux/errno.h> |
| #include <linux/sched.h> |
| #include <linux/kernel.h> |
| #include <linux/major.h> |
| #include <linux/string.h> |
| #include <linux/locks.h> |
| #include <linux/errno.h> |
| |
| #include <asm/system.h> |
| #include <asm/io.h> |
| |
| #ifdef CONFIG_SCSI |
| #ifdef CONFIG_BLK_DEV_SR |
| extern int check_cdrom_media_change(int, int); |
| #endif |
| #ifdef CONFIG_BLK_DEV_SD |
| extern int check_scsidisk_media_change(int, int); |
| extern int revalidate_scsidisk(int, int); |
| #endif |
| #endif |
| #ifdef CONFIG_CDU31A |
| extern int check_cdu31a_media_change(int, int); |
| #endif |
| #ifdef CONFIG_MCD |
| extern int check_mcd_media_change(int, int); |
| #endif |
| |
| static int grow_buffers(int pri, int size); |
| |
| static struct buffer_head * hash_table[NR_HASH]; |
| static struct buffer_head * free_list = NULL; |
| static struct buffer_head * unused_list = NULL; |
| static struct wait_queue * buffer_wait = NULL; |
| |
| int nr_buffers = 0; |
| int buffermem = 0; |
| int nr_buffer_heads = 0; |
| static int min_free_pages = 20; /* nr free pages needed before buffer grows */ |
| extern int *blksize_size[]; |
| |
| /* |
| * Rewrote the wait-routines to use the "new" wait-queue functionality, |
| * and getting rid of the cli-sti pairs. The wait-queue routines still |
| * need cli-sti, but now it's just a couple of 386 instructions or so. |
| * |
| * Note that the real wait_on_buffer() is an inline function that checks |
| * if 'b_wait' is set before calling this, so that the queues aren't set |
| * up unnecessarily. |
| */ |
| void __wait_on_buffer(struct buffer_head * bh) |
| { |
| struct wait_queue wait = { current, NULL }; |
| |
| bh->b_count++; |
| add_wait_queue(&bh->b_wait, &wait); |
| repeat: |
| current->state = TASK_UNINTERRUPTIBLE; |
| if (bh->b_lock) { |
| schedule(); |
| goto repeat; |
| } |
| remove_wait_queue(&bh->b_wait, &wait); |
| bh->b_count--; |
| current->state = TASK_RUNNING; |
| } |
| |
| /* Call sync_buffers with wait!=0 to ensure that the call does not |
| return until all buffer writes have completed. Sync() may return |
| before the writes have finished; fsync() may not. */ |
| |
| static int sync_buffers(dev_t dev, int wait) |
| { |
| int i, retry, pass = 0, err = 0; |
| struct buffer_head * bh; |
| |
| /* One pass for no-wait, three for wait: |
| 0) write out all dirty, unlocked buffers; |
| 1) write out all dirty buffers, waiting if locked; |
| 2) wait for completion by waiting for all buffers to unlock. |
| */ |
| repeat: |
| retry = 0; |
| bh = free_list; |
| for (i = nr_buffers*2 ; i-- > 0 ; bh = bh->b_next_free) { |
| if (dev && bh->b_dev != dev) |
| continue; |
| #ifdef 0 /* Disable bad-block debugging code */ |
| if (bh->b_req && !bh->b_lock && |
| !bh->b_dirt && !bh->b_uptodate) |
| printk ("Warning (IO error) - orphaned block %08x on %04x\n", |
| bh->b_blocknr, bh->b_dev); |
| #endif |
| if (bh->b_lock) |
| { |
| /* Buffer is locked; skip it unless wait is |
| requested AND pass > 0. */ |
| if (!wait || !pass) { |
| retry = 1; |
| continue; |
| } |
| wait_on_buffer (bh); |
| } |
| /* If an unlocked buffer is not uptodate, there has been |
| an IO error. Skip it. */ |
| if (wait && bh->b_req && !bh->b_lock && |
| !bh->b_dirt && !bh->b_uptodate) |
| { |
| err = 1; |
| continue; |
| } |
| /* Don't write clean buffers. Don't write ANY buffers |
| on the third pass. */ |
| if (!bh->b_dirt || pass>=2) |
| continue; |
| bh->b_count++; |
| ll_rw_block(WRITE, 1, &bh); |
| bh->b_count--; |
| retry = 1; |
| } |
| /* If we are waiting for the sync to succeed, and if any dirty |
| blocks were written, then repeat; on the second pass, only |
| wait for buffers being written (do not pass to write any |
| more buffers on the second pass). */ |
| if (wait && retry && ++pass<=2) |
| goto repeat; |
| return err; |
| } |
| |
| void sync_dev(dev_t dev) |
| { |
| sync_buffers(dev, 0); |
| sync_supers(dev); |
| sync_inodes(dev); |
| sync_buffers(dev, 0); |
| } |
| |
| int fsync_dev(dev_t dev) |
| { |
| sync_buffers(dev, 0); |
| sync_supers(dev); |
| sync_inodes(dev); |
| return sync_buffers(dev, 1); |
| } |
| |
| asmlinkage int sys_sync(void) |
| { |
| sync_dev(0); |
| return 0; |
| } |
| |
| int file_fsync (struct inode *inode, struct file *filp) |
| { |
| return fsync_dev(inode->i_dev); |
| } |
| |
| asmlinkage int sys_fsync(unsigned int fd) |
| { |
| struct file * file; |
| struct inode * inode; |
| |
| if (fd>=NR_OPEN || !(file=current->filp[fd]) || !(inode=file->f_inode)) |
| return -EBADF; |
| if (!file->f_op || !file->f_op->fsync) |
| return -EINVAL; |
| if (file->f_op->fsync(inode,file)) |
| return -EIO; |
| return 0; |
| } |
| |
| void invalidate_buffers(dev_t dev) |
| { |
| int i; |
| struct buffer_head * bh; |
| |
| bh = free_list; |
| for (i = nr_buffers*2 ; --i > 0 ; bh = bh->b_next_free) { |
| if (bh->b_dev != dev) |
| continue; |
| wait_on_buffer(bh); |
| if (bh->b_dev == dev) |
| bh->b_uptodate = bh->b_dirt = bh->b_req = 0; |
| } |
| } |
| |
| /* |
| * This routine checks whether a floppy has been changed, and |
| * invalidates all buffer-cache-entries in that case. This |
| * is a relatively slow routine, so we have to try to minimize using |
| * it. Thus it is called only upon a 'mount' or 'open'. This |
| * is the best way of combining speed and utility, I think. |
| * People changing diskettes in the middle of an operation deserve |
| * to loose :-) |
| * |
| * NOTE! Although currently this is only for floppies, the idea is |
| * that any additional removable block-device will use this routine, |
| * and that mount/open needn't know that floppies/whatever are |
| * special. |
| */ |
| void check_disk_change(dev_t dev) |
| { |
| int i; |
| struct buffer_head * bh; |
| |
| switch(MAJOR(dev)){ |
| case FLOPPY_MAJOR: |
| if (!(bh = getblk(dev,0,1024))) |
| return; |
| i = floppy_change(bh); |
| brelse(bh); |
| break; |
| |
| #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI) |
| case SCSI_DISK_MAJOR: |
| i = check_scsidisk_media_change(dev, 0); |
| break; |
| #endif |
| |
| #if defined(CONFIG_BLK_DEV_SR) && defined(CONFIG_SCSI) |
| case SCSI_CDROM_MAJOR: |
| i = check_cdrom_media_change(dev, 0); |
| break; |
| #endif |
| |
| #if defined(CONFIG_CDU31A) |
| case CDU31A_CDROM_MAJOR: |
| i = check_cdu31a_media_change(dev, 0); |
| break; |
| #endif |
| |
| #if defined(CONFIG_MCD) |
| case MITSUMI_CDROM_MAJOR: |
| i = check_mcd_media_change(dev, 0); |
| break; |
| #endif |
| |
| default: |
| return; |
| }; |
| |
| if (!i) return; |
| |
| printk("VFS: Disk change detected on device %d/%d\n", |
| MAJOR(dev), MINOR(dev)); |
| for (i=0 ; i<NR_SUPER ; i++) |
| if (super_blocks[i].s_dev == dev) |
| put_super(super_blocks[i].s_dev); |
| invalidate_inodes(dev); |
| invalidate_buffers(dev); |
| |
| #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI) |
| /* This is trickier for a removable hardisk, because we have to invalidate |
| all of the partitions that lie on the disk. */ |
| if (MAJOR(dev) == SCSI_DISK_MAJOR) |
| revalidate_scsidisk(dev, 0); |
| #endif |
| } |
| |
| #define _hashfn(dev,block) (((unsigned)(dev^block))%NR_HASH) |
| #define hash(dev,block) hash_table[_hashfn(dev,block)] |
| |
| static inline void remove_from_hash_queue(struct buffer_head * bh) |
| { |
| if (bh->b_next) |
| bh->b_next->b_prev = bh->b_prev; |
| if (bh->b_prev) |
| bh->b_prev->b_next = bh->b_next; |
| if (hash(bh->b_dev,bh->b_blocknr) == bh) |
| hash(bh->b_dev,bh->b_blocknr) = bh->b_next; |
| bh->b_next = bh->b_prev = NULL; |
| } |
| |
| static inline void remove_from_free_list(struct buffer_head * bh) |
| { |
| if (!(bh->b_prev_free) || !(bh->b_next_free)) |
| panic("VFS: Free block list corrupted"); |
| bh->b_prev_free->b_next_free = bh->b_next_free; |
| bh->b_next_free->b_prev_free = bh->b_prev_free; |
| if (free_list == bh) |
| free_list = bh->b_next_free; |
| bh->b_next_free = bh->b_prev_free = NULL; |
| } |
| |
| static inline void remove_from_queues(struct buffer_head * bh) |
| { |
| remove_from_hash_queue(bh); |
| remove_from_free_list(bh); |
| } |
| |
| static inline void put_first_free(struct buffer_head * bh) |
| { |
| if (!bh || (bh == free_list)) |
| return; |
| remove_from_free_list(bh); |
| /* add to front of free list */ |
| bh->b_next_free = free_list; |
| bh->b_prev_free = free_list->b_prev_free; |
| free_list->b_prev_free->b_next_free = bh; |
| free_list->b_prev_free = bh; |
| free_list = bh; |
| } |
| |
| static inline void put_last_free(struct buffer_head * bh) |
| { |
| if (!bh) |
| return; |
| if (bh == free_list) { |
| free_list = bh->b_next_free; |
| return; |
| } |
| remove_from_free_list(bh); |
| /* add to back of free list */ |
| bh->b_next_free = free_list; |
| bh->b_prev_free = free_list->b_prev_free; |
| free_list->b_prev_free->b_next_free = bh; |
| free_list->b_prev_free = bh; |
| } |
| |
| static inline void insert_into_queues(struct buffer_head * bh) |
| { |
| /* put at end of free list */ |
| bh->b_next_free = free_list; |
| bh->b_prev_free = free_list->b_prev_free; |
| free_list->b_prev_free->b_next_free = bh; |
| free_list->b_prev_free = bh; |
| /* put the buffer in new hash-queue if it has a device */ |
| bh->b_prev = NULL; |
| bh->b_next = NULL; |
| if (!bh->b_dev) |
| return; |
| bh->b_next = hash(bh->b_dev,bh->b_blocknr); |
| hash(bh->b_dev,bh->b_blocknr) = bh; |
| if (bh->b_next) |
| bh->b_next->b_prev = bh; |
| } |
| |
| static struct buffer_head * find_buffer(dev_t dev, int block, int size) |
| { |
| struct buffer_head * tmp; |
| |
| for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next) |
| if (tmp->b_dev==dev && tmp->b_blocknr==block) |
| if (tmp->b_size == size) |
| return tmp; |
| else { |
| printk("VFS: Wrong blocksize on device %d/%d\n", |
| MAJOR(dev), MINOR(dev)); |
| return NULL; |
| } |
| return NULL; |
| } |
| |
| /* |
| * Why like this, I hear you say... The reason is race-conditions. |
| * As we don't lock buffers (unless we are readint them, that is), |
| * something might happen to it while we sleep (ie a read-error |
| * will force it bad). This shouldn't really happen currently, but |
| * the code is ready. |
| */ |
| struct buffer_head * get_hash_table(dev_t dev, int block, int size) |
| { |
| struct buffer_head * bh; |
| |
| for (;;) { |
| if (!(bh=find_buffer(dev,block,size))) |
| return NULL; |
| bh->b_count++; |
| wait_on_buffer(bh); |
| if (bh->b_dev == dev && bh->b_blocknr == block && bh->b_size == size) |
| return bh; |
| bh->b_count--; |
| } |
| } |
| |
| void set_blocksize(dev_t dev, int size) |
| { |
| int i; |
| struct buffer_head * bh, *bhnext; |
| |
| if (!blksize_size[MAJOR(dev)]) |
| return; |
| |
| switch(size) { |
| default: panic("Invalid blocksize passed to set_blocksize"); |
| case 512: case 1024: case 2048: case 4096:; |
| } |
| |
| if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) { |
| blksize_size[MAJOR(dev)][MINOR(dev)] = size; |
| return; |
| } |
| if (blksize_size[MAJOR(dev)][MINOR(dev)] == size) |
| return; |
| sync_buffers(dev, 2); |
| blksize_size[MAJOR(dev)][MINOR(dev)] = size; |
| |
| /* We need to be quite careful how we do this - we are moving entries |
| around on the free list, and we can get in a loop if we are not careful.*/ |
| |
| bh = free_list; |
| for (i = nr_buffers*2 ; --i > 0 ; bh = bhnext) { |
| bhnext = bh->b_next_free; |
| if (bh->b_dev != dev) |
| continue; |
| if (bh->b_size == size) |
| continue; |
| |
| wait_on_buffer(bh); |
| if (bh->b_dev == dev && bh->b_size != size) |
| bh->b_uptodate = bh->b_dirt = 0; |
| remove_from_hash_queue(bh); |
| /* put_first_free(bh); */ |
| } |
| } |
| |
| /* |
| * Ok, this is getblk, and it isn't very clear, again to hinder |
| * race-conditions. Most of the code is seldom used, (ie repeating), |
| * so it should be much more efficient than it looks. |
| * |
| * The algoritm is changed: hopefully better, and an elusive bug removed. |
| * |
| * 14.02.92: changed it to sync dirty buffers a bit: better performance |
| * when the filesystem starts to get full of dirty blocks (I hope). |
| */ |
| #define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock) |
| struct buffer_head * getblk(dev_t dev, int block, int size) |
| { |
| struct buffer_head * bh, * tmp; |
| int buffers; |
| static int grow_size = 0; |
| |
| repeat: |
| bh = get_hash_table(dev, block, size); |
| if (bh) { |
| if (bh->b_uptodate && !bh->b_dirt) |
| put_last_free(bh); |
| return bh; |
| } |
| grow_size -= size; |
| if (nr_free_pages > min_free_pages && grow_size <= 0) { |
| if (grow_buffers(GFP_BUFFER, size)) |
| grow_size = PAGE_SIZE; |
| } |
| buffers = nr_buffers; |
| bh = NULL; |
| |
| for (tmp = free_list; buffers-- > 0 ; tmp = tmp->b_next_free) { |
| if (tmp->b_count || tmp->b_size != size) |
| continue; |
| if (mem_map[MAP_NR((unsigned long) tmp->b_data)] != 1) |
| continue; |
| if (!bh || BADNESS(tmp)<BADNESS(bh)) { |
| bh = tmp; |
| if (!BADNESS(tmp)) |
| break; |
| } |
| #if 0 |
| if (tmp->b_dirt) { |
| tmp->b_count++; |
| ll_rw_block(WRITEA, 1, &tmp); |
| tmp->b_count--; |
| } |
| #endif |
| } |
| |
| if (!bh && nr_free_pages > 5) { |
| if (grow_buffers(GFP_BUFFER, size)) |
| goto repeat; |
| } |
| |
| /* and repeat until we find something good */ |
| if (!bh) { |
| if (!grow_buffers(GFP_ATOMIC, size)) |
| sleep_on(&buffer_wait); |
| goto repeat; |
| } |
| wait_on_buffer(bh); |
| if (bh->b_count || bh->b_size != size) |
| goto repeat; |
| if (bh->b_dirt) { |
| sync_buffers(0,0); |
| goto repeat; |
| } |
| /* NOTE!! While we slept waiting for this block, somebody else might */ |
| /* already have added "this" block to the cache. check it */ |
| if (find_buffer(dev,block,size)) |
| goto repeat; |
| /* OK, FINALLY we know that this buffer is the only one of its kind, */ |
| /* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */ |
| bh->b_count=1; |
| bh->b_dirt=0; |
| bh->b_uptodate=0; |
| bh->b_req=0; |
| remove_from_queues(bh); |
| bh->b_dev=dev; |
| bh->b_blocknr=block; |
| insert_into_queues(bh); |
| return bh; |
| } |
| |
| void brelse(struct buffer_head * buf) |
| { |
| if (!buf) |
| return; |
| wait_on_buffer(buf); |
| if (buf->b_count) { |
| if (--buf->b_count) |
| return; |
| wake_up(&buffer_wait); |
| return; |
| } |
| printk("VFS: brelse: Trying to free free buffer\n"); |
| } |
| |
| /* |
| * bread() reads a specified block and returns the buffer that contains |
| * it. It returns NULL if the block was unreadable. |
| */ |
| struct buffer_head * bread(dev_t dev, int block, int size) |
| { |
| struct buffer_head * bh; |
| |
| if (!(bh = getblk(dev, block, size))) { |
| printk("VFS: bread: READ error on device %d/%d\n", |
| MAJOR(dev), MINOR(dev)); |
| return NULL; |
| } |
| if (bh->b_uptodate) |
| return bh; |
| ll_rw_block(READ, 1, &bh); |
| wait_on_buffer(bh); |
| if (bh->b_uptodate) |
| return bh; |
| brelse(bh); |
| return NULL; |
| } |
| |
| /* |
| * Ok, breada can be used as bread, but additionally to mark other |
| * blocks for reading as well. End the argument list with a negative |
| * number. |
| */ |
| struct buffer_head * breada(dev_t dev,int first, ...) |
| { |
| va_list args; |
| unsigned int blocksize; |
| struct buffer_head * bh, *tmp; |
| |
| va_start(args,first); |
| |
| blocksize = BLOCK_SIZE; |
| if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)]) |
| blocksize = blksize_size[MAJOR(dev)][MINOR(dev)]; |
| |
| if (!(bh = getblk(dev, first, blocksize))) { |
| printk("VFS: breada: READ error on device %d/%d\n", |
| MAJOR(dev), MINOR(dev)); |
| return NULL; |
| } |
| if (!bh->b_uptodate) |
| ll_rw_block(READ, 1, &bh); |
| while ((first=va_arg(args,int))>=0) { |
| tmp = getblk(dev, first, blocksize); |
| if (tmp) { |
| if (!tmp->b_uptodate) |
| ll_rw_block(READA, 1, &tmp); |
| tmp->b_count--; |
| } |
| } |
| va_end(args); |
| wait_on_buffer(bh); |
| if (bh->b_uptodate) |
| return bh; |
| brelse(bh); |
| return (NULL); |
| } |
| |
| /* |
| * See fs/inode.c for the weird use of volatile.. |
| */ |
| static void put_unused_buffer_head(struct buffer_head * bh) |
| { |
| struct wait_queue * wait; |
| |
| wait = ((volatile struct buffer_head *) bh)->b_wait; |
| memset((void *) bh,0,sizeof(*bh)); |
| ((volatile struct buffer_head *) bh)->b_wait = wait; |
| bh->b_next_free = unused_list; |
| unused_list = bh; |
| } |
| |
| static void get_more_buffer_heads(void) |
| { |
| int i; |
| struct buffer_head * bh; |
| |
| if (unused_list) |
| return; |
| |
| if(! (bh = (struct buffer_head*) get_free_page(GFP_BUFFER))) |
| return; |
| |
| for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) { |
| bh->b_next_free = unused_list; /* only make link */ |
| unused_list = bh++; |
| } |
| } |
| |
| static struct buffer_head * get_unused_buffer_head(void) |
| { |
| struct buffer_head * bh; |
| |
| get_more_buffer_heads(); |
| if (!unused_list) |
| return NULL; |
| bh = unused_list; |
| unused_list = bh->b_next_free; |
| bh->b_next_free = NULL; |
| bh->b_data = NULL; |
| bh->b_size = 0; |
| bh->b_req = 0; |
| return bh; |
| } |
| |
| /* |
| * Create the appropriate buffers when given a page for data area and |
| * the size of each buffer.. Use the bh->b_this_page linked list to |
| * follow the buffers created. Return NULL if unable to create more |
| * buffers. |
| */ |
| static struct buffer_head * create_buffers(unsigned long page, unsigned long size) |
| { |
| struct buffer_head *bh, *head; |
| unsigned long offset; |
| |
| head = NULL; |
| offset = PAGE_SIZE; |
| while ((offset -= size) < PAGE_SIZE) { |
| bh = get_unused_buffer_head(); |
| if (!bh) |
| goto no_grow; |
| bh->b_this_page = head; |
| head = bh; |
| bh->b_data = (char *) (page+offset); |
| bh->b_size = size; |
| } |
| return head; |
| /* |
| * In case anything failed, we just free everything we got. |
| */ |
| no_grow: |
| bh = head; |
| while (bh) { |
| head = bh; |
| bh = bh->b_this_page; |
| put_unused_buffer_head(head); |
| } |
| return NULL; |
| } |
| |
| static void read_buffers(struct buffer_head * bh[], int nrbuf) |
| { |
| int i; |
| int bhnum = 0; |
| struct buffer_head * bhr[8]; |
| |
| for (i = 0 ; i < nrbuf ; i++) { |
| if (bh[i] && !bh[i]->b_uptodate) |
| bhr[bhnum++] = bh[i]; |
| } |
| if (bhnum) |
| ll_rw_block(READ, bhnum, bhr); |
| for (i = 0 ; i < nrbuf ; i++) { |
| if (bh[i]) { |
| wait_on_buffer(bh[i]); |
| } |
| } |
| } |
| |
| static unsigned long check_aligned(struct buffer_head * first, unsigned long address, |
| dev_t dev, int *b, int size) |
| { |
| struct buffer_head * bh[8]; |
| unsigned long page; |
| unsigned long offset; |
| int block; |
| int nrbuf; |
| |
| page = (unsigned long) first->b_data; |
| if (page & ~PAGE_MASK) { |
| brelse(first); |
| return 0; |
| } |
| mem_map[MAP_NR(page)]++; |
| bh[0] = first; |
| nrbuf = 1; |
| for (offset = size ; offset < PAGE_SIZE ; offset += size) { |
| block = *++b; |
| if (!block) |
| goto no_go; |
| first = get_hash_table(dev, block, size); |
| if (!first) |
| goto no_go; |
| bh[nrbuf++] = first; |
| if (page+offset != (unsigned long) first->b_data) |
| goto no_go; |
| } |
| read_buffers(bh,nrbuf); /* make sure they are actually read correctly */ |
| while (nrbuf-- > 0) |
| brelse(bh[nrbuf]); |
| free_page(address); |
| ++current->min_flt; |
| return page; |
| no_go: |
| while (nrbuf-- > 0) |
| brelse(bh[nrbuf]); |
| free_page(page); |
| return 0; |
| } |
| |
| static unsigned long try_to_load_aligned(unsigned long address, |
| dev_t dev, int b[], int size) |
| { |
| struct buffer_head * bh, * tmp, * arr[8]; |
| unsigned long offset; |
| int * p; |
| int block; |
| |
| bh = create_buffers(address, size); |
| if (!bh) |
| return 0; |
| p = b; |
| for (offset = 0 ; offset < PAGE_SIZE ; offset += size) { |
| block = *(p++); |
| if (!block) |
| goto not_aligned; |
| tmp = get_hash_table(dev, block, size); |
| if (tmp) { |
| brelse(tmp); |
| goto not_aligned; |
| } |
| } |
| tmp = bh; |
| p = b; |
| block = 0; |
| while (1) { |
| arr[block++] = bh; |
| bh->b_count = 1; |
| bh->b_dirt = 0; |
| bh->b_uptodate = 0; |
| bh->b_dev = dev; |
| bh->b_blocknr = *(p++); |
| nr_buffers++; |
| insert_into_queues(bh); |
| if (bh->b_this_page) |
| bh = bh->b_this_page; |
| else |
| break; |
| } |
| buffermem += PAGE_SIZE; |
| bh->b_this_page = tmp; |
| mem_map[MAP_NR(address)]++; |
| read_buffers(arr,block); |
| while (block-- > 0) |
| brelse(arr[block]); |
| ++current->maj_flt; |
| return address; |
| not_aligned: |
| while ((tmp = bh) != NULL) { |
| bh = bh->b_this_page; |
| put_unused_buffer_head(tmp); |
| } |
| return 0; |
| } |
| |
| /* |
| * Try-to-share-buffers tries to minimize memory use by trying to keep |
| * both code pages and the buffer area in the same page. This is done by |
| * (a) checking if the buffers are already aligned correctly in memory and |
| * (b) if none of the buffer heads are in memory at all, trying to load |
| * them into memory the way we want them. |
| * |
| * This doesn't guarantee that the memory is shared, but should under most |
| * circumstances work very well indeed (ie >90% sharing of code pages on |
| * demand-loadable executables). |
| */ |
| static inline unsigned long try_to_share_buffers(unsigned long address, |
| dev_t dev, int *b, int size) |
| { |
| struct buffer_head * bh; |
| int block; |
| |
| block = b[0]; |
| if (!block) |
| return 0; |
| bh = get_hash_table(dev, block, size); |
| if (bh) |
| return check_aligned(bh, address, dev, b, size); |
| return try_to_load_aligned(address, dev, b, size); |
| } |
| |
| #define COPYBLK(size,from,to) \ |
| __asm__ __volatile__("rep ; movsl": \ |
| :"c" (((unsigned long) size) >> 2),"S" (from),"D" (to) \ |
| :"cx","di","si") |
| |
| /* |
| * bread_page reads four buffers into memory at the desired address. It's |
| * a function of its own, as there is some speed to be got by reading them |
| * all at the same time, not waiting for one to be read, and then another |
| * etc. This also allows us to optimize memory usage by sharing code pages |
| * and filesystem buffers.. |
| */ |
| unsigned long bread_page(unsigned long address, dev_t dev, int b[], int size, int prot) |
| { |
| struct buffer_head * bh[8]; |
| unsigned long where; |
| int i, j; |
| |
| if (!(prot & PAGE_RW)) { |
| where = try_to_share_buffers(address,dev,b,size); |
| if (where) |
| return where; |
| } |
| ++current->maj_flt; |
| for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) { |
| bh[i] = NULL; |
| if (b[i]) |
| bh[i] = getblk(dev, b[i], size); |
| } |
| read_buffers(bh,i); |
| where = address; |
| for (i=0, j=0; j<PAGE_SIZE ; i++, j += size,address += size) { |
| if (bh[i]) { |
| if (bh[i]->b_uptodate) |
| COPYBLK(size, (unsigned long) bh[i]->b_data,address); |
| brelse(bh[i]); |
| } |
| } |
| return where; |
| } |
| |
| /* |
| * Try to increase the number of buffers available: the size argument |
| * is used to determine what kind of buffers we want. |
| */ |
| static int grow_buffers(int pri, int size) |
| { |
| unsigned long page; |
| struct buffer_head *bh, *tmp; |
| |
| if ((size & 511) || (size > PAGE_SIZE)) { |
| printk("VFS: grow_buffers: size = %d\n",size); |
| return 0; |
| } |
| if(!(page = __get_free_page(pri))) |
| return 0; |
| bh = create_buffers(page, size); |
| if (!bh) { |
| free_page(page); |
| return 0; |
| } |
| tmp = bh; |
| while (1) { |
| if (free_list) { |
| tmp->b_next_free = free_list; |
| tmp->b_prev_free = free_list->b_prev_free; |
| free_list->b_prev_free->b_next_free = tmp; |
| free_list->b_prev_free = tmp; |
| } else { |
| tmp->b_prev_free = tmp; |
| tmp->b_next_free = tmp; |
| } |
| free_list = tmp; |
| ++nr_buffers; |
| if (tmp->b_this_page) |
| tmp = tmp->b_this_page; |
| else |
| break; |
| } |
| tmp->b_this_page = bh; |
| buffermem += PAGE_SIZE; |
| return 1; |
| } |
| |
| /* |
| * try_to_free() checks if all the buffers on this particular page |
| * are unused, and free's the page if so. |
| */ |
| static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp) |
| { |
| unsigned long page; |
| struct buffer_head * tmp, * p; |
| |
| *bhp = bh; |
| page = (unsigned long) bh->b_data; |
| page &= PAGE_MASK; |
| tmp = bh; |
| do { |
| if (!tmp) |
| return 0; |
| if (tmp->b_count || tmp->b_dirt || tmp->b_lock) |
| return 0; |
| tmp = tmp->b_this_page; |
| } while (tmp != bh); |
| tmp = bh; |
| do { |
| p = tmp; |
| tmp = tmp->b_this_page; |
| nr_buffers--; |
| if (p == *bhp) |
| *bhp = p->b_prev_free; |
| remove_from_queues(p); |
| put_unused_buffer_head(p); |
| } while (tmp != bh); |
| buffermem -= PAGE_SIZE; |
| free_page(page); |
| return !mem_map[MAP_NR(page)]; |
| } |
| |
| /* |
| * Try to free up some pages by shrinking the buffer-cache |
| * |
| * Priority tells the routine how hard to try to shrink the |
| * buffers: 3 means "don't bother too much", while a value |
| * of 0 means "we'd better get some free pages now". |
| */ |
| int shrink_buffers(unsigned int priority) |
| { |
| struct buffer_head *bh; |
| int i; |
| |
| if (priority < 2) |
| sync_buffers(0,0); |
| bh = free_list; |
| i = nr_buffers >> priority; |
| for ( ; i-- > 0 ; bh = bh->b_next_free) { |
| if (bh->b_count || !bh->b_this_page) |
| continue; |
| if (bh->b_lock) |
| if (priority) |
| continue; |
| else |
| wait_on_buffer(bh); |
| if (bh->b_dirt) { |
| bh->b_count++; |
| ll_rw_block(WRITEA, 1, &bh); |
| bh->b_count--; |
| continue; |
| } |
| if (try_to_free(bh, &bh)) |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* |
| * This initializes the initial buffer free list. nr_buffers is set |
| * to one less the actual number of buffers, as a sop to backwards |
| * compatibility --- the old code did this (I think unintentionally, |
| * but I'm not sure), and programs in the ps package expect it. |
| * - TYT 8/30/92 |
| */ |
| void buffer_init(void) |
| { |
| int i; |
| |
| if (high_memory >= 4*1024*1024) |
| min_free_pages = 200; |
| else |
| min_free_pages = 20; |
| for (i = 0 ; i < NR_HASH ; i++) |
| hash_table[i] = NULL; |
| free_list = 0; |
| grow_buffers(GFP_KERNEL, BLOCK_SIZE); |
| if (!free_list) |
| panic("VFS: Unable to initialize buffer free list!"); |
| return; |
| } |