| /* |
| * Map logical file extents to physical disk |
| * |
| * Original copyright (c) 2008 Daniel Phillips <phillips@phunq.net> |
| * Licensed under the GPL version 2 |
| * |
| * By contributing changes to this file you grant the original copyright holder |
| * the right to distribute those changes under any license. |
| */ |
| |
| /* |
| * Locking order: take care with memory allocation (it may re-enter our fs). |
| * |
| * down_write(itree: btree->lock) (alloc_inum, save_inode, purge_inode) |
| * down_read(itree: btree->lock) (open_inode) |
| * |
| * down_write(otree: btree->lock) (tux3_unify_orphan_add, |
| * tux3_unify_orphan_del, |
| * load_otree_orphan) |
| * |
| * down_write(inode: btree->lock) (btree_chop, map_region for write) |
| * down_read(inode: btree->lock) (map_region for read) |
| * |
| * inode->i_mutex |
| * mapping->private_lock (frontend uses this to protect dirty buffer list) |
| * tuxnode->hole_extents_lock (for inode->hole_extents, |
| * i_ddc->dirty_holes is protected by ->i_mutex) |
| * |
| * inode->i_lock |
| * tuxnode->lock (to protect tuxnode data) |
| * tuxnode->dirty_inodes_lock (for i_ddc->dirty_inodes, |
| * Note: timestamp can be updated |
| * outside inode->i_mutex) |
| * |
| * sb->forked_buffers (for sb->forked_buffers) |
| * |
| * This lock may be taken before any other lock except the VFS locks |
| * (lock_super, i_mutex). |
| * sb->delta_lock (change_begin, change_end) [only for TUX3_FLUSHER_SYNC] |
| * |
| * memory allocation: (blockread, blockget, kmalloc, etc.) |
| * FIXME: document which functions/locks are used via the memory reclaim path |
| * |
| * So, to prevent memory reclaim from re-entering our fs recursively |
| * during memory allocation, the lower layers must not use __GFP_FS. |
| */ |
| |
| #include "tux3.h" |
| #include "dleaf.h" |
| |
| #ifndef trace |
| #define trace trace_on |
| #endif |
| |
| enum map_mode { |
| MAP_READ = 0, /* map_region for read */ |
| MAP_WRITE = 1, /* map_region for overwrite */ |
| MAP_REDIRECT = 2, /* map_region for redirected write |
| * (copy-on-write) */ |
| MAX_MAP_MODE, |
| }; |
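| |
| /* |
| * Note: delalloc is not a separate map_mode; __tux3_get_block() |
| * encodes it as create == 3 and maps it to MAP_READ plus delalloc |
| * buffer state (see seg_to_buffer()). |
| */ |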
| |
| #include "filemap_hole.c" |
| |
| /* userland only */ |
| void show_segs(struct block_segment seg[], unsigned segs) |
| { |
| __tux3_dbg("%i segs: ", segs); |
| for (int i = 0; i < segs; i++) |
| __tux3_dbg("%Lx/%i ", seg[i].block, seg[i].count); |
| __tux3_dbg("\n"); |
| } |
| |
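| /* |
| * Free the blocks of a data extent. Frees for the bitmap/countmap |
| * inodes are logged and deferred to the unify cycle (sb->deunify); |
| * all other frees are deferred to the current delta (sb->defree). |
| */ |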
| static int map_bfree(struct inode *inode, block_t block, unsigned count) |
| { |
| struct sb *sb = tux_sb(inode->i_sb); |
| |
| switch (tux_inode(inode)->inum) { |
| case TUX_BITMAP_INO: |
| case TUX_COUNTMAP_INO: |
| log_bfree_on_unify(sb, block, count); |
| defer_bfree(sb, &sb->deunify, block, count); |
| break; |
| default: |
| log_bfree(sb, block, count); |
| defer_bfree(sb, &sb->defree, block, count); |
| break; |
| } |
| |
| return 0; |
| } |
| |
| static void seg_free(struct btree *btree, block_t block, unsigned count) |
| { |
| map_bfree(btree_inode(btree), block, count); |
| } |
| |
| /* |
| * FIXME: Use balloc_find() and balloc_modify(). Use multiple segment |
| * allocation |
| */ |
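| /* |
| * First phase of the two-phase allocation: find candidate free |
| * segments with balloc_find() and record them in rq->seg without |
| * committing them. seg_alloc() commits only the segments that dleaf |
| * actually used. |
| */ |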
| static int seg_find(struct btree *btree, struct dleaf_req *rq, |
| int space, unsigned seg_len, unsigned *alloc_len) |
| { |
| struct sb *sb = btree->sb; |
| struct block_segment *seg = rq->seg + rq->seg_idx; |
| int maxsegs = min(space, rq->seg_max - rq->seg_idx); |
| unsigned len = seg_len; |
| /* In overwrite mode, mark the allocated segments as SEG_NEW */ |
| const int seg_state = rq->overwrite ? BLOCK_SEG_NEW : 0; |
| int err, i, segs; |
| |
| assert(rq->seg_idx == rq->seg_cnt); |
| |
| err = balloc_find(sb, seg, maxsegs, &segs, &len); |
| if (err) { |
| assert(err != -ENOSPC); /* frontend reservation bug */ |
| return err; |
| } |
| for (i = 0; i < segs; i++) |
| seg[i].state = seg_state; |
| |
| rq->seg_cnt = rq->seg_idx + segs; |
| *alloc_len = seg_len - len; |
| |
| return 0; |
| } |
| |
| /* |
| * Callback to allocate blocks for ->seg. dleaf is about to write the |
| * segs; now we have to assign physical addresses to them. |
| */ |
| static int seg_alloc(struct btree *btree, struct dleaf_req *rq, int new_cnt) |
| { |
| struct sb *sb = btree->sb; |
| struct block_segment *seg = rq->seg + rq->seg_idx; |
| struct block_segment *limit = rq->seg + rq->seg_cnt; |
| int err; |
| |
| if (new_cnt) { |
| err = balloc_use(sb, seg, new_cnt); |
| if (err) |
| return err; /* FIXME: error handling */ |
| |
| while (new_cnt) { |
| log_balloc(sb, seg->block, seg->count); |
| new_cnt--; |
| seg++; |
| } |
| } |
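| /* |
| * Drop the found-but-unused segments (seg .. limit) from seg_cnt; |
| * they were never committed by balloc_use(). |
| */ |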
| rq->seg_cnt -= limit - seg; |
| |
| /* FIXME: tell unused seg[] to balloc for reusing seg[] later */ |
| /* balloc_cache(sb, seg, limit - seg); */ |
| |
| return 0; |
| } |
| |
| /* map_region() by using dleaf2 */ |
| static int map_region2(struct inode *inode, block_t start, unsigned count, |
| struct block_segment seg[], unsigned seg_max, |
| enum map_mode mode) |
| { |
| struct btree *btree = &tux_inode(inode)->btree; |
| struct cursor *cursor = NULL; |
| int err, segs = 0; |
| |
| assert(seg_max > 0); |
| |
| /* |
| * The bitmap inode enters here recursively. |
| * |
| * tux3_flush_inode_internal() (flush bitmap) |
| * flush_list() |
| * map_region() (for flush) |
| * balloc() |
| * read bitmap |
| * map_region() (for read) |
| * |
| * But the bitmap is used (read/write) only from the backend, |
| * so no locking is needed. |
| */ |
| if (tux_inode(inode)->inum != TUX_BITMAP_INO) { |
| if (mode == MAP_READ) |
| down_read(&btree->lock); |
| else |
| down_write(&btree->lock); |
| } |
| |
| if (!has_root(btree) && mode != MAP_READ) { |
| /* |
| * Allocate empty btree if this btree doesn't have it yet. |
| * FIXME: this should be merged to insert_leaf() or something? |
| */ |
| err = alloc_empty_btree(btree); |
| if (err) { |
| segs = err; |
| goto out_unlock; |
| } |
| } |
| if (has_root(btree)) { |
| cursor = alloc_cursor(btree, 1); /* allows for depth increase */ |
| if (!cursor) { |
| segs = -ENOMEM; |
| goto out_unlock; |
| } |
| |
| err = btree_probe(cursor, start); |
| if (err) { |
| segs = err; |
| goto out_unlock; |
| } |
| } |
| |
| if (mode == MAP_READ) { |
| if (has_root(btree)) { |
| struct dleaf_req rq = { |
| .key = { |
| .start = start, |
| .len = count, |
| }, |
| .seg_max = seg_max, |
| .seg = seg, |
| }; |
| |
| /* Read extents from data btree */ |
| err = btree_read(cursor, &rq.key); |
| if (err) { |
| segs = err; |
| goto out_unlock; |
| } |
| segs = rq.seg_cnt; |
| /* |
| * The read might be partial (due to seg_max, or FIXME: |
| * no support yet for reading across multiple leaves). |
| */ |
| } else { |
| /* btree doesn't have root yet */ |
| segs = 1; |
| seg[0].block = 0; |
| seg[0].count = count; |
| seg[0].state = BLOCK_SEG_HOLE; |
| } |
| assert(segs); |
| } else { |
| /* Write extents from data btree */ |
| struct dleaf_req rq = { |
| .key = { |
| .start = start, |
| .len = count, |
| }, |
| .seg_max = seg_max, |
| .seg = seg, |
| .overwrite = mode != MAP_REDIRECT, |
| .seg_find = seg_find, |
| .seg_alloc = seg_alloc, |
| .seg_free = seg_free, |
| }; |
| err = btree_write(cursor, &rq.key); |
| if (err) |
| segs = err; |
| else |
| segs = rq.seg_cnt; |
| } |
| |
| if (cursor) |
| release_cursor(cursor); |
| out_unlock: |
| if (tux_inode(inode)->inum != TUX_BITMAP_INO) { |
| if (mode == MAP_READ) |
| up_read(&btree->lock); |
| else |
| up_write(&btree->lock); |
| } |
| if (cursor) |
| free_cursor(cursor); |
| |
| return segs; |
| } |
| |
| /* |
| * Map logical extent to physical extent |
| * |
| * return value: |
| * < 0 - error |
| * > 0 - number of physical extents that were mapped |
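| * |
| * Example (sketch): map an 8-block read starting at logical |
| * block 100 into at most 10 segments: |
| * |
| * struct block_segment seg[10]; |
| * int segs = map_region(inode, 100, 8, seg, 10, MAP_READ); |
| * // each seg[i] is either a physical extent or a hole |
| * // (seg[i].state == BLOCK_SEG_HOLE) |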
| */ |
| static int map_region(struct inode *inode, block_t start, unsigned count, |
| struct block_segment seg[], unsigned seg_max, |
| enum map_mode mode) |
| { |
| int segs; |
| |
| /* |
| * NOTE: hole extents are not protected by i_mutex on the MAP_READ |
| * path, so we shouldn't assume they are stable. |
| */ |
| |
| if (mode == MAP_READ) { |
| /* If whole region was hole, don't need to call map_region */ |
| if (tux3_is_hole(inode, start, count)) { |
| assert(seg_max >= 1); |
| seg[0].state = BLOCK_SEG_HOLE; |
| seg[0].block = 0; |
| seg[0].count = count; |
| return 1; |
| } |
| } |
| |
| segs = map_region2(inode, start, count, seg, seg_max, mode); |
| |
| if (mode == MAP_READ) { |
| /* Update seg[] with hole information */ |
| segs = tux3_map_hole(inode, start, count, seg, segs, seg_max); |
| } |
| |
| return segs; |
| } |
| |
| static int filemap_extent_io(enum map_mode mode, int rw, struct bufvec *bufvec); |
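| /* |
| * I/O entry points: "overwrite" writes extents in place (MAP_WRITE), |
| * while "redirect" allocates new blocks for copy-on-write |
| * (MAP_REDIRECT). Reads map to MAP_READ in both cases. |
| */ |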
| int tux3_filemap_overwrite_io(int rw, struct bufvec *bufvec) |
| { |
| enum map_mode mode = (rw & WRITE) ? MAP_WRITE : MAP_READ; |
| return filemap_extent_io(mode, rw, bufvec); |
| } |
| |
| int tux3_filemap_redirect_io(int rw, struct bufvec *bufvec) |
| { |
| enum map_mode mode = (rw & WRITE) ? MAP_REDIRECT : MAP_READ; |
| return filemap_extent_io(mode, rw, bufvec); |
| } |
| |
| #ifdef __KERNEL__ |
| #include <linux/mpage.h> |
| #include <linux/swap.h> /* for mark_page_accessed() */ |
| #include <linux/aio.h> /* for kiocb */ |
| |
| static int filemap_extent_io(enum map_mode mode, int rw, struct bufvec *bufvec) |
| { |
| struct inode *inode = bufvec_inode(bufvec); |
| block_t block, index = bufvec_contig_index(bufvec); |
| unsigned count = bufvec_contig_count(bufvec); |
| int err; |
| struct block_segment seg[10]; |
| |
| /* FIXME: For now, this is only for write */ |
| assert(mode != MAP_READ); |
| |
| int segs = map_region(inode, index, count, seg, ARRAY_SIZE(seg), mode); |
| if (segs < 0) |
| return segs; |
| assert(segs); |
| |
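| /* |
| * Issue I/O segment by segment; blockio_vec() consumes "count" |
| * buffers from bufvec for each physical extent. |
| */ |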
| for (int i = 0; i < segs; i++) { |
| block = seg[i].block; |
| count = seg[i].count; |
| |
| trace("extent 0x%Lx/%x => %Lx", index, count, block); |
| |
| err = blockio_vec(rw, bufvec, block, count); |
| if (err) |
| break; |
| |
| index += count; |
| } |
| |
| return err; |
| } |
| |
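| /* |
| * Translate a mapped segment into buffer_head state for |
| * get_block-style callers: a hole becomes a new delalloc buffer |
| * (when delalloc is requested), SEG_NEW gets its physical block |
| * assigned, and anything else is mapped directly. |
| */ |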
| static void seg_to_buffer(struct sb *sb, struct buffer_head *buffer, |
| struct block_segment *seg, int delalloc) |
| { |
| switch (seg->state) { |
| case BLOCK_SEG_HOLE: |
| if (delalloc && !buffer_delay(buffer)) { |
| map_bh(buffer, vfs_sb(sb), 0); |
| set_buffer_new(buffer); |
| set_buffer_delay(buffer); |
| buffer->b_size = seg->count << sb->blockbits; |
| } |
| break; |
| case BLOCK_SEG_NEW: |
| assert(!delalloc); |
| assert(seg->block); |
| if (buffer_delay(buffer)) { |
| /* for now, block_write_full_page() clears delay */ |
| // clear_buffer_delay(buffer); |
| buffer->b_blocknr = seg->block; |
| /* |
| * FIXME: do we need to unmap_underlying_metadata() |
| * for sb->volmap? (at least, check buffer state?) |
| * And if needed, is it enough? |
| */ |
| break; |
| } |
| set_buffer_new(buffer); |
| /* FALLTHRU */ |
| default: |
| map_bh(buffer, vfs_sb(sb), seg->block); |
| buffer->b_size = seg->count << sb->blockbits; |
| break; |
| } |
| } |
| |
| /* create modes: 0 - read, 1 - write, 2 - redirect, 3 - delalloc */ |
| static int __tux3_get_block(struct inode *inode, sector_t iblock, |
| struct buffer_head *bh_result, int create) |
| { |
| struct sb *sb = tux_sb(inode->i_sb); |
| size_t max_blocks = bh_result->b_size >> sb->blockbits; |
| enum map_mode mode; |
| struct block_segment seg; |
| int segs, delalloc; |
| |
| trace("==> inum %Lu, iblock %Lu, b_size %zu, create %d", |
| tux_inode(inode)->inum, (u64)iblock, bh_result->b_size, create); |
| |
| if (create == 3) { |
| delalloc = 1; |
| mode = MAP_READ; |
| } else { |
| delalloc = 0; |
| mode = create; |
| } |
| assert(mode < MAX_MAP_MODE); |
| |
| segs = map_region(inode, iblock, max_blocks, &seg, 1, mode); |
| if (segs < 0) { |
| tux3_err(sb, "map_region failed: %d", segs); |
| return -EIO; |
| } |
| assert(segs == 1); |
| assert(seg.count <= max_blocks); |
| #if 1 |
| /* |
| * We don't use get_block() on the write path in atomic-commit |
| * mode, so SEG_NEW should never happen. (FIXME: the current |
| * direct I/O implementation is using this path.) |
| */ |
| assert(seg.state != BLOCK_SEG_NEW /*|| (create && !delalloc) */); |
| #endif |
| |
| seg_to_buffer(sb, bh_result, &seg, delalloc); |
| |
| trace("<== inum %Lu, mapped %d, block %Lu, size %zu", |
| tux_inode(inode)->inum, buffer_mapped(bh_result), |
| (u64)bh_result->b_blocknr, bh_result->b_size); |
| |
| return 0; |
| } |
| |
| /* Prepare buffer state for ->write_begin() to use as delalloc */ |
| static int tux3_da_get_block(struct inode *inode, sector_t iblock, |
| struct buffer_head *bh_result, int create) |
| { |
| /* FIXME: We should reserve the space */ |
| |
| /* buffer should not be mapped */ |
| assert(!buffer_mapped(bh_result)); |
| /* If page is uptodate, buffer should be uptodate too */ |
| assert(!PageUptodate(bh_result->b_page) || buffer_uptodate(bh_result)); |
| |
| /* |
| * If the buffer is uptodate, we don't need a physical address to |
| * read the block. So, instead of looking up the current physical |
| * address, just set it up as SEG_HOLE for delalloc. |
| */ |
| if (buffer_uptodate(bh_result)) { |
| struct sb *sb = tux_sb(inode->i_sb); |
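| /* |
| * Read-only template: seg_to_buffer() only reads it, so a |
| * shared static instance is safe here. |
| */ |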
| static struct block_segment seg = { |
| .state = BLOCK_SEG_HOLE, |
| .block = 0, |
| .count = 1, |
| }; |
| assert(bh_result->b_size == sb->blocksize); |
| |
| seg_to_buffer(sb, bh_result, &seg, 1); |
| |
| trace("inum %Lu, mapped %d, block %Lu, size %zu", |
| tux_inode(inode)->inum, buffer_mapped(bh_result), |
| (u64)bh_result->b_blocknr, bh_result->b_size); |
| |
| return 0; |
| } |
| |
| return __tux3_get_block(inode, iblock, bh_result, 3); |
| } |
| |
| int tux3_get_block(struct inode *inode, sector_t iblock, |
| struct buffer_head *bh_result, int create) |
| { |
| return __tux3_get_block(inode, iblock, bh_result, create); |
| } |
| |
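| /* |
| * Walk the page's buffer ring to the buffer at "offset". The caller |
| * must keep the ring stable (page locked, or mapping->private_lock |
| * held as in __find_get_buffer()). |
| */ |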
| struct buffer_head *__get_buffer(struct page *page, int offset) |
| { |
| struct buffer_head *buffer = page_buffers(page); |
| while (offset--) |
| buffer = buffer->b_this_page; |
| return buffer; |
| } |
| |
| static struct buffer_head *get_buffer(struct page *page, int offset) |
| { |
| struct buffer_head *buffer = __get_buffer(page, offset); |
| get_bh(buffer); |
| return buffer; |
| } |
| |
| static struct buffer_head *__find_get_buffer(struct address_space *mapping, |
| pgoff_t index, int offset, |
| int need_uptodate) |
| { |
| struct buffer_head *bh = NULL; |
| struct page *page; |
| |
| page = find_get_page(mapping, index); |
| if (page) { |
| if (!need_uptodate || PageUptodate(page)) { |
| spin_lock(&mapping->private_lock); |
| if (page_has_buffers(page)) { |
| bh = get_buffer(page, offset); |
| assert(!need_uptodate || buffer_uptodate(bh)); |
| } |
| spin_unlock(&mapping->private_lock); |
| } |
| page_cache_release(page); |
| } |
| return bh; |
| } |
| |
| static struct buffer_head *find_get_buffer(struct address_space *mapping, |
| pgoff_t index, int offset) |
| { |
| return __find_get_buffer(mapping, index, offset, 1); |
| } |
| |
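| /* |
| * Look up the in-cache buffer for iblock without reading it. Returns |
| * NULL if the page or its buffers are not present. Unlike |
| * blockread(), the result is not required to be uptodate. |
| */ |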
| struct buffer_head *peekblk(struct address_space *mapping, block_t iblock) |
| { |
| struct inode *inode = mapping->host; |
| pgoff_t index; |
| int offset; |
| |
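| /* Split iblock into page index and buffer offset within the page. */ |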
| index = iblock >> (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| offset = iblock & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1); |
| |
| return __find_get_buffer(mapping, index, offset, 0); |
| } |
| |
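| /* |
| * Return an uptodate buffer for iblock, reading the surrounding page |
| * via ->readpage() if it is not already cached. Returns NULL on |
| * failure (the error code is dropped). |
| */ |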
| struct buffer_head *blockread(struct address_space *mapping, block_t iblock) |
| { |
| struct inode *inode = mapping->host; |
| gfp_t gfp_mask = mapping_gfp_mask(mapping) | __GFP_COLD; /* FIXME(?) */ |
| pgoff_t index; |
| struct page *page; |
| struct buffer_head *bh; |
| int err, offset; |
| |
| index = iblock >> (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| offset = iblock & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1); |
| |
| bh = find_get_buffer(mapping, index, offset); |
| if (bh) |
| goto out; |
| |
| err = -ENOMEM; |
| /* FIXME: no need to find again; just try to allocate and insert */ |
| page = find_or_create_page(mapping, index, gfp_mask); |
| if (!page) |
| goto error; |
| |
| if (!page_has_buffers(page)) |
| create_empty_buffers(page, tux_sb(inode->i_sb)->blocksize, 0); |
| bh = get_buffer(page, offset); |
| |
| if (PageUptodate(page)) |
| unlock_page(page); |
| else { |
| err = mapping->a_ops->readpage(NULL, page); |
| if (err) |
| goto error_readpage; |
| wait_on_page_locked(page); |
| if (!PageUptodate(page)) { |
| err = -EIO; |
| goto error_readpage; |
| } |
| } |
| page_cache_release(page); |
| assert(buffer_uptodate(bh)); |
| |
| out: |
| touch_buffer(bh); |
| |
| return bh; |
| |
| error_readpage: |
| put_bh(bh); |
| page_cache_release(page); |
| error: |
| return NULL; |
| } |
| |
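| /* |
| * Get (create if necessary) the buffer for iblock via ->write_begin() |
| * without reading it from disk. The buffer is returned uptodate but |
| * uninitialized; the caller must fill it (see the FIXMEs below). |
| */ |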
| struct buffer_head *blockget(struct address_space *mapping, block_t iblock) |
| { |
| struct inode *inode = mapping->host; |
| pgoff_t index; |
| struct page *page; |
| struct buffer_head *bh; |
| void *fsdata; |
| int err, offset; |
| unsigned aop_flags = AOP_FLAG_UNINTERRUPTIBLE; |
| |
| index = iblock >> (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| offset = iblock & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1); |
| |
| /* Prevent memory allocation from re-entering our fs recursively. */ |
| if (!(mapping_gfp_mask(mapping) & __GFP_FS)) |
| aop_flags |= AOP_FLAG_NOFS; |
| |
| err = mapping->a_ops->write_begin(NULL, mapping, |
| iblock << inode->i_blkbits, |
| 1 << inode->i_blkbits, |
| aop_flags, &page, &fsdata); |
| if (err) |
| return NULL; |
| |
| assert(page_has_buffers(page)); |
| |
| bh = get_buffer(page, offset); |
| /* Clear the new flag; the caller must initialize the buffer. */ |
| clear_buffer_new(bh); |
| /* |
| * FIXME: all reads now go through ->readpage(), which reads the |
| * whole page under lock_page(), i.e. it also reads non-target |
| * blocks. So we would have to hold lock_page() while modifying |
| * data to prevent a race with ->readpage(), but we are not |
| * holding it: |
| * |
| * cpu0 cpu1 |
| * bufferA = blockget() |
| * modify data |
| * blockread(bufferC) |
| * readpage() |
| * read bufferA <= lost modify |
| * set_buffer_uptodate() |
| * read bufferC |
| * set_buffer_uptodate() |
| * set_buffer_uptodate() |
| * |
| * So, this sets the buffer uptodate before unlock_page(). But we |
| * should use submit_bh() or similar to read the block instead. |
| * |
| * FIXME: another issue with blockread()/blockget(): if those |
| * functions are used for the volmap, we might read blocks near |
| * the target block. But nearby blocks can be allocated for data |
| * pages; furthermore, nearby blocks can have in-flight I/O. |
| * |
| * So, nearby blocks on the volmap can be non-volmap blocks, and |
| * reading them would just increase I/O size and seeks. |
| * |
| * As said above, we should use submit_bh() or similar. |
| */ |
| set_buffer_uptodate(bh); |
| |
| unlock_page(page); |
| page_cache_release(page); |
| |
| touch_buffer(bh); |
| |
| return bh; |
| } |
| |
| static int tux3_readpage(struct file *file, struct page *page) |
| { |
| int err = mpage_readpage(page, tux3_get_block); |
| assert(!PageForked(page)); /* FIXME: handle forked page */ |
| return err; |
| } |
| |
| static int tux3_readpages(struct file *file, struct address_space *mapping, |
| struct list_head *pages, unsigned nr_pages) |
| { |
| return mpage_readpages(mapping, pages, nr_pages, tux3_get_block); |
| } |
| |
| #include "filemap_blocklib.c" |
| |
| static void tux3_write_failed(struct address_space *mapping, loff_t to) |
| { |
| struct inode *inode = mapping->host; |
| |
| if (to > inode->i_size) { |
| /* |
| * write_{begin,end}() is protected by change_{begin,end}, |
| * so there are no new blocks on this page. |
| * No need to adjust the dtree. |
| * |
| * FIXME: right? |
| */ |
| truncate_pagecache(inode, inode->i_size); |
| } |
| } |
| |
| /* Use delalloc and check buffer fork. */ |
| static int __tux3_file_write_begin(struct file *file, |
| struct address_space *mapping, |
| loff_t pos, unsigned len, unsigned flags, |
| struct page **pagep, void **fsdata, |
| int check_fork) |
| { |
| int ret; |
| |
| ret = tux3_write_begin(mapping, pos, len, flags, pagep, |
| tux3_da_get_block, check_fork); |
| if (ret < 0) |
| tux3_write_failed(mapping, pos + len); |
| return ret; |
| } |
| |
| static int __tux3_file_write_end(struct file *file, |
| struct address_space *mapping, |
| loff_t pos, unsigned len, unsigned copied, |
| struct page *page, void *fsdata) |
| { |
| int ret; |
| |
| ret = tux3_write_end(file, mapping, pos, len, copied, page, fsdata); |
| if (ret < len) |
| tux3_write_failed(mapping, pos + len); |
| return ret; |
| } |
| |
| /* Split big write transactions into page-sized chunks */ |
| static int tux3_file_write_begin(struct file *file, |
| struct address_space *mapping, |
| loff_t pos, unsigned len, unsigned flags, |
| struct page **pagep, void **fsdata) |
| { |
| /* Split the big write transaction into small chunks. */ |
| assert(S_ISREG(mapping->host->i_mode)); |
| change_begin_if_needed(tux_sb(mapping->host->i_sb)); |
| |
| return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep, |
| fsdata, 1); |
| } |
| |
| static int tux3_file_write_end(struct file *file, struct address_space *mapping, |
| loff_t pos, unsigned len, unsigned copied, |
| struct page *page, void *fsdata) |
| { |
| int ret; |
| |
| ret = __tux3_file_write_end(file, mapping, pos, len, copied, page, |
| fsdata); |
| |
| /* Split the big write transaction into small chunks. */ |
| assert(S_ISREG(mapping->host->i_mode)); |
| change_end_if_needed(tux_sb(mapping->host->i_sb)); |
| |
| return ret; |
| } |
| |
| #if 0 /* disabled writeback for now */ |
| static int tux3_writepage(struct page *page, struct writeback_control *wbc) |
| { |
| struct sb *sb = tux_sb(page->mapping->host->i_sb); |
| change_begin(sb); |
| int err = block_write_full_page(page, tux3_get_block, wbc); |
| change_end(sb); |
| return err; |
| } |
| #endif |
| #if 0 |
| /* mpage_writepages() uses a dummy bh, so we can't check buffer_delay. */ |
| static int tux3_writepages(struct address_space *mapping, |
| struct writeback_control *wbc) |
| { |
| return mpage_writepages(mapping, wbc, tux3_get_block); |
| } |
| #endif |
| |
| static int tux3_disable_writepage(struct page *page, |
| struct writeback_control *wbc) |
| { |
| /* |
| * FIXME: writeback is disabled for now. We would have to handle |
| * writeback for sync (e.g. under cache pressure). |
| * FIXME: should we use AOP_WRITEPAGE_ACTIVATE if for_reclaim? |
| * Or just set .writepage = NULL to keep the page dirty and active? |
| */ |
| trace("writepage disabled for now (%d)", wbc->sync_mode); |
| redirty_page_for_writepage(wbc, page); |
| #if 0 |
| if (wbc->for_reclaim) |
| return AOP_WRITEPAGE_ACTIVATE; /* Return with page locked */ |
| #endif |
| unlock_page(page); |
| return 0; |
| } |
| |
| static int tux3_disable_writepages(struct address_space *mapping, |
| struct writeback_control *wbc) |
| { |
| /* |
| * FIXME: writeback is disabled for now. We would have to handle |
| * writeback for sync (e.g. under cache pressure). |
| */ |
| trace("writepages disabled for now (%d)", wbc->sync_mode); |
| return 0; |
| } |
| |
| #ifdef TUX3_DIRECT_IO |
| /* |
| * Direct I/O is unsupported for now. This is for |
| * non-atomic-commit mode, so it allocates blocks from the frontend. |
| */ |
| static ssize_t tux3_direct_IO(int rw, struct kiocb *iocb, |
| const struct iovec *iov, |
| loff_t offset, unsigned long nr_segs) |
| { |
| struct file *file = iocb->ki_filp; |
| struct address_space *mapping = file->f_mapping; |
| struct inode *inode = mapping->host; |
| ssize_t ret; |
| |
| ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, |
| tux3_get_block); |
| if (ret < 0 && (rw & WRITE)) |
| tux3_write_failed(mapping, offset + iov_length(iov, nr_segs)); |
| return ret; |
| } |
| #endif |
| |
| static sector_t tux3_bmap(struct address_space *mapping, sector_t iblock) |
| { |
| sector_t blocknr; |
| |
| mutex_lock(&mapping->host->i_mutex); |
| blocknr = generic_block_bmap(mapping, iblock, tux3_get_block); |
| mutex_unlock(&mapping->host->i_mutex); |
| |
| return blocknr; |
| } |
| |
| const struct address_space_operations tux_file_aops = { |
| .readpage = tux3_readpage, |
| .readpages = tux3_readpages, |
| // .writepage = tux3_writepage, |
| // .writepages = tux3_writepages, |
| .writepage = tux3_disable_writepage, |
| .writepages = tux3_disable_writepages, |
| .write_begin = tux3_file_write_begin, |
| .write_end = tux3_file_write_end, |
| .bmap = tux3_bmap, |
| .invalidatepage = tux3_invalidatepage, |
| // .releasepage = ext4_releasepage, |
| #ifdef TUX3_DIRECT_IO |
| .direct_IO = tux3_direct_IO, |
| #endif |
| // .migratepage = buffer_migrate_page, /* FIXME */ |
| // .is_partially_uptodate = block_is_partially_uptodate, |
| // .is_dirty_writeback = buffer_check_dirty_writeback, |
| }; |
| |
| static int tux3_symlink_write_begin(struct file *file, |
| struct address_space *mapping, |
| loff_t pos, unsigned len, unsigned flags, |
| struct page **pagep, void **fsdata) |
| { |
| return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep, |
| fsdata, 1); |
| } |
| |
| /* Copy of tux_file_aops, except ->write_begin/end */ |
| const struct address_space_operations tux_symlink_aops = { |
| .readpage = tux3_readpage, |
| .readpages = tux3_readpages, |
| // .writepage = tux3_writepage, |
| // .writepages = tux3_writepages, |
| .writepage = tux3_disable_writepage, |
| .writepages = tux3_disable_writepages, |
| .write_begin = tux3_symlink_write_begin, |
| .write_end = __tux3_file_write_end, |
| .bmap = tux3_bmap, |
| .invalidatepage = tux3_invalidatepage, |
| // .releasepage = ext4_releasepage, |
| #ifdef TUX3_DIRECT_IO |
| .direct_IO = tux3_direct_IO, |
| #endif |
| // .migratepage = buffer_migrate_page, /* FIXME */ |
| // .is_partially_uptodate = block_is_partially_uptodate, |
| // .is_dirty_writeback = buffer_check_dirty_writeback, |
| }; |
| |
| static int tux3_blk_readpage(struct file *file, struct page *page) |
| { |
| int err = block_read_full_page(page, tux3_get_block); |
| assert(!PageForked(page)); /* FIXME: handle forked page */ |
| return err; |
| } |
| |
| /* Use delalloc but don't check buffer fork */ |
| static int tux3_blk_write_begin(struct file *file, |
| struct address_space *mapping, |
| loff_t pos, unsigned len, unsigned flags, |
| struct page **pagep, void **fsdata) |
| { |
| return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep, |
| fsdata, 0); |
| } |
| |
| #if 0 /* disabled writeback for now */ |
| static int tux3_blk_writepage(struct page *page, struct writeback_control *wbc) |
| { |
| return block_write_full_page(page, tux3_get_block, wbc); |
| } |
| #endif |
| |
| const struct address_space_operations tux_blk_aops = { |
| .readpage = tux3_blk_readpage, |
| // .writepage = tux3_blk_writepage, |
| // .writepages = tux3_writepages, |
| .writepage = tux3_disable_writepage, |
| .writepages = tux3_disable_writepages, |
| .write_begin = tux3_blk_write_begin, |
| .bmap = tux3_bmap, |
| .invalidatepage = tux3_invalidatepage, |
| // .migratepage = buffer_migrate_page, /* FIXME */ |
| // .is_partially_uptodate = block_is_partially_uptodate, |
| // .is_dirty_writeback = buffer_check_dirty_writeback, |
| }; |
| |
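| /* The volmap is identity-mapped: logical block == physical block. */ |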
| static int tux3_vol_get_block(struct inode *inode, sector_t iblock, |
| struct buffer_head *bh_result, int create) |
| { |
| if (iblock >= tux_sb(inode->i_sb)->volblocks) { |
| assert(!create); |
| return 0; |
| } |
| map_bh(bh_result, inode->i_sb, iblock); |
| return 0; |
| } |
| |
| static int tux3_vol_readpage(struct file *file, struct page *page) |
| { |
| return block_read_full_page(page, tux3_vol_get_block); |
| } |
| |
| #if 0 /* disabled writeback for now */ |
| static int tux3_vol_writepage(struct page *page, struct writeback_control *wbc) |
| { |
| return block_write_full_page(page, tux3_vol_get_block, wbc); |
| } |
| #endif |
| |
| /* Use tux3_vol_get_block() (physical map) but don't check buffer fork */ |
| static int tux3_vol_write_begin(struct file *file, |
| struct address_space *mapping, |
| loff_t pos, unsigned len, unsigned flags, |
| struct page **pagep, void **fsdata) |
| { |
| return tux3_write_begin(mapping, pos, len, flags, pagep, |
| tux3_vol_get_block, 0); |
| } |
| |
| const struct address_space_operations tux_vol_aops = { |
| .readpage = tux3_vol_readpage, |
| // .writepage = tux3_vol_writepage, |
| .writepage = tux3_disable_writepage, |
| .writepages = tux3_disable_writepages, |
| .write_begin = tux3_vol_write_begin, |
| .invalidatepage = tux3_invalidatepage, |
| // .is_partially_uptodate = block_is_partially_uptodate, |
| // .is_dirty_writeback = buffer_check_dirty_writeback, |
| }; |
| #endif /* __KERNEL__ */ |