| From tytso@mit.edu Mon Apr 19 10:19:40 2010 |
| From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> |
| Date: Mon, 15 Mar 2010 20:25:55 -0400 |
| Subject: ext4: invalidate pages if delalloc block allocation fails. |
| To: stable@kernel.org |
| Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> |
| Message-ID: <1268699165-17461-2-git-send-email-tytso@mit.edu> |
| |
| |
| From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> |
| |
| commit c4a0c46ec92c194c873232b88debce4e1a448483 upstream. |
| |
| We are a bit aggressive in invalidating all the pages. But |
| it is ok because we really don't know why the block allocation |
| failed and it is better to come off the writeback path |
| so that the user can look for more info. |
| |
| Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> |
| Signed-off-by: Jayson R. King <dev@jaysonking.com> |
| Signed-off-by: Theodore Ts'o <tytso@mit.edu> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| |
| --- |
| fs/ext4/inode.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++-------- |
| 1 file changed, 73 insertions(+), 12 deletions(-) |
| |
| --- a/fs/ext4/inode.c |
| +++ b/fs/ext4/inode.c |
| @@ -1821,6 +1821,39 @@ static inline void __unmap_underlying_bl |
| unmap_underlying_metadata(bdev, bh->b_blocknr + i); |
| } |
| |
| +static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, |
| + sector_t logical, long blk_cnt) |
| +{ |
| + int nr_pages, i; |
| + pgoff_t index, end; |
| + struct pagevec pvec; |
| + struct inode *inode = mpd->inode; |
| + struct address_space *mapping = inode->i_mapping; |
| + |
| + index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| + end = (logical + blk_cnt - 1) >> |
| + (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| + while (index <= end) { |
| + nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); |
| + if (nr_pages == 0) |
| + break; |
| + for (i = 0; i < nr_pages; i++) { |
| + struct page *page = pvec.pages[i]; |
| + index = page->index; |
| + if (index > end) |
| + break; |
| + index++; |
| + |
| + BUG_ON(!PageLocked(page)); |
| + BUG_ON(PageWriteback(page)); |
| + block_invalidatepage(page, 0); |
| + ClearPageUptodate(page); |
| + unlock_page(page); |
| + } |
| + } |
| + return; |
| +} |
| + |
| /* |
| * mpage_da_map_blocks - go through given space |
| * |
| @@ -1830,7 +1863,7 @@ static inline void __unmap_underlying_bl |
| * The function skips space we know is already mapped to disk blocks. |
| * |
| */ |
| -static void mpage_da_map_blocks(struct mpage_da_data *mpd) |
| +static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
| { |
| int err = 0; |
| struct buffer_head *lbh = &mpd->lbh; |
| @@ -1841,7 +1874,7 @@ static void mpage_da_map_blocks(struct m |
| * We consider only non-mapped and non-allocated blocks |
| */ |
| if (buffer_mapped(lbh) && !buffer_delay(lbh)) |
| - return; |
| + return 0; |
| |
| new.b_state = lbh->b_state; |
| new.b_blocknr = 0; |
| @@ -1852,10 +1885,38 @@ static void mpage_da_map_blocks(struct m |
| * to write simply return |
| */ |
| if (!new.b_size) |
| - return; |
| + return 0; |
| err = mpd->get_block(mpd->inode, next, &new, 1); |
| - if (err) |
| - return; |
| + if (err) { |
| + |
| + /* If get block returns with error |
| + * we simply return. Later writepage |
| + * will redirty the page and writepages |
| + * will find the dirty page again |
| + */ |
| + if (err == -EAGAIN) |
| + return 0; |
| + /* |
| + * get block failure will cause us |
| + * to loop in writepages. Because |
| + * a_ops->writepage won't be able to |
| + * make progress. The page will be redirtied |
| + * by writepage and writepages will again |
| + * try to write the same. |
| + */ |
| + printk(KERN_EMERG "%s block allocation failed for inode %lu " |
| + "at logical offset %llu with max blocks " |
| + "%zd with error %d\n", |
| + __func__, mpd->inode->i_ino, |
| + (unsigned long long)next, |
| + lbh->b_size >> mpd->inode->i_blkbits, err); |
| + printk(KERN_EMERG "This should not happen.!! " |
| + "Data will be lost\n"); |
| +		/* invalidate all the pages */ |
| + ext4_da_block_invalidatepages(mpd, next, |
| + lbh->b_size >> mpd->inode->i_blkbits); |
| + return err; |
| + } |
| BUG_ON(new.b_size == 0); |
| |
| if (buffer_new(&new)) |
| @@ -1868,7 +1929,7 @@ static void mpage_da_map_blocks(struct m |
| if (buffer_delay(lbh) || buffer_unwritten(lbh)) |
| mpage_put_bnr_to_bhs(mpd, next, &new); |
| |
| - return; |
| + return 0; |
| } |
| |
| #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
| @@ -1937,8 +1998,8 @@ flush_it: |
| * We couldn't merge the block to our extent, so we |
| * need to flush current extent and start new one |
| */ |
| - mpage_da_map_blocks(mpd); |
| - mpage_da_submit_io(mpd); |
| + if (mpage_da_map_blocks(mpd) == 0) |
| + mpage_da_submit_io(mpd); |
| mpd->io_done = 1; |
| return; |
| } |
| @@ -1980,8 +2041,8 @@ static int __mpage_da_writepage(struct p |
| * and start IO on them using writepage() |
| */ |
| if (mpd->next_page != mpd->first_page) { |
| - mpage_da_map_blocks(mpd); |
| - mpage_da_submit_io(mpd); |
| + if (mpage_da_map_blocks(mpd) == 0) |
| + mpage_da_submit_io(mpd); |
| /* |
| * skip rest of the page in the page_vec |
| */ |
| @@ -2102,8 +2163,8 @@ static int mpage_da_writepages(struct ad |
| * Handle last extent of pages |
| */ |
| if (!mpd.io_done && mpd.next_page != mpd.first_page) { |
| - mpage_da_map_blocks(&mpd); |
| - mpage_da_submit_io(&mpd); |
| + if (mpage_da_map_blocks(&mpd) == 0) |
| + mpage_da_submit_io(&mpd); |
| } |
| |
| wbc->nr_to_write = to_write - mpd.pages_written; |