Merge tag 'gfs2-4.18.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2 Pull gfs2 updates from Bob Peterson: "We've got nine more patches for this merge window. - remove sd_jheightsize to greatly simplify some code (Andreas Gruenbacher) - fix some comments (Andreas) - fix a glock recursion bug when allocation errors occur (Andreas) - improve the hole_size function so it returns the entire hole rather than figuring it out piecemeal (Andreas) - clean up gfs2_stuffed_write_end to remove a lot of redundancy (Andreas) - clarify code with regard to the way ordered writes are processed (Andreas) - a bunch of improvements and cleanups of the iomap code to pave the way for iomap writes, which is a future patch set (Andreas) - fix a bug where block reservations can run off the end of a bitmap (Bob Peterson) - add Andreas to the MAINTAINERS file (Bob Peterson)" * tag 'gfs2-4.18.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2: MAINTAINERS: Add Andreas Gruenbacher as a maintainer for gfs2 gfs2: Iomap cleanups and improvements gfs2: Remove ordered write mode handling from gfs2_trans_add_data gfs2: gfs2_stuffed_write_end cleanup gfs2: hole_size improvement GFS2: gfs2_free_extlen can return an extent that is too long GFS2: Fix allocation error bug with recursive rgrp glocking gfs2: Update find_metapath comment gfs2: Remove sdp->sd_jheightsize

commit: 1e43938bfbad4fd51bea4c092c1ded06790bf79c [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Mon Jun 04 14:36:38 2018 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> Mon Jun 04 14:36:38 2018 -0700
tree: 91aadec745dfc3b753e911f8f3602160e75975dc
parent: 8a4631144b793590f1b5d2532a7f66df6de29423 [diff]
parent: 6d1c2cf2247599c5b50df7d04e254ef0edec812c [diff]
diff --git a/MAINTAINERS b/MAINTAINERS
index 1e22aa6..fdf15f3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -5950,8 +5950,8 @@
 F:	scripts/get_maintainer.pl
 
 GFS2 FILE SYSTEM
-M:	Steven Whitehouse <swhiteho@redhat.com>
 M:	Bob Peterson <rpeterso@redhat.com>
+M:	Andreas Gruenbacher <agruenba@redhat.com>
 L:	cluster-devel@redhat.com
 W:	http://sources.redhat.com/cluster/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index f587165..35f5ee2 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c

@@ -54,8 +54,7 @@
 			continue;
 		if (start >= to)
 			break;
-		if (gfs2_is_jdata(ip))
-			set_buffer_uptodate(bh);
+		set_buffer_uptodate(bh);
 		gfs2_trans_add_data(ip->i_gl, bh);
 	}
 }
@@ -747,18 +746,21 @@
 	put_page(page);
 
 	gfs2_trans_end(sdp);
-	if (pos + len > ip->i_inode.i_size)
-		gfs2_trim_blocks(&ip->i_inode);
-	goto out_trans_fail;
+	if (alloc_required) {
+		gfs2_inplace_release(ip);
+		if (pos + len > ip->i_inode.i_size)
+			gfs2_trim_blocks(&ip->i_inode);
+	}
+	goto out_qunlock;
 
 out_endtrans:
 	gfs2_trans_end(sdp);
 out_trans_fail:
-	if (alloc_required) {
+	if (alloc_required)
 		gfs2_inplace_release(ip);
 out_qunlock:
+	if (alloc_required)
 		gfs2_quota_unlock(ip);
-	}
 out_unlock:
 	if (&ip->i_inode == sdp->sd_rindex) {
 		gfs2_glock_dq(&m_ip->i_gh);
@@ -814,7 +816,6 @@
  * @inode: The inode
  * @dibh: The buffer_head containing the on-disk inode
  * @pos: The file position
- * @len: The length of the write
  * @copied: How much was actually copied by the VFS
  * @page: The page
  *
@@ -824,17 +825,15 @@
  * Returns: errno
  */
 static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
-				  loff_t pos, unsigned len, unsigned copied,
+				  loff_t pos, unsigned copied,
 				  struct page *page)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 	u64 to = pos + copied;
 	void *kaddr;
 	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
 
-	BUG_ON(pos + len > gfs2_max_stuffed_size(ip));
+	BUG_ON(pos + copied > gfs2_max_stuffed_size(ip));
 
 	kaddr = kmap_atomic(page);
 	memcpy(buf + pos, kaddr + pos, copied);
@@ -850,20 +849,6 @@
 			i_size_write(inode, to);
 		mark_inode_dirty(inode);
 	}
-
-	if (inode == sdp->sd_rindex) {
-		adjust_fs_space(inode);
-		sdp->sd_rindex_uptodate = 0;
-	}
-
-	brelse(dibh);
-	gfs2_trans_end(sdp);
-	if (inode == sdp->sd_rindex) {
-		gfs2_glock_dq(&m_ip->i_gh);
-		gfs2_holder_uninit(&m_ip->i_gh);
-	}
-	gfs2_glock_dq(&ip->i_gh);
-	gfs2_holder_uninit(&ip->i_gh);
 	return copied;
 }
 
@@ -877,9 +862,8 @@
  * @page: The page that has been written
  * @fsdata: The fsdata (unused in GFS2)
  *
- * The main write_end function for GFS2. We have a separate one for
- * stuffed files as they are slightly different, otherwise we just
- * put our locking around the VFS provided functions.
+ * The main write_end function for GFS2. We just put our locking around the VFS
+ * provided functions.
  *
  * Returns: errno
  */
@@ -900,32 +884,39 @@
 	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
 
 	ret = gfs2_meta_inode_buffer(ip, &dibh);
-	if (unlikely(ret)) {
-		unlock_page(page);
-		put_page(page);
-		goto failed;
+	if (unlikely(ret))
+		goto out;
+
+	if (gfs2_is_stuffed(ip)) {
+		ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page);
+		page = NULL;
+		goto out2;
 	}
 
-	if (gfs2_is_stuffed(ip))
-		return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
-
-	if (!gfs2_is_writeback(ip))
+	if (gfs2_is_jdata(ip))
 		gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len);
+	else
+		gfs2_ordered_add_inode(ip);
 
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+	page = NULL;
 	if (tr->tr_num_buf_new)
 		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 	else
 		gfs2_trans_add_meta(ip->i_gl, dibh);
 
-
+out2:
 	if (inode == sdp->sd_rindex) {
 		adjust_fs_space(inode);
 		sdp->sd_rindex_uptodate = 0;
 	}
 
 	brelse(dibh);
-failed:
+out:
+	if (page) {
+		unlock_page(page);
+		put_page(page);
+	}
 	gfs2_trans_end(sdp);
 	gfs2_inplace_release(ip);
 	if (ip->i_qadata && ip->i_qadata->qa_qd_num)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 278ed08..a7b586e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c

@@ -89,10 +89,12 @@
 		map_bh(bh, inode->i_sb, block);
 
 	set_buffer_uptodate(bh);
-	if (!gfs2_is_jdata(ip))
-		mark_buffer_dirty(bh);
-	if (!gfs2_is_writeback(ip))
+	if (gfs2_is_jdata(ip))
 		gfs2_trans_add_data(ip->i_gl, bh);
+	else {
+		mark_buffer_dirty(bh);
+		gfs2_ordered_add_inode(ip);
+	}
 
 	if (release) {
 		unlock_page(page);
@@ -176,8 +178,8 @@
 /**
  * find_metapath - Find path through the metadata tree
  * @sdp: The superblock
- * @mp: The metapath to return the result in
  * @block: The disk block to look up
+ * @mp: The metapath to return the result in
  * @height: The pre-calculated height of the metadata tree
  *
  *   This routine returns a struct metapath structure that defines a path
@@ -188,8 +190,7 @@
  *   filesystem with a blocksize of 4096.
  *
  *   find_metapath() would return a struct metapath structure set to:
- *   mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
- *   and mp_list[2] = 165.
+ *   mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
  *
  *   That means that in order to get to the block containing the byte at
  *   offset 101342453, we would load the indirect block pointed to by pointer
@@ -279,6 +280,21 @@
 	return p + mp->mp_list[height];
 }
 
+static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
+{
+	const struct buffer_head *bh = mp->mp_bh[height];
+	return (const __be64 *)(bh->b_data + bh->b_size);
+}
+
+static void clone_metapath(struct metapath *clone, struct metapath *mp)
+{
+	unsigned int hgt;
+
+	*clone = *mp;
+	for (hgt = 0; hgt < mp->mp_aheight; hgt++)
+		get_bh(clone->mp_bh[hgt]);
+}
+
 static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
 {
 	const __be64 *t;
@@ -420,20 +436,140 @@
 	return (ptr - first);
 }
 
-static inline void bmap_lock(struct gfs2_inode *ip, int create)
+typedef const __be64 *(*gfs2_metadata_walker)(
+		struct metapath *mp,
+		const __be64 *start, const __be64 *end,
+		u64 factor, void *data);
+
+#define WALK_STOP ((__be64 *)0)
+#define WALK_NEXT ((__be64 *)1)
+
+static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
+		u64 len, struct metapath *mp, gfs2_metadata_walker walker,
+		void *data)
 {
-	if (create)
-		down_write(&ip->i_rw_mutex);
-	else
-		down_read(&ip->i_rw_mutex);
+	struct metapath clone;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	const __be64 *start, *end, *ptr;
+	u64 factor = 1;
+	unsigned int hgt;
+	int ret = 0;
+
+	for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
+		factor *= sdp->sd_inptrs;
+
+	for (;;) {
+		u64 step;
+
+		/* Walk indirect block. */
+		start = metapointer(hgt, mp);
+		end = metaend(hgt, mp);
+
+		step = (end - start) * factor;
+		if (step > len)
+			end = start + DIV_ROUND_UP_ULL(len, factor);
+
+		ptr = walker(mp, start, end, factor, data);
+		if (ptr == WALK_STOP)
+			break;
+		if (step >= len)
+			break;
+		len -= step;
+		if (ptr != WALK_NEXT) {
+			BUG_ON(!*ptr);
+			mp->mp_list[hgt] += ptr - start;
+			goto fill_up_metapath;
+		}
+
+lower_metapath:
+		/* Decrease height of metapath. */
+		if (mp != &clone) {
+			clone_metapath(&clone, mp);
+			mp = &clone;
+		}
+		brelse(mp->mp_bh[hgt]);
+		mp->mp_bh[hgt] = NULL;
+		if (!hgt)
+			break;
+		hgt--;
+		factor *= sdp->sd_inptrs;
+
+		/* Advance in metadata tree. */
+		(mp->mp_list[hgt])++;
+		start = metapointer(hgt, mp);
+		end = metaend(hgt, mp);
+		if (start >= end) {
+			mp->mp_list[hgt] = 0;
+			if (!hgt)
+				break;
+			goto lower_metapath;
+		}
+
+fill_up_metapath:
+		/* Increase height of metapath. */
+		if (mp != &clone) {
+			clone_metapath(&clone, mp);
+			mp = &clone;
+		}
+		ret = fillup_metapath(ip, mp, ip->i_height - 1);
+		if (ret < 0)
+			break;
+		hgt += ret;
+		for (; ret; ret--)
+			do_div(factor, sdp->sd_inptrs);
+		mp->mp_aheight = hgt + 1;
+	}
+	if (mp == &clone)
+		release_metapath(mp);
+	return ret;
 }
 
-static inline void bmap_unlock(struct gfs2_inode *ip, int create)
+struct gfs2_hole_walker_args {
+	u64 blocks;
+};
+
+static const __be64 *gfs2_hole_walker(struct metapath *mp,
+		const __be64 *start, const __be64 *end,
+		u64 factor, void *data)
 {
-	if (create)
-		up_write(&ip->i_rw_mutex);
-	else
-		up_read(&ip->i_rw_mutex);
+	struct gfs2_hole_walker_args *args = data;
+	const __be64 *ptr;
+
+	for (ptr = start; ptr < end; ptr++) {
+		if (*ptr) {
+			args->blocks += (ptr - start) * factor;
+			if (mp->mp_aheight == mp->mp_fheight)
+				return WALK_STOP;
+			return ptr;  /* increase height */
+		}
+	}
+	args->blocks += (end - start) * factor;
+	return WALK_NEXT;
+}
+
+/**
+ * gfs2_hole_size - figure out the size of a hole
+ * @inode: The inode
+ * @lblock: The logical starting block number
+ * @len: How far to look (in blocks)
+ * @mp: The metapath at lblock
+ * @iomap: The iomap to store the hole size in
+ *
+ * This function modifies @mp.
+ *
+ * Returns: errno on error
+ */
+static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
+			  struct metapath *mp, struct iomap *iomap)
+{
+	struct gfs2_hole_walker_args args = { };
+	int ret = 0;
+
+	ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
+	if (!ret)
+		iomap->length = args.blocks << inode->i_blkbits;
+	return ret;
 }
 
 static inline __be64 *gfs2_indirect_init(struct metapath *mp,
@@ -462,15 +598,11 @@
 };
 
 /**
- * gfs2_bmap_alloc - Build a metadata tree of the requested height
+ * gfs2_iomap_alloc - Build a metadata tree of the requested height
  * @inode: The GFS2 inode
- * @lblock: The logical starting block of the extent
- * @bh_map: This is used to return the mapping details
- * @zero_new: True if newly allocated blocks should be zeroed
+ * @iomap: The iomap structure
+ * @flags: iomap flags
  * @mp: The metapath, with proper height information calculated
- * @maxlen: The max number of data blocks to alloc
- * @dblock: Pointer to return the resulting new block
- * @dblks: Pointer to return the number of blocks allocated
  *
  * In this routine we may have to alloc:
  *   i) Indirect blocks to grow the metadata tree height
@@ -483,6 +615,13 @@
  * blocks are available, there will only be one request per bmap call)
  * and uses the state machine to initialise the blocks in order.
  *
+ * Right now, this function will allocate at most one indirect block
+ * worth of data -- with a default block size of 4K, that's slightly
+ * less than 2M.  If this limitation is ever removed to allow huge
+ * allocations, we would probably still want to limit the iomap size we
+ * return to avoid stalling other tasks during huge writes; the next
+ * iomap iteration would then find the blocks already allocated.
+ *
  * Returns: errno on error
  */
 
@@ -497,6 +636,7 @@
 	unsigned dblks = 0;
 	unsigned ptrs_per_blk;
 	const unsigned end_of_metadata = mp->mp_fheight - 1;
+	int ret;
 	enum alloc_state state;
 	__be64 *ptr;
 	__be64 zero_bn = 0;
@@ -507,6 +647,8 @@
 
 	gfs2_trans_add_meta(ip->i_gl, dibh);
 
+	down_write(&ip->i_rw_mutex);
+
 	if (mp->mp_fheight == mp->mp_aheight) {
 		struct buffer_head *bh;
 		int eob;
@@ -542,11 +684,10 @@
 	blks = dblks + iblks;
 	i = mp->mp_aheight;
 	do {
-		int error;
 		n = blks - alloced;
-		error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
-		if (error)
-			return error;
+		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
+		if (ret)
+			goto out;
 		alloced += n;
 		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
 			gfs2_trans_add_unrevoke(sdp, bn, n);
@@ -602,7 +743,7 @@
 			dblks = n;
 			ptr = metapointer(end_of_metadata, mp);
 			iomap->addr = bn << inode->i_blkbits;
-			iomap->flags |= IOMAP_F_NEW;
+			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
 			while (n-- > 0)
 				*ptr++ = cpu_to_be64(bn++);
 			break;
@@ -612,64 +753,10 @@
 	iomap->length = (u64)dblks << inode->i_blkbits;
 	ip->i_height = mp->mp_fheight;
 	gfs2_add_inode_blocks(&ip->i_inode, alloced);
-	gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
-	return 0;
-}
-
-/**
- * hole_size - figure out the size of a hole
- * @inode: The inode
- * @lblock: The logical starting block number
- * @mp: The metapath
- *
- * Returns: The hole size in bytes
- *
- */
-static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct metapath mp_eof;
-	u64 factor = 1;
-	int hgt;
-	u64 holesz = 0;
-	const __be64 *first, *end, *ptr;
-	const struct buffer_head *bh;
-	u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
-	int zeroptrs;
-	bool done = false;
-
-	/* Get another metapath, to the very last byte */
-	find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
-	for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
-		bh = mp->mp_bh[hgt];
-		if (bh) {
-			zeroptrs = 0;
-			first = metapointer(hgt, mp);
-			end = (const __be64 *)(bh->b_data + bh->b_size);
-
-			for (ptr = first; ptr < end; ptr++) {
-				if (*ptr) {
-					done = true;
-					break;
-				} else {
-					zeroptrs++;
-				}
-			}
-		} else {
-			zeroptrs = sdp->sd_inptrs;
-		}
-		if (factor * zeroptrs >= lblock_stop - lblock + 1) {
-			holesz = lblock_stop - lblock + 1;
-			break;
-		}
-		holesz += factor * zeroptrs;
-
-		factor *= sdp->sd_inptrs;
-		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
-			(mp->mp_list[hgt - 1])++;
-	}
-	return holesz << inode->i_blkbits;
+	gfs2_dinode_out(ip, dibh->b_data);
+out:
+	up_write(&ip->i_rw_mutex);
+	return ret;
 }
 
 static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
@@ -685,121 +772,130 @@
 }
 
 /**
- * gfs2_iomap_begin - Map blocks from an inode to disk blocks
+ * gfs2_iomap_get - Map blocks from an inode to disk blocks
  * @inode: The inode
  * @pos: Starting position in bytes
  * @length: Length to map, in bytes
  * @flags: iomap flags
  * @iomap: The iomap structure
+ * @mp: The metapath
  *
  * Returns: errno
  */
-int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
-		     unsigned flags, struct iomap *iomap)
+static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
+			  unsigned flags, struct iomap *iomap,
+			  struct metapath *mp)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct metapath mp = { .mp_aheight = 1, };
-	unsigned int factor = sdp->sd_sb.sb_bsize;
-	const u64 *arr = sdp->sd_heightsize;
 	__be64 *ptr;
 	sector_t lblock;
-	sector_t lend;
-	int ret = 0;
+	sector_t lblock_stop;
+	int ret;
 	int eob;
-	unsigned int len;
+	u64 len;
 	struct buffer_head *bh;
 	u8 height;
 
-	trace_gfs2_iomap_start(ip, pos, length, flags);
-	if (!length) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (!length)
+		return -EINVAL;
 
 	if (gfs2_is_stuffed(ip)) {
 		if (flags & IOMAP_REPORT) {
+			if (pos >= i_size_read(inode))
+				return -ENOENT;
 			gfs2_stuffed_iomap(inode, iomap);
-			if (pos >= iomap->length)
-				ret = -ENOENT;
-			goto out;
+			return 0;
 		}
 		BUG_ON(!(flags & IOMAP_WRITE));
 	}
-
 	lblock = pos >> inode->i_blkbits;
-	lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
-
 	iomap->offset = lblock << inode->i_blkbits;
-	iomap->addr = IOMAP_NULL_ADDR;
-	iomap->type = IOMAP_HOLE;
-	iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
-	iomap->flags = IOMAP_F_MERGED;
-	bmap_lock(ip, flags & IOMAP_WRITE);
+	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
+	len = lblock_stop - lblock + 1;
 
-	/*
-	 * Directory data blocks have a struct gfs2_meta_header header, so the
-	 * remaining size is smaller than the filesystem block size.  Logical
-	 * block numbers for directories are in units of this remaining size!
-	 */
-	if (gfs2_is_dir(ip)) {
-		factor = sdp->sd_jbsize;
-		arr = sdp->sd_jheightsize;
-	}
+	down_read(&ip->i_rw_mutex);
 
-	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
+	ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]);
 	if (ret)
-		goto out_release;
+		goto unlock;
 
 	height = ip->i_height;
-	while ((lblock + 1) * factor > arr[height])
+	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
 		height++;
-	find_metapath(sdp, lblock, &mp, height);
+	find_metapath(sdp, lblock, mp, height);
 	if (height > ip->i_height || gfs2_is_stuffed(ip))
 		goto do_alloc;
 
-	ret = lookup_metapath(ip, &mp);
+	ret = lookup_metapath(ip, mp);
 	if (ret)
-		goto out_release;
+		goto unlock;
 
-	if (mp.mp_aheight != ip->i_height)
+	if (mp->mp_aheight != ip->i_height)
 		goto do_alloc;
 
-	ptr = metapointer(ip->i_height - 1, &mp);
+	ptr = metapointer(ip->i_height - 1, mp);
 	if (*ptr == 0)
 		goto do_alloc;
 
-	iomap->type = IOMAP_MAPPED;
-	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
+	bh = mp->mp_bh[ip->i_height - 1];
+	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob);
 
-	bh = mp.mp_bh[ip->i_height - 1];
-	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
+	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
+	iomap->length = len << inode->i_blkbits;
+	iomap->type = IOMAP_MAPPED;
+	iomap->flags = IOMAP_F_MERGED;
 	if (eob)
 		iomap->flags |= IOMAP_F_BOUNDARY;
-	iomap->length = (u64)len << inode->i_blkbits;
 
-out_release:
-	release_metapath(&mp);
-	bmap_unlock(ip, flags & IOMAP_WRITE);
 out:
-	trace_gfs2_iomap_end(ip, iomap, ret);
+	iomap->bdev = inode->i_sb->s_bdev;
+unlock:
+	up_read(&ip->i_rw_mutex);
 	return ret;
 
 do_alloc:
-	if (flags & IOMAP_WRITE) {
-		ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
-	} else if (flags & IOMAP_REPORT) {
+	iomap->addr = IOMAP_NULL_ADDR;
+	iomap->length = len << inode->i_blkbits;
+	iomap->type = IOMAP_HOLE;
+	iomap->flags = 0;
+	if (flags & IOMAP_REPORT) {
 		loff_t size = i_size_read(inode);
 		if (pos >= size)
 			ret = -ENOENT;
-		else if (height <= ip->i_height)
-			iomap->length = hole_size(inode, lblock, &mp);
+		else if (height == ip->i_height)
+			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 		else
 			iomap->length = size - pos;
 	}
-	goto out_release;
+	goto out;
 }
 
+static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+			    unsigned flags, struct iomap *iomap)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct metapath mp = { .mp_aheight = 1, };
+	int ret;
+
+	trace_gfs2_iomap_start(ip, pos, length, flags);
+	if (flags & IOMAP_WRITE) {
+		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+		if (!ret && iomap->type == IOMAP_HOLE)
+			ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+		release_metapath(&mp);
+	} else {
+		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+		release_metapath(&mp);
+	}
+	trace_gfs2_iomap_end(ip, iomap, ret);
+	return ret;
+}
+
+const struct iomap_ops gfs2_iomap_ops = {
+	.iomap_begin = gfs2_iomap_begin,
+};
+
 /**
  * gfs2_block_map - Map one or more blocks of an inode to a disk block
  * @inode: The inode
@@ -825,25 +921,34 @@
 		   struct buffer_head *bh_map, int create)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct iomap iomap;
-	int ret, flags = 0;
+	loff_t pos = (loff_t)lblock << inode->i_blkbits;
+	loff_t length = bh_map->b_size;
+	struct metapath mp = { .mp_aheight = 1, };
+	struct iomap iomap = { };
+	int ret;
 
 	clear_buffer_mapped(bh_map);
 	clear_buffer_new(bh_map);
 	clear_buffer_boundary(bh_map);
 	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 
-	if (create)
-		flags |= IOMAP_WRITE;
-	ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
-			       bh_map->b_size, flags, &iomap);
-	if (ret) {
-		if (!create && ret == -ENOENT) {
-			/* Return unmapped buffer beyond the end of file.  */
+	if (create) {
+		ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
+		if (!ret && iomap.type == IOMAP_HOLE)
+			ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
+		release_metapath(&mp);
+	} else {
+		ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
+		release_metapath(&mp);
+
+		/* Return unmapped buffer beyond the end of file. */
+		if (ret == -ENOENT) {
 			ret = 0;
+			goto out;
 		}
-		goto out;
 	}
+	if (ret)
+		goto out;
 
 	if (iomap.length > bh_map->b_size) {
 		iomap.length = bh_map->b_size;
@@ -945,8 +1050,10 @@
 		err = 0;
 	}
 
-	if (!gfs2_is_writeback(ip))
+	if (gfs2_is_jdata(ip))
 		gfs2_trans_add_data(ip->i_gl, bh);
+	else
+		gfs2_ordered_add_inode(ip);
 
 	zero_user(page, offset, length);
 	mark_buffer_dirty(bh);
@@ -1056,6 +1163,19 @@
 	return error;
 }
 
+int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
+			 struct iomap *iomap)
+{
+	struct metapath mp = { .mp_aheight = 1, };
+	int ret;
+
+	ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
+	if (!ret && iomap->type == IOMAP_HOLE)
+		ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
+	release_metapath(&mp);
+	return ret;
+}
+
 /**
  * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
  * @ip: inode

diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index c3402fe..6b18fb3 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h

@@ -46,11 +46,13 @@
 	}
 }
 
+extern const struct iomap_ops gfs2_iomap_ops;
+
 extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
 extern int gfs2_block_map(struct inode *inode, sector_t lblock,
 			  struct buffer_head *bh, int create);
-extern int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
-			    unsigned flags, struct iomap *iomap);
+extern int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
+				struct iomap *iomap);
 extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
 			   u64 *dblock, unsigned *extlen);
 extern int gfs2_setattr_size(struct inode *inode, u64 size);

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4b71f02..7137db7 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c

@@ -733,7 +733,7 @@
 	struct gfs2_inode *ip = GFS2_I(inode);
 	loff_t end = offset + len;
 	struct buffer_head *dibh;
-	struct iomap iomap;
+	struct iomap iomap = { };
 	int error;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -749,8 +749,8 @@
 	}
 
 	while (offset < end) {
-		error = gfs2_iomap_begin(inode, offset, end - offset,
-					 IOMAP_WRITE, &iomap);
+		error = gfs2_iomap_get_alloc(inode, offset, end - offset,
+					     &iomap);
 		if (error)
 			goto out;
 		offset = iomap.offset + iomap.length;

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 1b6b1e3..d2ad817 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h

@@ -116,6 +116,7 @@
 
 static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
 {
+	BUG_ON(rbm->offset >= rbm->rgd->rd_data);
 	return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
 		rbm->offset;
 }
@@ -696,8 +697,6 @@
 	u32 sd_max_dirres;	/* Max blocks needed to add a directory entry */
 	u32 sd_max_height;	/* Max height of a file's metadata tree */
 	u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
-	u32 sd_max_jheight; /* Max height of journaled file's meta tree */
-	u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1];
 	u32 sd_max_dents_per_leaf; /* Max number of dirents in a leaf block */
 
 	struct gfs2_args sd_args;	/* Mount arguments */

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8700eb8..feda55f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c

@@ -2006,10 +2006,6 @@
 	return 0;
 }
 
-const struct iomap_ops gfs2_iomap_ops = {
-	.iomap_begin = gfs2_iomap_begin,
-};
-
 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       u64 start, u64 len)
 {

diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 1862e31..2024143 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h

@@ -14,6 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/writeback.h>
 #include "incore.h"
+#include "inode.h"
 
 /**
  * gfs2_log_lock - acquire the right to mess with the log manager
@@ -50,8 +51,12 @@
 
 static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct gfs2_sbd *sdp;
 
+	if (!gfs2_is_ordered(ip))
+		return;
+
+	sdp = GFS2_SB(&ip->i_inode);
 	if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
 		spin_lock(&sdp->sd_ordered_lock);
 		if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3ba3f16..c2469833b 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c

@@ -335,25 +335,6 @@
 	sdp->sd_heightsize[x] = ~0;
 	gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
 
-	sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
-				 sizeof(struct gfs2_dinode);
-	sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
-	for (x = 2;; x++) {
-		u64 space, d;
-		u32 m;
-
-		space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
-		d = space;
-		m = do_div(d, sdp->sd_inptrs);
-
-		if (d != sdp->sd_jheightsize[x - 1] || m)
-			break;
-		sdp->sd_jheightsize[x] = space;
-	}
-	sdp->sd_max_jheight = x;
-	sdp->sd_jheightsize[x] = ~0;
-	gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
-
 	sdp->sd_max_dents_per_leaf = (sdp->sd_sb.sb_bsize -
 				      sizeof(struct gfs2_leaf)) /
 				     GFS2_MIN_DIRENT_SIZE;

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 7a98abd..e8585df 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c

@@ -735,7 +735,10 @@
 			if (!buffer_uptodate(bh))
 				goto unlock_out;
 		}
-		gfs2_trans_add_data(ip->i_gl, bh);
+		if (gfs2_is_jdata(ip))
+			gfs2_trans_add_data(ip->i_gl, bh);
+		else
+			gfs2_ordered_add_inode(ip);
 
 		/* If we need to write to the next block as well */
 		if (to_write > (bsize - boff)) {

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8b68391..6bc5cfe 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c

@@ -372,8 +372,8 @@
 		start = bi->bi_bh->b_data;
 		if (bi->bi_clone)
 			start = bi->bi_clone;
-		end = start + bi->bi_bh->b_size;
 		start += bi->bi_offset;
+		end = start + bi->bi_len;
 		BUG_ON(rbm.offset & 3);
 		start += (rbm.offset / GFS2_NBBY);
 		bytes = min_t(u32, len / GFS2_NBBY, (end - start));

diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index c75caca..064c9a0 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c

@@ -143,32 +143,21 @@
  * @gl: The inode glock associated with the buffer
  * @bh: The buffer to add
  *
- * This is used in two distinct cases:
- * i) In ordered write mode
- *    We put the data buffer on a list so that we can ensure that it's
- *    synced to disk at the right time
- * ii) In journaled data mode
- *    We need to journal the data block in the same way as metadata in
- *    the functions above. The difference is that here we have a tag
- *    which is two __be64's being the block number (as per meta data)
- *    and a flag which says whether the data block needs escaping or
- *    not. This means we need a new log entry for each 251 or so data
- *    blocks, which isn't an enormous overhead but twice as much as
- *    for normal metadata blocks.
+ * This is used in journaled data mode.
+ * We need to journal the data block in the same way as metadata in
+ * the functions above. The difference is that here we have a tag
+ * which is two __be64's being the block number (as per meta data)
+ * and a flag which says whether the data block needs escaping or
+ * not. This means we need a new log entry for each 251 or so data
+ * blocks, which isn't an enormous overhead but twice as much as
+ * for normal metadata blocks.
  */
 void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
 {
 	struct gfs2_trans *tr = current->journal_info;
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
-	struct address_space *mapping = bh->b_page->mapping;
-	struct gfs2_inode *ip = GFS2_I(mapping->host);
 	struct gfs2_bufdata *bd;
 
-	if (!gfs2_is_jdata(ip)) {
-		gfs2_ordered_add_inode(ip);
-		return;
-	}
-
 	lock_buffer(bh);
 	if (buffer_pinned(bh)) {
 		set_bit(TR_TOUCHED, &tr->tr_flags);
commit	1e43938bfbad4fd51bea4c092c1ded06790bf79c	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Mon Jun 04 14:36:38 2018 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	Mon Jun 04 14:36:38 2018 -0700
tree	91aadec745dfc3b753e911f8f3602160e75975dc
parent	8a4631144b793590f1b5d2532a7f66df6de29423 [diff]
parent	6d1c2cf2247599c5b50df7d04e254ef0edec812c [diff]