releases/4.9.51/xfs-try-to-avoid-blowing-out-the-transaction-reservation-when-bunmaping-a-shared-extent.patch - pub/scm/linux/kernel/git/stable/stable-queue - Git at Google

 From e1a4e37cc7b665b6804fba812aca2f4d7402c249 Mon Sep 17 00:00:00 2001
 From: "Darrick J. Wong" <darrick.wong@oracle.com>
 Date: Wed, 14 Jun 2017 21:25:57 -0700
 Subject: xfs: try to avoid blowing out the transaction reservation when bunmaping a shared extent

 From: Darrick J. Wong <darrick.wong@oracle.com>

 commit e1a4e37cc7b665b6804fba812aca2f4d7402c249 upstream.

 In a pathological scenario where we are trying to bunmapi a single
 extent in which every other block is shared, it's possible that trying
 to unmap the entire large extent in a single transaction can generate so
 many EFIs that we overflow the transaction reservation.

 Therefore, use a heuristic to guess at the number of blocks we can
 safely unmap from a reflink file's data fork in an single transaction.
 This should prevent problems such as the log head slamming into the tail
 and ASSERTs that trigger because we've exceeded the transaction
 reservation.

 Note that since bunmapi can fail to unmap the entire range, we must also
 teach the deferred unmap code to roll into a new transaction whenever we
 get low on reservation.

 Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
 [hch: random edits, all bugs are my fault]
 Signed-off-by: Christoph Hellwig <hch@lst.de>
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 ---
  fs/xfs/libxfs/xfs_bmap.c     |   37 ++++++++++++++++++++++++++++---------
  fs/xfs/libxfs/xfs_bmap.h     |    2 +-
  fs/xfs/libxfs/xfs_refcount.c |   10 +---------
  fs/xfs/libxfs/xfs_refcount.h |   16 ++++++++++++++++
  fs/xfs/xfs_bmap_item.c       |   17 +++++++++++++++--
  fs/xfs/xfs_trans.h           |    2 +-
  fs/xfs/xfs_trans_bmap.c      |   11 +++++++++--
  7 files changed, 71 insertions(+), 24 deletions(-)

 --- a/fs/xfs/libxfs/xfs_bmap.c
 +++ b/fs/xfs/libxfs/xfs_bmap.c
 @@ -5555,6 +5555,7 @@ __xfs_bunmapi(
  	int			whichfork;	/* data or attribute fork */
  	xfs_fsblock_t		sum;
  	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
 +	xfs_fileoff_t		max_len;

  	trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);

 @@ -5576,6 +5577,16 @@ __xfs_bunmapi(
  	ASSERT(len > 0);
  	ASSERT(nexts >= 0);

 +	/*
 +	 * Guesstimate how many blocks we can unmap without running the risk of
 +	 * blowing out the transaction with a mix of EFIs and reflink
 +	 * adjustments.
 +	 */
 +	if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
 +		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
 +	else
 +		max_len = len;
 +
  	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
  	    (error = xfs_iread_extents(tp, ip, whichfork)))
  		return error;
 @@ -5621,7 +5632,7 @@ __xfs_bunmapi(

  	extno = 0;
  	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
 -	       (nexts == 0 || extno < nexts)) {
 +	       (nexts == 0 || extno < nexts) && max_len > 0) {
  		/*
  		 * Is the found extent after a hole in which bno lives?
  		 * Just back up to the previous extent, if so.
 @@ -5655,6 +5666,15 @@ __xfs_bunmapi(
  		}
  		if (del.br_startoff + del.br_blockcount > bno + 1)
  			del.br_blockcount = bno + 1 - del.br_startoff;
 +
 +		/* How much can we safely unmap? */
 +		if (max_len < del.br_blockcount) {
 +			del.br_startoff += del.br_blockcount - max_len;
 +			if (!wasdel)
 +				del.br_startblock += del.br_blockcount - max_len;
 +			del.br_blockcount = max_len;
 +		}
 +
  		sum = del.br_startblock + del.br_blockcount;
  		if (isrt &&
  		    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
 @@ -5835,6 +5855,7 @@ __xfs_bunmapi(
  		if (!isrt && wasdel)
  			xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);

 +		max_len -= del.br_blockcount;
  		bno = del.br_startoff - 1;
  nodelete:
  		/*
 @@ -6604,25 +6625,24 @@ xfs_bmap_finish_one(
  	int				whichfork,
  	xfs_fileoff_t			startoff,
  	xfs_fsblock_t			startblock,
 -	xfs_filblks_t			blockcount,
 +	xfs_filblks_t			*blockcount,
  	xfs_exntst_t			state)
  {
  	struct xfs_bmbt_irec		bmap;
  	int				nimaps = 1;
  	xfs_fsblock_t			firstfsb;
  	int				flags = XFS_BMAPI_REMAP;
 -	int				done;
  	int				error = 0;

  	bmap.br_startblock = startblock;
  	bmap.br_startoff = startoff;
 -	bmap.br_blockcount = blockcount;
 +	bmap.br_blockcount = *blockcount;
  	bmap.br_state = state;

  	trace_xfs_bmap_deferred(tp->t_mountp,
  			XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
  			XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
 -			ip->i_ino, whichfork, startoff, blockcount, state);
 +			ip->i_ino, whichfork, startoff, *blockcount, state);

  	if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK)
  		return -EFSCORRUPTED;
 @@ -6641,12 +6661,11 @@ xfs_bmap_finish_one(
  					bmap.br_blockcount, flags, &firstfsb,
  					bmap.br_blockcount, &bmap, &nimaps,
  					dfops);
 +		*blockcount = 0;
  		break;
  	case XFS_BMAP_UNMAP:
 -		error = xfs_bunmapi(tp, ip, bmap.br_startoff,
 -				bmap.br_blockcount, flags, 1, &firstfsb,
 -				dfops, &done);
 -		ASSERT(done);
 +		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
 +				XFS_BMAPI_REMAP, 1, &firstfsb, dfops);
  		break;
  	default:
  		ASSERT(0);
 --- a/fs/xfs/libxfs/xfs_bmap.h
 +++ b/fs/xfs/libxfs/xfs_bmap.h
 @@ -265,7 +265,7 @@ struct xfs_bmap_intent {
  int	xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_defer_ops *dfops,
  		struct xfs_inode *ip, enum xfs_bmap_intent_type type,
  		int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
 -		xfs_filblks_t blockcount, xfs_exntst_t state);
 +		xfs_filblks_t *blockcount, xfs_exntst_t state);
  int	xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
  		struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
  int	xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 --- a/fs/xfs/libxfs/xfs_refcount.c
 +++ b/fs/xfs/libxfs/xfs_refcount.c
 @@ -784,14 +784,6 @@ xfs_refcount_merge_extents(
  }

  /*
 - * While we're adjusting the refcounts records of an extent, we have
 - * to keep an eye on the number of extents we're dirtying -- run too
 - * many in a single transaction and we'll exceed the transaction's
 - * reservation and crash the fs.  Each record adds 12 bytes to the
 - * log (plus any key updates) so we'll conservatively assume 24 bytes
 - * per record.  We must also leave space for btree splits on both ends
 - * of the range and space for the CUD and a new CUI.
 - *
   * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
   * true incorrectly is a shutdown FS; the penalty for guessing false
   * incorrectly is more transaction rolls than might be necessary.
 @@ -822,7 +814,7 @@ xfs_refcount_still_have_space(
  	else if (overhead > cur->bc_tp->t_log_res)
  		return false;
  	return  cur->bc_tp->t_log_res - overhead >
 -		cur->bc_private.a.priv.refc.nr_ops * 32;
 +		cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
  }

  /*
 --- a/fs/xfs/libxfs/xfs_refcount.h
 +++ b/fs/xfs/libxfs/xfs_refcount.h
 @@ -67,4 +67,20 @@ extern int xfs_refcount_free_cow_extent(
  extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
  		xfs_agnumber_t agno);

 +/*
 + * While we're adjusting the refcounts records of an extent, we have
 + * to keep an eye on the number of extents we're dirtying -- run too
 + * many in a single transaction and we'll exceed the transaction's
 + * reservation and crash the fs.  Each record adds 12 bytes to the
 + * log (plus any key updates) so we'll conservatively assume 32 bytes
 + * per record.  We must also leave space for btree splits on both ends
 + * of the range and space for the CUD and a new CUI.
 + */
 +#define XFS_REFCOUNT_ITEM_OVERHEAD	32
 +
 +static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
 +{
 +	return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
 +}
 +
  #endif	/* __XFS_REFCOUNT_H__ */
 --- a/fs/xfs/xfs_bmap_item.c
 +++ b/fs/xfs/xfs_bmap_item.c
 @@ -395,6 +395,7 @@ xfs_bui_recover(
  	struct xfs_map_extent		*bmap;
  	xfs_fsblock_t			startblock_fsb;
  	xfs_fsblock_t			inode_fsb;
 +	xfs_filblks_t			count;
  	bool				op_ok;
  	struct xfs_bud_log_item		*budp;
  	enum xfs_bmap_intent_type	type;
 @@ -403,6 +404,7 @@ xfs_bui_recover(
  	struct xfs_trans		*tp;
  	struct xfs_inode		*ip = NULL;
  	struct xfs_defer_ops		dfops;
 +	struct xfs_bmbt_irec		irec;
  	xfs_fsblock_t			firstfsb;

  	ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
 @@ -480,13 +482,24 @@ xfs_bui_recover(
  	}
  	xfs_trans_ijoin(tp, ip, 0);

 +	count = bmap->me_len;
  	error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
  			ip, whichfork, bmap->me_startoff,
 -			bmap->me_startblock, bmap->me_len,
 -			state);
 +			bmap->me_startblock, &count, state);
  	if (error)
  		goto err_dfops;

 +	if (count > 0) {
 +		ASSERT(type == XFS_BMAP_UNMAP);
 +		irec.br_startblock = bmap->me_startblock;
 +		irec.br_blockcount = count;
 +		irec.br_startoff = bmap->me_startoff;
 +		irec.br_state = state;
 +		error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
 +		if (error)
 +			goto err_dfops;
 +	}
 +
  	/* Finish transaction, free inodes. */
  	error = xfs_defer_finish(&tp, &dfops, NULL);
  	if (error)
 --- a/fs/xfs/xfs_trans.h
 +++ b/fs/xfs/xfs_trans.h
 @@ -277,6 +277,6 @@ int xfs_trans_log_finish_bmap_update(str
  		struct xfs_bud_log_item *rudp, struct xfs_defer_ops *dfops,
  		enum xfs_bmap_intent_type type, struct xfs_inode *ip,
  		int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
 -		xfs_filblks_t blockcount, xfs_exntst_t state);
 +		xfs_filblks_t *blockcount, xfs_exntst_t state);

  #endif	/* __XFS_TRANS_H__ */
 --- a/fs/xfs/xfs_trans_bmap.c
 +++ b/fs/xfs/xfs_trans_bmap.c
 @@ -63,7 +63,7 @@ xfs_trans_log_finish_bmap_update(
  	int				whichfork,
  	xfs_fileoff_t			startoff,
  	xfs_fsblock_t			startblock,
 -	xfs_filblks_t			blockcount,
 +	xfs_filblks_t			*blockcount,
  	xfs_exntst_t			state)
  {
  	int				error;
 @@ -196,16 +196,23 @@ xfs_bmap_update_finish_item(
  	void				**state)
  {
  	struct xfs_bmap_intent		*bmap;
 +	xfs_filblks_t			count;
  	int				error;

  	bmap = container_of(item, struct xfs_bmap_intent, bi_list);
 +	count = bmap->bi_bmap.br_blockcount;
  	error = xfs_trans_log_finish_bmap_update(tp, done_item, dop,
  			bmap->bi_type,
  			bmap->bi_owner, bmap->bi_whichfork,
  			bmap->bi_bmap.br_startoff,
  			bmap->bi_bmap.br_startblock,
 -			bmap->bi_bmap.br_blockcount,
 +			&count,
  			bmap->bi_bmap.br_state);
 +	if (!error && count > 0) {
 +		ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
 +		bmap->bi_bmap.br_blockcount = count;
 +		return -EAGAIN;
 +	}
  	kmem_free(bmap);
  	return error;
  }
	From e1a4e37cc7b665b6804fba812aca2f4d7402c249 Mon Sep 17 00:00:00 2001
	From: "Darrick J. Wong" <darrick.wong@oracle.com>
	Date: Wed, 14 Jun 2017 21:25:57 -0700
	Subject: xfs: try to avoid blowing out the transaction reservation when bunmaping a shared extent

	From: Darrick J. Wong <darrick.wong@oracle.com>

	commit e1a4e37cc7b665b6804fba812aca2f4d7402c249 upstream.

	In a pathological scenario where we are trying to bunmapi a single
	extent in which every other block is shared, it's possible that trying
	to unmap the entire large extent in a single transaction can generate so
	many EFIs that we overflow the transaction reservation.

	Therefore, use a heuristic to guess at the number of blocks we can
	safely unmap from a reflink file's data fork in an single transaction.
	This should prevent problems such as the log head slamming into the tail
	and ASSERTs that trigger because we've exceeded the transaction
	reservation.

	Note that since bunmapi can fail to unmap the entire range, we must also
	teach the deferred unmap code to roll into a new transaction whenever we
	get low on reservation.

	Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
	[hch: random edits, all bugs are my fault]
	Signed-off-by: Christoph Hellwig <hch@lst.de>
	Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	---
	fs/xfs/libxfs/xfs_bmap.c \| 37 ++++++++++++++++++++++++++++---------
	fs/xfs/libxfs/xfs_bmap.h \| 2 +-
	fs/xfs/libxfs/xfs_refcount.c \| 10 +---------
	fs/xfs/libxfs/xfs_refcount.h \| 16 ++++++++++++++++
	fs/xfs/xfs_bmap_item.c \| 17 +++++++++++++++--
	fs/xfs/xfs_trans.h \| 2 +-
	fs/xfs/xfs_trans_bmap.c \| 11 +++++++++--
	7 files changed, 71 insertions(+), 24 deletions(-)

	--- a/fs/xfs/libxfs/xfs_bmap.c
	+++ b/fs/xfs/libxfs/xfs_bmap.c
	@@ -5555,6 +5555,7 @@ __xfs_bunmapi(
	int whichfork; /* data or attribute fork */
	xfs_fsblock_t sum;
	xfs_filblks_t len = rlen; / length to unmap in file */
	+ xfs_fileoff_t max_len;

	trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);

	@@ -5576,6 +5577,16 @@ __xfs_bunmapi(
	ASSERT(len > 0);
	ASSERT(nexts >= 0);

	+ /*
	+ * Guesstimate how many blocks we can unmap without running the risk of
	+ * blowing out the transaction with a mix of EFIs and reflink
	+ * adjustments.
	+ */
	+ if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
	+ max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
	+ else
	+ max_len = len;
	+
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	(error = xfs_iread_extents(tp, ip, whichfork)))
	return error;
	@@ -5621,7 +5632,7 @@ __xfs_bunmapi(

	extno = 0;
	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
	- (nexts == 0 \|\| extno < nexts)) {
	+ (nexts == 0 \|\| extno < nexts) && max_len > 0) {
	/*
	* Is the found extent after a hole in which bno lives?
	* Just back up to the previous extent, if so.
	@@ -5655,6 +5666,15 @@ __xfs_bunmapi(
	}
	if (del.br_startoff + del.br_blockcount > bno + 1)
	del.br_blockcount = bno + 1 - del.br_startoff;
	+
	+ /* How much can we safely unmap? */
	+ if (max_len < del.br_blockcount) {
	+ del.br_startoff += del.br_blockcount - max_len;
	+ if (!wasdel)
	+ del.br_startblock += del.br_blockcount - max_len;
	+ del.br_blockcount = max_len;
	+ }
	+
	sum = del.br_startblock + del.br_blockcount;
	if (isrt &&
	(mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
	@@ -5835,6 +5855,7 @@ __xfs_bunmapi(
	if (!isrt && wasdel)
	xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);

	+ max_len -= del.br_blockcount;
	bno = del.br_startoff - 1;
	nodelete:
	/*
	@@ -6604,25 +6625,24 @@ xfs_bmap_finish_one(
	int whichfork,
	xfs_fileoff_t startoff,
	xfs_fsblock_t startblock,
	- xfs_filblks_t blockcount,
	+ xfs_filblks_t *blockcount,
	xfs_exntst_t state)
	{
	struct xfs_bmbt_irec bmap;
	int nimaps = 1;
	xfs_fsblock_t firstfsb;
	int flags = XFS_BMAPI_REMAP;
	- int done;
	int error = 0;

	bmap.br_startblock = startblock;
	bmap.br_startoff = startoff;
	- bmap.br_blockcount = blockcount;
	+ bmap.br_blockcount = *blockcount;
	bmap.br_state = state;

	trace_xfs_bmap_deferred(tp->t_mountp,
	XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
	XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
	- ip->i_ino, whichfork, startoff, blockcount, state);
	+ ip->i_ino, whichfork, startoff, *blockcount, state);

	if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK)
	return -EFSCORRUPTED;
	@@ -6641,12 +6661,11 @@ xfs_bmap_finish_one(
	bmap.br_blockcount, flags, &firstfsb,
	bmap.br_blockcount, &bmap, &nimaps,
	dfops);
	+ *blockcount = 0;
	break;
	case XFS_BMAP_UNMAP:
	- error = xfs_bunmapi(tp, ip, bmap.br_startoff,
	- bmap.br_blockcount, flags, 1, &firstfsb,
	- dfops, &done);
	- ASSERT(done);
	+ error = __xfs_bunmapi(tp, ip, startoff, blockcount,
	+ XFS_BMAPI_REMAP, 1, &firstfsb, dfops);
	break;
	default:
	ASSERT(0);
	--- a/fs/xfs/libxfs/xfs_bmap.h
	+++ b/fs/xfs/libxfs/xfs_bmap.h
	@@ -265,7 +265,7 @@ struct xfs_bmap_intent {
	int xfs_bmap_finish_one(struct xfs_trans tp, struct xfs_defer_ops dfops,
	struct xfs_inode *ip, enum xfs_bmap_intent_type type,
	int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
	- xfs_filblks_t blockcount, xfs_exntst_t state);
	+ xfs_filblks_t *blockcount, xfs_exntst_t state);
	int xfs_bmap_map_extent(struct xfs_mount mp, struct xfs_defer_ops dfops,
	struct xfs_inode ip, struct xfs_bmbt_irec imap);
	int xfs_bmap_unmap_extent(struct xfs_mount mp, struct xfs_defer_ops dfops,
	--- a/fs/xfs/libxfs/xfs_refcount.c
	+++ b/fs/xfs/libxfs/xfs_refcount.c
	@@ -784,14 +784,6 @@ xfs_refcount_merge_extents(
	}

	/*
	- * While we're adjusting the refcounts records of an extent, we have
	- * to keep an eye on the number of extents we're dirtying -- run too
	- * many in a single transaction and we'll exceed the transaction's
	- * reservation and crash the fs. Each record adds 12 bytes to the
	- * log (plus any key updates) so we'll conservatively assume 24 bytes
	- * per record. We must also leave space for btree splits on both ends
	- * of the range and space for the CUD and a new CUI.
	- *
	* XXX: This is a pretty hand-wavy estimate. The penalty for guessing
	* true incorrectly is a shutdown FS; the penalty for guessing false
	* incorrectly is more transaction rolls than might be necessary.
	@@ -822,7 +814,7 @@ xfs_refcount_still_have_space(
	else if (overhead > cur->bc_tp->t_log_res)
	return false;
	return cur->bc_tp->t_log_res - overhead >
	- cur->bc_private.a.priv.refc.nr_ops * 32;
	+ cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
	}

	/*
	--- a/fs/xfs/libxfs/xfs_refcount.h
	+++ b/fs/xfs/libxfs/xfs_refcount.h
	@@ -67,4 +67,20 @@ extern int xfs_refcount_free_cow_extent(
	extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
	xfs_agnumber_t agno);

	+/*
	+ * While we're adjusting the refcounts records of an extent, we have
	+ * to keep an eye on the number of extents we're dirtying -- run too
	+ * many in a single transaction and we'll exceed the transaction's
	+ * reservation and crash the fs. Each record adds 12 bytes to the
	+ * log (plus any key updates) so we'll conservatively assume 32 bytes
	+ * per record. We must also leave space for btree splits on both ends
	+ * of the range and space for the CUD and a new CUI.
	+ */
	+#define XFS_REFCOUNT_ITEM_OVERHEAD 32
	+
	+static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
	+{
	+ return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
	+}
	+
	#endif /* __XFS_REFCOUNT_H__ */
	--- a/fs/xfs/xfs_bmap_item.c
	+++ b/fs/xfs/xfs_bmap_item.c
	@@ -395,6 +395,7 @@ xfs_bui_recover(
	struct xfs_map_extent *bmap;
	xfs_fsblock_t startblock_fsb;
	xfs_fsblock_t inode_fsb;
	+ xfs_filblks_t count;
	bool op_ok;
	struct xfs_bud_log_item *budp;
	enum xfs_bmap_intent_type type;
	@@ -403,6 +404,7 @@ xfs_bui_recover(
	struct xfs_trans *tp;
	struct xfs_inode *ip = NULL;
	struct xfs_defer_ops dfops;
	+ struct xfs_bmbt_irec irec;
	xfs_fsblock_t firstfsb;

	ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
	@@ -480,13 +482,24 @@ xfs_bui_recover(
	}
	xfs_trans_ijoin(tp, ip, 0);

	+ count = bmap->me_len;
	error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
	ip, whichfork, bmap->me_startoff,
	- bmap->me_startblock, bmap->me_len,
	- state);
	+ bmap->me_startblock, &count, state);
	if (error)
	goto err_dfops;

	+ if (count > 0) {
	+ ASSERT(type == XFS_BMAP_UNMAP);
	+ irec.br_startblock = bmap->me_startblock;
	+ irec.br_blockcount = count;
	+ irec.br_startoff = bmap->me_startoff;
	+ irec.br_state = state;
	+ error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
	+ if (error)
	+ goto err_dfops;
	+ }
	+
	/* Finish transaction, free inodes. */
	error = xfs_defer_finish(&tp, &dfops, NULL);
	if (error)
	--- a/fs/xfs/xfs_trans.h
	+++ b/fs/xfs/xfs_trans.h
	@@ -277,6 +277,6 @@ int xfs_trans_log_finish_bmap_update(str
	struct xfs_bud_log_item rudp, struct xfs_defer_ops dfops,
	enum xfs_bmap_intent_type type, struct xfs_inode *ip,
	int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
	- xfs_filblks_t blockcount, xfs_exntst_t state);
	+ xfs_filblks_t *blockcount, xfs_exntst_t state);

	#endif /* __XFS_TRANS_H__ */
	--- a/fs/xfs/xfs_trans_bmap.c
	+++ b/fs/xfs/xfs_trans_bmap.c
	@@ -63,7 +63,7 @@ xfs_trans_log_finish_bmap_update(
	int whichfork,
	xfs_fileoff_t startoff,
	xfs_fsblock_t startblock,
	- xfs_filblks_t blockcount,
	+ xfs_filblks_t *blockcount,
	xfs_exntst_t state)
	{
	int error;
	@@ -196,16 +196,23 @@ xfs_bmap_update_finish_item(
	void **state)
	{
	struct xfs_bmap_intent *bmap;
	+ xfs_filblks_t count;
	int error;

	bmap = container_of(item, struct xfs_bmap_intent, bi_list);
	+ count = bmap->bi_bmap.br_blockcount;
	error = xfs_trans_log_finish_bmap_update(tp, done_item, dop,
	bmap->bi_type,
	bmap->bi_owner, bmap->bi_whichfork,
	bmap->bi_bmap.br_startoff,
	bmap->bi_bmap.br_startblock,
	- bmap->bi_bmap.br_blockcount,
	+ &count,
	bmap->bi_bmap.br_state);
	+ if (!error && count > 0) {
	+ ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
	+ bmap->bi_bmap.br_blockcount = count;
	+ return -EAGAIN;
	+ }
	kmem_free(bmap);
	return error;
	}