xfs: use the btree bulk loading mechanism to rebuild the inode btrees during repair
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 5897db7..1cd84f9d 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -513,38 +513,33 @@ xfs_inobt_stage_cursor(
  */
 void
 xfs_inobt_commit_staged_btree(
-	struct xfs_trans	*tp,
-	struct xbtree_afakeroot	*afake,
+	struct xfs_btree_cur	*cur,
 	struct xfs_buf		*agbp)
 {
 	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp);
+	struct xbtree_afakeroot	*afake = cur->bc_private.a.afake;
 
-	agi->agi_root = cpu_to_be32(afake->af_root);
-	agi->agi_level = cpu_to_be32(afake->af_levels);
-	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
-}
+	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
 
-/*
- * Install a new finobt btree root.  Caller is responsible for invalidating
- * and freeing the old btree blocks.
- */
-void
-xfs_finobt_commit_staged_btree(
-	struct xfs_trans	*tp,
-	struct xbtree_afakeroot	*afake,
-	struct xfs_buf		*agbp)
-{
-	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp);
-	int			fields;
+	if (cur->bc_btnum == XFS_BTNUM_INO) {
+		agi->agi_root = cpu_to_be32(afake->af_root);
+		agi->agi_level = cpu_to_be32(afake->af_levels);
+		xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT |
+						     XFS_AGI_LEVEL);
+		xfs_btree_commit_afakeroot(cur, agbp, &xfs_inobt_ops);
+	} else {
+		int			fields;
 
-	fields = XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL;
-	if (xfs_sb_version_hasfinobtblocks(&tp->t_mountp->m_sb)) {
-		agi->agi_fino_blocks = cpu_to_be32(afake->af_blocks);
-		fields |= XFS_AGI_FINO_BLOCKS;
+		fields = XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL;
+		if (xfs_sb_version_hasfinobtblocks(&cur->bc_mp->m_sb)) {
+			agi->agi_fino_blocks = cpu_to_be32(afake->af_blocks);
+			fields |= XFS_AGI_FINO_BLOCKS;
+		}
+		agi->agi_free_root = cpu_to_be32(afake->af_root);
+		agi->agi_free_level = cpu_to_be32(afake->af_levels);
+		xfs_ialloc_log_agi(cur->bc_tp, agbp, fields);
+		xfs_btree_commit_afakeroot(cur, agbp, &xfs_finobt_ops);
 	}
-	agi->agi_free_root = cpu_to_be32(afake->af_root);
-	agi->agi_free_level = cpu_to_be32(afake->af_levels);
-	xfs_ialloc_log_agi(tp, agbp, fields);
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index 6893188..d115327 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -71,9 +71,7 @@ int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp,
 		xfs_agnumber_t agno, xfs_btnum_t btnum,
 		struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp);
 
-void xfs_inobt_commit_staged_btree(struct xfs_trans *tp,
-		struct xbtree_afakeroot *afake, struct xfs_buf *agbp);
-void xfs_finobt_commit_staged_btree(struct xfs_trans *tp,
-		struct xbtree_afakeroot *afake, struct xfs_buf *agbp);
+void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur,
+		struct xfs_buf *agbp);
 
 #endif	/* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/scrub/ialloc_repair.c b/fs/xfs/scrub/ialloc_repair.c
index 5c96d2a..9830f28 100644
--- a/fs/xfs/scrub/ialloc_repair.c
+++ b/fs/xfs/scrub/ialloc_repair.c
@@ -91,9 +91,11 @@ struct xrep_ibt {
 	/* Record under construction. */
 	struct xfs_inobt_rec_incore	rie;
 
-	/* Fake roots for new btrees. */
-	struct xbtree_afakeroot	ino_root;
-	struct xbtree_afakeroot	fino_root;
+	/* new inobt information */
+	struct xrep_newbt	new_inobt_info;
+
+	/* new finobt information */
+	struct xrep_newbt	new_finobt_info;
 
 	/* Old inode btree blocks we found in the rmap. */
 	struct xfs_bitmap	old_iallocbt_blocks;
@@ -109,11 +111,11 @@ struct xrep_ibt {
 	/* Number of inodes in use. */
 	unsigned int		iused;
 
-	/* Number of inobt records needed. */
-	unsigned int		inobt_recs;
-
 	/* Number of finobt records needed. */
 	unsigned int		finobt_recs;
+
+	/* get_data()'s position in the inode record array. */
+	uint64_t		iter;
 };
 
 /*
@@ -167,6 +169,28 @@ xrep_ibt_check_ifree(
 	return 0;
 }
 
+/* Stash the accumulated inobt record for rebuilding. */
+STATIC int
+xrep_ibt_stash_record(
+	struct xrep_ibt		*ri)
+{
+	int			error;
+
+	ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie);
+	if (ri->rie.ir_freecount > 0)
+		ri->finobt_recs++;
+
+	trace_xrep_ibt_insert(ri->sc->mp, ri->sc->sa.agno, ri->rie.ir_startino,
+			ri->rie.ir_holemask, ri->rie.ir_count,
+			ri->rie.ir_freecount, ri->rie.ir_free);
+
+	error = xfbma_append(ri->inode_records, &ri->rie);
+	if (error)
+		return error;
+	ri->rie.ir_startino = NULLAGINO;
+	return 0;
+}
+
 /*
  * Given an extent of inodes and an inode cluster buffer, calculate the
  * location of the corresponding inobt record (creating it if necessary),
@@ -205,13 +229,9 @@ xrep_ibt_cluster_record(
 	 */
 	if (ri->rie.ir_startino != NULLAGINO &&
 	    ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) {
-		if (xfs_inobt_rec_freecount(&ri->rie) > 0)
-			ri->finobt_recs++;
-		ri->inobt_recs++;
-		error = xfbma_append(ri->inode_records, &ri->rie);
+		error = xrep_ibt_stash_record(ri);
 		if (error)
 			return error;
-		ri->rie.ir_startino = NULLAGINO;
 	}
 
 	if (ri->rie.ir_startino == NULLAGINO) {
@@ -403,6 +423,8 @@ xrep_ibt_find_inodes(
 	struct xfs_btree_cur	*cur;
 	int			error;
 
+	ri->rie.ir_startino = NULLAGINO;
+
 	/* Collect all reverse mappings for inode blocks. */
 	cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
 	error = xfs_rmap_query_all(cur, xrep_ibt_walk_rmap, ri);
@@ -412,10 +434,7 @@ xrep_ibt_find_inodes(
 
 	/* If we have a record ready to go, add it to the array. */
 	if (ri->rie.ir_startino != NULLAGINO) {
-		if (xfs_inobt_rec_freecount(&ri->rie) > 0)
-			ri->finobt_recs++;
-		ri->inobt_recs++;
-		error = xfbma_append(ri->inode_records, &ri->rie);
+		error = xrep_ibt_stash_record(ri);
 		if (error)
 			return error;
 	}
@@ -431,6 +450,7 @@ xrep_ibt_reset_counters(
 	struct xfs_scrub	*sc = ri->sc;
 	struct xfs_agi		*agi;
 	struct xfs_perag	*pag = sc->sa.pag;
+	struct xfs_buf		*bp;
 	unsigned int		freecount;
 
 	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
@@ -440,195 +460,167 @@ xrep_ibt_reset_counters(
 	xfs_force_summary_recalc(sc->mp);
 
 	/*
-	 * Reset the per-AG info, both incore and ondisk.  Mark the incore
-	 * state stale in case we fail out of here.
+	 * Mark the incore per-AG inode state stale and use the accessor
+	 * function to forcibly reload it from the values we just logged.  We
+	 * still hold the AGI buffer locked, so we can discard the bp returned.
 	 */
 	ASSERT(pag->pagi_init);
 	pag->pagi_init = 0;
-	pag->pagi_count = ri->icount;
-	pag->pagi_freecount = freecount;
 
 	agi->agi_count = cpu_to_be32(ri->icount);
 	agi->agi_freecount = cpu_to_be32(freecount);
 	xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp,
 			   XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
 
-	return 0;
+	return xfs_ialloc_read_agi(sc->mp, sc->tp, sc->sa.agno, &bp);
 }
 
-/* Initialize a new inode btree roots and implant it into the AGI. */
-STATIC int
-xrep_ibt_stage_btree(
+/* Do we even want this record? */
+static inline bool
+xrep_ibt_rec_wanted(
 	struct xrep_ibt			*ri,
-	xfs_btnum_t			btnum,
-	enum xfs_ag_resv_type		resv)
+	struct xfs_btree_cur		*cur,
+	struct xfs_inobt_rec_incore	*irec)
 {
-	struct xfs_scrub		*sc = ri->sc;
-	struct xfs_buf			*bp;
-	const struct xfs_buf_ops	*ops;
-	struct xbtree_afakeroot		*afake;
-	xfs_fsblock_t			fsbno;
-	int				error;
+	/* Ignore null records. */
+	if (xfbma_is_null(ri->inode_records, irec))
+		return false;
 
-	switch (btnum) {
-	case XFS_BTNUM_INO:
-		ops = &xfs_inobt_buf_ops;
-		afake = &ri->ino_root;
-		break;
-	case XFS_BTNUM_FINO:
-		ops = &xfs_finobt_buf_ops;
-		afake = &ri->fino_root;
-		break;
-	default:
-		ASSERT(0);
-		return -EFSCORRUPTED;
-	}
+	/* finobt only wants inode records with at least 1 free inode. */
+	if (cur->bc_btnum == XFS_BTNUM_FINO &&
+	    xfs_inobt_rec_freecount(irec) == 0)
+		return false;
 
-	/* Initialize new btree root. */
-	error = xrep_alloc_ag_block(sc, &XFS_RMAP_OINFO_INOBT, &fsbno, resv);
-	if (error)
-		return error;
-	error = xrep_init_btblock(sc, fsbno, &bp, btnum, ops);
-	if (error)
-		return error;
-
-	xbtree_afakeroot_init(sc->mp, afake, XFS_FSB_TO_AGBNO(sc->mp, fsbno));
-	return 0;
+	return true;
 }
 
-/*
- * Initialize new inode btrees root blocks and set up fake roots so we can
- * build new btrees and only plug them into the AGI if we're successful.
- */
+/* Retrieve inobt data for bulk load. */
 STATIC int
-xrep_ibt_stage_btrees(
-	struct xrep_ibt		*ri)
-{
-	struct xfs_scrub	*sc = ri->sc;
-	enum xfs_ag_resv_type	resv;
-	xfs_extlen_t		nr_blocks;
-	int			error;
-
-	/* Do we have enough space to rebuild all inode trees? */
-	nr_blocks = xfs_iallocbt_calc_size(sc->mp, ri->inobt_recs);
-	if (xfs_sb_version_hasfinobt(&sc->mp->m_sb))
-		nr_blocks += xfs_iallocbt_calc_size(sc->mp, ri->finobt_recs);
-	if (!xrep_ag_has_space(sc->sa.pag, nr_blocks, XFS_AG_RESV_NONE))
-		return -ENOSPC;
-
-	resv = XFS_AG_RESV_NONE;
-	error = xrep_ibt_stage_btree(ri, XFS_BTNUM_INO, resv);
-	if (error || !xfs_sb_version_hasfinobt(&sc->mp->m_sb))
-		return error;
-
-	/*
-	 * If we made a per-AG reservation for the finobt then we must account
-	 * the new block correctly.
-	 */
-	if (!sc->mp->m_finobt_nores)
-		resv = XFS_AG_RESV_METADATA;
-	return xrep_ibt_stage_btree(ri, XFS_BTNUM_FINO, resv);
-}
-
-/* Insert an inode chunk record into a given btree. */
-static int
-xrep_ibt_insert_btrec(
-	struct xfs_btree_cur			*cur,
-	const struct xfs_inobt_rec_incore	*rie,
-	unsigned int				freecount)
-{
-	int					stat;
-	int					error;
-
-	error = xfs_inobt_lookup(cur, rie->ir_startino, XFS_LOOKUP_EQ, &stat);
-	if (error)
-		return error;
-	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 0);
-	error = xfs_inobt_insert_rec(cur, rie->ir_holemask, rie->ir_count,
-			freecount, rie->ir_free, &stat);
-	if (error)
-		return error;
-	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
-	return error;
-}
-
-/* Insert an inode chunk record into both inode btrees. */
-static int
-xrep_ibt_insert_rec(
-	const void			*item,
+xrep_ibt_get_data(
+	struct xfs_btree_cur		*cur,
 	void				*priv)
 {
-	const struct xfs_inobt_rec_incore	*rie = item;
+	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
 	struct xrep_ibt			*ri = priv;
-	struct xfs_scrub		*sc = ri->sc;
-	struct xfs_btree_cur		*cur;
-	unsigned int			freecount;
 	int				error;
 
-	freecount = xfs_inobt_rec_freecount(rie);
+	do {
+		error = xfbma_get(ri->inode_records, ri->iter++, irec);
+	} while (error == 0 && !xrep_ibt_rec_wanted(ri, cur, irec));
 
-	trace_xrep_ibt_insert(sc->mp, sc->sa.agno, rie->ir_startino,
-			rie->ir_holemask, rie->ir_count, freecount,
-			rie->ir_free);
-
-	/* Insert into the inobt. */
-	cur = xfs_inobt_stage_cursor(sc->mp, sc->tp, &ri->ino_root,
-			sc->sa.agno, XFS_BTNUM_INO);
-	error = xrep_ibt_insert_btrec(cur, rie, freecount);
-	if (error)
-		goto out_cur;
-	xfs_btree_del_cursor(cur, error);
-
-	/* Insert into the finobt if chunk has free inodes. */
-	if (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && freecount != 0) {
-		cur = xfs_inobt_stage_cursor(sc->mp, sc->tp, &ri->fino_root,
-				sc->sa.agno, XFS_BTNUM_FINO);
-		error = xrep_ibt_insert_btrec(cur, rie, freecount);
-		if (error)
-			goto out_cur;
-		xfs_btree_del_cursor(cur, error);
-	}
-
-	return xrep_roll_ag_trans(sc);
-out_cur:
-	xfs_btree_del_cursor(cur, error);
 	return error;
 }
 
+/* Feed one of the new inobt blocks to the bulk loader. */
+STATIC int
+xrep_ibt_bload_alloc(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	void			*priv)
+{
+	struct xrep_ibt		*ri = priv;
+
+	if (cur->bc_btnum == XFS_BTNUM_INO)
+		return xrep_newbt_alloc_block(cur, &ri->new_inobt_info, ptr);
+	return xrep_newbt_alloc_block(cur, &ri->new_finobt_info, ptr);
+}
+
 /* Build new inode btrees and dispose of the old one. */
 STATIC int
 xrep_ibt_build_new_trees(
 	struct xrep_ibt		*ri)
 {
+	struct xfs_btree_bload	ino_bload;
+	struct xfs_btree_bload	fino_bload;
 	struct xfs_scrub	*sc = ri->sc;
+	struct xfs_btree_cur	*ino_cur;
+	struct xfs_btree_cur	*fino_cur = NULL;
+	bool			need_finobt;
 	int			error;
 
+	need_finobt = xfs_sb_version_hasfinobt(&sc->mp->m_sb);
+
 	/*
-	 * Sort the inode extents by startino to avoid btree splits when we
-	 * rebuild the inode btrees.
+	 * Sort the inode extents by startino or else the btree records will
+	 * be in the wrong order.
 	 */
 	error = xfbma_sort(ri->inode_records, xfs_inobt_rec_incore_cmp);
 	if (error)
 		return error;
 
 	/*
-	 * Create a new btree for staging all the refcount records we collected
-	 * earlier.  This btree will not be rooted in the AGF until we've
+	 * Create new btrees for staging all the inobt records we collected
+	 * earlier.  These btrees will not be rooted in the AGI until we've
 	 * succesfully reloaded the tree.
 	 */
-	error = xrep_ibt_stage_btrees(ri);
-	if (error)
-		return error;
 
-	/* Add all records. */
-	error = xfbma_iter_del(ri->inode_records, xrep_ibt_insert_rec, ri);
+	/* Set up inobt staging cursor. */
+	xrep_newbt_init(&ri->new_inobt_info, sc, &XFS_RMAP_OINFO_INOBT,
+			XFS_AGB_TO_FSB(sc->mp, sc->sa.agno,
+				       XFS_IBT_BLOCK(sc->mp)),
+			XFS_AG_RESV_NONE);
+	ino_cur = xfs_inobt_stage_cursor(sc->mp, sc->tp,
+			&ri->new_inobt_info.afake, sc->sa.agno, XFS_BTNUM_INO);
+	error = xfs_btree_bload_init(ino_cur, &ino_bload,
+			xfbma_length(ri->inode_records), 0, 0);
+	xfs_btree_del_cursor(ino_cur, error);
 	if (error)
-		return error;
+		goto err_inobt;
 
-	/* Clean transaction ahead of installing the new btree roots. */
-	error = xrep_roll_ag_trans(sc);
+	/* Set up finobt staging cursor. */
+	if (need_finobt) {
+		enum xfs_ag_resv_type	resv = XFS_AG_RESV_METADATA;
+
+		if (sc->mp->m_finobt_nores)
+			resv = XFS_AG_RESV_NONE;
+
+		xrep_newbt_init(&ri->new_finobt_info, sc, &XFS_RMAP_OINFO_INOBT,
+				XFS_AGB_TO_FSB(sc->mp, sc->sa.agno,
+					       XFS_FIBT_BLOCK(sc->mp)),
+				resv);
+		fino_cur = xfs_inobt_stage_cursor(sc->mp, sc->tp,
+				&ri->new_finobt_info.afake, sc->sa.agno,
+				XFS_BTNUM_FINO);
+		error = xfs_btree_bload_init(fino_cur, &fino_bload,
+				ri->finobt_recs, 0, 0);
+		xfs_btree_del_cursor(fino_cur, error);
+		if (error)
+			goto err_finobt;
+	}
+
+	/* Reserve all the space we need to build the new btrees. */
+	error = xrep_newbt_reserve_space(&ri->new_inobt_info,
+			ino_bload.nr_blocks);
 	if (error)
-		return error;
+		goto err_finobt;
+
+	if (need_finobt) {
+		error = xrep_newbt_reserve_space(&ri->new_finobt_info,
+				fino_bload.nr_blocks);
+		if (error)
+			goto err_finobt;
+	}
+
+	/* Add all inobt records. */
+	ri->iter = 0;
+	ino_cur = xfs_inobt_stage_cursor(sc->mp, sc->tp,
+			&ri->new_inobt_info.afake, sc->sa.agno, XFS_BTNUM_INO);
+	error = xfs_btree_bload(ino_cur, &ino_bload, xrep_ibt_get_data,
+			xrep_ibt_bload_alloc, ri);
+	if (error)
+		goto err_inocur;
+
+	/* Add all finobt records. */
+	if (need_finobt) {
+		ri->iter = 0;
+		fino_cur = xfs_inobt_stage_cursor(sc->mp, sc->tp,
+				&ri->new_finobt_info.afake, sc->sa.agno,
+				XFS_BTNUM_FINO);
+		error = xfs_btree_bload(fino_cur, &fino_bload,
+				xrep_ibt_get_data, xrep_ibt_bload_alloc, ri);
+		if (error)
+			goto err_finocur;
+	}
 
 	/*
 	 * Re-read the AGI so that the buffer type is set properly.  Since we
@@ -638,16 +630,45 @@ xrep_ibt_build_new_trees(
 	 */
 	error = xfs_read_agi(sc->mp, sc->tp, sc->sa.agno, &sc->sa.agi_bp);
 	if (error)
-		return error;
+		goto err_finocur;
 
 	/* Install new btree roots. */
-	xfs_inobt_commit_staged_btree(sc->tp, &ri->ino_root, sc->sa.agi_bp);
-	if (xfs_sb_version_hasfinobt(&sc->mp->m_sb))
-		xfs_finobt_commit_staged_btree(sc->tp, &ri->fino_root,
-				sc->sa.agi_bp);
+	xfs_inobt_commit_staged_btree(ino_cur, sc->sa.agi_bp);
+	xfs_btree_del_cursor(ino_cur, 0);
+
+	if (fino_cur) {
+		xfs_inobt_commit_staged_btree(fino_cur, sc->sa.agi_bp);
+		xfs_btree_del_cursor(fino_cur, 0);
+	}
 
 	/* Reset the AGI counters now that we've changed the inode roots. */
-	return xrep_ibt_reset_counters(ri);
+	error = xrep_ibt_reset_counters(ri);
+	if (error)
+		goto err_finobt;
+
+	/* Free unused blocks and bitmap. */
+	if (fino_cur) {
+		error = xrep_newbt_unreserve_space(&ri->new_finobt_info);
+		if (error)
+			goto err_inobt;
+	}
+	error = xrep_newbt_unreserve_space(&ri->new_inobt_info);
+	if (error)
+		return error;
+
+	return xrep_roll_ag_trans(sc);
+
+err_finocur:
+	if (need_finobt)
+		xfs_btree_del_cursor(fino_cur, error);
+err_inocur:
+	xfs_btree_del_cursor(ino_cur, error);
+err_finobt:
+	if (need_finobt)
+		xrep_newbt_unreserve_space(&ri->new_finobt_info);
+err_inobt:
+	xrep_newbt_unreserve_space(&ri->new_inobt_info);
+	return error;
 }
 
 /*