patch xfsprogs-rmap-btree-single-owner-2
diff --git a/db/agf.c b/db/agf.c
index d9a07ca..f0121ab 100644
--- a/db/agf.c
+++ b/db/agf.c
@@ -55,6 +55,9 @@
{ "cntroot", FLDT_AGBLOCK,
OI(OFF(roots) + XFS_BTNUM_CNT * SZ(roots[XFS_BTNUM_CNT])), C1, 0,
TYP_CNTBT },
+ { "rmaproot", FLDT_AGBLOCK,
+ OI(OFF(roots) + XFS_BTNUM_RMAP * SZ(roots[XFS_BTNUM_RMAP])), C1, 0,
+ TYP_RMAPBT },
{ "levels", FLDT_UINT32D, OI(OFF(levels)), CI(XFS_BTNUM_AGF),
FLD_ARRAY|FLD_SKIPALL, TYP_NONE },
{ "bnolevel", FLDT_UINT32D,
@@ -63,6 +66,9 @@
{ "cntlevel", FLDT_UINT32D,
OI(OFF(levels) + XFS_BTNUM_CNT * SZ(levels[XFS_BTNUM_CNT])), C1, 0,
TYP_NONE },
+ { "rmaplevel", FLDT_UINT32D,
+ OI(OFF(levels) + XFS_BTNUM_RMAP * SZ(levels[XFS_BTNUM_RMAP])), C1, 0,
+ TYP_NONE },
{ "flfirst", FLDT_UINT32D, OI(OFF(flfirst)), C1, 0, TYP_NONE },
{ "fllast", FLDT_UINT32D, OI(OFF(fllast)), C1, 0, TYP_NONE },
{ "flcount", FLDT_UINT32D, OI(OFF(flcount)), C1, 0, TYP_NONE },
diff --git a/db/btblock.c b/db/btblock.c
index cdb8b1d..707bac8 100644
--- a/db/btblock.c
+++ b/db/btblock.c
@@ -96,6 +96,12 @@
sizeof(xfs_inobt_rec_t),
sizeof(__be32),
},
+ { XFS_RMAP_CRC_MAGIC,
+ XFS_BTREE_SBLOCK_CRC_LEN,
+ sizeof(struct xfs_rmap_key),
+ sizeof(struct xfs_rmap_rec),
+ sizeof(__be32),
+ },
{ 0,
},
};
@@ -571,3 +577,47 @@
{ NULL }
};
#undef ROFF
+
+/* RMAP btree blocks */
+const field_t rmapbt_crc_hfld[] = {
+ { "", FLDT_RMAPBT_CRC, OI(0), C1, 0, TYP_NONE },
+ { NULL }
+};
+
+#define OFF(f) bitize(offsetof(struct xfs_btree_block, bb_ ## f))
+const field_t rmapbt_crc_flds[] = {
+ { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+ { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+ { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+ { "leftsib", FLDT_AGBLOCK, OI(OFF(u.s.bb_leftsib)), C1, 0, TYP_RMAPBT },
+ { "rightsib", FLDT_AGBLOCK, OI(OFF(u.s.bb_rightsib)), C1, 0, TYP_RMAPBT },
+ { "bno", FLDT_DFSBNO, OI(OFF(u.s.bb_blkno)), C1, 0, TYP_RMAPBT },
+ { "lsn", FLDT_UINT64X, OI(OFF(u.s.bb_lsn)), C1, 0, TYP_NONE },
+ { "uuid", FLDT_UUID, OI(OFF(u.s.bb_uuid)), C1, 0, TYP_NONE },
+ { "owner", FLDT_AGNUMBER, OI(OFF(u.s.bb_owner)), C1, 0, TYP_NONE },
+ { "crc", FLDT_CRC, OI(OFF(u.s.bb_crc)), C1, 0, TYP_NONE },
+ { "recs", FLDT_RMAPBTREC, btblock_rec_offset, btblock_rec_count,
+ FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+ { "keys", FLDT_RMAPBTKEY, btblock_key_offset, btblock_key_count,
+ FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+ { "ptrs", FLDT_RMAPBTPTR, btblock_ptr_offset, btblock_key_count,
+ FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_RMAPBT },
+ { NULL }
+};
+#undef OFF
+
+#define KOFF(f) bitize(offsetof(struct xfs_rmap_key, rm_ ## f))
+const field_t rmapbt_key_flds[] = {
+ { "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA },
+ { NULL }
+};
+#undef KOFF
+
+#define ROFF(f) bitize(offsetof(struct xfs_rmap_rec, rm_ ## f))
+const field_t rmapbt_rec_flds[] = {
+ { "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA },
+ { "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE },
+ { "owner", FLDT_UINT64X, OI(ROFF(owner)), C1, 0, TYP_NONE },
+ { NULL }
+};
+#undef ROFF
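+
+/*
+ * Hypothetical xfs_db walk on an rmapbt-enabled filesystem using the fields
+ * added above:
+ *
+ *   xfs_db> agf 0
+ *   xfs_db> addr rmaproot
+ *   xfs_db> print
+ *
+ * "addr rmaproot" follows the new AGF field to the rmap btree root block,
+ * which is then decoded with rmapbt_crc_flds.
+ */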
diff --git a/db/btblock.h b/db/btblock.h
index daee060..d8662a1 100644
--- a/db/btblock.h
+++ b/db/btblock.h
@@ -51,4 +51,9 @@
extern const struct field cntbt_key_flds[];
extern const struct field cntbt_rec_flds[];
+extern const struct field rmapbt_crc_flds[];
+extern const struct field rmapbt_crc_hfld[];
+extern const struct field rmapbt_key_flds[];
+extern const struct field rmapbt_rec_flds[];
+
extern int btblock_size(void *obj, int startoff, int idx);
diff --git a/db/field.c b/db/field.c
index 816065e..d185b23 100644
--- a/db/field.c
+++ b/db/field.c
@@ -164,6 +164,15 @@
{ FLDT_CNTBTREC, "cntbtrec", fp_sarray, (char *)cntbt_rec_flds,
SI(bitsz(xfs_alloc_rec_t)), 0, NULL, cntbt_rec_flds },
+ { FLDT_RMAPBT_CRC, "rmapbt", NULL, (char *)rmapbt_crc_flds, btblock_size,
+ FTARG_SIZE, NULL, rmapbt_crc_flds },
+ { FLDT_RMAPBTKEY, "rmapbtkey", fp_sarray, (char *)rmapbt_key_flds,
+ SI(bitsz(struct xfs_rmap_key)), 0, NULL, rmapbt_key_flds },
+ { FLDT_RMAPBTPTR, "rmapbtptr", fp_num, "%u",
+ SI(bitsz(xfs_rmap_ptr_t)), 0, fa_agblock, NULL },
+ { FLDT_RMAPBTREC, "rmapbtrec", fp_sarray, (char *)rmapbt_rec_flds,
+ SI(bitsz(struct xfs_rmap_rec)), 0, NULL, rmapbt_rec_flds },
+
/* CRC field */
{ FLDT_CRC, "crc", fp_crc, "%#x (%s)", SI(bitsz(__uint32_t)),
0, NULL, NULL },
diff --git a/db/field.h b/db/field.h
index 6343c9a..f3fba66 100644
--- a/db/field.h
+++ b/db/field.h
@@ -80,6 +80,10 @@
FLDT_CNTBTKEY,
FLDT_CNTBTPTR,
FLDT_CNTBTREC,
+ FLDT_RMAPBT_CRC,
+ FLDT_RMAPBTKEY,
+ FLDT_RMAPBTPTR,
+ FLDT_RMAPBTREC,
/* CRC field type */
FLDT_CRC,
diff --git a/db/type.c b/db/type.c
index b29f2a4..de97850 100644
--- a/db/type.c
+++ b/db/type.c
@@ -58,6 +58,7 @@
{ TYP_BMAPBTD, "bmapbtd", handle_struct, bmapbtd_hfld, NULL },
{ TYP_BNOBT, "bnobt", handle_struct, bnobt_hfld, NULL },
{ TYP_CNTBT, "cntbt", handle_struct, cntbt_hfld, NULL },
+ { TYP_RMAPBT, NULL },
{ TYP_DATA, "data", handle_block, NULL, NULL },
{ TYP_DIR2, "dir2", handle_struct, dir2_hfld, NULL },
{ TYP_DQBLK, "dqblk", handle_struct, dqblk_hfld, NULL },
@@ -87,6 +88,8 @@
&xfs_allocbt_buf_ops },
{ TYP_CNTBT, "cntbt", handle_struct, cntbt_crc_hfld,
&xfs_allocbt_buf_ops },
+ { TYP_RMAPBT, "rmapbt", handle_struct, rmapbt_crc_hfld,
+ &xfs_rmapbt_buf_ops },
{ TYP_DATA, "data", handle_block, NULL, NULL },
{ TYP_DIR2, "dir3", handle_struct, dir3_hfld,
&xfs_dir3_db_buf_ops },
diff --git a/db/type.h b/db/type.h
index 3bb26f1..9d02d6d 100644
--- a/db/type.h
+++ b/db/type.h
@@ -24,7 +24,7 @@
typedef enum typnm
{
TYP_AGF, TYP_AGFL, TYP_AGI, TYP_ATTR, TYP_BMAPBTA,
- TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_DATA,
+ TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_RMAPBT, TYP_DATA,
TYP_DIR2, TYP_DQBLK, TYP_INOBT, TYP_INODATA, TYP_INODE,
TYP_LOG, TYP_RTBITMAP, TYP_RTSUMMARY, TYP_SB, TYP_SYMLINK,
TYP_TEXT, TYP_NONE
diff --git a/include/Makefile b/include/Makefile
index 70e43a0..b3526ec 100644
--- a/include/Makefile
+++ b/include/Makefile
@@ -30,7 +30,7 @@
xfs_trace.h \
xfs_trans.h
-HFILES = handle.h jdm.h xqm.h xfs.h
+HFILES = handle.h jdm.h xqm.h xfs.h platform_defs.h
HFILES += $(PKG_PLATFORM).h
PHFILES = darwin.h freebsd.h irix.h linux.h gnukfreebsd.h
DKHFILES = volume.h fstyp.h dvh.h
diff --git a/include/libxfs.h b/include/libxfs.h
index 6a59cc0..c6bd37d 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -66,6 +66,7 @@
#include <xfs/xfs_bmap_btree.h>
#include <xfs/xfs_alloc_btree.h>
#include <xfs/xfs_ialloc_btree.h>
+#include <xfs/xfs_rmap_btree.h>
#include <xfs/xfs_attr_sf.h>
#include <xfs/xfs_inode_fork.h>
#include <xfs/xfs_inode_buf.h>
diff --git a/include/xfs_mount.h b/include/xfs_mount.h
index 70bdea0..b614edd 100644
--- a/include/xfs_mount.h
+++ b/include/xfs_mount.h
@@ -64,6 +64,8 @@
uint m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */
uint m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */
uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */
+ uint m_rmap_mxr[2]; /* max rmap btree records */
+ uint m_rmap_mnr[2]; /* min rmap btree records */
uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */
diff --git a/include/xfs_trace.h b/include/xfs_trace.h
index ab046a9..31a994b 100644
--- a/include/xfs_trace.h
+++ b/include/xfs_trace.h
@@ -169,4 +169,11 @@
#define trace_xfs_perag_get_tag(a,b,c,d) ((c) = (c))
#define trace_xfs_perag_put(a,b,c,d) ((c) = (c))
+#define trace_xfs_rmap_alloc_extent(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmap_alloc_extent_done(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmap_alloc_extent_error(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmap_free_extent(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmap_free_extent_done(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmap_free_extent_error(a,b,c,d,e) ((void) 0)
+
#endif /* __TRACE_H__ */
diff --git a/libxfs/Makefile b/libxfs/Makefile
index 981cb0b..7dffa6b 100644
--- a/libxfs/Makefile
+++ b/libxfs/Makefile
@@ -42,6 +42,7 @@
xfs_inode_fork.h \
xfs_log_format.h \
xfs_quota_defs.h \
+ xfs_rmap_btree.h \
xfs_sb.h \
xfs_shared.h \
xfs_trans_resv.h \
@@ -75,6 +76,8 @@
xfs_ialloc_btree.c \
xfs_log_rlimit.c \
xfs_rtbitmap.c \
+ xfs_rmap.c \
+ xfs_rmap_btree.c \
xfs_sb.c \
xfs_symlink_remote.c \
xfs_trans_resv.c
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index 23e3c53..d0003c5 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -26,6 +26,7 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_cksum.h"
@@ -615,6 +616,12 @@
ASSERT(!args->wasfromfl || !args->isfl);
ASSERT(args->agbno % args->alignment == 0);
+ /* insert new block into the reverse map btree */
+ error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
+ args->agbno, args->len, args->owner);
+ if (error)
+ return error;
+
if (!args->wasfromfl) {
error = xfs_alloc_update_counters(args->tp, args->pag,
args->agbp,
@@ -1962,6 +1969,7 @@
memset(&targs, 0, sizeof(targs));
targs.tp = tp;
targs.mp = mp;
+ targs.owner = XFS_RMAP_OWN_AG;
targs.agbp = agbp;
targs.agno = args->agno;
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
@@ -2586,6 +2594,8 @@
* Free an extent.
* Just break up the extent address and hand off to xfs_free_ag_extent
* after fixing up the freelist.
+ *
+ * XXX: need owner of extent being freed
*/
int /* error */
xfs_free_extent(
@@ -2627,6 +2637,12 @@
goto error0;
}
+ /* XXX: need owner */
+ error = xfs_rmap_free(tp, args.agbp, args.agno, args.agbno, len, 0);
+ if (error)
+ goto error0;
+
+ /* XXX: initially no multiple references, so just free it */
error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
if (!error)
xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
@@ -2634,3 +2650,14 @@
xfs_perag_put(args.pag);
return error;
}
+
+xfs_extlen_t
+xfs_prealloc_blocks(
+ struct xfs_mount *mp)
+{
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return XFS_RMAP_BLOCK(mp) + 1;
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ return XFS_FIBT_BLOCK(mp) + 1;
+ return XFS_IBT_BLOCK(mp) + 1;
+}
diff --git a/libxfs/xfs_alloc.h b/libxfs/xfs_alloc.h
index db5da4a..4e52b1f 100644
--- a/libxfs/xfs_alloc.h
+++ b/libxfs/xfs_alloc.h
@@ -72,6 +72,8 @@
* needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
* btree requires 1 fsb, so we set the number of set-aside blocks
* to 4 + 4*agcount.
+ *
+ * XXX: this changes for rmapbt filesystems.
*/
#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
@@ -86,10 +88,13 @@
*
* The AG headers are sector sized, so the amount of space they take up is
* dependent on filesystem geometry. The others are all single blocks.
+ *
+ * XXX: this changes for rmapbt filesystems.
*/
#define XFS_ALLOC_AG_MAX_USABLE(mp) \
((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
+xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
/*
* Argument structure for xfs_alloc routines.
@@ -120,6 +125,7 @@
char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* set if this is user data */
xfs_fsblock_t firstblock; /* io first block allocated */
+ uint64_t owner; /* owner of blocks being allocated */
} xfs_alloc_arg_t;
/*
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index e6d1e6c..56e6872 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -769,6 +769,7 @@
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = mp;
+ args.owner = ip->i_ino;
args.firstblock = *firstblock;
if (*firstblock == NULLFSBLOCK) {
args.type = XFS_ALLOCTYPE_START_BNO;
@@ -915,6 +916,7 @@
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = ip->i_mount;
+ args.owner = ip->i_ino;
args.firstblock = *firstblock;
/*
* Allocate a block. We know we need only one, since the
@@ -3683,6 +3685,7 @@
memset(&args, 0, sizeof(args));
args.tp = ap->tp;
args.mp = mp;
+ args.owner = ap->ip->i_ino;
args.fsbno = ap->blkno;
/* Trim the allocation back to the maximum an AG can fit. */
diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c
index 2fd04e0..5df4000 100644
--- a/libxfs/xfs_bmap_btree.c
+++ b/libxfs/xfs_bmap_btree.c
@@ -442,6 +442,7 @@
args.mp = cur->bc_mp;
args.fsbno = cur->bc_private.b.firstblock;
args.firstblock = args.fsbno;
+ args.owner = cur->bc_private.b.ip->i_ino;
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = be64_to_cpu(start->l);
diff --git a/libxfs/xfs_btree.h b/libxfs/xfs_btree.h
index 8f18bab..48ab2b1 100644
--- a/libxfs/xfs_btree.h
+++ b/libxfs/xfs_btree.h
@@ -38,17 +38,19 @@
};
union xfs_btree_key {
- xfs_bmbt_key_t bmbt;
- xfs_bmdr_key_t bmbr; /* bmbt root block */
- xfs_alloc_key_t alloc;
- xfs_inobt_key_t inobt;
+ struct xfs_bmbt_key bmbt;
+ xfs_bmdr_key_t bmbr; /* bmbt root block */
+ xfs_alloc_key_t alloc;
+ struct xfs_inobt_key inobt;
+ struct xfs_rmap_key rmap;
};
union xfs_btree_rec {
- xfs_bmbt_rec_t bmbt;
- xfs_bmdr_rec_t bmbr; /* bmbt root block */
- xfs_alloc_rec_t alloc;
- xfs_inobt_rec_t inobt;
+ struct xfs_bmbt_rec bmbt;
+ xfs_bmdr_rec_t bmbr; /* bmbt root block */
+ struct xfs_alloc_rec alloc;
+ struct xfs_inobt_rec inobt;
+ struct xfs_rmap_rec rmap;
};
/*
@@ -63,6 +65,7 @@
#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
+#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi)
/*
* For logging record fields.
@@ -94,6 +97,7 @@
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
+ case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(rmap, stat); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
@@ -108,6 +112,7 @@
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
+ case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_ADD(rmap, stat, val); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
@@ -199,6 +204,7 @@
xfs_alloc_rec_incore_t a;
xfs_bmbt_irec_t b;
xfs_inobt_rec_incore_t i;
+ struct xfs_rmap_irec r;
} bc_rec; /* current insert/search record value */
struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
index 4d313d3..0fe326f 100644
--- a/libxfs/xfs_format.h
+++ b/libxfs/xfs_format.h
@@ -445,8 +445,10 @@
}
#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
+#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */
#define XFS_SB_FEAT_RO_COMPAT_ALL \
- (XFS_SB_FEAT_RO_COMPAT_FINOBT)
+ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \
+ XFS_SB_FEAT_RO_COMPAT_RMAPBT)
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
@@ -506,6 +508,12 @@
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
}
+static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
+{
+ return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
+ (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
+}
+
/*
* end of superblock version macros
*/
@@ -566,10 +574,10 @@
#define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION)
/*
- * Btree number 0 is bno, 1 is cnt. This value gives the size of the
+ * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the
* arrays below.
*/
-#define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1)
+#define XFS_BTNUM_AGF ((int)XFS_BTNUM_RMAPi + 1)
/*
* The second word of agf_levels in the first a.g. overlaps the EFS
@@ -586,12 +594,10 @@
__be32 agf_seqno; /* sequence # starting from 0 */
__be32 agf_length; /* size in blocks of a.g. */
/*
- * Freespace information
+ * Freespace and rmap information
*/
__be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */
- __be32 agf_spare0; /* spare field */
__be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */
- __be32 agf_spare1; /* spare field */
__be32 agf_flfirst; /* first freelist block's index */
__be32 agf_fllast; /* last freelist block's index */
@@ -1254,16 +1260,74 @@
#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
/*
- * The first data block of an AG depends on whether the filesystem was formatted
- * with the finobt feature. If so, account for the finobt reserved root btree
- * block.
+ * Reverse mapping btree format definitions
+ *
+ * There is a btree for the reverse map per allocation group
*/
-#define XFS_PREALLOC_BLOCKS(mp) \
+#define XFS_RMAP_CRC_MAGIC 0x524d4233 /* 'RMB3' */
+
+/*
+ * Special owner types.
+ *
+ * Seeing as we only support up to 8EB, we have the upper bit of the owner field
+ * to tell us we have a special owner value. We use these for static metadata
+ * allocated at mkfs/growfs time, as well as for freespace management metadata.
+ */
+#define XFS_RMAP_OWN_NULL (-1ULL) /* No owner, for growfs */
+#define XFS_RMAP_OWN_UNKNOWN (-2ULL) /* Unknown owner, for EFI recovery */
+#define XFS_RMAP_OWN_FS (-3ULL) /* static fs metadata */
+#define XFS_RMAP_OWN_LOG (-4ULL) /* static fs metadata */
+#define XFS_RMAP_OWN_AG (-5ULL) /* AG freespace btree blocks */
+#define XFS_RMAP_OWN_INOBT (-6ULL) /* Inode btree blocks */
+#define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */
+#define XFS_RMAP_OWN_MIN (-8ULL) /* guard */
+
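+/*
+ * For example, extents holding inode chunks are tracked with an owner of
+ * XFS_RMAP_OWN_INODES and freespace btree blocks with XFS_RMAP_OWN_AG, while
+ * an extent allocated to a file simply records the owning inode number.
+ */
+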
+/*
+ * Data record structure
+ */
+struct xfs_rmap_rec {
+ __be32 rm_startblock; /* extent start block */
+ __be32 rm_blockcount; /* extent length */
+ __be64 rm_owner; /* extent owner */
+};
+
+struct xfs_rmap_irec {
+ xfs_agblock_t rm_startblock; /* extent start block */
+ xfs_extlen_t rm_blockcount; /* extent length */
+ __uint64_t rm_owner; /* extent owner */
+};
+
+/*
+ * Key structure
+ *
+ * We don't use the length for lookups
+ */
+struct xfs_rmap_key {
+ __be32 rm_startblock; /* extent start block */
+};
+
+/* btree pointer type */
+typedef __be32 xfs_rmap_ptr_t;
+
+/*
+ * block numbers in the AG.
+ */
+#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
+#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
+#define XFS_RMAP_BLOCK(mp) \
(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
XFS_FIBT_BLOCK(mp) + 1 : \
XFS_IBT_BLOCK(mp) + 1)
-
+/*
+ * The first data block of an AG depends on whether the filesystem was formatted
+ * with the optional btree features. These need to be accounted for
+ * appropriately.
+ *
+ * XXX: this should be calculated once at mount time and stored in the struct
+ * xfs_mount rather than calculated every time it is used.
+ */
+#define XFS_PREALLOC_BLOCKS(mp) xfs_prealloc_blocks(mp)
/*
* BMAP Btree format definitions
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
index 2b4e4e0..08716f7 100644
--- a/libxfs/xfs_ialloc.c
+++ b/libxfs/xfs_ialloc.c
@@ -364,6 +364,7 @@
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = tp->t_mountp;
+ args.owner = XFS_RMAP_OWN_INODES;
/*
* Locking will ensure that we don't have two callers in here
diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
index 9ac143a..a40b9e8 100644
--- a/libxfs/xfs_ialloc_btree.c
+++ b/libxfs/xfs_ialloc_btree.c
@@ -95,6 +95,7 @@
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
+ args.owner = XFS_RMAP_OWN_INOBT;
args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
args.minlen = 1;
args.maxlen = 1;
diff --git a/libxfs/xfs_rmap.c b/libxfs/xfs_rmap.c
new file mode 100644
index 0000000..b2a3330
--- /dev/null
+++ b/libxfs/xfs_rmap.c
@@ -0,0 +1,413 @@
+
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_btree.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+STATIC int
+xfs_rmap_lookup_le(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ uint64_t owner,
+ int *stat)
+{
+ cur->bc_rec.r.rm_startblock = bno;
+ cur->bc_rec.r.rm_blockcount = len;
+ cur->bc_rec.r.rm_owner = owner;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [bno, len, ref].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int
+xfs_rmap_update(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *irec)
+{
+ union xfs_btree_rec rec;
+
+ rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
+ rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
+ rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
+ return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+STATIC int
+xfs_rmap_get_rec(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *irec,
+ int *stat)
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (error || !*stat)
+ return error;
+
+ irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
+ irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
+ irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner);
+ return 0;
+}
+
+/*
+ * Find the extent in the rmap btree and remove it.
+ *
+ * The record we find should always span a range greater than or equal to
+ * the extent being freed. This makes the code simple as, in theory, we do not
+ * have to handle ranges that are split across multiple records as extents that
+ * result in bmap btree extent merges should also result in rmap btree extent
+ * merges. The owner field ensures we don't merge extents from different
+ * structures into the same record, hence this property should always hold true
+ * if we ensure that the rmap btree supports at least the same size maximum
+ * extent as the bmap btree (2^21 blocks at present).
+ *
+ * Complexity: when growing the filesystem, we "free" an extent when extending
+ * the last AG. This extent is new space and so it is not tracked as used space
+ * in the btree. The growfs code will pass in an owner of XFS_RMAP_OWN_NULL to
+ * indicate that it expects there to be no owner of this extent. We verify this
+ * by checking that the extent lookup returns a record that does not overlap.
+ *
+ * Complexity #2: EFIs do not record the owner of the extent, so when recovering
+ * EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap btree to
+ * ignore the owner (i.e. wildcard match) so we don't trigger corruption checks
+ * during log recovery.
+ */
+int
+xfs_rmap_free(
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ uint64_t owner)
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_rmap_irec ltrec;
+ int error;
+ int i;
+
+ /*
+ * if rmap btree is not supported, then just return success without
+ * doing anything.
+ */
+ if (!xfs_sb_version_hasrmapbt(&tp->t_mountp->m_sb))
+ return 0;
+
+ trace_xfs_rmap_free_extent(mp, agno, bno, len, owner);
+ cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+
+ /*
+ * We always have a left record because there's a static record
+ * for the AG headers at rm_startblock == 0.
+ */
+ error = xfs_rmap_lookup_le(cur, bno, len, owner, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+ error = xfs_rmap_get_rec(cur, <rec, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+ /* special growfs case - bno is beyond last record */
+ if (owner == XFS_RMAP_OWN_NULL) {
+ XFS_WANT_CORRUPTED_GOTO(mp, bno > ltrec.rm_startblock +
+ ltrec.rm_blockcount, out_error);
+ goto out_done;
+ }
+
+ /* make sure the extent we found covers the entire freeing range. */
+ XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno, out_error);
+ XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_blockcount >= len, out_error);
+
+/*
+ if (owner != ltrec.rm_owner ||
+ bno > ltrec.rm_startblock + ltrec.rm_blockcount)
+ */
+ //printk("rmfree ag %d bno 0x%x/0x%x/0x%llx, ltrec 0x%x/0x%x/0x%llx\n",
+ // agno, bno, len, owner, ltrec.rm_startblock,
+ // ltrec.rm_blockcount, ltrec.rm_owner);
+ XFS_WANT_CORRUPTED_GOTO(mp, bno <= ltrec.rm_startblock + ltrec.rm_blockcount,
+ out_error);
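+ /*
+ * The passed-in owner must either match the record, or be a
+ * special owner value other than XFS_RMAP_OWN_NULL (e.g.
+ * XFS_RMAP_OWN_UNKNOWN during EFI recovery), which acts as a
+ * wildcard match.
+ */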
+ XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
+ (owner < XFS_RMAP_OWN_NULL &&
+ owner >= XFS_RMAP_OWN_MIN), out_error);
+
+ /* exact match is easy */
+ if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
+ //printk("remove exact\n");
+ /* remove extent from rmap tree */
+ error = xfs_btree_delete(cur, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ } else if (ltrec.rm_startblock == bno) {
+ //printk("remove left\n");
+ /*
+ * overlap left hand side of extent
+ *
+ * ltbno ltlen
+ * Orig: |oooooooooooooooooooo|
+ * Freeing: |fffffffff|
+ * Result: |rrrrrrrrrr|
+ * bno len
+ */
+ ltrec.rm_startblock += len;
+ ltrec.rm_blockcount -= len;
+ error = xfs_rmap_update(cur, <rec);
+ if (error)
+ goto out_error;
+ } else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
+ //printk("remove right\n");
+ /*
+ * overlap right hand side of extent
+ *
+ * ltbno ltlen
+ * Orig: |oooooooooooooooooooo|
+ * Freeing: |fffffffff|
+ * Result: |rrrrrrrrrr|
+ * bno len
+ */
+ ltrec.rm_blockcount -= len;
+ error = xfs_rmap_update(cur, <rec);
+ if (error)
+ goto out_error;
+ } else {
+ /*
+ * overlap middle of extent
+ *
+ * ltbno ltlen
+ * Orig: |oooooooooooooooooooo|
+ * Freeing: |fffffffff|
+ * Result: |rrrrr| |rrrr|
+ * bno len
+ */
+ xfs_extlen_t orig_len = ltrec.rm_blockcount;
+ //printk("remove middle\n");
+
+ ltrec.rm_blockcount = bno - ltrec.rm_startblock;
+ error = xfs_rmap_update(cur, <rec);
+ if (error)
+ goto out_error;
+
+ error = xfs_btree_increment(cur, 0, &i);
+ if (error)
+ goto out_error;
+
+ cur->bc_rec.r.rm_startblock = bno + len;
+ cur->bc_rec.r.rm_blockcount = orig_len - len -
+ ltrec.rm_blockcount;
+ cur->bc_rec.r.rm_owner = ltrec.rm_owner;
+ error = xfs_btree_insert(cur, &i);
+ if (error)
+ goto out_error;
+ }
+
+out_done:
+ trace_xfs_rmap_free_extent_done(mp, agno, bno, len, owner);
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ return 0;
+
+out_error:
+ trace_xfs_rmap_free_extent_error(mp, agno, bno, len, owner);
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/*
+ * When we allocate a new block, the first thing we do is add a reference to the
+ * extent in the rmap btree. This is how we track the owner of the extent and
+ * the number of references to it.
+ *
+ * Initially, we do not have shared extents, and so the extent can only have a
+ * single reference count and owner. This makes the initial implementation easy,
+ * but does not allow us to use the rmap tree for tracking reflink shared files.
+ * Hence the initial implementation is simply a lookup to find the place to
+ * insert (and checking we don't find a duplicate/overlap) and then inserting the
+ * appropriate record.
+ */
+int
+xfs_rmap_alloc(
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ uint64_t owner)
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_rmap_irec ltrec;
+ struct xfs_rmap_irec gtrec;
+ int have_gt;
+ int error;
+ int i;
+
+ /*
+ * if rmap btree is not supported, then just return success without
+ * doing anything.
+ */
+ if (!xfs_sb_version_hasrmapbt(&tp->t_mountp->m_sb))
+ return 0;
+
+ trace_xfs_rmap_alloc_extent(mp, agno, bno, len, owner);
+ cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+
+ /*
+ * check to see if we find an existing record for this extent rather
+ * than just the location for insert.
+ */
+ error = xfs_rmap_lookup_le(cur, bno, len, owner, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+ error = xfs_rmap_get_rec(cur, <rec, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ //printk("rmalloc ag %d bno 0x%x/0x%x/0x%llx, ltrec 0x%x/0x%x/0x%llx\n",
+ // agno, bno, len, owner, ltrec.rm_startblock,
+ // ltrec.rm_blockcount, ltrec.rm_owner);
+
+ XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock + ltrec.rm_blockcount <= bno,
+ out_error);
+
+ error = xfs_btree_increment(cur, 0, &have_gt);
+ if (error)
+ goto out_error;
+ if (have_gt) {
+ error = xfs_rmap_get_rec(cur, >rec, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ //printk("rmalloc ag %d bno 0x%x/0x%x/0x%llx, gtrec 0x%x/0x%x/0x%llx\n",
+ // agno, bno, len, owner, gtrec.rm_startblock,
+ // gtrec.rm_blockcount, gtrec.rm_owner);
+ XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= gtrec.rm_startblock,
+ out_error);
+ } else {
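+ /*
+ * No record to the right of the new extent; flag this with
+ * XFS_RMAP_OWN_NULL so the right-contiguity checks below
+ * do not match for any real owner.
+ */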
+ gtrec.rm_owner = XFS_RMAP_OWN_NULL;
+ }
+
+ /* cursor currently points one record past ltrec */
+ if (ltrec.rm_owner == owner &&
+ ltrec.rm_startblock + ltrec.rm_blockcount == bno) {
+ /*
+ * left edge contiguous
+ *
+ * ltbno ltlen
+ * orig: |ooooooooo|
+ * adding: |aaaaaaaaa|
+ * result: |rrrrrrrrrrrrrrrrrrr|
+ * bno len
+ */
+ //printk("add left\n");
+ ltrec.rm_blockcount += len;
+ if (gtrec.rm_owner == owner &&
+ bno + len == gtrec.rm_startblock) {
+ //printk("add middle\n");
+ /*
+ * right edge also contiguous
+ *
+ * ltbno ltlen gtbno gtlen
+ * orig: |ooooooooo| |ooooooooo|
+ * adding: |aaaaaaaaa|
+ * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
+ */
+ ltrec.rm_blockcount += gtrec.rm_blockcount;
+ error = xfs_btree_delete(cur, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ }
+
+ error = xfs_btree_decrement(cur, 0, &have_gt);
+ if (error)
+ goto out_error;
+ error = xfs_rmap_update(cur, <rec);
+ if (error)
+ goto out_error;
+ } else if (gtrec.rm_owner == owner &&
+ bno + len == gtrec.rm_startblock) {
+ /*
+ * right edge contiguous
+ *
+ * gtbno gtlen
+ * Orig: |ooooooooo|
+ * adding: |aaaaaaaaa|
+ * Result: |rrrrrrrrrrrrrrrrrrr|
+ * bno len
+ */
+ //printk("add right\n");
+ gtrec.rm_startblock = bno;
+ gtrec.rm_blockcount += len;
+ error = xfs_rmap_update(cur, >rec);
+ if (error)
+ goto out_error;
+ } else {
+ //printk("add no match\n");
+ /* no contiguous edge with identical owner */
+ cur->bc_rec.r.rm_startblock = bno;
+ cur->bc_rec.r.rm_blockcount = len;
+ cur->bc_rec.r.rm_owner = owner;
+ error = xfs_btree_insert(cur, &i);
+ if (error)
+ goto out_error;
+ }
+
+ trace_xfs_rmap_alloc_extent_done(mp, agno, bno, len, owner);
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ return 0;
+
+out_error:
+ trace_xfs_rmap_alloc_extent_error(mp, agno, bno, len, owner);
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+}
diff --git a/libxfs/xfs_rmap_btree.c b/libxfs/xfs_rmap_btree.c
new file mode 100644
index 0000000..ed1792d
--- /dev/null
+++ b/libxfs/xfs_rmap_btree.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+
+
+/*
+ * Reverse map btree.
+ *
+ * This is a per-ag tree used to track the owner of a given extent. Owner
+ * records are inserted when an extent is allocated, and removed when an extent
+ * is freed. For existing filesystems, there can only be one owner of an extent,
+ * usually an inode or some other metadata structure like an AG btree.
+ *
+ * Initial thoughts are that the
+ * value of the owner field needs external flags to define what it means, and
+ * hence we need a flags field in the record. This means the record is going to
+ * be larger than 16 bytes (agbno,len,owner = 16 bytes), so maybe this isn't the
+ * best idea. Initially just implement the owner field - we can probably steal
+ * bits from the extent length field for type descriptors given that MAXEXTLEN
+ * is only 21 bits if we want to store the type as well. Keep in mind that if we
+ * want to do this there are still restrictions on the length of extents we
+ * track in the rmap btree (see comments on xfs_rmap_free()).
+ *
+ * The rmap btree is part of the free space management, so blocks for the tree
+ * are sourced from the agfl. Hence we need transaction reservation support for
+ * this tree so that the freelist is always large enough. This also impacts on
+ * the minimum space we need to leave free in the AG.
+ *
+ * The tree is ordered by block number - there's no need to order/search by
+ * extent size for online updating/management of the tree, and the reverse
+ * lookups are going to be "who owns this block" and so by-block ordering is
+ * perfect for this.
+ *
+ * XXX: open question is how to handle blocks that are owned by the freespace
+ * tree blocks. Right now they will be classified when they are moved to the
+ * freelist or removed from the freelist. i.e. the extent allocation/freeing
+ * will mark the extents allocated as owned by the AG.
+ */
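+
+/*
+ * Rough usage sketch (as in the xfs_rmap_alloc/free paths added alongside
+ * this file):
+ *
+ *   cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+ *   ... xfs_btree_lookup/insert/update/delete via the cursor ...
+ *   xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+ */
+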
+STATIC struct xfs_btree_cur *
+xfs_rmapbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.a.agbp, cur->bc_private.a.agno);
+}
+
+STATIC void
+xfs_rmapbt_set_root(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int inc)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ int btnum = cur->bc_btnum;
+ struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
+
+ ASSERT(ptr->s != 0);
+
+ agf->agf_roots[btnum] = ptr->s;
+ be32_add_cpu(&agf->agf_levels[btnum], inc);
+ pag->pagf_levels[btnum] += inc;
+ xfs_perag_put(pag);
+
+ xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
+STATIC int
+xfs_rmapbt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ int error;
+ xfs_agblock_t bno;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+ /* Allocate the new block from the freelist. If we can't, give up. */
+ error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+ &bno, 1);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+ }
+
+ if (bno == NULLAGBLOCK) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
+
+ xfs_trans_agbtree_delta(cur->bc_tp, 1);
+ new->s = cpu_to_be32(bno);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+}
+
+STATIC int
+xfs_rmapbt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agblock_t bno;
+ int error;
+
+ bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+ error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
+ if (error)
+ return error;
+
+ xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+ XFS_EXTENT_BUSY_SKIP_DISCARD);
+ xfs_trans_agbtree_delta(cur->bc_tp, -1);
+
+ xfs_trans_binval(cur->bc_tp, bp);
+ return 0;
+}
+
+STATIC int
+xfs_rmapbt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_mp->m_rmap_mnr[level != 0];
+}
+
+STATIC int
+xfs_rmapbt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_mp->m_rmap_mxr[level != 0];
+}
+
+STATIC void
+xfs_rmapbt_init_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ key->rmap.rm_startblock = rec->rmap.rm_startblock;
+}
+
+STATIC void
+xfs_rmapbt_init_rec_from_key(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ rec->rmap.rm_startblock = key->rmap.rm_startblock;
+}
+
+STATIC void
+xfs_rmapbt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
+{
+ rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
+ rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
+ rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
+}
+
+STATIC void
+xfs_rmapbt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+
+ ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
+
+ ptr->s = agf->agf_roots[cur->bc_btnum];
+}
+
+STATIC __int64_t
+xfs_rmapbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
+{
+ struct xfs_rmap_irec *rec = &cur->bc_rec.r;
+ struct xfs_rmap_key *kp = &key->rmap;
+
+ return (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
+}
+
+static bool
+xfs_rmapbt_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_perag *pag = bp->b_pag;
+ unsigned int level;
+
+ /*
+ * magic number and level verification
+ *
+ * During growfs operations, we can't verify the exact level or owner as
+ * the perag is not fully initialised and hence not attached to the
+ * buffer. In this case, check against the maximum tree depth.
+ *
+ * Similarly, during log recovery we will have a perag structure
+ * attached, but the agf information will not yet have been initialised
+ * from the on disk AGF. Again, we can only check against maximum limits
+ * in this case.
+ */
+ if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
+ return false;
+
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+
+ level = be16_to_cpu(block->bb_level);
+ if (pag && pag->pagf_init) {
+ if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
+ return false;
+ } else if (level >= mp->m_ag_maxlevels)
+ return false;
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > mp->m_rmap_mxr[level != 0])
+ return false;
+
+ /* sibling pointer verification */
+ if (!block->bb_u.s.bb_leftsib ||
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+ if (!block->bb_u.s.bb_rightsib ||
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+
+ return true;
+}
+
+static void
+xfs_rmapbt_read_verify(
+ struct xfs_buf *bp)
+{
+ if (!xfs_btree_sblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_rmapbt_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_verifier_error(bp);
+ }
+}
+
+static void
+xfs_rmapbt_write_verify(
+ struct xfs_buf *bp)
+{
+ if (!xfs_rmapbt_verify(bp)) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
+ }
+ xfs_btree_sblock_calc_crc(bp);
+
+}
+
+const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
+ .verify_read = xfs_rmapbt_read_verify,
+ .verify_write = xfs_rmapbt_write_verify,
+};
+
+
+#if defined(DEBUG) || defined(XFS_WARN)
+STATIC int
+xfs_rmapbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return be32_to_cpu(k1->rmap.rm_startblock) <
+ be32_to_cpu(k2->rmap.rm_startblock);
+}
+
+STATIC int
+xfs_rmapbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
+{
+ return be32_to_cpu(r1->rmap.rm_startblock) +
+ be32_to_cpu(r1->rmap.rm_blockcount) <=
+ be32_to_cpu(r2->rmap.rm_startblock);
+}
+#endif /* DEBUG */
+
+static const struct xfs_btree_ops xfs_rmapbt_ops = {
+ .rec_len = sizeof(struct xfs_rmap_rec),
+ .key_len = sizeof(struct xfs_rmap_key),
+
+ .dup_cursor = xfs_rmapbt_dup_cursor,
+ .set_root = xfs_rmapbt_set_root,
+ .alloc_block = xfs_rmapbt_alloc_block,
+ .free_block = xfs_rmapbt_free_block,
+ .get_minrecs = xfs_rmapbt_get_minrecs,
+ .get_maxrecs = xfs_rmapbt_get_maxrecs,
+ .init_key_from_rec = xfs_rmapbt_init_key_from_rec,
+ .init_rec_from_key = xfs_rmapbt_init_rec_from_key,
+ .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur,
+ .key_diff = xfs_rmapbt_key_diff,
+ .buf_ops = &xfs_rmapbt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+ .keys_inorder = xfs_rmapbt_keys_inorder,
+ .recs_inorder = xfs_rmapbt_recs_inorder,
+#endif
+};
+
+/*
+ * Allocate a new reverse mapping btree cursor.
+ */
+struct xfs_btree_cur *
+xfs_rmapbt_init_cursor(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xfs_btree_cur *cur;
+
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+ cur->bc_btnum = XFS_BTNUM_RMAP;
+ cur->bc_flags = XFS_BTREE_CRC_BLOCKS;
+ cur->bc_blocklog = mp->m_sb.sb_blocklog;
+ cur->bc_ops = &xfs_rmapbt_ops;
+ cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
+
+ cur->bc_private.a.agbp = agbp;
+ cur->bc_private.a.agno = agno;
+
+ return cur;
+}
+
+/*
+ * Calculate number of records in an rmap btree block.
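+ *
+ * As a rough worked example (assuming the usual 56-byte short-form CRC block
+ * header): with 4096-byte blocks a leaf holds (4096 - 56) / 16 = 252 records,
+ * and a node holds (4096 - 56) / (4 + 4) = 505 key/pointer pairs.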
+ */
+int
+xfs_rmapbt_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
+{
+ blocklen -= XFS_RMAP_BLOCK_LEN;
+
+ if (leaf)
+ return blocklen / sizeof(struct xfs_rmap_rec);
+ return blocklen /
+ (sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
+}
diff --git a/libxfs/xfs_rmap_btree.h b/libxfs/xfs_rmap_btree.h
new file mode 100644
index 0000000..9ad65e5
--- /dev/null
+++ b/libxfs/xfs_rmap_btree.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_RMAP_BTREE_H__
+#define __XFS_RMAP_BTREE_H__
+
+/*
+ * Reverse mapping btree on-disk structures
+ */
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_mount;
+
+/* rmaps only exist on crc enabled filesystems */
+#define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_RMAP_REC_ADDR(block, index) \
+ ((struct xfs_rmap_rec *) \
+ ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+ (((index) - 1) * sizeof(struct xfs_rmap_rec))))
+
+#define XFS_RMAP_KEY_ADDR(block, index) \
+ ((struct xfs_rmap_key *) \
+ ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+ ((index) - 1) * sizeof(struct xfs_rmap_key)))
+
+#define XFS_RMAP_PTR_ADDR(block, index, maxrecs) \
+ ((xfs_rmap_ptr_t *) \
+ ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+ (maxrecs) * sizeof(struct xfs_rmap_key) + \
+ ((index) - 1) * sizeof(xfs_rmap_ptr_t)))
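+
+/*
+ * Block layout sketch: a leaf block is the CRC header followed by packed
+ * xfs_rmap_rec records; a node block is the header, then up to maxrecs keys,
+ * followed by maxrecs pointers, which is why XFS_RMAP_PTR_ADDR needs maxrecs
+ * to locate the start of the pointer array.
+ */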
+
+struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
+ struct xfs_trans *tp, struct xfs_buf *bp,
+ xfs_agnumber_t agno);
+int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
+
+int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
+ xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+ uint64_t owner);
+int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
+ xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+ uint64_t owner);
+
+#endif /* __XFS_RMAP_BTREE_H__ */
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
index 6844cd8..e5c7a85 100644
--- a/libxfs/xfs_sb.c
+++ b/libxfs/xfs_sb.c
@@ -668,6 +668,11 @@
mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
+ mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 1);
+ mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 0);
+ mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
+ mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
+
mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
sbp->sb_inopblock);
diff --git a/libxfs/xfs_shared.h b/libxfs/xfs_shared.h
index 8dda4b3..e8e88f3 100644
--- a/libxfs/xfs_shared.h
+++ b/libxfs/xfs_shared.h
@@ -38,6 +38,7 @@
extern const struct xfs_buf_ops xfs_agf_buf_ops;
extern const struct xfs_buf_ops xfs_agfl_buf_ops;
extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
+extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
diff --git a/libxfs/xfs_types.h b/libxfs/xfs_types.h
index b79dc66..3d50364 100644
--- a/libxfs/xfs_types.h
+++ b/libxfs/xfs_types.h
@@ -108,8 +108,8 @@
} xfs_lookup_t;
typedef enum {
- XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
- XFS_BTNUM_FINOi, XFS_BTNUM_MAX
+ XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
+ XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
} xfs_btnum_t;
struct xfs_name {
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index 1770666..c42581b 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -185,6 +185,8 @@
"crc",
#define M_FINOBT 1
"finobt",
+#define M_RMAPBT 2
+ "rmapbt",
NULL
};
@@ -1004,6 +1006,7 @@
int lazy_sb_counters;
int crcs_enabled;
int finobt;
+ bool rmapbt;
progname = basename(argv[0]);
setlocale(LC_ALL, "");
@@ -1038,6 +1041,7 @@
lazy_sb_counters = 1;
crcs_enabled = 0;
finobt = 0;
+ rmapbt = false;
memset(&fsx, 0, sizeof(fsx));
memset(&xi, 0, sizeof(xi));
@@ -1539,6 +1543,14 @@
illegal(value, "m finobt");
finobt = c;
break;
+ case M_RMAPBT:
+ if (!value || *value == '\0')
+ reqval('m', mopts, M_RMAPBT);
+ c = atoi(value);
+ if (c < 0 || c > 1)
+ illegal(value, "m rmapbt");
+ rmapbt = c;
+ break;
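+ /*
+ * Example (hypothetical device path):
+ * "mkfs.xfs -m crc=1,rmapbt=1 /dev/sdX" enables the reverse
+ * mapping btree; CRCs are required, see the check further down.
+ */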
default:
unknown('m', value);
}
@@ -1889,6 +1901,11 @@
_("warning: finobt not supported without CRC support, disabled.\n"));
finobt = 0;
}
+ if (rmapbt && !crcs_enabled) {
+ fprintf(stderr,
+_("warning: rmapbt not supported without CRC support, disabled.\n"));
+ rmapbt = 0;
+ }
if (nsflag || nlflag) {
if (dirblocksize < blocksize ||
@@ -2483,7 +2500,7 @@
mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
/*
- * sb_versionnum and finobt flags must be set before we use
+ * sb_versionnum, finobt and rmapbt flags must be set before we use
* XFS_PREALLOC_BLOCKS().
*/
sbp->sb_features2 = XFS_SB_VERSION2_MKFS(crcs_enabled, lazy_sb_counters,
@@ -2505,6 +2522,8 @@
if (finobt)
sbp->sb_features_ro_compat = XFS_SB_FEAT_RO_COMPAT_FINOBT;
+ if (rmapbt)
+ sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_RMAPBT;
if (loginternal) {
/*
@@ -2568,7 +2587,7 @@
printf(_(
"meta-data=%-22s isize=%-6d agcount=%lld, agsize=%lld blks\n"
" =%-22s sectsz=%-5u attr=%u, projid32bit=%u\n"
- " =%-22s crc=%-8u finobt=%u\n"
+ " =%-22s crc=%-8u finobt=%u, rmapbt=%u\n"
"data =%-22s bsize=%-6u blocks=%llu, imaxpct=%u\n"
" =%-22s sunit=%-6u swidth=%u blks\n"
"naming =version %-14u bsize=%-6u ascii-ci=%d ftype=%d\n"
@@ -2577,7 +2596,7 @@
"realtime =%-22s extsz=%-6d blocks=%lld, rtextents=%lld\n"),
dfile, isize, (long long)agcount, (long long)agsize,
"", sectorsize, attrversion, !projid16bit,
- "", crcs_enabled, finobt,
+ "", crcs_enabled, finobt, rmapbt,
"", blocksize, (long long)dblocks, imaxpct,
"", dsunit, dswidth,
dirversion, dirblocksize, nci, dirftype,
@@ -2748,6 +2767,12 @@
agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ agf->agf_roots[XFS_BTNUM_RMAPi] =
+ cpu_to_be32(XFS_RMAP_BLOCK(mp));
+ agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+ }
+
agf->agf_flfirst = 0;
agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
agf->agf_flcount = 0;
@@ -2935,22 +2960,83 @@
/*
* Free INO btree root block
*/
- if (!finobt)
- continue;
+ if (finobt) {
+ buf = libxfs_getbuf(mp->m_ddev_targp,
+ XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
+ bsize);
+ buf->b_ops = &xfs_inobt_buf_ops;
+ block = XFS_BUF_TO_BLOCK(buf);
+ memset(block, 0, blocksize);
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xfs_btree_init_block(mp, buf, XFS_FIBT_CRC_MAGIC, 0, 0,
+ agno, XFS_BTREE_CRC_BLOCKS);
+ else
+ xfs_btree_init_block(mp, buf, XFS_FIBT_MAGIC, 0, 0,
+ agno, 0);
+ libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
+ }
- buf = libxfs_getbuf(mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
+ /* RMAP btree root block */
+ if (rmapbt) {
+ struct xfs_rmap_rec *rrec;
+
+ buf = libxfs_getbuf(mp->m_ddev_targp,
+ XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
bsize);
- buf->b_ops = &xfs_inobt_buf_ops;
- block = XFS_BUF_TO_BLOCK(buf);
- memset(block, 0, blocksize);
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block(mp, buf, XFS_FIBT_CRC_MAGIC, 0, 0,
+ buf->b_ops = &xfs_rmapbt_buf_ops;
+ block = XFS_BUF_TO_BLOCK(buf);
+ memset(block, 0, blocksize);
+
+ xfs_btree_init_block(mp, buf, XFS_RMAP_CRC_MAGIC, 0, 0,
agno, XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block(mp, buf, XFS_FIBT_MAGIC, 0, 0,
- agno, 0);
- libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
+
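+ /*
+ * Populate the root block records below in ascending
+ * rm_startblock order; the rmap btree is keyed by start
+ * block, so the records must stay sorted.
+ */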
+ /*
+ * mark the AG header regions as static metadata
+ * The BNO btree block is the first block after the
+ * headers, so its location defines the size of the region
+ * the static metadata consumes.
+ */
+ rrec = XFS_RMAP_REC_ADDR(block, 1);
+ rrec->rm_startblock = 0;
+ rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
+ be16_add_cpu(&block->bb_numrecs, 1);
+
+ /* account freespace btree root blocks */
+ rrec = XFS_RMAP_REC_ADDR(block, 2);
+ rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
+ rrec->rm_blockcount = cpu_to_be32(2);
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+ be16_add_cpu(&block->bb_numrecs, 1);
+
+ /* account inode btree root blocks */
+ rrec = XFS_RMAP_REC_ADDR(block, 3);
+ rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
+ rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
+ XFS_IBT_BLOCK(mp));
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
+ be16_add_cpu(&block->bb_numrecs, 1);
+
+ /* account for rmap btree root */
+ rrec = XFS_RMAP_REC_ADDR(block, 4);
+ rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
+ rrec->rm_blockcount = cpu_to_be32(1);
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+ be16_add_cpu(&block->bb_numrecs, 1);
+
+ /* account for the log space */
+ if (loginternal && agno == logagno) {
+ rrec = XFS_RMAP_REC_ADDR(block, 5);
+ rrec->rm_startblock = cpu_to_be32(
+ XFS_FSB_TO_AGBNO(mp, logstart));
+ rrec->rm_blockcount = cpu_to_be32(logblocks);
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
+ be16_add_cpu(&block->bb_numrecs, 1);
+ }
+
+ libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
+ }
+
}
/*
diff --git a/repair/dinode.c b/repair/dinode.c
index 179203e..fc8bc12 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -744,6 +744,7 @@
_("%s fork in ino %" PRIu64 " claims free block %" PRIu64 "\n"),
forkname, ino, (__uint64_t) b);
/* fall through ... */
+ case XR_E_INUSE1: /* seen by rmap */
case XR_E_UNKNOWN:
set_bmap_ext(agno, agbno, blen, XR_E_INUSE);
break;
@@ -751,6 +752,11 @@
case XR_E_BAD_STATE:
do_error(_("bad state in block map %" PRIu64 "\n"), b);
+ case XR_E_FS_MAP1:
+ case XR_E_INO1:
+ case XR_E_INUSE_FS1:
+ do_warn(_("rmap claims metadata use!\n"));
+ /* fall through */
case XR_E_FS_MAP:
case XR_E_INO:
case XR_E_INUSE_FS:
diff --git a/repair/incore.h b/repair/incore.h
index ba819b4..b5c0087 100644
--- a/repair/incore.h
+++ b/repair/incore.h
@@ -102,17 +102,11 @@
#define XR_E_MULT 5 /* extent is multiply referenced */
#define XR_E_INO 6 /* extent used by inodes (inode blocks) */
#define XR_E_FS_MAP 7 /* extent used by fs space/inode maps */
-#define XR_E_BAD_STATE 8
-
-/* extent states, in 64 bit word chunks */
-#define XR_E_UNKNOWN_LL 0x0000000000000000LL
-#define XR_E_FREE1_LL 0x1111111111111111LL
-#define XR_E_FREE_LL 0x2222222222222222LL
-#define XR_E_INUSE_LL 0x3333333333333333LL
-#define XR_E_INUSE_FS_LL 0x4444444444444444LL
-#define XR_E_MULT_LL 0x5555555555555555LL
-#define XR_E_INO_LL 0x6666666666666666LL
-#define XR_E_FS_MAP_LL 0x7777777777777777LL
+#define XR_E_INUSE1 8 /* used block (marked by rmap btree) */
+#define XR_E_INUSE_FS1 9 /* used by fs ag header or log (rmap btree) */
+#define XR_E_INO1 10 /* used by inodes (marked by rmap btree) */
+#define XR_E_FS_MAP1 11 /* used by fs space/inode maps (rmap btree) */
+#define XR_E_BAD_STATE 12
/* separate state bit, OR'ed into high (4th) bit of ex_state field */
diff --git a/repair/scan.c b/repair/scan.c
index e7e05d1..3c00660 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -44,6 +44,7 @@
__uint32_t agicount;
__uint32_t agifreecount;
__uint64_t fdblocks;
+ __uint64_t usedblocks;
__uint64_t icount;
__uint64_t ifreecount;
__uint32_t fibtfreecount;
@@ -292,6 +293,13 @@
pthread_mutex_lock(&ag_locks[agno].lock);
state = get_bmap(agno, agbno);
switch (state) {
+ case XR_E_INUSE1:
+ /*
+ * block was claimed as in use data by the rmap
+ * btree, but has not been found in the data extent
+ * map for the inode. That means this bmbt block hasn't
+ * yet been claimed as in use, which means -it's ours-
+ */
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
@@ -737,6 +745,251 @@
}
}
+static void
+scan_rmapbt(
+ struct xfs_btree_block *block,
+ int level,
+ xfs_agblock_t bno,
+ xfs_agnumber_t agno,
+ int suspect,
+ int isroot,
+ __uint32_t magic,
+ void *priv)
+{
+ struct aghdr_cnts *agcnts = priv;
+ const char *name = "rmap";
+ int i;
+ xfs_rmap_ptr_t *pp;
+ struct xfs_rmap_rec *rp;
+ int hdr_errors = 0;
+ int numrecs;
+ int state;
+ xfs_agblock_t lastblock = 0;
+
+ if (magic != XFS_RMAP_CRC_MAGIC) {
+ name = "(unknown)";
+ assert(0);
+ }
+
+ if (be32_to_cpu(block->bb_magic) != magic) {
+ do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
+ be32_to_cpu(block->bb_magic), name, agno, bno);
+ hdr_errors++;
+ if (suspect)
+ return;
+ }
+
+ /*
+ * All RMAP btree blocks except the roots are freed for a
+ * fully empty filesystem, thus they are counted towards the
+ * free data block counter.
+ */
+ if (!isroot) {
+ agcnts->agfbtreeblks++;
+ agcnts->fdblocks++;
+ }
+
+ if (be16_to_cpu(block->bb_level) != level) {
+ do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
+ level, be16_to_cpu(block->bb_level), name, agno, bno);
+ hdr_errors++;
+ if (suspect)
+ return;
+ }
+
+ /* check for btree blocks multiply claimed */
+ state = get_bmap(agno, bno);
+ if (!(state == XR_E_UNKNOWN || state == XR_E_FS_MAP1)) {
+ set_bmap(agno, bno, XR_E_MULT);
+ do_warn(
+_("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
+ name, state, agno, bno, suspect);
+ return;
+ }
+ set_bmap(agno, bno, XR_E_FS_MAP);
+
+ numrecs = be16_to_cpu(block->bb_numrecs);
+ if (level == 0) {
+ if (numrecs > mp->m_rmap_mxr[0]) {
+ numrecs = mp->m_rmap_mxr[0];
+ hdr_errors++;
+ }
+ if (isroot == 0 && numrecs < mp->m_rmap_mnr[0]) {
+ numrecs = mp->m_rmap_mnr[0];
+ hdr_errors++;
+ }
+
+ if (hdr_errors) {
+ do_warn(
+ _("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
+ be16_to_cpu(block->bb_numrecs),
+ mp->m_rmap_mnr[0], mp->m_rmap_mxr[0],
+ name, agno, bno);
+ suspect++;
+ }
+
+ rp = XFS_RMAP_REC_ADDR(block, 1);
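+ /*
+ * Walk the leaf records, sanity checking each one
+ * (start block, length, ordering) before applying its
+ * extent to the incore block state map.
+ */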
+ for (i = 0; i < numrecs; i++) {
+ xfs_agblock_t b, end;
+ xfs_extlen_t len, blen;
+ int64_t owner;
+
+ b = be32_to_cpu(rp[i].rm_startblock);
+ len = be32_to_cpu(rp[i].rm_blockcount);
+ owner = be64_to_cpu(rp[i].rm_owner);
+ end = b + len;
+
+ if (!verify_agbno(mp, agno, b)) {
+ do_warn(
+ _("invalid start block %u in record %u of %s btree block %u/%u\n"),
+ b, i, name, agno, bno);
+ continue;
+ }
+ if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
+ do_warn(
+ _("invalid length %u in record %u of %s btree block %u/%u\n"),
+ len, i, name, agno, bno);
+ continue;
+ }
+
+ /* XXX: range check owner */
+
+ if (b && b <= lastblock) {
+ do_warn(_(
+ "out-of-order rmap btree record %d (%u %u) block %u/%u\n"),
+ i, b, len, agno, bno);
+ } else {
+ lastblock = b;
+ }
+
+ for ( ; b < end; b += blen) {
+ state = get_bmap_ext(agno, b, end, &blen);
+ switch (state) {
+ case XR_E_UNKNOWN:
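+ /*
+ * First time this extent has been seen; record what
+ * the rmap says owns it using the rmap-specific
+ * XR_E_*1 states so that the later btree and inode
+ * scans can cross-check their claims against it.
+ */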
+ switch (owner) {
+ case XFS_RMAP_OWN_FS:
+ case XFS_RMAP_OWN_LOG:
+ set_bmap(agno, b, XR_E_INUSE_FS1);
+ break;
+ case XFS_RMAP_OWN_AG:
+ case XFS_RMAP_OWN_INOBT:
+ set_bmap(agno, b, XR_E_FS_MAP1);
+ break;
+ case XFS_RMAP_OWN_INODES:
+ set_bmap(agno, b, XR_E_INO1);
+ break;
+ case XFS_RMAP_OWN_NULL:
+ /* still unknown */
+ break;
+ default:
+ /* file data */
+ set_bmap(agno, b, XR_E_INUSE1);
+ break;
+ }
+ break;
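+ /*
+ * The extent was already classified by an earlier
+ * scan; check that the rmap owner is consistent with
+ * that classification and warn if it is not.
+ */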
+ case XR_E_INUSE_FS:
+ if (owner == XFS_RMAP_OWN_FS ||
+ owner == XFS_RMAP_OWN_LOG)
+ break;
+ do_warn(
+_("Static meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
+ case XR_E_FS_MAP:
+ if (owner == XFS_RMAP_OWN_AG ||
+ owner == XFS_RMAP_OWN_INOBT)
+ break;
+ do_warn(
+_("AG meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
+ case XR_E_INO:
+ if (owner == XFS_RMAP_OWN_INODES)
+ break;
+ do_warn(
+_("inode block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
+ case XR_E_INUSE:
+ if (owner >= 0 &&
+ owner < mp->m_sb.sb_dblocks)
+ break;
+ do_warn(
+_("in use block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
+ case XR_E_FREE1:
+ case XR_E_FREE:
+ /*
+ * May be on the AGFL. If not, they'll
+ * be caught later.
+ */
+ break;
+ default:
+ do_warn(
+_("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
+ }
+ }
+ }
+ return;
+ }
+
+ /*
+ * interior record
+ */
+ pp = XFS_RMAP_PTR_ADDR(block, 1, mp->m_rmap_mxr[1]);
+
+ if (numrecs > mp->m_rmap_mxr[1]) {
+ numrecs = mp->m_rmap_mxr[1];
+ hdr_errors++;
+ }
+ if (isroot == 0 && numrecs < mp->m_rmap_mnr[1]) {
+ numrecs = mp->m_rmap_mnr[1];
+ hdr_errors++;
+ }
+
+ /*
+ * don't pass bogus tree flag down further if this block
+ * looked ok. bail out if two levels in a row look bad.
+ */
+ if (hdr_errors) {
+ do_warn(
+ _("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
+ be16_to_cpu(block->bb_numrecs),
+ mp->m_rmap_mnr[1], mp->m_rmap_mxr[1],
+ name, agno, bno);
+ if (suspect)
+ return;
+ suspect++;
+ } else if (suspect) {
+ suspect = 0;
+ }
+
+ for (i = 0; i < numrecs; i++) {
+ xfs_agblock_t bno = be32_to_cpu(pp[i]);
+
+ /*
+ * XXX - put sibling detection right here.
+ * we know our sibling chain is good. So as we go,
+ * we check the entry before and after each entry.
+ * If either of the entries references a different block,
+ * check the sibling pointer. If there's a sibling
+ * pointer mismatch, try and extract as much data
+ * as possible.
+ */
+ if (bno != 0 && verify_agbno(mp, agno, bno)) {
+ scan_sbtree(bno, level, agno, suspect, scan_rmapbt, 0,
+ magic, priv, &xfs_rmapbt_buf_ops);
+ }
+ }
+}
static int
scan_single_ino_chunk(
xfs_agnumber_t agno,
@@ -814,20 +1067,27 @@
agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
state = get_bmap(agno, agbno);
- if (state == XR_E_UNKNOWN) {
+ switch (state) {
+ case XR_E_INO:
+ break;
+ case XR_E_UNKNOWN:
+ case XR_E_INO1: /* seen by rmap */
set_bmap(agno, agbno, XR_E_INO);
- } else if (state == XR_E_INUSE_FS && agno == 0 &&
- ino + j >= first_prealloc_ino &&
- ino + j < last_prealloc_ino) {
- set_bmap(agno, agbno, XR_E_INO);
- } else {
+ break;
+ case XR_E_INUSE_FS:
+ case XR_E_INUSE_FS1:
+ if (agno == 0 &&
+ ino + j >= first_prealloc_ino &&
+ ino + j < last_prealloc_ino) {
+ set_bmap(agno, agbno, XR_E_INO);
+ break;
+ }
+ /* fall through */
+ default:
+ /* XXX - maybe should mark block a duplicate */
do_warn(
_("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
agno, agbno, mp->m_sb.sb_inopblock);
- /*
- * XXX - maybe should mark
- * block a duplicate
- */
return ++suspect;
}
}
@@ -973,19 +1233,35 @@
agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
state = get_bmap(agno, agbno);
- if (state == XR_E_INO) {
- continue;
- } else if ((state == XR_E_UNKNOWN) ||
- (state == XR_E_INUSE_FS && agno == 0 &&
- ino + j >= first_prealloc_ino &&
- ino + j < last_prealloc_ino)) {
+ switch (state) {
+ case XR_E_INO:
+ break;
+ case XR_E_INO1: /* seen by rmap */
+ set_bmap(agno, agbno, XR_E_INO);
+ break;
+ case XR_E_UNKNOWN:
do_warn(
_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
agno, agbno, mp->m_sb.sb_inopblock);
set_bmap(agno, agbno, XR_E_INO);
suspect++;
- } else {
+ break;
+ case XR_E_INUSE_FS:
+ case XR_E_INUSE_FS1:
+ if (agno == 0 &&
+ ino + j >= first_prealloc_ino &&
+ ino + j < last_prealloc_ino) {
+ do_warn(
+_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
+ agno, agbno, mp->m_sb.sb_inopblock);
+
+ set_bmap(agno, agbno, XR_E_INO);
+ suspect++;
+ break;
+ }
+ /* fall through */
+ default:
do_warn(
_("inode chunk claims used block, finobt block - agno %d, bno %d, inopb %d\n"),
agno, agbno, mp->m_sb.sb_inopblock);
@@ -1163,6 +1439,7 @@
*/
state = get_bmap(agno, bno);
switch (state) {
+ case XR_E_FS_MAP1: /* already been seen by an rmap scan */
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
@@ -1296,7 +1573,7 @@
if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
- set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
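+ /*
+ * The AGFL block is one of the static AG header blocks,
+ * which the rmap btree tracks under XFS_RMAP_OWN_FS, so
+ * mark it XR_E_INUSE_FS rather than XR_E_FS_MAP to keep
+ * the rmap cross-checks consistent.
+ */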
+ set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_INUSE_FS);
if (be32_to_cpu(agf->agf_flcount) == 0)
return;
@@ -1381,6 +1658,19 @@
bno, agno);
}
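+ /*
+ * Walk the rmap btree from the root recorded in the AGF so
+ * its ownership data can be cross-checked against the other
+ * per-AG scans.
+ */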
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
+ if (bno != 0 && verify_agbno(mp, agno, bno)) {
+ scan_sbtree(bno,
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]),
+ agno, 0, scan_rmapbt, 1, XFS_RMAP_CRC_MAGIC,
+ agcnts, &xfs_rmapbt_buf_ops);
+ } else {
+ do_warn(_("bad agbno %u for rmapbt root, agno %d\n"),
+ bno, agno);
+ }
+ }
+
if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno);
@@ -1396,6 +1686,7 @@
do_warn(_("agf_btreeblks %u, counted %" PRIu64 " in ag %u\n"),
be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, agno);
}
+
}
static void
@@ -1635,6 +1926,7 @@
__uint64_t fdblocks = 0;
__uint64_t icount = 0;
__uint64_t ifreecount = 0;
+ __uint64_t usedblocks = 0;
xfs_agnumber_t i;
work_queue_t wq;
@@ -1657,6 +1949,7 @@
fdblocks += agcnts[i].fdblocks;
icount += agcnts[i].icount;
ifreecount += agcnts[i].ifreecount;
+ usedblocks += agcnts[i].usedblocks;
}
free(agcnts);
@@ -1678,5 +1971,11 @@
do_warn(_("sb_fdblocks %" PRIu64 ", counted %" PRIu64 "\n"),
mp->m_sb.sb_fdblocks, fdblocks);
}
+
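+ /*
+ * If the rmap scans accumulated a used block count, it
+ * should match the data blocks that are not free, i.e.
+ * sb_dblocks minus the free block count we just checked.
+ */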
+ if (usedblocks &&
+ usedblocks != mp->m_sb.sb_dblocks - fdblocks) {
+ do_warn(_("used blocks %" PRIu64 ", counted %" PRIu64 "\n"),
+ mp->m_sb.sb_dblocks - fdblocks, usedblocks);
+ }
}
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 11a6069..07ddd00 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -411,6 +411,8 @@
fino_bno = inobt_root + XFS_MIN_FREELIST_RAW(1, 1, mp) + 1;
if (xfs_sb_version_hasfinobt(&mp->m_sb))
fino_bno++;
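+ /* the rmap btree root consumes one more block here, too */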
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ fino_bno++;
/*
* If the log is allocated in the first allocation group we need to