blob: 2fc89dac345d4ce74068f7e48721b6e63e81836c [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
#include "libxfs.h"
#include "libxlog.h"
#include <sys/resource.h>
#include "xfs_multidisk.h"
#include "avl.h"
#include "libfrog/avl64.h"
#include "globals.h"
#include "versions.h"
#include "agheader.h"
#include "protos.h"
#include "incore.h"
#include "err_protos.h"
#include "prefetch.h"
#include "threads.h"
#include "progress.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"
#include "libfrog/fsgeom.h"
#include "libfrog/platform.h"
#include "bulkload.h"
#include "quotacheck.h"
/*
* option tables for getsubopt calls
*/
/*
 * -o: user-supplied override options
 *
 * Each enumerator is the index of the matching string in o_opts[] and is the
 * value getsubopt() returns for that suboption in process_args().
 */
enum o_opt_nums {
ASSUME_XFS = 0, /* flag only: sets assume_xfs */
IHASH_SIZE, /* accepted but ignored; process_args() only warns */
BHASH_SIZE, /* sets libxfs_bhash_size; rejected together with -m */
AG_STRIDE, /* sets ag_stride */
FORCE_GEO, /* flag only: sets force_geo */
PHASE2_THREADS, /* sets phase2_threads */
BLOAD_LEAF_SLACK, /* sets bload_leaf_slack */
BLOAD_NODE_SLACK, /* sets bload_node_slack */
NOQUOTA, /* calls quotacheck_skip() */
O_MAX_OPTS, /* number of options; index of the NULL terminator */
};
/*
 * Suboption names accepted after -o, indexed by enum o_opt_nums.  The array
 * is NULL-terminated because it is handed to getsubopt().
 */
static char *o_opts[] = {
[ASSUME_XFS] = "assume_xfs",
[IHASH_SIZE] = "ihash",
[BHASH_SIZE] = "bhash",
[AG_STRIDE] = "ag_stride",
[FORCE_GEO] = "force_geometry",
[PHASE2_THREADS] = "phase2_threads",
[BLOAD_LEAF_SLACK] = "debug_bload_leaf_slack",
[BLOAD_NODE_SLACK] = "debug_bload_node_slack",
[NOQUOTA] = "noquota",
[O_MAX_OPTS] = NULL,
};
/*
 * -c: conversion options
 *
 * Each enumerator is the index of the matching string in c_opts[] and is the
 * value getsubopt() returns for that suboption in process_args().
 */
enum c_opt_nums {
CONVERT_LAZY_COUNT = 0, /* sets lazy_count and convert_lazy_count */
CONVERT_INOBTCOUNT, /* only the value 1 is accepted; sets add_inobtcount */
CONVERT_BIGTIME, /* only the value 1 is accepted; sets add_bigtime */
CONVERT_NREXT64, /* only the value 1 is accepted; sets add_nrext64 */
C_MAX_OPTS, /* number of options; index of the NULL terminator */
};
/*
 * Suboption names accepted after -c, indexed by enum c_opt_nums.  The array
 * is NULL-terminated because it is handed to getsubopt().
 */
static char *c_opts[] = {
[CONVERT_LAZY_COUNT] = "lazycount",
[CONVERT_INOBTCOUNT] = "inobtcount",
[CONVERT_BIGTIME] = "bigtime",
[CONVERT_NREXT64] = "nrext64",
[C_MAX_OPTS] = NULL,
};
/* Set once "-o bhash=" has been parsed; makes a later -m option an error. */
static int bhash_option_used;
/* Value of -m; nonzero makes a later "-o bhash=" an error. */
static long max_mem_specified; /* in megabytes */
/* Thread count handed to phase2(), phase3() and phase7(); -o phase2_threads= overrides it. */
static int phase2_threads = 32;
/* Set by -e: main() returns 4 instead of 0 when fs_is_dirty is set at the end. */
static bool report_corrected;
/*
 * Print the command line synopsis through do_warn() and terminate the
 * program with exit status 1.  Never returns.
 */
static void
usage(void)
{
do_warn(_(
"Usage: %s [options] device\n"
"\n"
"Options:\n"
" -f The device is a file\n"
" -L Force log zeroing. Do this as a last resort.\n"
" -l logdev Specifies the device where the external log resides.\n"
" -m maxmem Maximum amount of memory to be used in megabytes.\n"
" -n No modify mode, just checks the filesystem for damage.\n"
" (Cannot be used together with -e.)\n"
" -P Disables prefetching.\n"
" -r rtdev Specifies the device where the realtime section resides.\n"
" -v Verbose output.\n"
" -c subopts Change filesystem parameters - use xfs_admin.\n"
" -o subopts Override default behaviour, refer to man page.\n"
" -t interval Reporting interval in seconds.\n"
" -d Repair dangerously.\n"
" -e Exit with a non-zero code if any errors were repaired.\n"
" (Cannot be used together with -n.)\n"
" -V Reports version and exits.\n"), progname);
/* Every caller treats usage() as fatal. */
exit(1);
}
/*
 * Translate an XR_* superblock verification code into a human readable,
 * localized message.
 *
 * The message table is filled in on the first call (the strings must go
 * through gettext at run time) and cached in function-local statics; no
 * locking is done around that first-call initialization.
 *
 * err_code must satisfy XR_OK <= err_code < XR_BAD_ERR_CODE; anything else
 * is reported through do_abort(), which does not return.
 *
 * Returns a pointer to the cached message; the caller must not free it.
 */
char *
err_string(int err_code)
{
	static char	*err_message[XR_BAD_ERR_CODE];
	static int	done;

	if (!done) {
		err_message[XR_OK] = _("no error");
		err_message[XR_BAD_MAGIC] = _("bad magic number");
		err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
		err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
		err_message[XR_BAD_VERSION] = _("bad or unsupported version");
		err_message[XR_BAD_INPROGRESS] =
			_("filesystem mkfs-in-progress bit set");
		err_message[XR_BAD_FS_SIZE_DATA] =
			_("inconsistent filesystem geometry information");
		/*
		 * This statement used to end in ',' and was therefore fused
		 * with the next assignment by the comma operator.
		 */
		err_message[XR_BAD_INO_SIZE_DATA] =
		_("bad inode size or inconsistent with number of inodes/block");
		err_message[XR_BAD_SECT_SIZE_DATA] = _("bad sector size");
		err_message[XR_AGF_GEO_MISMATCH] =
		_("AGF geometry info conflicts with filesystem geometry");
		err_message[XR_AGI_GEO_MISMATCH] =
		_("AGI geometry info conflicts with filesystem geometry");
		err_message[XR_SB_GEO_MISMATCH] =
		_("AG superblock geometry info conflicts with filesystem geometry");
		err_message[XR_EOF] = _("attempted to perform I/O beyond EOF");
		err_message[XR_BAD_RT_GEO_DATA] =
		_("inconsistent filesystem geometry in realtime filesystem component");
		err_message[XR_BAD_INO_MAX_PCT] =
			_("maximum indicated percentage of inodes > 100%");
		err_message[XR_BAD_INO_ALIGN] =
			_("inconsistent inode alignment value");
		err_message[XR_INSUFF_SEC_SB] =
		_("not enough secondary superblocks with matching geometry");
		err_message[XR_BAD_SB_UNIT] =
			_("bad stripe unit in superblock");
		err_message[XR_BAD_SB_WIDTH] =
			_("bad stripe width in superblock");
		err_message[XR_BAD_SVN] =
			_("bad shared version number in superblock");
		err_message[XR_BAD_CRC] =
			_("bad CRC in superblock");
		err_message[XR_BAD_DIR_SIZE_DATA] =
			_("inconsistent directory geometry information");
		err_message[XR_BAD_LOG_GEOMETRY] =
			_("inconsistent log geometry information");
		done = 1;
	}

	/* Never index the table with a code it has no slot for. */
	if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)
		do_abort(_("bad error code - %d\n"), err_code);

	return(err_message[err_code]);
}
/*
 * Complain that suboption tbl[idx] of option -opt was given a value although
 * it does not take one, then bail out through usage().
 */
static void
noval(char opt, char *tbl[], int idx)
{
	char	*subopt_name = tbl[idx];

	do_warn(_("-%c %s option cannot have a value\n"), opt, subopt_name);
	usage();
}
/*
 * Complain that option -opt (or, when tbl is non-NULL, its suboption
 * tbl[idx]) was given more than once, then bail out through usage().
 */
static void
respec(char opt, char *tbl[], int idx)
{
	/* The message is assembled from up to three separate warnings. */
	do_warn("-%c ", opt);
	if (tbl != NULL) {
		do_warn("%s ", tbl[idx]);
	}
	do_warn(_("option respecified\n"));

	usage();
}
/*
 * Complain about the unrecognized suboption text s of option -opt, then
 * bail out through usage().
 */
static void
unknown(char opt, char *s)
{
	char	*bad_subopt = s;

	do_warn(_("unknown option -%c %s\n"), opt, bad_subopt);
	usage();
}
/*
* sets only the global argument flags and variables
*/
static void
process_args(int argc, char **argv)
{
char *p;
int c;
log_spec = 0;
fs_is_dirty = 0;
verbose = 0;
no_modify = 0;
dangerously = 0;
isa_file = 0;
zap_log = 0;
dumpcore = 0;
full_ino_ex_data = 0;
force_geo = 0;
assume_xfs = 0;
copied_sunit = 0;
sb_inoalignmt = 0;
sb_unit = 0;
sb_width = 0;
ag_stride = 0;
thread_count = 1;
report_interval = PROG_RPT_DEFAULT;
report_corrected = false;
/*
* XXX have to add suboption processing here
* attributes, quotas, nlinks, aligned_inos, sb_fbits
*/
while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPet:")) != EOF) {
switch (c) {
case 'D':
dumpcore = 1;
break;
case 'o':
p = optarg;
while (*p != '\0') {
char *val;
switch (getsubopt(&p, o_opts, &val)) {
case ASSUME_XFS:
if (val)
noval('o', o_opts, ASSUME_XFS);
if (assume_xfs)
respec('o', o_opts, ASSUME_XFS);
assume_xfs = 1;
break;
case IHASH_SIZE:
do_warn(
_("-o ihash option has been removed and will be ignored\n"));
break;
case BHASH_SIZE:
if (max_mem_specified)
do_abort(
_("-o bhash option cannot be used with -m option\n"));
if (!val)
do_abort(
_("-o bhash requires a parameter\n"));
errno = 0;
libxfs_bhash_size = (int)strtol(val, NULL, 0);
if (errno)
do_abort(
_("-o bhash invalid parameter: %s\n"), strerror(errno));
bhash_option_used = 1;
break;
case AG_STRIDE:
if (!val)
do_abort(
_("-o ag_stride requires a parameter\n"));
errno = 0;
ag_stride = (int)strtol(val, NULL, 0);
if (errno)
do_abort(
_("-o ag_stride invalid parameter: %s\n"), strerror(errno));
break;
case FORCE_GEO:
if (val)
noval('o', o_opts, FORCE_GEO);
if (force_geo)
respec('o', o_opts, FORCE_GEO);
force_geo = 1;
break;
case PHASE2_THREADS:
if (!val)
do_abort(
_("-o phase2_threads requires a parameter\n"));
errno = 0;
phase2_threads = (int)strtol(val, NULL, 0);
if (errno)
do_abort(
_("-o phase2_threads invalid parameter: %s\n"), strerror(errno));
break;
case BLOAD_LEAF_SLACK:
if (!val)
do_abort(
_("-o debug_bload_leaf_slack requires a parameter\n"));
errno = 0;
bload_leaf_slack = (int)strtol(val, NULL, 0);
if (errno)
do_abort(
_("-o debug_bload_leaf_slack invalid parameter: %s\n"), strerror(errno));
break;
case BLOAD_NODE_SLACK:
if (!val)
do_abort(
_("-o debug_bload_node_slack requires a parameter\n"));
errno = 0;
bload_node_slack = (int)strtol(val, NULL, 0);
if (errno)
do_abort(
_("-o debug_bload_node_slack invalid parameter: %s\n"), strerror(errno));
break;
case NOQUOTA:
quotacheck_skip();
break;
default:
unknown('o', val);
break;
}
}
break;
case 'c':
p = optarg;
while (*p) {
char *val;
switch (getsubopt(&p, c_opts, &val)) {
case CONVERT_LAZY_COUNT:
if (!val)
do_abort(
_("-c lazycount requires a parameter\n"));
errno = 0;
lazy_count = (int)strtol(val, NULL, 0);
if (errno)
do_abort(
_("-o lazycount invalid parameter: %s\n"), strerror(errno));
convert_lazy_count = 1;
break;
case CONVERT_INOBTCOUNT:
if (!val)
do_abort(
_("-c inobtcount requires a parameter\n"));
if (strtol(val, NULL, 0) != 1)
do_abort(
_("-c inobtcount only supports upgrades\n"));
add_inobtcount = true;
break;
case CONVERT_BIGTIME:
if (!val)
do_abort(
_("-c bigtime requires a parameter\n"));
if (strtol(val, NULL, 0) != 1)
do_abort(
_("-c bigtime only supports upgrades\n"));
add_bigtime = true;
break;
case CONVERT_NREXT64:
if (!val)
do_abort(
_("-c nrext64 requires a parameter\n"));
if (strtol(val, NULL, 0) != 1)
do_abort(
_("-c nrext64 only supports upgrades\n"));
add_nrext64 = true;
break;
default:
unknown('c', val);
break;
}
}
break;
case 'l':
log_name = optarg;
log_spec = 1;
break;
case 'r':
rt_name = optarg;
rt_spec = 1;
break;
case 'f':
isa_file = 1;
break;
case 'm':
if (bhash_option_used)
do_abort(_("-m option cannot be used with "
"-o bhash option\n"));
errno = 0;
max_mem_specified = strtol(optarg, NULL, 0);
if (errno)
do_abort(
_("%s: invalid memory amount: %s\n"), optarg, strerror(errno));
break;
case 'L':
zap_log = 1;
break;
case 'n':
no_modify = 1;
break;
case 'd':
dangerously = 1;
break;
case 'v':
verbose++;
break;
case 'V':
printf(_("%s version %s\n"), progname, VERSION);
exit(0);
case 'P':
do_prefetch = 0;
break;
case 't':
errno = 0;
report_interval = strtol(optarg, NULL, 0);
if (errno)
do_abort(
_("%s: invalid interval: %s\n"), optarg, strerror(errno));
break;
case 'e':
report_corrected = true;
break;
default:
usage();
}
}
if (argc - optind != 1)
usage();
if ((fs_name = argv[optind]) == NULL)
usage();
if (report_corrected && no_modify)
usage();
p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
if (p) {
errno = 0;
fail_after_phase = (int)strtol(p, NULL, 0);
if (errno)
do_abort(
_("%s: invalid phase in XFS_REPAIR_FAIL_AFTER_PHASE: %s\n"),
p, strerror(errno));
}
}
/*
 * Report a fatal error and terminate.
 *
 * Prints "fatal error -- " followed by the printf-style message to stderr,
 * then calls abort() if the global dumpcore flag is set (-D) or exit(1)
 * otherwise.  Never returns.
 */
void __attribute__((noreturn))
do_error(char const *msg, ...)
{
	va_list	args;

	fprintf(stderr, _("\nfatal error -- "));

	va_start(args, msg);
	vfprintf(stderr, msg, args);
	/* Pair every va_start with va_end, as do_warn() and do_log() do. */
	va_end(args);

	if (dumpcore)
		abort();
	exit(1);
}
/*
 * Like do_error(), but the message is printed as given, without the
 * "fatal error -- " prefix.
 *
 * Writes the printf-style message to stderr, then calls abort() if the
 * global dumpcore flag is set (-D) or exit(1) otherwise.  Never returns.
 */
void __attribute__((noreturn))
do_abort(char const *msg, ...)
{
	va_list	args;

	va_start(args, msg);
	vfprintf(stderr, msg, args);
	/* Pair every va_start with va_end, as do_warn() and do_log() do. */
	va_end(args);

	if (dumpcore)
		abort();
	exit(1);
}
/*
 * Print a printf-style warning to stderr.  Every call also sets the global
 * fs_is_dirty flag, which main() consults when choosing its return value.
 */
void
do_warn(char const *msg, ...)
{
	va_list	ap;

	fs_is_dirty = 1;

	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);
}
/*
 * Print a printf-style informational message to stderr.  Nothing is added
 * to the message and, unlike do_warn(), fs_is_dirty is left alone.
 */
void
do_log(char const *msg, ...)
{
	va_list	ap;

	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);
}
/*
 * Compare one of the superblock's fixed-location inode pointers with the
 * value computed for it.  On mismatch, warn (using tag to name the inode)
 * and overwrite *ino with expected_ino.
 */
static void
validate_sb_ino(
	xfs_ino_t	*ino,
	xfs_ino_t	expected_ino,
	const char	*tag)
{
	const char	*null_note;

	if (*ino != expected_ino) {
		/* Point out explicitly when the pointer was never set. */
		null_note = (*ino == NULLFSINO) ? "(NULLFSINO) " : "";

		do_warn(
_("sb %s inode value %" PRIu64 " %sinconsistent with calculated value %"PRIu64"\n"),
			tag, *ino, null_note, expected_ino);

		if (no_modify)
			do_warn(
_("would reset superblock %s inode pointer to %"PRIu64"\n"),
				tag, expected_ino);
		else
			do_warn(
_("resetting superblock %s inode pointer to %"PRIu64"\n"),
				tag, expected_ino);

		/*
		 * Updating the value unconditionally is safe: the superblock
		 * doesn't get flushed out if no_modify is set.
		 */
		*ino = expected_ino;
	}
}
/* Does the root directory inode look like a plausible root directory? */
static bool
has_plausible_rootdir(
struct xfs_mount *mp)
{
struct xfs_inode *ip;
xfs_ino_t ino;
int error;
bool ret = false;
error = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &ip);
if (error)
goto out;
if (!S_ISDIR(VFS_I(ip)->i_mode))
goto out_rele;
error = -libxfs_dir_lookup(NULL, ip, &xfs_name_dotdot, &ino, NULL);
if (error)
goto out_rele;
/* The root directory '..' entry points to the directory. */
if (ino == mp->m_sb.sb_rootino)
ret = true;
out_rele:
libxfs_irele(ip);
out:
return ret;
}
/*
 * The root inode is not where the current sb_unit says it should be.  Look
 * for a stripe unit value that makes libxfs_ialloc_calc_rootino() agree
 * with the actual sb_rootino, and if one is found install it in the incore
 * primary superblock.
 *
 * Candidates are tried in this order:
 *   1. the sb_unit stored in each secondary superblock (AG 1 onwards);
 *   2. successive powers of two up to XFS_AG_MAX_BLOCKS().
 *
 * If sb_width is no longer consistent with the new stripe unit it is reset
 * to the new stripe unit as well.  Only the incore superblock is touched
 * here.
 */
static void
guess_correct_sunit(
	struct xfs_mount	*mp)
{
	struct xfs_sb		sb;
	struct xfs_buf		*bp;
	xfs_ino_t		calc_rootino = NULLFSINO;
	xfs_agnumber_t		agno;
	unsigned int		new_sunit;
	unsigned int		sunit_guess;
	int			error;

	/* Try reading secondary supers to see if we find a good sb_unit. */
	for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) {
		error = -libxfs_sb_read_secondary(mp, NULL, agno, &bp);
		if (error)
			continue;
		libxfs_sb_from_disk(&sb, bp->b_addr);
		libxfs_buf_relse(bp);

		calc_rootino = libxfs_ialloc_calc_rootino(mp, sb.sb_unit);
		if (calc_rootino == mp->m_sb.sb_rootino)
			break;
	}

	/* If we found a reasonable value, log where we found it. */
	if (calc_rootino == mp->m_sb.sb_rootino) {
		do_warn(_("AG %u superblock contains plausible sb_unit value\n"),
				agno);
		new_sunit = sb.sb_unit;
		goto fix;
	}

	/*
	 * Try successive powers of two.  The "!= 0" test stops the loop if
	 * doubling wraps the unsigned counter; otherwise a bound of 2^31 or
	 * more would never terminate.
	 * NOTE(review): whether XFS_AG_MAX_BLOCKS() can get that large depends
	 * on its definition, which is not visible here -- confirm.
	 */
	for (sunit_guess = 1;
	     sunit_guess != 0 &&
	     sunit_guess <= XFS_AG_MAX_BLOCKS(mp->m_sb.sb_blocklog);
	     sunit_guess *= 2) {
		calc_rootino = libxfs_ialloc_calc_rootino(mp, sunit_guess);
		if (calc_rootino == mp->m_sb.sb_rootino)
			break;
	}

	/* If we found a reasonable value, log where we found it. */
	if (calc_rootino == mp->m_sb.sb_rootino) {
		do_warn(_("Found an sb_unit value that looks plausible\n"));
		new_sunit = sunit_guess;
		goto fix;
	}

	do_warn(_("Could not estimate a plausible sb_unit value\n"));
	return;

fix:
	if (!no_modify)
		do_warn(_("Resetting sb_unit to %u\n"), new_sunit);
	else
		do_warn(_("Would reset sb_unit to %u\n"), new_sunit);

	/*
	 * Just set the value -- safe since the superblock doesn't get flushed
	 * out if no_modify is set.
	 */
	mp->m_sb.sb_unit = new_sunit;

	/*
	 * Make sure that swidth is still a multiple of sunit.  A secondary
	 * superblock may legitimately carry sb_unit == 0, so never use zero
	 * as a divisor; with no stripe unit the only consistent width is 0.
	 */
	if (new_sunit != 0) {
		if (mp->m_sb.sb_width % new_sunit == 0)
			return;
	} else if (mp->m_sb.sb_width == 0) {
		return;
	}

	if (!no_modify)
		do_warn(_("Resetting sb_width to %u\n"), new_sunit);
	else
		do_warn(_("Would reset sb_width to %u\n"), new_sunit);

	/* Actually apply what was just announced (same reasoning as above). */
	mp->m_sb.sb_width = new_sunit;
}
/*
* Make sure that the first 3 inodes in the filesystem are the root directory,
* the realtime bitmap, and the realtime summary, in that order.
*/
static void
calc_mkfs(
struct xfs_mount *mp)
{
xfs_ino_t rootino;
rootino = libxfs_ialloc_calc_rootino(mp, mp->m_sb.sb_unit);
/*
* If the root inode isn't where we think it is, check its plausibility
* as a root directory. It's possible that somebody changed sunit
* since the filesystem was created, which can change the value of the
* above computation. Don't blow up the root directory if this is the
* case.
*/
if (mp->m_sb.sb_rootino != rootino && has_plausible_rootdir(mp)) {
do_warn(
_("sb root inode value %" PRIu64 " valid but in unaligned location (expected %"PRIu64") possibly due to sunit change\n"),
mp->m_sb.sb_rootino, rootino);
guess_correct_sunit(mp);
rootino = mp->m_sb.sb_rootino;
}
validate_sb_ino(&mp->m_sb.sb_rootino, rootino,
_("root"));
validate_sb_ino(&mp->m_sb.sb_rbmino, rootino + 1,
_("realtime bitmap"));
validate_sb_ino(&mp->m_sb.sb_rsumino, rootino + 2,
_("realtime summary"));
}
/*
 * v5 (CRC) metadata records the LSN of its last modification, so the log's
 * current LSN must never be behind any metadata on disk.  Clearing the log
 * resets the current LSN and can break that rule.
 *
 * libxfs_max_lsn holds the highest LSN seen by the read verifiers.  If the
 * log is not strictly ahead of it, reformat the log at a higher cycle
 * number (or, with no_modify, only report that this would be done).
 */
static void
format_log_max_lsn(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;
	xfs_daddr_t		logstart;
	xfs_daddr_t		logblocks;
	int			max_cycle;
	int			max_block;
	int			new_cycle;
	int			logversion;
	bool			log_is_ahead;

	/* Only CRC-enabled filesystems stamp LSNs into metadata. */
	if (!xfs_has_crc(mp))
		return;

	max_cycle = CYCLE_LSN(libxfs_max_lsn);
	max_block = BLOCK_LSN(libxfs_max_lsn);

	/* Nothing to do when the log is already past every LSN we saw. */
	log_is_ahead = max_cycle < log->l_curr_cycle;
	if (!log_is_ahead && max_cycle == log->l_curr_cycle)
		log_is_ahead = max_block < log->l_curr_block;
	if (log_is_ahead)
		return;

	/*
	 * The next cycle would be enough; a few extra counts give headroom
	 * for metadata LSNs that may not have been seen.
	 */
	new_cycle = max_cycle + 3;
	logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
	logblocks = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	logversion = 1;
	if (xfs_has_logv2(mp))
		logversion = 2;

	do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
		max_cycle, max_block, log->l_curr_cycle, log->l_curr_block);

	if (!no_modify) {
		do_warn(_("Format log to cycle %d.\n"), new_cycle);
		libxfs_log_clear(log->l_dev, NULL, logstart, logblocks,
				 &mp->m_sb.sb_uuid, logversion,
				 mp->m_sb.sb_logsunit, XLOG_FMT, new_cycle,
				 true);
		return;
	}

	do_warn(_("Would format log to cycle %d.\n"), new_cycle);
}
/*
 * mkfs increases the AG count for "multidisk" configurations, and those are
 * the filesystems we want to give more repair threads.  Inspect the
 * superblock geometry to decide whether mkfs considered this filesystem a
 * multidisk configuration.
 */
static bool
is_multidisk_filesystem(
	struct xfs_mount	*mp)
{
	bool			has_stripe_unit;

	/* High agcount filesystems are always considered "multidisk". */
	if (mp->m_sb.sb_agcount >= XFS_MULTIDISK_AGCOUNT)
		return true;

	/*
	 * Without a sunit/swidth mkfs didn't treat it as a multi-disk array,
	 * so neither do we.
	 */
	has_stripe_unit = mp->m_sb.sb_unit != 0;
	if (has_stripe_unit) {
		ASSERT(mp->m_sb.sb_width);
	}
	return has_stripe_unit;
}
/*
 * If the sector size of the filesystem we are trying to repair is smaller
 * than that of the underlying filesystem (i.e. we are repairing an image),
 * then we have to turn off direct IO because we cannot do IO smaller than
 * the host filesystem's sector size.
 *
 * sb is the superblock of the image being repaired.  The data device file
 * descriptor is taken from the global libxfs init structure x.  If the host
 * geometry cannot be queried, a notice is logged and BBSIZE is assumed as
 * the host sector size.  If direct IO needs to be disabled but the file
 * status flags cannot be read or changed, the program exits with status 1.
 */
static void
check_fs_vs_host_sectsize(
	struct xfs_sb		*sb)
{
	int			ret;
	int			old_flags;
	struct xfs_fsop_geom	geom = { 0 };

	ret = -xfrog_geometry(x.data.fd, &geom);
	if (ret) {
		do_log(_("Cannot get host filesystem geometry.\n"
	"Repair may fail if there is a sector size mismatch between\n"
	"the image and the host filesystem.\n"));
		geom.sectsize = BBSIZE;
	}

	/* Direct IO works as long as our sectors are at least host sized. */
	if (sb->sb_sectsize >= geom.sectsize)
		return;

	/*
	 * F_GETFL returns -1 on failure; masking that value and feeding it
	 * to F_SETFL would set arbitrary flags, so treat a failed read of the
	 * flags the same as a failed update.
	 */
	old_flags = fcntl(x.data.fd, F_GETFL, 0);
	if (old_flags < 0 ||
	    fcntl(x.data.fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
		do_warn(_(
	"Sector size on host filesystem larger than image sector size.\n"
	"Cannot turn off direct IO, so exiting.\n"));
		exit(1);
	}
}
/*
 * If we set up a writeback function to set NEEDSREPAIR while the filesystem
 * is dirty, there's a chance that calling libxfs_getsb could deadlock the
 * buffer cache while trying to get the primary sb buffer if the first non-sb
 * write to the filesystem is the result of a cache shake.  Retain a
 * reference to the primary sb buffer to avoid all that.
 */
static struct xfs_buf	*primary_sb_bp;	/* buffer for superblock */

/*
 * Read the primary superblock buffer into primary_sb_bp and leave it
 * unlocked.  Returns 0 on success or the positive error code from the
 * buffer read.
 */
int
retain_primary_sb(
	struct xfs_mount	*mp)
{
	int			err;

	err = -libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
			XFS_FSS_TO_BB(mp, 1), 0, &primary_sb_bp,
			&xfs_sb_buf_ops);
	if (!err) {
		/* Keep the buffer but unlock it so others can lock it. */
		libxfs_buf_unlock(primary_sb_bp);
	}
	return err;
}
/*
 * Release the retained primary superblock buffer, if there is one, and
 * forget about it.
 */
static void
drop_primary_sb(void)
{
	if (primary_sb_bp != NULL) {
		/* The buffer is kept unlocked; lock it before releasing. */
		libxfs_buf_lock(primary_sb_bp);
		libxfs_buf_relse(primary_sb_bp);
		primary_sb_bp = NULL;
	}
}
/*
 * Hand out the retained primary superblock buffer, locked and with an extra
 * hold, reading it first if nothing has been retained yet.
 *
 * On success returns 0 and stores the buffer in *bpp.  On failure returns
 * the error from retain_primary_sb() and leaves *bpp set to NULL.
 */
static int
get_primary_sb(
	struct xfs_mount	*mp,
	struct xfs_buf		**bpp)
{
	*bpp = NULL;

	if (primary_sb_bp == NULL) {
		int		err = retain_primary_sb(mp);

		if (err)
			return err;
	}

	libxfs_buf_lock(primary_sb_bp);
	xfs_buf_hold(primary_sb_bp);
	*bpp = primary_sb_bp;
	return 0;
}
/*
 * Clear the NEEDSREPAIR incompat flag from the primary superblock after a
 * repair run, then drop the retained primary superblock buffer.
 */
static void
clear_needsrepair(
	struct xfs_mount	*mp)
{
	struct xfs_buf		*bp;
	int			error;

	/*
	 * Before the flag may go away we must be certain the ondisk
	 * filesystem is in good shape: flush every dirty buffer and have the
	 * disks persist their writes.  If that does not work, NEEDSREPAIR
	 * stays set.
	 */
	error = -libxfs_flush_mount(mp);
	if (error) {
		do_warn(
	_("Cannot clear needsrepair due to flush failure, err=%d.\n"),
			error);
		drop_primary_sb();
		return;
	}

	/* Clear needsrepair from the superblock. */
	error = get_primary_sb(mp, &bp);
	if (!error) {
		mp->m_sb.sb_features_incompat &=
				~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
		libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
		libxfs_buf_mark_dirty(bp);
	} else {
		do_warn(
	_("Cannot clear needsrepair from primary super, err=%d.\n"), error);
	}

	/* get_primary_sb() leaves bp NULL when it fails. */
	if (bp != NULL)
		libxfs_buf_relse(bp);

	drop_primary_sb();
}
/*
 * Write "verifier" that does no verification: it only recomputes the
 * superblock checksum.  force_needsrepair() installs it temporarily as
 * verify_write.
 */
static void
update_sb_crc_only(
struct xfs_buf *bp)
{
xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
}
/*
 * Set NEEDSREPAIR in the primary superblock and write it to disk right
 * away.  Does nothing on filesystems without CRCs or when the flag is
 * already set.  Failures are logged, not propagated.
 */
static void
force_needsrepair(
	struct xfs_mount	*mp)
{
	struct xfs_buf_ops	fake_ops;
	struct xfs_buf		*bp;
	int			error;

	if (!xfs_has_crc(mp))
		return;
	if (xfs_sb_version_needsrepair(&mp->m_sb))
		return;

	error = get_primary_sb(mp, &bp);
	if (error) {
		do_log(
	_("couldn't get superblock to set needsrepair, err=%d\n"), error);
		goto out_relse;
	}

	/*
	 * NEEDSREPAIR may have to be set before the summary counters in the
	 * primary sb have been fixed.  Apart from those counters, phase 1
	 * has already made sure the geometry is sane.
	 *
	 * Bad summary counters can make the regular write verifier fail, so
	 * swap in a dummy that only updates the CRC.  Should we crash,
	 * NEEDSREPAIR keeps the kernel from mounting the possibly damaged
	 * filesystem until repair runs again, which makes skipping the usual
	 * verification acceptable in this one case.
	 */
	fake_ops = xfs_sb_buf_ops; /* struct copy */
	fake_ops.verify_write = update_sb_crc_only;

	mp->m_sb.sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
	libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);

	/*
	 * Force the primary super to disk immediately.  fake_ops lives on
	 * this stack frame, so the real ops are restored right after the
	 * write.
	 */
	bp->b_ops = &fake_ops;
	error = -libxfs_bwrite(bp);
	bp->b_ops = &xfs_sb_buf_ops;
	if (error)
		do_log(_("couldn't force needsrepair, err=%d\n"), error);

out_relse:
	if (bp != NULL)
		libxfs_buf_relse(bp);
}
/*
 * Buffer writeback hook.  On the first write of anything that is not a
 * superblock, set NEEDSREPAIR on disk so that a crash in the middle of
 * repair leaves the filesystem protected from being mounted, then
 * uninstall the hook.
 */
static void
repair_capture_writeback(
	struct xfs_buf		*bp)
{
	static pthread_mutex_t	wb_mutex = PTHREAD_MUTEX_INITIALIZER;
	struct xfs_mount	*mp = bp->b_mount;

	/*
	 * Superblock-looking buffers are ignored so that setting NEEDSREPAIR
	 * does not recurse into this hook.  Higher level code that modifies
	 * an sb has to manage the flag itself.
	 */
	if (bp->b_ops == &xfs_sb_buf_ops)
		return;
	if (xfs_buf_daddr(bp) == XFS_SB_DADDR)
		return;

	pthread_mutex_lock(&wb_mutex);

	/*
	 * If the hook is still installed, nobody has set needsrepair yet and
	 * it is our job.  The hook stays in place while the sb write is in
	 * flight so that every other write is held back on the mutex; it is
	 * removed only afterwards because the flag is set just once.
	 *
	 * If the hook is already gone, someone else did the work and there
	 * is nothing left to do but unlock.
	 */
	if (mp->m_buf_writeback_fn == repair_capture_writeback) {
		force_needsrepair(mp);
		mp->m_buf_writeback_fn = NULL;
	}

	pthread_mutex_unlock(&wb_mutex);
}
/*
 * Record the end of a repair phase and, if XFS_REPAIR_FAIL_AFTER_PHASE
 * selected this phase (see process_args), crash on purpose.
 */
static inline void
phase_end(int phase)
{
	timestamp(PHASE_END, phase, NULL);

	/* Zero means no post-phase error was injected. */
	if (fail_after_phase == 0)
		return;

	if (fail_after_phase == phase)
		platform_crash();
}
/*
 * Program entry point.
 *
 * Parses the arguments, runs phase 1 to obtain a usable primary superblock,
 * sets up the libxfs mount, thread count and buffer cache, runs phases 2-7
 * and finally writes back the superblock and unmounts.
 *
 * Return value as implemented below:
 *   no_modify (-n):  1 if fs_is_dirty was set (any do_warn() call), else 0;
 *   otherwise:       4 if fs_is_dirty and -e was given, else 0.
 * Unsupported filesystem features return 1.  Fatal conditions leave through
 * exit(1), do_error() or do_abort() instead of returning.
 */
int
main(int argc, char **argv)
{
xfs_mount_t *temp_mp;
xfs_mount_t *mp;
struct xfs_buf *sbp;
xfs_mount_t xfs_m;
struct xlog log = {0};
char *msgbuf;
struct xfs_sb psb;
int rval;
struct xfs_ino_geometry *igeo;
int error;
progname = basename(argv[0]);
setlocale(LC_ALL, "");
bindtextdomain(PACKAGE, LOCALEDIR);
textdomain(PACKAGE);
dinode_bmbt_translation_init();
temp_mp = &xfs_m;
setbuf(stdout, NULL);
process_args(argc, argv);
xfs_init(&x);
/* Scratch buffer for duration(); may be NULL, which is checked before use. */
msgbuf = malloc(DURATION_BUF_SIZE);
timestamp(PHASE_START, 0, NULL);
phase_end(0);
/* -f forces this, but let's be nice and autodetect it, as well. */
if (!isa_file) {
struct stat statbuf;
if (fstat(x.data.fd, &statbuf) < 0)
do_warn(_("%s: couldn't stat \"%s\"\n"),
progname, fs_name);
else if (S_ISREG(statbuf.st_mode))
isa_file = 1;
}
if (isa_file) {
/* Best effort attempt to validate fs vs host sector size */
rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
if (rval == XR_OK)
check_fs_vs_host_sectsize(&psb);
}
/* do phase1 to make sure we have a superblock */
phase1(temp_mp);
phase_end(1);
if (no_modify && primary_sb_modified) {
do_warn(_("Primary superblock would have been modified.\n"
"Cannot proceed further in no_modify mode.\n"
"Exiting now.\n"));
exit(1);
}
/* Re-read the primary superblock now that phase 1 has run. */
rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
if (rval != XR_OK) {
do_warn(_("Primary superblock bad after phase 1!\n"
"Exiting now.\n"));
exit(1);
}
/*
* Now that we have completely validated the superblock, geometry may
* have changed; re-check geometry vs the host filesystem geometry
*/
if (isa_file)
check_fs_vs_host_sectsize(&psb);
/*
* Prepare the mount structure. Point the log reference to our local
* copy so it's available to the various phases. The log bits are
* initialized in phase 2.
*/
memset(&xfs_m, 0, sizeof(xfs_mount_t));
mp = libxfs_mount(&xfs_m, &psb, &x, 0);
if (!mp) {
fprintf(stderr,
_("%s: cannot repair this filesystem. Sorry.\n"),
progname);
exit(1);
}
mp->m_log = &log;
igeo = M_IGEO(mp);
/* Spit out function & line on these corruption macros */
if (verbose > 2)
xfs_set_reporting_corruption(mp);
/* Capture the first writeback so that we can set needsrepair. */
if (xfs_has_crc(mp))
mp->m_buf_writeback_fn = repair_capture_writeback;
/*
* set XFS-independent status vars from the mount/sb structure
*/
glob_agcount = mp->m_sb.sb_agcount;
chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
max_symlink_blocks = libxfs_symlink_blocks(mp, XFS_SYMLINK_MAXLEN);
/*
* Automatic striding for high agcount filesystems.
*
* More AGs indicates that the filesystem is either large or can handle
* more IO parallelism. Either way, we should try to process multiple
* AGs at a time in such a configuration to try to saturate the
* underlying storage and speed the repair process. Only do this if
* prefetching is enabled.
*
* Given mkfs defaults for 16AGs for "multidisk" configurations, we want
* to target these for an increase in thread count. Hence a stride value
* of 15 is chosen to ensure we get at least 2 AGs being scanned at once
* on such filesystems.
*
* Limit the maximum thread count based on the available CPU power that
* is available. If we use too many threads, we might run out of memory
* and CPU power before we run out of IO concurrency. We limit to 8
* threads/CPU as this is enough threads to saturate a CPU on fast
* devices, yet few enough that it will saturate but won't overload slow
* devices.
*
* Multidisk filesystems can handle more IO parallelism so we should try
* to process multiple AGs at a time in such a configuration to try to
* saturate the underlying storage and speed the repair process. Only do
* this if prefetching is enabled.
*/
if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) {
/*
* For small agcount multidisk systems, just double the
* parallelism. For larger AG count filesystems (32 and above)
* use more parallelism, and linearly increase the parallelism
* with the number of AGs.
*/
ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1;
}
if (ag_stride) {
int max_threads = platform_nproc() * 8;
/* One thread per ag_stride AGs, rounded up. */
thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
/* Widen the stride until the thread count fits under the cap. */
while (thread_count > max_threads) {
ag_stride *= 2;
thread_count = (glob_agcount + ag_stride - 1) /
ag_stride;
}
if (thread_count > 0)
thread_init();
else {
/* Stride gave no usable thread count: fall back to no striding. */
thread_count = 1;
ag_stride = 0;
}
}
if (ag_stride && report_interval) {
init_progress_rpt();
if (msgbuf) {
do_log(_(" - reporting progress in intervals of %s\n"),
duration(report_interval, msgbuf));
}
}
/*
* Adjust libxfs cache sizes based on system memory,
* filesystem size and inode count.
*
* We'll set the cache size based on 3/4s the memory minus
* space used by the inode AVL tree and block usage map.
*
* Inode AVL tree space is approximately 4 bytes per inode,
* block usage map is currently 1 byte for 2 blocks.
*
* We assume most blocks will be inode clusters.
*
* Calculations are done in kilobyte units.
*/
if (!bhash_option_used || max_mem_specified) {
unsigned long mem_used;
unsigned long max_mem;
struct rlimit rlim;
/* Tear down the existing buffer cache; it is re-created below. */
libxfs_bcache_purge();
cache_destroy(libxfs_bcache);
/* icount * 4 bytes and dblocks / 2 bytes, both converted to KB. */
mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
(mp->m_sb.sb_dblocks >> (10 + 1)) +
50000; /* rough estimate of 50MB overhead */
/* -m is given in MB; convert to KB. */
max_mem = max_mem_specified ? max_mem_specified * 1024 :
platform_physmem() * 3 / 4;
if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
rlim.rlim_cur != RLIM_INFINITY) {
/* Raise the soft address space limit to the hard limit. */
rlim.rlim_cur = rlim.rlim_max;
setrlimit(RLIMIT_AS, &rlim);
/* use approximately 80% of rlimit to avoid overrun */
max_mem = min(max_mem, rlim.rlim_cur / 1280);
} else
max_mem = min(max_mem, (LONG_MAX >> 10) + 1);
if (verbose > 1)
do_log(
_(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
max_mem, mp->m_sb.sb_icount,
mp->m_sb.sb_icount >> (10 - 2),
mp->m_sb.sb_dblocks,
mp->m_sb.sb_dblocks >> (10 + 1));
if (max_mem <= mem_used) {
/* An explicit -m that is too small is fatal; otherwise only warn. */
if (max_mem_specified) {
do_abort(
_("Required memory for repair is greater that the maximum specified\n"
"with the -m option. Please increase it to at least %lu.\n"),
mem_used / 1024);
}
do_log(
_("Memory available for repair (%luMB) may not be sufficient.\n"
"At least %luMB is needed to repair this filesystem efficiently\n"
"If repair fails due to lack of memory, please\n"),
max_mem / 1024, mem_used / 1024);
if (do_prefetch)
do_log(
_("turn prefetching off (-P) to reduce the memory footprint.\n"));
else
do_log(
_("increase system RAM and/or swap space to at least %luMB.\n"),
mem_used * 2 / 1024);
/* Makes the subtraction below yield zero instead of wrapping. */
max_mem = mem_used;
}
/* What remains after the fixed overhead is available for the cache. */
max_mem -= mem_used;
if (max_mem >= (1 << 30))
max_mem = 1 << 30;
libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
(igeo->inode_cluster_size >> 10));
/* Never go below 512 hash buckets. */
if (libxfs_bhash_size < 512)
libxfs_bhash_size = 512;
if (verbose)
do_log(_(" - block cache size set to %d entries\n"),
libxfs_bhash_size * HASH_CACHE_RATIO);
libxfs_bcache = cache_init(0, libxfs_bhash_size,
&libxfs_bcache_operations);
}
/*
* calculate what mkfs would do to this filesystem
*/
calc_mkfs(mp);
/*
* initialize block alloc map
*/
init_bmaps(mp);
incore_ino_init(mp);
incore_ext_init(mp);
rmaps_init(mp);
/* initialize random globals now that we know the fs geometry */
inodes_per_block = mp->m_sb.sb_inopblock;
if (parse_sb_version(mp)) {
do_warn(
_("Found unsupported filesystem features. Exiting now.\n"));
return(1);
}
/* make sure the per-ag freespace maps are ok so we can mount the fs */
phase2(mp, phase2_threads);
phase_end(2);
if (do_prefetch)
init_prefetch(mp);
phase3(mp, phase2_threads);
phase_end(3);
phase4(mp);
phase_end(4);
if (no_modify) {
printf(_("No modify flag set, skipping phase 5\n"));
if (mp->m_sb.sb_rblocks > 0)
check_rtmetadata(mp);
} else {
phase5(mp);
}
phase_end(5);
/*
* Done with the block usage maps, toss them...
*/
rmaps_free(mp);
free_bmaps(mp);
if (!bad_ino_btree) {
phase6(mp);
phase_end(6);
phase7(mp, phase2_threads);
phase_end(7);
} else {
do_warn(
_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
}
/* Tell the user what happened to quota information. */
if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
if (!no_modify) {
do_warn(
_("Warning: no quota inodes were found. Quotas disabled.\n"));
} else {
do_warn(
_("Warning: no quota inodes were found. Quotas would be disabled.\n"));
}
} else if (lost_quotas) {
if (!no_modify) {
do_warn(
_("Warning: quota inodes were cleared. Quotas disabled.\n"));
} else {
do_warn(
_("Warning: quota inodes would be cleared. Quotas would be disabled.\n"));
}
} else {
if (lost_uquotino) {
if (!no_modify) {
do_warn(
_("Warning: user quota information was cleared.\n"
"User quotas can not be enforced until limit information is recreated.\n"));
} else {
do_warn(
_("Warning: user quota information would be cleared.\n"
"User quotas could not be enforced until limit information was recreated.\n"));
}
}
if (lost_gquotino) {
if (!no_modify) {
do_warn(
_("Warning: group quota information was cleared.\n"
"Group quotas can not be enforced until limit information is recreated.\n"));
} else {
do_warn(
_("Warning: group quota information would be cleared.\n"
"Group quotas could not be enforced until limit information was recreated.\n"));
}
}
if (lost_pquotino) {
if (!no_modify) {
do_warn(
_("Warning: project quota information was cleared.\n"
"Project quotas can not be enforced until limit information is recreated.\n"));
} else {
do_warn(
_("Warning: project quota information would be cleared.\n"
"Project quotas could not be enforced until limit information was recreated.\n"));
}
}
}
/* Same condition under which init_progress_rpt() was called above. */
if (ag_stride && report_interval)
stop_progress_rpt();
if (no_modify) {
/*
* Warn if the current LSN is problematic and the log requires a
* reformat.
*/
format_log_max_lsn(mp);
do_log(
_("No modify flag set, skipping filesystem flush and exiting.\n"));
if (verbose)
summary_report();
/* fs_is_dirty is set by every do_warn() call. */
if (fs_is_dirty)
return(1);
return(0);
}
/*
* Clear the quota flags if they're on.
*/
sbp = libxfs_getsb(mp);
if (!sbp)
do_error(_("couldn't get superblock\n"));
if ((mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD) != quotacheck_results()) {
do_warn(_("Note - quota info will be regenerated on next "
"quota mount.\n"));
mp->m_sb.sb_qflags &= ~(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD |
XFS_PQUOTA_CHKD | XFS_OQUOTA_CHKD);
libxfs_sb_to_disk(sbp->b_addr, &mp->m_sb);
}
if (copied_sunit) {
do_warn(
_("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
"Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
mp->m_sb.sb_unit, mp->m_sb.sb_width);
}
libxfs_buf_mark_dirty(sbp);
libxfs_buf_relse(sbp);
/*
* If we upgraded V5 filesystem features, we need to update the
* secondary superblocks to include the new feature bits. Don't set
* NEEDSREPAIR on the secondaries.
*/
if (features_changed) {
/* Mask the flag only while the secondaries are written, then restore it. */
mp->m_sb.sb_features_incompat &=
~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
error = -libxfs_update_secondary_sbs(mp);
if (error)
do_error(_("upgrading features of secondary supers"));
mp->m_sb.sb_features_incompat |=
XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
}
/*
* Done. Flush all cached buffers and inodes first to ensure all
* verifiers are run (where we discover the max metadata LSN), reformat
* the log if necessary and unmount.
*/
libxfs_bcache_flush();
format_log_max_lsn(mp);
if (xfs_sb_version_needsrepair(&mp->m_sb))
clear_needsrepair(mp);
/* Report failure if anything failed to get written to our fs. */
error = -libxfs_umount(mp);
if (error)
do_error(
_("File system metadata writeout failed, err=%d. Re-run xfs_repair.\n"),
error);
libxfs_destroy(&x);
if (verbose)
summary_report();
do_log(_("done\n"));
if (dangerously && !no_modify)
do_warn(
_("Repair of readonly mount complete. Immediate reboot encouraged.\n"));
pftrace_done();
free(msgbuf);
/* -e: signal with exit status 4 that something was reported via do_warn(). */
if (fs_is_dirty && report_corrected)
return (4);
return (0);
}