// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include <stdint.h>
#include <sys/types.h>
#include <sys/statvfs.h>
#include "list.h"
#include "libfrog/paths.h"
#include "libfrog/workqueue.h"
#include "libfrog/ptvar.h"
#include "xfs_scrub.h"
#include "common.h"
#include "counter.h"
#include "inodes.h"
#include "progress.h"
#include "scrub.h"
#include "repair.h"
/* Phase 3: Scan all inodes. */
struct scrub_inode_ctx {
	struct scrub_ctx	*ctx;

	/* Number of inodes scanned. */
	struct ptcounter	*icount;

	/* Per-thread lists of file repair items. */
	struct ptvar		*repair_ptlists;

	/* Set to true to abort all threads. */
	bool			aborted;

	/* Set to true if we want to defer file repairs to phase 4. */
	bool			always_defer_repairs;
};

/* Report a filesystem error that the vfs fed us on close. */
static void
report_close_error(
	struct scrub_ctx	*ctx,
	struct xfs_bulkstat	*bstat)
{
	char			descr[DESCR_BUFSZ];
	int			old_errno = errno;

	scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ, bstat->bs_ino,
			bstat->bs_gen, NULL);
	errno = old_errno;
	str_errno(ctx, descr);
}

/* Defer all the repairs until phase 4. */
static int
defer_inode_repair(
	struct scrub_inode_ctx	*ictx,
	const struct scrub_item	*sri)
{
	struct action_list	*alist;
	struct action_item	*aitem = NULL;
	int			ret;

	ret = repair_item_to_action_item(ictx->ctx, sri, &aitem);
	if (ret || !aitem)
		return ret;
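
	/* Stash the new action item on this thread's repair list. */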
	alist = ptvar_get(ictx->repair_ptlists, &ret);
	if (ret) {
		str_liberror(ictx->ctx, ret,
				_("getting per-thread inode repair list"));
		return ret;
	}

	action_list_add(alist, aitem);
	return 0;
}

/* Run repair actions now and leave unfinished items for later. */
static int
try_inode_repair(
	struct scrub_inode_ctx	*ictx,
	struct scrub_item	*sri,
	int			fd)
{
	/*
	 * If at the start of phase 3 we already had ag/rt metadata repairs
	 * queued up for phase 4, leave the action list untouched so that file
	 * metadata repairs will be deferred until phase 4.
	 */
	if (ictx->always_defer_repairs)
		return 0;

	/*
	 * Try to repair the file metadata.  Unfixed metadata will remain in
	 * the scrub item state to be queued as a single action item.
	 */
	return repair_file_corruption(ictx->ctx, sri, fd);
}

/*
 * If we couldn't check all the scheduled file metadata items, try performing
 * spot repairs until we check everything or stop making forward progress.
 */
static int
repair_and_scrub_inode_loop(
	struct scrub_ctx	*ctx,
	struct xfs_bulkstat	*bstat,
	int			fd,
	struct scrub_item	*sri,
	bool			*defer)
{
	unsigned int		to_check;
	int			error;

	*defer = false;
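
	/* Spot repairs are only allowed when we're running in repair mode. */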
	if (ctx->mode != SCRUB_MODE_REPAIR)
		return 0;

	to_check = scrub_item_count_needscheck(sri);
	while (to_check > 0) {
		unsigned int	nr;

		error = repair_file_corruption(ctx, sri, fd);
		if (error)
			return error;
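
		/* Recheck the file to see if the repairs unblocked anything. */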
		error = scrub_item_check_file(ctx, sri, fd);
		if (error)
			return error;

		nr = scrub_item_count_needscheck(sri);
		if (nr == to_check) {
			char	descr[DESCR_BUFSZ];

			/*
			 * We cannot make forward scanning progress with this
			 * inode, so defer the rest until phase 4.
			 */
			scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ,
					bstat->bs_ino, bstat->bs_gen, NULL);
			str_info(ctx, descr,
_("Unable to make forward checking progress; will try again in phase 4."));
			*defer = true;
			return 0;
		}

		to_check = nr;
	}

	return 0;
}

/* Verify the contents, xattrs, and extent maps of an inode. */
static int
scrub_inode(
	struct scrub_ctx	*ctx,
	struct xfs_handle	*handle,
	struct xfs_bulkstat	*bstat,
	void			*arg)
{
	struct scrub_item	sri;
	struct scrub_inode_ctx	*ictx = arg;
	struct ptcounter	*icount = ictx->icount;
	int			fd = -1;
	int			error;

	scrub_item_init_file(&sri, bstat);
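
	/* Pause between inodes if the user requested a background scan. */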
	background_sleep();

	/*
	 * Open this regular file to pin it in memory.  Avoiding the use of
	 * scan-by-handle means that the in-kernel scrubber doesn't pay the
	 * cost of opening the handle (looking up the inode in the inode
	 * btree, grabbing the inode, checking the generation) with every
	 * scrub call.
	 *
	 * Ignore any runtime or corruption related errors here because we
	 * can fall back to scrubbing by handle.  ESTALE can be ignored for
	 * the following reasons:
	 *
	 * - If the file has been deleted since bulkstat, there's nothing to
	 *   check.  Scrub-by-handle returns ENOENT for such inodes.
	 * - If the file has been deleted and reallocated since bulkstat,
	 *   its ondisk metadata have been rewritten and are assumed to be
	 *   ok.  Scrub-by-handle also returns ENOENT if the generation
	 *   doesn't match.
	 * - The file itself is corrupt and cannot be loaded.  In this case,
	 *   we fall back to scrub-by-handle.
	 *
	 * Note: We cannot use this same trick for directories because the
	 * VFS will try to reconnect directory file handles to the root
	 * directory by walking '..' entries upwards, and loops in the
	 * dirent index btree will cause livelocks.
	 */
	if (S_ISREG(bstat->bs_mode))
		fd = scrub_open_handle(handle);

	/* Scrub the inode. */
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_INODE);

	/* Scrub all block mappings. */
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_BMBTD);
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_BMBTA);
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_BMBTC);

	/*
	 * Check file data contents, e.g. symlink and directory entries.
	 *
	 * Note: bs_mode==0 occurs when inumbers says an inode is allocated,
	 * bulkstat skips the inode, and bulkstat_single errors out when
	 * loading the inode.  This could be due to racing with ifree, but
	 * it could be a corrupt inode.  Either way, schedule all the data
	 * fork content scrubbers.  Better to have them return -ENOENT than
	 * miss some coverage.
	 */
	if (S_ISLNK(bstat->bs_mode) || !bstat->bs_mode)
		scrub_item_schedule(&sri, XFS_SCRUB_TYPE_SYMLINK);
	if (S_ISDIR(bstat->bs_mode) || !bstat->bs_mode)
		scrub_item_schedule(&sri, XFS_SCRUB_TYPE_DIR);
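
	/* Every file gets its xattrs and parent pointers checked. */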
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_XATTR);
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_PARENT);

	/*
	 * Try to check all of the metadata items that we just scheduled.
	 * If we return with some types still needing a check and the space
	 * metadata isn't also in need of repairs, try repairing any damaged
	 * file metadata that we've found so far, and try checking the file
	 * again.  Worst case, defer the repairs and the checks to phase 4
	 * if we can't make any progress on anything.
	 */
	error = scrub_item_check_file(ctx, &sri, fd);
	if (error)
		goto out;

	if (!ictx->always_defer_repairs) {
		bool	defer_repairs;

		error = repair_and_scrub_inode_loop(ctx, bstat, fd, &sri,
				&defer_repairs);
		if (error || defer_repairs)
			goto out;
	}

	/* Try to repair the file while it's open. */
	error = try_inode_repair(ictx, &sri, fd);
	if (error)
		goto out;

out:
	if (error)
		ictx->aborted = true;
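
	/* Tally this inode as scanned, even if we're bailing out. */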
	error = ptcounter_add(icount, 1);
	if (error) {
		str_liberror(ctx, error,
				_("incrementing scanned inode counter"));
		ictx->aborted = true;
	}
	progress_add(1);
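
	/* If all went well, defer any unfinished repairs to phase 4. */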
	if (!error && !ictx->aborted)
		error = defer_inode_repair(ictx, &sri);

	if (fd >= 0) {
		int	err2;

		err2 = close(fd);
		if (err2) {
			report_close_error(ctx, bstat);
			ictx->aborted = true;
		}
	}

	if (!error && ictx->aborted)
		error = ECANCELED;
	return error;
}

/*
 * Collect all the inode repairs in the file repair list.  No need for locks
 * here, since we're single-threaded.
 */
static int
collect_repairs(
	struct ptvar		*ptv,
	void			*data,
	void			*foreach_arg)
{
	struct scrub_ctx	*ctx = foreach_arg;
	struct action_list	*alist = data;

	action_list_merge(ctx->file_repair_list, alist);
	return 0;
}

/* Initialize this per-thread file repair item list. */
static void
action_ptlist_init(
	void			*priv)
{
	struct action_list	*alist = priv;

	action_list_init(alist);
}

/* Verify all the inodes in a filesystem. */
int
phase3_func(
	struct scrub_ctx	*ctx)
{
	struct scrub_inode_ctx	ictx = { .ctx = ctx };
	uint64_t		val;
	int			err;
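
	/* Per-thread lists collect file repair items until we merge them. */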
	err = -ptvar_alloc(scrub_nproc(ctx), sizeof(struct action_list),
			action_ptlist_init, &ictx.repair_ptlists);
	if (err) {
		str_liberror(ctx, err,
				_("creating per-thread file repair item lists"));
		return err;
	}

	err = ptcounter_alloc(scrub_nproc(ctx), &ictx.icount);
	if (err) {
		str_liberror(ctx, err, _("creating scanned inode counter"));
		goto out_ptvar;
	}

	/*
	 * If we already have ag/fs metadata to repair from previous phases,
	 * we would rather not try to repair file metadata until we've tried
	 * to repair the space metadata.
	 */
	if (!action_list_empty(ctx->fs_repair_list))
		ictx.always_defer_repairs = true;

	err = scrub_scan_all_inodes(ctx, scrub_inode, &ictx);
	if (!err && ictx.aborted)
		err = ECANCELED;
	if (err)
		goto out_ptcounter;

	/*
	 * Combine all of the file repair items into the main repair list.
	 * We don't need locks here since we're the only thread running now.
	 */
	err = -ptvar_foreach(ictx.repair_ptlists, collect_repairs, ctx);
	if (err) {
		str_liberror(ctx, err, _("collecting inode repair lists"));
		goto out_ptcounter;
	}
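
	/* Tell the user about anything that could be optimized. */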
	scrub_report_preen_triggers(ctx);

	err = ptcounter_value(ictx.icount, &val);
	if (err) {
		str_liberror(ctx, err, _("summing scanned inode counter"));
		goto out_ptcounter;
	}
	ctx->inodes_checked = val;

out_ptcounter:
	ptcounter_free(ictx.icount);
out_ptvar:
	ptvar_free(ictx.repair_ptlists);
	return err;
}

/* Estimate how much work we're going to do. */
int
phase3_estimate(
	struct scrub_ctx	*ctx,
	uint64_t		*items,
	unsigned int		*nr_threads,
	int			*rshift)
{
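	/* One unit of work per allocated inode: total inodes minus free. */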
	*items = ctx->mnt_sv.f_files - ctx->mnt_sv.f_ffree;
	*nr_threads = scrub_nproc(ctx);
	*rshift = 0;
	return 0;
}