blob: 4235c228c0e8545cb9cf78cdc9c490e12bee340e [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include <stdint.h>
#include <sys/types.h>
#include <sys/statvfs.h>
#include "list.h"
#include "libfrog/paths.h"
#include "libfrog/workqueue.h"
#include "xfs_scrub.h"
#include "common.h"
#include "counter.h"
#include "inodes.h"
#include "progress.h"
#include "scrub.h"
#include "repair.h"
/* Phase 3: Scan all inodes. */
/* Shared state passed to every inode-scan worker during phase 3. */
struct scrub_inode_ctx {
	/* Global scrub context this scan belongs to. */
	struct scrub_ctx	*ctx;

	/* Number of inodes scanned. */
	struct ptcounter	*icount;

	/* per-AG locks to protect the repair lists */
	pthread_mutex_t		*locks;

	/* Set to true to abort all threads. */
	bool			aborted;

	/* Set to true if we want to defer file repairs to phase 4. */
	bool			always_defer_repairs;
};
/* Report a filesystem error that the vfs fed us on close. */
static void
report_close_error(
struct scrub_ctx *ctx,
struct xfs_bulkstat *bstat)
{
char descr[DESCR_BUFSZ];
int old_errno = errno;
scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ, bstat->bs_ino,
bstat->bs_gen, NULL);
errno = old_errno;
str_errno(ctx, descr);
}
/*
* Defer all the repairs until phase 4, being careful about locking since the
* inode scrub threads are not per-AG.
*/
/*
 * Defer all the repairs until phase 4, being careful about locking since the
 * inode scrub threads are not per-AG.
 */
static void
defer_inode_repair(
	struct scrub_inode_ctx	*ictx,
	xfs_agnumber_t		agno,
	struct action_list	*alist)
{
	if (alist->nr > 0) {
		/* Serialize against other scan threads touching this AG. */
		pthread_mutex_lock(&ictx->locks[agno]);
		action_list_defer(ictx->ctx, agno, alist);
		pthread_mutex_unlock(&ictx->locks[agno]);
	}
}
/* Run repair actions now and defer unfinished items for later. */
/* Run repair actions now and defer unfinished items for later. */
static int
try_inode_repair(
	struct scrub_inode_ctx	*ictx,
	int			fd,
	xfs_agnumber_t		agno,
	struct action_list	*alist)
{
	int			error;

	/*
	 * If at the start of phase 3 we already had ag/rt metadata repairs
	 * queued up for phase 4, leave the action list untouched so that file
	 * metadata repairs will be deferred in scan order until phase 4.
	 */
	if (ictx->always_defer_repairs)
		return 0;

	error = action_list_process(ictx->ctx, fd, alist,
			ALP_REPAIR_ONLY | ALP_NOPROGRESS);
	if (error)
		return error;

	/* Anything left unfixed gets queued for phase 4. */
	defer_inode_repair(ictx, agno, alist);
	return 0;
}
/* Verify the contents, xattrs, and extent maps of an inode. */
static int
scrub_inode(
struct scrub_ctx *ctx,
struct xfs_handle *handle,
struct xfs_bulkstat *bstat,
void *arg)
{
struct action_list alist;
struct scrub_inode_ctx *ictx = arg;
struct ptcounter *icount = ictx->icount;
xfs_agnumber_t agno;
int fd = -1;
int error;
action_list_init(&alist);
agno = cvt_ino_to_agno(&ctx->mnt, bstat->bs_ino);
background_sleep();
/*
* Open this regular file to pin it in memory. Avoiding the use of
* scan-by-handle means that the in-kernel scrubber doesn't pay the
* cost of opening the handle (looking up the inode in the inode btree,
* grabbing the inode, checking the generation) with every scrub call.
*
* Ignore any runtime or corruption related errors here because we can
* fall back to scrubbing by handle. ESTALE can be ignored for the
* following reasons:
*
* - If the file has been deleted since bulkstat, there's nothing to
* check. Scrub-by-handle returns ENOENT for such inodes.
* - If the file has been deleted and reallocated since bulkstat,
* its ondisk metadata have been rewritten and is assumed to be ok.
* Scrub-by-handle also returns ENOENT if the generation doesn't
* match.
* - The file itself is corrupt and cannot be loaded. In this case,
* we fall back to scrub-by-handle.
*
* Note: We cannot use this same trick for directories because the VFS
* will try to reconnect directory file handles to the root directory
* by walking '..' entries upwards, and loops in the dirent index
* btree will cause livelocks.
*/
if (S_ISREG(bstat->bs_mode))
fd = scrub_open_handle(handle);
/* Scrub the inode. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_INODE, &alist);
if (error)
goto out;
error = try_inode_repair(ictx, fd, agno, &alist);
if (error)
goto out;
/* Scrub all block mappings. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTD, &alist);
if (error)
goto out;
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTA, &alist);
if (error)
goto out;
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTC, &alist);
if (error)
goto out;
error = try_inode_repair(ictx, fd, agno, &alist);
if (error)
goto out;
if (S_ISLNK(bstat->bs_mode)) {
/* Check symlink contents. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_SYMLINK,
&alist);
} else if (S_ISDIR(bstat->bs_mode)) {
/* Check the directory entries. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_DIR, &alist);
}
if (error)
goto out;
/* Check all the extended attributes. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_XATTR, &alist);
if (error)
goto out;
/* Check parent pointers. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_PARENT, &alist);
if (error)
goto out;
/* Try to repair the file while it's open. */
error = try_inode_repair(ictx, fd, agno, &alist);
if (error)
goto out;
out:
if (error)
ictx->aborted = true;
error = ptcounter_add(icount, 1);
if (error) {
str_liberror(ctx, error,
_("incrementing scanned inode counter"));
ictx->aborted = true;
}
progress_add(1);
if (!error && !ictx->aborted)
defer_inode_repair(ictx, agno, &alist);
if (fd >= 0) {
int err2;
err2 = close(fd);
if (err2) {
report_close_error(ctx, bstat);
ictx->aborted = true;
}
}
if (!error && ictx->aborted)
error = ECANCELED;
return error;
}
/* Verify all the inodes in a filesystem. */
/*
 * Verify all the inodes in a filesystem.
 *
 * Sets up the per-AG repair-list locks and the scanned-inode counter, runs
 * the parallel inode scan, then records the number of inodes checked.
 * Returns 0 or a positive errno.
 */
int
phase3_func(
	struct scrub_ctx	*ctx)
{
	struct scrub_inode_ctx	ictx = { .ctx = ctx };
	uint64_t		val;
	xfs_agnumber_t		agno;
	int			err;

	err = ptcounter_alloc(scrub_nproc(ctx), &ictx.icount);
	if (err) {
		str_liberror(ctx, err, _("creating scanned inode counter"));
		return err;
	}

	ictx.locks = calloc(ctx->mnt.fsgeom.agcount, sizeof(pthread_mutex_t));
	if (!ictx.locks) {
		str_errno(ctx, _("creating per-AG repair list locks"));
		err = ENOMEM;
		goto out_ptcounter;
	}

	/*
	 * If we already have ag/fs metadata to repair from previous phases,
	 * we would rather not try to repair file metadata until we've tried
	 * to repair the space metadata.
	 */
	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
		pthread_mutex_init(&ictx.locks[agno], NULL);
		if (!action_list_empty(&ctx->action_lists[agno]))
			ictx.always_defer_repairs = true;
	}

	/* Scan every inode; scrub_inode() is the per-inode callback. */
	err = scrub_scan_all_inodes(ctx, scrub_inode, &ictx);
	if (!err && ictx.aborted)
		err = ECANCELED;
	if (err)
		goto out_locks;

	scrub_report_preen_triggers(ctx);

	/* Fold the per-thread counts into the grand total. */
	err = ptcounter_value(ictx.icount, &val);
	if (err) {
		str_liberror(ctx, err, _("summing scanned inode counter"));
		goto out_locks;
	}

	ctx->inodes_checked = val;
out_locks:
	/* Mutexes were initialized for every AG above, so destroy them all. */
	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++)
		pthread_mutex_destroy(&ictx.locks[agno]);
	free(ictx.locks);
out_ptcounter:
	ptcounter_free(ictx.icount);
	return err;
}
/* Estimate how much work we're going to do. */
int
phase3_estimate(
struct scrub_ctx *ctx,
uint64_t *items,
unsigned int *nr_threads,
int *rshift)
{
*items = ctx->mnt_sv.f_files - ctx->mnt_sv.f_ffree;
*nr_threads = scrub_nproc(ctx);
*rshift = 0;
return 0;
}