blob: 4235c228c0e8545cb9cf78cdc9c490e12bee340e [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include <stdint.h>
#include <sys/types.h>
#include <sys/statvfs.h>
#include "list.h"
#include "libfrog/paths.h"
#include "libfrog/workqueue.h"
#include "xfs_scrub.h"
#include "common.h"
#include "counter.h"
#include "inodes.h"
#include "progress.h"
#include "scrub.h"
#include "repair.h"
/* Phase 3: Scan all inodes. */
/* Shared state passed to every inode-scan worker during phase 3. */
struct scrub_inode_ctx {
	/* Global scrub context this scan belongs to. */
	struct scrub_ctx	*ctx;

	/* Number of inodes scanned. */
	struct ptcounter	*icount;

	/* per-AG locks to protect the repair lists */
	pthread_mutex_t		*locks;

	/* Set to true to abort all threads. */
	bool			aborted;

	/* Set to true if we want to defer file repairs to phase 4. */
	bool			always_defer_repairs;
};
/* Report a filesystem error that the vfs fed us on close. */
static void
report_close_error(
struct scrub_ctx *ctx,
struct xfs_bulkstat *bstat)
{
char descr[DESCR_BUFSZ];
int old_errno = errno;
scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ, bstat->bs_ino,
bstat->bs_gen, NULL);
errno = old_errno;
str_errno(ctx, descr);
}
/*
* Defer all the repairs until phase 4, being careful about locking since the
* inode scrub threads are not per-AG.
*/
/*
 * Defer all the repairs until phase 4, being careful about locking since the
 * inode scrub threads are not per-AG.
 */
static void
defer_inode_repair(
	struct scrub_inode_ctx	*ictx,
	xfs_agnumber_t		agno,
	struct action_list	*alist)
{
	if (alist->nr > 0) {
		/* Serialize against other scan threads touching this AG. */
		pthread_mutex_lock(&ictx->locks[agno]);
		action_list_defer(ictx->ctx, agno, alist);
		pthread_mutex_unlock(&ictx->locks[agno]);
	}
}
/* Run repair actions now and defer unfinished items for later. */
/* Run repair actions now and defer unfinished items for later. */
static int
try_inode_repair(
	struct scrub_inode_ctx	*ictx,
	int			fd,
	xfs_agnumber_t		agno,
	struct action_list	*alist)
{
	int			error;

	/*
	 * If at the start of phase 3 we already had ag/rt metadata repairs
	 * queued up for phase 4, leave the action list untouched so that file
	 * metadata repairs will be deferred in scan order until phase 4.
	 */
	if (ictx->always_defer_repairs)
		return 0;

	error = action_list_process(ictx->ctx, fd, alist,
			ALP_REPAIR_ONLY | ALP_NOPROGRESS);
	if (error)
		return error;

	/* Anything left unfixed gets queued for phase 4. */
	defer_inode_repair(ictx, agno, alist);
	return 0;
}
/* Verify the contents, xattrs, and extent maps of an inode. */
static int
scrub_inode(
struct scrub_ctx *ctx,
struct xfs_handle *handle,
struct xfs_bulkstat *bstat,
void *arg)
{
struct action_list alist;
struct scrub_inode_ctx *ictx = arg;
struct ptcounter *icount = ictx->icount;
xfs_agnumber_t agno;
int fd = -1;
int error;
action_list_init(&alist);
agno = cvt_ino_to_agno(&ctx->mnt, bstat->bs_ino);
background_sleep();
/*
* Open this regular file to pin it in memory. Avoiding the use of
* scan-by-handle means that the in-kernel scrubber doesn't pay the
* cost of opening the handle (looking up the inode in the inode btree,
* grabbing the inode, checking the generation) with every scrub call.
*
* Ignore any runtime or corruption related errors here because we can
* fall back to scrubbing by handle. ESTALE can be ignored for the
* following reasons:
*
* - If the file has been deleted since bulkstat, there's nothing to
* check. Scrub-by-handle returns ENOENT for such inodes.
* - If the file has been deleted and reallocated since bulkstat,
* its ondisk metadata have been rewritten and is assumed to be ok.
* Scrub-by-handle also returns ENOENT if the generation doesn't
* match.
* - The file itself is corrupt and cannot be loaded. In this case,
* we fall back to scrub-by-handle.
*
* Note: We cannot use this same trick for directories because the VFS
* will try to reconnect directory file handles to the root directory
* by walking '..' entries upwards, and loops in the dirent index
* btree will cause livelocks.
*/
if (S_ISREG(bstat->bs_mode))
fd = scrub_open_handle(handle);
/* Scrub the inode. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_INODE, &alist);
if (error)
goto out;
error = try_inode_repair(ictx, fd, agno, &alist);
if (error)
goto out;
/* Scrub all block mappings. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTD, &alist);
if (error)
goto out;
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTA, &alist);
if (error)
goto out;
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTC, &alist);
if (error)
goto out;
error = try_inode_repair(ictx, fd, agno, &alist);
if (error)
goto out;
if (S_ISLNK(bstat->bs_mode)) {
/* Check symlink contents. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_SYMLINK,
&alist);
} else if (S_ISDIR(bstat->bs_mode)) {
/* Check the directory entries. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_DIR, &alist);
}
if (error)
goto out;
/* Check all the extended attributes. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_XATTR, &alist);
if (error)
goto out;
/* Check parent pointers. */
error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_PARENT, &alist);
if (error)
goto out;
/* Try to repair the file while it's open. */
error = try_inode_repair(ictx, fd, agno, &alist);
if (error)
goto out;
out:
if (error)
ictx->aborted = true;
error = ptcounter_add(icount, 1);
if (error) {
str_liberror(ctx, error,
_("incrementing scanned inode counter"));
ictx->aborted = true;
}
progress_add(1);
if (!error && !ictx->aborted)
defer_inode_repair(ictx, agno, &alist);
if (fd >= 0) {
int err2;
err2 = close(fd);
if (err2) {
report_close_error(ctx, bstat);
ictx->aborted = true;
}
}
if (!error && ictx->aborted)
error = ECANCELED;
return error;
}
/* Verify all the inodes in a filesystem. */
/*
 * Verify all the inodes in a filesystem.
 *
 * Sets up the per-AG repair-list locks and the scanned-inode counter, runs
 * the parallel inode scan, then records the number of inodes checked.
 * Returns 0 or a positive errno.
 */
int
phase3_func(
	struct scrub_ctx	*ctx)
{
	struct scrub_inode_ctx	ictx = { .ctx = ctx };
	uint64_t		val;
	xfs_agnumber_t		agno;
	int			err;

	err = ptcounter_alloc(scrub_nproc(ctx), &ictx.icount);
	if (err) {
		str_liberror(ctx, err, _("creating scanned inode counter"));
		return err;
	}

	ictx.locks = calloc(ctx->mnt.fsgeom.agcount, sizeof(pthread_mutex_t));
	if (!ictx.locks) {
		str_errno(ctx, _("creating per-AG repair list locks"));
		err = ENOMEM;
		goto out_ptcounter;
	}

	/*
	 * If we already have ag/fs metadata to repair from previous phases,
	 * we would rather not try to repair file metadata until we've tried
	 * to repair the space metadata.
	 */
	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
		pthread_mutex_init(&ictx.locks[agno], NULL);
		if (!action_list_empty(&ctx->action_lists[agno]))
			ictx.always_defer_repairs = true;
	}

	/* Scan every inode; scrub_inode() is the per-inode callback. */
	err = scrub_scan_all_inodes(ctx, scrub_inode, &ictx);
	if (!err && ictx.aborted)
		err = ECANCELED;
	if (err)
		goto out_locks;

	scrub_report_preen_triggers(ctx);

	/* Fold the per-thread counts into the grand total. */
	err = ptcounter_value(ictx.icount, &val);
	if (err) {
		str_liberror(ctx, err, _("summing scanned inode counter"));
		goto out_locks;
	}

	ctx->inodes_checked = val;
out_locks:
	/* Mutexes were initialized for every AG above, so destroy them all. */
	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++)
		pthread_mutex_destroy(&ictx.locks[agno]);
	free(ictx.locks);
out_ptcounter:
	ptcounter_free(ictx.icount);
	return err;
}
/* Estimate how much work we're going to do. */
int
phase3_estimate(
struct scrub_ctx *ctx,
uint64_t *items,
unsigned int *nr_threads,
int *rshift)
{
*items = ctx->mnt_sv.f_files - ctx->mnt_sv.f_ffree;
*nr_threads = scrub_nproc(ctx);
*rshift = 0;
return 0;
}