// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include <stdint.h>
#include <sys/types.h>
#include <sys/statvfs.h>
#include "list.h"
#include "libfrog/paths.h"
#include "libfrog/workqueue.h"
#include "libfrog/ptvar.h"
#include "xfs_scrub.h"
#include "common.h"
#include "counter.h"
#include "inodes.h"
#include "progress.h"
#include "scrub.h"
#include "repair.h"
/* Phase 3: Scan all inodes. */
struct scrub_inode_ctx {
	struct scrub_ctx	*ctx;

	/* Number of inodes scanned. */
	struct ptcounter	*icount;

	/* Per-thread lists of file repair items. */
	struct ptvar		*repair_ptlists;

	/* Set to true to abort all threads. */
	bool			aborted;

	/* Set to true if we want to defer file repairs to phase 4. */
	bool			always_defer_repairs;
};

/* Report a filesystem error that the vfs fed us on close. */
static void
report_close_error(
	struct scrub_ctx	*ctx,
	struct xfs_bulkstat	*bstat)
{
	char			descr[DESCR_BUFSZ];
	int			old_errno = errno;

	scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ, bstat->bs_ino,
			bstat->bs_gen, NULL);
	errno = old_errno;
	str_errno(ctx, descr);
}

/* Defer all the repairs until phase 4. */
static int
defer_inode_repair(
	struct scrub_inode_ctx	*ictx,
	const struct scrub_item	*sri)
{
	struct action_list	*alist;
	struct action_item	*aitem = NULL;
	int			ret;

	ret = repair_item_to_action_item(ictx->ctx, sri, &aitem);
	if (ret || !aitem)
		return ret;
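
	/* Stash the new action item on this thread's repair list. */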
	alist = ptvar_get(ictx->repair_ptlists, &ret);
	if (ret) {
		str_liberror(ictx->ctx, ret,
				_("getting per-thread inode repair list"));
		return ret;
	}

	action_list_add(alist, aitem);
	return 0;
}

/* Run repair actions now and leave unfinished items for later. */
static int
try_inode_repair(
	struct scrub_inode_ctx	*ictx,
	struct scrub_item	*sri,
	int			fd)
{
	/*
	 * If at the start of phase 3 we already had ag/rt metadata repairs
	 * queued up for phase 4, leave the action list untouched so that file
	 * metadata repairs will be deferred until phase 4.
	 */
	if (ictx->always_defer_repairs)
		return 0;

	/*
	 * Try to repair the file metadata.  Unfixed metadata will remain in
	 * the scrub item state to be queued as a single action item.
	 */
	return repair_file_corruption(ictx->ctx, sri, fd);
}

/*
 * If we couldn't check all the scheduled file metadata items, try performing
 * spot repairs until we check everything or stop making forward progress.
 */
static int
repair_and_scrub_inode_loop(
	struct scrub_ctx	*ctx,
	struct xfs_bulkstat	*bstat,
	int			fd,
	struct scrub_item	*sri,
	bool			*defer)
{
	unsigned int		to_check;
	int			error;

	*defer = false;
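
	/* Spot repairs are only allowed when we're running in repair mode. */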
	if (ctx->mode != SCRUB_MODE_REPAIR)
		return 0;

	to_check = scrub_item_count_needscheck(sri);
	while (to_check > 0) {
		unsigned int	nr;

		error = repair_file_corruption(ctx, sri, fd);
		if (error)
			return error;
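
		/* Recheck the file to see if the repairs unblocked anything. */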
		error = scrub_item_check_file(ctx, sri, fd);
		if (error)
			return error;

		nr = scrub_item_count_needscheck(sri);
		if (nr == to_check) {
			char	descr[DESCR_BUFSZ];

			/*
			 * We cannot make forward scanning progress with this
			 * inode, so defer the rest until phase 4.
			 */
			scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ,
					bstat->bs_ino, bstat->bs_gen, NULL);
			str_info(ctx, descr,
_("Unable to make forward checking progress; will try again in phase 4."));
			*defer = true;
			return 0;
		}

		to_check = nr;
	}

	return 0;
}

/* Verify the contents, xattrs, and extent maps of an inode. */
static int
scrub_inode(
	struct scrub_ctx	*ctx,
	struct xfs_handle	*handle,
	struct xfs_bulkstat	*bstat,
	void			*arg)
{
	struct scrub_item	sri;
	struct scrub_inode_ctx	*ictx = arg;
	struct ptcounter	*icount = ictx->icount;
	int			fd = -1;
	int			error;

	scrub_item_init_file(&sri, bstat);
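
	/* Pause between inodes if the user requested a background scan. */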
	background_sleep();

	/*
	 * Open this regular file to pin it in memory.  Avoiding the use of
	 * scan-by-handle means that the in-kernel scrubber doesn't pay the
	 * cost of opening the handle (looking up the inode in the inode
	 * btree, grabbing the inode, checking the generation) with every
	 * scrub call.
	 *
	 * Ignore any runtime or corruption related errors here because we
	 * can fall back to scrubbing by handle.  ESTALE can be ignored for
	 * the following reasons:
	 *
	 * - If the file has been deleted since bulkstat, there's nothing to
	 *   check.  Scrub-by-handle returns ENOENT for such inodes.
	 * - If the file has been deleted and reallocated since bulkstat,
	 *   its ondisk metadata have been rewritten and are assumed to be
	 *   ok.  Scrub-by-handle also returns ENOENT if the generation
	 *   doesn't match.
	 * - The file itself is corrupt and cannot be loaded.  In this case,
	 *   we fall back to scrub-by-handle.
	 *
	 * Note: We cannot use this same trick for directories because the
	 * VFS will try to reconnect directory file handles to the root
	 * directory by walking '..' entries upwards, and loops in the
	 * dirent index btree will cause livelocks.
	 */
	if (S_ISREG(bstat->bs_mode))
		fd = scrub_open_handle(handle);

	/* Scrub the inode. */
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_INODE);

	/* Scrub all block mappings. */
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_BMBTD);
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_BMBTA);
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_BMBTC);

	/*
	 * Check file data contents, e.g. symlink and directory entries.
	 *
	 * Note: bs_mode==0 occurs when inumbers says an inode is allocated,
	 * bulkstat skips the inode, and bulkstat_single errors out when
	 * loading the inode.  This could be due to racing with ifree, but
	 * it could be a corrupt inode.  Either way, schedule all the data
	 * fork content scrubbers.  Better to have them return -ENOENT than
	 * miss some coverage.
	 */
	if (S_ISLNK(bstat->bs_mode) || !bstat->bs_mode)
		scrub_item_schedule(&sri, XFS_SCRUB_TYPE_SYMLINK);
	if (S_ISDIR(bstat->bs_mode) || !bstat->bs_mode)
		scrub_item_schedule(&sri, XFS_SCRUB_TYPE_DIR);
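
	/* Every file gets its xattrs and parent pointers checked. */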
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_XATTR);
	scrub_item_schedule(&sri, XFS_SCRUB_TYPE_PARENT);

	/*
	 * Try to check all of the metadata items that we just scheduled.
	 * If we return with some types still needing a check and the space
	 * metadata isn't also in need of repairs, try repairing any damaged
	 * file metadata that we've found so far, and try checking the file
	 * again.  Worst case, defer the repairs and the checks to phase 4
	 * if we can't make any progress on anything.
	 */
	error = scrub_item_check_file(ctx, &sri, fd);
	if (error)
		goto out;

	if (!ictx->always_defer_repairs) {
		bool	defer_repairs;

		error = repair_and_scrub_inode_loop(ctx, bstat, fd, &sri,
				&defer_repairs);
		if (error || defer_repairs)
			goto out;
	}

	/* Try to repair the file while it's open. */
	error = try_inode_repair(ictx, &sri, fd);
	if (error)
		goto out;

out:
	if (error)
		ictx->aborted = true;
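
	/* Tally this inode as scanned, even if we're bailing out. */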
	error = ptcounter_add(icount, 1);
	if (error) {
		str_liberror(ctx, error,
				_("incrementing scanned inode counter"));
		ictx->aborted = true;
	}
	progress_add(1);
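
	/* If all went well, defer any unfinished repairs to phase 4. */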
	if (!error && !ictx->aborted)
		error = defer_inode_repair(ictx, &sri);

	if (fd >= 0) {
		int	err2;

		err2 = close(fd);
		if (err2) {
			report_close_error(ctx, bstat);
			ictx->aborted = true;
		}
	}

	if (!error && ictx->aborted)
		error = ECANCELED;
	return error;
}

/*
 * Collect all the inode repairs in the file repair list.  No need for locks
 * here, since we're single-threaded.
 */
static int
collect_repairs(
	struct ptvar		*ptv,
	void			*data,
	void			*foreach_arg)
{
	struct scrub_ctx	*ctx = foreach_arg;
	struct action_list	*alist = data;

	action_list_merge(ctx->file_repair_list, alist);
	return 0;
}

/* Initialize this per-thread file repair item list. */
static void
action_ptlist_init(
	void			*priv)
{
	struct action_list	*alist = priv;

	action_list_init(alist);
}

/* Verify all the inodes in a filesystem. */
int
phase3_func(
	struct scrub_ctx	*ctx)
{
	struct scrub_inode_ctx	ictx = { .ctx = ctx };
	uint64_t		val;
	int			err;
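
	/* Per-thread lists collect file repair items until we merge them. */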
	err = -ptvar_alloc(scrub_nproc(ctx), sizeof(struct action_list),
			action_ptlist_init, &ictx.repair_ptlists);
	if (err) {
		str_liberror(ctx, err,
				_("creating per-thread file repair item lists"));
		return err;
	}

	err = ptcounter_alloc(scrub_nproc(ctx), &ictx.icount);
	if (err) {
		str_liberror(ctx, err, _("creating scanned inode counter"));
		goto out_ptvar;
	}

	/*
	 * If we already have ag/fs metadata to repair from previous phases,
	 * we would rather not try to repair file metadata until we've tried
	 * to repair the space metadata.
	 */
	if (!action_list_empty(ctx->fs_repair_list))
		ictx.always_defer_repairs = true;

	err = scrub_scan_all_inodes(ctx, scrub_inode, &ictx);
	if (!err && ictx.aborted)
		err = ECANCELED;
	if (err)
		goto out_ptcounter;

	/*
	 * Combine all of the file repair items into the main repair list.
	 * We don't need locks here since we're the only thread running now.
	 */
	err = -ptvar_foreach(ictx.repair_ptlists, collect_repairs, ctx);
	if (err) {
		str_liberror(ctx, err, _("collecting inode repair lists"));
		goto out_ptcounter;
	}
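
	/* Tell the user about anything that could be optimized. */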
	scrub_report_preen_triggers(ctx);

	err = ptcounter_value(ictx.icount, &val);
	if (err) {
		str_liberror(ctx, err, _("summing scanned inode counter"));
		goto out_ptcounter;
	}
	ctx->inodes_checked = val;

out_ptcounter:
	ptcounter_free(ictx.icount);
out_ptvar:
	ptvar_free(ictx.repair_ptlists);
	return err;
}

/* Estimate how much work we're going to do. */
int
phase3_estimate(
	struct scrub_ctx	*ctx,
	uint64_t		*items,
	unsigned int		*nr_threads,
	int			*rshift)
{
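	/* One unit of work per allocated inode: total inodes minus free. */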
	*items = ctx->mnt_sv.f_files - ctx->mnt_sv.f_ffree;
	*nr_threads = scrub_nproc(ctx);
	*rshift = 0;
	return 0;
}