// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include <stdint.h>
#include <sys/types.h>
#include <sys/statvfs.h>
#include "list.h"
#include "libfrog/paths.h"
#include "libfrog/workqueue.h"
#include "libfrog/fsgeom.h"
#include "libfrog/scrub.h"
#include "xfs_scrub.h"
#include "common.h"
#include "scrub.h"
#include "repair.h"
/* Phase 2: Check internal metadata. */
struct scan_ctl {
	/*
	 * Control mechanism to signal that each group's scan of the rt bitmap
	 * file is done and wake up any waiters.
	 */
unsigned int rbm_group_count;
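
	/* Set to true if any scan fails, so the remaining scans bail out. */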
bool aborted;
};

/* Warn about the types of mutual inconsistencies that may make repairs hard. */
static inline void
warn_repair_difficulties(
struct scrub_ctx *ctx,
unsigned int difficulty,
const char *descr)
{
if (!(difficulty & REPAIR_DIFFICULTY_SECONDARY))
return;
if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR"))
return;
if (difficulty & REPAIR_DIFFICULTY_PRIMARY)
str_info(ctx, descr, _("Corrupt primary and secondary metadata."));
else
str_info(ctx, descr, _("Corrupt secondary metadata."));
str_info(ctx, descr, _("Filesystem might not be repairable."));
}

/* Add a scrub item that needs more work to the fs metadata repair list. */
static int
defer_fs_repair(
struct scrub_ctx *ctx,
const struct scrub_item *sri)
{
struct action_item *aitem = NULL;
int error;
error = repair_item_to_action_item(ctx, sri, &aitem);
if (error || !aitem)
return error;
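	/* Queue the action item on the fs-wide repair list for phase 4. */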
pthread_mutex_lock(&ctx->lock);
action_list_add(ctx->fs_repair_list, aitem);
pthread_mutex_unlock(&ctx->lock);
return 0;
}

/*
* If we couldn't check all the scheduled metadata items, try performing spot
* repairs until we check everything or stop making forward progress.
*/
static int
repair_and_scrub_loop(
struct scrub_ctx *ctx,
struct scrub_item *sri,
const char *descr,
bool *defer)
{
unsigned int to_check;
int ret;
*defer = false;
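	/* Spot repairs are only attempted in repair mode. */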
if (ctx->mode != SCRUB_MODE_REPAIR)
return 0;
to_check = scrub_item_count_needscheck(sri);
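	/*
	 * Alternate between repairing and re-checking until every item has
	 * been checked or we stop making forward progress.
	 */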
while (to_check > 0) {
unsigned int nr;
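
		/* Fix anything the checks have flagged as corrupt so far. */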
ret = repair_item_corruption(ctx, sri);
if (ret)
return ret;
ret = scrub_item_check(ctx, sri);
if (ret)
return ret;
nr = scrub_item_count_needscheck(sri);
if (nr == to_check) {
/*
* We cannot make forward scanning progress with this
* metadata, so defer the rest until phase 4.
*/
str_info(ctx, descr,
_("Unable to make forward checking progress; will try again in phase 4."));
*defer = true;
return 0;
}
to_check = nr;
}
return 0;
}

/* Scrub each AG's metadata btrees. */
static void
scan_ag_metadata(
struct workqueue *wq,
xfs_agnumber_t agno,
void *arg)
{
struct scrub_item sri;
struct scrub_item fix_now;
struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx;
struct scan_ctl *sctl = arg;
char descr[DESCR_BUFSZ];
unsigned int difficulty;
bool defer_repairs;
int ret;
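
	/* Bail out if some other scan has already aborted the run. */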
if (sctl->aborted)
return;
scrub_item_init_ag(&sri, agno);
snprintf(descr, DESCR_BUFSZ, _("AG %u"), agno);
/*
* First we scrub and fix the AG headers, because we need them to work
* well enough to check the AG btrees. Then scrub the AG btrees.
*/
scrub_item_schedule_group(&sri, XFROG_SCRUB_GROUP_AGHEADER);
scrub_item_schedule_group(&sri, XFROG_SCRUB_GROUP_PERAG);
/*
* Try to check all of the AG metadata items that we just scheduled.
* If we return with some types still needing a check, try repairing
* any damaged metadata that we've found so far, and try again. Abort
* if we stop making forward progress.
*/
ret = scrub_item_check(ctx, &sri);
if (ret)
goto err;
ret = repair_and_scrub_loop(ctx, &sri, descr, &defer_repairs);
if (ret)
goto err;
if (defer_repairs)
goto defer;
/*
* Figure out if we need to perform early fixing. The only
* reason we need to do this is if the inobt is broken, which
* prevents phase 3 (inode scan) from running. We can rebuild
* the inobt from rmapbt data, but if the rmapbt is broken even
* at this early phase then we are sunk.
*/
difficulty = repair_item_difficulty(&sri);
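	/* Collect the must-fix items into fix_now for immediate repair. */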
repair_item_mustfix(&sri, &fix_now);
warn_repair_difficulties(ctx, difficulty, descr);
/* Repair (inode) btree damage. */
ret = repair_item_corruption(ctx, &fix_now);
if (ret)
goto err;
defer:
/* Everything else gets fixed during phase 4. */
ret = defer_fs_repair(ctx, &sri);
if (ret)
goto err;
return;
err:
sctl->aborted = true;
}

/* Scan whole-fs metadata. */
static void
scan_fs_metadata(
struct workqueue *wq,
xfs_agnumber_t type,
void *arg)
{
struct scrub_item sri;
struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx;
struct scan_ctl *sctl = arg;
unsigned int difficulty;
bool defer_repairs;
int ret;
if (sctl->aborted)
return;
/*
* Try to check all of the metadata files that we just scheduled. If
* we return with some types still needing a check, try repairing any
* damaged metadata that we've found so far, and try again. Abort if
* we stop making forward progress.
*/
scrub_item_init_fs(&sri);
scrub_item_schedule(&sri, type);
ret = scrub_item_check(ctx, &sri);
if (ret) {
sctl->aborted = true;
return;
}
ret = repair_and_scrub_loop(ctx, &sri, xfrog_scrubbers[type].descr,
&defer_repairs);
if (ret) {
sctl->aborted = true;
return;
}
if (defer_repairs)
goto defer;
/* Complain about metadata corruptions that might not be fixable. */
difficulty = repair_item_difficulty(&sri);
warn_repair_difficulties(ctx, difficulty, xfrog_scrubbers[type].descr);
defer:
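	/* Everything else gets fixed during phase 4. */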
ret = defer_fs_repair(ctx, &sri);
if (ret) {
sctl->aborted = true;
return;
}
}

/*
* Scrub each rt group's metadata. For pre-rtgroup filesystems, we ask to
* scrub "rtgroup 0" because that's how the kernel ioctl works.
*/
static void
scan_rtgroup_metadata(
struct workqueue *wq,
xfs_agnumber_t rgno,
void *arg)
{
struct scrub_item sri;
struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx;
struct scan_ctl *sctl = arg;
char descr[DESCR_BUFSZ];
bool defer_repairs;
int ret;
if (sctl->aborted)
return;
scrub_item_init_rtgroup(&sri, rgno);
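
	/* Pre-rtgroups filesystems have a single realtime volume, not groups. */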
if (ctx->mnt.fsgeom.rgcount == 0)
snprintf(descr, DESCR_BUFSZ, _("realtime"));
else
snprintf(descr, DESCR_BUFSZ, _("rtgroup %u"), rgno);
/*
* Try to check all of the rtgroup metadata items that we just
* scheduled. If we return with some types still needing a check, try
* repairing any damaged metadata that we've found so far, and try
* again. Abort if we stop making forward progress.
*/
scrub_item_schedule_group(&sri, XFROG_SCRUB_GROUP_RTGROUP);
ret = scrub_item_check(ctx, &sri);
if (ret) {
sctl->aborted = true;
return;
}
ret = repair_and_scrub_loop(ctx, &sri, descr, &defer_repairs);
if (ret) {
sctl->aborted = true;
return;
}
/* Everything else gets fixed during phase 4. */
ret = defer_fs_repair(ctx, &sri);
if (ret) {
sctl->aborted = true;
return;
}
}

/* Scan all filesystem metadata. */
int
phase2_func(
struct scrub_ctx *ctx)
{
struct workqueue wq;
struct scan_ctl sctl = {
.aborted = false,
};
struct scrub_item sri;
const struct xfrog_scrub_descr *sc = xfrog_scrubbers;
xfs_agnumber_t agno;
xfs_rgnumber_t rgno;
unsigned int type;
int ret, ret2;
ret = -workqueue_create(&wq, (struct xfs_mount *)ctx,
scrub_nproc_workqueue(ctx));
if (ret) {
str_liberror(ctx, ret, _("creating scrub workqueue"));
goto out_wait;
}
/*
* Scrub primary superblock. This will be useful if we ever need to
* hook a filesystem-wide pre-scrub activity (e.g. enable filesystem
* upgrades) off of the sb 0 scrubber (which currently does nothing).
* If errors occur, this function will log them and return nonzero.
*/
scrub_item_init_ag(&sri, 0);
scrub_item_schedule(&sri, XFS_SCRUB_TYPE_SB);
ret = scrub_item_check(ctx, &sri);
if (ret)
goto out_wq;
ret = repair_item_completely(ctx, &sri);
if (ret)
goto out_wq;
/* Scan each AG in parallel. */
for (agno = 0;
agno < ctx->mnt.fsgeom.agcount && !sctl.aborted;
agno++) {
ret = -workqueue_add(&wq, scan_ag_metadata, agno, &sctl);
if (ret) {
str_liberror(ctx, ret, _("queueing per-AG scrub work"));
goto out_wq;
}
}
if (sctl.aborted)
goto out_wq;
	/*
	 * Scan all of the whole-fs metadata objects, such as the three quota
	 * files.  Each of these can be scanned in parallel.  The realtime
	 * bitmap and summary files are now per-rtgroup metadata, so they are
	 * handled by the rtgroup scans below.
	 */
for (type = 0; type < XFS_SCRUB_TYPE_NR; type++, sc++) {
if (sc->group != XFROG_SCRUB_GROUP_FS)
continue;
ret = -workqueue_add(&wq, scan_fs_metadata, type, &sctl);
if (ret) {
str_liberror(ctx, ret,
_("queueing whole-fs scrub work"));
goto out_wq;
}
}
if (sctl.aborted)
goto out_wq;
if (ctx->mnt.fsgeom.rgcount == 0) {
		/*
		 * When rtgroups were added, the rt bitmap and summary files
		 * became per-rtgroup metadata, so the scrub interface for the
		 * two started to accept sm_agno.  Pre-rtgroups filesystems
		 * are still scrubbed with sm_agno == 0, so invoke scrub that
		 * way.
		 */
ret = -workqueue_add(&wq, scan_rtgroup_metadata, 0, &sctl);
if (ret) {
str_liberror(ctx, ret,
_("queueing realtime scrub work"));
goto out_wq;
}
}
/* Scan each rtgroup in parallel. */
for (rgno = 0;
rgno < ctx->mnt.fsgeom.rgcount && !sctl.aborted;
rgno++) {
ret = -workqueue_add(&wq, scan_rtgroup_metadata, rgno, &sctl);
if (ret) {
str_liberror(ctx, ret,
_("queueing rtgroup scrub work"));
goto out_wq;
}
}
out_wq:
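	/* Wait for all outstanding scrub work to finish, then tear down. */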
ret2 = -workqueue_terminate(&wq);
if (ret2) {
str_liberror(ctx, ret2, _("finishing scrub work"));
		if (!ret)
ret = ret2;
}
workqueue_destroy(&wq);
out_wait:
if (!ret && sctl.aborted)
ret = ECANCELED;
return ret;
}

/* Estimate how much work we're going to do. */
int
phase2_estimate(
struct scrub_ctx *ctx,
uint64_t *items,
unsigned int *nr_threads,
int *rshift)
{
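	/*
	 * Use the per-AG metadata scan count as the work estimate and the
	 * usual scrub thread count; the progress counter needs no scaling.
	 */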
*items = scrub_estimate_ag_work(ctx);
*nr_threads = scrub_nproc(ctx);
*rshift = 0;
return 0;
}