// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2018 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/statvfs.h>
#include "platform_defs.h"
#include "xfs_arch.h"
#include "handle.h"
#include "libfrog/paths.h"
#include "libfrog/workqueue.h"
#include "xfs_scrub.h"
#include "common.h"
#include "inodes.h"
#include "libfrog/fsgeom.h"
#include "libfrog/bulkstat.h"

/*
 * Iterate a range of inodes.
 *
 * This is a little more involved than repeatedly asking BULKSTAT for a
 * buffer's worth of stat data for some number of inodes.  We want to scan
 * as many of the inodes as the inobt thinks are there, including the ones
 * that are broken, but if we ask for n inodes starting at x, BULKSTAT will
 * skip the bad ones and fill the rest of the buffer from beyond the range
 * (x + n).
 *
 * Therefore, we ask INUMBERS to return one inobt chunk's worth of inode
 * bitmap information.  Then we try to BULKSTAT only the inodes that were
 * present in that chunk, and compare what we got against what INUMBERS said
 * was there.  If there's a mismatch, we know that we have an inode that
 * fails the verifiers, so we synthesize the bulkstat information ourselves
 * to force the scrub code to deal with the broken inode.
 *
 * If the iteration function returns ESTALE, that means that the inode has
 * been deleted and possibly recreated since the BULKSTAT call.  We will
 * refresh the stat information and try again up to 30 times before reporting
 * the staleness as an error.
 */
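
/*
 * A concrete example (with made-up inode numbers): if INUMBERS says the
 * chunk starting at inode 128 has two allocated inodes but BULKSTAT only
 * returns a record for inode 128, then inode 129 must have failed iget.
 * bulkstat_for_inumbers() below fabricates a bulkstat record for inode 129
 * so that the scrub code still gets a chance to visit it.
 */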

/*
 * Run bulkstat on a single inode chunk (one inobt record's worth of inodes),
 * then check that we got exactly the inodes we expected.  If not, load them
 * one at a time (or fake them) into the bulkstat data.  The caller must
 * supply a bulkstat request with room for at least LIBFROG_BULKSTAT_CHUNKSIZE
 * records.
 */
static void
bulkstat_for_inumbers(
        struct scrub_ctx *ctx,
        const char *descr,
        const struct xfs_inumbers *inumbers,
        struct xfs_bulkstat_req *breq)
{
        struct xfs_bulkstat *bstat = breq->bulkstat;
        struct xfs_bulkstat *bs;
        int i;
        int error;

        /* First we try regular bulkstat, for speed. */
        breq->hdr.ino = inumbers->xi_startino;
        breq->hdr.icount = inumbers->xi_alloccount;
        error = -xfrog_bulkstat(&ctx->mnt, breq);
        if (error) {
                char errbuf[DESCR_BUFSZ];

                str_info(ctx, descr, "%s",
                                strerror_r(error, errbuf, DESCR_BUFSZ));
        }

        /*
         * Check each of the stats we got back to make sure we got the inodes
         * we asked for.  Even if the bulk request above failed, we still walk
         * the chunk so that the single-inode reload (or the faked record)
         * below gives the scrub code something to work with.
         */
        for (i = 0, bs = bstat; i < LIBFROG_BULKSTAT_CHUNKSIZE; i++) {
                if (!(inumbers->xi_allocmask & (1ULL << i)))
                        continue;
                if (bs->bs_ino == inumbers->xi_startino + i) {
                        bs++;
                        continue;
                }

                /*
                 * Load the one inode; if even that fails, synthesize a
                 * minimal record so that the scrub code still visits this
                 * inode.
                 */
                error = -xfrog_bulkstat_single(&ctx->mnt,
                                inumbers->xi_startino + i, 0, bs);
                if (error || bs->bs_ino != inumbers->xi_startino + i) {
                        memset(bs, 0, sizeof(struct xfs_bulkstat));
                        bs->bs_ino = inumbers->xi_startino + i;
                        bs->bs_blksize = ctx->mnt_sv.f_frsize;
                }
                bs++;
        }
}

/* BULKSTAT wrapper routines. */
struct scan_inodes {
        scrub_inode_iter_fn fn;         /* visit each inode with this */
        void *arg;                      /* private data for fn */
        bool aborted;                   /* set if any worker had to bail out */
};

/*
 * Call into the filesystem for inode/bulkstat information and call our
 * iterator function.  We'll try to fill the bulkstat information in batches,
 * but we can also detect iget failures.
 */
static void
scan_ag_inodes(
        struct workqueue *wq,
        xfs_agnumber_t agno,
        void *arg)
{
        struct xfs_handle handle = { };
        char descr[DESCR_BUFSZ];
        struct xfs_inumbers_req *ireq;
        struct xfs_bulkstat_req *breq;
        struct scan_inodes *si = arg;
        struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx;
        struct xfs_bulkstat *bs;
        struct xfs_inumbers *inumbers;
        uint64_t nextino = cvt_agino_to_ino(&ctx->mnt, agno, 0);
        int i;
        int error;
        int stale_count = 0;

        snprintf(descr, DESCR_BUFSZ, _("dev %d:%d AG %u inodes"),
                        major(ctx->fsinfo.fs_datadev),
                        minor(ctx->fsinfo.fs_datadev),
                        agno);

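        /*
         * Build a file handle template for this filesystem; the scan loop
         * below fills in fid_ino and fid_gen for each inode before handing
         * the handle to the iterator function.
         */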
        memcpy(&handle.ha_fsid, ctx->fshandle, sizeof(handle.ha_fsid));
        handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
                        sizeof(handle.ha_fid.fid_len);
        handle.ha_fid.fid_pad = 0;

        error = -xfrog_bulkstat_alloc_req(LIBFROG_BULKSTAT_CHUNKSIZE, 0, &breq);
        if (error) {
                str_liberror(ctx, error, descr);
                si->aborted = true;
                return;
        }

        error = -xfrog_inumbers_alloc_req(1, 0, &ireq);
        if (error) {
                str_liberror(ctx, error, descr);
                free(breq);
                si->aborted = true;
                return;
        }
        inumbers = &ireq->inumbers[0];
        xfrog_inumbers_set_ag(ireq, agno);

        /* Find the inode chunk & alloc mask */
        error = -xfrog_inumbers(&ctx->mnt, ireq);
        while (!error && !si->aborted && ireq->hdr.ocount > 0) {
                /*
                 * Make sure that we always make forward progress while we
                 * scan the inode btree.
                 */
                if (nextino > inumbers->xi_startino) {
                        str_corrupt(ctx, descr,
_("AG %u inode btree is corrupt near agino %lu, got %lu"), agno,
                                        cvt_ino_to_agino(&ctx->mnt, nextino),
                                        cvt_ino_to_agino(&ctx->mnt,
                                                ireq->inumbers[0].xi_startino));
                        si->aborted = true;
                        break;
                }
                nextino = ireq->hdr.ino;

                /*
                 * We can have totally empty inode chunks on filesystems where
                 * there are more than 64 inodes per block.  Skip these.
                 */
                if (inumbers->xi_alloccount == 0)
                        goto igrp_retry;

                bulkstat_for_inumbers(ctx, descr, inumbers, breq);

                /* Iterate all the inodes. */
                for (i = 0, bs = breq->bulkstat;
                     !si->aborted && i < inumbers->xi_alloccount;
                     i++, bs++) {
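                        /*
                         * Fill out the file handle for this inode and pass
                         * both the handle and the bulkstat data to the
                         * caller's iterator function.
                         */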
                        handle.ha_fid.fid_ino = bs->bs_ino;
                        handle.ha_fid.fid_gen = bs->bs_gen;
                        error = si->fn(ctx, &handle, bs, si->arg);
                        switch (error) {
                        case 0:
                                break;
                        case ESTALE: {
                                char idescr[DESCR_BUFSZ];

                                stale_count++;
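                                /*
                                 * Refresh the inumbers/bulkstat data and
                                 * retry the whole chunk a few times (see the
                                 * comment at the top of this file) before we
                                 * give up on this inode.
                                 */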
                                if (stale_count < 30) {
                                        ireq->hdr.ino = inumbers->xi_startino;
                                        goto igrp_retry;
                                }
                                scrub_render_ino_descr(ctx, idescr, DESCR_BUFSZ,
                                                bs->bs_ino, bs->bs_gen, NULL);
                                str_info(ctx, idescr,
_("Changed too many times during scan; giving up."));
                                break;
                        }
                        case ECANCELED:
                                error = 0;
                                /* fall thru */
                        default:
                                goto err;
                        }
                        if (scrub_excessive_errors(ctx)) {
                                si->aborted = true;
                                goto out;
                        }
                }

                /* Done with this chunk; reset the ESTALE retry counter. */
                stale_count = 0;
igrp_retry:
                error = -xfrog_inumbers(&ctx->mnt, ireq);
        }

err:
        if (error) {
                str_liberror(ctx, error, descr);
                si->aborted = true;
        }
out:
        free(ireq);
        free(breq);
}

/*
 * Scan all the inodes in a filesystem.  On error, this function will log
 * an error message and return -1.
 */
int
scrub_scan_all_inodes(
        struct scrub_ctx *ctx,
        scrub_inode_iter_fn fn,
        void *arg)
{
        struct scan_inodes si = {
                .fn = fn,
                .arg = arg,
        };
        xfs_agnumber_t agno;
        struct workqueue wq;
        int ret;

        ret = -workqueue_create(&wq, (struct xfs_mount *)ctx,
                        scrub_nproc_workqueue(ctx));
        if (ret) {
                str_liberror(ctx, ret, _("creating bulkstat workqueue"));
                return -1;
        }

        for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
                ret = -workqueue_add(&wq, scan_ag_inodes, agno, &si);
                if (ret) {
                        si.aborted = true;
                        str_liberror(ctx, ret, _("queueing bulkstat work"));
                        break;
                }
        }

        ret = -workqueue_terminate(&wq);
        if (ret) {
                si.aborted = true;
                str_liberror(ctx, ret, _("finishing bulkstat work"));
        }
        workqueue_destroy(&wq);

        return si.aborted ? -1 : 0;
}
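
/*
 * Example usage (illustrative only; count_inodes() is a hypothetical caller,
 * not part of this file).  The callback signature follows scrub_inode_iter_fn
 * as it is invoked from scan_ag_inodes() above, and because the workqueue
 * runs one work item per AG on several threads, the example uses an atomic
 * add for its shared counter.
 *
 *	static int
 *	count_inodes(
 *		struct scrub_ctx *ctx,
 *		struct xfs_handle *handle,
 *		struct xfs_bulkstat *bstat,
 *		void *arg)
 *	{
 *		uint64_t *counter = arg;
 *
 *		__atomic_add_fetch(counter, 1, __ATOMIC_RELAXED);
 *		return 0;
 *	}
 *
 *	uint64_t count = 0;
 *
 *	if (scrub_scan_all_inodes(ctx, count_inodes, &count) < 0)
 *		return -1;
 */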

/* Open a file by handle, returning either the fd or -1 on error. */
int
scrub_open_handle(
        struct xfs_handle *handle)
{
        return open_by_fshandle(handle, sizeof(*handle),
                        O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
}