| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * Copyright (C) 2018-2024 Oracle. All Rights Reserved. |
| * Author: Darrick J. Wong <djwong@kernel.org> |
| */ |
| #include "xfs.h" |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <pthread.h> |
| #include <sys/statvfs.h> |
| #include "platform_defs.h" |
| #include "xfs_arch.h" |
| #include "handle.h" |
| #include "libfrog/paths.h" |
| #include "libfrog/workqueue.h" |
| #include "xfs_scrub.h" |
| #include "common.h" |
| #include "inodes.h" |
| #include "descr.h" |
| #include "libfrog/fsgeom.h" |
| #include "libfrog/bulkstat.h" |
| #include "libfrog/handle_priv.h" |
| #include "bitops.h" |
| #include "libfrog/bitmask.h" |
| |
| /* |
| * Iterate a range of inodes. |
| * |
| * This is a little more involved than repeatedly asking BULKSTAT for a |
| * buffer's worth of stat data for some number of inodes. We want to scan as |
| * many of the inodes as the inobt thinks there are, so we use the INUMBERS |
| * ioctl to walk all the inobt records in the filesystem and spawn a worker to |
| * bulkstat and iterate. The worker starts with an inumbers record that can |
| * look like this: |
| * |
| * {startino = S, allocmask = 0b11011} |
| * |
| * Given a starting inumber S and count C=64, bulkstat will return a sorted |
| * array of stat information. The bs_ino of those array elements can look like |
| * any of the following: |
| * |
| * 0. [S, S+1, S+3, S+4] |
| * 1. [S+e, S+e+1, S+e+3, S+e+4, S+e+C+1...], where e >= 0 |
| * 2. [S+e+n], where n >= 0 |
| * 3. [] |
| * 4. [], errno == EFSCORRUPTED |
| * |
| * We know that bulkstat scanned the entire inode range between S and bs_ino of |
| * the last array element, even though it only fills out an array element for |
| * allocated inodes.  Therefore, in cases 0-2 we can treat inode S as scanned, |
| * even if there is no bstat[] record for S. In turn, we can create a bitmask |
| * of inodes that we have seen, and set bits 0 through (bstat[-1].bs_ino - S), |
| * being careful not to set any bits past S+C. |
| * |
| * In case (0) we find that the seen mask matches the inumbers record |
| * exactly, so the caller can walk the stat records and move on. In case (1) |
| * this is also true, but we must be careful to reduce the array length to |
| * avoid scanning inodes that are not in the inumber chunk. In case (3) we |
| * conclude that there were no inodes left to scan and terminate. |
| * |
| * In cases (2) and (4) we don't know why bulkstat returned fewer than C |
| * elements.  We might have found the end of the filesystem, or the kernel |
| * might have found a corrupt inode and stopped.  We must investigate by |
| * trying to fill out the rest of the bstat array, starting with the next |
| * inumber S' after the last bstat array element filled, and continuing until |
| * S' is beyond S + C or the array is full.  Each time we succeed in loading |
| * new records, the kernel advances S' for us; if instead we encounter case |
| * (4), we increment S' ourselves. |
| * |
| * Inodes that are set in the allocmask but not set in the seen mask are the |
| * corrupt inodes. For each of these cases, we try to populate the bulkstat |
| * array one inode at a time. If the kernel returns a matching record we can |
| * use it; if instead we receive an error, we synthesize enough of a record |
| * to be able to run online scrub by handle. |
| * |
| * If the iteration function returns ESTALE, that means that the inode has |
| * been deleted and possibly recreated since the BULKSTAT call.  We will |
| * refresh the stat information and try again up to 30 times before reporting |
| * the staleness as an error. |
| */ |
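| |
| /* |
| * Worked example with hypothetical numbers: suppose INUMBERS returns |
| * {startino = 128, allocmask = 0b1011}, i.e. inodes 128, 129, and 131 are |
| * allocated.  If bulkstat starting at 128 returns [128, 129, 131], the seen |
| * mask covers bits 0-3 (0b1111), allocmask & ~seen_mask == 0, and there is |
| * nothing left to single-step.  If instead inode 129 is corrupt, bulkstat |
| * fails with EFSCORRUPTED (case 4), so we restart just past the corruption, |
| * pick up whatever records remain in the chunk, and single-step the inodes |
| * that are still missing, synthesizing a record for 129 when the |
| * single-inode bulkstat also fails. |
| */ |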
| |
| /* |
| * Return the inumber of the highest inode in the bulkstat data, assuming the |
| * records are sorted in inumber order. |
| */ |
| static inline uint64_t last_bstat_ino(const struct xfs_bulkstat_req *b) |
| { |
| return b->hdr.ocount ? b->bulkstat[b->hdr.ocount - 1].bs_ino : 0; |
| } |
| |
| /* |
| * Deduce the bitmask of the inodes in inums that were seen by bulkstat. If |
| * the inode is present in the bstat array this is trivially true; or if it is |
| * not in the array but higher inumbers are present, then it was freed. |
| */ |
| static __u64 |
| seen_mask_from_bulkstat( |
| const struct xfs_inumbers *inums, |
| __u64 breq_startino, |
| const struct xfs_bulkstat_req *breq) |
| { |
| const __u64 limit_ino = |
| inums->xi_startino + LIBFROG_BULKSTAT_CHUNKSIZE; |
| const __u64 last = last_bstat_ino(breq); |
| __u64 ret = 0; |
| int i, maxi; |
| |
| /* Ignore the bulkstat results if they don't cover inumbers */ |
| if (breq_startino > limit_ino || last < inums->xi_startino) |
| return 0; |
| |
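| /* |
| * Set a bit for every inumber from the first inode covered by this |
| * bulkstat request through the last bs_ino actually returned, clamped |
| * to the 64 inodes described by the inumbers record. |
| */ |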
| maxi = min(LIBFROG_BULKSTAT_CHUNKSIZE, last - inums->xi_startino + 1); |
| for (i = breq_startino - inums->xi_startino; i < maxi; i++) |
| ret |= 1ULL << i; |
| |
| return ret; |
| } |
| |
| /* |
| * Try to fill the rest of orig_breq with bulkstat data by re-running bulkstat |
| * with increasing start_ino until we either hit the end of the inumbers info |
| * or fill up the bstat array with something. Returns a bitmask of the inodes |
| * within inums that were filled by the bulkstat requests. |
| */ |
| static __u64 |
| bulkstat_the_rest( |
| struct scrub_ctx *ctx, |
| const struct xfs_inumbers *inums, |
| struct xfs_bulkstat_req *orig_breq, |
| int orig_error) |
| { |
| struct xfs_bulkstat_req *new_breq; |
| struct xfs_bulkstat *old_bstat = |
| &orig_breq->bulkstat[orig_breq->hdr.ocount]; |
| const __u64 limit_ino = |
| inums->xi_startino + LIBFROG_BULKSTAT_CHUNKSIZE; |
| __u64 start_ino = orig_breq->hdr.ino; |
| __u64 seen_mask = 0; |
| int error; |
| |
| assert(orig_breq->hdr.ocount < orig_breq->hdr.icount); |
| |
| /* |
| * If the first bulkstat returned a corruption error, that means |
| * start_ino is corrupt. Restart instead at the next inumber. |
| */ |
| if (orig_error == EFSCORRUPTED) |
| start_ino++; |
| if (start_ino >= limit_ino) |
| return 0; |
| |
| error = -xfrog_bulkstat_alloc_req( |
| orig_breq->hdr.icount - orig_breq->hdr.ocount, |
| start_ino, &new_breq); |
| if (error) |
| return 0; /* report nothing seen so the caller single-steps instead */ |
| new_breq->hdr.flags = orig_breq->hdr.flags; |
| |
| do { |
| /* |
| * Fill the new bulkstat request with stat data starting at |
| * start_ino. |
| */ |
| error = -xfrog_bulkstat(&ctx->mnt, new_breq); |
| if (error == EFSCORRUPTED) { |
| /* |
| * start_ino is corrupt, increment and try the next |
| * inode. |
| */ |
| start_ino++; |
| new_breq->hdr.ino = start_ino; |
| continue; |
| } |
| if (error) { |
| /* |
| * Any other error means the caller falls back to |
| * single stepping. |
| */ |
| break; |
| } |
| if (new_breq->hdr.ocount == 0) |
| break; |
| |
| /* Copy new results to the original bstat buffer */ |
| memcpy(old_bstat, new_breq->bulkstat, |
| new_breq->hdr.ocount * sizeof(struct xfs_bulkstat)); |
| orig_breq->hdr.ocount += new_breq->hdr.ocount; |
| old_bstat += new_breq->hdr.ocount; |
| seen_mask |= seen_mask_from_bulkstat(inums, start_ino, |
| new_breq); |
| |
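| /* |
| * Shrink the next request so the copied records never overflow the |
| * original bstat array, and resume from the cursor that the kernel |
| * handed back. |
| */ |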
| new_breq->hdr.icount -= new_breq->hdr.ocount; |
| start_ino = new_breq->hdr.ino; |
| } while (new_breq->hdr.icount > 0 && new_breq->hdr.ino < limit_ino); |
| |
| free(new_breq); |
| return seen_mask; |
| } |
| |
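| /* Three-way compare: -1, 0, or 1; avoids the overflow pitfalls of (l - r). */ |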
| #define cmp_int(l, r) ((l > r) - (l < r)) |
| |
| /* Compare two bulkstat records by inumber. */ |
| static int |
| compare_bstat( |
| const void *a, |
| const void *b) |
| { |
| const struct xfs_bulkstat *ba = a; |
| const struct xfs_bulkstat *bb = b; |
| |
| return cmp_int(ba->bs_ino, bb->bs_ino); |
| } |
| |
| /* |
| * Walk the xi_allocmask looking for set bits that aren't present in |
| * the seen mask.  For each such inode, fill the entries at the end of |
| * the array with stat information one at a time, synthesizing them if |
| * necessary. At this point, (xi_allocmask & ~seen_mask) should be the |
| * corrupt inodes. |
| */ |
| static void |
| bulkstat_single_step( |
| struct scrub_ctx *ctx, |
| const struct xfs_inumbers *inumbers, |
| uint64_t seen_mask, |
| struct xfs_bulkstat_req *breq) |
| { |
| struct xfs_bulkstat *bs = NULL; |
| int i; |
| int error; |
| |
| for (i = 0; i < LIBFROG_BULKSTAT_CHUNKSIZE; i++) { |
| /* |
| * Don't single-step if inumbers said it wasn't allocated or |
| * bulkstat actually filled it. |
| */ |
| if (!(inumbers->xi_allocmask & (1ULL << i))) |
| continue; |
| if (seen_mask & (1ULL << i)) |
| continue; |
| |
| assert(breq->hdr.ocount < LIBFROG_BULKSTAT_CHUNKSIZE); |
| |
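| /* |
| * Lazily point bs at the first free slot in the bstat array; a |
| * non-NULL bs after the loop means we tried at least one single-step |
| * and should re-sort the (possibly extended) array. |
| */ |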
| if (!bs) |
| bs = &breq->bulkstat[breq->hdr.ocount]; |
| |
| /* |
| * We didn't get stat data for this inode even though the |
| * allocation mask says it exists.  We can't distinguish between |
| * the inode being freed and the inode being too corrupt to load, |
| * so try a single-inode bulkstat to see if we can load it. |
| */ |
| error = -xfrog_bulkstat_single(&ctx->mnt, |
| inumbers->xi_startino + i, breq->hdr.flags, bs); |
| switch (error) { |
| case ENOENT: |
| /* |
| * This inode wasn't found, and no results were |
| * returned. We've likely hit the end of the |
| * filesystem, but we'll move on to the next inode in |
| * the mask for the sake of caution. |
| */ |
| continue; |
| case 0: |
| /* |
| * If a result was returned but it wasn't the inode |
| * we were looking for, then the missing inode was |
| * freed. Move on to the next inode in the mask. |
| */ |
| if (bs->bs_ino != inumbers->xi_startino + i) |
| continue; |
| break; |
| default: |
| /* |
| * Some error happened. Synthesize a bulkstat record |
| * so that phase3 can try to see if there's a corrupt |
| * inode that needs repairing. |
| */ |
| memset(bs, 0, sizeof(struct xfs_bulkstat)); |
| bs->bs_ino = inumbers->xi_startino + i; |
| bs->bs_blksize = ctx->mnt_sv.f_frsize; |
| break; |
| } |
| |
| breq->hdr.ocount++; |
| bs++; |
| } |
| |
| /* If we added any entries, re-sort the array. */ |
| if (bs) |
| qsort(breq->bulkstat, breq->hdr.ocount, |
| sizeof(struct xfs_bulkstat), compare_bstat); |
| } |
| |
| /* Return the inumber of the highest allocated inode in the inumbers data. */ |
| static inline uint64_t last_allocmask_ino(const struct xfs_inumbers *i) |
| { |
| return i->xi_startino + xfrog_highbit64(i->xi_allocmask); |
| } |
| |
| /* |
| * Run bulkstat on an entire inode chunk, then check that we got exactly the |
| * inodes we expected.  If not, load them one at a time (or fake it) into the |
| * bulkstat data. |
| */ |
| static void |
| bulkstat_for_inumbers( |
| struct scrub_ctx *ctx, |
| const struct xfs_inumbers *inumbers, |
| struct xfs_bulkstat_req *breq) |
| { |
| const uint64_t limit_ino = |
| inumbers->xi_startino + LIBFROG_BULKSTAT_CHUNKSIZE; |
| uint64_t seen_mask = 0; |
| int i; |
| int error; |
| |
| assert(inumbers->xi_allocmask != 0); |
| |
| /* First we try regular bulkstat, for speed. */ |
| breq->hdr.ino = inumbers->xi_startino; |
| error = -xfrog_bulkstat(&ctx->mnt, breq); |
| if (!error) { |
| if (!breq->hdr.ocount) |
| return; |
| seen_mask |= seen_mask_from_bulkstat(inumbers, |
| inumbers->xi_startino, breq); |
| } |
| |
| /* |
| * If the last allocated inode as reported by inumbers is higher than |
| * the last inode reported by bulkstat, one of two things happened: |
| * either all the inodes at the high end of the chunk were freed since |
| * the inumbers call, or bulkstat encountered a corrupt inode and |
| * returned early. Try to bulkstat the rest of the array. |
| */ |
| if (last_allocmask_ino(inumbers) > last_bstat_ino(breq)) |
| seen_mask |= bulkstat_the_rest(ctx, inumbers, breq, error); |
| |
| /* |
| * Bulkstat might return inodes beyond xi_startino + CHUNKSIZE. Reduce |
| * ocount to ignore inodes not described by the inumbers record. |
| */ |
| for (i = breq->hdr.ocount - 1; i >= 0; i--) { |
| if (breq->bulkstat[i].bs_ino < limit_ino) |
| break; |
| breq->hdr.ocount--; |
| } |
| |
| /* |
| * Fill in any missing inodes that are mentioned in the alloc mask but |
| * weren't previously seen by bulkstat. These are the corrupt inodes. |
| */ |
| bulkstat_single_step(ctx, inumbers, seen_mask, breq); |
| } |
| |
| /* BULKSTAT wrapper routines. */ |
| struct scan_inodes { |
| struct workqueue wq_bulkstat; |
| scrub_inode_iter_fn fn; |
| void *arg; |
| unsigned int nr_threads; |
| bool aborted; |
| }; |
| |
| /* |
| * A single unit of inode scan work. This contains a pointer to the parent |
| * information, followed by an INUMBERS request structure, followed by a |
| * BULKSTAT request structure. The last two are VLAs, so we can't represent |
| * them here. |
| */ |
| struct scan_ichunk { |
| struct scan_inodes *si; |
| }; |
| |
| static inline struct xfs_inumbers_req * |
| ichunk_to_inumbers( |
| struct scan_ichunk *ichunk) |
| { |
| char *p = (char *)ichunk; |
| |
| return (struct xfs_inumbers_req *)(p + sizeof(struct scan_ichunk)); |
| } |
| |
| static inline struct xfs_bulkstat_req * |
| ichunk_to_bulkstat( |
| struct scan_ichunk *ichunk) |
| { |
| char *p = (char *)ichunk_to_inumbers(ichunk); |
| |
| return (struct xfs_bulkstat_req *)(p + XFS_INUMBERS_REQ_SIZE(1)); |
| } |
| |
| static inline int |
| alloc_ichunk( |
| struct scrub_ctx *ctx, |
| struct scan_inodes *si, |
| uint32_t agno, |
| uint64_t startino, |
| struct scan_ichunk **ichunkp) |
| { |
| struct scan_ichunk *ichunk; |
| struct xfs_inumbers_req *ireq; |
| struct xfs_bulkstat_req *breq; |
| |
| ichunk = calloc(1, sizeof(struct scan_ichunk) + |
| XFS_INUMBERS_REQ_SIZE(1) + |
| XFS_BULKSTAT_REQ_SIZE(LIBFROG_BULKSTAT_CHUNKSIZE)); |
| if (!ichunk) |
| return -errno; |
| |
| ichunk->si = si; |
| |
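| /* |
| * Ask INUMBERS for one inobt record at a time so that each work item |
| * maps to a single 64-inode chunk, and pin the request to this AG. |
| */ |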
| ireq = ichunk_to_inumbers(ichunk); |
| ireq->hdr.icount = 1; |
| ireq->hdr.ino = startino; |
| ireq->hdr.agno = agno; |
| ireq->hdr.flags |= XFS_BULK_IREQ_AGNO; |
| |
| breq = ichunk_to_bulkstat(ichunk); |
| breq->hdr.icount = LIBFROG_BULKSTAT_CHUNKSIZE; |
| |
| /* Scan the metadata directory tree too. */ |
| if (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_METADIR) |
| breq->hdr.flags |= XFS_BULK_IREQ_METADIR; |
| |
| *ichunkp = ichunk; |
| return 0; |
| } |
| |
| static int |
| render_ino_from_bulkstat( |
| struct scrub_ctx *ctx, |
| char *buf, |
| size_t buflen, |
| void *data) |
| { |
| struct xfs_bulkstat *bstat = data; |
| |
| return scrub_render_ino_descr(ctx, buf, buflen, bstat->bs_ino, |
| bstat->bs_gen, NULL); |
| } |
| |
| static int |
| render_inumbers_from_agno( |
| struct scrub_ctx *ctx, |
| char *buf, |
| size_t buflen, |
| void *data) |
| { |
| xfs_agnumber_t *agno = data; |
| |
| return snprintf(buf, buflen, _("dev %d:%d AG %u inodes"), |
| major(ctx->fsinfo.fs_datadev), |
| minor(ctx->fsinfo.fs_datadev), |
| *agno); |
| } |
| |
| /* |
| * Call BULKSTAT for information on a single chunk's worth of inodes and call |
| * our iterator function. We'll try to fill the bulkstat information in |
| * batches, but we can also detect iget failures. |
| */ |
| static void |
| scan_ag_bulkstat( |
| struct workqueue *wq, |
| xfs_agnumber_t agno, |
| void *arg) |
| { |
| struct xfs_handle handle; |
| struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx; |
| struct scan_ichunk *ichunk = arg; |
| struct xfs_inumbers_req *ireq = ichunk_to_inumbers(ichunk); |
| struct xfs_bulkstat_req *breq = ichunk_to_bulkstat(ichunk); |
| struct scan_inodes *si = ichunk->si; |
| struct xfs_bulkstat *bs = &breq->bulkstat[0]; |
| struct xfs_inumbers *inumbers = &ireq->inumbers[0]; |
| uint64_t last_ino = 0; |
| int i; |
| int error; |
| int stale_count = 0; |
| DEFINE_DESCR(dsc_bulkstat, ctx, render_ino_from_bulkstat); |
| DEFINE_DESCR(dsc_inumbers, ctx, render_inumbers_from_agno); |
| |
| descr_set(&dsc_inumbers, &agno); |
| handle_from_fshandle(&handle, ctx->fshandle, ctx->fshandle_len); |
| retry: |
| bulkstat_for_inumbers(ctx, inumbers, breq); |
| |
| /* Iterate all the inodes. */ |
| for (i = 0; !si->aborted && i < breq->hdr.ocount; i++, bs++) { |
| uint64_t scan_ino = bs->bs_ino; |
| |
| /* ensure forward progress if we retried */ |
| if (scan_ino < last_ino) |
| continue; |
| |
| descr_set(&dsc_bulkstat, bs); |
| handle_from_bulkstat(&handle, bs); |
| error = si->fn(ctx, &handle, bs, si->arg); |
| switch (error) { |
| case 0: |
| break; |
| case ESTALE: { |
| stale_count++; |
| if (stale_count < 30) { |
| uint64_t old_startino; |
| |
| ireq->hdr.ino = old_startino = |
| inumbers->xi_startino; |
| error = -xfrog_inumbers(&ctx->mnt, ireq); |
| if (error) |
| goto err; |
| /* |
| * Retry only if inumbers returns the same |
| * inobt record as the previous record and |
| * there are allocated inodes in it. |
| */ |
| if (!si->aborted && |
| ireq->hdr.ocount > 0 && |
| inumbers->xi_alloccount > 0 && |
| inumbers->xi_startino == old_startino) |
| goto retry; |
| goto out; |
| } |
| str_info(ctx, descr_render(&dsc_bulkstat), |
| _("Changed too many times during scan; giving up.")); |
| si->aborted = true; |
| goto out; |
| } |
| case ECANCELED: |
| error = 0; |
| fallthrough; |
| default: |
| goto err; |
| } |
| if (scrub_excessive_errors(ctx)) { |
| si->aborted = true; |
| goto out; |
| } |
| last_ino = scan_ino; |
| } |
| |
| err: |
| if (error) { |
| str_liberror(ctx, error, descr_render(&dsc_bulkstat)); |
| si->aborted = true; |
| } |
| out: |
| free(ichunk); |
| } |
| |
| /* |
| * Call INUMBERS for information about inode chunks, then queue the inumbers |
| * responses in the bulkstat workqueue. This helps us maximize CPU parallelism |
| * if the filesystem AGs are not evenly loaded. |
| */ |
| static void |
| scan_ag_inumbers( |
| struct workqueue *wq, |
| xfs_agnumber_t agno, |
| void *arg) |
| { |
| struct scan_ichunk *ichunk = NULL; |
| struct scan_inodes *si = arg; |
| struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx; |
| struct xfs_inumbers_req *ireq; |
| uint64_t nextino = cvt_agino_to_ino(&ctx->mnt, agno, 0); |
| int error; |
| DEFINE_DESCR(dsc, ctx, render_inumbers_from_agno); |
| |
| descr_set(&dsc, &agno); |
| |
| error = alloc_ichunk(ctx, si, agno, 0, &ichunk); |
| if (error) |
| goto err; |
| ireq = ichunk_to_inumbers(ichunk); |
| |
| /* Find the inode chunk & alloc mask */ |
| error = -xfrog_inumbers(&ctx->mnt, ireq); |
| while (!error && !si->aborted && ireq->hdr.ocount > 0) { |
| /* |
| * Make sure that we always make forward progress while we |
| * scan the inode btree. |
| */ |
| if (nextino > ireq->inumbers[0].xi_startino) { |
| str_corrupt(ctx, descr_render(&dsc), |
| _("AG %u inode btree is corrupt near agino %lu, got %lu"), agno, |
| cvt_ino_to_agino(&ctx->mnt, nextino), |
| cvt_ino_to_agino(&ctx->mnt, |
| ireq->inumbers[0].xi_startino)); |
| si->aborted = true; |
| break; |
| } |
| nextino = ireq->hdr.ino; |
| |
| if (ireq->inumbers[0].xi_alloccount == 0) { |
| /* |
| * We can have totally empty inode chunks on |
| * filesystems where there are more than 64 inodes per |
| * block. Skip these. |
| */ |
| ; |
| } else if (si->nr_threads > 0) { |
| /* Queue this inode chunk on the bulkstat workqueue. */ |
| error = -workqueue_add(&si->wq_bulkstat, |
| scan_ag_bulkstat, agno, ichunk); |
| if (error) { |
| si->aborted = true; |
| str_liberror(ctx, error, |
| _("queueing bulkstat work")); |
| goto out; |
| } |
| ichunk = NULL; |
| } else { |
| /* |
| * Only one thread, call bulkstat directly. Remember, |
| * ichunk is freed by the worker before returning. |
| */ |
| scan_ag_bulkstat(wq, agno, ichunk); |
| ichunk = NULL; |
| if (si->aborted) |
| break; |
| } |
| |
| if (!ichunk) { |
| error = alloc_ichunk(ctx, si, agno, nextino, &ichunk); |
| if (error) |
| goto err; |
| } |
| ireq = ichunk_to_inumbers(ichunk); |
| |
| error = -xfrog_inumbers(&ctx->mnt, ireq); |
| } |
| |
| err: |
| if (error) { |
| str_liberror(ctx, error, descr_render(&dsc)); |
| si->aborted = true; |
| } |
| out: |
| if (ichunk) |
| free(ichunk); |
| } |
| |
| /* |
| * Scan all the inodes in a filesystem, including metadata directory files and |
| * broken files. On error, this function will log an error message and return |
| * -1. |
| */ |
| int |
| scrub_scan_all_inodes( |
| struct scrub_ctx *ctx, |
| scrub_inode_iter_fn fn, |
| void *arg) |
| { |
| struct scan_inodes si = { |
| .fn = fn, |
| .arg = arg, |
| .nr_threads = scrub_nproc_workqueue(ctx), |
| }; |
| xfs_agnumber_t agno; |
| struct workqueue wq_inumbers; |
| unsigned int max_bulkstat; |
| int ret; |
| |
| /* |
| * The bulkstat workqueue should queue at most one inobt block's worth |
| * of inode chunk records per worker thread. If we're running in |
| * single thread mode (nr_threads==0) then we skip the workqueues. |
| */ |
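| /* e.g. 256 records per thread with 4096-byte blocks (16-byte inobt records) */ |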
| max_bulkstat = si.nr_threads * (ctx->mnt.fsgeom.blocksize / 16); |
| |
| ret = -workqueue_create_bound(&si.wq_bulkstat, (struct xfs_mount *)ctx, |
| si.nr_threads, max_bulkstat); |
| if (ret) { |
| str_liberror(ctx, ret, _("creating bulkstat workqueue")); |
| return -1; |
| } |
| |
| ret = -workqueue_create(&wq_inumbers, (struct xfs_mount *)ctx, |
| si.nr_threads); |
| if (ret) { |
| str_liberror(ctx, ret, _("creating inumbers workqueue")); |
| si.aborted = true; |
| goto kill_bulkstat; |
| } |
| |
| for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) { |
| ret = -workqueue_add(&wq_inumbers, scan_ag_inumbers, agno, &si); |
| if (ret) { |
| si.aborted = true; |
| str_liberror(ctx, ret, _("queueing inumbers work")); |
| break; |
| } |
| } |
| |
| ret = -workqueue_terminate(&wq_inumbers); |
| if (ret) { |
| si.aborted = true; |
| str_liberror(ctx, ret, _("finishing inumbers work")); |
| } |
| workqueue_destroy(&wq_inumbers); |
| |
| kill_bulkstat: |
| ret = -workqueue_terminate(&si.wq_bulkstat); |
| if (ret) { |
| si.aborted = true; |
| str_liberror(ctx, ret, _("finishing bulkstat work")); |
| } |
| workqueue_destroy(&si.wq_bulkstat); |
| |
| return si.aborted ? -1 : 0; |
| } |
| |
| struct user_bulkstat { |
| struct scan_inodes *si; |
| |
| /* vla, must be last */ |
| struct xfs_bulkstat_req breq; |
| }; |
| |
| /* Iterate all the user files returned by a bulkstat. */ |
| static void |
| scan_user_files( |
| struct workqueue *wq, |
| xfs_agnumber_t agno, |
| void *arg) |
| { |
| struct xfs_handle handle; |
| struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx; |
| struct user_bulkstat *ureq = arg; |
| struct xfs_bulkstat *bs = &ureq->breq.bulkstat[0]; |
| struct scan_inodes *si = ureq->si; |
| int i; |
| int error = 0; |
| DEFINE_DESCR(dsc_bulkstat, ctx, render_ino_from_bulkstat); |
| |
| handle_from_fshandle(&handle, ctx->fshandle, ctx->fshandle_len); |
| |
| for (i = 0; !si->aborted && i < ureq->breq.hdr.ocount; i++, bs++) { |
| descr_set(&dsc_bulkstat, bs); |
| handle_from_bulkstat(&handle, bs); |
| error = si->fn(ctx, &handle, bs, si->arg); |
| switch (error) { |
| case 0: |
| break; |
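| /* |
| * Stale or cancelled inodes quietly end this batch; any other |
| * nonzero return is reported as a scan error below. |
| */ |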
| case ESTALE: |
| case ECANCELED: |
| error = 0; |
| fallthrough; |
| default: |
| goto err; |
| } |
| if (scrub_excessive_errors(ctx)) { |
| si->aborted = true; |
| goto out; |
| } |
| } |
| |
| err: |
| if (error) { |
| str_liberror(ctx, error, descr_render(&dsc_bulkstat)); |
| si->aborted = true; |
| } |
| out: |
| free(ureq); |
| } |
| |
| /* |
| * Run one step of the user files bulkstat scan and schedule background |
| * processing of the stat data returned. Returns 1 to keep going, or 0 to |
| * stop. |
| */ |
| static int |
| scan_user_bulkstat( |
| struct scrub_ctx *ctx, |
| struct scan_inodes *si, |
| uint64_t *cursor) |
| { |
| struct user_bulkstat *ureq; |
| const char *what = NULL; |
| int ret; |
| |
| ureq = calloc(1, sizeof(struct user_bulkstat) + |
| XFS_BULKSTAT_REQ_SIZE(LIBFROG_BULKSTAT_CHUNKSIZE)); |
| if (!ureq) { |
| ret = ENOMEM; |
| what = _("creating bulkstat work item"); |
| goto err; |
| } |
| ureq->si = si; |
| ureq->breq.hdr.icount = LIBFROG_BULKSTAT_CHUNKSIZE; |
| ureq->breq.hdr.ino = *cursor; |
| |
| ret = -xfrog_bulkstat(&ctx->mnt, &ureq->breq); |
| if (ret) { |
| what = _("user files bulkstat"); |
| goto err_ureq; |
| } |
| if (ureq->breq.hdr.ocount == 0) { |
| *cursor = NULLFSINO; |
| free(ureq); |
| return 0; |
| } |
| |
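| /* |
| * The kernel advances hdr.ino past the last inode it examined, so |
| * remember it as the starting point for the next bulkstat call. |
| */ |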
| *cursor = ureq->breq.hdr.ino; |
| |
| /* scan_user_files frees ureq; do not access it */ |
| ret = -workqueue_add(&si->wq_bulkstat, scan_user_files, 0, ureq); |
| if (ret) { |
| what = _("queueing bulkstat work"); |
| goto err_ureq; |
| } |
| ureq = NULL; |
| |
| return 1; |
| |
| err_ureq: |
| free(ureq); |
| err: |
| si->aborted = true; |
| str_liberror(ctx, ret, what); |
| return 0; |
| } |
| |
| /* |
| * Scan all the user files in a filesystem in inumber order. On error, this |
| * function will log an error message and return -1. |
| */ |
| int |
| scrub_scan_user_files( |
| struct scrub_ctx *ctx, |
| scrub_inode_iter_fn fn, |
| void *arg) |
| { |
| struct scan_inodes si = { |
| .fn = fn, |
| .arg = arg, |
| .nr_threads = scrub_nproc_workqueue(ctx), |
| }; |
| uint64_t ino = 0; |
| int ret; |
| |
| /* Queue up to four bulkstat result sets per thread. */ |
| ret = -workqueue_create_bound(&si.wq_bulkstat, (struct xfs_mount *)ctx, |
| si.nr_threads, si.nr_threads * 4); |
| if (ret) { |
| str_liberror(ctx, ret, _("creating bulkstat workqueue")); |
| return -1; |
| } |
| |
| while ((ret = scan_user_bulkstat(ctx, &si, &ino)) == 1) { |
| /* empty */ |
| } |
| |
| ret = -workqueue_terminate(&si.wq_bulkstat); |
| if (ret) { |
| si.aborted = true; |
| str_liberror(ctx, ret, _("finishing bulkstat work")); |
| } |
| workqueue_destroy(&si.wq_bulkstat); |
| |
| return si.aborted ? -1 : 0; |
| } |
| |
| /* Open a file by handle, returning either the fd or -1 on error. */ |
| int |
| scrub_open_handle( |
| struct xfs_handle *handle) |
| { |
| return open_by_fshandle(handle, sizeof(*handle), |
| O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY); |
| } |