blob: 7bb11510d3328332d2c651d0aa4224213ba69718 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include <stdint.h>
#include <dirent.h>
#include <sys/statvfs.h>
#include <linux/fsmap.h>
#include "handle.h"
#include "libfrog/paths.h"
#include "libfrog/workqueue.h"
#include "xfs_scrub.h"
#include "common.h"
#include "libfrog/bitmap.h"
#include "disk.h"
#include "filemap.h"
#include "fscounters.h"
#include "inodes.h"
#include "read_verify.h"
#include "spacemap.h"
#include "vfs.h"
#include "common.h"
#include "libfrog/bulkstat.h"
#include "descr.h"
#include "progress.h"
#include <sys/mman.h>
/*
* Phase 6: Verify data file integrity.
*
* Identify potential data block extents with GETFSMAP, then feed those
* extents to the read-verify pool to get the verify commands batched,
* issued, and (if there are problems) reported back to us. If there
* are errors, we'll record the bad regions and (if available) use rmap
* to tell us if metadata are now corrupt. Otherwise, we'll scan the
* whole directory tree looking for files that overlap the bad regions
* and report the paths of the now corrupt files.
*
* If the filesystem supports verity, read the contents of each verity file to
* force it to validate the file contents.
*/
/*
 * Verify disk blocks with GETFSMAP.
 *
 * State shared by the media verification pass: one read-verify pool per
 * device (see dev_to_pool) and the bitmaps that remember_ioerr fills in
 * with the ranges that failed verification (see bitmap_for_disk).
 */
struct media_verify_state {
/* Read-verify pool for the data device. */
struct read_verify_pool *rvp_data;
/* Read-verify pool for the log device; only allocated if ctx->logdev is set. */
struct read_verify_pool *rvp_log;
/* Read-verify pool for the realtime device; only allocated if ctx->rtdev is set. */
struct read_verify_pool *rvp_realtime;
/* Bad ranges found on the data device. */
struct bitmap *d_bad; /* bytes */
/* Bad ranges found on the realtime device. */
struct bitmap *r_bad; /* bytes */
};
/*
 * Find the read-verify pool that services a given device number.  The data
 * device is checked first, then the log device, then the realtime device;
 * any other device number aborts the program.
 */
static struct read_verify_pool *
dev_to_pool(
	struct scrub_ctx		*ctx,
	struct media_verify_state	*vs,
	dev_t				dev)
{
	if (dev == ctx->fsinfo.fs_datadev)
		return vs->rvp_data;
	if (dev == ctx->fsinfo.fs_logdev)
		return vs->rvp_log;
	if (dev != ctx->fsinfo.fs_rtdev)
		abort();
	return vs->rvp_realtime;
}
/*
 * Translate one of the scrub context's open disks into the device number
 * recorded for it in ctx->fsinfo.  A disk that is not the data, log, or
 * realtime disk aborts the program.
 */
static dev_t
disk_to_dev(
	struct scrub_ctx	*ctx,
	struct disk		*disk)
{
	if (disk == ctx->datadev)
		return ctx->fsinfo.fs_datadev;
	if (disk == ctx->logdev)
		return ctx->fsinfo.fs_logdev;
	if (disk != ctx->rtdev)
		abort();
	return ctx->fsinfo.fs_rtdev;
}
/*
 * Pick the in-memory bad-range bitmap that belongs to a disk.  Only the data
 * and realtime devices have one; NULL is returned for anything else (such as
 * the log device).
 */
static struct bitmap *
bitmap_for_disk(
	struct scrub_ctx		*ctx,
	struct disk			*disk,
	struct media_verify_state	*vs)
{
	const dev_t			dev = disk_to_dev(ctx, disk);
	struct bitmap			*found = NULL;

	if (dev == ctx->fsinfo.fs_datadev)
		found = vs->d_bad;
	else if (dev == ctx->fsinfo.fs_rtdev)
		found = vs->r_bad;

	return found;
}
/*
 * Argument bundle handed to report_ioerr through bitmap_iterate: the scrub
 * context and the disk whose bad ranges are being walked.
 */
struct disk_ioerr_report {
struct scrub_ctx *ctx;
struct disk *disk;
};
/* One entry of the special-owner table: an owner code and its description. */
struct owner_decode {
/* Owner code, compared against fsmap fmr_owner by decode_special_owner. */
uint64_t owner;
/* Human-readable description; NULL marks the end of the table. */
const char *descr;
};
/*
 * Descriptions of the special (non-inode) space owners.  The table is
 * terminated by an entry whose description is NULL.
 */
static const struct owner_decode special_owners[] = {
	{ .owner = XFS_FMR_OWN_FREE,		.descr = "free space" },
	{ .owner = XFS_FMR_OWN_UNKNOWN,		.descr = "unknown owner" },
	{ .owner = XFS_FMR_OWN_FS,		.descr = "static FS metadata" },
	{ .owner = XFS_FMR_OWN_LOG,		.descr = "journalling log" },
	{ .owner = XFS_FMR_OWN_AG,		.descr = "per-AG metadata" },
	{ .owner = XFS_FMR_OWN_INOBT,		.descr = "inode btree blocks" },
	{ .owner = XFS_FMR_OWN_INODES,		.descr = "inodes" },
	{ .owner = XFS_FMR_OWN_REFC,		.descr = "refcount btree" },
	{ .owner = XFS_FMR_OWN_COW,		.descr = "CoW staging" },
	{ .owner = XFS_FMR_OWN_DEFECTIVE,	.descr = "bad blocks" },
	{ .owner = 0,				.descr = NULL },
};
/*
 * Look up the description of a special owner code.  Returns NULL if the code
 * is not listed in special_owners.
 */
static const char *
decode_special_owner(
	uint64_t			owner)
{
	const struct owner_decode	*entry;

	for (entry = special_owners; entry->descr != NULL; entry++) {
		if (entry->owner == owner)
			return entry->descr;
	}

	return NULL;
}
/* Routines to translate bad physical extents into file paths and offsets. */

/*
 * Per-file reporting state passed through the filemap and bitmap iterators
 * to report_data_loss, report_attr_loss and report_badfile.
 */
struct badfile_report {
struct scrub_ctx *ctx;
/* Description of the file being examined, used as the message prefix. */
const char *descr;
/* Media verification state holding the bad-range bitmaps. */
struct media_verify_state *vs;
/* Mapping currently being checked; set by report_data_loss. */
struct file_bmap *bmap;
};
/*
 * Bitmap iteration callback: report one bad range that overlaps the file
 * mapping currently stored in the badfile_report.
 *
 * @start and @length describe the bad range; @arg is the badfile_report.
 * The range is trimmed to the mapping and then reported as a file offset and
 * length.  Always returns 0 so that iteration continues.
 */
static int
report_badfile(
	uint64_t		start,
	uint64_t		length,
	void			*arg)
{
	struct badfile_report	*br = arg;
	unsigned long long	bad_offset;
	unsigned long long	bad_length;

	/* Drop whatever part of the bad range precedes the mapping. */
	if (start < br->bmap->bm_physical) {
		length -= br->bmap->bm_physical - start;
		start = br->bmap->bm_physical;
	}

	/* The bad range can't be longer than the mapping itself. */
	length = min(length, br->bmap->bm_length);

	/* Distance from the start of the mapping to the first bad byte. */
	bad_offset = start - br->bmap->bm_physical;

	/* Stop at whichever ends first: the bad range or the mapping. */
	bad_length = min(start + length,
			 br->bmap->bm_physical + br->bmap->bm_length) - start;

	str_unfixable_error(br->ctx, br->descr,
 _("media error at data offset %llu length %llu."),
			br->bmap->bm_offset + bad_offset, bad_length);
	return 0;
}
/*
 * Filemap iteration callback for the data fork: check one mapping against
 * the bad-range bitmap of the device that holds it (realtime or data) and
 * report every overlap through report_badfile.  Unwritten and delayed
 * allocation mappings are skipped.
 */
static int
report_data_loss(
	struct scrub_ctx	*ctx,
	int			fd,
	int			whichfork,
	struct fsxattr		*fsx,
	struct file_bmap	*bmap,
	void			*arg)
{
	struct badfile_report	*br = arg;
	struct bitmap		*bad;

	/* report_badfile needs to know which mapping it is looking at. */
	br->bmap = bmap;

	/* Only report errors for real extents. */
	if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
		return 0;

	bad = (fsx->fsx_xflags & FS_XFLAG_REALTIME) ? br->vs->r_bad :
						      br->vs->d_bad;

	return -bitmap_iterate_range(bad, bmap->bm_physical, bmap->bm_length,
			report_badfile, br);
}
/*
 * Filemap iteration callback for the attr fork: complain if a mapping
 * overlaps a bad range on the data device.  Mappings that look wrong for an
 * attr fork (unwritten/delalloc, or flagged realtime) only get an
 * informational message.  Always returns 0.
 */
static int
report_attr_loss(
	struct scrub_ctx	*ctx,
	int			fd,
	int			whichfork,
	struct fsxattr		*fsx,
	struct file_bmap	*bmap,
	void			*arg)
{
	struct badfile_report	*br = arg;

	if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) {
		/* Complain about attr fork extents that don't look right. */
		str_info(ctx, br->descr,
_("found unexpected unwritten/delalloc attr fork extent."));
	} else if (fsx->fsx_xflags & FS_XFLAG_REALTIME) {
		str_info(ctx, br->descr,
_("found unexpected realtime attr fork extent."));
	} else if (bitmap_test(br->vs->d_bad, bmap->bm_physical,
				bmap->bm_length)) {
		str_corrupt(ctx, br->descr,
_("media error in extended attribute data."));
	}

	return 0;
}
/* Iterate the extent mappings of a file to report errors. */
static int
report_fd_loss(
struct scrub_ctx *ctx,
const char *descr,
int fd,
void *arg)
{
struct badfile_report br = {
.ctx = ctx,
.vs = arg,
.descr = descr,
};
struct file_bmap key = {0};
int ret;
/* data fork */
ret = scrub_iterate_filemaps(ctx, fd, XFS_DATA_FORK, &key,
report_data_loss, &br);
if (ret) {
str_liberror(ctx, ret, descr);
return ret;
}
/* attr fork */
ret = scrub_iterate_filemaps(ctx, fd, XFS_ATTR_FORK, &key,
report_attr_loss, &br);
if (ret) {
str_liberror(ctx, ret, descr);
return ret;
}
return 0;
}
/*
 * Inode scan callback: report read verify errors in unlinked (but still
 * open) regular files and directories.
 *
 * Linked files are skipped here.  Returns ESTALE if the handle went stale so
 * that the caller can retry, 0 if the inode was skipped or could not be
 * opened, and otherwise the result of report_fd_loss.  A close() failure is
 * reported and, if nothing else went wrong, returned as its errno value.
 */
static int
report_inode_loss(
	struct scrub_ctx	*ctx,
	struct xfs_handle	*handle,
	struct xfs_bulkstat	*bstat,
	void			*arg)
{
	char			descr[DESCR_BUFSZ];
	int			fd;
	int			error, err2;

	/* Ignore linked files and things we can't open. */
	if (bstat->bs_nlink != 0)
		return 0;
	if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
		return 0;

	scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ,
			bstat->bs_ino, bstat->bs_gen, _("(unlinked)"));

	/* Try to open the inode. */
	fd = scrub_open_handle(handle);
	if (fd < 0) {
		/* Handle is stale, try again. */
		if (errno == ESTALE)
			return ESTALE;

		str_error(ctx, descr,
 _("Could not open to report read errors: %s."),
				strerror(errno));
		return 0;
	}

	/* Go find the badness. */
	error = report_fd_loss(ctx, descr, fd, arg);

	if (close(fd)) {
		/* Capture errno before the reporting code can disturb it. */
		err2 = errno;
		str_errno(ctx, descr);

		/* Don't let a close failure vanish if the scan was clean. */
		if (!error)
			error = err2;
	}

	return error;
}
/*
 * Directory tree walk callback: check the extent mappings of the directory
 * itself against the read verify error list.
 */
static int
report_dir_loss(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	int			error;

	error = report_fd_loss(ctx, path, dir_fd, arg);
	return error;
}
/*
 * Directory tree walk callback: scan the inode associated with a directory
 * entry for matches with the read verify error list.
 *
 * Only regular files and directories are examined, and "." and ".." are
 * skipped.  @dirent must not be NULL.  A file that cannot be opened is
 * reported (unless it has simply disappeared) and does not stop the walk.
 * Returns 0, the result of report_fd_loss, or the close() return value if
 * closing the file was the only thing that failed.
 */
static int
report_dirent_loss(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	struct dirent		*dirent,
	struct stat		*sb,
	void			*arg)
{
	int			fd;
	int			error, err2;

	/* Ignore things we can't open. */
	if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
		return 0;

	/* Ignore . and .. */
	if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
		return 0;

	/* Open the file named by the dirent, relative to its directory. */
	fd = openat(dir_fd, dirent->d_name,
			O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
	if (fd < 0) {
		char		descr[PATH_MAX + 1];
		/* Save errno now; snprintf is allowed to modify it. */
		int		open_errno = errno;

		/* The file went away while we were walking; not an error. */
		if (open_errno == ENOENT)
			return 0;

		snprintf(descr, PATH_MAX, "%s/%s", path, dirent->d_name);
		descr[PATH_MAX] = 0;

		str_error(ctx, descr,
 _("Could not open to report read errors: %s."),
				strerror(open_errno));
		return 0;
	}

	/* Go find the badness. */
	error = report_fd_loss(ctx, path, fd, arg);

	err2 = close(fd);
	if (err2)
		str_errno(ctx, path);
	if (!error && err2)
		error = err2;

	return error;
}
/*
 * One bad range taken from a bad-block bitmap, passed from report_ioerr to
 * report_ioerr_fsmap.
 */
struct ioerr_filerange {
/* Start of the bad range on the device. */
uint64_t physical;
/* Length of the bad range. */
uint64_t length;
};
/*
 * With both reverse mapping and parent pointers enabled, a media error can
 * be mapped straight back to a file and a position within it, so there is no
 * need to walk the directory tree.  Returns true only if the filesystem
 * geometry advertises both features.
 */
static inline bool
can_use_pptrs(
	const struct scrub_ctx	*ctx)
{
	if (!(ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_PARENT))
		return false;

	return (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT) != 0;
}
/* Use a fsmap to report metadata lost to a media error. */
static int
report_ioerr_fsmap(
struct scrub_ctx *ctx,
struct fsmap *map,
void *arg)
{
const char *type;
struct xfs_bulkstat bs = { };
char buf[DESCR_BUFSZ];
struct ioerr_filerange *fr = arg;
uint64_t err_off;
int ret;
/* Don't care about unwritten extents. */
if (map->fmr_flags & FMR_OF_PREALLOC)
return 0;
if (fr->physical > map->fmr_physical)
err_off = fr->physical - map->fmr_physical;
else
err_off = 0;
/* Report special owners */
if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
snprintf(buf, DESCR_BUFSZ, _("disk offset %"PRIu64),
(uint64_t)map->fmr_physical + err_off);
type = decode_special_owner(map->fmr_owner);
/*
* On filesystems that don't store reverse mappings, the
* GETFSMAP call returns OWNER_UNKNOWN for allocated space.
* We'll have to let the directory tree walker find the file
* that lost data.
*/
if (!(ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT) &&
map->fmr_owner == XFS_FMR_OWN_UNKNOWN) {
str_info(ctx, buf, _("media error detected."));
} else {
str_corrupt(ctx, buf, _("media error in %s."), type);
}
}
if (can_use_pptrs(ctx)) {
ret = -xfrog_bulkstat_single(&ctx->mnt, map->fmr_owner, 0, &bs);
if (ret)
str_liberror(ctx, ret,
_("bulkstat for media error report"));
}
/* Report extent maps */
if (map->fmr_flags & FMR_OF_EXTENT_MAP) {
bool attr = (map->fmr_flags & FMR_OF_ATTR_FORK);
scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ,
map->fmr_owner, bs.bs_gen, " %s",
attr ? _("extended attribute") :
_("file data"));
str_corrupt(ctx, buf, _("media error in extent map"));
}
/*
* If directory parent pointers are available, use that to find the
* pathname to a file, and report that path as having lost its
* extended attributes, or the precise offset of the lost file data.
*/
if (!can_use_pptrs(ctx))
return 0;
scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ, map->fmr_owner,
bs.bs_gen, NULL);
if (map->fmr_flags & FMR_OF_ATTR_FORK) {
str_corrupt(ctx, buf, _("media error in extended attributes"));
return 0;
}
str_unfixable_error(ctx, buf,
_("media error at data offset %llu length %llu."),
err_off, fr->length);
return 0;
}
/*
* For a range of bad blocks, visit each space mapping that overlaps the bad
* range so that we can report lost metadata.
*/
static int
report_ioerr(
uint64_t start,
uint64_t length,
void *arg)
{
struct fsmap keys[2];
struct ioerr_filerange fr = {
.physical = start,
.length = length,
};
struct disk_ioerr_report *dioerr = arg;
dev_t dev;
dev = disk_to_dev(dioerr->ctx, dioerr->disk);
/* Go figure out which blocks are bad from the fsmap. */
memset(keys, 0, sizeof(struct fsmap) * 2);
keys->fmr_device = dev;
keys->fmr_physical = start;
(keys + 1)->fmr_device = dev;
(keys + 1)->fmr_physical = start + length - 1;
(keys + 1)->fmr_owner = ULLONG_MAX;
(keys + 1)->fmr_offset = ULLONG_MAX;
(keys + 1)->fmr_flags = UINT_MAX;
return -scrub_iterate_fsmap(dioerr->ctx, keys, report_ioerr_fsmap,
&fr);
}
/*
 * Report every media error recorded for one disk.  Does nothing if the disk
 * is absent or has no bad-range bitmap.
 */
static int
report_disk_ioerrs(
	struct scrub_ctx		*ctx,
	struct disk			*disk,
	struct media_verify_state	*vs)
{
	struct disk_ioerr_report	dioerr;
	struct bitmap			*bad;

	if (disk == NULL)
		return 0;

	bad = bitmap_for_disk(ctx, disk, vs);
	if (bad == NULL)
		return 0;

	dioerr.ctx = ctx;
	dioerr.disk = disk;
	return -bitmap_iterate(bad, report_ioerr, &dioerr);
}
/* Given bad extent lists for the data & rtdev, find bad files. */
static int
report_all_media_errors(
struct scrub_ctx *ctx,
struct media_verify_state *vs)
{
int ret;
ret = report_disk_ioerrs(ctx, ctx->datadev, vs);
if (ret) {
str_liberror(ctx, ret, _("walking datadev io errors"));
return ret;
}
ret = report_disk_ioerrs(ctx, ctx->rtdev, vs);
if (ret) {
str_liberror(ctx, ret, _("walking rtdev io errors"));
return ret;
}
/*
* Scan the directory tree to get file paths if we didn't already use
* directory parent pointers to report the loss.
*/
if (!can_use_pptrs(ctx)) {
ret = scan_fs_tree(ctx, report_dir_loss, report_dirent_loss,
vs);
if (ret)
return ret;
}
/* Scan for unlinked files. */
return scrub_scan_all_inodes(ctx, report_inode_loss, 0, vs);
}
/* Schedule a read-verify of a (data block) extent. */
static int
check_rmap(
struct scrub_ctx *ctx,
struct fsmap *map,
void *arg)
{
struct media_verify_state *vs = arg;
struct read_verify_pool *rvp;
int ret;
rvp = dev_to_pool(ctx, vs, map->fmr_device);
dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
" offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
major(map->fmr_device), minor(map->fmr_device),
(uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
(uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
map->fmr_flags);
/* "Unknown" extents should be verified; they could be data. */
if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
/*
* We only care about read-verifying data extents that have been
* written to disk. This means we can skip "special" owners
* (metadata), xattr blocks, unwritten extents, and extent maps.
* These should all get checked elsewhere in the scrubber.
*/
if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
return 0;
/* XXX: Filter out directory data blocks. */
/* Schedule the read verify command for (eventual) running. */
ret = read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length,
vs);
if (ret) {
str_liberror(ctx, ret, _("scheduling media verify command"));
return ret;
}
return 0;
}
/*
 * Wait for read/verify actions to finish, add the number of bytes the pool
 * checked to *bytes_checked, and tear the pool down.
 *
 * A NULL @rvp is a no-op.  The pool is destroyed on every path, including
 * the error paths, so the caller must not touch @rvp afterwards.  Returns 0
 * or the first error encountered.
 */
static int
clean_pool(
	struct read_verify_pool	*rvp,
	unsigned long long	*bytes_checked)
{
	uint64_t		pool_checked;
	int			ret;

	if (!rvp)
		return 0;

	ret = read_verify_force_io(rvp);
	if (ret) {
		/*
		 * Abort before destroying, the same way phase6_func tears
		 * down a pool that it cannot flush cleanly; returning here
		 * without destroying would leak the pool.
		 */
		read_verify_pool_abort(rvp);
		goto out_destroy;
	}

	ret = read_verify_pool_flush(rvp);
	if (ret)
		goto out_destroy;

	ret = read_verify_bytes(rvp, &pool_checked);
	if (ret)
		goto out_destroy;

	*bytes_checked += pool_checked;
out_destroy:
	read_verify_pool_destroy(rvp);
	return ret;
}
/*
 * Read-verify pool callback: remember a media error for later by marking the
 * failed range in the bad-range bitmap of the disk it occurred on.  @arg is
 * the media_verify_state.  Problems are logged; nothing is returned.
 */
static void
remember_ioerr(
	struct scrub_ctx		*ctx,
	struct disk			*disk,
	uint64_t			start,
	uint64_t			length,
	int				error,
	void				*arg)
{
	struct media_verify_state	*vs = arg;
	struct bitmap			*bad = bitmap_for_disk(ctx, disk, vs);
	int				err;

	if (bad == NULL) {
		str_liberror(ctx, ENOENT, _("finding bad block bitmap"));
		return;
	}

	err = -bitmap_set(bad, start, length);
	if (err != 0)
		str_liberror(ctx, err, _("setting bad block bitmap"));
}
/* State shared by the fsverity file scan (see scan_verity_files). */
struct verity_ctx {
struct scrub_ctx *ctx;
/* Work queue for verity files that are not flagged realtime. */
struct workqueue wq_ddev;
/* Work queue for realtime verity files; only created if ctx->rtdev is set. */
struct workqueue wq_rtdev;
/* Set when scheduling or scanning fails; later work is skipped. */
bool aborted;
};
/*
 * Work item for scanning one verity file.  Allocated by schedule_verity_file
 * and freed by scan_verity_file.
 */
struct verity_file_ctx {
/* Copy of the file handle used to open the file. */
struct xfs_handle handle;
/* Back pointer to the shared scan state. */
struct verity_ctx *vc;
};
/*
 * Description renderer: format the inode number and generation stored in a
 * file handle.  @data points to a struct xfs_handle.
 */
static int
render_ino_from_handle(
	struct scrub_ctx	*ctx,
	char			*buf,
	size_t			buflen,
	void			*data)
{
	struct xfs_handle	*handle = data;
	int			ret;

	ret = scrub_render_ino_descr(ctx, buf, buflen,
			handle->ha_fid.fid_ino, handle->ha_fid.fid_gen, NULL);
	return ret;
}
/*
 * Report one range of a file that failed verity validation.  A negative
 * @fail_pos means that there is nothing to report.
 */
static inline void
report_verity_error(
	struct scrub_ctx	*ctx,
	struct descr		*dsc,
	off_t			fail_pos,
	off_t			fail_len)
{
	unsigned long long	first;
	unsigned long long	last;

	if (fail_pos < 0)
		return;

	/* Report the range as inclusive first and last byte offsets. */
	first = (unsigned long long)fail_pos;
	last = (unsigned long long)(fail_pos + fail_len - 1);

	str_unfixable_error(ctx, descr_render(dsc),
 _("verity error at offsets %llu-%llu"),
			first, last);
}
/*
 * Record a verity validation error at @pos for @len bytes.
 *
 * If the new error immediately follows the pending one, the pending range is
 * simply extended.  Otherwise any pending range is reported and the new
 * error becomes the pending range.
 */
static inline void
record_verity_error(
	struct scrub_ctx	*ctx,
	struct descr		*dsc,
	off_t			pos,
	size_t			len,
	off_t			*fail_pos,
	off_t			*fail_len)
{
	if (*fail_pos >= 0) {
		/* Contiguous with what we already have: grow it. */
		if (pos == *fail_pos + *fail_len) {
			*fail_len += len;
			return;
		}

		/* Disjoint: flush the old range before starting anew. */
		report_verity_error(ctx, dsc, *fail_pos, *fail_len);
	}

	*fail_pos = pos;
	*fail_len = len;
}
/*
 * Record a verity validation success.  Any pending error range is reported,
 * and the pending range is reset to "none".
 */
static inline void
record_verity_success(
	struct scrub_ctx	*ctx,
	struct descr		*dsc,
	off_t			*fail_pos,
	off_t			*fail_len)
{
	const off_t		pending_pos = *fail_pos;
	const off_t		pending_len = *fail_len;

	*fail_pos = -1;
	*fail_len = 0;

	if (pending_pos >= 0)
		report_verity_error(ctx, dsc, pending_pos, pending_len);
}
/*
 * Map at most this many bytes at a time (4 MiB).  scan_verity_file walks
 * each file in steps of this size.
 */
#define MMAP_LENGTH (4194304)
/*
 * Use MADV_POPULATE_READ to validate verity file contents.
 *
 * Maps @length bytes of @fd starting at @pos and asks the kernel to populate
 * the mapping.  Returns @length if the entire region validated ok; 0 to
 * signal to the caller that they should fall back to regular reads; or a
 * negative errno if some other error happened.
 */
static ssize_t
validate_mmap(
	int		fd,
	off_t		pos,
	size_t		length)
{
	void		*addr;
	int		error;

	/*
	 * Try to map this file into the address space.  If that fails, we
	 * can fall back to reading the file contents with read(), so every
	 * mmap failure is reported to the caller as "fall back" (0).
	 */
	addr = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, pos);
	if (addr == MAP_FAILED)
		return 0;

	/* Returns EFAULT for read IO errors. */
	if (madvise(addr, length, MADV_POPULATE_READ)) {
		/* Save errno first; munmap is allowed to overwrite it. */
		error = errno;
		munmap(addr, length);
		if (error == EFAULT)
			return 0;
		return -error;
	}

	if (munmap(addr, length))
		return -errno;

	return length;
}
/*
 * Use pread to validate verity file contents.
 *
 * Reads one byte at the start of every st_blksize-sized block in the range
 * that starts at @pos and spans @length bytes.  A successful read records a
 * validation success; EIO records a validation error for that block and the
 * scan continues.  The pending error range is tracked in @fail_pos and
 * @fail_len.
 *
 * Returns the number of bytes validated; 0 to signal to the caller that EOF
 * was encountered; or a negative errno if some other error happened.
 */
static ssize_t
validate_pread(
	struct scrub_ctx	*ctx,
	struct descr		*dsc,
	int			fd,
	const struct stat	*statbuf,
	off_t			pos,
	size_t			length,
	off_t			*fail_pos,
	off_t			*fail_len)
{
	ssize_t			validated;

	for (validated = 0;
	     (size_t)validated < length;
	     validated += statbuf->st_blksize, pos += statbuf->st_blksize) {
		char		c;
		ssize_t		bytes_read;
		int		error;

		bytes_read = pread(fd, &c, 1, pos);
		if (!bytes_read)
			break;
		if (bytes_read > 0) {
			record_verity_success(ctx, dsc, fail_pos, fail_len);
			continue;
		}

		/* Capture errno before anything else can change it. */
		error = errno;
		if (error == EIO) {
			/* The last block of the file may be a short one. */
			size_t	bad_len = min(statbuf->st_size - pos,
					      statbuf->st_blksize);

			record_verity_error(ctx, dsc, pos, bad_len, fail_pos,
					fail_len);
			continue;
		}

		/* errno is still the pread error here; report, then return. */
		str_errno(ctx, descr_render(dsc));
		return -error;
	}

	return validated;
}
/*
 * Work queue function: scan a verity file's data looking for validation
 * errors.
 *
 * @arg is a struct verity_file_ctx, which is freed before returning.  The
 * file is opened by handle and walked in MMAP_LENGTH-sized steps, first with
 * validate_mmap and, if that asks for a fallback, with validate_pread.  If
 * ret is nonzero at the end, the shared aborted flag is set.
 */
static void
scan_verity_file(
struct workqueue *wq,
uint32_t index,
void *arg)
{
struct stat sb;
struct verity_file_ctx *vf = arg;
struct scrub_ctx *ctx = vf->vc->ctx;
off_t pos;
off_t max_map_pos;
/* Pending (not yet reported) failed range; fail_pos < 0 means none. */
off_t fail_pos = -1, fail_len = 0;
int fd;
int ret;
DEFINE_DESCR(dsc, ctx, render_ino_from_handle);
/* Page size, looked up on first use and then cached. */
static long pagesize;
if (!pagesize)
pagesize = sysconf(_SC_PAGESIZE);
descr_set(&dsc, &vf->handle);
/* Somebody else already hit a fatal problem; don't start new work. */
if (vf->vc->aborted) {
ret = ECANCELED;
goto out_vf;
}
fd = scrub_open_handle(&vf->handle);
if (fd < 0) {
/*
* Stale file handle means that the verity file is gone.
*
* Even if there's a replacement file, its contents have been
* freshly written and checked. Either way, we can skip
* scanning this file.
*/
if (errno == ESTALE) {
ret = 0;
goto out_vf;
}
/*
* If the fsverity metadata is missing, inform the user and
* move on to the next file.
*/
if (fsverity_meta_is_missing(errno)) {
str_error(ctx, descr_render(&dsc),
_("fsverity metadata missing."));
ret = 0;
goto out_vf;
}
/* Any other open failure is fatal for the scan. */
ret = -errno;
str_errno(ctx, descr_render(&dsc));
goto out_vf;
}
ret = fstat(fd, &sb);
if (ret) {
str_errno(ctx, descr_render(&dsc));
goto out_fd;
}
/* Validate the file contents with MADV_POPULATE_READ and pread */
/* Round the file size up to a whole number of pages for mapping. */
max_map_pos = roundup(sb.st_size, pagesize);
for (pos = 0; pos < max_map_pos; pos += MMAP_LENGTH) {
size_t length = min(max_map_pos - pos, MMAP_LENGTH);
ssize_t validated;
validated = validate_mmap(fd, pos, length);
/* The whole chunk validated; flush any pending error range. */
if (validated > 0) {
record_verity_success(ctx, &dsc, &fail_pos, &fail_len);
progress_add(validated);
continue;
}
/*
* NOTE(review): ret is still 0 here (from the successful fstat), so
* this path reports the error but does not set vc->aborted -- confirm
* that this is intended.
*/
if (validated < 0) {
errno = -validated;
str_errno(ctx, descr_render(&dsc));
goto out_fd;
}
/* validate_mmap returned 0: fall back to reading block by block. */
validated = validate_pread(ctx, &dsc, fd, &sb, pos, length,
&fail_pos, &fail_len);
/* Stop at EOF (0) or on a read error (negative). */
if (validated <= 0)
break;
progress_add(validated);
}
/* Report whatever failed range is still pending. */
report_verity_error(ctx, &dsc, fail_pos, fail_len);
ret = close(fd);
if (ret) {
str_errno(ctx, descr_render(&dsc));
goto out_vf;
}
/* The fd is closed; make sure out_fd doesn't close it again. */
fd = -1;
out_fd:
if (fd >= 0)
close(fd);
out_vf:
if (ret)
vf->vc->aborted = true;
free(vf);
return;
}
/*
 * Inode scan callback: if this is a verity file, queue it for scanning.
 *
 * Files without the verity flag only have their size added to the progress
 * counter.  Verity files get a work item holding a copy of the handle, which
 * is queued on the realtime or data device work queue depending on the
 * file's realtime flag.  On success the work item is owned (and freed) by
 * scan_verity_file.
 *
 * Returns 0 on success, ECANCELED if the scan has been aborted or the work
 * could not be queued, or ENOMEM if the work item could not be allocated.
 */
static int
schedule_verity_file(
	struct scrub_ctx	*ctx,
	struct xfs_handle	*handle,
	struct xfs_bulkstat	*bs,
	void			*arg)
{
	struct verity_ctx	*vc = arg;
	struct verity_file_ctx	*vf;
	int			ret;

	if (vc->aborted)
		return ECANCELED;

	if (!(bs->bs_xflags & FS_XFLAG_VERITY)) {
		progress_add(bs->bs_size);
		return 0;
	}

	vf = malloc(sizeof(*vf));
	if (!vf) {
		str_errno(ctx, _("could not allocate fsverity scan context"));
		vc->aborted = true;
		return ENOMEM;
	}

	/* Queue the validation work. */
	vf->handle = *handle; /* struct copy */
	vf->vc = vc;

	if (bs->bs_xflags & FS_XFLAG_REALTIME)
		ret = -workqueue_add(&vc->wq_rtdev, scan_verity_file, 0, vf);
	else
		ret = -workqueue_add(&vc->wq_ddev, scan_verity_file, 0, vf);
	if (ret) {
		str_liberror(ctx, ret, _("could not schedule fsverity scan"));
		/* The work was never queued, so nobody else will free this. */
		free(vf);
		vc->aborted = true;
		return ECANCELED;
	}

	return 0;
}
/*
 * Validate the contents of every verity file in the filesystem.
 *
 * Creates one bounded work queue for the data device and, if there is a
 * realtime device, one for it too; scans all inodes to queue the verity
 * files; and then tears the queues down again.  Returns 0 on success,
 * ECANCELED if the scan was aborted, or another error code.
 */
static int
scan_verity_files(
	struct scrub_ctx	*ctx)
{
	struct verity_ctx	vc = {
		.ctx		= ctx,
	};
	unsigned int		verifier_threads;
	int			ret;

	/* Create thread pool for data dev fsverity processing. */
	verifier_threads = disk_heads(ctx->datadev);
	if (verifier_threads == 1)
		verifier_threads = 0;
	ret = -workqueue_create_bound(&vc.wq_ddev, ctx, verifier_threads, 500);
	if (ret) {
		str_liberror(ctx, ret, _("creating data dev fsverity workqueue"));
		return ret;
	}

	/* Create thread pool for rtdev fsverity processing. */
	if (ctx->rtdev) {
		verifier_threads = disk_heads(ctx->rtdev);
		if (verifier_threads == 1)
			verifier_threads = 0;
		ret = -workqueue_create_bound(&vc.wq_rtdev, ctx,
				verifier_threads, 500);
		if (ret) {
			str_liberror(ctx, ret,
					_("creating rt dev fsverity workqueue"));
			goto out_ddev;
		}
	}

	/* Find all the verity inodes. */
	ret = scrub_scan_all_inodes(ctx, schedule_verity_file, 0, &vc);

	/* Only tear down the rt work queue if it was actually created. */
	if (ctx->rtdev) {
		workqueue_terminate(&vc.wq_rtdev);
		workqueue_destroy(&vc.wq_rtdev);
	}
out_ddev:
	workqueue_terminate(&vc.wq_ddev);
	workqueue_destroy(&vc.wq_ddev);

	/*
	 * Check the abort flag only after the queues are gone, so that a
	 * failure in work that was still queued when the inode scan finished
	 * is not missed.  (Presumably workqueue_terminate waits for queued
	 * work to complete -- verify against libfrog/workqueue.)
	 */
	if (!ret && vc.aborted)
		ret = ECANCELED;
	return ret;
}
/*
 * Read verify all the file data blocks in a filesystem.  Since XFS doesn't
 * do data checksums, we trust that the underlying storage will pass back
 * an IO error if it can't retrieve whatever we previously stored there.
 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
 * scan the extent maps of the entire fs tree (and of the unlinked inodes)
 * to figure out which files are now broken.
 *
 * If the filesystem geometry advertises verity, the verity files are
 * validated first.  Returns 0 on success or a nonzero error code.
 */
int
phase6_func(
	struct scrub_ctx		*ctx)
{
	struct media_verify_state	vs = { NULL };
	int				ret, ret2, ret3;

	if (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_VERITY) {
		ret = scan_verity_files(ctx);
		if (ret)
			return ret;
	}

	ret = -bitmap_alloc(&vs.d_bad);
	if (ret) {
		str_liberror(ctx, ret, _("creating datadev badblock bitmap"));
		return ret;
	}

	ret = -bitmap_alloc(&vs.r_bad);
	if (ret) {
		str_liberror(ctx, ret, _("creating realtime badblock bitmap"));
		goto out_dbad;
	}

	ret = read_verify_pool_alloc(ctx, ctx->datadev,
			ctx->mnt.fsgeom.blocksize, remember_ioerr,
			scrub_nproc(ctx), &vs.rvp_data);
	if (ret) {
		str_liberror(ctx, ret, _("creating datadev media verifier"));
		goto out_rbad;
	}
	if (ctx->logdev) {
		ret = read_verify_pool_alloc(ctx, ctx->logdev,
				ctx->mnt.fsgeom.blocksize, remember_ioerr,
				scrub_nproc(ctx), &vs.rvp_log);
		if (ret) {
			str_liberror(ctx, ret,
					_("creating logdev media verifier"));
			goto out_datapool;
		}
	}
	if (ctx->rtdev) {
		ret = read_verify_pool_alloc(ctx, ctx->rtdev,
				ctx->mnt.fsgeom.blocksize, remember_ioerr,
				scrub_nproc(ctx), &vs.rvp_realtime);
		if (ret) {
			str_liberror(ctx, ret,
					_("creating rtdev media verifier"));
			goto out_logpool;
		}
	}

	/* Feed every written data extent to the read-verify pools. */
	ret = scrub_scan_all_spacemaps(ctx, check_rmap, &vs);
	if (ret)
		goto out_rtpool;

	/*
	 * Flush every pool even if an earlier one failed; clean_pool is what
	 * destroys them.
	 */
	ret = clean_pool(vs.rvp_data, &ctx->bytes_checked);
	if (ret)
		str_liberror(ctx, ret, _("flushing datadev verify pool"));

	ret2 = clean_pool(vs.rvp_log, &ctx->bytes_checked);
	if (ret2)
		str_liberror(ctx, ret2, _("flushing logdev verify pool"));

	ret3 = clean_pool(vs.rvp_realtime, &ctx->bytes_checked);
	if (ret3)
		str_liberror(ctx, ret3, _("flushing rtdev verify pool"));

	/*
	 * If the verify flush didn't work, bail out with the first error that
	 * we saw; a log or rt pool failure must not be returned as success.
	 */
	if (ret || ret2 || ret3) {
		if (!ret)
			ret = ret2 ? ret2 : ret3;
		goto out_rbad;
	}

	/* No bad blocks found means we're done!  No errors detected. */
	if (bitmap_empty(vs.d_bad) && bitmap_empty(vs.r_bad))
		goto out_rbad;

	/* Scan the whole dir tree to see what matches the bad extents. */
	ret = report_all_media_errors(ctx, &vs);

	/* The pools are already gone; only the bitmaps are left to free. */
	goto out_rbad;

out_rtpool:
	if (vs.rvp_realtime) {
		read_verify_pool_abort(vs.rvp_realtime);
		read_verify_pool_destroy(vs.rvp_realtime);
	}
out_logpool:
	if (vs.rvp_log) {
		read_verify_pool_abort(vs.rvp_log);
		read_verify_pool_destroy(vs.rvp_log);
	}
out_datapool:
	read_verify_pool_abort(vs.rvp_data);
	read_verify_pool_destroy(vs.rvp_data);
out_rbad:
	bitmap_free(&vs.r_bad);
out_dbad:
	bitmap_free(&vs.d_bad);
	return ret;
}
/* Estimate how much work we're going to do. */
int
phase6_estimate(
struct scrub_ctx *ctx,
uint64_t *items,
unsigned int *nr_threads,
int *rshift)
{
unsigned long long d_blocks;
unsigned long long d_bfree;
unsigned long long r_blocks;
unsigned long long r_bfree;
unsigned long long dontcare;
int ret;
ret = scrub_scan_estimate_blocks(ctx, &d_blocks, &d_bfree, &r_blocks,
&r_bfree, &dontcare);
if (ret) {
str_liberror(ctx, ret, _("estimating verify work"));
return ret;
}
*items = cvt_off_fsb_to_b(&ctx->mnt,
(d_blocks - d_bfree) + (r_blocks - r_bfree));
/*
* Each read-verify pool starts a thread pool, and each worker thread
* can contribute to the progress counter. Hence we need to set
* nr_threads appropriately to handle that many threads.
*/
*nr_threads = disk_heads(ctx->datadev);
if (ctx->rtdev)
*nr_threads += disk_heads(ctx->rtdev);
if (ctx->logdev)
*nr_threads += disk_heads(ctx->logdev);
*rshift = 20;
/*
* If fsverity is active, double the amount of progress items because
* we will want to validate individual files' data with fsverity.
* Bump the thread counts for the separate verity thread pools and the
* inode scanner.
*/
if (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_VERITY) {
*items *= 2;
*nr_threads += disk_heads(ctx->datadev);
*nr_threads += scrub_nproc_workqueue(ctx);
if (ctx->rtdev)
*nr_threads += disk_heads(ctx->rtdev);
}
return 0;
}