blob: f6c295c64adae1e3b25e5d30018228a28e541d03 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include <stdint.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/statvfs.h>
#ifdef HAVE_LIBATTR
# include <attr/attributes.h>
#endif
#include <linux/fs.h>
#include "handle.h"
#include "list.h"
#include "libfrog/paths.h"
#include "libfrog/workqueue.h"
#include "libfrog/fsgeom.h"
#include "libfrog/scrub.h"
#include "libfrog/bitmap.h"
#include "libfrog/bulkstat.h"
#include "xfs_scrub.h"
#include "common.h"
#include "inodes.h"
#include "progress.h"
#include "scrub.h"
#include "descr.h"
#include "unicrash.h"
#include "repair.h"
/* Phase 5: Full inode scans and check directory connectivity. */
/* State shared by the phase 5 inode name scan and the deferred-inode retries. */
struct ncheck_state {
/* Scrub context; the deferred-inode retry helpers read it from here. */
struct scrub_ctx *ctx;
/* Have we aborted this scan? */
bool aborted;
/* Is this the last time we're going to process deferred inodes? */
bool last_call;
/* Did we fix at least one thing while walking @cur_deferred? */
bool fixed_something;
/* Lock for this structure; defer_inode() holds it while updating @new_deferred. */
pthread_mutex_t lock;
/*
* Inodes that are involved with directory tree structure corruptions
* are marked here. This will be NULL until the first corruption is
* noted.
*/
struct bitmap *new_deferred;
/*
* Inodes that we're reprocessing due to earlier directory tree
* structure corruption problems are marked here. This will be NULL
* during the first (parallel) inode scan.
*/
struct bitmap *cur_deferred;
};
/*
 * Fallback name checker: look for bytes in a directory entry, extended
 * attribute, or label name that could do something naughty to the user's
 * terminal or break scripts, i.e. control characters and escape sequences.
 * XFS doesn't consider any byte sequence invalid, so these are reported as
 * warnings or informational messages rather than errors.
 *
 * @namedescr is the human-readable kind of name being checked; it is
 * substituted into the message.
 *
 * Returns 0 for success or -1 for error.  This function logs errors.
 */
static int
simple_check_name(
	struct scrub_ctx	*ctx,
	struct descr		*dsc,
	const char		*namedescr,
	const char		*name)
{
	const char		*c;
	char			*escaped;

	/* Complain about zero length names. */
	if (name[0] == '\0' && should_warn_about_name(ctx)) {
		str_warn(ctx, descr_render(dsc), _("Zero length name found."));
		return 0;
	}

	/* Stop at the first control character (0x01-0x1f or DEL). */
	for (c = name; *c != '\0'; c++) {
		if (*c == 127 || (*c >= 1 && *c <= 31))
			break;
	}

	/* Reached the terminator, so the name is clean. */
	if (*c == '\0')
		return 0;

	if (!should_warn_about_name(ctx))
		return 0;

	/* Escape the name so that printing it cannot mess up the terminal. */
	escaped = string_escape(name);
	if (escaped == NULL) {
		str_errno(ctx, descr_render(dsc));
		return -1;
	}

	str_info(ctx, descr_render(dsc),
_("Control character found in %s name \"%s\"."),
			namedescr, escaped);
	free(escaped);
	return 0;
}
/*
 * Iterate a directory looking for filenames with problematic
 * characters.
 *
 * On success fdopendir() takes ownership of *fd, so *fd is set to -1 and the
 * descriptor is released by closedir().  If fdopendir() fails, *fd is left
 * untouched and the caller remains responsible for closing it.
 *
 * Returns 0 on success or a nonzero error code.  Errors are logged here.
 */
static int
check_dirent_names(
	struct scrub_ctx	*ctx,
	struct descr		*dsc,
	int			*fd,
	struct xfs_bulkstat	*bstat)
{
	struct unicrash		*uc = NULL;
	DIR			*dir;
	struct dirent		*dentry;
	int			ret;

	dir = fdopendir(*fd);
	if (!dir) {
		/* Capture errno before the logging call can overwrite it. */
		ret = errno;
		str_errno(ctx, descr_render(dsc));
		return ret;
	}
	*fd = -1; /* closedir will close *fd for us */

	ret = unicrash_dir_init(&uc, ctx, bstat);
	if (ret) {
		str_liberror(ctx, ret, descr_render(dsc));
		goto out_unicrash;
	}

	for (;;) {
		/*
		 * readdir returns NULL both at end-of-directory and on
		 * failure; only a nonzero errno distinguishes the two, so
		 * errno must be cleared before every call and examined only
		 * when NULL comes back.
		 */
		errno = 0;
		dentry = readdir(dir);
		if (!dentry) {
			if (errno) {
				ret = errno;
				str_liberror(ctx, ret, descr_render(dsc));
			}
			break;
		}

		/* Fall back to the simple checker if there is no unicrash. */
		if (uc)
			ret = unicrash_check_dir_name(uc, dsc, dentry);
		else
			ret = simple_check_name(ctx, dsc, _("directory"),
					dentry->d_name);
		if (ret) {
			str_liberror(ctx, ret, descr_render(dsc));
			break;
		}
	}

	unicrash_free(uc);
out_unicrash:
	closedir(dir);
	return ret;
}
#ifdef HAVE_LIBATTR
/* Routines to scan all of an inode's xattrs for name problems. */
/* Pairs an xattr namespace flag with the prefix used when reporting its names. */
struct attrns_decode {
/* Namespace flag handed to attr_list_by_handle(). */
int flags;
/* Prefix joined to each attribute name with a '.' before checking. */
const char *name;
};
/* xattr namespaces to scan; the list ends at the entry whose name is NULL. */
static const struct attrns_decode attr_ns[] = {
	{ .flags = 0,		.name = "user" },
	{ .flags = ATTR_ROOT,	.name = "system" },
	{ .flags = ATTR_SECURE,	.name = "secure" },
	{ .flags = 0,		.name = NULL },
};
/*
 * Check all the xattr names in a particular namespace of a file handle
 * for Unicode normalization problems or collisions.
 *
 * Each name is checked as "<namespace>.<attribute>".  A listing failure with
 * errno set to ESTALE is treated as success.
 *
 * Returns 0 on success or a nonzero error code.  Errors are logged here.
 */
static int
check_xattr_ns_names(
	struct scrub_ctx		*ctx,
	struct descr			*dsc,
	struct xfs_handle		*handle,
	struct xfs_bulkstat		*bstat,
	const struct attrns_decode	*attr_ns)
{
	struct attrlist_cursor		cur;
	char				attrbuf[XFS_XATTR_LIST_MAX];
	char				keybuf[XATTR_NAME_MAX + 1];
	struct attrlist			*attrlist = (struct attrlist *)attrbuf;
	struct attrlist_ent		*ent;
	struct unicrash			*uc = NULL;
	int				i;
	int				error;

	error = unicrash_xattr_init(&uc, ctx, bstat);
	if (error) {
		str_liberror(ctx, error, descr_render(dsc));
		return error;
	}

	memset(attrbuf, 0, sizeof(attrbuf));
	memset(&cur, 0, sizeof(cur));
	memset(keybuf, 0, sizeof(keybuf));

	error = attr_list_by_handle(handle, sizeof(*handle), attrbuf,
			XFS_XATTR_LIST_MAX, attr_ns->flags, &cur);
	while (!error) {
		/* Examine the xattrs. */
		for (i = 0; i < attrlist->al_count; i++) {
			ent = ATTR_ENTRY(attrlist, i);

			/*
			 * Bound the write by the real buffer size so that the
			 * byte reserved for the terminator is actually used.
			 */
			snprintf(keybuf, sizeof(keybuf), "%s.%s",
					attr_ns->name, ent->a_name);

			/* Fall back to the simple checker without unicrash. */
			if (uc)
				error = unicrash_check_xattr_name(uc, dsc,
						keybuf);
			else
				error = simple_check_name(ctx, dsc,
						_("extended attribute"),
						keybuf);
			if (error) {
				str_liberror(ctx, error, descr_render(dsc));
				goto out;
			}
		}

		/* The cursor remembers where to resume the listing. */
		if (!attrlist->al_more)
			break;
		error = attr_list_by_handle(handle, sizeof(*handle), attrbuf,
				XFS_XATTR_LIST_MAX, attr_ns->flags, &cur);
	}
	if (error) {
		/* The listing failed; the real cause is in errno. */
		if (errno == ESTALE)
			errno = 0;
		error = errno;
		if (errno)
			str_errno(ctx, descr_render(dsc));
	}
out:
	unicrash_free(uc);
	return error;
}
/*
 * Check all the xattr names in all the xattr namespaces for problematic
 * characters.
 *
 * Walks the attr_ns table and stops at the first namespace that fails.
 * Returns 0 if every namespace was checked, or the first nonzero error.
 */
static int
check_xattr_names(
	struct scrub_ctx		*ctx,
	struct descr			*dsc,
	struct xfs_handle		*handle,
	struct xfs_bulkstat		*bstat)
{
	const struct attrns_decode	*ns;
	/* Start at 0 so the result is defined even for an empty table. */
	int				ret = 0;

	for (ns = attr_ns; ns->name; ns++) {
		ret = check_xattr_ns_names(ctx, dsc, handle, bstat, ns);
		if (ret)
			break;
	}
	return ret;
}
#else
# define check_xattr_names(c, d, h, b) (0)
#endif /* HAVE_LIBATTR */
/*
 * Description renderer for an inode: @data points to the inode's bulkstat
 * record, and the inode number and generation from it are formatted into
 * @buf by scrub_render_ino_descr().  Returns whatever that helper returns.
 */
static int
render_ino_from_handle(
	struct scrub_ctx	*ctx,
	char			*buf,
	size_t			buflen,
	void			*data)
{
	struct xfs_bulkstat	*bs = data;

	return scrub_render_ino_descr(ctx, buf, buflen,
			bs->bs_ino, bs->bs_gen, NULL);
}
/*
 * Remember @ino in the new_deferred bitmap so that it can be looked at again
 * later.  The bitmap is allocated on first use.  Runs under ncs->lock.
 * Returns 0 or the negated return value of the failing bitmap call.
 */
static inline int
defer_inode(
	struct ncheck_state	*ncs,
	uint64_t		ino)
{
	int			ret = 0;

	pthread_mutex_lock(&ncs->lock);

	if (ncs->new_deferred == NULL)
		ret = -bitmap_alloc(&ncs->new_deferred);
	if (ret == 0)
		ret = -bitmap_set(ncs->new_deferred, ino, 1);

	pthread_mutex_unlock(&ncs->lock);
	return ret;
}
/*
* Check the directory structure for problems that could cause open_by_handle
* not to work. Returns 0 for no problems; EADDRNOTAVAIL if the there are
* problems that would prevent name checking.
*/
static int
check_dir_connection(
struct scrub_ctx *ctx,
struct ncheck_state *ncs,
const struct xfs_bulkstat *bstat)
{
struct scrub_item sri = { };
int error;
/* The dirtree scrubber only works when parent pointers are enabled */
if (!(ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_PARENT))
return 0;
scrub_item_init_file(&sri, bstat);
scrub_item_schedule(&sri, XFS_SCRUB_TYPE_DIRTREE);
error = scrub_item_check_file(ctx, &sri, -1);
if (error) {
str_liberror(ctx, error, _("checking directory loops"));
return error;
}
if (ncs->last_call)
error = repair_file_corruption_now(ctx, &sri, -1);
else
error = repair_file_corruption(ctx, &sri, -1);
if (error) {
str_liberror(ctx, error, _("repairing directory loops"));
return error;
}
/* No directory tree problems? Clear this inode if it was deferred. */
if (repair_item_count_needsrepair(&sri) == 0) {
if (ncs->cur_deferred)
ncs->fixed_something = true;
return 0;
}
/* Don't defer anything during last call. */
if (ncs->last_call)
return 0;
/* Directory tree structure problems exist; do not check names yet. */
error = defer_inode(ncs, bstat->bs_ino);
if (error)
return error;
return EADDRNOTAVAIL;
}
/*
 * Verify the connectivity of the directory tree.
 * We know that the kernel's open-by-handle function will try to reconnect
 * parents of an opened directory, so we'll accept that as sufficient.
 *
 * Check for potential Unicode collisions in names.
 *
 * This is the per-inode callback for the phase 5 inode scan; @arg is the
 * struct ncheck_state.  Returns 0 on success (or when the inode was deferred
 * because of directory tree problems), ESTALE if the directory could not be
 * opened by handle for that reason, ECANCELED if the scan has already been
 * aborted, or another nonzero error code.  Any error other than ESTALE marks
 * the scan as aborted.
 */
static int
check_inode_names(
	struct scrub_ctx	*ctx,
	struct xfs_handle	*handle,
	struct xfs_bulkstat	*bstat,
	void			*arg)
{
	DEFINE_DESCR(dsc, ctx, render_ino_from_handle);
	struct ncheck_state	*ncs = arg;
	int			fd = -1;
	int			error = 0;
	int			err2;

	descr_set(&dsc, bstat);
	background_sleep();

	/*
	 * Try to fix directory loops before we have problems opening files by
	 * handle.
	 */
	if (S_ISDIR(bstat->bs_mode)) {
		error = check_dir_connection(ctx, ncs, bstat);
		if (error == EADDRNOTAVAIL) {
			/* Deferred for later; skip the name checks for now. */
			error = 0;
			goto out;
		}
		if (error)
			goto err;
	}

	/* Warn about naming problems in xattrs. */
	if (bstat->bs_xflags & FS_XFLAG_HASATTR) {
		error = check_xattr_names(ctx, &dsc, handle, bstat);
		if (error)
			goto err;
	}

	/*
	 * Warn about naming problems in the directory entries.  Opening the
	 * dir by handle means the kernel will try to reconnect it to the root.
	 * If the reconnection fails due to corruption in the parents we get
	 * ESTALE, which is why we skip phase 5 if we found corruption.
	 */
	if (S_ISDIR(bstat->bs_mode)) {
		fd = scrub_open_handle(handle);
		if (fd < 0) {
			error = errno;
			/* ESTALE is passed up without aborting the scan. */
			if (error == ESTALE)
				return ESTALE;
			str_errno(ctx, descr_render(&dsc));
			goto err;
		}

		/* On success this takes over fd and resets it to -1. */
		error = check_dirent_names(ctx, &dsc, &fd, bstat);
		if (error)
			goto err_fd;
	}

	progress_add(1);
err_fd:
	if (fd >= 0) {
		if (close(fd)) {
			/*
			 * close() only returns -1; the real cause is in errno,
			 * which must be saved before logging can clobber it.
			 */
			err2 = errno;
			str_errno(ctx, descr_render(&dsc));
			if (!error)
				error = err2;
		}
	}
err:
	if (error)
		ncs->aborted = true;
out:
	if (!error && ncs->aborted)
		error = ECANCELED;

	return error;
}
/* Try to check_inode_names on a specific inode. */
static int
retry_deferred_inode(
struct ncheck_state *ncs,
struct xfs_handle *handle,
uint64_t ino)
{
struct xfs_bulkstat bstat;
struct scrub_ctx *ctx = ncs->ctx;
unsigned int flags = 0;
int error;
error = -xfrog_bulkstat_single(&ctx->mnt, ino, flags, &bstat);
if (error == ENOENT) {
/* Directory is gone, mark it clear. */
ncs->fixed_something = true;
return 0;
}
if (error)
return error;
handle->ha_fid.fid_ino = bstat.bs_ino;
handle->ha_fid.fid_gen = bstat.bs_gen;
return check_inode_names(ncs->ctx, handle, &bstat, ncs);
}
/*
 * bitmap_iterate callback: re-run check_inode_names on each of the @len
 * inodes starting at @ino.  @arg is the struct ncheck_state.  Stops at, and
 * returns, the first nonzero error.
 */
static int
retry_deferred_inode_range(
	uint64_t		ino,
	uint64_t		len,
	void			*arg)
{
	struct ncheck_state	*ncs = arg;
	struct xfs_handle	handle = { };
	int			ret;

	/* Build a handle template; the per-inode fields are filled in later. */
	memcpy(&handle.ha_fsid, ncs->ctx->fshandle, sizeof(handle.ha_fsid));
	handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
			sizeof(handle.ha_fid.fid_len);
	handle.ha_fid.fid_pad = 0;

	for (; len > 0; len--, ino++) {
		ret = retry_deferred_inode(ncs, &handle, ino);
		if (ret)
			return ret;
	}

	return 0;
}
/*
 * Keep re-running check_inode_names on the inodes that were deferred due to
 * directory tree problems for as long as each pass fixes something and new
 * deferrals keep showing up; then make one final pass with last_call set.
 *
 * If a pass fails, cur_deferred is left allocated for the caller to free.
 */
static int
retry_deferred_inodes(
	struct scrub_ctx	*ctx,
	struct ncheck_state	*ncs)
{
	int			ret;

	if (ncs->new_deferred == NULL)
		return 0;

	/*
	 * Try to repair things until we stop making forward progress or we
	 * don't observe any new corruptions.  During the loop, we do not
	 * complain about the corruptions that do not get fixed.
	 */
	for (;;) {
		/* The previous pass's deferrals become this pass's worklist. */
		ncs->cur_deferred = ncs->new_deferred;
		ncs->new_deferred = NULL;
		ncs->fixed_something = false;

		ret = -bitmap_iterate(ncs->cur_deferred,
				retry_deferred_inode_range, ncs);
		if (ret)
			return ret;
		bitmap_free(&ncs->cur_deferred);

		if (!ncs->fixed_something || ncs->new_deferred == NULL)
			break;
	}

	/* Nothing left over?  Then we are finished. */
	if (ncs->new_deferred == NULL)
		return 0;

	/*
	 * Try one last time to fix things, and complain about any problems
	 * that remain.
	 */
	ncs->cur_deferred = ncs->new_deferred;
	ncs->new_deferred = NULL;
	ncs->last_call = true;

	ret = -bitmap_iterate(ncs->cur_deferred,
			retry_deferred_inode_range, ncs);
	if (ret == 0)
		bitmap_free(&ncs->cur_deferred);
	return ret;
}
#ifndef FS_IOC_GETFSLABEL
# define FSLABEL_MAX 256
# define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX])
#endif /* FS_IOC_GETFSLABEL */
/*
 * Description renderer that formats the mountpoint path into @buf.  @data is
 * not used.  Returns the snprintf() result.
 */
static int
scrub_render_mountpoint(
	struct scrub_ctx	*ctx,
	char			*buf,
	size_t			buflen,
	void			*data)
{
	int			len;

	len = snprintf(buf, buflen, _("%s"), ctx->mntpoint);
	return len;
}
/*
 * Check the filesystem label for Unicode normalization problems or misleading
 * sequences.
 *
 * Returns 0 if the label is fine, is empty, or cannot be retrieved because
 * the label ioctl is not supported (EOPNOTSUPP or ENOTTY); otherwise returns
 * a nonzero error code.
 */
static int
check_fs_label(
	struct scrub_ctx	*ctx)
{
	DEFINE_DESCR(dsc, ctx, scrub_render_mountpoint);
	char			label[FSLABEL_MAX];
	struct unicrash		*uc = NULL;
	int			error;

	error = unicrash_fs_label_init(&uc, ctx);
	if (error) {
		str_liberror(ctx, error, descr_render(&dsc));
		return error;
	}

	descr_set(&dsc, NULL);

	/* Retrieve label; quietly bail if we don't support that. */
	error = ioctl(ctx->mnt.fd, FS_IOC_GETFSLABEL, &label);
	if (error) {
		/*
		 * ioctl() only returns -1; translate that into either
		 * "nothing to check" or the real error code so that the raw
		 * -1 never reaches the caller.
		 */
		error = errno;
		if (error == EOPNOTSUPP || error == ENOTTY)
			error = 0;
		else
			perror(ctx->mntpoint);
		goto out;
	}

	/* Ignore empty labels. */
	if (label[0] == 0)
		goto out;

	/* Otherwise check for weirdness. */
	if (uc)
		error = unicrash_check_fs_label(uc, &dsc, label);
	else
		error = simple_check_name(ctx, &dsc, _("filesystem label"),
				label);
	if (error)
		str_liberror(ctx, error, descr_render(&dsc));
out:
	unicrash_free(uc);
	return error;
}
/* Work item for one full-fs scan; allocated by queue_fs_scan(), freed by fs_scan_worker(). */
struct fs_scan_item {
/* The scrub item to check and then repair. */
struct scrub_item sri;
/* Set to true by the worker if the check or the repair fails. */
bool *abortedp;
};
/*
 * Run one full-fs scan scrubber in this thread.  @arg is the struct
 * fs_scan_item, which is freed before returning.  Failures are logged and
 * reported through the item's abortedp flag.
 */
static void
fs_scan_worker(
	struct workqueue	*wq,
	xfs_agnumber_t		nr,
	void			*arg)
{
	struct fs_scan_item	*item = arg;
	struct scrub_ctx	*ctx = wq->wq_ctx;
	int			ret;

	/*
	 * Delay each successive fs scan by a second so that the threads are
	 * less likely to contend on the inobt and inode buffers.
	 */
	if (nr != 0) {
		struct timespec	delay;

		delay.tv_sec = nr;
		delay.tv_nsec = 0;
		nanosleep(&delay, NULL);
	}

	ret = scrub_item_check(ctx, &item->sri);
	if (ret) {
		str_liberror(ctx, ret, _("checking fs scan metadata"));
		*item->abortedp = true;
	} else {
		ret = repair_item_completely(ctx, &item->sri);
		if (ret) {
			str_liberror(ctx, ret,
					_("repairing fs scan metadata"));
			*item->abortedp = true;
		}
	}

	free(item);
}
/*
 * Queue one full-fs scan scrubber.
 *
 * Allocates a work item for @scrub_type and hands it to fs_scan_worker via
 * @wq; the worker frees the item.  @nr is passed through to the worker and
 * @abortedp is where the worker reports failure.
 *
 * Returns 0 on success or a positive error code.  Errors are logged here.
 */
static int
queue_fs_scan(
	struct workqueue	*wq,
	bool			*abortedp,
	xfs_agnumber_t		nr,
	unsigned int		scrub_type)
{
	struct fs_scan_item	*item;
	struct scrub_ctx	*ctx = wq->wq_ctx;
	int			ret;

	item = malloc(sizeof(*item));
	if (!item) {
		ret = ENOMEM;
		str_liberror(ctx, ret, _("setting up fs scan"));
		return ret;
	}
	scrub_item_init_fs(&item->sri);
	scrub_item_schedule(&item->sri, scrub_type);
	item->abortedp = abortedp;

	ret = -workqueue_add(wq, fs_scan_worker, nr, item);
	if (ret) {
		str_liberror(ctx, ret, _("queuing fs scan work"));
		/* The worker will never run, so nobody else frees this. */
		free(item);
	}

	return ret;
}
/*
 * Run multiple full-fs scan scrubbers at the same time.  Queues the nlinks
 * and quotacheck scans on a private workqueue and waits for them to finish.
 * Returns 0, the first error encountered, or ECANCELED if a worker reported
 * a failure.
 */
static int
run_kernel_fs_scan_scrubbers(
	struct scrub_ctx	*ctx)
{
	struct workqueue	wq_fs_scan;
	unsigned int		nr_threads = scrub_nproc_workqueue(ctx);
	xfs_agnumber_t		nr = 0;
	bool			aborted = false;
	int			ret;
	int			join_ret;

	ret = -workqueue_create(&wq_fs_scan, (struct xfs_mount *)ctx,
			nr_threads);
	if (ret) {
		str_liberror(ctx, ret, _("setting up fs scan workqueue"));
		return ret;
	}

	/*
	 * The nlinks scanner is much faster than quotacheck because it only
	 * walks directories, so we start it first.
	 */
	ret = queue_fs_scan(&wq_fs_scan, &aborted, nr, XFS_SCRUB_TYPE_NLINKS);
	if (!ret) {
		/* Give the second scan a nonzero nr if we have the threads. */
		if (nr_threads > 1)
			nr++;
		ret = queue_fs_scan(&wq_fs_scan, &aborted, nr,
				XFS_SCRUB_TYPE_QUOTACHECK);
	}

	/* Always wait for whatever did get queued. */
	join_ret = -workqueue_terminate(&wq_fs_scan);
	if (join_ret) {
		str_liberror(ctx, join_ret, _("joining fs scan workqueue"));
		if (!ret)
			ret = join_ret;
	}
	if (!ret && aborted)
		ret = ECANCELED;

	workqueue_destroy(&wq_fs_scan);
	return ret;
}
/*
 * Phase 5 entry point: run the full-filesystem scan scrubbers, check the
 * filesystem label, then scan every inode for naming problems and check
 * directory connectivity.  Returns 0 on success or a nonzero error code.
 */
int
phase5_func(
	struct scrub_ctx	*ctx)
{
	struct ncheck_state	ncs = { .ctx = ctx };
	int			ret;

	/*
	 * Check and fix anything that requires a full filesystem scan.  We do
	 * this after we've checked all inodes and repaired anything that could
	 * get in the way of a scan.
	 */
	ret = run_kernel_fs_scan_scrubbers(ctx);
	if (ret)
		return ret;

	if (ctx->corruptions_found || ctx->unfixable_errors) {
		str_info(ctx, ctx->mntpoint,
_("Filesystem has errors, skipping connectivity checks."));
		return 0;
	}

	ret = check_fs_label(ctx);
	if (ret)
		return ret;

	pthread_mutex_init(&ncs.lock, NULL);

	/* Each step below runs only if everything before it succeeded. */
	ret = scrub_scan_all_inodes(ctx, check_inode_names, &ncs);
	if (!ret && ncs.aborted)
		ret = ECANCELED;
	if (!ret)
		ret = retry_deferred_inodes(ctx, &ncs);
	if (!ret)
		scrub_report_preen_triggers(ctx);

	/* Tear down the scan state, including any bitmaps left behind. */
	pthread_mutex_destroy(&ncs.lock);
	if (ncs.new_deferred)
		bitmap_free(&ncs.new_deferred);
	if (ncs.cur_deferred)
		bitmap_free(&ncs.cur_deferred);
	return ret;
}
/*
 * Estimate how much work we're going to do: the item count comes from
 * scrub_estimate_iscan_work(), the thread count is twice scrub_nproc(), and
 * no right shift is applied.  Always returns 0.
 */
int
phase5_estimate(
	struct scrub_ctx	*ctx,
	uint64_t		*items,
	unsigned int		*nr_threads,
	int			*rshift)
{
	uint64_t		work = scrub_estimate_iscan_work(ctx);
	unsigned int		threads = scrub_nproc(ctx) * 2;

	*items = work;
	*nr_threads = threads;
	*rshift = 0;
	return 0;
}