blob: 57542507770c598ec0c6beba4a66e2092a4fd528 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2022 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "libxfs_priv.h"
#include "libxfs.h"
#include "libxfs/xfile.h"
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
/*
* Swappable Temporary Memory
* ==========================
*
* Offline checking sometimes needs to be able to stage a large amount of data
* in memory. This information might not fit in the available memory and it
* doesn't all need to be accessible at all times. In other words, we want an
* indexed data buffer to store data that can be paged out.
*
* memfd files meet those requirements. Therefore, the xfile mechanism uses
* one to store our staging data. The xfile must be freed with xfile_destroy.
*
* xfiles assume that the caller will handle all required concurrency
* management; file locks are not taken.
*/
/*
* Open a memory-backed fd to back an xfile. We require close-on-exec here,
* because these memfd files function as windowed RAM and hence should never
* be shared with other processes.
*/
static int
xfile_create_fd(
const char *description)
{
int fd = -1;
#ifdef HAVE_MEMFD_CLOEXEC
/* memfd_create exists in kernel 3.17 (2014) and glibc 2.27 (2018). */
fd = memfd_create(description, MFD_CLOEXEC);
if (fd >= 0)
return fd;
#endif
#ifdef HAVE_O_TMPFILE
/*
* O_TMPFILE exists as of kernel 3.11 (2013), which means that if we
* find it, we're pretty safe in assuming O_CLOEXEC exists too.
*/
fd = open("/dev/shm", O_TMPFILE | O_CLOEXEC | O_RDWR, 0600);
if (fd >= 0)
return fd;
fd = open("/tmp", O_TMPFILE | O_CLOEXEC | O_RDWR, 0600);
if (fd >= 0)
return fd;
#endif
#ifdef HAVE_MKOSTEMP_CLOEXEC
/*
* mkostemp exists as of glibc 2.7 (2007) and O_CLOEXEC exists as of
* kernel 2.6.23 (2007).
*/
fd = mkostemp("libxfsXXXXXX", O_CLOEXEC);
if (fd >= 0)
return fd;
#endif
#if !defined(HAVE_MEMFD_CLOEXEC) && \
!defined(HAVE_O_TMPFILE) && \
!defined(HAVE_MKOSTEMP_CLOEXEC)
# error System needs memfd_create, O_TMPFILE, or O_CLOEXEC to build!
#endif
return fd;
}
/*
* Create an xfile of the given size. The description will be used in the
* trace output.
*/
int
xfile_create(
struct xfs_mount *mp,
const char *description,
struct xfile **xfilep)
{
struct xfile *xf;
char fname[MAXNAMELEN];
int error;
snprintf(fname, MAXNAMELEN - 1, "XFS (%s): %s", mp->m_fsname,
description);
fname[MAXNAMELEN - 1] = 0;
xf = kmem_alloc(sizeof(struct xfile), KM_MAYFAIL);
if (!xf)
return -ENOMEM;
xf->fd = xfile_create_fd(fname);
if (xf->fd < 0) {
error = -errno;
kmem_free(xf);
return error;
}
*xfilep = xf;
return 0;
}
/* Close the file and release all resources. */
void
xfile_destroy(
struct xfile *xf)
{
close(xf->fd);
kmem_free(xf);
}
static inline loff_t
xfile_maxbytes(
struct xfile *xf)
{
if (sizeof(loff_t) == 8)
return LLONG_MAX;
return LONG_MAX;
}
/*
* Read a memory object directly from the xfile's page cache. Unlike regular
* pread, we return -E2BIG and -EFBIG for reads that are too large or at too
* high an offset, instead of truncating the read. Otherwise, we return
* bytes read or an error code, like regular pread.
*/
ssize_t
xfile_pread(
struct xfile *xf,
void *buf,
size_t count,
loff_t pos)
{
ssize_t ret;
if (count > INT_MAX)
return -E2BIG;
if (xfile_maxbytes(xf) - pos < count)
return -EFBIG;
ret = pread(xf->fd, buf, count, pos);
if (ret >= 0)
return ret;
return -errno;
}
/*
* Write a memory object directly to the xfile's page cache. Unlike regular
* pwrite, we return -E2BIG and -EFBIG for writes that are too large or at too
* high an offset, instead of truncating the write. Otherwise, we return
* bytes written or an error code, like regular pwrite.
*/
ssize_t
xfile_pwrite(
struct xfile *xf,
const void *buf,
size_t count,
loff_t pos)
{
ssize_t ret;
if (count > INT_MAX)
return -E2BIG;
if (xfile_maxbytes(xf) - pos < count)
return -EFBIG;
ret = pwrite(xf->fd, buf, count, pos);
if (ret >= 0)
return ret;
return -errno;
}
/* Query stat information for an xfile. */
int
xfile_stat(
struct xfile *xf,
struct xfile_stat *statbuf)
{
struct stat ks;
int error;
error = fstat(xf->fd, &ks);
if (error)
return -errno;
statbuf->size = ks.st_size;
statbuf->bytes = (unsigned long long)ks.st_blocks << 9;
return 0;
}
/* Dump an xfile to stdout. */
int
xfile_dump(
struct xfile *xf)
{
char *argv[] = {"od", "-tx1", "-Ad", "-c", NULL};
pid_t child;
int i;
child = fork();
if (child != 0) {
int wstatus;
wait(&wstatus);
return wstatus == 0 ? 0 : -EIO;
}
/* reroute our xfile to stdin and shut everything else */
dup2(xf->fd, 0);
for (i = 3; i < 1024; i++)
close(i);
return execvp("od", argv);
}
/* Discard pages backing a range of the xfile. */
void
xfile_discard(
struct xfile *xf,
loff_t pos,
unsigned long long count)
{
fallocate(xf->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
pos, count);
}