blob: e5b91d3cfe04868dca42a514329cab2f2a286261 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2023-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "libxfs_priv.h"
#include "libxfs.h"
#include "libxfs/xfile.h"
#include "libxfs/buf_mem.h"
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
/*
* Buffer Cache for In-Memory Files
* ================================
*
* Offline fsck wants to create ephemeral ordered recordsets. The existing
* btree infrastructure can do this, but we need the buffer cache to target
* memory instead of block devices.
*
* xfiles meet those requirements. Therefore, the xmbuf mechanism uses a
* partition on an xfile to store the staging data.
*
* xmbufs assume that the caller will handle all required concurrency
* management. The resulting xfs_buf objects are kept private to the xmbuf
* (they are not recycled to the LRU) because b_addr is mapped directly to the
* memfd file.
*
* The only supported block size is the system page size.
*/
/*
 * Figure out the xfile buffer cache block size here.
 *
 * Both values are assigned by xmbuf_libinit(): XMBUF_BLOCKSIZE is the block
 * size in bytes (the system page size, or 4096 as a fallback), and
 * XMBUF_BLOCKSHIFT is derived from it via libxfs_highbit32().
 */
unsigned int XMBUF_BLOCKSIZE;
unsigned int XMBUF_BLOCKSHIFT;
/*
 * Initialize the xmbuf block geometry from the system page size.  If the page
 * size cannot be queried or is not a power of two, 4096 bytes is used.
 */
void
xmbuf_libinit(void)
{
	long			pagesize;

	pagesize = sysconf(_SC_PAGESIZE);
	if (pagesize < 0 || !is_power_of_2(pagesize)) {
		/* No usable power-of-two page size was found, go with 4k. */
		pagesize = 4096;
	}

	XMBUF_BLOCKSIZE = pagesize;
	XMBUF_BLOCKSHIFT = libxfs_highbit32(XMBUF_BLOCKSIZE);
}
/*
 * Allocate a new cache node (aka a xfs_buf) for the buffer described by @key
 * and map the matching region of the backing xfile into b_addr.
 *
 * @key is a struct xfs_bufkey carrying the block number, length (in basic
 * blocks) and buffer target.
 *
 * Returns the cache node embedded in the new buffer, or NULL if the buffer
 * could not be allocated or the mapping failed (a message is printed to
 * stderr in the latter case).
 */
static struct cache_node *
xmbuf_cache_alloc(
	cache_key_t		key)
{
	struct xfs_bufkey	*bufkey = (struct xfs_bufkey *)key;
	struct xfs_buf		*bp;
	int			error;

	bp = kmem_cache_zalloc(xfs_buf_cache, 0);
	if (!bp)
		return NULL;

	bp->b_cache_key = bufkey->blkno;
	bp->b_length = bufkey->bblen;
	bp->b_target = bufkey->buftarg;
	bp->b_mount = bufkey->buftarg->bt_mount;

	pthread_mutex_init(&bp->b_lock, NULL);
	INIT_LIST_HEAD(&bp->b_li_list);

	/* xmbufs always use the single map embedded in the buffer. */
	bp->b_maps = &bp->__b_map;
	bp->b_nmaps = 1;
	bp->b_maps[0].bm_bn = bufkey->blkno;
	bp->b_maps[0].bm_len = bp->b_length;

	error = xmbuf_map_page(bp);
	if (error) {
		/*
		 * xmbuf_map_page returns a negative errno, but strerror wants
		 * the positive error number.
		 */
		fprintf(stderr,
 _("%s: %s can't mmap %u bytes at xfile offset %llu: %s\n"),
				progname, __FUNCTION__, BBTOB(bp->b_length),
				(unsigned long long)BBTOB(bufkey->blkno),
				strerror(-error));

		/* Nobody else can see this buffer yet, so tear it down. */
		pthread_mutex_destroy(&bp->b_lock);
		kmem_cache_free(xfs_buf_cache, bp);
		return NULL;
	}

	return &bp->b_node;
}
/*
 * Cache flush hook, called before a cache node is purged.  xmbuf buffers are
 * mapped directly onto the backing file, so there is never anything to write
 * back and this always reports success.
 */
static int
xmbuf_cache_flush(
	struct cache_node	*node)
{
	/* direct mapped buffers do not need writing */
	(void)node;

	return 0;
}
/*
 * Cache release hook: unmap the buffer's memory and return the xfs_buf that
 * contains @node to the buffer slab cache.
 */
static void
xmbuf_cache_relse(
	struct cache_node	*node)
{
	struct xfs_buf		*bp = container_of(node, struct xfs_buf, b_node);

	xmbuf_unmap_page(bp);
	kmem_cache_free(xfs_buf_cache, bp);
}
/*
 * Release every cache node linked on @list (via cn_mru) and return how many
 * nodes were released.  @cache is not used.
 */
static unsigned int
xmbuf_cache_bulkrelse(
	struct cache		*cache,
	struct list_head	*list)
{
	struct cache_node	*node;
	struct cache_node	*next;
	int			released = 0;

	if (!list_empty(list)) {
		/* The _safe variant is needed because each node gets freed. */
		list_for_each_entry_safe(node, next, list, cn_mru) {
			xmbuf_cache_relse(node);
			released++;
		}
	}

	return released;
}
/*
 * Cache callbacks for xmbuf buffer targets.  Hashing and key comparison use
 * the regular libxfs buffer cache helpers; allocation, flushing and release
 * use the xmbuf-specific functions in this file.
 */
static struct cache_operations xmbuf_bcache_operations = {
	.hash		= libxfs_bhash,
	.compare	= libxfs_bcompare,
	.alloc		= xmbuf_cache_alloc,
	.flush		= xmbuf_cache_flush,
	.relse		= xmbuf_cache_relse,
	.bulkrelse	= xmbuf_cache_bulkrelse,
};
/*
 * Allocate a buffer cache target for a memory-backed file and set up the
 * buffer target.
 *
 * @mp is stored as the target's mount, @descr and @maxpos are passed through
 * to xfile_create(), and the new target is returned in @btpp.
 *
 * Returns 0 on success.  On failure, *btpp is left untouched, everything
 * allocated here is released, and the return value is -ENOMEM, the negated
 * pthread_mutex_init() error number, or whatever xfile_create() returned.
 */
int
xmbuf_alloc(
	struct xfs_mount	*mp,
	const char		*descr,
	unsigned long long	maxpos,
	struct xfs_buftarg	**btpp)
{
	struct xfs_buftarg	*btp;
	struct xfile		*xfile;
	struct cache		*cache;
	int			error;

	btp = kzalloc(sizeof(*btp), GFP_KERNEL);
	if (!btp)
		return -ENOMEM;

	error = xfile_create(descr, maxpos, &xfile);
	if (error)
		goto out_btp;

	cache = cache_init(0, LIBXFS_BHASHSIZE(NULL), &xmbuf_bcache_operations);
	if (!cache) {
		error = -ENOMEM;
		goto out_xfile;
	}

	/* Initialize buffer target */
	btp->bt_mount = mp;
	btp->bt_bdev = (dev_t)-1;
	btp->bt_bdev_fd = -1;
	btp->bt_xfile = xfile;
	btp->bcache = cache;

	error = pthread_mutex_init(&btp->lock, NULL);
	if (error) {
		/*
		 * pthread functions return a positive error number; negate it
		 * so that it matches the negative errno convention used by
		 * the other failure paths of this function.
		 */
		error = -error;
		goto out_cache;
	}

	*btpp = btp;
	return 0;

out_cache:
	cache_destroy(cache);
out_xfile:
	xfile_destroy(xfile);
out_btp:
	kfree(btp);
	return error;
}
/*
 * Tear down a memory-backed buffer cache target: destroy its buffer cache and
 * lock, destroy the backing xfile, and free the target itself.
 */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	struct xfile		*xf;

	ASSERT(xfs_buftarg_is_mem(btp));

	/* Get rid of the cached buffers before the file that backs them. */
	cache_destroy(btp->bcache);
	pthread_mutex_destroy(&btp->lock);

	xf = btp->bt_xfile;
	xfile_destroy(xf);

	kfree(btp);
}
/*
 * Map the part of the backing xfile that corresponds to this buffer directly
 * into bp->b_addr as a shared, read-write mapping.
 *
 * Returns 0 on success, with the buffer flagged uptodate and unchecked and
 * its error state cleared; returns a negative errno if mmap fails.
 */
int
xmbuf_map_page(
	struct xfs_buf		*bp)
{
	struct xfile		*xf = bp->b_target->bt_xfile;
	loff_t			offset;
	void			*addr;

	/* Buffer addresses are relative to the start of the xfile partition. */
	offset = xf->partition_pos + BBTOB(xfs_buf_daddr(bp));

	addr = mmap(NULL, BBTOB(bp->b_length), PROT_READ | PROT_WRITE,
			MAP_SHARED, xf->fcb->fd, offset);
	if (addr == MAP_FAILED)
		return -errno;

	bp->b_error = 0;
	bp->b_flags |= LIBXFS_B_UPTODATE | LIBXFS_B_UNCHECKED;
	bp->b_addr = addr;

	return 0;
}
/*
 * Drop the mapping that backs this buffer and clear bp->b_addr.  The result
 * of munmap is not checked.
 */
void
xmbuf_unmap_page(
	struct xfs_buf		*bp)
{
	void			*addr = bp->b_addr;

	bp->b_addr = NULL;
	munmap(addr, BBTOB(bp->b_length));
}
/*
 * Check a disk address against the size of the xfile backing this buftarg.
 * Returns true if @daddr is below the file's maxbytes converted to basic
 * blocks.
 */
bool
xmbuf_verify_daddr(
	struct xfs_buftarg	*btp,
	xfs_daddr_t		daddr)
{
	struct xfile		*xf = btp->bt_xfile;

	ASSERT(xfs_buftarg_is_mem(btp));

	if (daddr < (xf->maxbytes >> BBSHIFT))
		return true;
	return false;
}
/*
 * Discard the backing store for this buffer by punching a hole in the xfile
 * over the buffer's byte range.  The file size is kept, and the result of
 * fallocate is not checked.
 */
static void
xmbuf_stale(
	struct xfs_buf		*bp)
{
	struct xfile		*xf = bp->b_target->bt_xfile;
	loff_t			offset;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	/* Translate the buffer address into a position within the xfile. */
	offset = xf->partition_pos + BBTOB(xfs_buf_daddr(bp));

	fallocate(xf->fcb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			offset, BBTOB(bp->b_length));
}
/*
 * Finalize a buffer.  A stale buffer has its backing page discarded and the
 * call succeeds; any other buffer is passed through its structure verifier.
 *
 * Returns 0 on success, or -EFSCORRUPTED (after reporting a verifier error)
 * if the verifier flags a problem.
 */
int
xmbuf_finalize(
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;

	if (bp->b_flags & LIBXFS_B_STALE) {
		xmbuf_stale(bp);
		return 0;
	}

	/*
	 * Although this btree is ephemeral, validate the buffer structure so
	 * that we can detect memory corruption errors and software bugs.
	 */
	fa = bp->b_ops->verify_struct(bp);
	if (!fa)
		return 0;

	xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	return -EFSCORRUPTED;
}
/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 *
 * The dirty, ordered and stale state is cleared from the buffer log item
 * first, then xfs_trans_bdetach is called until the buffer no longer has a
 * log item attached.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);
	bli->bli_flags &= ~(XFS_BLI_STALE | XFS_BLI_ORDERED | XFS_BLI_DIRTY);

	while (bp->b_log_item) {
		xfs_trans_bdetach(tp, bp);
	}
}