|  | // SPDX-License-Identifier: GPL-2.0-or-later | 
|  | /* | 
|  | * Copyright (c) 2023-2024 Oracle.  All Rights Reserved. | 
|  | * Author: Darrick J. Wong <djwong@kernel.org> | 
|  | */ | 
|  | #include "libxfs_priv.h" | 
|  | #include "libxfs.h" | 
|  | #include "libxfs/xfile.h" | 
|  | #include "libxfs/buf_mem.h" | 
|  | #include <sys/mman.h> | 
|  | #include <sys/types.h> | 
|  | #include <sys/wait.h> | 
|  |  | 
|  | /* | 
|  | * Buffer Cache for In-Memory Files | 
|  | * ================================ | 
|  | * | 
|  | * Offline fsck wants to create ephemeral ordered recordsets.  The existing | 
|  | * btree infrastructure can do this, but we need the buffer cache to target | 
|  | * memory instead of block devices. | 
|  | * | 
|  | * xfiles meet those requirements.  Therefore, the xmbuf mechanism uses a | 
|  | * partition on an xfile to store the staging data. | 
|  | * | 
|  | * xmbufs assume that the caller will handle all required concurrency | 
|  | * management.  The resulting xfs_buf objects are kept private to the xmbuf | 
|  | * (they are not recycled to the LRU) because b_addr is mapped directly to the | 
|  | * memfd file. | 
|  | * | 
|  | * The only supported block size is the system page size. | 
|  | */ | 
|  |  | 
/* Figure out the xfile buffer cache block size here */
unsigned int	XMBUF_BLOCKSIZE;	/* bytes per xmbuf block; set once by xmbuf_libinit() */
unsigned int	XMBUF_BLOCKSHIFT;	/* log2(XMBUF_BLOCKSIZE) */
|  |  | 
|  | void | 
|  | xmbuf_libinit(void) | 
|  | { | 
|  | long		ret = sysconf(_SC_PAGESIZE); | 
|  |  | 
|  | /* If we don't find a power-of-two page size, go with 4k. */ | 
|  | if (ret < 0 || !is_power_of_2(ret)) | 
|  | ret = 4096; | 
|  |  | 
|  | XMBUF_BLOCKSIZE = ret; | 
|  | XMBUF_BLOCKSHIFT = libxfs_highbit32(XMBUF_BLOCKSIZE); | 
|  | } | 
|  |  | 
|  | /* Allocate a new cache node (aka a xfs_buf) */ | 
|  | static struct cache_node * | 
|  | xmbuf_cache_alloc( | 
|  | cache_key_t		key) | 
|  | { | 
|  | struct xfs_bufkey	*bufkey = (struct xfs_bufkey *)key; | 
|  | struct xfs_buf		*bp; | 
|  | int			error; | 
|  |  | 
|  | bp = kmem_cache_zalloc(xfs_buf_cache, 0); | 
|  | if (!bp) | 
|  | return NULL; | 
|  |  | 
|  | bp->b_cache_key = bufkey->blkno; | 
|  | bp->b_length = bufkey->bblen; | 
|  | bp->b_target = bufkey->buftarg; | 
|  | bp->b_mount = bufkey->buftarg->bt_mount; | 
|  |  | 
|  | pthread_mutex_init(&bp->b_lock, NULL); | 
|  | INIT_LIST_HEAD(&bp->b_li_list); | 
|  | bp->b_maps = &bp->__b_map; | 
|  |  | 
|  | bp->b_nmaps = 1; | 
|  | bp->b_maps[0].bm_bn = bufkey->blkno; | 
|  | bp->b_maps[0].bm_len = bp->b_length; | 
|  |  | 
|  | error = xmbuf_map_page(bp); | 
|  | if (error) { | 
|  | fprintf(stderr, | 
|  | _("%s: %s can't mmap %u bytes at xfile offset %llu: %s\n"), | 
|  | progname, __FUNCTION__, BBTOB(bp->b_length), | 
|  | (unsigned long long)BBTOB(bufkey->blkno), | 
|  | strerror(error)); | 
|  |  | 
|  | kmem_cache_free(xfs_buf_cache, bp); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | return &bp->b_node; | 
|  | } | 
|  |  | 
/* Flush a buffer to disk before purging the cache node */
static int
xmbuf_cache_flush(
	struct cache_node	*node)
{
	/*
	 * Direct mapped buffers do not need writing: b_addr points straight
	 * into the memfd, so every store already landed in the backing file.
	 */
	return 0;
}
|  |  | 
|  | /* Release resources, free the buffer. */ | 
|  | static void | 
|  | xmbuf_cache_relse( | 
|  | struct cache_node	*node) | 
|  | { | 
|  | struct xfs_buf		*bp; | 
|  |  | 
|  | bp = container_of(node, struct xfs_buf, b_node); | 
|  | xmbuf_unmap_page(bp); | 
|  | kmem_cache_free(xfs_buf_cache, bp); | 
|  | } | 
|  |  | 
|  | /* Release a bunch of buffers */ | 
|  | static unsigned int | 
|  | xmbuf_cache_bulkrelse( | 
|  | struct cache		*cache, | 
|  | struct list_head	*list) | 
|  | { | 
|  | struct cache_node	*cn, *n; | 
|  | int			count = 0; | 
|  |  | 
|  | if (list_empty(list)) | 
|  | return 0; | 
|  |  | 
|  | list_for_each_entry_safe(cn, n, list, cn_mru) { | 
|  | xmbuf_cache_relse(cn); | 
|  | count++; | 
|  | } | 
|  |  | 
|  | return count; | 
|  | } | 
|  |  | 
/* Buffer cache callbacks for in-memory (xfile-backed) buffer targets. */
static struct cache_operations xmbuf_bcache_operations = {
	.hash		= libxfs_bhash,		/* shared with the disk bcache */
	.alloc		= xmbuf_cache_alloc,
	.flush		= xmbuf_cache_flush,	/* no-op; direct mapped */
	.relse		= xmbuf_cache_relse,
	.compare	= libxfs_bcompare,	/* shared with the disk bcache */
	.bulkrelse	= xmbuf_cache_bulkrelse
};
|  |  | 
|  | /* | 
|  | * Allocate a buffer cache target for a memory-backed file and set up the | 
|  | * buffer target. | 
|  | */ | 
|  | int | 
|  | xmbuf_alloc( | 
|  | struct xfs_mount	*mp, | 
|  | const char		*descr, | 
|  | unsigned long long	maxpos, | 
|  | struct xfs_buftarg	**btpp) | 
|  | { | 
|  | struct xfs_buftarg	*btp; | 
|  | struct xfile		*xfile; | 
|  | struct cache		*cache; | 
|  | int			error; | 
|  |  | 
|  | btp = kzalloc(sizeof(*btp), GFP_KERNEL); | 
|  | if (!btp) | 
|  | return -ENOMEM; | 
|  |  | 
|  | error = xfile_create(descr, maxpos, &xfile); | 
|  | if (error) | 
|  | goto out_btp; | 
|  |  | 
|  | cache = cache_init(0, LIBXFS_BHASHSIZE(NULL), &xmbuf_bcache_operations); | 
|  | if (!cache) { | 
|  | error = -ENOMEM; | 
|  | goto out_xfile; | 
|  | } | 
|  |  | 
|  | /* Initialize buffer target */ | 
|  | btp->bt_mount = mp; | 
|  | btp->bt_bdev = (dev_t)-1; | 
|  | btp->bt_bdev_fd = -1; | 
|  | btp->bt_xfile = xfile; | 
|  | btp->bcache = cache; | 
|  |  | 
|  | error = pthread_mutex_init(&btp->lock, NULL); | 
|  | if (error) | 
|  | goto out_cache; | 
|  |  | 
|  | *btpp = btp; | 
|  | return 0; | 
|  |  | 
|  | out_cache: | 
|  | cache_destroy(cache); | 
|  | out_xfile: | 
|  | xfile_destroy(xfile); | 
|  | out_btp: | 
|  | kfree(btp); | 
|  | return error; | 
|  | } | 
|  |  | 
|  | /* Free a buffer cache target for a memory-backed file. */ | 
|  | void | 
|  | xmbuf_free( | 
|  | struct xfs_buftarg	*btp) | 
|  | { | 
|  | ASSERT(xfs_buftarg_is_mem(btp)); | 
|  |  | 
|  | cache_destroy(btp->bcache); | 
|  | pthread_mutex_destroy(&btp->lock); | 
|  | xfile_destroy(btp->bt_xfile); | 
|  | kfree(btp); | 
|  | } | 
|  |  | 
|  | /* Directly map a memfd page into the buffer cache. */ | 
|  | int | 
|  | xmbuf_map_page( | 
|  | struct xfs_buf		*bp) | 
|  | { | 
|  | struct xfile		*xfile = bp->b_target->bt_xfile; | 
|  | void			*p; | 
|  | loff_t			pos; | 
|  |  | 
|  | pos = xfile->partition_pos + BBTOB(xfs_buf_daddr(bp)); | 
|  | p = mmap(NULL, BBTOB(bp->b_length), PROT_READ | PROT_WRITE, MAP_SHARED, | 
|  | xfile->fcb->fd, pos); | 
|  | if (p == MAP_FAILED) | 
|  | return -errno; | 
|  |  | 
|  | bp->b_addr = p; | 
|  | bp->b_flags |= LIBXFS_B_UPTODATE | LIBXFS_B_UNCHECKED; | 
|  | bp->b_error = 0; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* Unmap a memfd page that was mapped into the buffer cache. */ | 
|  | void | 
|  | xmbuf_unmap_page( | 
|  | struct xfs_buf		*bp) | 
|  | { | 
|  | munmap(bp->b_addr, BBTOB(bp->b_length)); | 
|  | bp->b_addr = NULL; | 
|  | } | 
|  |  | 
|  | /* Is this a valid daddr within the buftarg? */ | 
|  | bool | 
|  | xmbuf_verify_daddr( | 
|  | struct xfs_buftarg	*btp, | 
|  | xfs_daddr_t		daddr) | 
|  | { | 
|  | struct xfile		*xf = btp->bt_xfile; | 
|  |  | 
|  | ASSERT(xfs_buftarg_is_mem(btp)); | 
|  |  | 
|  | return daddr < (xf->maxbytes >> BBSHIFT); | 
|  | } | 
|  |  | 
|  | /* Discard the page backing this buffer. */ | 
|  | static void | 
|  | xmbuf_stale( | 
|  | struct xfs_buf		*bp) | 
|  | { | 
|  | struct xfile		*xf = bp->b_target->bt_xfile; | 
|  | loff_t			pos; | 
|  |  | 
|  | ASSERT(xfs_buftarg_is_mem(bp->b_target)); | 
|  |  | 
|  | pos = BBTOB(xfs_buf_daddr(bp)) + xf->partition_pos; | 
|  | fallocate(xf->fcb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, pos, | 
|  | BBTOB(bp->b_length)); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Finalize a buffer -- discard the backing page if it's stale, or run the | 
|  | * write verifier to detect problems. | 
|  | */ | 
|  | int | 
|  | xmbuf_finalize( | 
|  | struct xfs_buf		*bp) | 
|  | { | 
|  | xfs_failaddr_t		fa; | 
|  | int			error = 0; | 
|  |  | 
|  | if (bp->b_flags & LIBXFS_B_STALE) { | 
|  | xmbuf_stale(bp); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Although this btree is ephemeral, validate the buffer structure so | 
|  | * that we can detect memory corruption errors and software bugs. | 
|  | */ | 
|  | fa = bp->b_ops->verify_struct(bp); | 
|  | if (fa) { | 
|  | error = -EFSCORRUPTED; | 
|  | xfs_verifier_error(bp, error, fa); | 
|  | } | 
|  |  | 
|  | return error; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Detach this xmbuf buffer from the transaction by any means necessary. | 
|  | * All buffers are direct-mapped, so they do not need bwrite. | 
|  | */ | 
|  | void | 
|  | xmbuf_trans_bdetach( | 
|  | struct xfs_trans	*tp, | 
|  | struct xfs_buf		*bp) | 
|  | { | 
|  | struct xfs_buf_log_item	*bli = bp->b_log_item; | 
|  |  | 
|  | ASSERT(bli != NULL); | 
|  |  | 
|  | bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED | | 
|  | XFS_BLI_STALE); | 
|  | clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags); | 
|  |  | 
|  | while (bp->b_log_item != NULL) | 
|  | xfs_trans_bdetach(tp, bp); | 
|  | } |