// SPDX-License-Identifier: GPL-2.0-only
/*
* zblock.c
*
* Author: Vitaly Wool <vitaly.wool@konsulko.se>
* Based on the work from Ananda Badmaev <a.badmaev@clicknet.pro>
* Copyright (C) 2022-2025, Konsulko AB.
*
 * Zblock is a small object allocator intended to serve as a zpool
 * backend. It operates on page blocks which consist of a power-of-2
 * number of physical pages and store an integer number of compressed
 * pages each, which results in determinism and simplicity.
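 *
 * For example (the numbers are purely illustrative, the real geometry is
 * defined by block_desc[] in zblock.h): a block of order 1 spans two pages,
 * i.e. 8192 bytes with 4K pages, so with a slot size of 1000 bytes it
 * stores 8 compressed objects and wastes 192 bytes per block.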
*
 * zblock doesn't export any API and is meant to be used via the zpool API.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/atomic.h>
#include <linux/debugfs.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
#include "zblock.h"
static struct rb_root block_desc_tree = RB_ROOT;
static struct dentry *zblock_debugfs_root;
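
/*
 * Handle layout: a block is allocated with __get_free_pages() and is
 * therefore page aligned, so the low PAGE_SHIFT bits of its address are
 * free to carry the block type and the slot index (SLOT_BITS and
 * SLOT_MASK come from zblock.h):
 *
 *	bits 63..PAGE_SHIFT		block address
 *	bits PAGE_SHIFT-1..SLOT_BITS	block type
 *	bits SLOT_BITS-1..0		slot index
 */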
/* Encode handle of a particular slot in the pool using metadata */
static inline unsigned long metadata_to_handle(struct zblock_block *block,
unsigned int block_type, unsigned int slot)
{
return (unsigned long)(block) | (block_type << SLOT_BITS) | slot;
}
/* Return block, block type and slot in the pool corresponding to handle */
static inline struct zblock_block *handle_to_metadata(unsigned long handle,
unsigned int *block_type, unsigned int *slot)
{
*block_type = (handle & (PAGE_SIZE - 1)) >> SLOT_BITS;
*slot = handle & SLOT_MASK;
return (struct zblock_block *)(handle & PAGE_MASK);
}
/*
* Find a block with at least one free slot and claim it.
 * We make sure that the first block on the active list, if one exists,
 * always has a free slot.
*/
static inline struct zblock_block *find_and_claim_block(struct block_list *b,
int block_type, unsigned long *handle)
{
struct list_head *l = &b->active_list;
unsigned int slot;
if (!list_empty(l)) {
struct zblock_block *z = list_first_entry(l, typeof(*z), link);
if (--z->free_slots == 0)
list_move(&z->link, &b->full_list);
		/*
		 * There is at least one free slot in this block, and the
		 * decrement above guarantees it stays reserved for us.
		 * Find that slot and set its busy bit.
		 */
for (slot = find_first_zero_bit(z->slot_info,
block_desc[block_type].slots_per_block);
slot < block_desc[block_type].slots_per_block;
slot = find_next_zero_bit(z->slot_info,
block_desc[block_type].slots_per_block,
slot)) {
if (!test_and_set_bit(slot, z->slot_info))
break;
barrier();
}
WARN_ON(slot >= block_desc[block_type].slots_per_block);
*handle = metadata_to_handle(z, block_type, slot);
return z;
}
return NULL;
}
/*
* allocate new block and add it to corresponding block list
*/
static struct zblock_block *alloc_block(struct zblock_pool *pool,
int block_type, gfp_t gfp,
unsigned long *handle)
{
struct zblock_block *block;
struct block_list *block_list;
block = (void *)__get_free_pages(gfp, block_desc[block_type].order);
if (!block)
return NULL;
block_list = &pool->block_lists[block_type];
	/* init block data; slot 0 is handed out to the caller right away */
block->free_slots = block_desc[block_type].slots_per_block - 1;
memset(&block->slot_info, 0, sizeof(block->slot_info));
set_bit(0, block->slot_info);
*handle = metadata_to_handle(block, block_type, 0);
spin_lock(&block_list->lock);
list_add(&block->link, &block_list->active_list);
block_list->block_count++;
spin_unlock(&block_list->lock);
return block;
}
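
/*
 * debugfs: for each block type, print the number of allocated blocks,
 * the block size in pages, and the total number of pages used.
 */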
static int zblock_blocks_show(struct seq_file *s, void *v)
{
struct zblock_pool *pool = s->private;
int i;
for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
struct block_list *block_list = &pool->block_lists[i];
seq_printf(s, "%d: %ld blocks of %d pages (total %ld pages)\n",
i, block_list->block_count, 1 << block_desc[i].order,
block_list->block_count << block_desc[i].order);
}
return 0;
}
DEFINE_SHOW_ATTRIBUTE(zblock_blocks);
/*****************
* API Functions
*****************/
/**
* zblock_create_pool() - create a new zblock pool
* @gfp: gfp flags when allocating the zblock pool structure
*
* Return: pointer to the new zblock pool or NULL if the metadata allocation
* failed.
*/
static struct zblock_pool *zblock_create_pool(gfp_t gfp)
{
struct zblock_pool *pool;
struct block_list *block_list;
int i;
pool = kmalloc(sizeof(struct zblock_pool), gfp);
if (!pool)
return NULL;
/* init each block list */
for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
block_list = &pool->block_lists[i];
spin_lock_init(&block_list->lock);
INIT_LIST_HEAD(&block_list->full_list);
INIT_LIST_HEAD(&block_list->active_list);
block_list->block_count = 0;
}
debugfs_create_file("blocks", S_IFREG | 0444, zblock_debugfs_root,
pool, &zblock_blocks_fops);
return pool;
}
/**
* zblock_destroy_pool() - destroys an existing zblock pool
* @pool: the zblock pool to be destroyed
*
*/
static void zblock_destroy_pool(struct zblock_pool *pool)
{
kfree(pool);
}
/**
* zblock_alloc() - allocates a slot of appropriate size
* @pool: zblock pool from which to allocate
* @size: size in bytes of the desired allocation
* @gfp: gfp flags used if the pool needs to grow
* @handle: handle of the new allocation
*
 * Return: 0 on success with @handle set, -EINVAL if the size is invalid,
 * -ENOSPC if the size cannot be served by any block type, or -ENOMEM if
 * the pool was unable to allocate a new block.
*/
static int zblock_alloc(struct zblock_pool *pool, size_t size, gfp_t gfp,
unsigned long *handle)
{
int block_type = -1;
struct zblock_block *block;
struct block_list *block_list;
if (!size)
return -EINVAL;
if (size > PAGE_SIZE)
return -ENOSPC;
	/* find the block type with the smallest suitable slot size */
if (size < block_desc[0].slot_size)
block_type = 0;
else {
struct block_desc_node *block_node;
struct rb_node *node = block_desc_tree.rb_node;
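
		/*
		 * Each tree node covers the size range
		 * [block_desc[i].slot_size, block_desc[i + 1].slot_size)
		 * (PAGE_SIZE bounds the last one).  A size in that range is
		 * too big for block type i, so it is served by type i + 1,
		 * hence the "+ 1" below.
		 */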
while (node) {
block_node = container_of(node, typeof(*block_node), node);
if (size < block_node->this_slot_size)
node = node->rb_left;
else if (size >= block_node->next_slot_size)
node = node->rb_right;
else {
block_type = block_node->block_idx + 1;
break;
}
}
}
if (WARN_ON(block_type < 0))
return -EINVAL;
if (block_type >= ARRAY_SIZE(block_desc))
return -ENOSPC;
block_list = &pool->block_lists[block_type];
spin_lock(&block_list->lock);
block = find_and_claim_block(block_list, block_type, handle);
spin_unlock(&block_list->lock);
if (block)
return 0;
	/* no block with a free slot was found, try to allocate a new empty block */
block = alloc_block(pool, block_type, gfp & ~(__GFP_MOVABLE | __GFP_HIGHMEM), handle);
return block ? 0 : -ENOMEM;
}
/**
* zblock_free() - frees the allocation associated with the given handle
* @pool: pool in which the allocation resided
* @handle: handle associated with the allocation returned by zblock_alloc()
*
*/
static void zblock_free(struct zblock_pool *pool, unsigned long handle)
{
unsigned int slot, block_type;
struct zblock_block *block;
struct block_list *block_list;
block = handle_to_metadata(handle, &block_type, &slot);
block_list = &pool->block_lists[block_type];
spin_lock(&block_list->lock);
	/* if all slots in the block are now free, delete the whole block */
if (++block->free_slots == block_desc[block_type].slots_per_block) {
block_list->block_count--;
list_del(&block->link);
spin_unlock(&block_list->lock);
free_pages((unsigned long)block, block_desc[block_type].order);
return;
	} else if (block->free_slots == 1) {
		/* the block was full until now, make it available again */
		list_move_tail(&block->link, &block_list->active_list);
	}
clear_bit(slot, block->slot_info);
spin_unlock(&block_list->lock);
}
/**
* zblock_map() - maps the allocation associated with the given handle
* @pool: pool in which the allocation resides
* @handle: handle associated with the allocation to be mapped
 *
 * Return: pointer to the mapped allocation
*/
static void *zblock_map(struct zblock_pool *pool, unsigned long handle)
{
unsigned int block_type, slot;
struct zblock_block *block;
unsigned long offs;
void *p;
block = handle_to_metadata(handle, &block_type, &slot);
offs = ZBLOCK_HEADER_SIZE + slot * block_desc[block_type].slot_size;
p = (void *)block + offs;
return p;
}
/**
* zblock_unmap() - unmaps the allocation associated with the given handle
* @pool: pool in which the allocation resides
* @handle: handle associated with the allocation to be unmapped
*/
static void zblock_unmap(struct zblock_pool *pool, unsigned long handle)
{
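	/*
	 * Nothing to do: zblock_map() hands out a direct pointer into the
	 * block, which consists of physically contiguous lowmem pages
	 * (__GFP_HIGHMEM is masked off at allocation time), so there is
	 * no temporary mapping to tear down.
	 */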
}
/**
* zblock_write() - write to the memory area defined by handle
* @pool: pool in which the allocation resides
* @handle: handle associated with the allocation
* @handle_mem: pointer to source memory block
* @mem_len: length of the memory block to write
*/
static void zblock_write(struct zblock_pool *pool, unsigned long handle,
void *handle_mem, size_t mem_len)
{
unsigned int block_type, slot;
struct zblock_block *block;
unsigned long offs;
void *p;
block = handle_to_metadata(handle, &block_type, &slot);
offs = ZBLOCK_HEADER_SIZE + slot * block_desc[block_type].slot_size;
p = (void *)block + offs;
memcpy(p, handle_mem, mem_len);
}
/**
* zblock_get_total_pages() - gets the zblock pool size in pages
* @pool: pool being queried
*
 * Return: size of the given pool in pages.
*/
static u64 zblock_get_total_pages(struct zblock_pool *pool)
{
u64 total_size;
int i;
total_size = 0;
for (i = 0; i < ARRAY_SIZE(block_desc); i++)
total_size += pool->block_lists[i].block_count << block_desc[i].order;
return total_size;
}
/*****************
* zpool
****************/
static void *zblock_zpool_create(const char *name, gfp_t gfp)
{
return zblock_create_pool(gfp);
}
static void zblock_zpool_destroy(void *pool)
{
zblock_destroy_pool(pool);
}
static int zblock_zpool_malloc(void *pool, size_t size, gfp_t gfp,
unsigned long *handle, const int nid)
{
return zblock_alloc(pool, size, gfp, handle);
}
static void zblock_zpool_free(void *pool, unsigned long handle)
{
zblock_free(pool, handle);
}
static void *zblock_zpool_read_begin(void *pool, unsigned long handle,
void *local_copy)
{
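	/*
	 * Objects are directly addressable within their block, so the
	 * local_copy bounce buffer passed in by zpool is not needed here.
	 */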
return zblock_map(pool, handle);
}
static void zblock_zpool_obj_write(void *pool, unsigned long handle,
void *handle_mem, size_t mem_len)
{
zblock_write(pool, handle, handle_mem, mem_len);
}
static void zblock_zpool_read_end(void *pool, unsigned long handle,
void *handle_mem)
{
zblock_unmap(pool, handle);
}
static u64 zblock_zpool_total_pages(void *pool)
{
return zblock_get_total_pages(pool);
}
static struct zpool_driver zblock_zpool_driver = {
.type = "zblock",
.owner = THIS_MODULE,
.create = zblock_zpool_create,
.destroy = zblock_zpool_destroy,
.malloc = zblock_zpool_malloc,
.free = zblock_zpool_free,
.obj_read_begin = zblock_zpool_read_begin,
.obj_read_end = zblock_zpool_read_end,
.obj_write = zblock_zpool_obj_write,
.total_pages = zblock_zpool_total_pages,
};
MODULE_ALIAS("zpool-zblock");
static void delete_rbtree(void)
{
	while (!RB_EMPTY_ROOT(&block_desc_tree)) {
		struct block_desc_node *block_node =
			rb_entry(block_desc_tree.rb_node, typeof(*block_node), node);

		rb_erase(&block_node->node, &block_desc_tree);
		kfree(block_node);	/* the node was kmalloc'ed in create_rbtree() */
	}
}
static int __init create_rbtree(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
struct block_desc_node *block_node = kmalloc(sizeof(*block_node),
GFP_KERNEL);
struct rb_node **new = &block_desc_tree.rb_node, *parent = NULL;
if (!block_node) {
delete_rbtree();
return -ENOMEM;
}
		if (i > 0 && block_desc[i].slot_size <= block_desc[i-1].slot_size) {
			pr_err("%s: block descriptors not in ascending order\n",
				__func__);
			/* the current node has not been inserted yet, free it separately */
			kfree(block_node);
			delete_rbtree();
			return -EINVAL;
		}
block_node->this_slot_size = block_desc[i].slot_size;
block_node->block_idx = i;
if (i == ARRAY_SIZE(block_desc) - 1)
block_node->next_slot_size = PAGE_SIZE;
else
block_node->next_slot_size = block_desc[i+1].slot_size;
while (*new) {
parent = *new;
/* the array is sorted so we will always go to the right */
new = &((*new)->rb_right);
}
rb_link_node(&block_node->node, parent, new);
rb_insert_color(&block_node->node, &block_desc_tree);
}
return 0;
}
static int __init init_zblock(void)
{
int ret = create_rbtree();
if (ret)
return ret;
zpool_register_driver(&zblock_zpool_driver);
zblock_debugfs_root = debugfs_create_dir("zblock", NULL);
return 0;
}
static void __exit exit_zblock(void)
{
zpool_unregister_driver(&zblock_zpool_driver);
debugfs_remove_recursive(zblock_debugfs_root);
delete_rbtree();
}
module_init(init_zblock);
module_exit(exit_zblock);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vitaly Wool <vitaly.wool@konsulko.se>");
MODULE_DESCRIPTION("Block allocator for compressed pages");