// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
#include <linux/bug.h>
#include <linux/export.h>
#include <linux/kmemleak.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/gpu_buddy.h>
/**
* gpu_buddy_assert - assert a condition in the buddy allocator
* @condition: condition expected to be true
*
* When CONFIG_KUNIT is enabled, evaluates @condition and, if false, triggers
* a WARN_ON() and also calls kunit_fail_current_test() so that any running
* kunit test is properly marked as failed. The stringified condition is
* included in the failure message for easy identification.
*
* When CONFIG_KUNIT is not enabled, this reduces to WARN_ON() so production
* builds retain the same warning semantics as before.
*/
#if IS_ENABLED(CONFIG_KUNIT)
#include <kunit/test-bug.h>
#define gpu_buddy_assert(condition) do { \
if (WARN_ON(!(condition))) \
kunit_fail_current_test("gpu_buddy_assert(" #condition ")"); \
} while (0)
#else
#define gpu_buddy_assert(condition) WARN_ON(!(condition))
#endif
static struct kmem_cache *slab_blocks;
static unsigned int
gpu_buddy_block_state(struct gpu_buddy_block *block)
{
return block->header & GPU_BUDDY_HEADER_STATE;
}
static bool
gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
{
return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
}
static bool
gpu_buddy_block_is_split(struct gpu_buddy_block *block)
{
return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
}
static unsigned int gpu_buddy_block_offset_alignment(struct gpu_buddy_block *block)
{
u64 offset = gpu_buddy_block_offset(block);
if (!offset)
/*
* __ffs64(0) is undefined; offset 0 is maximally aligned, so return
* a value greater than any possible alignment.
*/
return 64 + 1;
return __ffs64(offset);
}
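/*
 * Worked example (illustrative, not from the original code): a block at
 * offset 0x30000 has its lowest set bit at position 16, so
 * __ffs64(0x30000) == 16, i.e. the block start is 64 KiB aligned. Offset 0
 * satisfies any alignment, hence the 64 + 1 sentinel above.
 */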
RB_DECLARE_CALLBACKS_MAX(static, gpu_buddy_augment_cb,
struct gpu_buddy_block, rb,
unsigned int, subtree_max_alignment,
gpu_buddy_block_offset_alignment);
static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
struct gpu_buddy_block *parent,
unsigned int order,
u64 offset)
{
struct gpu_buddy_block *block;
BUG_ON(order > GPU_BUDDY_MAX_ORDER);
block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
if (!block)
return NULL;
block->header = offset;
block->header |= order;
block->parent = parent;
RB_CLEAR_NODE(&block->rb);
BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
return block;
}
static void gpu_block_free(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
kmem_cache_free(slab_blocks, block);
}
static enum gpu_buddy_free_tree
get_block_tree(struct gpu_buddy_block *block)
{
return gpu_buddy_block_is_clear(block) ?
GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
}
static struct gpu_buddy_block *
rbtree_get_free_block(const struct rb_node *node)
{
return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
}
static struct gpu_buddy_block *
rbtree_last_free_block(struct rb_root *root)
{
return rbtree_get_free_block(rb_last(root));
}
static bool rbtree_is_empty(struct rb_root *root)
{
return RB_EMPTY_ROOT(root);
}
static void rbtree_insert(struct gpu_buddy *mm,
struct gpu_buddy_block *block,
enum gpu_buddy_free_tree tree)
{
struct rb_node **link, *parent = NULL;
unsigned int block_alignment, order;
struct gpu_buddy_block *node;
struct rb_root *root;
order = gpu_buddy_block_order(block);
block_alignment = gpu_buddy_block_offset_alignment(block);
root = &mm->free_trees[tree][order];
link = &root->rb_node;
while (*link) {
parent = *link;
node = rbtree_get_free_block(parent);
		/*
		 * Manually update the augmented data while walking down for the
		 * insertion: rb_insert_augmented() only invokes the rotate
		 * callback when it rebalances, so propagate here to keep
		 * subtree_max_alignment correct for every ancestor on the
		 * insertion path.
		 */
if (node->subtree_max_alignment < block_alignment)
node->subtree_max_alignment = block_alignment;
if (gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node))
link = &parent->rb_left;
else
link = &parent->rb_right;
}
block->subtree_max_alignment = block_alignment;
rb_link_node(&block->rb, parent, link);
rb_insert_augmented(&block->rb, root, &gpu_buddy_augment_cb);
}
static void rbtree_remove(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
unsigned int order = gpu_buddy_block_order(block);
enum gpu_buddy_free_tree tree;
struct rb_root *root;
tree = get_block_tree(block);
root = &mm->free_trees[tree][order];
rb_erase_augmented(&block->rb, root, &gpu_buddy_augment_cb);
RB_CLEAR_NODE(&block->rb);
}
static void clear_reset(struct gpu_buddy_block *block)
{
block->header &= ~GPU_BUDDY_HEADER_CLEAR;
}
static void mark_cleared(struct gpu_buddy_block *block)
{
block->header |= GPU_BUDDY_HEADER_CLEAR;
}
static void mark_allocated(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
block->header &= ~GPU_BUDDY_HEADER_STATE;
block->header |= GPU_BUDDY_ALLOCATED;
rbtree_remove(mm, block);
}
static void mark_free(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
enum gpu_buddy_free_tree tree;
block->header &= ~GPU_BUDDY_HEADER_STATE;
block->header |= GPU_BUDDY_FREE;
tree = get_block_tree(block);
rbtree_insert(mm, block, tree);
}
static void mark_split(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
block->header &= ~GPU_BUDDY_HEADER_STATE;
block->header |= GPU_BUDDY_SPLIT;
rbtree_remove(mm, block);
}
static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
{
return s1 <= e2 && e1 >= s2;
}
static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
{
return s1 <= s2 && e1 >= e2;
}
static struct gpu_buddy_block *
__get_buddy(struct gpu_buddy_block *block)
{
struct gpu_buddy_block *parent;
parent = block->parent;
if (!parent)
return NULL;
if (parent->left == block)
return parent->right;
return parent->left;
}
static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
struct gpu_buddy_block *block,
bool force_merge)
{
struct gpu_buddy_block *parent;
unsigned int order;
while ((parent = block->parent)) {
struct gpu_buddy_block *buddy;
buddy = __get_buddy(block);
if (!gpu_buddy_block_is_free(buddy))
break;
if (!force_merge) {
			/*
			 * Compare the clear state of the block and its buddy
			 * and stop merging here if the two states differ.
			 */
if (gpu_buddy_block_is_clear(block) !=
gpu_buddy_block_is_clear(buddy))
break;
if (gpu_buddy_block_is_clear(block))
mark_cleared(parent);
}
rbtree_remove(mm, buddy);
if (force_merge && gpu_buddy_block_is_clear(buddy))
mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
gpu_block_free(mm, block);
gpu_block_free(mm, buddy);
block = parent;
}
order = gpu_buddy_block_order(block);
mark_free(mm, block);
return order;
}
static int __force_merge(struct gpu_buddy *mm,
u64 start,
u64 end,
unsigned int min_order)
{
unsigned int tree, order;
int i;
if (!min_order)
return -ENOMEM;
if (min_order > mm->max_order)
return -EINVAL;
for_each_free_tree(tree) {
for (i = min_order - 1; i >= 0; i--) {
struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
while (iter) {
struct gpu_buddy_block *block, *buddy;
u64 block_start, block_end;
block = rbtree_get_free_block(iter);
iter = rb_prev(iter);
if (!block || !block->parent)
continue;
block_start = gpu_buddy_block_offset(block);
block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
if (!contains(start, end, block_start, block_end))
continue;
buddy = __get_buddy(block);
if (!gpu_buddy_block_is_free(buddy))
continue;
gpu_buddy_assert(gpu_buddy_block_is_clear(block) !=
gpu_buddy_block_is_clear(buddy));
				/*
				 * If the next node to visit is this block's buddy,
				 * skip past it: freeing the block below also removes
				 * (and frees) the buddy, which would otherwise leave
				 * the iterator dangling.
				 */
if (iter == &buddy->rb)
iter = rb_prev(iter);
rbtree_remove(mm, block);
if (gpu_buddy_block_is_clear(block))
mm->clear_avail -= gpu_buddy_block_size(mm, block);
order = __gpu_buddy_free(mm, block, true);
if (order >= min_order)
return 0;
}
}
}
return -ENOMEM;
}
/**
* gpu_buddy_init - init memory manager
*
* @mm: GPU buddy manager to initialize
* @size: size in bytes to manage
* @chunk_size: minimum page size in bytes for our allocations
*
* Initializes the memory manager and its resources.
*
* Returns:
* 0 on success, error code on failure.
*/
int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
{
unsigned int i, j, root_count = 0;
u64 offset = 0;
if (size < chunk_size)
return -EINVAL;
if (chunk_size < SZ_4K)
return -EINVAL;
if (!is_power_of_2(chunk_size))
return -EINVAL;
size = round_down(size, chunk_size);
mm->size = size;
mm->avail = size;
mm->clear_avail = 0;
mm->chunk_size = chunk_size;
mm->max_order = ilog2(size) - ilog2(chunk_size);
BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);
mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
sizeof(*mm->free_trees),
GFP_KERNEL);
if (!mm->free_trees)
return -ENOMEM;
for_each_free_tree(i) {
mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
sizeof(struct rb_root),
GFP_KERNEL);
if (!mm->free_trees[i])
goto out_free_tree;
for (j = 0; j <= mm->max_order; ++j)
mm->free_trees[i][j] = RB_ROOT;
}
mm->n_roots = hweight64(size);
mm->roots = kmalloc_array(mm->n_roots,
sizeof(struct gpu_buddy_block *),
GFP_KERNEL);
if (!mm->roots)
goto out_free_tree;
/*
* Split into power-of-two blocks, in case we are given a size that is
* not itself a power-of-two.
*/
do {
struct gpu_buddy_block *root;
unsigned int order;
u64 root_size;
order = ilog2(size) - ilog2(chunk_size);
root_size = chunk_size << order;
root = gpu_block_alloc(mm, NULL, order, offset);
if (!root)
goto out_free_roots;
mark_free(mm, root);
BUG_ON(root_count > mm->max_order);
BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);
mm->roots[root_count] = root;
offset += root_size;
size -= root_size;
root_count++;
} while (size);
return 0;
out_free_roots:
while (root_count--)
gpu_block_free(mm, mm->roots[root_count]);
kfree(mm->roots);
out_free_tree:
while (i--)
kfree(mm->free_trees[i]);
kfree(mm->free_trees);
return -ENOMEM;
}
EXPORT_SYMBOL(gpu_buddy_init);
/**
* gpu_buddy_fini - tear down the memory manager
*
* @mm: GPU buddy manager to free
*
 * Clean up the memory manager resources and the free trees.
*/
void gpu_buddy_fini(struct gpu_buddy *mm)
{
u64 root_size, size, start;
unsigned int order;
int i;
size = mm->size;
for (i = 0; i < mm->n_roots; ++i) {
order = ilog2(size) - ilog2(mm->chunk_size);
start = gpu_buddy_block_offset(mm->roots[i]);
__force_merge(mm, start, start + size, order);
gpu_buddy_assert(gpu_buddy_block_is_free(mm->roots[i]));
gpu_block_free(mm, mm->roots[i]);
root_size = mm->chunk_size << order;
size -= root_size;
}
gpu_buddy_assert(mm->avail == mm->size);
for_each_free_tree(i)
kfree(mm->free_trees[i]);
kfree(mm->free_trees);
kfree(mm->roots);
}
EXPORT_SYMBOL(gpu_buddy_fini);
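/*
 * Illustrative usage sketch, not part of the allocator: bring up a manager
 * for a hypothetical 1 GiB range with 4 KiB minimum chunks and tear it down
 * again. The helper name and the sizes below are assumptions made purely for
 * the example; the prototypes are assumed to come from <linux/gpu_buddy.h>
 * as included above.
 */
static int __maybe_unused gpu_buddy_example_init(void)
{
	struct gpu_buddy mm;
	int err;

	/* Manage 1 GiB of address space in 4 KiB minimum chunks. */
	err = gpu_buddy_init(&mm, SZ_1G, SZ_4K);
	if (err)
		return err;

	/* ... allocate and free blocks here ... */

	gpu_buddy_fini(&mm);
	return 0;
}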
static int split_block(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
unsigned int block_order = gpu_buddy_block_order(block) - 1;
u64 offset = gpu_buddy_block_offset(block);
BUG_ON(!gpu_buddy_block_is_free(block));
BUG_ON(!gpu_buddy_block_order(block));
block->left = gpu_block_alloc(mm, block, block_order, offset);
if (!block->left)
return -ENOMEM;
block->right = gpu_block_alloc(mm, block, block_order,
offset + (mm->chunk_size << block_order));
if (!block->right) {
gpu_block_free(mm, block->left);
return -ENOMEM;
}
mark_split(mm, block);
if (gpu_buddy_block_is_clear(block)) {
mark_cleared(block->left);
mark_cleared(block->right);
clear_reset(block);
}
mark_free(mm, block->left);
mark_free(mm, block->right);
return 0;
}
/**
* gpu_buddy_reset_clear - reset blocks clear state
*
* @mm: GPU buddy manager
* @is_clear: blocks clear state
*
 * Set or reset the clear state of each block in the free trees, based on
 * the @is_clear value, and move each block to the matching free tree.
*/
void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
{
enum gpu_buddy_free_tree src_tree, dst_tree;
u64 root_size, size, start;
unsigned int order;
int i;
size = mm->size;
for (i = 0; i < mm->n_roots; ++i) {
order = ilog2(size) - ilog2(mm->chunk_size);
start = gpu_buddy_block_offset(mm->roots[i]);
__force_merge(mm, start, start + size, order);
root_size = mm->chunk_size << order;
size -= root_size;
}
src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
for (i = 0; i <= mm->max_order; ++i) {
struct rb_root *root = &mm->free_trees[src_tree][i];
struct gpu_buddy_block *block, *tmp;
rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
rbtree_remove(mm, block);
if (is_clear) {
mark_cleared(block);
mm->clear_avail += gpu_buddy_block_size(mm, block);
} else {
clear_reset(block);
mm->clear_avail -= gpu_buddy_block_size(mm, block);
}
rbtree_insert(mm, block, dst_tree);
}
}
}
EXPORT_SYMBOL(gpu_buddy_reset_clear);
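/*
 * Illustrative sketch (assumed call site, not part of this file): after a
 * device-wide clear of all free memory a driver could mark every free block
 * as cleared with:
 *
 *	gpu_buddy_reset_clear(&mm, true);
 */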
/**
* gpu_buddy_free_block - free a block
*
* @mm: GPU buddy manager
* @block: block to be freed
*/
void gpu_buddy_free_block(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
BUG_ON(!gpu_buddy_block_is_allocated(block));
mm->avail += gpu_buddy_block_size(mm, block);
if (gpu_buddy_block_is_clear(block))
mm->clear_avail += gpu_buddy_block_size(mm, block);
__gpu_buddy_free(mm, block, false);
}
EXPORT_SYMBOL(gpu_buddy_free_block);
static void __gpu_buddy_free_list(struct gpu_buddy *mm,
struct list_head *objects,
bool mark_clear,
bool mark_dirty)
{
struct gpu_buddy_block *block, *on;
gpu_buddy_assert(!(mark_dirty && mark_clear));
list_for_each_entry_safe(block, on, objects, link) {
if (mark_clear)
mark_cleared(block);
else if (mark_dirty)
clear_reset(block);
gpu_buddy_free_block(mm, block);
cond_resched();
}
INIT_LIST_HEAD(objects);
}
static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
struct list_head *objects)
{
/*
* Don't touch the clear/dirty bit, since allocation is still internal
* at this point. For example we might have just failed part of the
* allocation.
*/
__gpu_buddy_free_list(mm, objects, false, false);
}
/**
* gpu_buddy_free_list - free blocks
*
* @mm: GPU buddy manager
 * @objects: list of blocks to free
* @flags: optional flags like GPU_BUDDY_CLEARED
*/
void gpu_buddy_free_list(struct gpu_buddy *mm,
struct list_head *objects,
unsigned int flags)
{
bool mark_clear = flags & GPU_BUDDY_CLEARED;
__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
}
EXPORT_SYMBOL(gpu_buddy_free_list);
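/*
 * Illustrative sketch (assumed call site): blocks whose contents were wiped
 * by the driver before freeing can be returned with the cleared hint so
 * they are inserted into the clear free tree:
 *
 *	gpu_buddy_free_list(mm, &blocks, GPU_BUDDY_CLEARED);
 */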
static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
{
bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;
return needs_clear != gpu_buddy_block_is_clear(block);
}
static struct gpu_buddy_block *
__alloc_range_bias(struct gpu_buddy *mm,
u64 start, u64 end,
unsigned int order,
unsigned long flags,
bool fallback)
{
u64 req_size = mm->chunk_size << order;
struct gpu_buddy_block *block;
struct gpu_buddy_block *buddy;
LIST_HEAD(dfs);
int err;
int i;
end = end - 1;
for (i = 0; i < mm->n_roots; ++i)
list_add_tail(&mm->roots[i]->tmp_link, &dfs);
do {
u64 block_start;
u64 block_end;
block = list_first_entry_or_null(&dfs,
struct gpu_buddy_block,
tmp_link);
if (!block)
break;
list_del(&block->tmp_link);
if (gpu_buddy_block_order(block) < order)
continue;
block_start = gpu_buddy_block_offset(block);
block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
if (!overlaps(start, end, block_start, block_end))
continue;
if (gpu_buddy_block_is_allocated(block))
continue;
if (block_start < start || block_end > end) {
u64 adjusted_start = max(block_start, start);
u64 adjusted_end = min(block_end, end);
if (round_down(adjusted_end + 1, req_size) <=
round_up(adjusted_start, req_size))
continue;
}
if (!fallback && block_incompatible(block, flags))
continue;
if (contains(start, end, block_start, block_end) &&
order == gpu_buddy_block_order(block)) {
/*
* Find the free block within the range.
*/
if (gpu_buddy_block_is_free(block))
return block;
continue;
}
if (!gpu_buddy_block_is_split(block)) {
err = split_block(mm, block);
if (unlikely(err))
goto err_undo;
}
list_add(&block->right->tmp_link, &dfs);
list_add(&block->left->tmp_link, &dfs);
} while (1);
return ERR_PTR(-ENOSPC);
err_undo:
/*
* We really don't want to leave around a bunch of split blocks, since
* bigger is better, so make sure we merge everything back before we
* free the allocated blocks.
*/
buddy = __get_buddy(block);
if (buddy &&
(gpu_buddy_block_is_free(block) &&
gpu_buddy_block_is_free(buddy)))
__gpu_buddy_free(mm, block, false);
return ERR_PTR(err);
}
static struct gpu_buddy_block *
__gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
u64 start, u64 end,
unsigned int order,
unsigned long flags)
{
struct gpu_buddy_block *block;
bool fallback = false;
block = __alloc_range_bias(mm, start, end, order,
flags, fallback);
if (IS_ERR(block))
return __alloc_range_bias(mm, start, end, order,
flags, !fallback);
return block;
}
static struct gpu_buddy_block *
get_maxblock(struct gpu_buddy *mm,
unsigned int order,
enum gpu_buddy_free_tree tree)
{
struct gpu_buddy_block *max_block = NULL, *block = NULL;
struct rb_root *root;
unsigned int i;
for (i = order; i <= mm->max_order; ++i) {
root = &mm->free_trees[tree][i];
block = rbtree_last_free_block(root);
if (!block)
continue;
if (!max_block) {
max_block = block;
continue;
}
if (gpu_buddy_block_offset(block) >
gpu_buddy_block_offset(max_block)) {
max_block = block;
}
}
return max_block;
}
static struct gpu_buddy_block *
alloc_from_freetree(struct gpu_buddy *mm,
unsigned int order,
unsigned long flags)
{
struct gpu_buddy_block *block = NULL;
struct rb_root *root;
enum gpu_buddy_free_tree tree;
unsigned int tmp;
int err;
tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
block = get_maxblock(mm, order, tree);
if (block)
/* Store the obtained block order */
tmp = gpu_buddy_block_order(block);
} else {
for (tmp = order; tmp <= mm->max_order; ++tmp) {
/* Get RB tree root for this order and tree */
root = &mm->free_trees[tree][tmp];
block = rbtree_last_free_block(root);
if (block)
break;
}
}
if (!block) {
/* Try allocating from the other tree */
tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
for (tmp = order; tmp <= mm->max_order; ++tmp) {
root = &mm->free_trees[tree][tmp];
block = rbtree_last_free_block(root);
if (block)
break;
}
if (!block)
return ERR_PTR(-ENOSPC);
}
BUG_ON(!gpu_buddy_block_is_free(block));
while (tmp != order) {
err = split_block(mm, block);
if (unlikely(err))
goto err_undo;
block = block->right;
tmp--;
}
return block;
err_undo:
if (tmp != order)
__gpu_buddy_free(mm, block, false);
return ERR_PTR(err);
}
static bool
gpu_buddy_can_offset_align(u64 size, u64 min_block_size)
{
return size < min_block_size && is_power_of_2(size);
}
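/*
 * Worked example (illustrative): size = SZ_64K with min_block_size = SZ_2M
 * qualifies for offset-aligned allocation (64 KiB < 2 MiB and a power of
 * two), whereas size = 192 KiB does not, as it is not a power of two.
 */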
static bool gpu_buddy_subtree_can_satisfy(struct rb_node *node,
unsigned int alignment)
{
struct gpu_buddy_block *block;
block = rbtree_get_free_block(node);
return block->subtree_max_alignment >= alignment;
}
static struct gpu_buddy_block *
gpu_buddy_find_block_aligned(struct gpu_buddy *mm,
enum gpu_buddy_free_tree tree,
unsigned int order,
unsigned int alignment,
unsigned long flags)
{
struct rb_root *root = &mm->free_trees[tree][order];
struct rb_node *rb = root->rb_node;
while (rb) {
struct gpu_buddy_block *block = rbtree_get_free_block(rb);
struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
if (right_node) {
if (gpu_buddy_subtree_can_satisfy(right_node, alignment)) {
rb = right_node;
continue;
}
}
if (gpu_buddy_block_offset_alignment(block) >= alignment)
return block;
if (left_node) {
if (gpu_buddy_subtree_can_satisfy(left_node, alignment)) {
rb = left_node;
continue;
}
}
break;
}
return NULL;
}
static struct gpu_buddy_block *
gpu_buddy_offset_aligned_allocation(struct gpu_buddy *mm,
u64 size,
u64 min_block_size,
unsigned long flags)
{
struct gpu_buddy_block *block = NULL;
unsigned int order, tmp, alignment;
struct gpu_buddy_block *buddy;
enum gpu_buddy_free_tree tree;
unsigned long pages;
int err;
alignment = ilog2(min_block_size);
pages = size >> ilog2(mm->chunk_size);
order = fls(pages) - 1;
tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
for (tmp = order; tmp <= mm->max_order; ++tmp) {
block = gpu_buddy_find_block_aligned(mm, tree, tmp,
alignment, flags);
if (!block) {
tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
block = gpu_buddy_find_block_aligned(mm, tree, tmp,
alignment, flags);
}
if (block)
break;
}
if (!block)
return ERR_PTR(-ENOSPC);
while (gpu_buddy_block_order(block) > order) {
struct gpu_buddy_block *left, *right;
err = split_block(mm, block);
if (unlikely(err))
goto err_undo;
left = block->left;
right = block->right;
if (gpu_buddy_block_offset_alignment(right) >= alignment)
block = right;
else
block = left;
}
return block;
err_undo:
/*
* We really don't want to leave around a bunch of split blocks, since
* bigger is better, so make sure we merge everything back before we
* free the allocated blocks.
*/
buddy = __get_buddy(block);
if (buddy &&
(gpu_buddy_block_is_free(block) &&
gpu_buddy_block_is_free(buddy)))
__gpu_buddy_free(mm, block, false);
return ERR_PTR(err);
}
static int __alloc_range(struct gpu_buddy *mm,
struct list_head *dfs,
u64 start, u64 size,
struct list_head *blocks,
u64 *total_allocated_on_err)
{
struct gpu_buddy_block *block;
struct gpu_buddy_block *buddy;
u64 total_allocated = 0;
LIST_HEAD(allocated);
u64 end;
int err;
end = start + size - 1;
do {
u64 block_start;
u64 block_end;
block = list_first_entry_or_null(dfs,
struct gpu_buddy_block,
tmp_link);
if (!block)
break;
list_del(&block->tmp_link);
block_start = gpu_buddy_block_offset(block);
block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
if (!overlaps(start, end, block_start, block_end))
continue;
if (gpu_buddy_block_is_allocated(block)) {
err = -ENOSPC;
goto err_free;
}
if (contains(start, end, block_start, block_end)) {
if (gpu_buddy_block_is_free(block)) {
mark_allocated(mm, block);
total_allocated += gpu_buddy_block_size(mm, block);
mm->avail -= gpu_buddy_block_size(mm, block);
if (gpu_buddy_block_is_clear(block))
mm->clear_avail -= gpu_buddy_block_size(mm, block);
list_add_tail(&block->link, &allocated);
continue;
} else if (!mm->clear_avail) {
err = -ENOSPC;
goto err_free;
}
}
if (!gpu_buddy_block_is_split(block)) {
err = split_block(mm, block);
if (unlikely(err))
goto err_undo;
}
list_add(&block->right->tmp_link, dfs);
list_add(&block->left->tmp_link, dfs);
} while (1);
if (total_allocated < size) {
err = -ENOSPC;
goto err_free;
}
list_splice_tail(&allocated, blocks);
return 0;
err_undo:
/*
* We really don't want to leave around a bunch of split blocks, since
* bigger is better, so make sure we merge everything back before we
* free the allocated blocks.
*/
buddy = __get_buddy(block);
if (buddy &&
(gpu_buddy_block_is_free(block) &&
gpu_buddy_block_is_free(buddy)))
__gpu_buddy_free(mm, block, false);
err_free:
if (err == -ENOSPC && total_allocated_on_err) {
list_splice_tail(&allocated, blocks);
*total_allocated_on_err = total_allocated;
} else {
gpu_buddy_free_list_internal(mm, &allocated);
}
return err;
}
static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
u64 start,
u64 size,
u64 *total_allocated_on_err,
struct list_head *blocks)
{
LIST_HEAD(dfs);
int i;
for (i = 0; i < mm->n_roots; ++i)
list_add_tail(&mm->roots[i]->tmp_link, &dfs);
return __alloc_range(mm, &dfs, start, size,
blocks, total_allocated_on_err);
}
static int __alloc_contig_try_harder(struct gpu_buddy *mm,
u64 size,
u64 min_block_size,
struct list_head *blocks)
{
u64 rhs_offset, lhs_offset, lhs_size, filled;
struct gpu_buddy_block *block;
unsigned int tree, order;
LIST_HEAD(blocks_lhs);
unsigned long pages;
u64 modify_size;
int err;
modify_size = rounddown_pow_of_two(size);
pages = modify_size >> ilog2(mm->chunk_size);
order = fls(pages) - 1;
if (order == 0)
return -ENOSPC;
for_each_free_tree(tree) {
struct rb_root *root;
struct rb_node *iter;
root = &mm->free_trees[tree][order];
if (rbtree_is_empty(root))
continue;
iter = rb_last(root);
while (iter) {
block = rbtree_get_free_block(iter);
/* Allocate blocks traversing RHS */
rhs_offset = gpu_buddy_block_offset(block);
err = __gpu_buddy_alloc_range(mm, rhs_offset, size,
&filled, blocks);
if (!err || err != -ENOSPC)
return err;
lhs_size = max((size - filled), min_block_size);
if (!IS_ALIGNED(lhs_size, min_block_size))
lhs_size = round_up(lhs_size, min_block_size);
/* Allocate blocks traversing LHS */
lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
err = __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
NULL, &blocks_lhs);
if (!err) {
list_splice(&blocks_lhs, blocks);
return 0;
} else if (err != -ENOSPC) {
gpu_buddy_free_list_internal(mm, blocks);
return err;
}
/* Free blocks for the next iteration */
gpu_buddy_free_list_internal(mm, blocks);
iter = rb_prev(iter);
}
}
return -ENOSPC;
}
/**
* gpu_buddy_block_trim - free unused pages
*
* @mm: GPU buddy manager
 * @start: optional start address to begin the trimming from
 * @new_size: original (actual) size requested, to which the block is trimmed
* @blocks: Input and output list of allocated blocks.
* MUST contain single block as input to be trimmed.
* On success will contain the newly allocated blocks
* making up the @new_size. Blocks always appear in
* ascending order
*
 * For contiguous allocations the size is rounded up to the nearest
 * power-of-two value; drivers consume only the *actual* size, so the
 * remaining, unused portion can optionally be freed with this function.
*
* Returns:
* 0 on success, error code on failure.
*/
int gpu_buddy_block_trim(struct gpu_buddy *mm,
u64 *start,
u64 new_size,
struct list_head *blocks)
{
struct gpu_buddy_block *parent;
struct gpu_buddy_block *block;
u64 block_start, block_end;
LIST_HEAD(dfs);
u64 new_start;
int err;
if (!list_is_singular(blocks))
return -EINVAL;
block = list_first_entry(blocks,
struct gpu_buddy_block,
link);
block_start = gpu_buddy_block_offset(block);
block_end = block_start + gpu_buddy_block_size(mm, block);
if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
return -EINVAL;
if (new_size > gpu_buddy_block_size(mm, block))
return -EINVAL;
if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
return -EINVAL;
if (new_size == gpu_buddy_block_size(mm, block))
return 0;
new_start = block_start;
if (start) {
new_start = *start;
if (new_start < block_start)
return -EINVAL;
if (!IS_ALIGNED(new_start, mm->chunk_size))
return -EINVAL;
if (range_overflows(new_start, new_size, block_end))
return -EINVAL;
}
list_del(&block->link);
mark_free(mm, block);
mm->avail += gpu_buddy_block_size(mm, block);
if (gpu_buddy_block_is_clear(block))
mm->clear_avail += gpu_buddy_block_size(mm, block);
/* Prevent recursively freeing this node */
parent = block->parent;
block->parent = NULL;
list_add(&block->tmp_link, &dfs);
err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
if (err) {
mark_allocated(mm, block);
mm->avail -= gpu_buddy_block_size(mm, block);
if (gpu_buddy_block_is_clear(block))
mm->clear_avail -= gpu_buddy_block_size(mm, block);
list_add(&block->link, blocks);
}
block->parent = parent;
return err;
}
EXPORT_SYMBOL(gpu_buddy_block_trim);
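/*
 * Illustrative trim sketch, not part of the allocator: a contiguous request
 * of 768 KiB is rounded up to a single 1 MiB block, so the unused 256 KiB
 * tail can be handed back once the allocation has succeeded. The helper name
 * and the sizes are assumptions made purely for the example; @blocks must
 * hold exactly the one allocated block.
 */
static int __maybe_unused gpu_buddy_example_trim(struct gpu_buddy *mm,
						 struct list_head *blocks)
{
	/* Keep the first 768 KiB, free the trailing 256 KiB of the block. */
	return gpu_buddy_block_trim(mm, NULL, SZ_512K + SZ_256K, blocks);
}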
static struct gpu_buddy_block *
__gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
u64 start, u64 end,
u64 size, u64 min_block_size,
unsigned int order,
unsigned long flags)
{
if (flags & GPU_BUDDY_RANGE_ALLOCATION)
/* Allocate traversing within the range */
return __gpu_buddy_alloc_range_bias(mm, start, end,
order, flags);
else if (size < min_block_size)
/* Allocate from an offset-aligned region without size rounding */
return gpu_buddy_offset_aligned_allocation(mm, size,
min_block_size,
flags);
else
/* Allocate from freetree */
return alloc_from_freetree(mm, order, flags);
}
/**
* gpu_buddy_alloc_blocks - allocate power-of-two blocks
*
* @mm: GPU buddy manager to allocate from
* @start: start of the allowed range for this block
* @end: end of the allowed range for this block
* @size: size of the allocation in bytes
* @min_block_size: alignment of the allocation
* @blocks: output list head to add allocated blocks
* @flags: GPU_BUDDY_*_ALLOCATION flags
*
 * When a range restriction is given, __gpu_buddy_alloc_range_bias() is
 * called to traverse the tree and return the desired block.
 *
 * When *no* range restrictions are enforced, alloc_from_freetree() is
 * called to pick a suitable block straight from the free trees.
*
* Returns:
* 0 on success, error code on failure.
*/
int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
u64 start, u64 end, u64 size,
u64 min_block_size,
struct list_head *blocks,
unsigned long flags)
{
struct gpu_buddy_block *block = NULL;
u64 original_size, original_min_size;
unsigned int min_order, order;
LIST_HEAD(allocated);
unsigned long pages;
int err;
if (size < mm->chunk_size)
return -EINVAL;
if (min_block_size < mm->chunk_size)
return -EINVAL;
if (!is_power_of_2(min_block_size))
return -EINVAL;
if (!IS_ALIGNED(start | end | size, mm->chunk_size))
return -EINVAL;
if (end > mm->size)
return -EINVAL;
if (range_overflows(start, size, mm->size))
return -EINVAL;
/* Actual range allocation */
if (start + size == end) {
if (!IS_ALIGNED(start | end, min_block_size))
return -EINVAL;
return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
}
original_size = size;
original_min_size = min_block_size;
/* Roundup the size to power of 2 */
if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
size = roundup_pow_of_two(size);
min_block_size = size;
/*
* Normalize the requested size to min_block_size for regular allocations.
* Offset-aligned allocations intentionally skip size rounding.
*/
} else if (!gpu_buddy_can_offset_align(size, min_block_size)) {
size = round_up(size, min_block_size);
}
pages = size >> ilog2(mm->chunk_size);
order = fls(pages) - 1;
min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
if (order > mm->max_order || size > mm->size) {
if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
!(flags & GPU_BUDDY_RANGE_ALLOCATION))
return __alloc_contig_try_harder(mm, original_size,
original_min_size, blocks);
return -EINVAL;
}
do {
order = min(order, (unsigned int)fls(pages) - 1);
BUG_ON(order > mm->max_order);
/*
* Regular allocations must not allocate blocks smaller than min_block_size.
* Offset-aligned allocations deliberately bypass this constraint.
*/
BUG_ON(size >= min_block_size && order < min_order);
do {
unsigned int fallback_order;
block = __gpu_buddy_alloc_blocks(mm, start,
end,
size,
min_block_size,
order,
flags);
if (!IS_ERR(block))
break;
if (size < min_block_size) {
fallback_order = order;
} else if (order == min_order) {
fallback_order = min_order;
} else {
order--;
continue;
}
/* Try allocation through force merge method */
if (mm->clear_avail &&
!__force_merge(mm, start, end, fallback_order)) {
block = __gpu_buddy_alloc_blocks(mm, start,
end,
size,
min_block_size,
fallback_order,
flags);
if (!IS_ERR(block)) {
order = fallback_order;
break;
}
}
/*
* Try contiguous block allocation through
* try harder method.
*/
if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
!(flags & GPU_BUDDY_RANGE_ALLOCATION))
return __alloc_contig_try_harder(mm,
original_size,
original_min_size,
blocks);
err = -ENOSPC;
goto err_free;
} while (1);
mark_allocated(mm, block);
mm->avail -= gpu_buddy_block_size(mm, block);
if (gpu_buddy_block_is_clear(block))
mm->clear_avail -= gpu_buddy_block_size(mm, block);
kmemleak_update_trace(block);
list_add_tail(&block->link, &allocated);
pages -= BIT(order);
if (!pages)
break;
} while (1);
/* Trim the allocated block to the required size */
if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
original_size != size) {
struct list_head *trim_list;
LIST_HEAD(temp);
u64 trim_size;
trim_list = &allocated;
trim_size = original_size;
if (!list_is_singular(&allocated)) {
block = list_last_entry(&allocated, typeof(*block), link);
list_move(&block->link, &temp);
trim_list = &temp;
trim_size = gpu_buddy_block_size(mm, block) -
(size - original_size);
}
gpu_buddy_block_trim(mm,
NULL,
trim_size,
trim_list);
if (!list_empty(&temp))
list_splice_tail(trim_list, &allocated);
}
list_splice_tail(&allocated, blocks);
return 0;
err_free:
gpu_buddy_free_list_internal(mm, &allocated);
return err;
}
EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
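/*
 * Illustrative allocation sketch, not part of the allocator: grab 8 MiB of
 * cleared memory from the lower half of a manager and hand it straight
 * back. The helper name, the sizes, the range and the flag combination are
 * assumptions made purely for the example and presume a 4 KiB chunk_size.
 */
static int __maybe_unused gpu_buddy_example_alloc(struct gpu_buddy *mm)
{
	LIST_HEAD(blocks);
	int err;

	/* 8 MiB, 4 KiB minimum block size, restricted to [0, mm->size / 2). */
	err = gpu_buddy_alloc_blocks(mm, 0, mm->size / 2, SZ_8M, SZ_4K,
				     &blocks,
				     GPU_BUDDY_RANGE_ALLOCATION |
				     GPU_BUDDY_CLEAR_ALLOCATION);
	if (err)
		return err;

	/* Return everything; passing no flags reports the blocks as dirty. */
	gpu_buddy_free_list(mm, &blocks, 0);
	return 0;
}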
/**
* gpu_buddy_block_print - print block information
*
* @mm: GPU buddy manager
* @block: GPU buddy block
*/
void gpu_buddy_block_print(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
u64 start = gpu_buddy_block_offset(block);
u64 size = gpu_buddy_block_size(mm, block);
pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
}
EXPORT_SYMBOL(gpu_buddy_block_print);
/**
* gpu_buddy_print - print allocator state
*
* @mm: GPU buddy manager
*/
void gpu_buddy_print(struct gpu_buddy *mm)
{
int order;
pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
for (order = mm->max_order; order >= 0; order--) {
struct gpu_buddy_block *block, *tmp;
struct rb_root *root;
u64 count = 0, free;
unsigned int tree;
for_each_free_tree(tree) {
root = &mm->free_trees[tree][order];
rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
BUG_ON(!gpu_buddy_block_is_free(block));
count++;
}
}
free = count * (mm->chunk_size << order);
if (free < SZ_1M)
pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
order, free >> 10, count);
else
pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
order, free >> 20, count);
}
}
EXPORT_SYMBOL(gpu_buddy_print);
static void gpu_buddy_module_exit(void)
{
kmem_cache_destroy(slab_blocks);
}
static int __init gpu_buddy_module_init(void)
{
slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
if (!slab_blocks)
return -ENOMEM;
return 0;
}
module_init(gpu_buddy_module_init);
module_exit(gpu_buddy_module_exit);
MODULE_DESCRIPTION("GPU Buddy Allocator");
MODULE_LICENSE("Dual MIT/GPL");