| /* |
| * Generic btree operations |
| * |
| * Original copyright (c) 2008 Daniel Phillips <phillips@phunq.net> |
| * Portions copyright (c) 2006-2008 Google Inc. |
| * Licensed under the GPL version 2 |
| * |
| * By contributing changes to this file you grant the original copyright holder |
| * the right to distribute those changes under any license. |
| */ |
| |
| #include "tux3.h" |
| |
| #ifndef trace |
| #define trace trace_off |
| #endif |
| |
| /* This special value indicates that the btree has no root yet. */ |
| struct root no_root = { |
| .block = 0, |
| .depth = 0, |
| }; |
| |
| struct bnode { |
| __be16 magic; |
| __be16 unused; |
| __be32 count; |
| struct index_entry { |
| __be64 key; |
| __be64 block; |
| } entries[]; |
| }; |
| |
| /* |
| * Note that the first key of an index block is never accessed. This is |
| * because a btree index node always holds one more node pointer than |
| * keys; in other words, keys lie between node pointers. I plan to |
| * micro-optimize by placing the node count in the unused first key, |
| * which would allow a node to hold an aesthetically pleasing binary |
| * number of pointers. (Not done yet.) |
| */ |
| |
| unsigned calc_entries_per_node(unsigned blocksize) |
| { |
| return (blocksize - sizeof(struct bnode)) / sizeof(struct index_entry); |
| } |
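| |
| /* |
| * Worked example (a sketch, assuming a 4096-byte block): the bnode |
| * header above is 8 bytes (magic, unused, count) and each index_entry |
| * is 16 bytes, so (4096 - 8) / 16 = 255 index entries fit per node. |
| */ |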
| |
| static inline unsigned bcount(struct bnode *node) |
| { |
| return be32_to_cpu(node->count); |
| } |
| |
| static struct buffer_head *new_block(struct btree *btree) |
| { |
| block_t block; |
| |
| block = balloc_one(btree->sb); |
| if (block < 0) |
| return ERR_PTR(block); |
| struct buffer_head *buffer = vol_getblk(btree->sb, block); |
| if (!buffer) |
| return ERR_PTR(-ENOMEM); /* FIXME: should we bfree() the just-allocated block? */ |
| return buffer; |
| } |
| |
| struct buffer_head *new_leaf(struct btree *btree) |
| { |
| struct buffer_head *buffer = new_block(btree); |
| |
| if (!IS_ERR(buffer)) { |
| memset(bufdata(buffer), 0, bufsize(buffer)); |
| (btree->ops->leaf_init)(btree, bufdata(buffer)); |
| mark_buffer_dirty_atomic(buffer); |
| } |
| return buffer; |
| } |
| |
| static inline void bnode_buffer_init(struct buffer_head *buffer) |
| { |
| struct bnode *bnode = bufdata(buffer); |
| memset(bnode, 0, bufsize(buffer)); |
| bnode->magic = cpu_to_be16(TUX3_MAGIC_BNODE); |
| } |
| |
| static inline int bnode_sniff(struct bnode *bnode) |
| { |
| return bnode->magic == cpu_to_be16(TUX3_MAGIC_BNODE); |
| } |
| |
| static struct buffer_head *new_node(struct btree *btree) |
| { |
| struct buffer_head *buffer = new_block(btree); |
| |
| if (!IS_ERR(buffer)) { |
| bnode_buffer_init(buffer); |
| mark_buffer_unify_atomic(buffer); |
| } |
| return buffer; |
| } |
| |
| /* |
| * A btree cursor has n + 1 entries for a btree of depth n, with the first n |
| * entries pointing at internal nodes and entry n + 1 pointing at a leaf. |
| * The next field points at the next index entry that will be loaded in a left |
| * to right tree traversal, not the current entry. The next pointer is null |
| * for the leaf, which has its own specialized traversal algorithms. |
| */ |
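| |
| /* |
| * Illustration (assuming depth 2): path[0] and path[1] hold bnode |
| * buffers with ->next pointing at the next index entry to descend |
| * through, while path[2] holds the leaf buffer with a NULL ->next. |
| * release_cursor() pops and releases all of these. |
| */ |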
| |
| static inline struct bnode *level_node(struct cursor *cursor, int level) |
| { |
| return bufdata(cursor->path[level].buffer); |
| } |
| |
| struct buffer_head *cursor_leafbuf(struct cursor *cursor) |
| { |
| assert(cursor->level == cursor->btree->root.depth); |
| return cursor->path[cursor->level].buffer; |
| } |
| |
| static void cursor_root_add(struct cursor *cursor, struct buffer_head *buffer, |
| struct index_entry *next) |
| { |
| #ifdef CURSOR_DEBUG |
| assert(cursor->level < cursor->maxlevel); |
| assert(cursor->path[cursor->level + 1].buffer == FREE_BUFFER); |
| assert(cursor->path[cursor->level + 1].next == FREE_NEXT); |
| #endif |
| vecmove(cursor->path + 1, cursor->path, cursor->level + 1); |
| cursor->level++; |
| cursor->path[0].buffer = buffer; |
| cursor->path[0].next = next; |
| } |
| |
| static void level_replace_blockput(struct cursor *cursor, int level, |
| struct buffer_head *buffer, |
| struct index_entry *next) |
| { |
| #ifdef CURSOR_DEBUG |
| assert(buffer); |
| assert(level <= cursor->level); |
| assert(cursor->path[level].buffer != FREE_BUFFER); |
| assert(cursor->path[level].next != FREE_NEXT); |
| #endif |
| blockput(cursor->path[level].buffer); |
| cursor->path[level].buffer = buffer; |
| cursor->path[level].next = next; |
| } |
| |
| static void cursor_push(struct cursor *cursor, struct buffer_head *buffer, |
| struct index_entry *next) |
| { |
| cursor->level++; |
| #ifdef CURSOR_DEBUG |
| assert(cursor->level <= cursor->maxlevel); |
| assert(cursor->path[cursor->level].buffer == FREE_BUFFER); |
| assert(cursor->path[cursor->level].next == FREE_NEXT); |
| #endif |
| cursor->path[cursor->level].buffer = buffer; |
| cursor->path[cursor->level].next = next; |
| } |
| |
| static struct buffer_head *cursor_pop(struct cursor *cursor) |
| { |
| struct buffer_head *buffer; |
| |
| #ifdef CURSOR_DEBUG |
| assert(cursor->level >= 0); |
| #endif |
| buffer = cursor->path[cursor->level].buffer; |
| #ifdef CURSOR_DEBUG |
| cursor->path[cursor->level].buffer = FREE_BUFFER; |
| cursor->path[cursor->level].next = FREE_NEXT; |
| #endif |
| cursor->level--; |
| return buffer; |
| } |
| |
| static inline void cursor_pop_blockput(struct cursor *cursor) |
| { |
| blockput(cursor_pop(cursor)); |
| } |
| |
| /* Is there no next entry left at this level? */ |
| static inline int level_finished(struct cursor *cursor, int level) |
| { |
| struct bnode *node = level_node(cursor, level); |
| return cursor->path[level].next == node->entries + bcount(node); |
| } |
| /* TODO: also write a level_beginning() counterpart */ |
| |
| void release_cursor(struct cursor *cursor) |
| { |
| while (cursor->level >= 0) |
| cursor_pop_blockput(cursor); |
| } |
| |
| /* unused */ |
| void show_cursor(struct cursor *cursor, int depth) |
| { |
| __tux3_dbg(">>> cursor %p/%i:", cursor, depth); |
| for (int i = 0; i < depth; i++) { |
| __tux3_dbg(" [%Lx/%i]", |
| bufindex(cursor->path[i].buffer), |
| bufcount(cursor->path[i].buffer)); |
| } |
| __tux3_dbg("\n"); |
| } |
| |
| static void cursor_check(struct cursor *cursor) |
| { |
| if (cursor->level == -1) |
| return; |
| tuxkey_t key = 0; |
| block_t block = cursor->btree->root.block; |
| |
| for (int i = 0; i <= cursor->level; i++) { |
| assert(bufindex(cursor->path[i].buffer) == block); |
| if (i == cursor->level) |
| break; |
| |
| struct bnode *bnode = level_node(cursor, i); |
| struct index_entry *entry = cursor->path[i].next - 1; |
| assert(bnode->entries <= entry); |
| assert(entry < bnode->entries + bcount(bnode)); |
| /* |
| * If this is the leftmost entry, its key should equal the |
| * key inherited from the parent. Otherwise, its key must be |
| * strictly greater than the parent key. |
| */ |
| if (bnode->entries == entry) |
| assert(be64_to_cpu(entry->key) == key); |
| else |
| assert(be64_to_cpu(entry->key) > key); |
| |
| block = be64_to_cpu(entry->block); |
| key = be64_to_cpu(entry->key); |
| } |
| } |
| |
| static inline int alloc_cursor_size(int count) |
| { |
| return sizeof(struct cursor) + sizeof(struct path_level) * count; |
| } |
| |
| struct cursor *alloc_cursor(struct btree *btree, int extra) |
| { |
| int maxlevel = btree->root.depth + extra; |
| struct cursor *cursor = malloc(alloc_cursor_size(maxlevel + 1)); |
| |
| if (cursor) { |
| cursor->btree = btree; |
| cursor->level = -1; |
| #ifdef CURSOR_DEBUG |
| cursor->maxlevel = maxlevel; |
| for (int i = 0; i <= maxlevel; i++) { |
| cursor->path[i].buffer = FREE_BUFFER; /* for debug */ |
| cursor->path[i].next = FREE_NEXT; /* for debug */ |
| } |
| #endif |
| } |
| return cursor; |
| } |
| |
| void free_cursor(struct cursor *cursor) |
| { |
| #ifdef CURSOR_DEBUG |
| assert(cursor->level == -1); |
| #endif |
| free(cursor); |
| } |
| |
| /* Look up the index entry that contains key */ |
| static struct index_entry *bnode_lookup(struct bnode *node, tuxkey_t key) |
| { |
| struct index_entry *next = node->entries, *top = next + bcount(node); |
| assert(bcount(node) > 0); |
| /* FIXME: a binary search belongs here instead of this linear scan */ |
| while (++next < top) { |
| if (be64_to_cpu(next->key) > key) |
| break; |
| } |
| return next - 1; |
| } |
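| |
| /* |
| * Example: with entry keys {unused, 10, 20} (the first key is never |
| * compared, per the note at the top of this file), looking up key 15 |
| * returns the entry holding key 10, that is, the last entry whose |
| * subtree can contain the search key. |
| */ |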
| |
| static int cursor_level_finished(struct cursor *cursor) |
| { |
| /* must not be leaf */ |
| assert(cursor->level < cursor->btree->root.depth); |
| return level_finished(cursor, cursor->level); |
| } |
| |
| /* |
| * Climb up the cursor until we find the first level where we have not yet |
| * read all the way to the end of the index block; there we find the key |
| * that separates the subtree we are in (a leaf) from the next subtree to |
| * the right. |
| */ |
| tuxkey_t cursor_next_key(struct cursor *cursor) |
| { |
| int level = cursor->level; |
| assert(cursor->level == cursor->btree->root.depth); |
| while (level--) { |
| if (!level_finished(cursor, level)) |
| return be64_to_cpu(cursor->path[level].next->key); |
| } |
| return TUXKEY_LIMIT; |
| } |
| |
| static tuxkey_t cursor_level_next_key(struct cursor *cursor) |
| { |
| int level = cursor->level; |
| assert(cursor->level < cursor->btree->root.depth); |
| while (level >= 0) { |
| if (!level_finished(cursor, level)) |
| return be64_to_cpu(cursor->path[level].next->key); |
| level--; |
| } |
| return TUXKEY_LIMIT; |
| } |
| |
| /* Return the bottom key of the current leaf */ |
| tuxkey_t cursor_this_key(struct cursor *cursor) |
| { |
| assert(cursor->level == cursor->btree->root.depth); |
| return be64_to_cpu((cursor->path[cursor->level - 1].next - 1)->key); |
| } |
| |
| static tuxkey_t cursor_level_this_key(struct cursor *cursor) |
| { |
| assert(cursor->level < cursor->btree->root.depth); |
| return be64_to_cpu((cursor->path[cursor->level].next - 1)->key); |
| } |
| |
| /* |
| * Read the root node into the cursor. |
| * < 0 - error |
| * 0 - success |
| */ |
| static int cursor_read_root(struct cursor *cursor) |
| { |
| struct btree *btree = cursor->btree; |
| struct buffer_head *buffer; |
| |
| assert(has_root(btree)); |
| |
| buffer = vol_bread(btree->sb, btree->root.block); |
| if (!buffer) |
| return -EIO; /* FIXME: the failure might actually have been -ENOMEM */ |
| assert(bnode_sniff(bufdata(buffer))); |
| cursor_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries); |
| return 0; |
| } |
| |
| /* |
| * Move the cursor up to the parent node. |
| * 0 - there is no further parent (root was popped) |
| * 1 - there is a parent |
| */ |
| static int cursor_advance_up(struct cursor *cursor) |
| { |
| assert(cursor->level >= 0); |
| cursor_pop_blockput(cursor); |
| return cursor->level >= 0; |
| } |
| |
| /* |
| * Move the cursor down to the child node or leaf, and update ->next. |
| * < 0 - error |
| * 0 - there is no further child (leaf was pushed) |
| * 1 - there is child |
| */ |
| static int cursor_advance_down(struct cursor *cursor) |
| { |
| struct btree *btree = cursor->btree; |
| struct buffer_head *buffer; |
| block_t child; |
| |
| assert(cursor->level < btree->root.depth); |
| |
| child = be64_to_cpu(cursor->path[cursor->level].next->block); |
| buffer = vol_bread(btree->sb, child); |
| if (!buffer) |
| return -EIO; /* FIXME: the failure might actually have been -ENOMEM */ |
| cursor->path[cursor->level].next++; |
| |
| if (cursor->level < btree->root.depth - 1) { |
| struct bnode *node = bufdata(buffer); |
| assert(bnode_sniff(node)); |
| cursor_push(cursor, buffer, node->entries); |
| cursor_check(cursor); |
| return 1; |
| } |
| |
| assert(btree->ops->leaf_sniff(btree, bufdata(buffer))); |
| cursor_push(cursor, buffer, NULL); |
| cursor_check(cursor); |
| return 0; |
| } |
| |
| /* |
| * Advance the cursor for a btree traversal. |
| * < 0 - error |
| * 0 - traversal finished |
| * 1 - reached the next leaf |
| */ |
| static int cursor_advance(struct cursor *cursor) |
| { |
| int ret; |
| |
| do { |
| if (!cursor_advance_up(cursor)) |
| return 0; |
| } while (cursor_level_finished(cursor)); |
| do { |
| ret = cursor_advance_down(cursor); |
| if (ret < 0) |
| return ret; |
| } while (ret); |
| |
| return 1; |
| } |
| |
| /* Look up the index entry for key and set it as the next step down the path */ |
| static void cursor_bnode_lookup(struct cursor *cursor, tuxkey_t key) |
| { |
| struct path_level *at = &cursor->path[cursor->level]; |
| at->next = bnode_lookup(bufdata(at->buffer), key); |
| } |
| |
| int btree_probe(struct cursor *cursor, tuxkey_t key) |
| { |
| int ret; |
| |
| ret = cursor_read_root(cursor); |
| if (ret < 0) |
| return ret; |
| do { |
| cursor_bnode_lookup(cursor, key); |
| |
| ret = cursor_advance_down(cursor); |
| if (ret < 0) |
| goto error; |
| } while (ret); |
| |
| return 0; |
| |
| error: |
| release_cursor(cursor); |
| return ret; |
| } |
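| |
| /* |
| * Typical usage (a sketch only; error handling is trimmed, the leaf |
| * inspection is left abstract because leaf formats are defined by |
| * btree->ops, and down_read() is assumed to be the read-side |
| * counterpart of the down_write() used in btree_chop()): |
| * |
| *   struct cursor *cursor = alloc_cursor(btree, 0); |
| *   if (!cursor) |
| *           return -ENOMEM; |
| *   down_read(&btree->lock); |
| *   err = btree_probe(cursor, key); |
| *   if (!err) { |
| *           void *leaf = bufdata(cursor_leafbuf(cursor)); |
| *           ... examine keys in [cursor_this_key(cursor), |
| *               cursor_next_key(cursor)) ... |
| *           release_cursor(cursor); |
| *   } |
| *   up_read(&btree->lock); |
| *   free_cursor(cursor); |
| */ |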
| |
| /* |
| * Traverse the btree over the specified range |
| * key: start of the range (the cursor should point at the leaf that includes key) |
| * len: length of the range to traverse |
| * |
| * return value: |
| * < 0 - error |
| * 0 - traversed the whole range |
| * > 0 - traversal was stopped by func; its return value is passed through |
| */ |
| int btree_traverse(struct cursor *cursor, tuxkey_t key, u64 len, |
| btree_traverse_func_t func, void *data) |
| { |
| struct btree *btree = cursor->btree; |
| int ret; |
| |
| do { |
| tuxkey_t bottom = cursor_this_key(cursor); |
| tuxkey_t limit = cursor_next_key(cursor); |
| void *leaf = bufdata(cursor_leafbuf(cursor)); |
| assert(btree->ops->leaf_sniff(btree, leaf)); |
| |
| if (key < bottom) { |
| len -= min_t(u64, len, bottom - key); |
| if (len == 0) |
| break; |
| key = bottom; |
| } |
| |
| ret = func(btree, bottom, limit, leaf, key, len, data); |
| /* Stop the traversal if ret >= 1 or on error */ |
| if (ret) |
| goto out; |
| |
| /* If the end of the range does not extend past this leaf, we are done */ |
| if (key + len <= limit) |
| break; |
| |
| ret = cursor_advance(cursor); |
| if (ret < 0) |
| goto out; |
| } while (ret); |
| |
| ret = 0; |
| out: |
| return ret; |
| } |
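| |
| /* |
| * Example callback (a sketch): the parameters mirror the way func is |
| * invoked above, i.e. func(btree, bottom, limit, leaf, key, len, data); |
| * the authoritative btree_traverse_func_t typedef lives in the header. |
| * Returning 0 continues the walk, a positive value stops it and is |
| * passed back to the caller. The leaf-counting body is illustrative |
| * only. |
| * |
| *   static int count_leaves(struct btree *btree, tuxkey_t bottom, |
| *                           tuxkey_t limit, void *leaf, tuxkey_t key, |
| *                           u64 len, void *data) |
| *   { |
| *           (*(unsigned *)data)++; |
| *           return 0; |
| *   } |
| */ |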
| |
| void show_tree_range(struct btree *btree, tuxkey_t start, unsigned count) |
| { |
| __tux3_dbg("%i level btree at %Li:\n", |
| btree->root.depth, btree->root.block); |
| if (!has_root(btree)) |
| return; |
| |
| struct cursor *cursor = alloc_cursor(btree, 0); |
| if (!cursor) { |
| tux3_err(btree->sb, "out of memory"); |
| return; |
| } |
| if (btree_probe(cursor, start)) { |
| tux3_fs_error(btree->sb, "tell me why!!!"); |
| goto out; |
| } |
| |
| struct buffer_head *buffer; |
| do { |
| buffer = cursor_leafbuf(cursor); |
| assert((btree->ops->leaf_sniff)(btree, bufdata(buffer))); |
| (btree->ops->leaf_dump)(btree, bufdata(buffer)); |
| } while (--count && cursor_advance(cursor)); |
| |
| out: |
| free_cursor(cursor); |
| } |
| |
| void show_tree(struct btree *btree) |
| { |
| show_tree_range(btree, 0, -1); |
| } |
| |
| static void level_redirect_blockput(struct cursor *cursor, int level, struct buffer_head *clone) |
| { |
| struct buffer_head *buffer = cursor->path[level].buffer; |
| struct index_entry *next = cursor->path[level].next; |
| |
| /* If this level has ->next, update ->next to the clone buffer */ |
| if (next) |
| next = ptr_redirect(next, bufdata(buffer), bufdata(clone)); |
| |
| memcpy(bufdata(clone), bufdata(buffer), bufsize(clone)); |
| level_replace_blockput(cursor, level, clone, next); |
| } |
| |
| static int leaf_need_redirect(struct sb *sb, struct buffer_head *buffer) |
| { |
| /* FIXME: a leaf doesn't carry a delta number; we might want to |
| * remove this exception for leaves */ |
| /* If the buffer is not being re-dirtied, we need to redirect it */ |
| return !buffer_dirty(buffer); |
| } |
| |
| static int bnode_need_redirect(struct sb *sb, struct buffer_head *buffer) |
| { |
| /* If the buffer is not already dirty for sb->unify, we need to redirect it */ |
| return !buffer_already_dirty(buffer, sb->unify); |
| } |
| |
| /* |
| * Redirect the non-dirty buffers on the path in order to modify the leaf. |
| * |
| * Redirection proceeds from root to leaf; otherwise the blocks on the |
| * path would be allocated in reverse order. |
| * |
| * FIXME: We could allocate/copy blocks before changing the common |
| * ancestor (until the common ancestor is changed, the changes are not |
| * visible to readers). With this, we may be able to reduce locking time. |
| */ |
| int cursor_redirect(struct cursor *cursor) |
| { |
| struct btree *btree = cursor->btree; |
| struct sb *sb = btree->sb; |
| int level; |
| |
| for (level = 0; level <= btree->root.depth; level++) { |
| struct buffer_head *buffer, *clone; |
| block_t parent, oldblock, newblock; |
| struct index_entry *entry; |
| int redirect, is_leaf = (level == btree->root.depth); |
| |
| buffer = cursor->path[level].buffer; |
| /* If the buffer must be redirected before it can be dirtied, redirect it */ |
| if (is_leaf) |
| redirect = leaf_need_redirect(sb, buffer); |
| else |
| redirect = bnode_need_redirect(sb, buffer); |
| |
| /* No need to redirect */ |
| if (!redirect) |
| continue; |
| |
| /* Redirect buffer before changing */ |
| clone = new_block(btree); |
| if (IS_ERR(clone)) |
| return PTR_ERR(clone); |
| oldblock = bufindex(buffer); |
| newblock = bufindex(clone); |
| trace("redirect %Lx to %Lx", oldblock, newblock); |
| level_redirect_blockput(cursor, level, clone); |
| if (is_leaf) { |
| /* This is leaf buffer */ |
| mark_buffer_dirty_atomic(clone); |
| log_leaf_redirect(sb, oldblock, newblock); |
| defer_bfree(sb, &sb->defree, oldblock, 1); |
| } else { |
| /* This is bnode buffer */ |
| mark_buffer_unify_atomic(clone); |
| log_bnode_redirect(sb, oldblock, newblock); |
| defer_bfree(sb, &sb->deunify, oldblock, 1); |
| } |
| |
| trace("update parent"); |
| if (!level) { |
| /* Update pointer in btree->root */ |
| trace("redirect root"); |
| assert(oldblock == btree->root.block); |
| btree->root.block = newblock; |
| tux3_mark_btree_dirty(btree); |
| continue; |
| } |
| /* Update entry on parent for the redirected block */ |
| parent = bufindex(cursor->path[level - 1].buffer); |
| entry = cursor->path[level - 1].next - 1; |
| entry->block = cpu_to_be64(newblock); |
| log_bnode_update(sb, parent, newblock, be64_to_cpu(entry->key)); |
| } |
| |
| cursor_check(cursor); |
| return 0; |
| } |
| |
| /* Deletion */ |
| |
| static void bnode_remove_index(struct bnode *node, struct index_entry *p, |
| int count) |
| { |
| unsigned total = bcount(node); |
| void *end = node->entries + total; |
| memmove(p, p + count, end - (void *)(p + count)); |
| node->count = cpu_to_be32(total - count); |
| } |
| |
| static int bnode_merge_nodes(struct sb *sb, struct bnode *into, |
| struct bnode *from) |
| { |
| unsigned into_count = bcount(into), from_count = bcount(from); |
| |
| if (from_count + into_count > sb->entries_per_node) |
| return 0; |
| |
| veccopy(&into->entries[into_count], from->entries, from_count); |
| into->count = cpu_to_be32(into_count + from_count); |
| |
| return 1; |
| } |
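| |
| /* |
| * For example, with entries_per_node == 255 (a 4KB block, see |
| * calc_entries_per_node()), merging a 100-entry node into a 120-entry |
| * node succeeds and leaves "into" with 220 entries, while 200 + 120 |
| * would exceed the limit and bnode_merge_nodes() would return 0 |
| * without touching either node. |
| */ |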
| |
| static void adjust_parent_sep(struct cursor *cursor, int level, __be64 newsep) |
| { |
| /* Update the separating key up to the nearest common parent */ |
| while (level >= 0) { |
| struct path_level *parent_at = &cursor->path[level]; |
| struct index_entry *parent = parent_at->next - 1; |
| |
| assert(0 < be64_to_cpu(parent->key)); |
| assert(be64_to_cpu(parent->key) < be64_to_cpu(newsep)); |
| log_bnode_adjust(cursor->btree->sb, |
| bufindex(parent_at->buffer), |
| be64_to_cpu(parent->key), |
| be64_to_cpu(newsep)); |
| parent->key = newsep; |
| mark_buffer_unify_non(parent_at->buffer); |
| |
| if (parent != level_node(cursor, level)->entries) |
| break; |
| |
| level--; |
| } |
| } |
| |
| /* Tracking info for chopped bnode indexes */ |
| struct chopped_index_info { |
| tuxkey_t start; |
| int count; |
| }; |
| |
| static void remove_index(struct cursor *cursor, struct chopped_index_info *cii) |
| { |
| int level = cursor->level; |
| struct bnode *node = level_node(cursor, level); |
| struct chopped_index_info *ciil = &cii[level]; |
| |
| /* Collect chopped index in this node for logging later */ |
| if (!ciil->count) |
| ciil->start = be64_to_cpu((cursor->path[level].next - 1)->key); |
| ciil->count++; |
| |
| /* Remove an index */ |
| bnode_remove_index(node, cursor->path[level].next - 1, 1); |
| --(cursor->path[level].next); |
| mark_buffer_unify_non(cursor->path[level].buffer); |
| |
| /* |
| * Climb up to the common parent and update the separating key. |
| * |
| * What if the index is now empty? (no deleted key) |
| * |
| * Then some key above is going to be deleted and used as the |
| * separator: climb the cursor while at the first entry, bail out at |
| * the root, find the node with the old separator, and set it to the |
| * deleted key. |
| */ |
| |
| /* There is no separator for last entry or root node */ |
| if (!level || cursor_level_finished(cursor)) |
| return; |
| /* If removed index was not first entry, no change to separator */ |
| if (cursor->path[level].next != node->entries) |
| return; |
| |
| adjust_parent_sep(cursor, level - 1, cursor->path[level].next->key); |
| } |
| |
| static int try_leaf_merge(struct btree *btree, struct buffer_head *intobuf, |
| struct buffer_head *frombuf) |
| { |
| struct vleaf *from = bufdata(frombuf); |
| struct vleaf *into = bufdata(intobuf); |
| |
| /* Try to merge leaves */ |
| if (btree->ops->leaf_merge(btree, into, from)) { |
| struct sb *sb = btree->sb; |
| /* |
| * We know frombuf is redirected and dirty, so here we |
| * can just cancel the leaf redirect with bfree() |
| * instead of defer_bfree(). |
| * FIXME: we could optimize by freeing the leaf without |
| * redirecting it, and if we did, this would no longer hold. |
| */ |
| bfree(sb, bufindex(frombuf), 1); |
| log_leaf_free(sb, bufindex(frombuf)); |
| return 1; |
| } |
| return 0; |
| } |
| |
| static int try_bnode_merge(struct sb *sb, struct buffer_head *intobuf, |
| struct buffer_head *frombuf) |
| { |
| struct bnode *into = bufdata(intobuf); |
| struct bnode *from = bufdata(frombuf); |
| |
| /* Try to merge nodes */ |
| if (bnode_merge_nodes(sb, into, from)) { |
| /* |
| * We know frombuf is redirected and dirty, so here we |
| * can just cancel the bnode redirect with bfree() |
| * instead of defer_bfree(). |
| * FIXME: we could optimize by freeing the bnode without |
| * redirecting it, and if we did, this would no longer hold. |
| */ |
| bfree(sb, bufindex(frombuf), 1); |
| log_bnode_merge(sb, bufindex(frombuf), bufindex(intobuf)); |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* |
| * This is range deletion. So, instead of rebalancing space across |
| * sibling nodes for each change, this just removes the range and |
| * merges nodes from right to left, even across different parents. |
| * |
| * +--------------- (A, B, C)--------------------+ |
| * | | | |
| * +-- (AA, AB, AC) -+ +- (BA, BB, BC) -+ + (CA, CB, CC) + |
| * | | | | | | | | | |
| * (AAA,AAB)(ABA,ABB)(ACA,ACB) (BAA,BAB)(BBA)(BCA,BCB) (CAA)(CBA,CBB)(CCA) |
| * |
| * [less : A, AA, AAA, AAB, AB, ABA, ABB, AC, ACA, ACB, B, BA ... : greater] |
| * |
| * If we merged from a cousin (or re-distributed), we may have to |
| * update the index up to the common parent (e.g. if we removed (ACB) |
| * and then merged (BAA,BAB) into (ACA), we have to adjust B in the |
| * root node to BB). |
| * |
| * See adjust_parent_sep(). |
| * |
| * FIXME: there is no re-distribution, so we don't guarantee more than |
| * 50% space efficiency. And if the range extends to the end of the |
| * keyspace (the truncate() case), we don't need to merge or call |
| * adjust_parent_sep(). |
| * |
| * FIXME2: we may want to split the chop work into steps instead of |
| * blocking for a long time. |
| */ |
| int btree_chop(struct btree *btree, tuxkey_t start, u64 len) |
| { |
| struct sb *sb = btree->sb; |
| struct btree_ops *ops = btree->ops; |
| struct buffer_head **prev, *leafprev = NULL; |
| struct chopped_index_info *cii; |
| struct cursor *cursor; |
| tuxkey_t limit; |
| int ret, done = 0; |
| |
| if (!has_root(btree)) |
| return 0; |
| |
| /* Chop the whole key range if len >= TUXKEY_LIMIT */ |
| limit = (len >= TUXKEY_LIMIT) ? TUXKEY_LIMIT : start + len; |
| |
| prev = malloc(sizeof(*prev) * btree->root.depth); |
| if (prev == NULL) |
| return -ENOMEM; |
| memset(prev, 0, sizeof(*prev) * btree->root.depth); |
| |
| cii = malloc(sizeof(*cii) * btree->root.depth); |
| if (cii == NULL) { |
| ret = -ENOMEM; |
| goto error_cii; |
| } |
| memset(cii, 0, sizeof(*cii) * btree->root.depth); |
| |
| cursor = alloc_cursor(btree, 0); |
| if (!cursor) { |
| ret = -ENOMEM; |
| goto error_alloc_cursor; |
| } |
| |
| down_write(&btree->lock); |
| ret = btree_probe(cursor, start); |
| if (ret) |
| goto error_btree_probe; |
| |
| /* Walk leaves */ |
| while (1) { |
| struct buffer_head *leafbuf; |
| tuxkey_t this_key; |
| |
| /* |
| * FIXME: If the leaf is merged and freed later, we don't |
| * need to redirect it or call leaf_chop() |
| */ |
| if ((ret = cursor_redirect(cursor))) |
| goto out; |
| leafbuf = cursor_pop(cursor); |
| |
| /* Adjust start and len for this leaf */ |
| this_key = cursor_level_this_key(cursor); |
| if (start < this_key) { |
| if (limit < TUXKEY_LIMIT) |
| len -= this_key - start; |
| start = this_key; |
| } |
| |
| ret = ops->leaf_chop(btree, start, len, bufdata(leafbuf)); |
| if (ret) { |
| if (ret < 0) { |
| blockput(leafbuf); |
| goto out; |
| } |
| mark_buffer_dirty_non(leafbuf); |
| } |
| |
| /* Try to merge this leaf with prev */ |
| if (leafprev) { |
| if (try_leaf_merge(btree, leafprev, leafbuf)) { |
| trace(">>> can merge leaf %p into leaf %p", leafbuf, leafprev); |
| remove_index(cursor, cii); |
| mark_buffer_dirty_non(leafprev); |
| blockput_free(sb, leafbuf); |
| goto keep_prev_leaf; |
| } |
| blockput(leafprev); |
| } |
| leafprev = leafbuf; |
| |
| keep_prev_leaf: |
| |
| if (cursor_level_next_key(cursor) >= limit) |
| done = 1; |
| /* Pop and try to merge finished nodes */ |
| while (done || cursor_level_finished(cursor)) { |
| struct buffer_head *buf; |
| int level = cursor->level; |
| struct chopped_index_info *ciil = &cii[level]; |
| |
| /* Get the merge source buffer, and go up to the parent level */ |
| buf = cursor_pop(cursor); |
| |
| /* |
| * Log the chopped indexes. |
| * FIXME: If the node is freed later (e.g. merged), |
| * we don't need to log this |
| */ |
| if (ciil->count) { |
| log_bnode_del(sb, bufindex(buf), ciil->start, |
| ciil->count); |
| } |
| memset(ciil, 0, sizeof(*ciil)); |
| |
| /* Try to merge node with prev */ |
| if (prev[level]) { |
| assert(level); |
| if (try_bnode_merge(sb, prev[level], buf)) { |
| trace(">>> can merge node %p into node %p", buf, prev[level]); |
| remove_index(cursor, cii); |
| mark_buffer_unify_non(prev[level]); |
| blockput_free_unify(sb, buf); |
| goto keep_prev_node; |
| } |
| blockput(prev[level]); |
| } |
| prev[level] = buf; |
| keep_prev_node: |
| |
| if (!level) |
| goto chop_root; |
| } |
| |
| /* Push back down to leaf level */ |
| do { |
| ret = cursor_advance_down(cursor); |
| if (ret < 0) |
| goto out; |
| } while (ret); |
| } |
| |
| chop_root: |
| /* Reduce the tree depth if possible */ |
| while (btree->root.depth > 1 && bcount(bufdata(prev[0])) == 1) { |
| trace("drop btree level"); |
| btree->root.block = bufindex(prev[1]); |
| btree->root.depth--; |
| tux3_mark_btree_dirty(btree); |
| |
| /* |
| * We know prev[0] is redirected and dirty, so here we |
| * can just cancel the bnode redirect with bfree() |
| * instead of defer_bfree(). |
| * FIXME: we could optimize by freeing the bnode without |
| * redirecting it, and if we did, this would no longer hold. |
| */ |
| bfree(sb, bufindex(prev[0]), 1); |
| log_bnode_free(sb, bufindex(prev[0])); |
| blockput_free_unify(sb, prev[0]); |
| |
| vecmove(prev, prev + 1, btree->root.depth); |
| } |
| ret = 0; |
| |
| out: |
| if (leafprev) |
| blockput(leafprev); |
| for (int i = 0; i < btree->root.depth; i++) { |
| if (prev[i]) |
| blockput(prev[i]); |
| } |
| release_cursor(cursor); |
| error_btree_probe: |
| up_write(&btree->lock); |
| |
| free_cursor(cursor); |
| error_alloc_cursor: |
| free(cii); |
| error_cii: |
| free(prev); |
| |
| return ret; |
| } |
| |
| /* root must be zero-initialized */ |
| static void bnode_init_root(struct bnode *root, unsigned count, block_t left, |
| block_t right, tuxkey_t rkey) |
| { |
| root->count = cpu_to_be32(count); |
| root->entries[0].block = cpu_to_be64(left); |
| root->entries[1].block = cpu_to_be64(right); |
| root->entries[1].key = cpu_to_be64(rkey); |
| } |
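| |
| /* |
| * For a two-pointer root, bnode_init_root(root, 2, left, right, rkey) |
| * yields entries[0] = { unused key, left } and entries[1] = { rkey, |
| * right }, so rkey acts as the separator: bnode_lookup() sends keys |
| * below rkey to the left child and keys at or above rkey to the right |
| * child. With count == 1 (see alloc_empty_btree()), only the left |
| * pointer is meaningful. |
| */ |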
| |
| /* Insertion */ |
| |
| static void bnode_add_index(struct bnode *node, struct index_entry *p, |
| block_t child, u64 childkey) |
| { |
| unsigned count = bcount(node); |
| vecmove(p + 1, p, node->entries + count - p); |
| p->block = cpu_to_be64(child); |
| p->key = cpu_to_be64(childkey); |
| node->count = cpu_to_be32(count + 1); |
| } |
| |
| static void bnode_split(struct bnode *src, unsigned pos, struct bnode *dst) |
| { |
| dst->count = cpu_to_be32(bcount(src) - pos); |
| src->count = cpu_to_be32(pos); |
| |
| memcpy(&dst->entries[0], &src->entries[pos], |
| bcount(dst) * sizeof(struct index_entry)); |
| } |
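| |
| /* |
| * For example, splitting a 6-entry node at pos 3 leaves entries |
| * [0..2] in src (count 3) and copies entries [3..5] into dst |
| * (count 3); the caller records dst's first key as the new separating |
| * key (see insert_leaf()). |
| */ |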
| |
| /* |
| * Insert a new leaf at the next cursor position. |
| * keep == 1: keep the current cursor position. |
| * keep == 0: set the cursor position to the new leaf. |
| */ |
| static int insert_leaf(struct cursor *cursor, tuxkey_t childkey, struct buffer_head *leafbuf, int keep) |
| { |
| struct btree *btree = cursor->btree; |
| struct sb *sb = btree->sb; |
| int level = btree->root.depth; |
| block_t childblock = bufindex(leafbuf); |
| |
| if (keep) |
| blockput(leafbuf); |
| else { |
| cursor_pop_blockput(cursor); |
| cursor_push(cursor, leafbuf, NULL); |
| } |
| while (level--) { |
| struct path_level *at = &cursor->path[level]; |
| struct buffer_head *parentbuf = at->buffer; |
| struct bnode *parent = bufdata(parentbuf); |
| |
| /* insert and exit if not full */ |
| if (bcount(parent) < btree->sb->entries_per_node) { |
| bnode_add_index(parent, at->next, childblock, childkey); |
| if (!keep) |
| at->next++; |
| log_bnode_add(sb, bufindex(parentbuf), childblock, childkey); |
| mark_buffer_unify_non(parentbuf); |
| cursor_check(cursor); |
| return 0; |
| } |
| |
| /* split a full index node */ |
| struct buffer_head *newbuf = new_node(btree); |
| if (IS_ERR(newbuf)) |
| return PTR_ERR(newbuf); |
| |
| struct bnode *newnode = bufdata(newbuf); |
| unsigned half = bcount(parent) / 2; |
| u64 newkey = be64_to_cpu(parent->entries[half].key); |
| |
| bnode_split(parent, half, newnode); |
| log_bnode_split(sb, bufindex(parentbuf), half, bufindex(newbuf)); |
| |
| /* if the cursor is in the new node, use that as the parent */ |
| int child_is_left = at->next <= parent->entries + half; |
| if (!child_is_left) { |
| struct index_entry *newnext; |
| mark_buffer_unify_non(parentbuf); |
| newnext = newnode->entries + (at->next - &parent->entries[half]); |
| get_bh(newbuf); |
| level_replace_blockput(cursor, level, newbuf, newnext); |
| parentbuf = newbuf; |
| parent = newnode; |
| } else |
| mark_buffer_unify_non(newbuf); |
| |
| bnode_add_index(parent, at->next, childblock, childkey); |
| if (!keep) |
| at->next++; |
| log_bnode_add(sb, bufindex(parentbuf), childblock, childkey); |
| mark_buffer_unify_non(parentbuf); |
| |
| childkey = newkey; |
| childblock = bufindex(newbuf); |
| blockput(newbuf); |
| |
| /* |
| * If the child is in the left bnode, keep the cursor |
| * positioned at the child; otherwise adjust the cursor |
| * to the new bnode. |
| */ |
| keep = child_is_left; |
| } |
| |
| /* Make new root bnode */ |
| trace("add tree level"); |
| struct buffer_head *newbuf = new_node(btree); |
| if (IS_ERR(newbuf)) |
| return PTR_ERR(newbuf); |
| |
| struct bnode *newroot = bufdata(newbuf); |
| block_t newrootblock = bufindex(newbuf); |
| block_t oldrootblock = btree->root.block; |
| int left_node = bufindex(cursor->path[0].buffer) != childblock; |
| bnode_init_root(newroot, 2, oldrootblock, childblock, childkey); |
| cursor_root_add(cursor, newbuf, newroot->entries + 1 + !left_node); |
| log_bnode_root(sb, newrootblock, 2, oldrootblock, childblock, childkey); |
| |
| /* Change btree to point the new root */ |
| btree->root.block = newrootblock; |
| btree->root.depth++; |
| |
| mark_buffer_unify_non(newbuf); |
| tux3_mark_btree_dirty(btree); |
| cursor_check(cursor); |
| |
| return 0; |
| } |
| |
| /* Insert a new leaf at the next cursor position, then set the cursor to the new leaf */ |
| int btree_insert_leaf(struct cursor *cursor, tuxkey_t key, struct buffer_head *leafbuf) |
| { |
| return insert_leaf(cursor, key, leafbuf, 0); |
| } |
| |
| /* |
| * Split the leaf, then insert the new leaf into the parent. |
| * key: key to add after the split (the cursor will point at the leaf that includes key) |
| * hint: hint for the split point |
| * |
| * return value: |
| * 0 - success |
| * < 0 - error |
| */ |
| static int btree_leaf_split(struct cursor *cursor, tuxkey_t key, tuxkey_t hint) |
| { |
| trace("split leaf"); |
| struct btree *btree = cursor->btree; |
| struct buffer_head *newbuf; |
| |
| newbuf = new_leaf(btree); |
| if (IS_ERR(newbuf)) |
| return PTR_ERR(newbuf); |
| log_balloc(btree->sb, bufindex(newbuf), 1); |
| |
| struct buffer_head *leafbuf = cursor_leafbuf(cursor); |
| tuxkey_t newkey = btree->ops->leaf_split(btree, hint, bufdata(leafbuf), |
| bufdata(newbuf)); |
| assert(cursor_this_key(cursor) < newkey); |
| assert(newkey < cursor_next_key(cursor)); |
| if (key < newkey) |
| mark_buffer_dirty_non(newbuf); |
| else |
| mark_buffer_dirty_non(leafbuf); |
| return insert_leaf(cursor, newkey, newbuf, key < newkey); |
| } |
| |
| static int btree_advance(struct cursor *cursor, struct btree_key_range *key) |
| { |
| tuxkey_t limit = cursor_next_key(cursor); |
| int skip = 0; |
| |
| while (key->start >= limit) { |
| int ret = cursor_advance(cursor); |
| assert(ret != 0); /* wrong key range? */ |
| if (ret < 0) |
| return ret; |
| |
| limit = cursor_next_key(cursor); |
| skip++; |
| } |
| if (skip > 1) { |
| /* key should have been on the very next leaf */ |
| tux3_dbg("skipped more than 1 leaf: why, and probe is better"); |
| assert(0); |
| } |
| |
| return 0; |
| } |
| |
| int noop_pre_write(struct btree *btree, tuxkey_t key_bottom, tuxkey_t key_limit, |
| void *leaf, struct btree_key_range *key) |
| { |
| return BTREE_DO_DIRTY; |
| } |
| |
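| /* |
| * btree_write() in brief: advance the cursor to the leaf covering |
| * key->start, ask ops->leaf_pre_write() what to do, then act on its |
| * answer: BTREE_DO_RETRY loops again, BTREE_DO_DIRTY redirects the |
| * path and calls ops->leaf_write() (which may itself return |
| * BTREE_DO_RETRY or BTREE_DO_SPLIT), and BTREE_DO_SPLIT splits the |
| * leaf via btree_leaf_split() before retrying. The loop ends once |
| * key->len reaches zero, which the leaf ops are expected to drive. |
| * This summary is descriptive only; the precise contract of the |
| * BTREE_DO_* codes is defined with the leaf operations. |
| */ |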
| int btree_write(struct cursor *cursor, struct btree_key_range *key) |
| { |
| struct btree *btree = cursor->btree; |
| struct btree_ops *ops = btree->ops; |
| tuxkey_t split_hint; |
| int err; |
| |
| while (key->len > 0) { |
| tuxkey_t bottom, limit; |
| void *leaf; |
| int ret; |
| |
| err = btree_advance(cursor, key); |
| if (err) |
| return err; /* FIXME: error handling */ |
| |
| bottom = cursor_this_key(cursor); |
| limit = cursor_next_key(cursor); |
| assert(bottom <= key->start && key->start < limit); |
| |
| leaf = bufdata(cursor_leafbuf(cursor)); |
| ret = ops->leaf_pre_write(btree, bottom, limit, leaf, key); |
| assert(ret >= 0); |
| if (ret == BTREE_DO_RETRY) |
| continue; |
| |
| if (ret == BTREE_DO_DIRTY) { |
| err = cursor_redirect(cursor); |
| if (err) |
| return err; /* FIXME: error handling */ |
| |
| /* Reread leaf after redirect */ |
| leaf = bufdata(cursor_leafbuf(cursor)); |
| assert(ops->leaf_sniff(btree, leaf)); |
| |
| ret = ops->leaf_write(btree, bottom, limit, leaf, key, |
| &split_hint); |
| if (ret < 0) |
| return ret; |
| if (ret == BTREE_DO_RETRY) { |
| mark_buffer_dirty_non(cursor_leafbuf(cursor)); |
| continue; |
| } |
| } |
| |
| if (ret == BTREE_DO_SPLIT) { |
| err = btree_leaf_split(cursor, key->start, split_hint); |
| if (err) |
| return err; /* FIXME: error handling */ |
| } |
| } |
| |
| return 0; |
| } |
| |
| int btree_read(struct cursor *cursor, struct btree_key_range *key) |
| { |
| struct btree *btree = cursor->btree; |
| struct btree_ops *ops = btree->ops; |
| void *leaf = bufdata(cursor_leafbuf(cursor)); |
| tuxkey_t bottom = cursor_this_key(cursor); |
| tuxkey_t limit = cursor_next_key(cursor); |
| |
| /* FIXME: it might be better to support reading across multiple leaves */ |
| |
| assert(bottom <= key->start && key->start < limit); |
| assert(ops->leaf_sniff(btree, leaf)); |
| |
| return ops->leaf_read(btree, bottom, limit, leaf, key); |
| } |
| |
| void init_btree(struct btree *btree, struct sb *sb, struct root root, struct btree_ops *ops) |
| { |
| btree->sb = sb; |
| btree->ops = ops; |
| btree->root = root; |
| init_rwsem(&btree->lock); |
| ops->btree_init(btree); |
| } |
| |
| int alloc_empty_btree(struct btree *btree) |
| { |
| struct sb *sb = btree->sb; |
| struct buffer_head *rootbuf = new_node(btree); |
| if (IS_ERR(rootbuf)) |
| goto error; |
| struct buffer_head *leafbuf = new_leaf(btree); |
| if (IS_ERR(leafbuf)) |
| goto error_leafbuf; |
| |
| assert(!has_root(btree)); |
| struct bnode *rootnode = bufdata(rootbuf); |
| block_t rootblock = bufindex(rootbuf); |
| block_t leafblock = bufindex(leafbuf); |
| trace("root at %Lx", rootblock); |
| trace("leaf at %Lx", leafblock); |
| bnode_init_root(rootnode, 1, leafblock, 0, 0); |
| log_bnode_root(sb, rootblock, 1, leafblock, 0, 0); |
| log_balloc(sb, leafblock, 1); |
| |
| mark_buffer_unify_non(rootbuf); |
| blockput(rootbuf); |
| mark_buffer_dirty_non(leafbuf); |
| blockput(leafbuf); |
| |
| btree->root = (struct root){ .block = rootblock, .depth = 1 }; |
| tux3_mark_btree_dirty(btree); |
| |
| return 0; |
| |
| error_leafbuf: |
| bfree(sb, bufindex(rootbuf), 1); |
| blockput(rootbuf); |
| rootbuf = leafbuf; |
| error: |
| return PTR_ERR(rootbuf); |
| } |
| |
| /* FIXME: is this right? And should this be done by btree_chop()? */ |
| int free_empty_btree(struct btree *btree) |
| { |
| struct btree_ops *ops = btree->ops; |
| |
| if (!has_root(btree)) |
| return 0; |
| |
| assert(btree->root.depth == 1); |
| struct sb *sb = btree->sb; |
| struct buffer_head *rootbuf = vol_bread(sb, btree->root.block); |
| if (!rootbuf) |
| return -EIO; |
| assert(bnode_sniff(bufdata(rootbuf))); |
| /* Mark the btree as having no root */ |
| btree->root = no_root; |
| tux3_mark_btree_dirty(btree); |
| |
| struct bnode *rootnode = bufdata(rootbuf); |
| assert(bcount(rootnode) == 1); |
| block_t leaf = be64_to_cpu(rootnode->entries[0].block); |
| struct buffer_head *leafbuf = vol_find_get_block(sb, leaf); |
| |
| if (leafbuf && !leaf_need_redirect(sb, leafbuf)) { |
| /* |
| * This leaf is already redirected, so here we can just |
| * cancel the leaf redirect with bfree() instead of |
| * defer_bfree(). |
| */ |
| bfree(sb, leaf, 1); |
| log_leaf_free(sb, leaf); |
| assert(ops->leaf_can_free(btree, bufdata(leafbuf))); |
| blockput_free(sb, leafbuf); |
| } else { |
| defer_bfree(sb, &sb->defree, leaf, 1); |
| log_bfree(sb, leaf, 1); |
| if (leafbuf) { |
| assert(ops->leaf_can_free(btree, bufdata(leafbuf))); |
| blockput(leafbuf); |
| } |
| } |
| |
| if (!bnode_need_redirect(sb, rootbuf)) { |
| /* |
| * This bnode is already redirected, so here we can just |
| * cancel the bnode redirect with bfree() instead of |
| * defer_bfree(). |
| */ |
| bfree(sb, bufindex(rootbuf), 1); |
| log_bnode_free(sb, bufindex(rootbuf)); |
| blockput_free_unify(sb, rootbuf); |
| } else { |
| defer_bfree(sb, &sb->deunify, bufindex(rootbuf), 1); |
| log_bfree_on_unify(sb, bufindex(rootbuf), 1); |
| blockput(rootbuf); |
| } |
| |
| return 0; |
| } |
| |
| int replay_bnode_redirect(struct replay *rp, block_t oldblock, block_t newblock) |
| { |
| struct sb *sb = rp->sb; |
| struct buffer_head *newbuf, *oldbuf; |
| int err = 0; |
| |
| newbuf = vol_getblk(sb, newblock); |
| if (!newbuf) { |
| err = -ENOMEM; /* FIXME: error code */ |
| goto error; |
| } |
| oldbuf = vol_bread(sb, oldblock); |
| if (!oldbuf) { |
| err = -EIO; /* FIXME: error code */ |
| goto error_put_newbuf; |
| } |
| assert(bnode_sniff(bufdata(oldbuf))); |
| |
| memcpy(bufdata(newbuf), bufdata(oldbuf), bufsize(newbuf)); |
| mark_buffer_unify_atomic(newbuf); |
| |
| blockput(oldbuf); |
| error_put_newbuf: |
| blockput(newbuf); |
| error: |
| return err; |
| } |
| |
| int replay_bnode_root(struct replay *rp, block_t root, unsigned count, |
| block_t left, block_t right, tuxkey_t rkey) |
| { |
| struct sb *sb = rp->sb; |
| struct buffer_head *rootbuf; |
| |
| rootbuf = vol_getblk(sb, root); |
| if (!rootbuf) |
| return -ENOMEM; |
| bnode_buffer_init(rootbuf); |
| |
| bnode_init_root(bufdata(rootbuf), count, left, right, rkey); |
| |
| mark_buffer_unify_atomic(rootbuf); |
| blockput(rootbuf); |
| |
| return 0; |
| } |
| |
| /* |
| * Before this replay step, replay should already have dirtied the |
| * buffer of src (e.g. by redirect). |
| */ |
| int replay_bnode_split(struct replay *rp, block_t src, unsigned pos, |
| block_t dst) |
| { |
| struct sb *sb = rp->sb; |
| struct buffer_head *srcbuf, *dstbuf; |
| int err = 0; |
| |
| srcbuf = vol_getblk(sb, src); |
| if (!srcbuf) { |
| err = -ENOMEM; /* FIXME: error code */ |
| goto error; |
| } |
| |
| dstbuf = vol_getblk(sb, dst); |
| if (!dstbuf) { |
| err = -ENOMEM; /* FIXME: error code */ |
| goto error_put_srcbuf; |
| } |
| bnode_buffer_init(dstbuf); |
| |
| bnode_split(bufdata(srcbuf), pos, bufdata(dstbuf)); |
| |
| mark_buffer_unify_non(srcbuf); |
| mark_buffer_unify_atomic(dstbuf); |
| |
| blockput(dstbuf); |
| error_put_srcbuf: |
| blockput(srcbuf); |
| error: |
| return err; |
| } |
| |
| /* |
| * Before this replay step, replay should already have dirtied the |
| * buffer of bnodeblock (e.g. by redirect). |
| */ |
| static int replay_bnode_change(struct sb *sb, block_t bnodeblock, |
| u64 val1, u64 val2, |
| void (*change)(struct bnode *, u64, u64)) |
| { |
| struct buffer_head *bnodebuf; |
| |
| bnodebuf = vol_getblk(sb, bnodeblock); |
| if (!bnodebuf) |
| return -ENOMEM; /* FIXME: error code */ |
| |
| struct bnode *bnode = bufdata(bnodebuf); |
| change(bnode, val1, val2); |
| |
| mark_buffer_unify_non(bnodebuf); |
| blockput(bnodebuf); |
| |
| return 0; |
| } |
| |
| static void add_func(struct bnode *bnode, u64 child, u64 key) |
| { |
| struct index_entry *entry = bnode_lookup(bnode, key) + 1; |
| bnode_add_index(bnode, entry, child, key); |
| } |
| |
| int replay_bnode_add(struct replay *rp, block_t parent, block_t child, |
| tuxkey_t key) |
| { |
| return replay_bnode_change(rp->sb, parent, child, key, add_func); |
| } |
| |
| static void update_func(struct bnode *bnode, u64 child, u64 key) |
| { |
| struct index_entry *entry = bnode_lookup(bnode, key); |
| assert(be64_to_cpu(entry->key) == key); |
| entry->block = cpu_to_be64(child); |
| } |
| |
| int replay_bnode_update(struct replay *rp, block_t parent, block_t child, |
| tuxkey_t key) |
| { |
| return replay_bnode_change(rp->sb, parent, child, key, update_func); |
| } |
| |
| int replay_bnode_merge(struct replay *rp, block_t src, block_t dst) |
| { |
| struct sb *sb = rp->sb; |
| struct buffer_head *srcbuf, *dstbuf; |
| int err = 0, ret; |
| |
| srcbuf = vol_getblk(sb, src); |
| if (!srcbuf) { |
| err = -ENOMEM; /* FIXME: error code */ |
| goto error; |
| } |
| |
| dstbuf = vol_getblk(sb, dst); |
| if (!dstbuf) { |
| err = -ENOMEM; /* FIXME: error code */ |
| goto error_put_srcbuf; |
| } |
| |
| ret = bnode_merge_nodes(sb, bufdata(dstbuf), bufdata(srcbuf)); |
| assert(ret == 1); |
| |
| mark_buffer_unify_non(dstbuf); |
| mark_buffer_unify_non(srcbuf); |
| |
| blockput(dstbuf); |
| error_put_srcbuf: |
| blockput(srcbuf); |
| error: |
| return err; |
| } |
| |
| static void del_func(struct bnode *bnode, u64 key, u64 count) |
| { |
| struct index_entry *entry = bnode_lookup(bnode, key); |
| assert(be64_to_cpu(entry->key) == key); |
| bnode_remove_index(bnode, entry, count); |
| } |
| |
| int replay_bnode_del(struct replay *rp, block_t bnode, tuxkey_t key, |
| unsigned count) |
| { |
| return replay_bnode_change(rp->sb, bnode, key, count, del_func); |
| } |
| |
| static void adjust_func(struct bnode *bnode, u64 from, u64 to) |
| { |
| struct index_entry *entry = bnode_lookup(bnode, from); |
| assert(be64_to_cpu(entry->key) == from); |
| entry->key = cpu_to_be64(to); |
| } |
| |
| int replay_bnode_adjust(struct replay *rp, block_t bnode, tuxkey_t from, |
| tuxkey_t to) |
| { |
| return replay_bnode_change(rp->sb, bnode, from, to, adjust_func); |
| } |