blob: 6f0853892dcb5505be3978d57d08d120f88aa138 [file] [log] [blame]
/*
* linux/fs/hfs/binsert.c
*
* Copyright (C) 1995-1997 Paul H. Hargrove
* This file may be distributed under the terms of the GNU General Public License.
*
* This file contains the code to insert records in a B-tree.
*
* "XXX" in a comment is a note to myself to consider changing something.
*
* In function preconditions the term "valid" applied to a pointer to
* a structure means that the pointer is non-NULL and the structure it
* points to has all fields initialized to consistent values.
*/
#include "hfs_btree.h"
/*================ File-local functions ================*/
/* btree locking functions */
static inline void hfs_btree_lock(struct hfs_btree *tree)
{
while (tree->lock)
hfs_sleep_on(&tree->wait);
tree->lock = 1;
}
static inline void hfs_btree_unlock(struct hfs_btree *tree)
{
tree->lock = 0;
hfs_wake_up(&tree->wait);
}
/*
* binsert_nonfull()
*
* Description:
* Inserts a record in a given bnode known to have sufficient space.
* Input Variable(s):
* struct hfs_brec* brec: pointer to the brec for the insertion
* struct hfs_belem* belem: the element in the search path to insert in
* struct hfs_bkey* key: pointer to the key for the record to insert
* void* data: pointer to the record to insert
* hfs_u16 keysize: size of the key to insert
* hfs_u16 datasize: size of the record to insert
* Output Variable(s):
* NONE
* Returns:
* NONE
* Preconditions:
* 'brec' points to a valid (struct hfs_brec).
* 'belem' points to a valid (struct hfs_belem) in 'brec', the node
* of which has enough free space to insert 'key' and 'data'.
* 'key' is a pointer to a valid (struct hfs_bkey) of length 'keysize'
* which, in sorted order, belongs at the location indicated by 'brec'.
* 'data' is non-NULL an points to appropriate data of length 'datasize'
* Postconditions:
* The record has been inserted in the position indicated by 'brec'.
*/
static void binsert_nonfull(struct hfs_brec *brec, struct hfs_belem *belem,
const struct hfs_bkey *key, const void *data,
hfs_u8 keysize, hfs_u16 datasize)
{
int i, rec, nrecs, size, tomove;
hfs_u8 *start;
struct hfs_bnode *bnode = belem->bnr.bn;
rec = ++(belem->record);
size = ROUND(keysize+1) + datasize;
nrecs = bnode->ndNRecs + 1;
tomove = bnode_offset(bnode, nrecs) - bnode_offset(bnode, rec);
/* adjust the record table */
for (i = nrecs; i >= rec; --i) {
hfs_put_hs(bnode_offset(bnode,i) + size, RECTBL(bnode,i+1));
}
/* make room */
start = bnode_key(bnode, rec);
memmove(start + size, start, tomove);
/* copy in the key and the data*/
*start = keysize;
keysize = ROUND(keysize+1);
memcpy(start + 1, (hfs_u8 *)key + 1, keysize-1);
memcpy(start + keysize, data, datasize);
/* update record count */
++bnode->ndNRecs;
}
/*
* add_root()
*
* Description:
* Adds a new root to a B*-tree, increasing its height.
* Input Variable(s):
* struct hfs_btree *tree: the tree to add a new root to
* struct hfs_bnode *left: the new root's first child or NULL
* struct hfs_bnode *right: the new root's second child or NULL
* Output Variable(s):
* NONE
* Returns:
* void
* Preconditions:
* 'tree' points to a valid (struct hfs_btree).
* 'left' and 'right' point to valid (struct hfs_bnode)s, which
* resulted from splitting the old root node, or are both NULL
* if there was no root node before.
* Postconditions:
* Upon success a new root node is added to 'tree' with either
* two children ('left' and 'right') or none.
*/
static void add_root(struct hfs_btree *tree,
struct hfs_bnode *left,
struct hfs_bnode *right)
{
struct hfs_bnode_ref bnr;
struct hfs_bnode *root;
struct hfs_bkey *key;
int keylen = tree->bthKeyLen;
if (left && !right) {
hfs_warn("add_root: LEFT but no RIGHT\n");
return;
}
bnr = hfs_bnode_alloc(tree);
if (!(root = bnr.bn)) {
return;
}
root->sticky = HFS_STICKY;
tree->root = root;
tree->bthRoot = root->node;
++tree->bthDepth;
root->ndNHeight = tree->bthDepth;
root->ndFLink = 0;
root->ndBLink = 0;
if (!left) {
/* tree was empty */
root->ndType = ndLeafNode;
root->ndNRecs = 0;
tree->bthFNode = root->node;
tree->bthLNode = root->node;
} else {
root->ndType = ndIndxNode;
root->ndNRecs = 2;
hfs_put_hs(sizeof(struct NodeDescriptor) + ROUND(1+keylen) +
sizeof(hfs_u32), RECTBL(root, 2));
key = bnode_key(root, 1);
key->KeyLen = keylen;
memcpy(key->value,
((struct hfs_bkey *)bnode_key(left, 1))->value, keylen);
hfs_put_hl(left->node, bkey_record(key));
hfs_put_hs(sizeof(struct NodeDescriptor) + 2*ROUND(1+keylen) +
2*sizeof(hfs_u32), RECTBL(root, 3));
key = bnode_key(root, 2);
key->KeyLen = keylen;
memcpy(key->value,
((struct hfs_bkey *)bnode_key(right, 1))->value, keylen);
hfs_put_hl(right->node, bkey_record(key));
/* the former root (left) is now just a normal node */
left->sticky = HFS_NOT_STICKY;
if ((left->next = bhash(tree, left->node))) {
left->next->prev = left;
}
bhash(tree, left->node) = left;
}
hfs_bnode_relse(&bnr);
tree->dirt = 1;
}
/*
* insert_empty_bnode()
*
* Description:
* Adds an empty node to the right of 'left'.
* Input Variable(s):
* struct hfs_btree *tree: the tree to add a node to
* struct hfs_bnode *left: the node to add a node after
* Output Variable(s):
* NONE
* Returns:
* struct hfs_bnode_ref *: reference to the new bnode.
* Preconditions:
* 'tree' points to a valid (struct hfs_btree) with at least 1 free node.
* 'left' points to a valid (struct hfs_bnode) belonging to 'tree'.
* Postconditions:
* If NULL is returned then 'tree' and 'left' are unchanged.
* Otherwise a node with 0 records is inserted in the tree to the right
* of the node 'left'. The 'ndFLink' of 'left' and the 'ndBLink' of
* the former right-neighbor of 'left' (if one existed) point to the
* new node. If 'left' had no right neighbor and is a leaf node the
* the 'bthLNode' of 'tree' points to the new node. The free-count and
* bitmap for 'tree' are kept current by hfs_bnode_alloc() which supplies
* the required node.
*/
static struct hfs_bnode_ref insert_empty_bnode(struct hfs_btree *tree,
struct hfs_bnode *left)
{
struct hfs_bnode_ref retval;
struct hfs_bnode_ref right;
retval = hfs_bnode_alloc(tree);
if (!retval.bn) {
hfs_warn("hfs_binsert: out of bnodes?.\n");
goto done;
}
retval.bn->sticky = HFS_NOT_STICKY;
if ((retval.bn->next = bhash(tree, retval.bn->node))) {
retval.bn->next->prev = retval.bn;
}
bhash(tree, retval.bn->node) = retval.bn;
if (left->ndFLink) {
right = hfs_bnode_find(tree, left->ndFLink, HFS_LOCK_WRITE);
if (!right.bn) {
hfs_warn("hfs_binsert: corrupt btree.\n");
hfs_bnode_bitop(tree, retval.bn->node, 0);
hfs_bnode_relse(&retval);
goto done;
}
right.bn->ndBLink = retval.bn->node;
hfs_bnode_relse(&right);
} else if (left->ndType == ndLeafNode) {
tree->bthLNode = retval.bn->node;
tree->dirt = 1;
}
retval.bn->ndFLink = left->ndFLink;
retval.bn->ndBLink = left->node;
retval.bn->ndType = left->ndType;
retval.bn->ndNHeight = left->ndNHeight;
retval.bn->ndNRecs = 0;
left->ndFLink = retval.bn->node;
done:
return retval;
}
/*
* split()
*
* Description:
* Splits an over full node during insertion.
* Picks the split point that results in the most-nearly equal
* space usage in the new and old nodes.
* Input Variable(s):
* struct hfs_belem *elem: the over full node.
* int size: the number of bytes to be used by the new record and its key.
* Output Variable(s):
* struct hfs_belem *elem: changed to indicate where the new record
* should be inserted.
* Returns:
* struct hfs_bnode_ref: reference to the new bnode.
* Preconditions:
* 'elem' points to a valid path element corresponding to the over full node.
* 'size' is positive.
* Postconditions:
* The records in the node corresponding to 'elem' are redistributed across
* the old and new nodes so that after inserting the new record, the space
* usage in these two nodes is as equal as possible.
* 'elem' is updated so that a call to binsert_nonfull() will insert the
* new record in the correct location.
*/
static inline struct hfs_bnode_ref split(struct hfs_belem *elem, int size)
{
struct hfs_bnode *bnode = elem->bnr.bn;
int nrecs, cutoff, index, tmp, used, in_right;
struct hfs_bnode_ref right;
right = insert_empty_bnode(bnode->tree, bnode);
if (right.bn) {
nrecs = bnode->ndNRecs;
cutoff = (size + bnode_end(bnode) -
sizeof(struct NodeDescriptor) +
(nrecs+1)*sizeof(hfs_u16))/2;
used = 0;
in_right = 1;
/* note that this only works because records sizes are even */
for (index=1; index <= elem->record; ++index) {
tmp = (sizeof(hfs_u16) + bnode_rsize(bnode, index))/2;
used += tmp;
if (used > cutoff) {
goto found;
}
used += tmp;
}
tmp = (size + sizeof(hfs_u16))/2;
used += tmp;
if (used > cutoff) {
goto found;
}
in_right = 0;
used += tmp;
for (; index <= nrecs; ++index) {
tmp = (sizeof(hfs_u16) + bnode_rsize(bnode, index))/2;
used += tmp;
if (used > cutoff) {
goto found;
}
used += tmp;
}
/* couldn't find the split point! */
hfs_bnode_relse(&right);
}
return right;
found:
if (in_right) {
elem->bnr = right;
elem->record -= index-1;
}
hfs_bnode_shift_right(bnode, right.bn, index);
return right;
}
/*
* binsert()
*
* Description:
* Inserts a record in a tree known to have enough room, even if the
* insertion requires the splitting of nodes.
* Input Variable(s):
* struct hfs_brec *brec: partial path to the node to insert in
* const struct hfs_bkey *key: key for the new record
* const void *data: data for the new record
* hfs_u8 keysize: size of the key
* hfs_u16 datasize: size of the data
* int reserve: number of nodes reserved in case of splits
* Output Variable(s):
* *brec = NULL
* Returns:
* int: 0 on success, error code on failure
* Preconditions:
* 'brec' points to a valid (struct hfs_brec) corresponding to a
* record in a leaf node, after which a record is to be inserted,
* or to "record 0" of the leaf node if the record is to be inserted
* before all existing records in the node. The (struct hfs_brec)
* includes all ancestors of the leaf node that are needed to
* complete the insertion including the parents of any nodes that
* will be split.
* 'key' points to a valid (struct hfs_bkey) which is appropriate
* to this tree, and which belongs at the insertion point.
* 'data' points data appropriate for the indicated node.
* 'keysize' gives the size in bytes of the key.
* 'datasize' gives the size in bytes of the data.
* 'reserve' gives the number of nodes that have been reserved in the
* tree to allow for splitting of nodes.
* Postconditions:
* All 'reserve'd nodes have been either used or released.
* *brec = NULL
* On success the key and data have been inserted at the indicated
* location in the tree, all appropriate fields of the in-core data
* structures have been changed and updated versions of the on-disk
* data structures have been scheduled for write-back to disk.
* On failure the B*-tree is probably invalid both on disk and in-core.
*
* XXX: Some attempt at repair might be made in the event of failure,
* or the fs should be remounted read-only so things don't get worse.
*/
static int binsert(struct hfs_brec *brec, const struct hfs_bkey *key,
const void *data, hfs_u8 keysize, hfs_u16 datasize,
int reserve)
{
struct hfs_bnode_ref left, right, other;
struct hfs_btree *tree = brec->tree;
struct hfs_belem *belem = brec->bottom;
int tmpsize = 1 + tree->bthKeyLen;
struct hfs_bkey *tmpkey = hfs_malloc(tmpsize);
hfs_u32 node;
while ((belem >= brec->top) && (belem->flags & HFS_BPATH_OVERFLOW)) {
left = belem->bnr;
if (left.bn->ndFLink &&
hfs_bnode_in_brec(left.bn->ndFLink, brec)) {
hfs_warn("hfs_binsert: corrupt btree\n");
tree->reserved -= reserve;
hfs_free(tmpkey, tmpsize);
return -EIO;
}
right = split(belem, ROUND(keysize+1) + ROUND(datasize));
--reserve;
--tree->reserved;
if (!right.bn) {
hfs_warn("hfs_binsert: unable to split node!\n");
tree->reserved -= reserve;
hfs_free(tmpkey, tmpsize);
return -ENOSPC;
}
binsert_nonfull(brec, belem, key, data, keysize, datasize);
if (belem->bnr.bn == left.bn) {
other = right;
if (belem->record == 1) {
hfs_bnode_update_key(brec, belem, left.bn, 0);
}
} else {
other = left;
}
if (left.bn->node == tree->root->node) {
add_root(tree, left.bn, right.bn);
hfs_bnode_relse(&other);
goto done;
}
data = &node;
datasize = sizeof(node);
node = htonl(right.bn->node);
key = tmpkey;
keysize = tree->bthKeyLen;
memcpy(tmpkey, bnode_key(right.bn, 1), keysize+1);
hfs_bnode_relse(&other);
--belem;
}
if (belem < brec->top) {
hfs_warn("hfs_binsert: Missing parent.\n");
tree->reserved -= reserve;
hfs_free(tmpkey, tmpsize);
return -EIO;
}
binsert_nonfull(brec, belem, key, data, keysize, datasize);
done:
tree->reserved -= reserve;
hfs_free(tmpkey, tmpsize);
return 0;
}
/*================ Global functions ================*/
/*
* hfs_binsert()
*
* Description:
* This function inserts a new record into a b-tree.
* Input Variable(s):
* struct hfs_btree *tree: pointer to the (struct hfs_btree) to insert in
* struct hfs_bkey *key: pointer to the (struct hfs_bkey) to insert
* void *data: pointer to the data to associate with 'key' in the b-tree
* unsigned int datasize: the size of the data
* Output Variable(s):
* NONE
* Returns:
* int: 0 on success, error code on failure
* Preconditions:
* 'tree' points to a valid (struct hfs_btree)
* 'key' points to a valid (struct hfs_bkey)
* 'data' points to valid memory of length 'datasize'
* Postconditions:
* If zero is returned then the record has been inserted in the
* indicated location updating all in-core data structures and
* scheduling all on-disk data structures for write-back.
*/
int hfs_binsert(struct hfs_btree *tree, const struct hfs_bkey *key,
const void *data, hfs_u16 datasize)
{
struct hfs_brec brec;
struct hfs_belem *belem;
int err, reserve, retval;
hfs_u8 keysize;
if (!tree || (tree->magic != HFS_BTREE_MAGIC) || !key || !data) {
hfs_warn("hfs_binsert: invalid arguments.\n");
return -EINVAL;
}
if (key->KeyLen > tree->bthKeyLen) {
hfs_warn("hfs_binsert: oversized key\n");
return -EINVAL;
}
restart:
if (!tree->bthNRecs) {
/* create the root bnode */
add_root(tree, NULL, NULL);
if (!hfs_brec_init(&brec, tree, HFS_BFIND_INSERT)) {
hfs_warn("hfs_binsert: failed to create root.\n");
return -ENOSPC;
}
} else {
err = hfs_bfind(&brec, tree, key, HFS_BFIND_INSERT);
if (err < 0) {
hfs_warn("hfs_binsert: hfs_brec_find failed.\n");
return err;
} else if (err == 0) {
hfs_brec_relse(&brec, NULL);
return -EEXIST;
}
}
keysize = key->KeyLen;
datasize = ROUND(datasize);
belem = brec.bottom;
belem->flags = 0;
if (bnode_freespace(belem->bnr.bn) <
(sizeof(hfs_u16) + ROUND(keysize+1) + datasize)) {
belem->flags |= HFS_BPATH_OVERFLOW;
}
if (belem->record == 0) {
belem->flags |= HFS_BPATH_FIRST;
}
if (!belem->flags) {
hfs_brec_lock(&brec, brec.bottom);
reserve = 0;
} else {
reserve = brec.bottom - brec.top;
if (brec.top == 0) {
++reserve;
}
/* make certain we have enough nodes to proceed */
if ((tree->bthFree - tree->reserved) < reserve) {
hfs_brec_relse(&brec, NULL);
hfs_btree_lock(tree);
if ((tree->bthFree - tree->reserved) < reserve) {
hfs_btree_extend(tree);
}
hfs_btree_unlock(tree);
if ((tree->bthFree - tree->reserved) < reserve) {
return -ENOSPC;
} else {
goto restart;
}
}
tree->reserved += reserve;
hfs_brec_lock(&brec, NULL);
}
retval = binsert(&brec, key, data, keysize, datasize, reserve);
hfs_brec_relse(&brec, NULL);
if (!retval) {
++tree->bthNRecs;
tree->dirt = 1;
}
return retval;
}