fs/tux3/ileaf.c - pub/scm/linux/kernel/git/daniel/linux-tux3 - Git at Google

 /*
  * Inode btree leaf operations.
  *
  * Copyright (c) 2008-2014 Daniel Phillips
  * Copyright (c) 2008-2014 OGAWA Hirofumi
  */

 #include "tux3.h"
 #include "ileaf.h"

 #ifndef trace
 #define trace trace_on
 #endif

 struct ileaf {
 	__be16 magic;		/* Magic number */
 	__be16 count;		/* Counts of used offset info entries */
 	u32 pad;
 	__be64 ibase;		/* Base inode number */
 	char table[];		/* ileaf data: inode attrs ... offset info */
 };

 /*
  * inode leaf format
  *
  * A leaf has a small header followed by a table of attributes.  A vector of
  * offsets within the block grows down from the top of the leaf towards the
  * top of the attribute table, indexed by the difference between inum and
  * leaf->ibase, the base inum of the table block.
  */

 static inline __be16 *ileaf_dict(struct btree *btree, struct ileaf *ileaf)
 {
 	return (void *)ileaf + btree->sb->blocksize;
 }

 static inline unsigned __atdict(__be16 *dict, unsigned at)
 {
 	assert(at);
 	return be16_to_cpu(*(dict - at));
 }

 static inline unsigned atdict(__be16 *dict, unsigned at)
 {
 	return at ? __atdict(dict, at) : 0;
 }

 static inline void add_idict(__be16 *dict, int n)
 {
 	be16_add_cpu(dict, n);
 }

 static inline unsigned icount(struct ileaf *leaf)
 {
 	return be16_to_cpu(leaf->count);
 }

 static inline tuxkey_t ibase(struct ileaf *leaf)
 {
 	return be64_to_cpu(leaf->ibase);
 }

 static void ileaf_btree_init(struct btree *btree)
 {
 	btree->entries_per_leaf = 1 << (btree->sb->blockbits - 6);
 }

 static int ileaf_init(struct btree *btree, void *leaf)
 {
 	struct ileaf_attr_ops *attr_ops = btree->ops->private_ops;
 	trace("initialize inode leaf %p", leaf);
 	*(struct ileaf *)leaf = (struct ileaf){
 		.magic = attr_ops->magic,
 	};
 	return 0;
 }

 static int ileaf_need(struct btree *btree, struct ileaf *ileaf)
 {
 	__be16 *dict = ileaf_dict(btree, ileaf);
 	unsigned count = icount(ileaf);
 	return atdict(dict, count) + count * sizeof(*dict);
 }

 static int ileaf_free(struct btree *btree, struct ileaf *ileaf)
 {
 	return btree->sb->blocksize
 		- ileaf_need(btree, ileaf) - sizeof(struct ileaf);
 }

 static int ileaf_sniff(struct btree *btree, void *leaf)
 {
 	struct ileaf_attr_ops *attr_ops = btree->ops->private_ops;
 	if (((struct ileaf *)leaf)->magic != attr_ops->magic)
 		return -1;
 	return 0;
 }

 static int ileaf_can_free(struct btree *btree, void *leaf)
 {
 	struct ileaf *ileaf = leaf;
 	return icount(ileaf) == 0;
 }

 void *ileaf_lookup(struct btree *btree, inum_t inum, struct ileaf *leaf, unsigned *result)
 {
 	assert(inum >= ibase(leaf));
 	tuxkey_t at = inum - ibase(leaf), size = 0;
 	void *attrs = NULL;

 	trace("lookup inode 0x%Lx, %Lx + %Lx", inum, ibase(leaf), at);
 	if (at < icount(leaf)) {
 		__be16 *dict = ileaf_dict(btree, leaf);
 		unsigned offset = atdict(dict, at);
 		if ((size = __atdict(dict, at + 1) - offset))
 			attrs = leaf->table + offset;
 	}
 	*result = size;
 	return attrs;
 }

 static int isinorder(struct btree *btree, struct ileaf *leaf)
 {
 	__be16 *dict = ileaf_dict(btree, leaf);

 	for (int i = 0, offset = 0, limit; i < icount(leaf); i++, offset = limit)
 		if ((limit = __atdict(dict, i + 1)) < offset)
 			return 0;
 	return 1;
 }

 /* userland only */
 int ileaf_check(struct btree *btree, struct ileaf *leaf)
 {
 	struct ileaf_attr_ops *attr_ops = btree->ops->private_ops;
 	char *why;

 	why = "not an ileaf";
 	if (leaf->magic != attr_ops->magic)
 		goto eek;
 	why = "dict out of order";
 	if (!isinorder(btree, leaf))
 		goto eek;
 	return 0;
 eek:
 	tux3_err(btree->sb, "%s!", why);
 	return -1;
 }

 static void ileaf_trim(struct btree *btree, struct ileaf *leaf)
 {
 	__be16 *dict = ileaf_dict(btree, leaf);
 	unsigned count = icount(leaf);

 	while (count > 1 && *(dict - count) == *(dict - count + 1))
 		count--;
 	if (count == 1 && !*(dict - 1))
 		count = 0;
 	leaf->count = cpu_to_be16(count);
 }

 #define SPLIT_AT_INUM

 static tuxkey_t ileaf_split(struct btree *btree, tuxkey_t hint,
 			    void *from, void *into)
 {
 	assert(!ileaf_sniff(btree, from));
 	struct ileaf *leaf = from, *dest = into;
 	__be16 *dict = ileaf_dict(btree, from);
 	__be16 *destdict = ileaf_dict(btree, into);

 #ifdef SPLIT_AT_INUM
 	/*
 	 * This is to prevent to have same ibase on both of from and into
 	 * FIXME: we would want to split at better position.
 	 */
 	if (hint == ibase(leaf))
 		hint++;

 	trace("split at inum 0x%Lx", hint);
 	unsigned at = min_t(tuxkey_t, hint - ibase(leaf), icount(leaf));
 #else
 	/* binsearch inum starting nearest middle of block */
 	unsigned at = 1, hi = icount(leaf);
 	while (at < hi) {
 		int mid = (at + hi) / 2;
 		if (*(dict - mid) < (btree->sb->blocksize / 2))
 			at = mid + 1;
 		else
 			hi = mid;
 	}
 #endif
 	/* should trim leading empty inodes on copy */
 	unsigned split = atdict(dict, at), free = atdict(dict, icount(leaf));
 	trace("split at %x of %x", at, icount(leaf));
 	trace("copy out %x bytes at %x", free - split, split);
 	assert(free >= split);
 	memcpy(dest->table, leaf->table + split, free - split);
 	dest->count = cpu_to_be16(icount(leaf) - at);
 	veccopy(destdict - icount(dest), dict - icount(leaf), icount(dest));
 	for (int i = 1; i <= icount(dest); i++)
 		add_idict(destdict - i, -split);
 #ifdef SPLIT_AT_INUM
 	/* round down to multiple of 64 above ibase */
 	inum_t round = hint & ~(inum_t)(btree->entries_per_leaf - 1);
 	dest->ibase = cpu_to_be64(round > ibase(leaf) + icount(leaf) ? round : hint);
 #else
 	dest->ibase = cpu_to_be64(ibase(leaf) + at);
 #endif
 	leaf->count = cpu_to_be16(at);
 	memset(leaf->table + split, 0, (char *)(dict - icount(leaf)) - (leaf->table + split));
 	ileaf_trim(btree, leaf);
 	return ibase(dest);
 }

 static int ileaf_merge(struct btree *btree, void *vinto, void *vfrom)
 {
 	struct ileaf *into = vinto, *from = vfrom;
 	unsigned fromcount = icount(from);

 	/* If "from" is empty, does nothing */
 	if (!fromcount)
 		return 1;

 	assert(ibase(from) > ibase(into));
 	tuxkey_t fromibase = ibase(from);
 	unsigned count = icount(into);
 	int hole = fromibase - ibase(into) + count;

 	__be16 *dict = ileaf_dict(btree, into);
 	__be16 *fromdict = ileaf_dict(btree, from);
 	int need_size = hole * sizeof(*dict) + ileaf_need(btree, from);

 	if (ileaf_free(btree, into) < need_size)
 		return 0;

 	/* Fill hole of dict until from_ibase */
 	unsigned limit = atdict(dict, count);
 	__be16 __limit = cpu_to_be16(limit);
 	while (hole--) {
 		count++;
 		*(dict - count) = __limit;
 	}

 	/* Copy data from "from" */
 	unsigned fromlimit = atdict(fromdict, fromcount);
 	memcpy(into->table + limit, from->table, fromlimit);

 	/* Adjust copying fromdict */
 	if (limit) {
 		int i;
 		for (i = 1; i <= fromcount; i++)
 			add_idict(dict - i, limit);
 	}
 	veccopy(dict - count - fromcount, fromdict - fromcount, fromcount);

 	into->count = cpu_to_be16(count + fromcount);

 	return 1;
 }

 /*
  * Chop inums
  * return value:
  * < 0 - error
  *   1 - modified
  *   0 - not modified
  */
 static int ileaf_chop(struct btree *btree, tuxkey_t start, u64 len, void *leaf)
 {
 	struct ileaf *ileaf = leaf;
 	__be16 *dict = ileaf_dict(btree, leaf);
 	tuxkey_t base = ibase(ileaf);
 	unsigned count = icount(ileaf);
 	tuxkey_t at = start - base;
 	void *startp, *endp, *tailp;
 	unsigned size;

 	if (at + 1 > count)
 		return 0;

 	len = min_t(u64, len, count - at);

 	startp = ileaf->table + atdict(dict, at);
 	endp = ileaf->table + atdict(dict, at + len);
 	if (startp == endp)
 		return 0;

 	/* Remove data */
 	tailp = ileaf->table + atdict(dict, count);
 	memmove(startp, endp, tailp - endp);

 	/* Adjust dict */
 	size = endp - startp;
 	while (at < count) {
 		at++;
 		add_idict(dict - at, -size);
 	}

 	ileaf_trim(btree, leaf);

 	return 1;
 }

 static void *ileaf_resize(struct btree *btree, tuxkey_t inum, void *vleaf,
 			  int newsize)
 {
 	struct ileaf *ileaf = vleaf;
 	__be16 *dict = ileaf_dict(btree, ileaf);
 	unsigned count = icount(ileaf);
 	tuxkey_t at = inum - ibase(ileaf);
 	int extend_dict, offset, size;

 	assert(inum >= ibase(ileaf));

 	/* Get existent attributes, and calculate expand/shrink size */
 	if (at + 1 > count) {
 		/* Check size roughly to avoid overflow int */
 		if ((at + 1) * sizeof(*dict) >= btree->sb->blocksize)
 			goto overflow;

 		/* Need to extend dict */
 		extend_dict = (at + 1 - count) * sizeof(*dict);
 		offset = atdict(dict, count);
 		size = 0;
 	} else {
 		/* "at" is in dict, so get attr size */
 		extend_dict = 0;
 		offset = atdict(dict, at);
 		size = __atdict(dict, at + 1) - offset;
 	}

 	if (ileaf_free(btree, ileaf) < newsize - size + extend_dict) {
 overflow:
 		return NULL;
 	}

 	/* Extend dict */
 	if (extend_dict) {
 		__be16 limit = cpu_to_be16(atdict(dict, count));
 		while (count < at + 1) {
 			count++;
 			*(dict - count) = limit;
 		}
 		ileaf->count = cpu_to_be16(count);
 	}

 	void *attrs = ileaf->table + offset;
 	if (newsize != size) {
 		/* Expand/Shrink attr space */
 		unsigned limit = __atdict(dict, count);
 		assert(limit >= offset + size);
 		memmove(attrs + newsize, attrs + size, limit - offset - size);

 		/* Adjust dict */
 		int diff = newsize - size;
 		at++;
 		while (at <= count) {
 			add_idict(dict - at, diff);
 			at++;
 		}
 	}

 	return attrs;
 }

 static tuxkey_t ileaf_split_hint(struct btree *btree, struct ileaf *ileaf,
 				 tuxkey_t key, int size)
 {
 	/*
 	 * FIXME: make sure there is space for size.
 	 * FIXME: better split position?
 	 */

 	tuxkey_t base = ibase(ileaf);
 	unsigned count = icount(ileaf);
 	if (key >= base + count)
 		return key & ~(btree->entries_per_leaf - 1);

 	return base + count / 2;
 }

 /*
  * Write inode attributes.
  */
 static int ileaf_write(struct btree *btree, tuxkey_t key_bottom,
 		       tuxkey_t key_limit,
 		       void *leaf, struct btree_key_range *key,
 		       tuxkey_t *split_hint)
 {
 	struct ileaf_req *rq = container_of(key, struct ileaf_req, key);
 	struct ileaf_attr_ops *attr_ops = btree->ops->private_ops;
 	struct ileaf *ileaf = leaf;
 	void *attrs;
 	int size;

 	assert(key->len == 1);

 	size = attr_ops->encoded_size(btree, rq->data);
 	assert(size);

 	attrs = ileaf_resize(btree, key->start, ileaf, size);
 	if (attrs == NULL) {
 		/* There is no space to store */
 		*split_hint = ileaf_split_hint(btree, ileaf, key->start, size);
 		return BTREE_DO_SPLIT;
 	}

 	attr_ops->encode(btree, rq->data, attrs, size);

 	key->start++;
 	key->len--;

 	return BTREE_DO_RETRY;
 }

 static int ileaf_read(struct btree *btree, tuxkey_t key_bottom,
 		      tuxkey_t key_limit,
 		      void *leaf, struct btree_key_range *key)
 {
 	struct ileaf_req *rq = container_of(key, struct ileaf_req, key);
 	struct ileaf_attr_ops *attr_ops = btree->ops->private_ops;
 	struct ileaf *ileaf = leaf;
 	void *attrs;
 	unsigned size;

 	attrs = ileaf_lookup(btree, key->start, ileaf, &size);
 	if (attrs == NULL)
 		return -ENOENT;

 	return attr_ops->decode(btree, rq->data, attrs, size);
 }

 struct btree_ops itree_ops = {
 	.btree_init	= ileaf_btree_init,
 	.leaf_init	= ileaf_init,
 	.leaf_split	= ileaf_split,
 	.leaf_merge	= ileaf_merge,
 	.leaf_chop	= ileaf_chop,
 	.leaf_pre_write	= noop_pre_write,
 	.leaf_write	= ileaf_write,
 	.leaf_read	= ileaf_read,
 	.private_ops	= &iattr_ops,

 	.leaf_sniff	= ileaf_sniff,
 	.leaf_can_free	= ileaf_can_free,
 };

 struct btree_ops otree_ops = {
 	.btree_init	= ileaf_btree_init,
 	.leaf_init	= ileaf_init,
 	.leaf_split	= ileaf_split,
 	.leaf_merge	= ileaf_merge,
 	.leaf_chop	= ileaf_chop,
 	.leaf_pre_write	= noop_pre_write,
 	.leaf_write	= ileaf_write,
 	.leaf_read	= ileaf_read,
 	.private_ops	= &oattr_ops,

 	.leaf_sniff	= ileaf_sniff,
 	.leaf_can_free	= ileaf_can_free,
 };

 /*
  * Find free inum
  * (callback for btree_traverse())
  *
  * return value:
  * 1 - found
  * 0 - not found
  */
 int ileaf_find_free(struct btree *btree, tuxkey_t key_bottom,
 		    tuxkey_t key_limit, void *leaf,
 		    tuxkey_t key, u64 len, void *data)
 {
 	tuxkey_t at = key - ibase(leaf);
 	unsigned count = icount(leaf);

 	key_limit = min(key_limit, key + len);

 	if (at < count) {
 		__be16 *dict = ileaf_dict(btree, leaf);
 		unsigned limit, offset = atdict(dict, at);

 		while (at < count) {
 			at++;
 			limit = __atdict(dict, at);
 			if (offset == limit) {
 				at--;
 				break;
 			}
 			offset = limit;
 		}
 	}

 	if (ibase(leaf) + at < key_limit) {
 		*(inum_t *)data = ibase(leaf) + at;
 		return 1;
 	}

 	return 0;
 }

 /*
  * Enumerate inum
  * (callback for btree_traverse())
  */
 int ileaf_enumerate(struct btree *btree, tuxkey_t key_bottom,
 		    tuxkey_t key_limit, void *leaf,
 		    tuxkey_t key, u64 len, void *data)
 {
 	struct ileaf *ileaf = leaf;
 	__be16 *dict = ileaf_dict(btree, ileaf);
 	struct ileaf_enumrate_cb *cb = data;
 	tuxkey_t at, base = ibase(ileaf);
 	unsigned count;

 	at = key - base;
 	count = min_t(u64, key + len - base, icount(ileaf));

 	if (at < count) {
 		unsigned offset = atdict(dict, at);
 		for (; at < count; at++) {
 			unsigned size, limit;
 			inum_t inum;
 			void *attrs;
 			int err;

 			limit = __atdict(dict, at + 1);
 			if (limit <= offset)
 				continue;
 			attrs = ileaf->table + offset;
 			size = limit - offset;

 			inum = base + at;
 			err = cb->callback(btree, inum, attrs, size, cb->data);
 			if (err)
 				return err;

 			offset = limit;
 		}
 	}

 	return 0;
 }
	/*
	* Inode btree leaf operations.
	*
	* Copyright (c) 2008-2014 Daniel Phillips
	* Copyright (c) 2008-2014 OGAWA Hirofumi
	*/

	#include "tux3.h"
	#include "ileaf.h"

	#ifndef trace
	#define trace trace_on
	#endif

	struct ileaf {
	__be16 magic; /* Magic number */
	__be16 count; /* Counts of used offset info entries */
	u32 pad;
	__be64 ibase; /* Base inode number */
	char table[]; /* ileaf data: inode attrs ... offset info */
	};

	/*
	* inode leaf format
	*
	* A leaf has a small header followed by a table of attributes. A vector of
	* offsets within the block grows down from the top of the leaf towards the
	* top of the attribute table, indexed by the difference between inum and
	* leaf->ibase, the base inum of the table block.
	*/

	static inline __be16 ileaf_dict(struct btree btree, struct ileaf *ileaf)
	{
	return (void *)ileaf + btree->sb->blocksize;
	}

	static inline unsigned __atdict(__be16 *dict, unsigned at)
	{
	assert(at);
	return be16_to_cpu(*(dict - at));
	}

	static inline unsigned atdict(__be16 *dict, unsigned at)
	{
	return at ? __atdict(dict, at) : 0;
	}

	static inline void add_idict(__be16 *dict, int n)
	{
	be16_add_cpu(dict, n);
	}

	static inline unsigned icount(struct ileaf *leaf)
	{
	return be16_to_cpu(leaf->count);
	}

	static inline tuxkey_t ibase(struct ileaf *leaf)
	{
	return be64_to_cpu(leaf->ibase);
	}

	static void ileaf_btree_init(struct btree *btree)
	{
	btree->entries_per_leaf = 1 << (btree->sb->blockbits - 6);
	}

	static int ileaf_init(struct btree btree, void leaf)
	{
	struct ileaf_attr_ops *attr_ops = btree->ops->private_ops;
	trace("initialize inode leaf %p", leaf);
	(struct ileaf )leaf = (struct ileaf){
	.magic = attr_ops->magic,
	};
	return 0;
	}

	static int ileaf_need(struct btree btree, struct ileaf ileaf)
	{
	__be16 *dict = ileaf_dict(btree, ileaf);
	unsigned count = icount(ileaf);
	return atdict(dict, count) + count * sizeof(*dict);
	}

	static int ileaf_free(struct btree btree, struct ileaf ileaf)
	{
	return btree->sb->blocksize
	- ileaf_need(btree, ileaf) - sizeof(struct ileaf);
	}

	static int ileaf_sniff(struct btree btree, void leaf)
	{
	struct ileaf_attr_ops *attr_ops = btree->ops->private_ops;
	if (((struct ileaf *)leaf)->magic != attr_ops->magic)
	return -1;
	return 0;
	}

	static int ileaf_can_free(struct btree btree, void leaf)
	{
	struct ileaf *ileaf = leaf;
	return icount(ileaf) == 0;
	}

	void ileaf_lookup(struct btree btree, inum_t inum, struct ileaf leaf, unsigned result)
	{
	assert(inum >= ibase(leaf));
	tuxkey_t at = inum - ibase(leaf), size = 0;
	void *attrs = NULL;

	trace("lookup inode 0x%Lx, %Lx + %Lx", inum, ibase(leaf), at);
	if (at < icount(leaf)) {
	__be16 *dict = ileaf_dict(btree, leaf);
	unsigned offset = atdict(dict, at);
	if ((size = __atdict(dict, at + 1) - offset))
	attrs = leaf->table + offset;
	}
	*result = size;
	return attrs;
	}

	static int isinorder(struct btree btree, struct ileaf leaf)
	{
	__be16 *dict = ileaf_dict(btree, leaf);

	for (int i = 0, offset = 0, limit; i < icount(leaf); i++, offset = limit)
	if ((limit = __atdict(dict, i + 1)) < offset)
	return 0;
	return 1;
	}

	/* userland only */
	int ileaf_check(struct btree btree, struct ileaf leaf)
	{
	struct ileaf_attr_ops *attr_ops = btree->ops->private_ops;
	char *why;

	why = "not an ileaf";
	if (leaf->magic != attr_ops->magic)
	goto eek;
	why = "dict out of order";
	if (!isinorder(btree, leaf))
	goto eek;
	return 0;
	eek:
	tux3_err(btree->sb, "%s!", why);
	return -1;
	}

	static void ileaf_trim(struct btree btree, struct ileaf leaf)
	{
	__be16 *dict = ileaf_dict(btree, leaf);
	unsigned count = icount(leaf);

	while (count > 1 && (dict - count) == (dict - count + 1))
	count--;
	if (count == 1 && !*(dict - 1))
	count = 0;
	leaf->count = cpu_to_be16(count);
	}

	#define SPLIT_AT_INUM

	static tuxkey_t ileaf_split(struct btree *btree, tuxkey_t hint,
	void from, void into)
	{
	assert(!ileaf_sniff(btree, from));
	struct ileaf leaf = from, dest = into;
	__be16 *dict = ileaf_dict(btree, from);
	__be16 *destdict = ileaf_dict(btree, into);

	#ifdef SPLIT_AT_INUM
	/*
	* This is to prevent to have same ibase on both of from and into
	* FIXME: we would want to split at better position.
	*/
	if (hint == ibase(leaf))
	hint++;

	trace("split at inum 0x%Lx", hint);
	unsigned at = min_t(tuxkey_t, hint - ibase(leaf), icount(leaf));
	#else
	/* binsearch inum starting nearest middle of block */
	unsigned at = 1, hi = icount(leaf);
	while (at < hi) {
	int mid = (at + hi) / 2;
	if (*(dict - mid) < (btree->sb->blocksize / 2))
	at = mid + 1;
	else
	hi = mid;
	}
	#endif
	/* should trim leading empty inodes on copy */
	unsigned split = atdict(dict, at), free = atdict(dict, icount(leaf));
	trace("split at %x of %x", at, icount(leaf));
	trace("copy out %x bytes at %x", free - split, split);
	assert(free >= split);
	memcpy(dest->table, leaf->table + split, free - split);
	dest->count = cpu_to_be16(icount(leaf) - at);
	veccopy(destdict - icount(dest), dict - icount(leaf), icount(dest));
	for (int i = 1; i <= icount(dest); i++)
	add_idict(destdict - i, -split);
	#ifdef SPLIT_AT_INUM
	/* round down to multiple of 64 above ibase */
	inum_t round = hint & ~(inum_t)(btree->entries_per_leaf - 1);
	dest->ibase = cpu_to_be64(round > ibase(leaf) + icount(leaf) ? round : hint);
	#else
	dest->ibase = cpu_to_be64(ibase(leaf) + at);
	#endif
	leaf->count = cpu_to_be16(at);
	memset(leaf->table + split, 0, (char *)(dict - icount(leaf)) - (leaf->table + split));
	ileaf_trim(btree, leaf);
	return ibase(dest);
	}

	static int ileaf_merge(struct btree btree, void vinto, void *vfrom)
	{
	struct ileaf into = vinto, from = vfrom;
	unsigned fromcount = icount(from);

	/* If "from" is empty, does nothing */
	if (!fromcount)
	return 1;

	assert(ibase(from) > ibase(into));
	tuxkey_t fromibase = ibase(from);
	unsigned count = icount(into);
	int hole = fromibase - ibase(into) + count;

	__be16 *dict = ileaf_dict(btree, into);
	__be16 *fromdict = ileaf_dict(btree, from);
	int need_size = hole * sizeof(*dict) + ileaf_need(btree, from);

	if (ileaf_free(btree, into) < need_size)
	return 0;

	/* Fill hole of dict until from_ibase */
	unsigned limit = atdict(dict, count);
	__be16 __limit = cpu_to_be16(limit);
	while (hole--) {
	count++;
	*(dict - count) = __limit;
	}

	/* Copy data from "from" */
	unsigned fromlimit = atdict(fromdict, fromcount);
	memcpy(into->table + limit, from->table, fromlimit);

	/* Adjust copying fromdict */
	if (limit) {
	int i;
	for (i = 1; i <= fromcount; i++)
	add_idict(dict - i, limit);
	}
	veccopy(dict - count - fromcount, fromdict - fromcount, fromcount);

	into->count = cpu_to_be16(count + fromcount);

	return 1;
	}

	/*
	* Chop inums
	* return value:
	* < 0 - error
	* 1 - modified
	* 0 - not modified
	*/
	static int ileaf_chop(struct btree btree, tuxkey_t start, u64 len, void leaf)
	{
	struct ileaf *ileaf = leaf;
	__be16 *dict = ileaf_dict(btree, leaf);
	tuxkey_t base = ibase(ileaf);
	unsigned count = icount(ileaf);
	tuxkey_t at = start - base;
	void startp, endp, *tailp;
	unsigned size;

	if (at + 1 > count)
	return 0;

	len = min_t(u64, len, count - at);

	startp = ileaf->table + atdict(dict, at);
	endp = ileaf->table + atdict(dict, at + len);
	if (startp == endp)
	return 0;

	/* Remove data */
	tailp = ileaf->table + atdict(dict, count);
	memmove(startp, endp, tailp - endp);

	/* Adjust dict */
	size = endp - startp;
	while (at < count) {
	at++;
	add_idict(dict - at, -size);
	}

	ileaf_trim(btree, leaf);

	return 1;
	}

	static void ileaf_resize(struct btree btree, tuxkey_t inum, void *vleaf,
	int newsize)
	{
	struct ileaf *ileaf = vleaf;
	__be16 *dict = ileaf_dict(btree, ileaf);
	unsigned count = icount(ileaf);
	tuxkey_t at = inum - ibase(ileaf);
	int extend_dict, offset, size;

	assert(inum >= ibase(ileaf));

	/* Get existent attributes, and calculate expand/shrink size */
	if (at + 1 > count) {
	/* Check size roughly to avoid overflow int */
	if ((at + 1) * sizeof(*dict) >= btree->sb->blocksize)
	goto overflow;

	/* Need to extend dict */
	extend_dict = (at + 1 - count) * sizeof(*dict);
	offset = atdict(dict, count);
	size = 0;
	} else {
	/* "at" is in dict, so get attr size */
	extend_dict = 0;
	offset = atdict(dict, at);
	size = __atdict(dict, at + 1) - offset;
	}

	if (ileaf_free(btree, ileaf) < newsize - size + extend_dict) {
	overflow:
	return NULL;
	}

	/* Extend dict */
	if (extend_dict) {
	__be16 limit = cpu_to_be16(atdict(dict, count));
	while (count < at + 1) {
	count++;
	*(dict - count) = limit;
	}
	ileaf->count = cpu_to_be16(count);
	}

	void *attrs = ileaf->table + offset;
	if (newsize != size) {
	/* Expand/Shrink attr space */
	unsigned limit = __atdict(dict, count);
	assert(limit >= offset + size);
	memmove(attrs + newsize, attrs + size, limit - offset - size);

	/* Adjust dict */
	int diff = newsize - size;
	at++;
	while (at <= count) {
	add_idict(dict - at, diff);
	at++;
	}
	}

	return attrs;
	}

	static tuxkey_t ileaf_split_hint(struct btree btree, struct ileaf ileaf,
	tuxkey_t key, int size)
	{
	/*
	* FIXME: make sure there is space for size.
	* FIXME: better split position?
	*/

	tuxkey_t base = ibase(ileaf);
	unsigned count = icount(ileaf);
	if (key >= base + count)
	return key & ~(btree->entries_per_leaf - 1);

	return base + count / 2;
	}

	/*
	* Write inode attributes.
	*/
	static int ileaf_write(struct btree *btree, tuxkey_t key_bottom,
	tuxkey_t key_limit,
	void leaf, struct btree_key_range key,
	tuxkey_t *split_hint)
	{
	struct ileaf_req *rq = container_of(key, struct ileaf_req, key);
	struct ileaf_attr_ops *attr_ops = btree->ops->private_ops;
	struct ileaf *ileaf = leaf;
	void *attrs;
	int size;

	assert(key->len == 1);

	size = attr_ops->encoded_size(btree, rq->data);
	assert(size);

	attrs = ileaf_resize(btree, key->start, ileaf, size);
	if (attrs == NULL) {
	/* There is no space to store */
	*split_hint = ileaf_split_hint(btree, ileaf, key->start, size);
	return BTREE_DO_SPLIT;
	}

	attr_ops->encode(btree, rq->data, attrs, size);

	key->start++;
	key->len--;

	return BTREE_DO_RETRY;
	}

	static int ileaf_read(struct btree *btree, tuxkey_t key_bottom,
	tuxkey_t key_limit,
	void leaf, struct btree_key_range key)
	{
	struct ileaf_req *rq = container_of(key, struct ileaf_req, key);
	struct ileaf_attr_ops *attr_ops = btree->ops->private_ops;
	struct ileaf *ileaf = leaf;
	void *attrs;
	unsigned size;

	attrs = ileaf_lookup(btree, key->start, ileaf, &size);
	if (attrs == NULL)
	return -ENOENT;

	return attr_ops->decode(btree, rq->data, attrs, size);
	}

	struct btree_ops itree_ops = {
	.btree_init = ileaf_btree_init,
	.leaf_init = ileaf_init,
	.leaf_split = ileaf_split,
	.leaf_merge = ileaf_merge,
	.leaf_chop = ileaf_chop,
	.leaf_pre_write = noop_pre_write,
	.leaf_write = ileaf_write,
	.leaf_read = ileaf_read,
	.private_ops = &iattr_ops,

	.leaf_sniff = ileaf_sniff,
	.leaf_can_free = ileaf_can_free,
	};

	struct btree_ops otree_ops = {
	.btree_init = ileaf_btree_init,
	.leaf_init = ileaf_init,
	.leaf_split = ileaf_split,
	.leaf_merge = ileaf_merge,
	.leaf_chop = ileaf_chop,
	.leaf_pre_write = noop_pre_write,
	.leaf_write = ileaf_write,
	.leaf_read = ileaf_read,
	.private_ops = &oattr_ops,

	.leaf_sniff = ileaf_sniff,
	.leaf_can_free = ileaf_can_free,
	};

	/*
	* Find free inum
	* (callback for btree_traverse())
	*
	* return value:
	* 1 - found
	* 0 - not found
	*/
	int ileaf_find_free(struct btree *btree, tuxkey_t key_bottom,
	tuxkey_t key_limit, void *leaf,
	tuxkey_t key, u64 len, void *data)
	{
	tuxkey_t at = key - ibase(leaf);
	unsigned count = icount(leaf);

	key_limit = min(key_limit, key + len);

	if (at < count) {
	__be16 *dict = ileaf_dict(btree, leaf);
	unsigned limit, offset = atdict(dict, at);

	while (at < count) {
	at++;
	limit = __atdict(dict, at);
	if (offset == limit) {
	at--;
	break;
	}
	offset = limit;
	}
	}

	if (ibase(leaf) + at < key_limit) {
	(inum_t )data = ibase(leaf) + at;
	return 1;
	}

	return 0;
	}

	/*
	* Enumerate inum
	* (callback for btree_traverse())
	*/
	int ileaf_enumerate(struct btree *btree, tuxkey_t key_bottom,
	tuxkey_t key_limit, void *leaf,
	tuxkey_t key, u64 len, void *data)
	{
	struct ileaf *ileaf = leaf;
	__be16 *dict = ileaf_dict(btree, ileaf);
	struct ileaf_enumrate_cb *cb = data;
	tuxkey_t at, base = ibase(ileaf);
	unsigned count;

	at = key - base;
	count = min_t(u64, key + len - base, icount(ileaf));

	if (at < count) {
	unsigned offset = atdict(dict, at);
	for (; at < count; at++) {
	unsigned size, limit;
	inum_t inum;
	void *attrs;
	int err;

	limit = __atdict(dict, at + 1);
	if (limit <= offset)
	continue;
	attrs = ileaf->table + offset;
	size = limit - offset;

	inum = base + at;
	err = cb->callback(btree, inum, attrs, size, cb->data);
	if (err)
	return err;

	offset = limit;
	}
	}

	return 0;
	}