current work

What I'm currently working on.

Signed-off-by: Josef Bacik <josef@redhat.com>
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 634608d..eb51cb9 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -129,6 +129,7 @@
 	 */
 	u64 csum_bytes;
 
+	u64 reserved;
 	/* flags field from the on disk inode */
 	u32 flags;
 
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6a1a680..d1b3d5c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -595,8 +595,12 @@
 
 	num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
-	if (!ret)
+	if (!ret) {
+		trace_printk("%pU: delayed_item: %Lu reserved %Lu\n",
+			     root->fs_info->fsid, item->key.objectid,
+			     num_bytes);
 		item->bytes_reserved = num_bytes;
+	}
 
 	return ret;
 }
@@ -610,6 +614,9 @@
 		return;
 
 	rsv = &root->fs_info->delayed_block_rsv;
+	trace_printk("%pU: delayed_item: %Lu released %Lu\n",
+		     root->fs_info->fsid, item->key.objectid,
+		     item->bytes_reserved);
 	btrfs_block_rsv_release(root, rsv,
 				item->bytes_reserved);
 }
@@ -624,7 +631,7 @@
 	struct btrfs_block_rsv *dst_rsv;
 	u64 num_bytes;
 	int ret;
-	int release = false;
+	bool release = false;
 
 	src_rsv = trans->block_rsv;
 	dst_rsv = &root->fs_info->delayed_block_rsv;
@@ -651,12 +658,18 @@
 		 */
 		if (ret == -EAGAIN)
 			ret = -ENOSPC;
-		if (!ret)
+		if (!ret) {
 			node->bytes_reserved = num_bytes;
+			trace_printk("%pU: delayed_inode: %Lu reserved %Lu\n",
+				     root->fs_info->fsid, btrfs_ino(inode),
+				     num_bytes);
+		}
 		return ret;
 	} else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
 		spin_lock(&BTRFS_I(inode)->lock);
 		if (BTRFS_I(inode)->delalloc_meta_reserved) {
+			BUG_ON(BTRFS_I(inode)->reserved < num_bytes);
+			BTRFS_I(inode)->reserved -= num_bytes;
 			BTRFS_I(inode)->delalloc_meta_reserved = 0;
 			spin_unlock(&BTRFS_I(inode)->lock);
 			release = true;
@@ -676,9 +689,9 @@
 		if (!ret)
 			goto out;
 
-		ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
-		if (!ret)
-			goto out;
+//		ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
+//		if (!ret)
+//			goto out;
 
 		/*
 		 * Ok this is a problem, let's just steal from the global rsv
@@ -707,11 +720,17 @@
 	 * reservation here.  I think it may be time for a documentation page on
 	 * how block rsvs. work.
 	 */
-	if (!ret)
+	if (!ret) {
+		trace_printk("%pU: delayed_inode: %Lu reserved %Lu\n",
+			     root->fs_info->fsid, btrfs_ino(inode), num_bytes);
 		node->bytes_reserved = num_bytes;
+	}
 
-	if (release)
+	if (release) {
+		trace_printk("%pU: delalloc: %Lu released %Lu\n",
+			     root->fs_info->fsid, btrfs_ino(inode), num_bytes);
 		btrfs_block_rsv_release(root, src_rsv, num_bytes);
+	}
 
 	return ret;
 }
@@ -725,6 +744,9 @@
 		return;
 
 	rsv = &root->fs_info->delayed_block_rsv;
+	trace_printk("%pU: delayed_inode: %Lu released %Lu\n",
+		     root->fs_info->fsid, node->inode_id,
+		     node->bytes_reserved);
 	btrfs_block_rsv_release(root, rsv,
 				node->bytes_reserved);
 	node->bytes_reserved = 0;
@@ -1372,13 +1394,6 @@
 		goto release_node;
 	}
 
-	ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
-	/*
-	 * we have reserved enough space when we start a new transaction,
-	 * so reserving metadata failure is impossible
-	 */
-	BUG_ON(ret);
-
 	delayed_item->key.objectid = btrfs_ino(dir);
 	btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
 	delayed_item->key.offset = index;
@@ -1391,6 +1406,14 @@
 	dir_item->type = type;
 	memcpy((char *)(dir_item + 1), name, name_len);
 
+	ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
+	/*
+	 * we have reserved enough space when we start a new transaction,
+	 * so reserving metadata failure is impossible
+	 */
+	BUG_ON(ret);
+
+
 	mutex_lock(&delayed_node->mutex);
 	ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
 	if (unlikely(ret)) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 24cfd10..7f28f10 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -24,6 +24,7 @@
 #include <linux/kthread.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
+#include <linux/kallsyms.h>
 #include "compat.h"
 #include "hash.h"
 #include "ctree.h"
@@ -4047,6 +4048,10 @@
 	if (!trans->bytes_reserved)
 		return;
 
+	WARN_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
+	BUG_ON(trans->block_rsv == &root->fs_info->delalloc_block_rsv);
+	trace_printk("%pU: transaction: %p release %Lu\n", root->fs_info->fsid,
+		     trans, trans->bytes_reserved);
 	btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
 	trans->bytes_reserved = 0;
 }
@@ -4064,6 +4069,8 @@
 	 * when we are truly done with the orphan item.
 	 */
 	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
+	trace_printk("%pU: orphan: %Lu reserve %Lu\n", root->fs_info->fsid,
+		     btrfs_ino(inode), num_bytes);
 	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
@@ -4071,6 +4078,8 @@
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
+	trace_printk("%pU: orphan: %Lu release %Lu\n", root->fs_info->fsid,
+		     btrfs_ino(inode), num_bytes);
 	btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
 }
 
@@ -4162,6 +4171,7 @@
 		BTRFS_I(inode)->csum_bytes += num_bytes;
 	else
 		BTRFS_I(inode)->csum_bytes -= num_bytes;
+
 	csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
 	num_csums_per_leaf = (int)div64_u64(csum_size,
 					    sizeof(struct btrfs_csum_item) +
@@ -4177,10 +4187,13 @@
 	if (old_csums == num_csums)
 		return 0;
 
-	if (reserve)
+	if (reserve) {
+		BUG_ON(num_csums < old_csums);
 		return btrfs_calc_trans_metadata_size(root,
 						      num_csums - old_csums);
+	}
 
+	BUG_ON(old_csums < num_csums);
 	return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
 }
 
@@ -4189,10 +4202,16 @@
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
 	u64 to_reserve = 0;
+	u64 csum_reserve = 0;
+	u64 csum_bytes;
 	unsigned nr_extents = 0;
 	int flush = 1;
+	int delalloc_meta_reserved = 0;
 	int ret;
 
+	/* Need to be holding the i_mutex here */
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
+
 	if (btrfs_is_free_space_inode(root, inode))
 		flush = 0;
 
@@ -4203,13 +4222,10 @@
 
 	spin_lock(&BTRFS_I(inode)->lock);
 	BTRFS_I(inode)->outstanding_extents++;
-
 	if (BTRFS_I(inode)->outstanding_extents >
-	    BTRFS_I(inode)->reserved_extents) {
+	    BTRFS_I(inode)->reserved_extents)
 		nr_extents = BTRFS_I(inode)->outstanding_extents -
 			BTRFS_I(inode)->reserved_extents;
-		BTRFS_I(inode)->reserved_extents += nr_extents;
-	}
 
 	/*
 	 * Add an item to reserve for updating the inode when we complete the
@@ -4217,36 +4233,60 @@
 	 */
 	if (!BTRFS_I(inode)->delalloc_meta_reserved) {
 		nr_extents++;
-		BTRFS_I(inode)->delalloc_meta_reserved = 1;
+		delalloc_meta_reserved = 1;
 	}
 
 	to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
-	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
+	csum_reserve = calc_csum_metadata_size(inode, num_bytes, 1);
+	csum_bytes = BTRFS_I(inode)->csum_bytes;
 	spin_unlock(&BTRFS_I(inode)->lock);
 
-	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
+	ret = reserve_metadata_bytes(root, block_rsv, to_reserve + csum_reserve, flush);
 	if (ret) {
 		u64 to_free = 0;
-		unsigned dropped;
+		int dropped;
 
 		spin_lock(&BTRFS_I(inode)->lock);
 		dropped = drop_outstanding_extent(inode);
-		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
-		spin_unlock(&BTRFS_I(inode)->lock);
-		to_free += btrfs_calc_trans_metadata_size(root, dropped);
 
 		/*
-		 * Somebody could have come in and twiddled with the
-		 * reservation, so if we have to free more than we would have
-		 * reserved from this reservation go ahead and release those
-		 * bytes.
+		 * If the inode's csum_bytes is the same as the original
+		 * csum_bytes then we know we haven't raced with any free()ers
+		 * so we can just reduce our inode's csum bytes and carry on.
+		 * Otherwise we have to do the normal free thing to account for
+		 * the case that the free side didn't free up its reserve
+		 * because of this outstanding reservation.
 		 */
-		to_free -= to_reserve;
+		if (BTRFS_I(inode)->csum_bytes == csum_bytes)
+			calc_csum_metadata_size(inode, num_bytes, 0);
+		else
+			to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+		if (dropped > 0)
+			to_free += btrfs_calc_trans_metadata_size(root, dropped);
+//		BUG_ON(BTRFS_I(inode)->reserved < to_free);
+	//	BTRFS_I(inode)->reserved -= to_free;
+		spin_unlock(&BTRFS_I(inode)->lock);
 		if (to_free)
 			btrfs_block_rsv_release(root, block_rsv, to_free);
+		trace_printk("%pU: delalloc: %Lu released %Lu\n", root->fs_info->fsid,
+			     btrfs_ino(inode), to_free);
 		return ret;
 	}
 
+	to_reserve += csum_reserve;
+	spin_lock(&BTRFS_I(inode)->lock);
+	if (delalloc_meta_reserved) {
+		BTRFS_I(inode)->delalloc_meta_reserved = 1;
+		nr_extents--;
+	}
+	BTRFS_I(inode)->reserved_extents += nr_extents;
+	BTRFS_I(inode)->reserved += to_reserve;
+//	trace_printk("delalloc: %Lu to_reserve=%Lu, csum_reserve=%Lu, nr_extents=%u, outstanding_extents=%u, reserved_extents=%u, delalloc_meta_reserved=%d, reserved=%Lu, csum_bytes=%Lu\n",
+//		     btrfs_ino(inode), to_reserve, csum_reserve, nr_extents, BTRFS_I(inode)->outstanding_extents, BTRFS_I(inode)->reserved_extents, delalloc_meta_reserved, BTRFS_I(inode)->reserved, BTRFS_I(inode)->csum_bytes);
+	spin_unlock(&BTRFS_I(inode)->lock);
+
+	trace_printk("%pU: delalloc: %Lu reserved %Lu\n", root->fs_info->fsid,
+		     btrfs_ino(inode), to_reserve);
 	block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
 	return 0;
@@ -4265,17 +4305,46 @@
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 to_free = 0;
+	u64 extent_free = 0;
+	u64 csum_free = 0;
 	unsigned dropped;
+	unsigned dropping_reserve = 0;
+//	char symname[KSYM_NAME_LEN];
 
+//	sprint_symbol(symname, (unsigned long)__builtin_return_address(0));
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
 	spin_lock(&BTRFS_I(inode)->lock);
+
+	if (BTRFS_I(inode)->delalloc_meta_reserved)
+		dropping_reserve = 1;
+
 	dropped = drop_outstanding_extent(inode);
 
-	to_free = calc_csum_metadata_size(inode, num_bytes, 0);
-	spin_unlock(&BTRFS_I(inode)->lock);
-	if (dropped > 0)
-		to_free += btrfs_calc_trans_metadata_size(root, dropped);
+	if (BTRFS_I(inode)->delalloc_meta_reserved)
+		dropping_reserve = 0;
 
+	csum_free = to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+	if (dropped > 0) {
+		extent_free = btrfs_calc_trans_metadata_size(root, dropped);
+		to_free += extent_free;
+	}
+//	BUG_ON(BTRFS_I(inode)->reserved < to_free);
+	/*
+	if (BTRFS_I(inode)->reserved < to_free)
+		trace_printk("inode %Lu has %Lu want to free %Lu, dropped reserve %d\n",
+		       btrfs_ino(inode), BTRFS_I(inode)->reserved, to_free, dropping_reserve);
+	*/
+	BTRFS_I(inode)->reserved -= to_free;
+	/*
+	trace_printk("delalloc: %Lu to_free=%Lu, csum_free=%Lu, dropped=%u, outstanding_extents=%u, reserved_extents=%u, drop_reserved=%u, reserved=%Lu, csum_bytes=%Lu\n",
+		     btrfs_ino(inode), extent_free, csum_free, dropped, BTRFS_I(inode)->outstanding_extents, BTRFS_I(inode)->reserved_extents, dropping_reserve, BTRFS_I(inode)->reserved, BTRFS_I(inode)->csum_bytes);
+	*/
+	spin_unlock(&BTRFS_I(inode)->lock);
+
+	trace_printk("%pU: delalloc: %Lu released %Lu\n", root->fs_info->fsid,
+		     btrfs_ino(inode), to_free);
+//	trace_printk("%pU: delalloc: %s: %Lu released %Lu\n", root->fs_info->fsid,
+//		     symname, btrfs_ino(inode), to_free);
 	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
 				to_free);
 }
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index f8962a9..48a59eb 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -438,6 +438,8 @@
 					  trans->bytes_reserved);
 	if (ret)
 		goto out;
+	trace_printk("%pU: ino_cache: %p reserved %Lu\n",
+		     root->fs_info->fsid, trans, trans->bytes_reserved);
 again:
 	inode = lookup_free_ino_inode(root, path);
 	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -482,7 +484,10 @@
 	/* Just to make sure we have enough space */
 	prealloc += 8 * PAGE_CACHE_SIZE;
 
+	/* Not really needed but for correctness' sake */
+	mutex_lock(&inode->i_mutex);
 	ret = btrfs_delalloc_reserve_space(inode, prealloc);
+	mutex_unlock(&inode->i_mutex);
 	if (ret)
 		goto out_put;
 
@@ -498,6 +503,8 @@
 out_put:
 	iput(inode);
 out_release:
+	trace_printk("%pU: ino_cache: %p released %Lu\n", root->fs_info->fsid,
+		     trans, trans->bytes_reserved);
 	btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
 out:
 	trans->block_rsv = rsv;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9de15f1..0b8e6af 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -630,7 +630,6 @@
 
 		trans = btrfs_join_transaction(root);
 		BUG_ON(IS_ERR(trans));
-		trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 		ret = btrfs_reserve_extent(trans, root,
 					   async_extent->compressed_size,
 					   async_extent->compressed_size,
@@ -1375,13 +1374,10 @@
 		u64 len = state->end + 1 - state->start;
 		bool do_list = !btrfs_is_free_space_inode(root, inode);
 
-		if (*bits & EXTENT_FIRST_DELALLOC) {
+		if (*bits & EXTENT_FIRST_DELALLOC)
 			*bits &= ~EXTENT_FIRST_DELALLOC;
-		} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
-			spin_lock(&BTRFS_I(inode)->lock);
-			BTRFS_I(inode)->outstanding_extents--;
-			spin_unlock(&BTRFS_I(inode)->lock);
-		}
+		else if (!(*bits & EXTENT_DO_ACCOUNTING))
+			WARN_ON(1);
 
 		if (*bits & EXTENT_DO_ACCOUNTING)
 			btrfs_delalloc_release_metadata(inode, len);
@@ -2207,7 +2203,13 @@
 				continue;
 			}
 			nr_truncate++;
+			/* Need to hold the i_mutex for reservation purposes, not
+			 * a huge deal here but I've got a WARN_ON in
+			 * btrfs_delalloc_reserve_space to catch offenders.
+			 */
+			mutex_lock(&inode->i_mutex);
 			ret = btrfs_truncate(inode);
+			mutex_unlock(&inode->i_mutex);
 		} else {
 			nr_unlink++;
 		}
@@ -6127,7 +6129,10 @@
 	lockend = offset + count - 1;
 
 	if (writing) {
+		/* Need this to keep space reservations serialized. */
+		mutex_lock(&inode->i_mutex);
 		ret = btrfs_delalloc_reserve_space(inode, count);
+		mutex_unlock(&inode->i_mutex);
 		if (ret)
 			goto out;
 	}
@@ -6357,7 +6362,13 @@
 	u64 page_start;
 	u64 page_end;
 
+	/*
+	 * Reservation code has to be guarded by the i_mutex to keep multiple
+	 * reservations from happening at the same time.
+	 */
+	mutex_lock(&inode->i_mutex);
 	ret  = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+	mutex_unlock(&inode->i_mutex);
 	if (!ret)
 		ret = btrfs_update_time(vma->vm_file);
 	if (ret) {
@@ -6685,6 +6696,7 @@
 	spin_lock_init(&ei->lock);
 	ei->outstanding_extents = 0;
 	ei->reserved_extents = 0;
+	ei->reserved = 0;
 
 	ei->ordered_data_close = 0;
 	ei->orphan_meta_reserved = 0;
@@ -6727,6 +6739,7 @@
 	WARN_ON(BTRFS_I(inode)->reserved_extents);
 	WARN_ON(BTRFS_I(inode)->delalloc_bytes);
 	WARN_ON(BTRFS_I(inode)->csum_bytes);
+	WARN_ON(BTRFS_I(inode)->reserved);
 
 	/*
 	 * This can happen where we create an inode, but somebody else also
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4a34c47..2551a01 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -858,8 +858,10 @@
 		return 0;
 	file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
 
+	mutex_lock(&inode->i_mutex);
 	ret = btrfs_delalloc_reserve_space(inode,
 					   num_pages << PAGE_CACHE_SHIFT);
+	mutex_unlock(&inode->i_mutex);
 	if (ret)
 		return ret;
 again:
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index dff29d5..cfb5543 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2947,7 +2947,9 @@
 	index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
 	last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
 	while (index <= last_index) {
+		mutex_lock(&inode->i_mutex);
 		ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
+		mutex_unlock(&inode->i_mutex);
 		if (ret)
 			goto out;
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 81376d9..8f1430c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -314,6 +314,9 @@
 	h->block_rsv = NULL;
 	h->orig_rsv = NULL;
 
+	if (num_bytes)
+		trace_printk("%pU: transaction: %p reserve %Lu\n",
+			     root->fs_info->fsid, h, num_bytes);
 	smp_mb();
 	if (cur_trans->blocked && may_wait_transaction(root, type)) {
 		btrfs_commit_transaction(h, root);