Btrfs: use a list for dirty log pages

Setting bits in an io tree for the dirty log pages is really expensive,
taking somewhere around 7us on my box.  Replace this with a basic list
protected by a spinlock, which brings the operation down to 0.2us per
dirty eb.  This takes my sync test from 4.4 MB/s to 5.8 MB/s.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f4255db..37b8c72 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1552,7 +1552,9 @@
 	struct btrfs_root_item root_item;
 	struct btrfs_key root_key;
 	struct btrfs_fs_info *fs_info;
-	struct extent_io_tree dirty_log_pages;
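+	/* per-transid dirty log ebs, protected by log_pages_lock */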
+	spinlock_t log_pages_lock;
+	struct list_head dirty_log_pages[2];
 
 	struct kobject root_kobj;
 	struct completion kobj_unregister;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 31f4ca6..604678dd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1197,8 +1197,9 @@
 	atomic_set(&root->orphan_inodes, 0);
 	root->log_transid = 0;
 	root->last_log_commit = 0;
-	extent_io_tree_init(&root->dirty_log_pages,
-			     fs_info->btree_inode->i_mapping);
+	spin_lock_init(&root->log_pages_lock);
+	INIT_LIST_HEAD(&root->dirty_log_pages[0]);
+	INIT_LIST_HEAD(&root->dirty_log_pages[1]);
 
 	memset(&root->root_key, 0, sizeof(root->root_key));
 	memset(&root->root_item, 0, sizeof(root->root_item));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 98af837..6f2d283 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6249,12 +6249,11 @@
-		 * we allow two log transactions at a time, use different
-		 * EXENT bit to differentiate dirty pages.
+		 * we allow two log transactions at a time, so keep a
+		 * separate dirty eb list for each log transid.
 		 */
-		if (root->log_transid % 2 == 0)
-			set_extent_dirty(&root->dirty_log_pages, buf->start,
-					buf->start + buf->len - 1, GFP_NOFS);
-		else
-			set_extent_new(&root->dirty_log_pages, buf->start,
-					buf->start + buf->len - 1, GFP_NOFS);
+		spin_lock(&root->log_pages_lock);
+		list_add_tail(&buf->dirty_list,
+			      &root->dirty_log_pages[root->log_transid % 2]);
+		spin_unlock(&root->log_pages_lock);
+		extent_buffer_get(buf);
 	} else {
 		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
 			 buf->start + buf->len - 1, GFP_NOFS);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1b319df..e2c0f69 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -12,6 +12,7 @@
 #include <linux/pagevec.h>
 #include <linux/prefetch.h>
 #include <linux/cleancache.h>
+#include <linux/list_sort.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "compat.h"
@@ -2788,6 +2789,8 @@
 				      struct writeback_control *wbc,
 				      unsigned long nr_written)
 {
+	if (!wbc)
+		return;
 	wbc->nr_to_write -= nr_written;
 	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
 	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
@@ -3247,6 +3250,67 @@
 	return ret;
 }
 
+static int eb_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct extent_buffer *eb1, *eb2;
+
+	eb1 = list_entry(a, struct extent_buffer, dirty_list);
+	eb2 = list_entry(b, struct extent_buffer, dirty_list);
+
+	if (eb1->start < eb2->start)
+		return -1;
+	else if (eb1->start > eb2->start)
+		return 1;
+	return 0;
+}
+
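+/*
+ * Write out every dirty extent buffer on @list in bytenr order and wait
+ * for the IO to complete.  The reference taken when an eb was added to a
+ * dirty_log_pages list is dropped here.
+ */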
+int btrfs_sync_eb_list(struct btrfs_fs_info *fs_info, struct list_head *list)
+{
+	struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
+	struct extent_buffer *eb;
+	struct extent_page_data epd = {
+		.bio = NULL,
+		.tree = tree,
+		.extent_locked = 0,
+		.sync_io = 1,
+		.bio_flags = 0,
+	};
+	int ret = 0;
+	int err = 0;
+
+	list_sort(NULL, list, eb_cmp);
+
+	list_for_each_entry(eb, list, dirty_list) {
+		if (!lock_extent_buffer_for_io(eb, fs_info, &epd))
+			continue;
+
+		ret = write_one_eb(eb, fs_info, NULL, &epd);
+		if (ret && !err)
+			err = ret;
+	}
+	flush_write_bio(&epd);
+
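+	/* drop the references taken when the ebs were made dirty */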
+	while (!list_empty(list)) {
+		eb = list_first_entry(list, struct extent_buffer, dirty_list);
+		list_del_init(&eb->dirty_list);
+		if (err) {
+			free_extent_buffer(eb);
+			continue;
+		}
+
+		wait_on_extent_buffer_writeback(eb);
+		free_extent_buffer(eb);
+	}
+
+	return err;
+}
+
 int btree_write_cache_pages(struct address_space *mapping,
 				   struct writeback_control *wbc)
 {
@@ -4024,6 +4082,7 @@
 	atomic_set(&eb->blocking_writers, 0);
 	atomic_set(&eb->spinning_readers, 0);
 	atomic_set(&eb->spinning_writers, 0);
+	INIT_LIST_HEAD(&eb->dirty_list);
 	eb->lock_nested = 0;
 	init_waitqueue_head(&eb->write_lock_wq);
 	init_waitqueue_head(&eb->read_lock_wq);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 2eacfab..cc01dce 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -136,6 +136,8 @@
 	atomic_t io_pages;
 	int read_mirror;
 	struct list_head leak_list;
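+	/* entry on a log root's dirty_log_pages list */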
+	struct list_head dirty_list;
 	struct rcu_head rcu_head;
 	pid_t lock_owner;
 
@@ -345,4 +346,5 @@
 int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
 int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
 			 int mirror_num);
+int btrfs_sync_eb_list(struct btrfs_fs_info *fs_info, struct list_head *list);
 #endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6d73baf..909bd1e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2278,8 +2278,10 @@
 	int ret;
 	struct btrfs_root *log = root->log_root;
 	struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
+	struct list_head list;
 	unsigned long log_transid = 0;
 
+	INIT_LIST_HEAD(&list);
 	mutex_lock(&root->log_mutex);
 	log_transid = root->log_transid;
 	index1 = root->log_transid % 2;
@@ -2319,17 +2321,6 @@
 	else
 		mark = EXTENT_NEW;
 
-	/* we start IO on  all the marked extents here, but we don't actually
-	 * wait for them until later.
-	 */
-	ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
-	if (ret) {
-		btrfs_abort_transaction(trans, root, ret);
-		btrfs_free_logged_extents(log, log_transid);
-		mutex_unlock(&root->log_mutex);
-		goto out;
-	}
-
 	btrfs_set_root_node(&log->root_item, log->node);
 
 	root->log_transid++;
@@ -2364,7 +2355,6 @@
 			goto out;
 		}
 		root->fs_info->last_trans_log_full_commit = trans->transid;
-		btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
 		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		ret = -EAGAIN;
@@ -2373,7 +2363,6 @@
 
 	index2 = log_root_tree->log_transid % 2;
 	if (atomic_read(&log_root_tree->log_commit[index2])) {
-		btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
 		wait_log_commit(trans, log_root_tree,
 				log_root_tree->log_transid);
 		btrfs_free_logged_extents(log, log_transid);
@@ -2395,23 +2384,32 @@
 	 * check the full commit flag again
 	 */
 	if (root->fs_info->last_trans_log_full_commit == trans->transid) {
-		btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
 		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		ret = -EAGAIN;
 		goto out_wake_log_root;
 	}
 
-	ret = btrfs_write_and_wait_marked_extents(log_root_tree,
-				&log_root_tree->dirty_log_pages,
-				EXTENT_DIRTY | EXTENT_NEW);
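+	/*
+	 * Gather the dirty ebs for this log transid along with everything
+	 * queued on the log root tree and write them all out in one pass.
+	 */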
+	spin_lock(&log->log_pages_lock);
+	list_splice_init(&log->dirty_log_pages[log_transid % 2], &list);
+	spin_unlock(&log->log_pages_lock);
+
+	spin_lock(&log_root_tree->log_pages_lock);
+	list_splice_init(&log_root_tree->dirty_log_pages[0], &list);
+	list_splice_init(&log_root_tree->dirty_log_pages[1], &list);
+	spin_unlock(&log_root_tree->log_pages_lock);
+
+	ret = btrfs_sync_eb_list(log_root_tree->fs_info, &list);
 	if (ret) {
 		btrfs_abort_transaction(trans, root, ret);
 		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		goto out_wake_log_root;
 	}
-	btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
 	btrfs_wait_logged_extents(log, log_transid);
 
 	btrfs_set_super_log_root(root->fs_info->super_for_commit,
@@ -2461,25 +2455,27 @@
 			  struct btrfs_root *log)
 {
 	int ret;
-	u64 start;
-	u64 end;
+	struct list_head list;
 	struct walk_control wc = {
 		.free = 1,
 		.process_func = process_one_buffer
 	};
 
+	INIT_LIST_HEAD(&list);
 	ret = walk_log_tree(trans, log, &wc);
 	BUG_ON(ret);
 
-	while (1) {
-		ret = find_first_extent_bit(&log->dirty_log_pages,
-				0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
-				NULL);
-		if (ret)
-			break;
+	spin_lock(&log->log_pages_lock);
+	list_splice_init(&log->dirty_log_pages[0], &list);
+	list_splice_init(&log->dirty_log_pages[1], &list);
+	spin_unlock(&log->log_pages_lock);
 
-		clear_extent_bits(&log->dirty_log_pages, start, end,
-				  EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
+	while (!list_empty(&list)) {
+		struct extent_buffer *eb;
+
+		eb = list_first_entry(&list, struct extent_buffer, dirty_list);
+		list_del_init(&eb->dirty_list);
+		free_extent_buffer(eb);
 	}
 
 	free_extent_buffer(log->node);