for-test: move 0001-bcache-dont-reset-bio-opf-in-bch_data_insert_start.patch from for-next
for-next: add 4 patches and update 1 patch
diff --git a/for-next/0001-bcache-consider-the-fragmentation-when-update-the-wr.patch b/for-next/0001-bcache-consider-the-fragmentation-when-update-the-wr.patch
index a7a8d03..fa7e827 100644
--- a/for-next/0001-bcache-consider-the-fragmentation-when-update-the-wr.patch
+++ b/for-next/0001-bcache-consider-the-fragmentation-when-update-the-wr.patch
@@ -1,7 +1,8 @@
-From 3398f9f459244fa141a031e92f0cd86a047c47a5 Mon Sep 17 00:00:00 2001
+From 5b756fccaa5a77ce84362be304d57eb29229b728 Mon Sep 17 00:00:00 2001
 From: dongdong tao <dongdong.tao@canonical.com>
 Date: Wed, 20 Jan 2021 20:01:52 +0800
-Subject: [PATCH] bcache: consider the fragmentation when update the writeback rate
+Subject: [PATCH 1/5] bcache: consider the fragmentation when update the
+ writeback rate
 
 Current way to calculate the writeback rate only considered the
 dirty sectors, this usually works fine when the fragmentation
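
For context on the patch above: the writeback rate controller previously
considered only the amount of dirty data, so a cache whose dirty data is
spread thinly across many buckets can write back too slowly to ever free
whole buckets. Below is a purely illustrative sketch of a
fragmentation-aware adjustment; it is not the code from this patch, and
the function name, parameters, and thresholds are hypothetical.

#include <stdint.h>

/*
 * Illustrative sketch only -- not the code from the patch above.
 * Scale a proportional writeback term up as fragmentation grows,
 * i.e. as dirty data is spread thinly across many buckets.
 */
static int64_t scale_writeback_rate(int64_t proportional_term,
				    uint64_t dirty_sectors,
				    uint64_t dirty_buckets,
				    uint64_t bucket_sectors)
{
	/* Average fill level of a dirty bucket, in sectors. */
	uint64_t avg_fill = dirty_buckets ?
			    dirty_sectors / dirty_buckets : bucket_sectors;

	/* Emptier dirty buckets mean higher fragmentation: push harder. */
	if (avg_fill < bucket_sectors / 8)
		return proportional_term * 4;
	if (avg_fill < bucket_sectors / 4)
		return proportional_term * 2;
	return proportional_term;
}
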
diff --git a/for-next/0002-bcache-Fix-register_device_aync-typo.patch b/for-next/0002-bcache-Fix-register_device_aync-typo.patch
new file mode 100644
index 0000000..dddf32c
--- /dev/null
+++ b/for-next/0002-bcache-Fix-register_device_aync-typo.patch
@@ -0,0 +1,39 @@
+From 40c0086acb7d8384e9998715d70bfff12b2de4d7 Mon Sep 17 00:00:00 2001
+From: Kai Krakow <kai@kaishome.de>
+Date: Thu, 28 Jan 2021 15:33:19 +0100
+Subject: [PATCH 2/5] bcache: Fix register_device_aync typo
+
+Should be `register_device_async`.
+
+Cc: Coly Li <colyli@suse.de>
+Signed-off-by: Kai Krakow <kai@kaishome.de>
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/super.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 2047a9cccdb5..e7d1b52c5cc8 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -2517,7 +2517,7 @@ static void register_cache_worker(struct work_struct *work)
+ 	module_put(THIS_MODULE);
+ }
+ 
+-static void register_device_aync(struct async_reg_args *args)
++static void register_device_async(struct async_reg_args *args)
+ {
+ 	if (SB_IS_BDEV(args->sb))
+ 		INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker);
+@@ -2611,7 +2611,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
+ 		args->sb	= sb;
+ 		args->sb_disk	= sb_disk;
+ 		args->bdev	= bdev;
+-		register_device_aync(args);
++		register_device_async(args);
+ 		/* No wait and returns to user space */
+ 		goto async_done;
+ 	}
+-- 
+2.26.2
+
diff --git a/for-next/0003-Revert-bcache-Kill-btree_io_wq.patch b/for-next/0003-Revert-bcache-Kill-btree_io_wq.patch
new file mode 100644
index 0000000..3e40770
--- /dev/null
+++ b/for-next/0003-Revert-bcache-Kill-btree_io_wq.patch
@@ -0,0 +1,118 @@
+From 0e29284793e52fd086da2fed409b0af9bca03b53 Mon Sep 17 00:00:00 2001
+From: Kai Krakow <kai@kaishome.de>
+Date: Fri, 29 Jan 2021 17:40:05 +0100
+Subject: [PATCH 3/5] Revert "bcache: Kill btree_io_wq"
+
+This reverts commit 56b30770b27d54d68ad51eccc6d888282b568cee.
+
+With the btree using the `system_wq`, I seem to see a lot more desktop
+latency than I should.
+
+After some more investigation, it looks like the original assumption
+of 56b3077 is no longer true, and bcache has a very high potential of
+congesting the `system_wq`. In turn, this introduces laggy desktop
+performance, IO stalls (at least with btrfs), and delayed input
+events.
+
+So let's revert this. It's important to note that the previous use of
+`system_wq` means that `btree_io_wq` should be created before and
+destroyed after the other bcache wqs to keep the same
+assumptions.
+
+Cc: Coly Li <colyli@suse.de>
+Cc: stable@vger.kernel.org # 5.4+
+Signed-off-by: Kai Krakow <kai@kaishome.de>
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/bcache.h |  2 ++
+ drivers/md/bcache/btree.c  | 21 +++++++++++++++++++--
+ drivers/md/bcache/super.c  |  4 ++++
+ 3 files changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
+index d7a84327b7f1..2b8c7dd2cfae 100644
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -1046,5 +1046,7 @@ void bch_debug_exit(void);
+ void bch_debug_init(void);
+ void bch_request_exit(void);
+ int bch_request_init(void);
++void bch_btree_exit(void);
++int bch_btree_init(void);
+ 
+ #endif /* _BCACHE_H */
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index 910df242c83d..952f022db5a5 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -99,6 +99,8 @@
+ #define PTR_HASH(c, k)							\
+ 	(((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0))
+ 
++static struct workqueue_struct *btree_io_wq;
++
+ #define insert_lock(s, b)	((b)->level <= (s)->lock)
+ 
+ 
+@@ -308,7 +310,7 @@ static void __btree_node_write_done(struct closure *cl)
+ 	btree_complete_write(b, w);
+ 
+ 	if (btree_node_dirty(b))
+-		schedule_delayed_work(&b->work, 30 * HZ);
++		queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
+ 
+ 	closure_return_with_destructor(cl, btree_node_write_unlock);
+ }
+@@ -481,7 +483,7 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
+ 	BUG_ON(!i->keys);
+ 
+ 	if (!btree_node_dirty(b))
+-		schedule_delayed_work(&b->work, 30 * HZ);
++		queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
+ 
+ 	set_btree_node_dirty(b);
+ 
+@@ -2764,3 +2766,18 @@ void bch_keybuf_init(struct keybuf *buf)
+ 	spin_lock_init(&buf->lock);
+ 	array_allocator_init(&buf->freelist);
+ }
++
++void bch_btree_exit(void)
++{
++	if (btree_io_wq)
++		destroy_workqueue(btree_io_wq);
++}
++
++int __init bch_btree_init(void)
++{
++	btree_io_wq = create_singlethread_workqueue("bch_btree_io");
++	if (!btree_io_wq)
++		return -ENOMEM;
++
++	return 0;
++}
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index e7d1b52c5cc8..85a44a0cffe0 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -2821,6 +2821,7 @@ static void bcache_exit(void)
+ 		destroy_workqueue(bcache_wq);
+ 	if (bch_journal_wq)
+ 		destroy_workqueue(bch_journal_wq);
++	bch_btree_exit();
+ 
+ 	if (bcache_major)
+ 		unregister_blkdev(bcache_major, "bcache");
+@@ -2876,6 +2877,9 @@ static int __init bcache_init(void)
+ 		return bcache_major;
+ 	}
+ 
++	if (bch_btree_init())
++		goto err;
++
+ 	bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
+ 	if (!bcache_wq)
+ 		goto err;
+-- 
+2.26.2
+
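
A note on the mechanics of the revert above: `schedule_delayed_work()`
is only a thin wrapper that targets the shared `system_wq`, so switching
the two call sites to `queue_delayed_work(btree_io_wq, ...)` moves the
same delayed work onto the dedicated queue without otherwise changing
its behaviour. The wrapper in include/linux/workqueue.h looks roughly
like this in kernels of this era:

static inline bool schedule_delayed_work(struct delayed_work *dwork,
					 unsigned long delay)
{
	/* "schedule" simply means: queue on the shared system_wq. */
	return queue_delayed_work(system_wq, dwork, delay);
}
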
diff --git a/for-next/0004-bcache-Give-btree_io_wq-correct-semantics-again.patch b/for-next/0004-bcache-Give-btree_io_wq-correct-semantics-again.patch
new file mode 100644
index 0000000..c319554
--- /dev/null
+++ b/for-next/0004-bcache-Give-btree_io_wq-correct-semantics-again.patch
@@ -0,0 +1,37 @@
+From 57c862900fae3b3a9158e28e71f8a6f1af305246 Mon Sep 17 00:00:00 2001
+From: Kai Krakow <kai@kaishome.de>
+Date: Fri, 29 Jan 2021 17:40:06 +0100
+Subject: [PATCH 4/5] bcache: Give btree_io_wq correct semantics again
+
+Before killing `btree_io_wq`, the queue was allocated using
+`create_singlethread_workqueue()`, which sets `WQ_MEM_RECLAIM`. After
+killing it, the btree I/O work no longer had this property, while
+`system_wq` is not single-threaded.
+
+Let's combine both worlds and make it multi-threaded but able to
+reclaim memory.
+
+Cc: Coly Li <colyli@suse.de>
+Cc: stable@vger.kernel.org # 5.4+
+Signed-off-by: Kai Krakow <kai@kaishome.de>
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/btree.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index 952f022db5a5..fe6dce125aba 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -2775,7 +2775,7 @@ void bch_btree_exit(void)
+ 
+ int __init bch_btree_init(void)
+ {
+-	btree_io_wq = create_singlethread_workqueue("bch_btree_io");
++	btree_io_wq = alloc_workqueue("bch_btree_io", WQ_MEM_RECLAIM, 0);
+ 	if (!btree_io_wq)
+ 		return -ENOMEM;
+ 
+-- 
+2.26.2
+
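
For reference on the flags involved above, `create_singlethread_workqueue()`
expands approximately as follows in kernels of this era:

/* include/linux/workqueue.h, approximately */
#define create_singlethread_workqueue(name)				\
	alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)

`alloc_ordered_workqueue()` in turn is an unbound, ordered
`alloc_workqueue()` with `max_active = 1`, so the old queue serialized
all btree I/O while keeping a `WQ_MEM_RECLAIM` rescuer. The replacement
`alloc_workqueue("bch_btree_io", WQ_MEM_RECLAIM, 0)` keeps the rescuer
but uses the default concurrency (`max_active = 0` selects the default).
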
diff --git a/for-next/0005-bcache-Move-journal-work-to-new-flush-wq.patch b/for-next/0005-bcache-Move-journal-work-to-new-flush-wq.patch
new file mode 100644
index 0000000..4b1f369
--- /dev/null
+++ b/for-next/0005-bcache-Move-journal-work-to-new-flush-wq.patch
@@ -0,0 +1,101 @@
+From 06ccb26034779f39e0f3ed945c90fc8b2dbcc1f5 Mon Sep 17 00:00:00 2001
+From: Kai Krakow <kai@kaishome.de>
+Date: Fri, 29 Jan 2021 17:40:07 +0100
+Subject: [PATCH 5/5] bcache: Move journal work to new flush wq
+
+This is potentially long-running and not latency sensitive, so let's
+get it out of the way of other latency-sensitive events.
+
+As observed in the previous commit, the `system_wq` is easily congested
+by bcache, and this fixes a few more stalls I was observing every once
+in a while.
+
+Let's not make this `WQ_MEM_RECLAIM`, as it reduced the performance of
+boot and file system operations in my tests. Also, without
+`WQ_MEM_RECLAIM`, I no longer see desktop stalls. This matches the
+previous behavior as `system_wq` also does no memory reclaim:
+
+> // workqueue.c:
+> system_wq = alloc_workqueue("events", 0, 0);
+
+Cc: Coly Li <colyli@suse.de>
+Cc: stable@vger.kernel.org # 5.4+
+Signed-off-by: Kai Krakow <kai@kaishome.de>
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/bcache.h  |  1 +
+ drivers/md/bcache/journal.c |  4 ++--
+ drivers/md/bcache/super.c   | 16 ++++++++++++++++
+ 3 files changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
+index 2b8c7dd2cfae..848dd4db1659 100644
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -1005,6 +1005,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
+ 
+ extern struct workqueue_struct *bcache_wq;
+ extern struct workqueue_struct *bch_journal_wq;
++extern struct workqueue_struct *bch_flush_wq;
+ extern struct mutex bch_register_lock;
+ extern struct list_head bch_cache_sets;
+ 
+diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
+index aefbdb7e003b..c6613e817333 100644
+--- a/drivers/md/bcache/journal.c
++++ b/drivers/md/bcache/journal.c
+@@ -932,8 +932,8 @@ atomic_t *bch_journal(struct cache_set *c,
+ 		journal_try_write(c);
+ 	} else if (!w->dirty) {
+ 		w->dirty = true;
+-		schedule_delayed_work(&c->journal.work,
+-				      msecs_to_jiffies(c->journal_delay_ms));
++		queue_delayed_work(bch_flush_wq, &c->journal.work,
++				   msecs_to_jiffies(c->journal_delay_ms));
+ 		spin_unlock(&c->journal.lock);
+ 	} else {
+ 		spin_unlock(&c->journal.lock);
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 85a44a0cffe0..0228ccb293fc 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -49,6 +49,7 @@ static int bcache_major;
+ static DEFINE_IDA(bcache_device_idx);
+ static wait_queue_head_t unregister_wait;
+ struct workqueue_struct *bcache_wq;
++struct workqueue_struct *bch_flush_wq;
+ struct workqueue_struct *bch_journal_wq;
+ 
+ 
+@@ -2821,6 +2822,8 @@ static void bcache_exit(void)
+ 		destroy_workqueue(bcache_wq);
+ 	if (bch_journal_wq)
+ 		destroy_workqueue(bch_journal_wq);
++	if (bch_flush_wq)
++		destroy_workqueue(bch_flush_wq);
+ 	bch_btree_exit();
+ 
+ 	if (bcache_major)
+@@ -2884,6 +2887,19 @@ static int __init bcache_init(void)
+ 	if (!bcache_wq)
+ 		goto err;
+ 
++	/*
++	 * Let's not make this `WQ_MEM_RECLAIM` for the following reasons:
++	 *
++	 * 1. It used `system_wq` before, which also does no memory reclaim.
++	 * 2. With `WQ_MEM_RECLAIM`, desktop stalls, increased boot times, and
++	 *    reduced throughput can be observed.
++	 *
++	 * We still want to use our own queue to not congest the `system_wq`.
++	 */
++	bch_flush_wq = alloc_workqueue("bch_flush", 0, 0);
++	if (!bch_flush_wq)
++		goto err;
++
+ 	bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
+ 	if (!bch_journal_wq)
+ 		goto err;
+-- 
+2.26.2
+
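
The pattern patch 5 applies -- give periodic, non-latency-critical work
its own workqueue with the same flags as `system_wq` instead of sharing
`system_wq` itself -- looks roughly like the sketch below when taken in
isolation. The names are hypothetical; this is not bcache code.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/workqueue.h>

/* Hypothetical example module, not bcache code. */
static struct workqueue_struct *my_flush_wq;
static struct delayed_work my_flush_work;

static void my_flush_fn(struct work_struct *work)
{
	/* ... potentially long-running, non-latency-critical flush ... */

	/* Re-arm on the private queue, not on system_wq. */
	queue_delayed_work(my_flush_wq, &my_flush_work, msecs_to_jiffies(100));
}

static int __init my_flush_init(void)
{
	/* flags = 0, max_active = 0: same semantics as system_wq. */
	my_flush_wq = alloc_workqueue("my_flush", 0, 0);
	if (!my_flush_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&my_flush_work, my_flush_fn);
	queue_delayed_work(my_flush_wq, &my_flush_work, msecs_to_jiffies(100));
	return 0;
}

static void __exit my_flush_exit(void)
{
	cancel_delayed_work_sync(&my_flush_work);
	destroy_workqueue(my_flush_wq);
}

module_init(my_flush_init);
module_exit(my_flush_exit);
MODULE_LICENSE("GPL");
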
diff --git a/for-next/0001-bcache-dont-reset-bio-opf-in-bch_data_insert_start.patch b/for-test/0001-bcache-dont-reset-bio-opf-in-bch_data_insert_start.patch
similarity index 100%
rename from for-next/0001-bcache-dont-reset-bio-opf-in-bch_data_insert_start.patch
rename to for-test/0001-bcache-dont-reset-bio-opf-in-bch_data_insert_start.patch