dm-integrity: avoid deadlock

Based on the patch: dm verity: avoid deadlock

A deadlock was found in the prefetch code in the dm-integrity map
function.  This patch fixes the problem by transferring the prefetch
to a worker thread and skipping it completely if kmalloc fails.

If generic_make_request is called recursively, it queues the I/O
request on the current->bio_list without making the I/O request
and returns. The routine making the recursive call cannot wait
for the I/O to complete.

The deadlock occurs when one thread grabs the bufio_client
mutex and waits for an I/O to complete but the I/O is queued
on another thread's current->bio_list and is waiting to get
the mutex held by the first thread.

The fix recognises that prefetching is not essential.  If memory
can be allocated, it queues the prefetch request to the worker thread,
but if not, it does nothing.

Signed-off-by: Dmitry Kasatkin <d.kasatkin@samsung.com>
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 2661e2c..671d767 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -130,6 +130,13 @@
 #define io_block(io) (io->bi_iter.bi_sector >> \
 			(io->dmi->data_block_bits - SECTOR_SHIFT))
 
+struct dm_int_prefetch_work {
+	struct work_struct work;
+	struct dm_int *dmi;
+	sector_t sector;
+	unsigned bi_size;
+};
+
 static void dm_int_queue_hmac(struct dm_int_io *io);
 
 /*
@@ -225,14 +232,16 @@
 	bio_endio_nodec(bio, err);	/* finally completed, end main bio */
 }
 
-static void dm_int_prefetch(struct dm_int_io *io)
+static void dm_int_prefetch(struct work_struct *work)
 {
-	struct dm_int *dmi = io->dmi;
+	struct dm_int_prefetch_work *pw =
+		container_of(work, struct dm_int_prefetch_work, work);
+	struct dm_int *dmi = pw->dmi;
 	sector_t first, last, data;
 	loff_t offset;
 
 	/* block number to read */
-	offset = io->bi_iter.bi_sector << SECTOR_SHIFT;
+	offset = pw->sector << SECTOR_SHIFT;
 	data = offset >> dmi->data_block_bits;
 	if (dmi->hmac_block_shift)
 		first = data >> dmi->hmac_block_shift;
@@ -242,7 +251,7 @@
 	}
 
 	/* offset to the last byte of data */
-	offset += (io->bi_iter.bi_size - 1);
+	offset += (pw->bi_size - 1);
 	data = offset >> dmi->data_block_bits;
 	if (dmi->hmac_block_shift)
 		last = data >> dmi->hmac_block_shift;
@@ -259,6 +268,25 @@
 		last = dmi->hmac_count;
 
 	dm_bufio_prefetch(dmi->bufio, dmi->hmac_start + first, last - first);
+
+	kfree(pw);
+}
+
+static void dm_int_submit_prefetch(struct dm_int_io *io)
+{
+	struct dm_int_prefetch_work *pw;
+
+	pw = kmalloc(sizeof(struct dm_int_prefetch_work),
+		GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+
+	if (!pw)
+		return;
+
+	INIT_WORK(&pw->work, dm_int_prefetch);
+	pw->dmi = io->dmi;
+	pw->sector = io->bi_iter.bi_sector;
+	pw->bi_size = io->bi_iter.bi_size;
+	queue_work(io->dmi->io_queue, &pw->work);
 }
 
 static int dm_int_verify_hmac(struct dm_int_io *io, loff_t offset,
@@ -551,7 +579,7 @@
 
 	generic_make_request(bio);
 
-	dm_int_prefetch(io);
+	dm_int_submit_prefetch(io);
 
 	dm_int_io_put(io);