tcmu: Skip Data-Out blocks before gathering Data-In buffer for BIDI case

For the bidirectional case, the Data-Out buffer blocks will always at
the head of the tcmu_cmd's bitmap, and before gathering the Data-In
buffer, first of all it should skip the Data-Out ones, or the device
supporting BIDI commands won't work.

Fixed: 26418649eead ("target/user: Introduce data_bitmap, replace
		data_length/data_head/data_tail")
Reported-by: Ilias Tsitsimpis <iliastsi@arrikto.com>
Tested-by: Ilias Tsitsimpis <iliastsi@arrikto.com>
Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Cc: stable@vger.kernel.org # 4.6+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 9885d1b..f615c3b 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -311,24 +311,50 @@
 		   DATA_BLOCK_BITS);
 }
 
-static void gather_data_area(struct tcmu_dev *udev, unsigned long *cmd_bitmap,
-		struct scatterlist *data_sg, unsigned int data_nents)
+static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
+			     bool bidi)
 {
+	struct se_cmd *se_cmd = cmd->se_cmd;
 	int i, block;
 	int block_remaining = 0;
 	void *from, *to;
 	size_t copy_bytes, from_offset;
-	struct scatterlist *sg;
+	struct scatterlist *sg, *data_sg;
+	unsigned int data_nents;
+	DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
+
+	bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
+
+	if (!bidi) {
+		data_sg = se_cmd->t_data_sg;
+		data_nents = se_cmd->t_data_nents;
+	} else {
+		uint32_t count;
+
+		/*
+		 * For bidi case, the first count blocks are for Data-Out
+		 * buffer blocks, and before gathering the Data-In buffer
+		 * the Data-Out buffer blocks should be discarded.
+		 */
+		count = DIV_ROUND_UP(se_cmd->data_length, DATA_BLOCK_SIZE);
+		while (count--) {
+			block = find_first_bit(bitmap, DATA_BLOCK_BITS);
+			clear_bit(block, bitmap);
+		}
+
+		data_sg = se_cmd->t_bidi_data_sg;
+		data_nents = se_cmd->t_bidi_data_nents;
+	}
 
 	for_each_sg(data_sg, sg, data_nents, i) {
 		int sg_remaining = sg->length;
 		to = kmap_atomic(sg_page(sg)) + sg->offset;
 		while (sg_remaining > 0) {
 			if (block_remaining == 0) {
-				block = find_first_bit(cmd_bitmap,
+				block = find_first_bit(bitmap,
 						DATA_BLOCK_BITS);
 				block_remaining = DATA_BLOCK_SIZE;
-				clear_bit(block, cmd_bitmap);
+				clear_bit(block, bitmap);
 			}
 			copy_bytes = min_t(size_t, sg_remaining,
 					block_remaining);
@@ -610,19 +636,11 @@
 			       se_cmd->scsi_sense_length);
 		free_data_area(udev, cmd);
 	} else if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
-
 		/* Get Data-In buffer before clean up */
-		bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
-		gather_data_area(udev, bitmap,
-			se_cmd->t_bidi_data_sg, se_cmd->t_bidi_data_nents);
+		gather_data_area(udev, cmd, true);
 		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
-		DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
-
-		bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
-		gather_data_area(udev, bitmap,
-			se_cmd->t_data_sg, se_cmd->t_data_nents);
+		gather_data_area(udev, cmd, false);
 		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_TO_DEVICE) {
 		free_data_area(udev, cmd);