releases/4.14.152/ocfs2-clear-zero-in-unaligned-direct-io.patch - pub/scm/linux/kernel/git/stable/stable-queue - Git at Google

 From 4b7fb7340b7656e205f4bdee5fc25279fe234160 Mon Sep 17 00:00:00 2001
 From: Sasha Levin <sashal@kernel.org>
 Date: Sun, 6 Oct 2019 17:57:47 -0700
 Subject: ocfs2: clear zero in unaligned direct IO

 From: Jia Guo <guojia12@huawei.com>

 [ Upstream commit 7a243c82ea527cd1da47381ad9cd646844f3b693 ]

 The unused portion of a partially written fs-block-sized block is not
 zeroed in an unaligned append direct write.  This can lead to serious
 data inconsistencies.

 Ocfs2 manages disk space in units of the cluster size (for example, 1M).
 A partial write within one cluster changes the cluster state from
 UN-WRITTEN to WRITTEN.  The VFS (function dio_zero_block) does not zero
 the rest of the block, because the bh's state is not set to NEW in
 ocfs2_dio_wr_get_block when we write to a WRITTEN cluster.  For example,
 if the cluster size is 1M, the file size is 8k and we direct write from
 14k to 15k, then 12k~14k and 15k~16k will contain dirty data.

 We have to deal with two cases:
  1. The starting position of the direct write is outside the file.
  2. The starting position of the direct write is inside the file.

 We need to set the bh's state to NEW in the first case.  In the second
 case, we need to map twice, because the bh's state for the area beyond
 the file size should be set to NEW while the area within the file size
 should not.

 [akpm@linux-foundation.org: coding style fixes]
 Link: http://lkml.kernel.org/r/5292e287-8f1a-fd4a-1a14-661e555e0bed@huawei.com
 Signed-off-by: Jia Guo <guojia12@huawei.com>
 Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
 Cc: Mark Fasheh <mark@fasheh.com>
 Cc: Joel Becker <jlbec@evilplan.org>
 Cc: Junxiao Bi <junxiao.bi@oracle.com>
 Cc: Joseph Qi <joseph.qi@huawei.com>
 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 Signed-off-by: Sasha Levin <sashal@kernel.org>
 ---
  fs/ocfs2/aops.c | 22 +++++++++++++++++++++-
  1 file changed, 21 insertions(+), 1 deletion(-)

 diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
 index 99550f4bd159a..ebeec7530cb60 100644
 --- a/fs/ocfs2/aops.c
 +++ b/fs/ocfs2/aops.c
 @@ -2151,13 +2151,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
  	struct ocfs2_dio_write_ctxt *dwc = NULL;
  	struct buffer_head *di_bh = NULL;
  	u64 p_blkno;
 -	loff_t pos = iblock << inode->i_sb->s_blocksize_bits;
 +	unsigned int i_blkbits = inode->i_sb->s_blocksize_bits;
 +	loff_t pos = iblock << i_blkbits;
 +	sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits;
  	unsigned len, total_len = bh_result->b_size;
  	int ret = 0, first_get_block = 0;

  	len = osb->s_clustersize - (pos & (osb->s_clustersize - 1));
  	len = min(total_len, len);

 +	/*
 +	 * bh_result->b_size is count in get_more_blocks according to write
 +	 * "pos" and "end", we need map twice to return different buffer state:
 +	 * 1. area in file size, not set NEW;
 +	 * 2. area out file size, set  NEW.
 +	 *
 +	 *		   iblock    endblk
 +	 * |--------|---------|---------|---------
 +	 * |<-------area in file------->|
 +	 */
 +
 +	if ((iblock <= endblk) &&
 +	    ((iblock + ((len - 1) >> i_blkbits)) > endblk))
 +		len = (endblk - iblock + 1) << i_blkbits;
 +
  	mlog(0, "get block of %lu at %llu:%u req %u\n",
  			inode->i_ino, pos, len, total_len);

 @@ -2241,6 +2258,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
  	if (desc->c_needs_zero)
  		set_buffer_new(bh_result);

 +	if (iblock > endblk)
 +		set_buffer_new(bh_result);
 +
  	/* May sleep in end_io. It should not happen in a irq context. So defer
  	 * it to dio work queue. */
  	set_buffer_defer_completion(bh_result);
 --
 2.20.1
	From 4b7fb7340b7656e205f4bdee5fc25279fe234160 Mon Sep 17 00:00:00 2001
	From: Sasha Levin <sashal@kernel.org>
	Date: Sun, 6 Oct 2019 17:57:47 -0700
	Subject: ocfs2: clear zero in unaligned direct IO

	From: Jia Guo <guojia12@huawei.com>

	[ Upstream commit 7a243c82ea527cd1da47381ad9cd646844f3b693 ]

	The unused portion of a partially written fs-block-sized block is not
	zeroed in an unaligned append direct write. This can lead to serious
	data inconsistencies.

	Ocfs2 manages disk space in units of the cluster size (for example, 1M).
	A partial write within one cluster changes the cluster state from
	UN-WRITTEN to WRITTEN. The VFS (function dio_zero_block) does not zero
	the rest of the block, because the bh's state is not set to NEW in
	ocfs2_dio_wr_get_block when we write to a WRITTEN cluster. For example,
	if the cluster size is 1M, the file size is 8k and we direct write from
	14k to 15k, then 12k~14k and 15k~16k will contain dirty data.

	We have to deal with two cases:
	1. The starting position of the direct write is outside the file.
	2. The starting position of the direct write is inside the file.

	We need to set the bh's state to NEW in the first case. In the second
	case, we need to map twice, because the bh's state for the area beyond
	the file size should be set to NEW while the area within the file size
	should not.

	[akpm@linux-foundation.org: coding style fixes]
	Link: http://lkml.kernel.org/r/5292e287-8f1a-fd4a-1a14-661e555e0bed@huawei.com
	Signed-off-by: Jia Guo <guojia12@huawei.com>
	Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
	Cc: Mark Fasheh <mark@fasheh.com>
	Cc: Joel Becker <jlbec@evilplan.org>
	Cc: Junxiao Bi <junxiao.bi@oracle.com>
	Cc: Joseph Qi <joseph.qi@huawei.com>
	Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
	Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
	Signed-off-by: Sasha Levin <sashal@kernel.org>
	---
	fs/ocfs2/aops.c | 22 +++++++++++++++++++++-
	1 file changed, 21 insertions(+), 1 deletion(-)

	diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
	index 99550f4bd159a..ebeec7530cb60 100644
	--- a/fs/ocfs2/aops.c
	+++ b/fs/ocfs2/aops.c
	@@ -2151,13 +2151,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
	struct ocfs2_dio_write_ctxt *dwc = NULL;
	struct buffer_head *di_bh = NULL;
	u64 p_blkno;
	- loff_t pos = iblock << inode->i_sb->s_blocksize_bits;
	+ unsigned int i_blkbits = inode->i_sb->s_blocksize_bits;
	+ loff_t pos = iblock << i_blkbits;
	+ sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits;
	unsigned len, total_len = bh_result->b_size;
	int ret = 0, first_get_block = 0;

	len = osb->s_clustersize - (pos & (osb->s_clustersize - 1));
	len = min(total_len, len);

	+ /*
	+ * bh_result->b_size is count in get_more_blocks according to write
	+ * "pos" and "end", we need map twice to return different buffer state:
	+ * 1. area in file size, not set NEW;
	+ * 2. area out file size, set NEW.
	+ *
	+ *		   iblock    endblk
	+ * |--------|---------|---------|---------
	+ * |<-------area in file------->|
	+ */
	+
	+ if ((iblock <= endblk) &&
	+ ((iblock + ((len - 1) >> i_blkbits)) > endblk))
	+ len = (endblk - iblock + 1) << i_blkbits;
	+
	mlog(0, "get block of %lu at %llu:%u req %u\n",
	inode->i_ino, pos, len, total_len);

	@@ -2241,6 +2258,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
	if (desc->c_needs_zero)
	set_buffer_new(bh_result);

	+ if (iblock > endblk)
	+ set_buffer_new(bh_result);
	+
	/* May sleep in end_io. It should not happen in a irq context. So defer
	* it to dio work queue. */
	set_buffer_defer_completion(bh_result);
	--
	2.20.1