tux3: Support mmap write: Fix race of mmap write with write(2) for copy_from_user()

mmap write and write(2) can race.

               cpu0                          cpu1
       write(2)
           delta-1 = get_delta()
	   remove_suid()
	   update_timestamp()
           write_begin()
                                         delta++
                                         mmap write
                                             lock_page()
                                             delta-2 = get_delta()
					     page-B = pagefork(page-A)
					     unlock_page()
               get_delta_if_need()
	       lock_page()
	       page-B = find_get_page();
	       pagefork(page-B)

           write_end()
               put_delta()

If mmap write and write(2) run in the above order, cpu0 sees the dirty
page-B that belongs to delta-2 while write(2) is still using delta-1.  We
must guarantee that a writer never sees data from a future delta.

To fix this race, this patch (re)takes the delta while holding
lock_page(). With this change, write(2) is guaranteed to use the latest
delta, which fixes the race.

FIXME: However, this approach separates the transaction for
update_timestamp()/remove_suid()/etc. from the transaction for the first
modification of the page. It would be better to modify data and metadata
in the same delta (to make the application's modification and the on-disk
state more atomic).

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
diff --git a/fs/tux3/commit.c b/fs/tux3/commit.c
index fc7f85e..a50919f 100644
--- a/fs/tux3/commit.c
+++ b/fs/tux3/commit.c
@@ -768,10 +768,14 @@
  * }
  * change_end_if_need()
  */
-void change_begin_if_needed(struct sb *sb)
+void change_begin_if_needed(struct sb *sb, int need_sep)
 {
 	if (current->journal_info == NULL)
 		change_begin(sb);
+	else if (need_sep) {
+		change_end(sb);
+		change_begin(sb);
+	}
 }
 
 void change_end_if_needed(struct sb *sb)
diff --git a/fs/tux3/filemap.c b/fs/tux3/filemap.c
index 612ac41..c71f1c5 100644
--- a/fs/tux3/filemap.c
+++ b/fs/tux3/filemap.c
@@ -824,12 +824,12 @@
 				   struct address_space *mapping,
 				   loff_t pos, unsigned len, unsigned flags,
 				   struct page **pagep, void **fsdata,
-				   int check_fork)
+				   int tux3_flags)
 {
 	int ret;
 
 	ret = tux3_write_begin(mapping, pos, len, flags, pagep,
-			       tux3_da_get_block, check_fork);
+			       tux3_da_get_block, tux3_flags);
 	if (ret < 0)
 		tux3_write_failed(mapping, pos + len);
 	return ret;
@@ -854,12 +854,9 @@
 				 loff_t pos, unsigned len, unsigned flags,
 				 struct page **pagep, void **fsdata)
 {
-	/* Separate big write transaction to small chunk. */
-	assert(S_ISREG(mapping->host->i_mode));
-	change_begin_if_needed(tux_sb(mapping->host->i_sb));
-
 	return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep,
-				       fsdata, 1);
+				       fsdata,
+				       TUX3_F_PAGEFORK | TUX3_F_SEP_DELTA);
 }
 
 static int tux3_file_write_end(struct file *file, struct address_space *mapping,
@@ -989,7 +986,7 @@
 				    struct page **pagep, void **fsdata)
 {
 	return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep,
-				       fsdata, 1);
+				       fsdata, TUX3_F_PAGEFORK);
 }
 
 /* Copy of tux_file_aops, except ->write_begin/end */
diff --git a/fs/tux3/filemap_blocklib.c b/fs/tux3/filemap_blocklib.c
index f7d7864..8b77c9f 100644
--- a/fs/tux3/filemap_blocklib.c
+++ b/fs/tux3/filemap_blocklib.c
@@ -152,6 +152,9 @@
 	return err;
 }
 
+#define TUX3_F_PAGEFORK		(1 << 0)
+#define TUX3_F_SEP_DELTA	(1 << 1)
+
 /*
  * Copy of block_write_begin()
  * (Add to call pagefork_for_blockdirty() for buffer fork)
@@ -159,7 +162,7 @@
 static int tux3_write_begin(struct address_space *mapping, loff_t pos,
 			    unsigned len, unsigned flags,
 			    struct page **pagep, get_block_t *get_block,
-			    int check_fork)
+			    int tux3_flags)
 {
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	struct page *page;
@@ -170,12 +173,19 @@
 	if (!page)
 		return -ENOMEM;
 
+	if (tux3_flags & TUX3_F_SEP_DELTA) {
+		/* Separate big write transaction to small chunk. */
+		assert(S_ISREG(mapping->host->i_mode));
+		change_begin_if_needed(tux_sb(mapping->host->i_sb), 1);
+	}
+
 	/*
-	 * FIXME: If check_fork == 0, caller handle buffer fork.
-	 * Unlike check_fork hack, we are better to provide the different
-	 * blockget() implementation doesn't use tux3_write_begin().
+	 * FIXME: If TUX3_F_PAGEFORK is not set, the caller handles the
+	 * buffer fork. Rather than this TUX3_F_PAGEFORK hack, it would be
+	 * better to provide a different blockget() implementation that
+	 * doesn't use tux3_write_begin().
 	 */
-	if (check_fork) {
+	if (tux3_flags & TUX3_F_PAGEFORK) {
 		struct page *tmp;
 
 		tmp = pagefork_for_blockdirty(page, tux3_get_current_delta());
diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h
index 44576e1..0415298 100644
--- a/fs/tux3/tux3.h
+++ b/fs/tux3/tux3.h
@@ -790,7 +790,7 @@
 void change_end_atomic_nested(struct sb *sb, void *ptr);
 void change_begin(struct sb *sb);
 int change_end(struct sb *sb);
-void change_begin_if_needed(struct sb *sb);
+void change_begin_if_needed(struct sb *sb, int need_sep);
 void change_end_if_needed(struct sb *sb);
 
 /* commit_flusher.c */