tux3: Support mmap write: Fix race of mmap write with truncate(2)

An mmap write and truncate(2) can race on a delta boundary.

               cpu0                                   cpu1
       delta-1 = get_delta();
       truncate(2)
                                              delta++
                                              mmap write
                                                  delta-2 = get_delta();
                                                  page-B = pagefork(page-A);
           tux3_truncate_inode_page(page-B);

If the mmap write and truncate(2) run in the above order, truncate(2)
sees a dirty page that belongs to delta-2. We must guarantee that an
operation never sees data from a future delta.

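To avoid this race, this introduces ->truncate_lock. The mmap write
path takes it for read, and a truncate(2) that shrinks the file takes
it for write, so mmap writes are held off while truncate(2) is running.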

FIXME: This race should be rare in real usage, but a finer-grained
lock would be better.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
diff --git a/fs/tux3/filemap_mmap.c b/fs/tux3/filemap_mmap.c
index 93b9f9c..a17182a 100644
--- a/fs/tux3/filemap_mmap.c
+++ b/fs/tux3/filemap_mmap.c
@@ -146,6 +146,7 @@
 	sb_start_pagefault(inode->i_sb);
 
 retry:
+	down_read(&tux_inode(inode)->truncate_lock);
 	lock_page(page);
 	if (page->mapping != mapping(inode)) {
 		unlock_page(page);
@@ -177,6 +178,7 @@
 		change_end_atomic_nested(sb, ptr);
 		unlock_page(page);
 		page_cache_release(page);
+		up_read(&tux_inode(inode)->truncate_lock);
 
 		switch (PTR_ERR(clone)) {
 		case -EAGAIN:
@@ -218,6 +220,7 @@
 //	ret = VM_FAULT_LOCKED;
 #endif
 out:
+	up_read(&tux_inode(inode)->truncate_lock);
 	sb_end_pagefault(inode->i_sb);
 
 	return ret;
diff --git a/fs/tux3/inode.c b/fs/tux3/inode.c
index f759f87..5d074ba 100644
--- a/fs/tux3/inode.c
+++ b/fs/tux3/inode.c
@@ -817,7 +817,7 @@
 {
 	struct inode *inode = dentry->d_inode;
 	struct sb *sb = tux_sb(inode->i_sb);
-	int err, need_truncate = 0;
+	int err, need_truncate = 0, need_lock = 0;
 
 	err = inode_change_ok(inode, iattr);
 	if (err)
@@ -826,24 +826,28 @@
 	if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) {
 		inode_dio_wait(inode);
 		need_truncate = 1;
+		/* If truncate pages, this can race with mmap write */
+		if (iattr->ia_size < inode->i_size)
+			need_lock = 1;
 	}
 
+	if (need_lock)
+		down_write(&tux_inode(inode)->truncate_lock);
 	change_begin(sb);
 
 	tux3_iattrdirty(inode);
 
-	if (need_truncate) {
+	if (need_truncate)
 		err = tux3_truncate(inode, iattr->ia_size);
-		if (err)
-			return err;
-	}
-
-	setattr_copy(inode, iattr);
+	if (!err)
+		setattr_copy(inode, iattr);
 	tux3_mark_inode_dirty(inode);
 
 	change_end(sb);
+	if (need_lock)
+		up_write(&tux_inode(inode)->truncate_lock);
 
-	return 0;
+	return err;
 }
 
 #include "inode_vfslib.c"
diff --git a/fs/tux3/super.c b/fs/tux3/super.c
index 140bcd5..b8c8475 100644
--- a/fs/tux3/super.c
+++ b/fs/tux3/super.c
@@ -267,6 +267,7 @@
 	INIT_LIST_HEAD(&tuxnode->orphan_list);
 	spin_lock_init(&tuxnode->hole_extents_lock);
 	INIT_LIST_HEAD(&tuxnode->hole_extents);
+	init_rwsem(&tuxnode->truncate_lock);
 	spin_lock_init(&tuxnode->lock);
 	/* Initialize inode_delta_dirty */
 	for (i = 0; i < ARRAY_SIZE(tuxnode->i_ddc); i++) {
diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h
index 04fde94..44576e1 100644
--- a/fs/tux3/tux3.h
+++ b/fs/tux3/tux3.h
@@ -431,6 +431,7 @@
 	spinlock_t hole_extents_lock;	/* lock for hole_extents */
 	struct list_head hole_extents;	/* hole extents list */
 
+	struct rw_semaphore truncate_lock; /* lock for truncate and mmap */
 	spinlock_t lock;		/* lock for inode metadata */
 	/* Per-delta dirty data for inode */
 	unsigned flags;			/* flags for inode state */