tux3: Support mmap write: Fix race of mmap write with truncate(2)
An mmap write and truncate(2) can race on a delta boundary.
        cpu0 (truncate)                     cpu1 (mmap write)
    delta-1 = get_delta();
    truncate(2)
                                        delta++
                                        mmap write
                                          delta-2 = get_delta();
                                          page-B = pagefork(page-A);
      tux3_truncate_inode_page(page-B);
If the mmap write and truncate(2) run in the above order, truncate(2)
sees a dirty page that belongs to delta-2. We must guarantee that an
operation never sees data from a future delta.
To avoid this race, this patch introduces ->truncate_lock, which blocks
mmap writes while truncate(2) is running.
FIXME: This race should be rare in real usage, so a finer-grained
lock would be better.
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
diff --git a/fs/tux3/filemap_mmap.c b/fs/tux3/filemap_mmap.c
index 93b9f9c..a17182a 100644
--- a/fs/tux3/filemap_mmap.c
+++ b/fs/tux3/filemap_mmap.c
@@ -146,6 +146,7 @@
sb_start_pagefault(inode->i_sb);
retry:
+ down_read(&tux_inode(inode)->truncate_lock);
lock_page(page);
if (page->mapping != mapping(inode)) {
unlock_page(page);
@@ -177,6 +178,7 @@
change_end_atomic_nested(sb, ptr);
unlock_page(page);
page_cache_release(page);
+ up_read(&tux_inode(inode)->truncate_lock);
switch (PTR_ERR(clone)) {
case -EAGAIN:
@@ -218,6 +220,7 @@
// ret = VM_FAULT_LOCKED;
#endif
out:
+ up_read(&tux_inode(inode)->truncate_lock);
sb_end_pagefault(inode->i_sb);
return ret;
diff --git a/fs/tux3/inode.c b/fs/tux3/inode.c
index f759f87..5d074ba 100644
--- a/fs/tux3/inode.c
+++ b/fs/tux3/inode.c
@@ -817,7 +817,7 @@
{
struct inode *inode = dentry->d_inode;
struct sb *sb = tux_sb(inode->i_sb);
- int err, need_truncate = 0;
+ int err, need_truncate = 0, need_lock = 0;
err = inode_change_ok(inode, iattr);
if (err)
@@ -826,24 +826,28 @@
if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) {
inode_dio_wait(inode);
need_truncate = 1;
+ /* If truncate pages, this can race with mmap write */
+ if (iattr->ia_size < inode->i_size)
+ need_lock = 1;
}
+ if (need_lock)
+ down_write(&tux_inode(inode)->truncate_lock);
change_begin(sb);
tux3_iattrdirty(inode);
- if (need_truncate) {
+ if (need_truncate)
err = tux3_truncate(inode, iattr->ia_size);
- if (err)
- return err;
- }
-
- setattr_copy(inode, iattr);
+ if (!err)
+ setattr_copy(inode, iattr);
tux3_mark_inode_dirty(inode);
change_end(sb);
+ if (need_lock)
+ up_write(&tux_inode(inode)->truncate_lock);
- return 0;
+ return err;
}
#include "inode_vfslib.c"
diff --git a/fs/tux3/super.c b/fs/tux3/super.c
index 140bcd5..b8c8475 100644
--- a/fs/tux3/super.c
+++ b/fs/tux3/super.c
@@ -267,6 +267,7 @@
INIT_LIST_HEAD(&tuxnode->orphan_list);
spin_lock_init(&tuxnode->hole_extents_lock);
INIT_LIST_HEAD(&tuxnode->hole_extents);
+ init_rwsem(&tuxnode->truncate_lock);
spin_lock_init(&tuxnode->lock);
/* Initialize inode_delta_dirty */
for (i = 0; i < ARRAY_SIZE(tuxnode->i_ddc); i++) {
diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h
index 04fde94..44576e1 100644
--- a/fs/tux3/tux3.h
+++ b/fs/tux3/tux3.h
@@ -431,6 +431,7 @@
spinlock_t hole_extents_lock; /* lock for hole_extents */
struct list_head hole_extents; /* hole extents list */
+ struct rw_semaphore truncate_lock; /* lock for truncate and mmap */
spinlock_t lock; /* lock for inode metadata */
/* Per-delta dirty data for inode */
unsigned flags; /* flags for inode state */