tux3: Support mmap write: Fix race of mmap write with write(2) for copy_from_user()
mmap write and write(2) can race.
    cpu0 (write(2))                         cpu1 (mmap write)

    write(2)
      delta-1 = get_delta()
      remove_suid()
      update_timestamp()
      write_begin()
                                            delta++
                                            mmap write
                                              lock_page()
                                              delta-2 = get_delta()
                                              page-B = pagefork(page-A)
                                              unlock_page()
        get_delta_if_need()
        lock_page()
        page-B = find_get_page();
        pagefork(page-B)
      write_end()
      put_delta()
If mmap write and write(2) run in the above order, cpu0 sees the dirty
page-B that belongs to delta-2, while write(2) is still using delta-1.
We should guarantee that a writer never sees data from a future delta.
To fix this race, this patch (re)takes the delta while holding
lock_page(). With this change, we guarantee that the latest delta is
used, which fixes the race.
FIXME: However, this approach separates the transaction of
update_timestamp()/remove_suid()/etc. from the first modification to
the page. It would be better to modify data and metadata in the same
delta (to make the app's modification and the on-disk state more atomic).
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
diff --git a/fs/tux3/commit.c b/fs/tux3/commit.c
index fc7f85e..a50919f 100644
--- a/fs/tux3/commit.c
+++ b/fs/tux3/commit.c
@@ -768,10 +768,14 @@
* }
* change_end_if_need()
*/
-void change_begin_if_needed(struct sb *sb)
+void change_begin_if_needed(struct sb *sb, int need_sep)
{
if (current->journal_info == NULL)
change_begin(sb);
+ else if (need_sep) {
+ change_end(sb);
+ change_begin(sb);
+ }
}
void change_end_if_needed(struct sb *sb)
diff --git a/fs/tux3/filemap.c b/fs/tux3/filemap.c
index 612ac41..c71f1c5 100644
--- a/fs/tux3/filemap.c
+++ b/fs/tux3/filemap.c
@@ -824,12 +824,12 @@
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
- int check_fork)
+ int tux3_flags)
{
int ret;
ret = tux3_write_begin(mapping, pos, len, flags, pagep,
- tux3_da_get_block, check_fork);
+ tux3_da_get_block, tux3_flags);
if (ret < 0)
tux3_write_failed(mapping, pos + len);
return ret;
@@ -854,12 +854,9 @@
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- /* Separate big write transaction to small chunk. */
- assert(S_ISREG(mapping->host->i_mode));
- change_begin_if_needed(tux_sb(mapping->host->i_sb));
-
return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep,
- fsdata, 1);
+ fsdata,
+ TUX3_F_PAGEFORK | TUX3_F_SEP_DELTA);
}
static int tux3_file_write_end(struct file *file, struct address_space *mapping,
@@ -989,7 +986,7 @@
struct page **pagep, void **fsdata)
{
return __tux3_file_write_begin(file, mapping, pos, len, flags, pagep,
- fsdata, 1);
+ fsdata, TUX3_F_PAGEFORK);
}
/* Copy of tux_file_aops, except ->write_begin/end */
diff --git a/fs/tux3/filemap_blocklib.c b/fs/tux3/filemap_blocklib.c
index f7d7864..8b77c9f 100644
--- a/fs/tux3/filemap_blocklib.c
+++ b/fs/tux3/filemap_blocklib.c
@@ -152,6 +152,9 @@
return err;
}
+#define TUX3_F_PAGEFORK (1 << 0)
+#define TUX3_F_SEP_DELTA (1 << 1)
+
/*
* Copy of block_write_begin()
* (Add to call pagefork_for_blockdirty() for buffer fork)
@@ -159,7 +162,7 @@
static int tux3_write_begin(struct address_space *mapping, loff_t pos,
unsigned len, unsigned flags,
struct page **pagep, get_block_t *get_block,
- int check_fork)
+ int tux3_flags)
{
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
struct page *page;
@@ -170,12 +173,19 @@
if (!page)
return -ENOMEM;
+ if (tux3_flags & TUX3_F_SEP_DELTA) {
+ /* Separate big write transaction to small chunk. */
+ assert(S_ISREG(mapping->host->i_mode));
+ change_begin_if_needed(tux_sb(mapping->host->i_sb), 1);
+ }
+
/*
- * FIXME: If check_fork == 0, caller handle buffer fork.
- * Unlike check_fork hack, we are better to provide the different
- * blockget() implementation doesn't use tux3_write_begin().
+ * FIXME: If TUX3_F_PAGEFORK is not set, the caller handles the
+ * buffer fork. Instead of this TUX3_F_PAGEFORK hack, it would be
+ * better to provide a different blockget() implementation that
+ * doesn't use tux3_write_begin().
*/
- if (check_fork) {
+ if (tux3_flags & TUX3_F_PAGEFORK) {
struct page *tmp;
tmp = pagefork_for_blockdirty(page, tux3_get_current_delta());
diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h
index 44576e1..0415298 100644
--- a/fs/tux3/tux3.h
+++ b/fs/tux3/tux3.h
@@ -790,7 +790,7 @@
void change_end_atomic_nested(struct sb *sb, void *ptr);
void change_begin(struct sb *sb);
int change_end(struct sb *sb);
-void change_begin_if_needed(struct sb *sb);
+void change_begin_if_needed(struct sb *sb, int need_sep);
void change_end_if_needed(struct sb *sb);
/* commit_flusher.c */