| From 36acd81f53aa3317996d38c0a79171874f613fc2 Mon Sep 17 00:00:00 2001 |
| From: Mikulas Patocka <mpatocka@redhat.com> |
| Date: Wed, 8 Jan 2020 10:46:05 -0500 |
| Subject: [PATCH] dm writecache: fix incorrect flush sequence when doing SSD |
| mode commit |
| |
| commit aa9509209c5ac2f0b35d01a922bf9ae072d0c2fc upstream. |
| |
| When committing state, the function writecache_flush does the following: |
| 1. write metadata (writecache_commit_flushed) |
| 2. flush disk cache (writecache_commit_flushed) |
| 3. wait for data writes to complete (writecache_wait_for_ios) |
| 4. increase superblock seq_count |
| 5. write the superblock |
| 6. flush disk cache |
| |
| It may happen that at step 3, when we wait for some write to finish, the |
| disk may report the write as finished, but the write only hit the disk |
| cache and it is not yet stored in persistent storage. At step 5 we write |
| the superblock - it may happen that the superblock is written before the |
| write that we waited for in step 3. If the machine crashes, it may result |
| in incorrect data being returned after reboot. |
| |
| In order to fix the bug, we must swap steps 2 and 3 in the above sequence, |
| so that we first wait for writes to complete and then flush the disk |
| cache. |
| |
| Fixes: 48debafe4f2f ("dm: add writecache target") |
| Cc: stable@vger.kernel.org # 4.18+ |
| Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> |
| Signed-off-by: Mike Snitzer <snitzer@redhat.com> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c |
| index 8507b719ca87..90c3bf95b287 100644 |
| --- a/drivers/md/dm-writecache.c |
| +++ b/drivers/md/dm-writecache.c |
| @@ -443,7 +443,13 @@ static void writecache_notify_io(unsigned long error, void *context) |
| complete(&endio->c); |
| } |
| |
| -static void ssd_commit_flushed(struct dm_writecache *wc) |
| +static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) |
| +{ |
| + wait_event(wc->bio_in_progress_wait[direction], |
| + !atomic_read(&wc->bio_in_progress[direction])); |
| +} |
| + |
| +static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) |
| { |
| struct dm_io_region region; |
| struct dm_io_request req; |
| @@ -489,17 +495,20 @@ static void ssd_commit_flushed(struct dm_writecache *wc) |
| writecache_notify_io(0, &endio); |
| wait_for_completion_io(&endio.c); |
| |
| + if (wait_for_ios) |
| + writecache_wait_for_ios(wc, WRITE); |
| + |
| writecache_disk_flush(wc, wc->ssd_dev); |
| |
| memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); |
| } |
| |
| -static void writecache_commit_flushed(struct dm_writecache *wc) |
| +static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) |
| { |
| if (WC_MODE_PMEM(wc)) |
| wmb(); |
| else |
| - ssd_commit_flushed(wc); |
| + ssd_commit_flushed(wc, wait_for_ios); |
| } |
| |
| static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) |
| @@ -523,12 +532,6 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) |
| writecache_error(wc, r, "error flushing metadata: %d", r); |
| } |
| |
| -static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) |
| -{ |
| - wait_event(wc->bio_in_progress_wait[direction], |
| - !atomic_read(&wc->bio_in_progress[direction])); |
| -} |
| - |
| #define WFE_RETURN_FOLLOWING 1 |
| #define WFE_LOWEST_SEQ 2 |
| |
| @@ -725,15 +728,12 @@ static void writecache_flush(struct dm_writecache *wc) |
| e = e2; |
| cond_resched(); |
| } |
| - writecache_commit_flushed(wc); |
| - |
| - if (!WC_MODE_PMEM(wc)) |
| - writecache_wait_for_ios(wc, WRITE); |
| + writecache_commit_flushed(wc, true); |
| |
| wc->seq_count++; |
| pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); |
| writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count); |
| - writecache_commit_flushed(wc); |
| + writecache_commit_flushed(wc, false); |
| |
| wc->overwrote_committed = false; |
| |
| @@ -757,7 +757,7 @@ static void writecache_flush(struct dm_writecache *wc) |
| } |
| |
| if (need_flush_after_free) |
| - writecache_commit_flushed(wc); |
| + writecache_commit_flushed(wc, false); |
| } |
| |
| static void writecache_flush_work(struct work_struct *work) |
| @@ -810,7 +810,7 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_ |
| } |
| |
| if (discarded_something) |
| - writecache_commit_flushed(wc); |
| + writecache_commit_flushed(wc, false); |
| } |
| |
| static bool writecache_wait_for_writeback(struct dm_writecache *wc) |
| @@ -959,7 +959,7 @@ static void writecache_resume(struct dm_target *ti) |
| |
| if (need_flush) { |
| writecache_flush_all_metadata(wc); |
| - writecache_commit_flushed(wc); |
| + writecache_commit_flushed(wc, false); |
| } |
| |
| wc_unlock(wc); |
| @@ -1343,7 +1343,7 @@ static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head * |
| wc->writeback_size--; |
| n_walked++; |
| if (unlikely(n_walked >= ENDIO_LATENCY)) { |
| - writecache_commit_flushed(wc); |
| + writecache_commit_flushed(wc, false); |
| wc_unlock(wc); |
| wc_lock(wc); |
| n_walked = 0; |
| @@ -1424,7 +1424,7 @@ static int writecache_endio_thread(void *data) |
| writecache_wait_for_ios(wc, READ); |
| } |
| |
| - writecache_commit_flushed(wc); |
| + writecache_commit_flushed(wc, false); |
| |
| wc_unlock(wc); |
| } |
| @@ -1754,10 +1754,10 @@ static int init_memory(struct dm_writecache *wc) |
| write_original_sector_seq_count(wc, &wc->entries[b], -1, -1); |
| |
| writecache_flush_all_metadata(wc); |
| - writecache_commit_flushed(wc); |
| + writecache_commit_flushed(wc, false); |
| pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC)); |
| writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic); |
| - writecache_commit_flushed(wc); |
| + writecache_commit_flushed(wc, false); |
| |
| return 0; |
| } |
| -- |
| 2.7.4 |
| |