| From 397dd83381b22a2c23069af776dde78d58ee6f0d Mon Sep 17 00:00:00 2001 |
| From: Andreas Gruenbacher <agruenba@redhat.com> |
| Date: Thu, 4 Apr 2019 21:11:11 +0100 |
| Subject: gfs2: Fix occasional glock use-after-free |
| |
| [ Upstream commit 9287c6452d2b1f24ea8e84bd3cf6f3c6f267f712 ] |
| |
| This patch has to do with the life cycle of glocks and buffers. When |
| gfs2 metadata or journaled data is queued to be written, a gfs2_bufdata |
| object is assigned to track the buffer, and that is queued to various |
| lists, including the glock's gl_ail_list to indicate it's on the active |
| items list. Once the page associated with the buffer has been written, |
| it is removed from the ail list, but its life isn't over until a revoke |
| has been successfully written. |
| |
| So after the block is written, its bufdata object is moved from the |
| glock's gl_ail_list to a file-system-wide list of pending revokes, |
| sd_log_le_revoke. At that point the glock still needs to track how many |
| revokes it contributed to that list (in gl_revokes) so that things like |
| glock go_sync can ensure all the metadata has been not only written, but |
| also revoked before the glock is granted to a different node. This is |
| to guarantee journal replay doesn't replay the block once the glock has |
| been granted to another node. |
| |
| Ross Lagerwall recently discovered a race in which an inode could be |
| evicted, and its glock freed after its ail list had been synced, but |
| while it still had unwritten revokes on the sd_log_le_revoke list. The |
| evict decremented the glock reference count to zero, which allowed the |
| glock to be freed. After the revoke was written, function |
| revoke_lo_after_commit tried to adjust the glock's gl_revokes counter |
| and clear its GLF_LFLUSH flag, at which time it referenced the freed |
| glock. |
| |
| This patch fixes the problem by incrementing the glock reference count |
| in gfs2_add_revoke when the glock's first bufdata object is moved from |
| the glock to the global revokes list. Later, when the glock's last such |
| bufdata object is freed, the reference count is decremented. This |
| guarantees that whichever process finishes last (the revoke writing or |
| the evict) will properly free the glock, and neither will reference the |
| glock after it has been freed. |
| |
| Reported-by: Ross Lagerwall <ross.lagerwall@citrix.com> |
| Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> |
| Signed-off-by: Bob Peterson <rpeterso@redhat.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| fs/gfs2/glock.c | 1 + |
| fs/gfs2/log.c | 3 ++- |
| fs/gfs2/lops.c | 6 ++++-- |
| 3 files changed, 7 insertions(+), 3 deletions(-) |
| |
| diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c |
| index e4f6d39500bcc..71c28ff98b564 100644 |
| --- a/fs/gfs2/glock.c |
| +++ b/fs/gfs2/glock.c |
| @@ -140,6 +140,7 @@ void gfs2_glock_free(struct gfs2_glock *gl) |
| { |
| struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
| |
| + BUG_ON(atomic_read(&gl->gl_revokes)); |
| rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); |
| smp_mb(); |
| wake_up_glock(gl); |
| diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c |
| index b8830fda51e8f..0e04f87a7dddb 100644 |
| --- a/fs/gfs2/log.c |
| +++ b/fs/gfs2/log.c |
| @@ -606,7 +606,8 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) |
| gfs2_remove_from_ail(bd); /* drops ref on bh */ |
| bd->bd_bh = NULL; |
| sdp->sd_log_num_revoke++; |
| - atomic_inc(&gl->gl_revokes); |
| + if (atomic_inc_return(&gl->gl_revokes) == 1) |
| + gfs2_glock_hold(gl); |
| set_bit(GLF_LFLUSH, &gl->gl_flags); |
| list_add(&bd->bd_list, &sdp->sd_log_le_revoke); |
| } |
| diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c |
| index 8722c60b11feb..4b280611246df 100644 |
| --- a/fs/gfs2/lops.c |
| +++ b/fs/gfs2/lops.c |
| @@ -669,8 +669,10 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
| bd = list_entry(head->next, struct gfs2_bufdata, bd_list); |
| list_del_init(&bd->bd_list); |
| gl = bd->bd_gl; |
| - atomic_dec(&gl->gl_revokes); |
| - clear_bit(GLF_LFLUSH, &gl->gl_flags); |
| + if (atomic_dec_return(&gl->gl_revokes) == 0) { |
| + clear_bit(GLF_LFLUSH, &gl->gl_flags); |
| + gfs2_glock_queue_put(gl); |
| + } |
| kmem_cache_free(gfs2_bufdata_cachep, bd); |
| } |
| } |
| -- |
| 2.20.1 |
| |