| From: Sidhartha Kumar <sidhartha.kumar@oracle.com> |
| Subject: maple_tree: reset mas->index and mas->last on write retries |
| Date: Mon, 12 Aug 2024 15:05:42 -0400 |
| |
| The following scenario can result in a race condition: |
| |
| Consider a node with the following indices and values |
| |
| a<------->b<----------->c<--------->d |
| 0xA NULL 0xB |
| |
| CPU 1 CPU 2 |
| --------- --------- |
| mas_set_range(a,b) |
| mas_erase() |
| -> range is expanded (a,c) because of null expansion |
| |
| mas_nomem() |
| mas_unlock() |
| mas_store_range(b,c,0xC) |
| |
| The node now looks like: |
| |
| a<------->b<----------->c<--------->d |
| 0xA 0xC 0xB |
| |
| mas_lock() |
| mas_erase() <------ range of erase is still (a,c) |
| |
| The node is now NULL from (a,c) but the write from CPU 2 should have been |
| retained and range (b,c) should still have 0xC as its value. We can fix |
| this by re-initializing to the original index and last. This does not need |
| a cc: Stable as there are no users of the maple tree which use internal |
| locking and this condition is only possible with internal locking. |
| |
| Link: https://lkml.kernel.org/r/20240812190543.71967-1-sidhartha.kumar@oracle.com |
| Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com> |
| Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> |
| Cc: Matthew Wilcox <willy@infradead.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| lib/maple_tree.c | 16 ++++++++++++---- |
| 1 file changed, 12 insertions(+), 4 deletions(-) |
| |
| --- a/lib/maple_tree.c~maple_tree-reset-mas-index-and-mas-last-on-write-retries |
| +++ a/lib/maple_tree.c |
| @@ -5451,14 +5451,19 @@ EXPORT_SYMBOL_GPL(mas_store); |
| */ |
| int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) |
| { |
| + unsigned long index = mas->index; |
| + unsigned long last = mas->last; |
| MA_WR_STATE(wr_mas, mas, entry); |
| |
| mas_wr_store_setup(&wr_mas); |
| trace_ma_write(__func__, mas, 0, entry); |
| retry: |
| mas_wr_store_entry(&wr_mas); |
| - if (unlikely(mas_nomem(mas, gfp))) |
| + if (unlikely(mas_nomem(mas, gfp))) { |
| + if (!entry) |
| + __mas_set_range(mas, index, last); |
| goto retry; |
| + } |
| |
| if (unlikely(mas_is_err(mas))) |
| return xa_err(mas->node); |
| @@ -6245,23 +6250,26 @@ EXPORT_SYMBOL_GPL(mas_find_range_rev); |
| void *mas_erase(struct ma_state *mas) |
| { |
| void *entry; |
| + unsigned long index = mas->index; |
| MA_WR_STATE(wr_mas, mas, NULL); |
| |
| if (!mas_is_active(mas) || !mas_is_start(mas)) |
| mas->status = ma_start; |
| |
| - /* Retry unnecessary when holding the write lock. */ |
| +write_retry: |
| entry = mas_state_walk(mas); |
| if (!entry) |
| return NULL; |
| |
| -write_retry: |
| /* Must reset to ensure spanning writes of last slot are detected */ |
| mas_reset(mas); |
| mas_wr_store_setup(&wr_mas); |
| mas_wr_store_entry(&wr_mas); |
| - if (mas_nomem(mas, GFP_KERNEL)) |
| + if (mas_nomem(mas, GFP_KERNEL)) { |
| + /* in case the range of entry changed when unlocked */ |
| + mas->index = mas->last = index; |
| goto write_retry; |
| + } |
| |
| return entry; |
| } |
| _ |