| From e5bbc8a6c992901058bc09e2ce01d16c111ff047 Mon Sep 17 00:00:00 2001 |
| From: Mike Kravetz <mike.kravetz@oracle.com> |
| Date: Tue, 10 Jan 2017 16:58:27 -0800 |
| Subject: mm/hugetlb.c: fix reservation race when freeing surplus pages |
| |
| From: Mike Kravetz <mike.kravetz@oracle.com> |
| |
| commit e5bbc8a6c992901058bc09e2ce01d16c111ff047 upstream. |
| |
| return_unused_surplus_pages() decrements the global reservation count, |
| and frees any unused surplus pages that were backing the reservation. |
| |
| Commit 7848a4bf51b3 ("mm/hugetlb.c: add cond_resched_lock() in |
| return_unused_surplus_pages()") added a call to cond_resched_lock in the |
| loop freeing the pages. |
| |
| As a result, the hugetlb_lock could be dropped, and someone else could |
| use the pages that will be freed in subsequent iterations of the loop. |
| This could result in inconsistent global hugetlb page state, application |
| api failures (such as mmap failures) or application crashes. |
| |
| When dropping the lock in return_unused_surplus_pages, make sure that |
| the global reservation count (resv_huge_pages) remains sufficiently |
| large to prevent someone else from claiming pages about to be freed. |
| |
| Analyzed by Paul Cassella. |
| |
| Fixes: 7848a4bf51b3 ("mm/hugetlb.c: add cond_resched_lock() in return_unused_surplus_pages()") |
| Link: http://lkml.kernel.org/r/1483991767-6879-1-git-send-email-mike.kravetz@oracle.com |
| Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> |
| Reported-by: Paul Cassella <cassella@cray.com> |
| Suggested-by: Michal Hocko <mhocko@kernel.org> |
| Cc: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> |
| Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> |
| Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com> |
| Cc: Hillf Danton <hillf.zj@alibaba-inc.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| mm/hugetlb.c | 37 ++++++++++++++++++++++++++++--------- |
| 1 file changed, 28 insertions(+), 9 deletions(-) |
| |
| --- a/mm/hugetlb.c |
| +++ b/mm/hugetlb.c |
| @@ -1723,23 +1723,32 @@ free: |
| } |
| |
| /* |
| - * When releasing a hugetlb pool reservation, any surplus pages that were |
| - * allocated to satisfy the reservation must be explicitly freed if they were |
| - * never used. |
| - * Called with hugetlb_lock held. |
| + * This routine has two main purposes: |
| + * 1) Decrement the reservation count (resv_huge_pages) by the value passed |
| + * in unused_resv_pages. This corresponds to the prior adjustments made |
| + * to the associated reservation map. |
| + * 2) Free any unused surplus pages that may have been allocated to satisfy |
| + * the reservation. As many as unused_resv_pages may be freed. |
| + * |
| + * Called with hugetlb_lock held. However, the lock could be dropped (and |
| + * reacquired) during calls to cond_resched_lock. Whenever dropping the lock, |
| + * we must make sure nobody else can claim pages we are in the process of |
| + * freeing. Do this by ensuring resv_huge_pages is always greater than the |
| + * number of huge pages we plan to free when dropping the lock. |
| */ |
| static void return_unused_surplus_pages(struct hstate *h, |
| unsigned long unused_resv_pages) |
| { |
| unsigned long nr_pages; |
| |
| - /* Uncommit the reservation */ |
| - h->resv_huge_pages -= unused_resv_pages; |
| - |
| /* Cannot return gigantic pages currently */ |
| if (hstate_is_gigantic(h)) |
| - return; |
| + goto out; |
| |
| + /* |
| + * Part (or even all) of the reservation could have been backed |
| + * by pre-allocated pages. Only free surplus pages. |
| + */ |
| nr_pages = min(unused_resv_pages, h->surplus_huge_pages); |
| |
| /* |
| @@ -1749,12 +1758,22 @@ static void return_unused_surplus_pages( |
| * when the nodes with surplus pages have no free pages. |
| * free_pool_huge_page() will balance the the freed pages across the |
| * on-line nodes with memory and will handle the hstate accounting. |
| + * |
| + * Note that we decrement resv_huge_pages as we free the pages. If |
| + * we drop the lock, resv_huge_pages will still be sufficiently large |
| + * to cover subsequent pages we may free. |
| */ |
| while (nr_pages--) { |
| + h->resv_huge_pages--; |
| + unused_resv_pages--; |
| if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1)) |
| - break; |
| + goto out; |
| cond_resched_lock(&hugetlb_lock); |
| } |
| + |
| +out: |
| + /* Fully uncommit the reservation */ |
| + h->resv_huge_pages -= unused_resv_pages; |
| } |
| |
| |