| From 70feee0e1ef331b22cc51f383d532a0d043fbdcc Mon Sep 17 00:00:00 2001 |
| From: Yisheng Xie <xieyisheng1@huawei.com> |
| Date: Fri, 2 Jun 2017 14:46:43 -0700 |
| Subject: mlock: fix mlock count can not decrease in race condition |
| |
| From: Yisheng Xie <xieyisheng1@huawei.com> |
| |
| commit 70feee0e1ef331b22cc51f383d532a0d043fbdcc upstream. |
| |
| Kefeng reported that when running the follow test, the mlock count in |
| meminfo will increase permanently: |
| |
| [1] testcase |
| linux:~ # cat test_mlockal |
| grep Mlocked /proc/meminfo |
| for j in `seq 0 10` |
| do |
| for i in `seq 4 15` |
| do |
| ./p_mlockall >> log & |
| done |
| sleep 0.2 |
| done |
| # wait some time to let mlock counter decrease and 5s may not enough |
| sleep 5 |
| grep Mlocked /proc/meminfo |
| |
| linux:~ # cat p_mlockall.c |
| #include <sys/mman.h> |
| #include <stdlib.h> |
| #include <stdio.h> |
| |
| #define SPACE_LEN 4096 |
| |
| int main(int argc, char ** argv) |
| { |
| int ret; |
| void *adr = malloc(SPACE_LEN); |
| if (!adr) |
| return -1; |
| |
| ret = mlockall(MCL_CURRENT | MCL_FUTURE); |
| printf("mlcokall ret = %d\n", ret); |
| |
| ret = munlockall(); |
| printf("munlcokall ret = %d\n", ret); |
| |
| free(adr); |
| return 0; |
| } |
| |
| In __munlock_pagevec() we should decrement NR_MLOCK for each page where |
| we clear the PageMlocked flag. Commit 1ebb7cc6a583 ("mm: munlock: batch |
| NR_MLOCK zone state updates") has introduced a bug where we don't |
| decrement NR_MLOCK for pages where we clear the flag, but fail to |
| isolate them from the lru list (e.g. when the pages are on some other |
| cpu's percpu pagevec). Since PageMlocked stays cleared, the NR_MLOCK |
| accounting gets permanently disrupted by this. |
| |
| Fix it by counting the number of page whose PageMlock flag is cleared. |
| |
| Fixes: 1ebb7cc6a583 (" mm: munlock: batch NR_MLOCK zone state updates") |
| Link: http://lkml.kernel.org/r/1495678405-54569-1-git-send-email-xieyisheng1@huawei.com |
| Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com> |
| Reported-by: Kefeng Wang <wangkefeng.wang@huawei.com> |
| Tested-by: Kefeng Wang <wangkefeng.wang@huawei.com> |
| Cc: Vlastimil Babka <vbabka@suse.cz> |
| Cc: Joern Engel <joern@logfs.org> |
| Cc: Mel Gorman <mgorman@suse.de> |
| Cc: Michel Lespinasse <walken@google.com> |
| Cc: Hugh Dickins <hughd@google.com> |
| Cc: Rik van Riel <riel@redhat.com> |
| Cc: Johannes Weiner <hannes@cmpxchg.org> |
| Cc: Michal Hocko <mhocko@suse.cz> |
| Cc: Xishi Qiu <qiuxishi@huawei.com> |
| Cc: zhongjiang <zhongjiang@huawei.com> |
| Cc: Hanjun Guo <guohanjun@huawei.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| mm/mlock.c | 5 +++-- |
| 1 file changed, 3 insertions(+), 2 deletions(-) |
| |
| --- a/mm/mlock.c |
| +++ b/mm/mlock.c |
| @@ -285,7 +285,7 @@ static void __munlock_pagevec(struct pag |
| { |
| int i; |
| int nr = pagevec_count(pvec); |
| - int delta_munlocked; |
| + int delta_munlocked = -nr; |
| struct pagevec pvec_putback; |
| int pgrescued = 0; |
| |
| @@ -305,6 +305,8 @@ static void __munlock_pagevec(struct pag |
| continue; |
| else |
| __munlock_isolation_failed(page); |
| + } else { |
| + delta_munlocked++; |
| } |
| |
| /* |
| @@ -316,7 +318,6 @@ static void __munlock_pagevec(struct pag |
| pagevec_add(&pvec_putback, pvec->pages[i]); |
| pvec->pages[i] = NULL; |
| } |
| - delta_munlocked = -nr + pagevec_count(&pvec_putback); |
| __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); |
| spin_unlock_irq(zone_lru_lock(zone)); |
| |