| From 4997ac100b5dda164badc024d137909a2c5402b5 Mon Sep 17 00:00:00 2001 |
| From: Gavin Shan <gwshan@linux.vnet.ibm.com> |
| Date: Fri, 24 Feb 2017 14:59:33 -0800 |
| Subject: [PATCH] mm/page_alloc: fix nodes for reclaim in fast path |
| |
| commit e02dc017c3032dcdce1b993af0db135462e1b4b7 upstream. |
| |
| When @node_reclaim_node isn't 0, the page allocator tries to reclaim |
| pages if the amount of free memory in the zones is below the low |
| watermark. On Power platform, none of the NUMA nodes are scanned for page |
| reclaim because no nodes match the condition in zone_allows_reclaim(). |
| On Power platform, RECLAIM_DISTANCE is set to 10 which is the distance |
| of Node-A to Node-A. So even the preferred node won't be scanned for |
| page reclaim. |
| |
| __alloc_pages_nodemask() |
| get_page_from_freelist() |
| zone_allows_reclaim() |
| |
| Anton proposed the test code as below: |
| |
| # cat alloc.c |
| : |
| int main(int argc, char *argv[]) |
| { |
| void *p; |
| unsigned long size; |
| unsigned long start, end; |
| |
| start = time(NULL); |
| size = strtoul(argv[1], NULL, 0); |
| printf("To allocate %ldGB memory\n", size); |
| |
| size <<= 30; |
| p = malloc(size); |
| assert(p); |
| memset(p, 0, size); |
| |
| end = time(NULL); |
| printf("Used time: %ld seconds\n", end - start); |
| sleep(3600); |
| return 0; |
| } |
| |
| The system I use for testing has two NUMA nodes. Both have 128GB |
| memory. In the scenario below, the page caches on node#0 should be reclaimed |
| when it encounters pressure to accommodate request of allocation. |
| |
| # echo 2 > /proc/sys/vm/zone_reclaim_mode; \ |
| sync; \ |
| echo 3 > /proc/sys/vm/drop_caches |
| # taskset -c 0 cat file.32G > /dev/null; \ |
| grep FilePages /sys/devices/system/node/node0/meminfo |
| Node 0 FilePages: 33619712 kB |
| # taskset -c 0 ./alloc 128 |
| # grep FilePages /sys/devices/system/node/node0/meminfo |
| Node 0 FilePages: 33619840 kB |
| # grep MemFree /sys/devices/system/node/node0/meminfo |
| Node 0 MemFree: 186816 kB |
| |
| With the patch applied, the pagecache on node-0 is reclaimed when its |
| free memory is running out. It's the expected behaviour. |
| |
| # echo 2 > /proc/sys/vm/zone_reclaim_mode; \ |
| sync; \ |
| echo 3 > /proc/sys/vm/drop_caches |
| # taskset -c 0 cat file.32G > /dev/null; \ |
| grep FilePages /sys/devices/system/node/node0/meminfo |
| Node 0 FilePages: 33605568 kB |
| # taskset -c 0 ./alloc 128 |
| # grep FilePages /sys/devices/system/node/node0/meminfo |
| Node 0 FilePages: 1379520 kB |
| # grep MemFree /sys/devices/system/node/node0/meminfo |
| Node 0 MemFree: 317120 kB |
| |
| Fixes: 5f7a75acdb24 ("mm: page_alloc: do not cache reclaim distances") |
| Link: http://lkml.kernel.org/r/1486532455-29613-1-git-send-email-gwshan@linux.vnet.ibm.com |
| Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> |
| Acked-by: Mel Gorman <mgorman@suse.de> |
| Acked-by: Michal Hocko <mhocko@suse.com> |
| Cc: Anton Blanchard <anton@samba.org> |
| Cc: Michael Ellerman <mpe@ellerman.id.au> |
| Cc: <stable@vger.kernel.org> [3.16+] |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/mm/page_alloc.c b/mm/page_alloc.c |
| index ecfec7091c31..091c57712846 100644 |
| --- a/mm/page_alloc.c |
| +++ b/mm/page_alloc.c |
| @@ -2834,7 +2834,7 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order, |
| #ifdef CONFIG_NUMA |
| static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) |
| { |
| - return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) < |
| + return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <= |
| RECLAIM_DISTANCE; |
| } |
| #else /* CONFIG_NUMA */ |
| -- |
| 2.12.0 |
| |