| From 846ec138f13ecbd808ef0089d19cb62cb2be72e4 Mon Sep 17 00:00:00 2001 |
| From: Pavel Tatashin <pasha.tatashin@soleen.com> |
| Date: Wed, 3 Jun 2020 15:59:24 -0700 |
| Subject: [PATCH] mm: initialize deferred pages with interrupts enabled |
| |
| commit 3d060856adfc59afb9d029c233141334cfaba418 upstream. |
| |
| Initializing struct pages is a long task and keeping interrupts disabled |
| for the duration of this operation introduces a number of problems. |
| |
| 1. jiffies are not updated for a long period of time, and thus an incorrect |
| time is reported. See the proposed solution and discussion here: |
| lkml/20200311123848.118638-1-shile.zhang@linux.alibaba.com |
| 2. It prevents further improvement of deferred page initialization by means |
| of intra-node multi-threading. |
| |
| We are keeping interrupts disabled to solve a rather theoretical problem |
| that was never observed in the real world (see 3a2d7fa8a3d5). |
| |
| Let's keep interrupts enabled. In case we ever encounter a scenario where |
| an interrupt thread wants to allocate a large amount of memory this early in |
| boot, we can deal with that by growing the zone (see deferred_grow_zone()) by |
| the needed amount before starting deferred_init_memmap() threads. |
| |
| Before: |
| [ 1.232459] node 0 initialised, 12058412 pages in 1ms |
| |
| After: |
| [ 1.632580] node 0 initialised, 12051227 pages in 436ms |
| |
| Fixes: 3a2d7fa8a3d5 ("mm: disable interrupts while initializing deferred pages") |
| Reported-by: Shile Zhang <shile.zhang@linux.alibaba.com> |
| Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com> |
| Reviewed-by: David Hildenbrand <david@redhat.com> |
| Acked-by: Michal Hocko <mhocko@suse.com> |
| Acked-by: Vlastimil Babka <vbabka@suse.cz> |
| Cc: Dan Williams <dan.j.williams@intel.com> |
| Cc: James Morris <jmorris@namei.org> |
| Cc: Kirill Tkhai <ktkhai@virtuozzo.com> |
| Cc: Sasha Levin <sashal@kernel.org> |
| Cc: Yiqian Wei <yiwei@redhat.com> |
| Cc: <stable@vger.kernel.org> [4.17+] |
| Link: http://lkml.kernel.org/r/20200403140952.17177-3-pasha.tatashin@soleen.com |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h |
| index 70394cabaf4e..5f370f1525c4 100644 |
| --- a/include/linux/mmzone.h |
| +++ b/include/linux/mmzone.h |
| @@ -699,6 +699,8 @@ typedef struct pglist_data { |
| /* |
| * Must be held any time you expect node_start_pfn, |
| * node_present_pages, node_spanned_pages or nr_zones to stay constant. |
| + * Also synchronizes pgdat->first_deferred_pfn during deferred page |
| + * init. |
| * |
| * pgdat_resize_lock() and pgdat_resize_unlock() are provided to |
| * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG |
| diff --git a/mm/page_alloc.c b/mm/page_alloc.c |
| index 21473d992097..3c82f8fa2ac5 100644 |
| --- a/mm/page_alloc.c |
| +++ b/mm/page_alloc.c |
| @@ -1743,6 +1743,13 @@ static int __init deferred_init_memmap(void *data) |
| BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat)); |
| pgdat->first_deferred_pfn = ULONG_MAX; |
| |
| + /* |
| + * Once we unlock here, the zone cannot be grown anymore, thus if an |
| + * interrupt thread must allocate this early in boot, zone must be |
| + * pre-grown prior to start of deferred page initialization. |
| + */ |
| + pgdat_resize_unlock(pgdat, &flags); |
| + |
| /* Only the highest zone is deferred so find it */ |
| for (zid = 0; zid < MAX_NR_ZONES; zid++) { |
| zone = pgdat->node_zones + zid; |
| @@ -1765,8 +1772,6 @@ static int __init deferred_init_memmap(void *data) |
| touch_nmi_watchdog(); |
| } |
| zone_empty: |
| - pgdat_resize_unlock(pgdat, &flags); |
| - |
| /* Sanity check that the next zone really is unpopulated */ |
| WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone)); |
| |
| @@ -1808,17 +1813,6 @@ deferred_grow_zone(struct zone *zone, unsigned int order) |
| |
| pgdat_resize_lock(pgdat, &flags); |
| |
| - /* |
| - * If deferred pages have been initialized while we were waiting for |
| - * the lock, return true, as the zone was grown. The caller will retry |
| - * this zone. We won't return to this function since the caller also |
| - * has this static branch. |
| - */ |
| - if (!static_branch_unlikely(&deferred_pages)) { |
| - pgdat_resize_unlock(pgdat, &flags); |
| - return true; |
| - } |
| - |
| /* |
| * If someone grew this zone while we were waiting for spinlock, return |
| * true, as there might be enough pages already. |
| -- |
| 2.27.0 |
| |