| From f5bf18fa22f8c41a13eb8762c7373eb3a93a7333 Mon Sep 17 00:00:00 2001 |
| From: Nishanth Aravamudan <nacc@linux.vnet.ibm.com> |
| Date: Wed, 21 Mar 2012 16:34:07 -0700 |
| Subject: bootmem/sparsemem: remove limit constraint in alloc_bootmem_section |
| |
| From: Nishanth Aravamudan <nacc@linux.vnet.ibm.com> |
| |
| commit f5bf18fa22f8c41a13eb8762c7373eb3a93a7333 upstream. |
| |
| While testing AMS (Active Memory Sharing) / CMO (Cooperative Memory |
| Overcommit) on powerpc, we tripped the following: |
| |
| kernel BUG at mm/bootmem.c:483! |
| cpu 0x0: Vector: 700 (Program Check) at [c000000000c03940] |
| pc: c000000000a62bd8: .alloc_bootmem_core+0x90/0x39c |
| lr: c000000000a64bcc: .sparse_early_usemaps_alloc_node+0x84/0x29c |
| sp: c000000000c03bc0 |
| msr: 8000000000021032 |
| current = 0xc000000000b0cce0 |
| paca = 0xc000000001d80000 |
| pid = 0, comm = swapper |
| kernel BUG at mm/bootmem.c:483! |
| enter ? for help |
| [c000000000c03c80] c000000000a64bcc |
| .sparse_early_usemaps_alloc_node+0x84/0x29c |
| [c000000000c03d50] c000000000a64f10 .sparse_init+0x12c/0x28c |
| [c000000000c03e20] c000000000a474f4 .setup_arch+0x20c/0x294 |
| [c000000000c03ee0] c000000000a4079c .start_kernel+0xb4/0x460 |
| [c000000000c03f90] c000000000009670 .start_here_common+0x1c/0x2c |
| |
| This is |
| |
| BUG_ON(limit && goal + size > limit); |
| |
| and after some debugging, it seems that |
| |
| goal = 0x7ffff000000 |
| limit = 0x80000000000 |
| |
| and sparse_early_usemaps_alloc_node -> |
| sparse_early_usemaps_alloc_pgdat_section calls |
| |
| return alloc_bootmem_section(usemap_size() * count, section_nr); |
| |
| This is on a system with 8TB available via the AMS pool, and as a quirk |
| of AMS in firmware, all of that memory shows up in node 0. So, we end |
| up with an allocation that will fail the goal/limit constraints. |
| |
| In theory, we could fall back to alloc_bootmem_node() in |
| sparse_early_usemaps_alloc_node(), but since we actually have HOTREMOVE |
| defined, we hit the BUG_ON() in alloc_bootmem_core() before that |
| fallback is ever reached. A simple solution appears to be to |
| unconditionally remove the limit condition in alloc_bootmem_section, |
| meaning allocations are allowed to cross section boundaries (necessary |
| for systems of this size). |
| |
| Johannes Weiner pointed out that if alloc_bootmem_section() no longer |
| guarantees section-locality, we need check_usemap_section_nr() to print |
| possible cross-dependencies between node descriptors and the usemaps |
| allocated through it. That makes the two loops in |
| sparse_early_usemaps_alloc_node() identical, so refactor the code to |
| use a single loop, with the fallback allocation and the failure warning |
| handled before it. |
| |
| [akpm@linux-foundation.org: code simplification] |
| Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> |
| Cc: Dave Hansen <haveblue@us.ibm.com> |
| Cc: Anton Blanchard <anton@au1.ibm.com> |
| Cc: Paul Mackerras <paulus@samba.org> |
| Cc: Ben Herrenschmidt <benh@kernel.crashing.org> |
| Cc: Robert Jennings <rcj@linux.vnet.ibm.com> |
| Acked-by: Johannes Weiner <hannes@cmpxchg.org> |
| Acked-by: Mel Gorman <mgorman@suse.de> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| mm/bootmem.c | 5 ++--- |
| mm/sparse.c | 30 +++++++++++------------------- |
| 2 files changed, 13 insertions(+), 22 deletions(-) |
| |
| --- a/mm/bootmem.c |
| +++ b/mm/bootmem.c |
| @@ -768,14 +768,13 @@ void * __init alloc_bootmem_section(unsi |
| unsigned long section_nr) |
| { |
| bootmem_data_t *bdata; |
| - unsigned long pfn, goal, limit; |
| + unsigned long pfn, goal; |
| |
| pfn = section_nr_to_pfn(section_nr); |
| goal = pfn << PAGE_SHIFT; |
| - limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; |
| bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; |
| |
| - return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); |
| + return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, 0); |
| } |
| #endif |
| |
| --- a/mm/sparse.c |
| +++ b/mm/sparse.c |
| @@ -353,29 +353,21 @@ static void __init sparse_early_usemaps_ |
| |
| usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), |
| usemap_count); |
| - if (usemap) { |
| - for (pnum = pnum_begin; pnum < pnum_end; pnum++) { |
| - if (!present_section_nr(pnum)) |
| - continue; |
| - usemap_map[pnum] = usemap; |
| - usemap += size; |
| + if (!usemap) { |
| + usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); |
| + if (!usemap) { |
| + printk(KERN_WARNING "%s: allocation failed\n", __func__); |
| + return; |
| } |
| - return; |
| } |
| |
| - usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); |
| - if (usemap) { |
| - for (pnum = pnum_begin; pnum < pnum_end; pnum++) { |
| - if (!present_section_nr(pnum)) |
| - continue; |
| - usemap_map[pnum] = usemap; |
| - usemap += size; |
| - check_usemap_section_nr(nodeid, usemap_map[pnum]); |
| - } |
| - return; |
| + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { |
| + if (!present_section_nr(pnum)) |
| + continue; |
| + usemap_map[pnum] = usemap; |
| + usemap += size; |
| + check_usemap_section_nr(nodeid, usemap_map[pnum]); |
| } |
| - |
| - printk(KERN_WARNING "%s: allocation failed\n", __func__); |
| } |
| |
| #ifndef CONFIG_SPARSEMEM_VMEMMAP |