| From 72403b4a0fbdf433c1fe0127e49864658f6f6468 Mon Sep 17 00:00:00 2001 |
| From: Mel Gorman <mgorman@suse.de> |
| Date: Tue, 12 Nov 2013 15:08:32 -0800 |
| Subject: mm: numa: return the number of base pages altered by protection changes |
| |
| From: Mel Gorman <mgorman@suse.de> |
| |
| commit 72403b4a0fbdf433c1fe0127e49864658f6f6468 upstream. |
| |
| Commit 0255d4918480 ("mm: Account for a THP NUMA hinting update as one |
| PTE update") was added to account for the number of PTE updates when |
| marking pages prot_numa. task_numa_work was using the old return value |
| to track how much address space had been updated. Altering the return |
| value causes the scanner to do more work than it is configured or |
| documented to in a single unit of work. |
| |
| This patch reverts that commit and accounts for the number of THP |
| updates separately in vmstat. It is up to the administrator to |
| interpret the pair of values correctly. This is a straight-forward |
| operation and likely to only be of interest when actively debugging NUMA |
| balancing problems. |
| |
| The impact of this patch is that the NUMA PTE scanner will scan slower |
| when THP is enabled and workloads may converge slower as a result. On |
| the flip side, system CPU usage should be lower than recent tests |
| reported. This is an illustrative example of a short single JVM specjbb |
| test: |
| |
| specjbb |
| 3.12.0 3.12.0 |
| vanilla acctupdates |
| TPut 1 26143.00 ( 0.00%) 25747.00 ( -1.51%) |
| TPut 7 185257.00 ( 0.00%) 183202.00 ( -1.11%) |
| TPut 13 329760.00 ( 0.00%) 346577.00 ( 5.10%) |
| TPut 19 442502.00 ( 0.00%) 460146.00 ( 3.99%) |
| TPut 25 540634.00 ( 0.00%) 549053.00 ( 1.56%) |
| TPut 31 512098.00 ( 0.00%) 519611.00 ( 1.47%) |
| TPut 37 461276.00 ( 0.00%) 474973.00 ( 2.97%) |
| TPut 43 403089.00 ( 0.00%) 414172.00 ( 2.75%) |
| |
| 3.12.0 3.12.0 |
| vanilla acctupdates |
| User 5169.64 5184.14 |
| System 100.45 80.02 |
| Elapsed 252.75 251.85 |
| |
| Performance is similar but note the reduction in system CPU time. While |
| this showed a performance gain, it will not be universal but at least |
| it'll be behaving as documented. The vmstats are obviously different but |
| here is an obvious interpretation of them from mmtests. |
| |
| 3.12.0 3.12.0 |
| vanilla acctupdates |
| NUMA page range updates 1408326 11043064 |
| NUMA huge PMD updates 0 21040 |
| NUMA PTE updates 1408326 291624 |
| |
| "NUMA page range updates" == nr_pte_updates and is the value returned to |
| the NUMA pte scanner. "NUMA huge PMD updates" is the number of THP |
| updates; in combination, the two can be used from userspace to calculate |
| how many ptes were updated. |
| |
| Signed-off-by: Mel Gorman <mgorman@suse.de> |
| Reported-by: Alex Thorlton <athorlton@sgi.com> |
| Reviewed-by: Rik van Riel <riel@redhat.com> |
| Cc: <stable@vger.kernel.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Mel Gorman <mgorman@suse.de> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| include/linux/vm_event_item.h | 1 + |
| mm/mprotect.c | 7 ++++++- |
| mm/vmstat.c | 1 + |
| 3 files changed, 8 insertions(+), 1 deletion(-) |
| |
| --- a/include/linux/vm_event_item.h |
| +++ b/include/linux/vm_event_item.h |
| @@ -39,6 +39,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS |
| PAGEOUTRUN, ALLOCSTALL, PGROTATED, |
| #ifdef CONFIG_NUMA_BALANCING |
| NUMA_PTE_UPDATES, |
| + NUMA_HUGE_PTE_UPDATES, |
| NUMA_HINT_FAULTS, |
| NUMA_HINT_FAULTS_LOCAL, |
| NUMA_PAGE_MIGRATE, |
| --- a/mm/mprotect.c |
| +++ b/mm/mprotect.c |
| @@ -135,6 +135,7 @@ static inline unsigned long change_pmd_r |
| pmd_t *pmd; |
| unsigned long next; |
| unsigned long pages = 0; |
| + unsigned long nr_huge_updates = 0; |
| bool all_same_node; |
| |
| pmd = pmd_offset(pud, addr); |
| @@ -145,7 +146,8 @@ static inline unsigned long change_pmd_r |
| split_huge_page_pmd(vma, addr, pmd); |
| else if (change_huge_pmd(vma, pmd, addr, newprot, |
| prot_numa)) { |
| - pages++; |
| + pages += HPAGE_PMD_NR; |
| + nr_huge_updates++; |
| continue; |
| } |
| /* fall through */ |
| @@ -165,6 +167,9 @@ static inline unsigned long change_pmd_r |
| change_pmd_protnuma(vma->vm_mm, addr, pmd); |
| } while (pmd++, addr = next, addr != end); |
| |
| + if (nr_huge_updates) |
| + count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates); |
| + |
| return pages; |
| } |
| |
| --- a/mm/vmstat.c |
| +++ b/mm/vmstat.c |
| @@ -779,6 +779,7 @@ const char * const vmstat_text[] = { |
| |
| #ifdef CONFIG_NUMA_BALANCING |
| "numa_pte_updates", |
| + "numa_huge_pte_updates", |
| "numa_hint_faults", |
| "numa_hint_faults_local", |
| "numa_pages_migrated", |