From: Peter Zijlstra <peterz@infradead.org>
Date: Fri Jul 3 08:44:37 2009 -0500
Subject: mm: page_alloc: Reduce lock sections further
Split out the pages which are to be freed into a separate list and
call free_pcppages_bulk() outside of the percpu page allocator locks.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/page_alloc.c | 146 +++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 104 insertions(+), 42 deletions(-)
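
For readers skimming the diff, here is a minimal userspace sketch of the split
the patch performs. It uses pthread mutexes standing in for pa_lock and
zone->lock, and every name in it (fake_page, pcp_cache, isolate_pages(),
bulk_free()) is made up for illustration; it is not kernel code, it only shows
the "detach under the per-CPU lock, do the expensive freeing under the zone
lock afterwards" pattern.

/*
 * Illustrative userspace sketch only: the two mutexes below stand in for
 * pa_lock (per-CPU page allocator lock) and zone->lock; all identifiers
 * are hypothetical.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_page {
        struct fake_page *next;
        int id;
};

static struct fake_page *pcp_cache;     /* stands in for pcp->lists[] */
static pthread_mutex_t pcp_lock = PTHREAD_MUTEX_INITIALIZER;   /* "pa_lock" */
static pthread_mutex_t zone_lock = PTHREAD_MUTEX_INITIALIZER;  /* "zone->lock" */

/* Counterpart of isolate_pcp_pages(): only unlink under the pcp lock. */
static struct fake_page *isolate_pages(void)
{
        struct fake_page *batch;

        pthread_mutex_lock(&pcp_lock);
        batch = pcp_cache;
        pcp_cache = NULL;
        pthread_mutex_unlock(&pcp_lock);
        return batch;
}

/* Counterpart of free_pcppages_bulk(): heavy work under zone_lock only. */
static void bulk_free(struct fake_page *batch)
{
        pthread_mutex_lock(&zone_lock);
        while (batch) {
                struct fake_page *next = batch->next;

                printf("freeing page %d\n", batch->id);
                free(batch);
                batch = next;
        }
        pthread_mutex_unlock(&zone_lock);
}

int main(void)
{
        for (int i = 0; i < 4; i++) {
                struct fake_page *p = malloc(sizeof(*p));

                p->id = i;
                p->next = pcp_cache;
                pcp_cache = p;
        }
        bulk_free(isolate_pages());
        return 0;
}

The patch does the same thing with list_head lists: isolate_pcp_pages() runs
inside the pa_lock section and only unlinks pages onto a local list, while
free_pcppages_bulk() takes zone->lock afterwards, so the potentially long
buddy free no longer nests inside the per-CPU locked region.
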
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1113,7 +1113,7 @@ static bool bulkfree_pcp_prepare(struct
#endif /* CONFIG_DEBUG_VM */
/*
- * Frees a number of pages from the PCP lists
+ * Frees a number of pages which have been collected from the pcp lists.
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
@@ -1123,16 +1123,64 @@ static bool bulkfree_pcp_prepare(struct
* And clear the zone's pages_scanned counter, to hold off the "all pages are
* pinned" detection logic.
*/
-static void free_pcppages_bulk(struct zone *zone, int count,
- struct per_cpu_pages *pcp)
+static void free_pcppages_bulk(struct zone *zone, struct list_head *list,
+ bool zone_retry)
{
- int migratetype = 0;
- int batch_free = 0;
bool isolated_pageblocks;
+ unsigned long flags;
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
isolated_pageblocks = has_isolate_pageblock(zone);
+ while (!list_empty(list)) {
+ struct page *page;
+ int mt; /* migratetype of the to-be-freed page */
+
+ page = list_first_entry(list, struct page, lru);
+
+ /*
+ * free_unref_page_list() sorts pages by zone. If we end up with
+ * pages from different NUMA nodes belonging to the same ZONE
+ * index, then we need to redo the free with the correct ZONE pointer.
+ */
+ if (page_zone(page) != zone) {
+ WARN_ON_ONCE(zone_retry == false);
+ if (zone_retry)
+ break;
+ }
+
+ /* must delete as __free_one_page list manipulates */
+ list_del(&page->lru);
+
+ mt = get_pcppage_migratetype(page);
+ /* MIGRATE_ISOLATE page should not go to pcplists */
+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+ /* Pageblock could have been isolated meanwhile */
+ if (unlikely(isolated_pageblocks))
+ mt = get_pageblock_migratetype(page);
+
+ if (bulkfree_pcp_prepare(page))
+ continue;
+
+ __free_one_page(page, page_to_pfn(page), zone, 0, mt);
+ trace_mm_page_pcpu_drain(page, 0, mt);
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Moves a number of pages from the PCP lists to a free list, which is
+ * then freed outside of the locked region.
+ *
+ * Assumes all pages on list are in same zone, and of same order.
+ * count is the number of pages to free.
+ */
+static void isolate_pcp_pages(int count, struct per_cpu_pages *src,
+ struct list_head *dst)
+{
+ int migratetype = 0;
+ int batch_free = 0;
+
while (count) {
struct page *page;
struct list_head *list;
@@ -1148,7 +1196,7 @@ static void free_pcppages_bulk(struct zo
batch_free++;
if (++migratetype == MIGRATE_PCPTYPES)
migratetype = 0;
- list = &pcp->lists[migratetype];
+ list = &src->lists[migratetype];
} while (list_empty(list));
/* This is the only non-empty list. Free them all. */
@@ -1156,27 +1204,12 @@ static void free_pcppages_bulk(struct zo
batch_free = count;
do {
- int mt; /* migratetype of the to-be-freed page */
-
page = list_last_entry(list, struct page, lru);
- /* must delete as __free_one_page list manipulates */
list_del(&page->lru);
- mt = get_pcppage_migratetype(page);
- /* MIGRATE_ISOLATE page should not go to pcplists */
- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
- /* Pageblock could have been isolated meanwhile */
- if (unlikely(isolated_pageblocks))
- mt = get_pageblock_migratetype(page);
-
- if (bulkfree_pcp_prepare(page))
- continue;
-
- __free_one_page(page, page_to_pfn(page), zone, 0, mt);
- trace_mm_page_pcpu_drain(page, 0, mt);
+ list_add(&page->lru, dst);
} while (--count && --batch_free && !list_empty(list));
}
- spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone,
@@ -1184,13 +1217,15 @@ static void free_one_page(struct zone *z
unsigned int order,
int migratetype)
{
- spin_lock(&zone->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->lock, flags);
if (unlikely(has_isolate_pageblock(zone) ||
is_migrate_isolate(migratetype))) {
migratetype = get_pfnblock_migratetype(page, pfn);
}
__free_one_page(page, pfn, zone, order, migratetype);
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
}
static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -2426,16 +2461,18 @@ static int rmqueue_bulk(struct zone *zon
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
+ LIST_HEAD(dst);
int to_drain, batch;
local_lock_irqsave(pa_lock, flags);
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0) {
- free_pcppages_bulk(zone, to_drain, pcp);
+ isolate_pcp_pages(to_drain, pcp, &dst);
pcp->count -= to_drain;
}
local_unlock_irqrestore(pa_lock, flags);
+ free_pcppages_bulk(zone, &dst, false);
}
#endif
@@ -2451,16 +2488,21 @@ static void drain_pages_zone(unsigned in
unsigned long flags;
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+ LIST_HEAD(dst);
+ int count;
cpu_lock_irqsave(cpu, flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- if (pcp->count) {
- free_pcppages_bulk(zone, pcp->count, pcp);
+ count = pcp->count;
+ if (count) {
+ isolate_pcp_pages(count, pcp, &dst);
pcp->count = 0;
}
cpu_unlock_irqrestore(cpu, flags);
+ if (count)
+ free_pcppages_bulk(zone, &dst, false);
}
/*
@@ -2663,7 +2705,8 @@ static bool free_unref_page_prepare(stru
return true;
}
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+ struct list_head *dst)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
@@ -2692,7 +2735,8 @@ static void free_unref_page_commit(struc
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = READ_ONCE(pcp->batch);
- free_pcppages_bulk(zone, batch, pcp);
+
+ isolate_pcp_pages(batch, pcp, dst);
pcp->count -= batch;
}
}
@@ -2704,13 +2748,17 @@ void free_unref_page(struct page *page)
{
unsigned long flags;
unsigned long pfn = page_to_pfn(page);
+ struct zone *zone = page_zone(page);
+ LIST_HEAD(dst);
if (!free_unref_page_prepare(page, pfn))
return;
local_lock_irqsave(pa_lock, flags);
- free_unref_page_commit(page, pfn);
+ free_unref_page_commit(page, pfn, &dst);
+
local_unlock_irqrestore(pa_lock, flags);
+ free_pcppages_bulk(zone, &dst, false);
}
/*
@@ -2720,7 +2768,11 @@ void free_unref_page_list(struct list_he
{
struct page *page, *next;
unsigned long flags, pfn;
- int batch_count = 0;
+ struct list_head dsts[__MAX_NR_ZONES];
+ int i;
+
+ for (i = 0; i < __MAX_NR_ZONES; i++)
+ INIT_LIST_HEAD(&dsts[i]);
/* Prepare pages for freeing */
list_for_each_entry_safe(page, next, list, lru) {
@@ -2733,22 +2785,32 @@ void free_unref_page_list(struct list_he
local_lock_irqsave(pa_lock, flags);
list_for_each_entry_safe(page, next, list, lru) {
unsigned long pfn = page_private(page);
+ enum zone_type type;
set_page_private(page, 0);
trace_mm_page_free_batched(page);
- free_unref_page_commit(page, pfn);
+ type = page_zonenum(page);
+ free_unref_page_commit(page, pfn, &dsts[type]);
- /*
- * Guard against excessive IRQ disabled times when we get
- * a large list of pages to free.
- */
- if (++batch_count == SWAP_CLUSTER_MAX) {
- local_unlock_irqrestore(pa_lock, flags);
- batch_count = 0;
- local_lock_irqsave(pa_lock, flags);
- }
}
local_unlock_irqrestore(pa_lock, flags);
+
+ i = 0;
+ do {
+ struct page *page;
+ struct zone *zone;
+
+ if (i >= __MAX_NR_ZONES)
+ break;
+ if (list_empty(&dsts[i])) {
+ i++;
+ continue;
+ }
+ page = list_first_entry(&dsts[i], struct page, lru);
+ zone = page_zone(page);
+
+ free_pcppages_bulk(zone, &dsts[i], true);
+ } while (1);
}
/*