From: Peter Zijlstra <peterz@infradead.org>
Date: Fri Jul 3 08:44:37 2009 -0500
Subject: mm: page_alloc: Reduce lock sections further

Split out the pages which are to be freed into a separate list and
call free_pcppages_bulk() outside of the percpu page allocator locks.
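
In case the two-phase pattern is easier to see outside the kernel
context, here is a minimal user-space sketch (illustrative only; node,
percpu_list, percpu_lock and zone_lock are made-up stand-ins for the
pcp lists, pa_lock and zone->lock; this is not the kernel code):

  /*
   * Standalone sketch of the two-phase bulk free: detach items under
   * one lock, then do the per-item work under the other lock, so that
   * neither lock section covers both phases.
   */
  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>

  struct node { struct node *next; int val; };

  static struct node *percpu_list;      /* stand-in for pcp->lists */
  static pthread_mutex_t percpu_lock = PTHREAD_MUTEX_INITIALIZER;
  static pthread_mutex_t zone_lock   = PTHREAD_MUTEX_INITIALIZER;

  /*
   * Phase 1: unlink up to count nodes onto a private list while
   * holding the percpu-side lock; pointer manipulation only.
   */
  static struct node *isolate_nodes(int count)
  {
          struct node *dst = NULL;

          pthread_mutex_lock(&percpu_lock);
          while (count-- && percpu_list) {
                  struct node *n = percpu_list;

                  percpu_list = n->next;
                  n->next = dst;
                  dst = n;
          }
          pthread_mutex_unlock(&percpu_lock);
          return dst;
  }

  /*
   * Phase 2: free the detached nodes under the zone-side lock, with
   * the percpu-side lock already dropped.
   */
  static void free_nodes_bulk(struct node *list)
  {
          pthread_mutex_lock(&zone_lock);
          while (list) {
                  struct node *n = list;

                  list = n->next;
                  printf("freeing %d\n", n->val);
                  free(n);
          }
          pthread_mutex_unlock(&zone_lock);
  }

  int main(void)
  {
          for (int i = 0; i < 4; i++) {
                  struct node *n = malloc(sizeof(*n));

                  n->val = i;
                  n->next = percpu_list;
                  percpu_list = n;
          }
          free_nodes_bulk(isolate_nodes(4));
          return 0;
  }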

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 mm/page_alloc.c | 146 +++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 104 insertions(+), 42 deletions(-)

--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1113,7 +1113,7 @@ static bool bulkfree_pcp_prepare(struct
 #endif /* CONFIG_DEBUG_VM */
 
 /*
- * Frees a number of pages from the PCP lists
+ * Frees a number of pages which have been collected from the pcp lists.
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
  *
@@ -1123,16 +1123,64 @@ static bool bulkfree_pcp_prepare(struct
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pcppages_bulk(struct zone *zone, int count,
-			       struct per_cpu_pages *pcp)
+static void free_pcppages_bulk(struct zone *zone, struct list_head *list,
+			       bool zone_retry)
 {
-	int migratetype = 0;
-	int batch_free = 0;
 	bool isolated_pageblocks;
+	unsigned long flags;
 
-	spin_lock(&zone->lock);
+	spin_lock_irqsave(&zone->lock, flags);
 	isolated_pageblocks = has_isolate_pageblock(zone);
 
+	while (!list_empty(list)) {
+		struct page *page;
+		int mt;	/* migratetype of the to-be-freed page */
+
+		page = list_first_entry(list, struct page, lru);
+
+		/*
+		 * free_unref_page_list() sorts pages by zone. If we end up with
+		 * pages from different NUMA nodes belonging to the same ZONE
+		 * index then we need to redo with the correct ZONE pointer.
+		 */
+		if (page_zone(page) != zone) {
+			WARN_ON_ONCE(zone_retry == false);
+			if (zone_retry)
+				break;
+		}
+
+		/* must delete as __free_one_page list manipulates */
+		list_del(&page->lru);
+
+		mt = get_pcppage_migratetype(page);
+		/* MIGRATE_ISOLATE page should not go to pcplists */
+		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+		/* Pageblock could have been isolated meanwhile */
+		if (unlikely(isolated_pageblocks))
+			mt = get_pageblock_migratetype(page);
+
+		if (bulkfree_pcp_prepare(page))
+			continue;
+
+		__free_one_page(page, page_to_pfn(page), zone, 0, mt);
+		trace_mm_page_pcpu_drain(page, 0, mt);
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Moves a number of pages from the PCP lists to a free list, which is
+ * then freed outside of the locked region.
+ *
+ * Assumes all pages on list are in same zone, and of same order.
+ * count is the number of pages to free.
+ */
+static void isolate_pcp_pages(int count, struct per_cpu_pages *src,
+			      struct list_head *dst)
+{
+	int migratetype = 0;
+	int batch_free = 0;
+
 	while (count) {
 		struct page *page;
 		struct list_head *list;
@@ -1148,7 +1196,7 @@ static void free_pcppages_bulk(struct zo
 			batch_free++;
 			if (++migratetype == MIGRATE_PCPTYPES)
 				migratetype = 0;
-			list = &pcp->lists[migratetype];
+			list = &src->lists[migratetype];
 		} while (list_empty(list));
 
 		/* This is the only non-empty list. Free them all. */
@@ -1156,27 +1204,12 @@ static void free_pcppages_bulk(struct zo
 			batch_free = count;
 
 		do {
-			int mt;	/* migratetype of the to-be-freed page */
-
 			page = list_last_entry(list, struct page, lru);
-			/* must delete as __free_one_page list manipulates */
 			list_del(&page->lru);
 
-			mt = get_pcppage_migratetype(page);
-			/* MIGRATE_ISOLATE page should not go to pcplists */
-			VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-			/* Pageblock could have been isolated meanwhile */
-			if (unlikely(isolated_pageblocks))
-				mt = get_pageblock_migratetype(page);
-
-			if (bulkfree_pcp_prepare(page))
-				continue;
-
-			__free_one_page(page, page_to_pfn(page), zone, 0, mt);
-			trace_mm_page_pcpu_drain(page, 0, mt);
+			list_add(&page->lru, dst);
 		} while (--count && --batch_free && !list_empty(list));
 	}
-	spin_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1184,13 +1217,15 @@ static void free_one_page(struct zone *z
 			unsigned int order,
 			int migratetype)
 {
-	spin_lock(&zone->lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&zone->lock, flags);
 	if (unlikely(has_isolate_pageblock(zone) ||
 		     is_migrate_isolate(migratetype))) {
 		migratetype = get_pfnblock_migratetype(page, pfn);
 	}
 	__free_one_page(page, pfn, zone, order, migratetype);
-	spin_unlock(&zone->lock);
+	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -2426,16 +2461,18 @@ static int rmqueue_bulk(struct zone *zon
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
 	unsigned long flags;
+	LIST_HEAD(dst);
 	int to_drain, batch;
 
 	local_lock_irqsave(pa_lock, flags);
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
 	if (to_drain > 0) {
-		free_pcppages_bulk(zone, to_drain, pcp);
+		isolate_pcp_pages(to_drain, pcp, &dst);
 		pcp->count -= to_drain;
 	}
 	local_unlock_irqrestore(pa_lock, flags);
+	free_pcppages_bulk(zone, &dst, false);
 }
 #endif
 
@@ -2451,16 +2488,21 @@ static void drain_pages_zone(unsigned in
 	unsigned long flags;
 	struct per_cpu_pageset *pset;
 	struct per_cpu_pages *pcp;
+	LIST_HEAD(dst);
+	int count;
 
 	cpu_lock_irqsave(cpu, flags);
 	pset = per_cpu_ptr(zone->pageset, cpu);
 
 	pcp = &pset->pcp;
-	if (pcp->count) {
-		free_pcppages_bulk(zone, pcp->count, pcp);
+	count = pcp->count;
+	if (count) {
+		isolate_pcp_pages(count, pcp, &dst);
 		pcp->count = 0;
 	}
 	cpu_unlock_irqrestore(cpu, flags);
+	if (count)
+		free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
@@ -2663,7 +2705,8 @@ static bool free_unref_page_prepare(stru
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+				   struct list_head *dst)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -2692,7 +2735,8 @@ static void free_unref_page_commit(struc
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
-		free_pcppages_bulk(zone, batch, pcp);
+
+		isolate_pcp_pages(batch, pcp, dst);
 		pcp->count -= batch;
 	}
 }
@@ -2704,13 +2748,17 @@ void free_unref_page(struct page *page)
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
+	struct zone *zone = page_zone(page);
+	LIST_HEAD(dst);
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
 	local_lock_irqsave(pa_lock, flags);
-	free_unref_page_commit(page, pfn);
+	free_unref_page_commit(page, pfn, &dst);
+
 	local_unlock_irqrestore(pa_lock, flags);
+	free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
@@ -2720,7 +2768,11 @@ void free_unref_page_list(struct list_he
 {
 	struct page *page, *next;
 	unsigned long flags, pfn;
-	int batch_count = 0;
+	struct list_head dsts[__MAX_NR_ZONES];
+	int i;
+
+	for (i = 0; i < __MAX_NR_ZONES; i++)
+		INIT_LIST_HEAD(&dsts[i]);
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
@@ -2733,22 +2785,32 @@ void free_unref_page_list(struct list_he
 	local_lock_irqsave(pa_lock, flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
+		enum zone_type type;
 
 		set_page_private(page, 0);
 		trace_mm_page_free_batched(page);
-		free_unref_page_commit(page, pfn);
+		type = page_zonenum(page);
+		free_unref_page_commit(page, pfn, &dsts[type]);
 
-		/*
-		 * Guard against excessive IRQ disabled times when we get
-		 * a large list of pages to free.
-		 */
-		if (++batch_count == SWAP_CLUSTER_MAX) {
-			local_unlock_irqrestore(pa_lock, flags);
-			batch_count = 0;
-			local_lock_irqsave(pa_lock, flags);
-		}
 	}
 	local_unlock_irqrestore(pa_lock, flags);
+
+	i = 0;
+	do {
+		struct page *page;
+		struct zone *zone;
+
+		if (i >= __MAX_NR_ZONES)
+			break;
+		if (list_empty(&dsts[i])) {
+			i++;
+			continue;
+		}
+		page = list_first_entry(&dsts[i], struct page, lru);
+		zone = page_zone(page);
+
+		free_pcppages_bulk(zone, &dsts[i], true);
+	} while (1);
 }
 
 /*