From: Peter Zijlstra <peterz@infradead.org>
Date: Fri Jul 3 08:44:37 2009 -0500
Subject: mm: page_alloc: Reduce lock sections further

Split out the pages which are to be freed into a separate list and
call free_pcppages_bulk() outside of the percpu page allocator locks.
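
In case the two-phase pattern is easier to see outside the kernel
context, here is a minimal user-space sketch (illustrative only; node,
percpu_list, percpu_lock and zone_lock are made-up stand-ins for the
pcp lists, pa_lock and zone->lock; this is not the kernel code):

  /*
   * Standalone sketch of the two-phase bulk free: detach items under
   * one lock, then do the per-item work under the other lock, so that
   * neither lock section covers both phases.
   */
  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>

  struct node { struct node *next; int val; };

  static struct node *percpu_list;      /* stand-in for pcp->lists */
  static pthread_mutex_t percpu_lock = PTHREAD_MUTEX_INITIALIZER;
  static pthread_mutex_t zone_lock   = PTHREAD_MUTEX_INITIALIZER;

  /*
   * Phase 1: unlink up to count nodes onto a private list while
   * holding the percpu-side lock; pointer manipulation only.
   */
  static struct node *isolate_nodes(int count)
  {
          struct node *dst = NULL;

          pthread_mutex_lock(&percpu_lock);
          while (count-- && percpu_list) {
                  struct node *n = percpu_list;

                  percpu_list = n->next;
                  n->next = dst;
                  dst = n;
          }
          pthread_mutex_unlock(&percpu_lock);
          return dst;
  }

  /*
   * Phase 2: free the detached nodes under the zone-side lock, with
   * the percpu-side lock already dropped.
   */
  static void free_nodes_bulk(struct node *list)
  {
          pthread_mutex_lock(&zone_lock);
          while (list) {
                  struct node *n = list;

                  list = n->next;
                  printf("freeing %d\n", n->val);
                  free(n);
          }
          pthread_mutex_unlock(&zone_lock);
  }

  int main(void)
  {
          for (int i = 0; i < 4; i++) {
                  struct node *n = malloc(sizeof(*n));

                  n->val = i;
                  n->next = percpu_list;
                  percpu_list = n;
          }
          free_nodes_bulk(isolate_nodes(4));
          return 0;
  }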

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 mm/page_alloc.c | 146 +++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 104 insertions(+), 42 deletions(-)

--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1113,7 +1113,7 @@ static bool bulkfree_pcp_prepare(struct
 #endif /* CONFIG_DEBUG_VM */
 
 /*
- * Frees a number of pages from the PCP lists
+ * Frees a number of pages which have been collected from the pcp lists.
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
  *
@@ -1123,16 +1123,64 @@ static bool bulkfree_pcp_prepare(struct
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pcppages_bulk(struct zone *zone, int count,
-			       struct per_cpu_pages *pcp)
+static void free_pcppages_bulk(struct zone *zone, struct list_head *list,
+			       bool zone_retry)
 {
-	int migratetype = 0;
-	int batch_free = 0;
 	bool isolated_pageblocks;
+	unsigned long flags;
 
-	spin_lock(&zone->lock);
+	spin_lock_irqsave(&zone->lock, flags);
 	isolated_pageblocks = has_isolate_pageblock(zone);
 
+	while (!list_empty(list)) {
+		struct page *page;
+		int mt;	/* migratetype of the to-be-freed page */
+
+		page = list_first_entry(list, struct page, lru);
+
+		/*
+		 * free_unref_page_list() sorts pages by zone. If we end up with
+		 * pages from different NUMA nodes belonging to the same ZONE
+		 * index then we need to redo with the correct ZONE pointer.
+		 */
+		if (page_zone(page) != zone) {
+			WARN_ON_ONCE(zone_retry == false);
+			if (zone_retry)
+				break;
+		}
+
+		/* must delete as __free_one_page list manipulates */
+		list_del(&page->lru);
+
+		mt = get_pcppage_migratetype(page);
+		/* MIGRATE_ISOLATE page should not go to pcplists */
+		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+		/* Pageblock could have been isolated meanwhile */
+		if (unlikely(isolated_pageblocks))
+			mt = get_pageblock_migratetype(page);
+
+		if (bulkfree_pcp_prepare(page))
+			continue;
+
+		__free_one_page(page, page_to_pfn(page), zone, 0, mt);
+		trace_mm_page_pcpu_drain(page, 0, mt);
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Moves a number of pages from the PCP lists to a free list, which is
+ * then freed outside of the locked region.
+ *
+ * Assumes all pages on list are in same zone, and of same order.
+ * count is the number of pages to free.
+ */
+static void isolate_pcp_pages(int count, struct per_cpu_pages *src,
+			      struct list_head *dst)
+{
+	int migratetype = 0;
+	int batch_free = 0;
+
 	while (count) {
 		struct page *page;
 		struct list_head *list;
@@ -1148,7 +1196,7 @@ static void free_pcppages_bulk(struct zo
 			batch_free++;
 			if (++migratetype == MIGRATE_PCPTYPES)
 				migratetype = 0;
-			list = &pcp->lists[migratetype];
+			list = &src->lists[migratetype];
 		} while (list_empty(list));
 
 		/* This is the only non-empty list. Free them all. */
@@ -1156,27 +1204,12 @@ static void free_pcppages_bulk(struct zo
 			batch_free = count;
 
 		do {
-			int mt;	/* migratetype of the to-be-freed page */
-
 			page = list_last_entry(list, struct page, lru);
-			/* must delete as __free_one_page list manipulates */
 			list_del(&page->lru);
 
-			mt = get_pcppage_migratetype(page);
-			/* MIGRATE_ISOLATE page should not go to pcplists */
-			VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-			/* Pageblock could have been isolated meanwhile */
-			if (unlikely(isolated_pageblocks))
-				mt = get_pageblock_migratetype(page);
-
-			if (bulkfree_pcp_prepare(page))
-				continue;
-
-			__free_one_page(page, page_to_pfn(page), zone, 0, mt);
-			trace_mm_page_pcpu_drain(page, 0, mt);
+			list_add(&page->lru, dst);
 		} while (--count && --batch_free && !list_empty(list));
 	}
-	spin_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1184,13 +1217,15 @@ static void free_one_page(struct zone *z
 			unsigned int order,
 			int migratetype)
 {
-	spin_lock(&zone->lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&zone->lock, flags);
 	if (unlikely(has_isolate_pageblock(zone) ||
 		     is_migrate_isolate(migratetype))) {
 		migratetype = get_pfnblock_migratetype(page, pfn);
 	}
 	__free_one_page(page, pfn, zone, order, migratetype);
-	spin_unlock(&zone->lock);
+	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -2426,16 +2461,18 @@ static int rmqueue_bulk(struct zone *zon
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
 	unsigned long flags;
+	LIST_HEAD(dst);
 	int to_drain, batch;
 
 	local_lock_irqsave(pa_lock, flags);
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
 	if (to_drain > 0) {
-		free_pcppages_bulk(zone, to_drain, pcp);
+		isolate_pcp_pages(to_drain, pcp, &dst);
 		pcp->count -= to_drain;
 	}
 	local_unlock_irqrestore(pa_lock, flags);
+	free_pcppages_bulk(zone, &dst, false);
 }
 #endif
 
@@ -2451,16 +2488,21 @@ static void drain_pages_zone(unsigned in
 	unsigned long flags;
 	struct per_cpu_pageset *pset;
 	struct per_cpu_pages *pcp;
+	LIST_HEAD(dst);
+	int count;
 
 	cpu_lock_irqsave(cpu, flags);
 	pset = per_cpu_ptr(zone->pageset, cpu);
 
 	pcp = &pset->pcp;
-	if (pcp->count) {
-		free_pcppages_bulk(zone, pcp->count, pcp);
+	count = pcp->count;
+	if (count) {
+		isolate_pcp_pages(count, pcp, &dst);
 		pcp->count = 0;
 	}
 	cpu_unlock_irqrestore(cpu, flags);
+	if (count)
+		free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
@@ -2663,7 +2705,8 @@ static bool free_unref_page_prepare(stru
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+				   struct list_head *dst)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -2692,7 +2735,8 @@ static void free_unref_page_commit(struc
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
-		free_pcppages_bulk(zone, batch, pcp);
+
+		isolate_pcp_pages(batch, pcp, dst);
 		pcp->count -= batch;
 	}
 }
@@ -2704,13 +2748,17 @@ void free_unref_page(struct page *page)
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
+	struct zone *zone = page_zone(page);
+	LIST_HEAD(dst);
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
 	local_lock_irqsave(pa_lock, flags);
-	free_unref_page_commit(page, pfn);
+	free_unref_page_commit(page, pfn, &dst);
+
 	local_unlock_irqrestore(pa_lock, flags);
+	free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
@@ -2720,7 +2768,11 @@ void free_unref_page_list(struct list_he
 {
 	struct page *page, *next;
 	unsigned long flags, pfn;
-	int batch_count = 0;
+	struct list_head dsts[__MAX_NR_ZONES];
+	int i;
+
+	for (i = 0; i < __MAX_NR_ZONES; i++)
+		INIT_LIST_HEAD(&dsts[i]);
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
@@ -2733,22 +2785,32 @@ void free_unref_page_list(struct list_he
 	local_lock_irqsave(pa_lock, flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
+		enum zone_type type;
 
 		set_page_private(page, 0);
 		trace_mm_page_free_batched(page);
-		free_unref_page_commit(page, pfn);
+		type = page_zonenum(page);
+		free_unref_page_commit(page, pfn, &dsts[type]);
 
-		/*
-		 * Guard against excessive IRQ disabled times when we get
-		 * a large list of pages to free.
-		 */
-		if (++batch_count == SWAP_CLUSTER_MAX) {
-			local_unlock_irqrestore(pa_lock, flags);
-			batch_count = 0;
-			local_lock_irqsave(pa_lock, flags);
-		}
 	}
 	local_unlock_irqrestore(pa_lock, flags);
+
+	i = 0;
+	do {
+		struct page *page;
+		struct zone *zone;
+
+		if (i >= __MAX_NR_ZONES)
+			break;
+		if (list_empty(&dsts[i])) {
+			i++;
+			continue;
+		}
+		page = list_first_entry(&dsts[i], struct page, lru);
+		zone = page_zone(page);
+
+		free_pcppages_bulk(zone, &dsts[i], true);
+	} while (1);
 }
 
 /*