| From: Ingo Molnar <mingo@elte.hu> |
| Date: Fri, 3 Jul 2009 08:29:37 -0500 |
| Subject: [PATCH 8/8] mm: page_alloc: Use a local_lock instead of explicit |
| local_irq_save(). |
| |
| The page-allocator disables interrupts for a few reasons: |
| - Decouple the irqsave operation from spin_lock() so it can be |
| extended over the actual lock region and cover other areas. Areas like |
| counter increments where the preemptible version can be avoided. |
| |
| - Access to the per-CPU pcp from struct zone. |
| |
| Replace the irqsave with a local-lock. The counters are expected to be |
| always modified with disabled preemption and no access from interrupt |
| context. |
| |
| Contains fixes from: |
| Peter Zijlstra <a.p.zijlstra@chello.nl> |
| Thomas Gleixner <tglx@linutronix.de> |
| |
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| --- |
| mm/page_alloc.c | 49 ++++++++++++++++++++++++++++++------------------- |
| 1 file changed, 30 insertions(+), 19 deletions(-) |
| |
| --- a/mm/page_alloc.c |
| +++ b/mm/page_alloc.c |
| @@ -62,6 +62,7 @@ |
| #include <linux/hugetlb.h> |
| #include <linux/sched/rt.h> |
| #include <linux/sched/mm.h> |
| +#include <linux/local_lock.h> |
| #include <linux/page_owner.h> |
| #include <linux/kthread.h> |
| #include <linux/memcontrol.h> |
| @@ -363,6 +364,13 @@ EXPORT_SYMBOL(nr_online_nodes); |
| |
| int page_group_by_mobility_disabled __read_mostly; |
| |
| +struct pa_lock { |
| + local_lock_t l; |
| +}; |
| +static DEFINE_PER_CPU(struct pa_lock, pa_lock) = { |
| + .l = INIT_LOCAL_LOCK(l), |
| +}; |
| + |
| #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
| /* |
| * During boot we initialize deferred pages on-demand, as needed, but once |
| @@ -1537,11 +1545,11 @@ static void __free_pages_ok(struct page |
| return; |
| |
| migratetype = get_pfnblock_migratetype(page, pfn); |
| - local_irq_save(flags); |
| + local_lock_irqsave(&pa_lock.l, flags); |
| __count_vm_events(PGFREE, 1 << order); |
| free_one_page(page_zone(page), page, pfn, order, migratetype, |
| fpi_flags); |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&pa_lock.l, flags); |
| } |
| |
| void __free_pages_core(struct page *page, unsigned int order) |
| @@ -2958,12 +2966,12 @@ void drain_zone_pages(struct zone *zone, |
| unsigned long flags; |
| int to_drain, batch; |
| |
| - local_irq_save(flags); |
| + local_lock_irqsave(&pa_lock.l, flags); |
| batch = READ_ONCE(pcp->batch); |
| to_drain = min(pcp->count, batch); |
| if (to_drain > 0) |
| free_pcppages_bulk(zone, to_drain, pcp); |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&pa_lock.l, flags); |
| } |
| #endif |
| |
| @@ -2980,13 +2988,13 @@ static void drain_pages_zone(unsigned in |
| struct per_cpu_pageset *pset; |
| struct per_cpu_pages *pcp; |
| |
| - local_irq_save(flags); |
| + local_lock_irqsave(&pa_lock.l, flags); |
| pset = per_cpu_ptr(zone->pageset, cpu); |
| |
| pcp = &pset->pcp; |
| if (pcp->count) |
| free_pcppages_bulk(zone, pcp->count, pcp); |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&pa_lock.l, flags); |
| } |
| |
| /* |
| @@ -3249,9 +3257,9 @@ void free_unref_page(struct page *page) |
| if (!free_unref_page_prepare(page, pfn)) |
| return; |
| |
| - local_irq_save(flags); |
| + local_lock_irqsave(&pa_lock.l, flags); |
| free_unref_page_commit(page, pfn); |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&pa_lock.l, flags); |
| } |
| |
| /* |
| @@ -3271,7 +3279,7 @@ void free_unref_page_list(struct list_he |
| set_page_private(page, pfn); |
| } |
| |
| - local_irq_save(flags); |
| + local_lock_irqsave(&pa_lock.l, flags); |
| list_for_each_entry_safe(page, next, list, lru) { |
| unsigned long pfn = page_private(page); |
| |
| @@ -3284,12 +3292,12 @@ void free_unref_page_list(struct list_he |
| * a large list of pages to free. |
| */ |
| if (++batch_count == SWAP_CLUSTER_MAX) { |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&pa_lock.l, flags); |
| batch_count = 0; |
| - local_irq_save(flags); |
| + local_lock_irqsave(&pa_lock.l, flags); |
| } |
| } |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&pa_lock.l, flags); |
| } |
| |
| /* |
| @@ -3444,7 +3452,7 @@ static struct page *rmqueue_pcplist(stru |
| struct page *page; |
| unsigned long flags; |
| |
| - local_irq_save(flags); |
| + local_lock_irqsave(&pa_lock.l, flags); |
| pcp = &this_cpu_ptr(zone->pageset)->pcp; |
| list = &pcp->lists[migratetype]; |
| page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); |
| @@ -3452,7 +3460,7 @@ static struct page *rmqueue_pcplist(stru |
| __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); |
| zone_statistics(preferred_zone, zone); |
| } |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&pa_lock.l, flags); |
| return page; |
| } |
| |
| @@ -3486,7 +3494,9 @@ struct page *rmqueue(struct zone *prefer |
| * allocate greater than order-1 page units with __GFP_NOFAIL. |
| */ |
| WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); |
| - spin_lock_irqsave(&zone->lock, flags); |
| + |
| + local_lock_irqsave(&pa_lock.l, flags); |
| + spin_lock(&zone->lock); |
| |
| do { |
| page = NULL; |
| @@ -3507,12 +3517,13 @@ struct page *rmqueue(struct zone *prefer |
| spin_unlock(&zone->lock); |
| if (!page) |
| goto failed; |
| + |
| __mod_zone_freepage_state(zone, -(1 << order), |
| get_pcppage_migratetype(page)); |
| |
| __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); |
| zone_statistics(preferred_zone, zone); |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&pa_lock.l, flags); |
| |
| out: |
| /* Separate test+clear to avoid unnecessary atomics */ |
| @@ -3525,7 +3536,7 @@ struct page *rmqueue(struct zone *prefer |
| return page; |
| |
| failed: |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&pa_lock.l, flags); |
| return NULL; |
| } |
| |
| @@ -8813,7 +8824,7 @@ void zone_pcp_reset(struct zone *zone) |
| struct per_cpu_pageset *pset; |
| |
| /* avoid races with drain_pages() */ |
| - local_irq_save(flags); |
| + local_lock_irqsave(&pa_lock.l, flags); |
| if (zone->pageset != &boot_pageset) { |
| for_each_online_cpu(cpu) { |
| pset = per_cpu_ptr(zone->pageset, cpu); |
| @@ -8822,7 +8833,7 @@ void zone_pcp_reset(struct zone *zone) |
| free_percpu(zone->pageset); |
| zone->pageset = &boot_pageset; |
| } |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&pa_lock.l, flags); |
| } |
| |
| #ifdef CONFIG_MEMORY_HOTREMOVE |