From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Jun 2018 17:29:19 +0200
Subject: [PATCH 4/4] mm/SLUB: delay giving back empty slubs to IRQ enabled
 regions

__free_slab() is invoked with interrupts disabled, which increases the
irq-off time while __free_pages() is doing the work.
Allow __free_slab() to be invoked with interrupts enabled, and move
everything freed from interrupts-off invocations to a temporary per-CPU
list so it can be processed later, once interrupts are enabled again.
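
To make the idea easier to follow: contexts that must not call
__free_pages() (interrupts disabled) only queue the empty slabs on a
per-CPU list; a later caller running with interrupts enabled splices
that list onto a local list head and frees the pages outside the
irq-off region. A minimal userspace-style sketch of this defer-and-flush
pattern follows; it is illustrative only, and none of the names in it
are part of this patch:

	/* Illustrative defer-and-flush pattern, not kernel code. */
	#include <stdio.h>
	#include <stdlib.h>

	struct item {
		struct item *next;
		int id;
	};

	/* Stands in for the per-CPU slub_free_list in the patch. */
	static struct item *deferred;

	/* Called where freeing is not allowed: only queue the item. */
	static void free_deferred(struct item *it)
	{
		it->next = deferred;
		deferred = it;
	}

	/* Called after leaving the restricted section: free for real. */
	static void flush_deferred(void)
	{
		struct item *it = deferred;

		deferred = NULL;
		while (it) {
			struct item *next = it->next;

			printf("freeing item %d\n", it->id);
			free(it);
			it = next;
		}
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++) {
			struct item *it = malloc(sizeof(*it));

			if (!it)
				break;
			it->id = i;
			free_deferred(it);	/* think: IRQs disabled */
		}
		flush_deferred();		/* think: IRQs enabled again */
		return 0;
	}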

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 mm/slub.c |   74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 69 insertions(+), 5 deletions(-)

--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1401,6 +1401,12 @@ static inline void dec_slabs_node(struct
 
 #endif /* CONFIG_SLUB_DEBUG */
 
+struct slub_free_list {
+	raw_spinlock_t		lock;
+	struct list_head	list;
+};
+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
+
 /*
  * Hooks for other subsystems that check memory allocations. In a typical
  * production configuration these hooks all should produce no code at all.
@@ -1751,6 +1757,16 @@ static void __free_slab(struct kmem_cach
 	__free_pages(page, order);
 }
 
+static void free_delayed(struct list_head *h)
+{
+	while (!list_empty(h)) {
+		struct page *page = list_first_entry(h, struct page, lru);
+
+		list_del(&page->lru);
+		__free_slab(page->slab_cache, page);
+	}
+}
+
 static void rcu_free_slab(struct rcu_head *h)
 {
 	struct page *page = container_of(h, struct page, rcu_head);
@@ -1762,6 +1778,12 @@ static void free_slab(struct kmem_cache
 {
 	if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
 		call_rcu(&page->rcu_head, rcu_free_slab);
+	} else if (irqs_disabled()) {
+		struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
+
+		raw_spin_lock(&f->lock);
+		list_add(&page->lru, &f->list);
+		raw_spin_unlock(&f->lock);
 	} else
 		__free_slab(s, page);
 }
@@ -2283,14 +2305,21 @@ static void put_cpu_partial(struct kmem_
 			pobjects = oldpage->pobjects;
 			pages = oldpage->pages;
 			if (drain && pobjects > s->cpu_partial) {
+				struct slub_free_list *f;
 				unsigned long flags;
+				LIST_HEAD(tofree);
 				/*
 				 * partial array is full. Move the existing
 				 * set to the per node partial list.
 				 */
 				local_irq_save(flags);
 				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+				f = this_cpu_ptr(&slub_free_list);
+				raw_spin_lock(&f->lock);
+				list_splice_init(&f->list, &tofree);
+				raw_spin_unlock(&f->lock);
 				local_irq_restore(flags);
+				free_delayed(&tofree);
 				oldpage = NULL;
 				pobjects = 0;
 				pages = 0;
@@ -2358,7 +2387,22 @@ static bool has_cpu_slab(int cpu, void *
 
 static void flush_all(struct kmem_cache *s)
 {
+	LIST_HEAD(tofree);
+	int cpu;
+
 	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
+	for_each_online_cpu(cpu) {
+		struct slub_free_list *f;
+
+		if (!has_cpu_slab(cpu, s))
+			continue;
+
+		f = &per_cpu(slub_free_list, cpu);
+		raw_spin_lock_irq(&f->lock);
+		list_splice_init(&f->list, &tofree);
+		raw_spin_unlock_irq(&f->lock);
+		free_delayed(&tofree);
+	}
 }
 
 /*
@@ -2555,8 +2599,10 @@ static inline void *get_freelist(struct
  * already disabled (which is the case for bulk allocation).
  */
 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
-			  unsigned long addr, struct kmem_cache_cpu *c)
+			  unsigned long addr, struct kmem_cache_cpu *c,
+			  struct list_head *to_free)
 {
+	struct slub_free_list *f;
 	void *freelist;
 	struct page *page;
 
@@ -2622,6 +2668,13 @@ static void *___slab_alloc(struct kmem_c
 	VM_BUG_ON(!c->page->frozen);
 	c->freelist = get_freepointer(s, freelist);
 	c->tid = next_tid(c->tid);
+
+out:
+	f = this_cpu_ptr(&slub_free_list);
+	raw_spin_lock(&f->lock);
+	list_splice_init(&f->list, to_free);
+	raw_spin_unlock(&f->lock);
+
 	return freelist;
 
 new_slab:
@@ -2637,7 +2690,7 @@ static void *___slab_alloc(struct kmem_c
 
 	if (unlikely(!freelist)) {
 		slab_out_of_memory(s, gfpflags, node);
-		return NULL;
+		goto out;
 	}
 
 	page = c->page;
@@ -2650,7 +2703,7 @@ static void *___slab_alloc(struct kmem_c
 		goto new_slab;	/* Slab failed checks. Next slab needed */
 
 	deactivate_slab(s, page, get_freepointer(s, freelist), c);
-	return freelist;
+	goto out;
 }
 
 /*
@@ -2662,6 +2715,7 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void *p;
 	unsigned long flags;
+	LIST_HEAD(tofree);
 
 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPTION
@@ -2673,8 +2727,9 @@ static void *__slab_alloc(struct kmem_ca
 	c = this_cpu_ptr(s->cpu_slab);
 #endif
 
-	p = ___slab_alloc(s, gfpflags, node, addr, c);
+	p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
 	local_irq_restore(flags);
+	free_delayed(&tofree);
 	return p;
 }
 
@@ -3166,6 +3221,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
 			  void **p)
 {
 	struct kmem_cache_cpu *c;
+	LIST_HEAD(to_free);
 	int i;
 
 	/* memcg and kmem_cache debug support */
@@ -3198,7 +3254,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
 			 * of re-populating per CPU c->freelist
 			 */
 			p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
-					    _RET_IP_, c);
+					    _RET_IP_, c, &to_free);
 			if (unlikely(!p[i]))
 				goto error;
 
@@ -3213,6 +3269,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
 	}
 	c->tid = next_tid(c->tid);
 	local_irq_enable();
+	free_delayed(&to_free);
 
 	/* Clear memory outside IRQ disabled fastpath loop */
 	if (unlikely(slab_want_init_on_alloc(flags, s))) {
@@ -3227,6 +3284,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
 	return i;
 error:
 	local_irq_enable();
+	free_delayed(&to_free);
 	slab_post_alloc_hook(s, flags, i, p);
 	__kmem_cache_free_bulk(s, i, p);
 	return 0;
@@ -4263,6 +4321,12 @@ void __init kmem_cache_init(void)
 {
 	static __initdata struct kmem_cache boot_kmem_cache,
 		boot_kmem_cache_node;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
+		INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
+	}
 
 	if (debug_guardpage_minorder())
 		slub_max_order = 0;