From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 3 Jul 2009 08:44:43 -0500
Subject: mm: More lock breaks in slab.c
Handle __free_pages() outside of the locked regions. This significantly
reduces contention on the percpu slab locks in -rt; a standalone sketch
of the deferral pattern follows the --- line below.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
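Not part of the patch: the sketch below restates the deferral idea as a
small userspace C program rather than kernel code. A pthread mutex stands
in for the -rt local_irq_lock, malloc()/free() stand in for page
allocation/__free_pages(), and the names pending_free, free_delayed_add
and unlock_and_free_delayed are invented here for illustration only.

/*
 * Sketch of the pattern used by this patch: anything queued for freeing
 * while the (sleeping, in -rt) slab lock is held only goes onto a list;
 * the actual free runs after the lock has been dropped.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct deferred {
	struct deferred *next;
	void *addr;
};

static pthread_mutex_t slab_lock = PTHREAD_MUTEX_INITIALIZER;
static struct deferred *pending_free;	/* per-CPU slab_free_list in the patch */

/* Called with slab_lock held: queue the object instead of freeing it. */
static void free_delayed_add(void *addr)
{
	struct deferred *d = malloc(sizeof(*d));

	d->addr = addr;
	d->next = pending_free;
	pending_free = d;
}

/* Detach the list, drop the lock, then do the (expensive) frees. */
static void unlock_and_free_delayed(void)
{
	struct deferred *list = pending_free;

	pending_free = NULL;
	pthread_mutex_unlock(&slab_lock);

	while (list) {
		struct deferred *d = list;

		list = d->next;
		free(d->addr);		/* __free_pages() in the patch */
		free(d);
	}
}

int main(void)
{
	pthread_mutex_lock(&slab_lock);
	/* ... freeing work that must stay under the lock ... */
	free_delayed_add(malloc(4096));
	free_delayed_add(malloc(4096));
	unlock_and_free_delayed();
	printf("deferred frees done outside the lock\n");
	return 0;
}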
mm/slab.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 86 insertions(+), 22 deletions(-)
Index: linux-stable/mm/slab.c
===================================================================
--- linux-stable.orig/mm/slab.c
+++ linux-stable/mm/slab.c
@@ -704,6 +704,7 @@ static void slab_set_debugobj_lock_class
#endif
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
+static DEFINE_PER_CPU(struct list_head, slab_free_list);
static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
#ifndef CONFIG_PREEMPT_RT_BASE
@@ -719,14 +720,57 @@ slab_on_each_cpu(void (*func)(void *arg,
{
unsigned int i;
- for_each_online_cpu(i) {
- spin_lock_irq(&per_cpu(slab_lock, i).lock);
+ get_cpu_light();
+ for_each_online_cpu(i)
func(arg, i);
- spin_unlock_irq(&per_cpu(slab_lock, i).lock);
- }
+ put_cpu_light();
+}
+
+static void lock_slab_on(unsigned int cpu)
+{
+ if (cpu == smp_processor_id())
+ local_lock_irq(slab_lock);
+ else
+ local_spin_lock_irq(slab_lock, &per_cpu(slab_lock, cpu).lock);
+}
+
+static void unlock_slab_on(unsigned int cpu)
+{
+ if (cpu == smp_processor_id())
+ local_unlock_irq(slab_lock);
+ else
+ local_spin_unlock_irq(slab_lock, &per_cpu(slab_lock, cpu).lock);
}
#endif
+static void free_delayed(struct list_head *h)
+{
+ while(!list_empty(h)) {
+ struct page *page = list_first_entry(h, struct page, lru);
+
+ list_del(&page->lru);
+ __free_pages(page, page->index);
+ }
+}
+
+static void unlock_l3_and_free_delayed(spinlock_t *list_lock)
+{
+ LIST_HEAD(tmp);
+
+ list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+ local_spin_unlock_irq(slab_lock, list_lock);
+ free_delayed(&tmp);
+}
+
+static void unlock_slab_and_free_delayed(unsigned long flags)
+{
+ LIST_HEAD(tmp);
+
+ list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+ local_unlock_irqrestore(slab_lock, flags);
+ free_delayed(&tmp);
+}
+
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
return cachep->array[smp_processor_id()];
@@ -1340,7 +1384,7 @@ static void __cpuinit cpuup_canceled(lon
free_block(cachep, nc->entry, nc->avail, node);
if (!cpumask_empty(mask)) {
- local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ unlock_l3_and_free_delayed(&l3->list_lock);
goto free_array_cache;
}
@@ -1354,7 +1398,7 @@ static void __cpuinit cpuup_canceled(lon
alien = l3->alien;
l3->alien = NULL;
- local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ unlock_l3_and_free_delayed(&l3->list_lock);
kfree(shared);
if (alien) {
@@ -1635,6 +1679,8 @@ void __init kmem_cache_init(void)
use_alien_caches = 0;
local_irq_lock_init(slab_lock);
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
@@ -1973,12 +2019,14 @@ static void *kmem_getpages(struct kmem_c
/*
* Interface to system's page release.
*/
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
{
unsigned long i = (1 << cachep->gfporder);
- struct page *page = virt_to_page(addr);
+ struct page *page, *basepage = virt_to_page(addr);
const unsigned long nr_freed = i;
+ page = basepage;
+
kmemcheck_free_shadow(page, cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1995,7 +2043,13 @@ static void kmem_freepages(struct kmem_c
}
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
- free_pages((unsigned long)addr, cachep->gfporder);
+
+ if (!delayed) {
+ free_pages((unsigned long)addr, cachep->gfporder);
+ } else {
+ basepage->index = cachep->gfporder;
+ list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
+ }
}
static void kmem_rcu_free(struct rcu_head *head)
@@ -2003,7 +2057,7 @@ static void kmem_rcu_free(struct rcu_hea
struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
struct kmem_cache *cachep = slab_rcu->cachep;
- kmem_freepages(cachep, slab_rcu->addr);
+ kmem_freepages(cachep, slab_rcu->addr, false);
if (OFF_SLAB(cachep))
kmem_cache_free(cachep->slabp_cache, slab_rcu);
}
@@ -2222,7 +2276,8 @@ static void slab_destroy_debugcheck(stru
* Before calling the slab must have been unlinked from the cache. The
* cache-lock is not held/needed.
*/
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
+ bool delayed)
{
void *addr = slabp->s_mem - slabp->colouroff;
@@ -2235,7 +2290,7 @@ static void slab_destroy(struct kmem_cac
slab_rcu->addr = addr;
call_rcu(&slab_rcu->head, kmem_rcu_free);
} else {
- kmem_freepages(cachep, addr);
+ kmem_freepages(cachep, addr, delayed);
if (OFF_SLAB(cachep))
kmem_cache_free(cachep->slabp_cache, slabp);
}
@@ -2700,9 +2755,15 @@ static void do_drain(void *arg)
__do_drain(arg, smp_processor_id());
}
#else
-static void do_drain(void *arg, int this_cpu)
+static void do_drain(void *arg, int cpu)
{
- __do_drain(arg, this_cpu);
+ LIST_HEAD(tmp);
+
+ lock_slab_on(cpu);
+ __do_drain(arg, cpu);
+ list_splice_init(&per_cpu(slab_free_list, cpu), &tmp);
+ unlock_slab_on(cpu);
+ free_delayed(&tmp);
}
#endif
@@ -2760,7 +2821,7 @@ static int drain_freelist(struct kmem_ca
*/
l3->free_objects -= cache->num;
local_spin_unlock_irq(slab_lock, &l3->list_lock);
- slab_destroy(cache, slabp);
+ slab_destroy(cache, slabp, false);
nr_freed++;
}
out:
@@ -3095,7 +3156,7 @@ static int cache_grow(struct kmem_cache
spin_unlock(&l3->list_lock);
return 1;
opps1:
- kmem_freepages(cachep, objp);
+ kmem_freepages(cachep, objp, false);
failed:
if (local_flags & __GFP_WAIT)
local_lock_irq(slab_lock);
@@ -3772,7 +3833,7 @@ static void free_block(struct kmem_cache
* a different cache, refer to comments before
* alloc_slabmgmt.
*/
- slab_destroy(cachep, slabp);
+ slab_destroy(cachep, slabp, true);
} else {
list_add(&slabp->list, &l3->slabs_free);
}
@@ -4039,7 +4100,7 @@ void kmem_cache_free(struct kmem_cache *
debug_check_no_obj_freed(objp, cachep->object_size);
local_lock_irqsave(slab_lock, flags);
__cache_free(cachep, objp, __builtin_return_address(0));
- local_unlock_irqrestore(slab_lock, flags);
+ unlock_slab_and_free_delayed(flags);
trace_kmem_cache_free(_RET_IP_, objp);
}
@@ -4070,7 +4131,7 @@ void kfree(const void *objp)
debug_check_no_obj_freed(objp, c->object_size);
local_lock_irqsave(slab_lock, flags);
__cache_free(c, (void *)objp, __builtin_return_address(0));
- local_unlock_irqrestore(slab_lock, flags);
+ unlock_slab_and_free_delayed(flags);
}
EXPORT_SYMBOL(kfree);
@@ -4126,7 +4187,8 @@ static int alloc_kmemlist(struct kmem_ca
}
l3->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ unlock_l3_and_free_delayed(&l3->list_lock);
+
kfree(shared);
free_alien_cache(new_alien);
continue;
@@ -4192,7 +4254,9 @@ static void do_ccupdate_local(void *info
#else
static void do_ccupdate_local(void *info, int cpu)
{
+ lock_slab_on(cpu);
__do_ccupdate_local(info, cpu);
+ unlock_slab_on(cpu);
}
#endif
@@ -4234,8 +4298,8 @@ static int do_tune_cpucache(struct kmem_
local_spin_lock_irq(slab_lock,
&cachep->nodelists[cpu_to_mem(i)]->list_lock);
free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
- local_spin_unlock_irq(slab_lock,
- &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+
+ unlock_l3_and_free_delayed(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
kfree(ccold);
}
kfree(new);