From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 3 Jul 2009 08:44:43 -0500
Subject: mm: More lock breaks in slab.c

Handle __free_pages() outside of the locked regions: pages freed under
the per-cpu slab lock are queued on a per-cpu list and handed back to
the page allocator only after the lock has been dropped. This reduces
the contention on the percpu slab locks in -rt significantly.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
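
The caller-side pattern, as a small illustrative sketch: it inlines
unlock_slab_and_free_delayed() and borrows its names from the hunks
below; cachep, objp and flags are assumed surrounding context, so this
is a simplified composite of kmem_cache_free()/kfree(), not a verbatim
excerpt:

	unsigned long flags;
	LIST_HEAD(tmp);

	local_lock_irqsave(slab_lock, flags);
	/*
	 * Frees done while the lock is held only queue the backing
	 * pages: kmem_freepages(..., delayed == true) stashes the
	 * allocation order in page->index and links page->lru onto
	 * the per-cpu slab_free_list instead of entering the page
	 * allocator.
	 */
	__cache_free(cachep, objp, __builtin_return_address(0));

	/* Splice under the lock, call __free_pages() after unlocking. */
	list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
	local_unlock_irqrestore(slab_lock, flags);
	free_delayed(&tmp);
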
 mm/slab.c |  108 +++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 86 insertions(+), 22 deletions(-)

Index: linux-stable/mm/slab.c
===================================================================
--- linux-stable.orig/mm/slab.c
+++ linux-stable/mm/slab.c
@@ -704,6 +704,7 @@ static void slab_set_debugobj_lock_class
 #endif
 
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
+static DEFINE_PER_CPU(struct list_head, slab_free_list);
 static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
 
 #ifndef CONFIG_PREEMPT_RT_BASE
@@ -719,14 +720,57 @@ slab_on_each_cpu(void (*func)(void *arg,
 {
 	unsigned int i;
 
-	for_each_online_cpu(i) {
-		spin_lock_irq(&per_cpu(slab_lock, i).lock);
+	get_cpu_light();
+	for_each_online_cpu(i)
 		func(arg, i);
-		spin_unlock_irq(&per_cpu(slab_lock, i).lock);
-	}
+	put_cpu_light();
+}
+
+static void lock_slab_on(unsigned int cpu)
+{
+	if (cpu == smp_processor_id())
+		local_lock_irq(slab_lock);
+	else
+		local_spin_lock_irq(slab_lock, &per_cpu(slab_lock, cpu).lock);
+}
+
+static void unlock_slab_on(unsigned int cpu)
+{
+	if (cpu == smp_processor_id())
+		local_unlock_irq(slab_lock);
+	else
+		local_spin_unlock_irq(slab_lock, &per_cpu(slab_lock, cpu).lock);
 }
 #endif
 
+static void free_delayed(struct list_head *h)
+{
+	while (!list_empty(h)) {
+		struct page *page = list_first_entry(h, struct page, lru);
+
+		list_del(&page->lru);
+		__free_pages(page, page->index);
+	}
+}
+
+static void unlock_l3_and_free_delayed(spinlock_t *list_lock)
+{
+	LIST_HEAD(tmp);
+
+	list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+	local_spin_unlock_irq(slab_lock, list_lock);
+	free_delayed(&tmp);
+}
+
+static void unlock_slab_and_free_delayed(unsigned long flags)
+{
+	LIST_HEAD(tmp);
+
+	list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+	local_unlock_irqrestore(slab_lock, flags);
+	free_delayed(&tmp);
+}
+
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
 	return cachep->array[smp_processor_id()];
@@ -1340,7 +1384,7 @@ static void __cpuinit cpuup_canceled(lon
 			free_block(cachep, nc->entry, nc->avail, node);
 
 		if (!cpumask_empty(mask)) {
-			local_spin_unlock_irq(slab_lock, &l3->list_lock);
+			unlock_l3_and_free_delayed(&l3->list_lock);
 			goto free_array_cache;
 		}
 
@@ -1354,7 +1398,7 @@ static void __cpuinit cpuup_canceled(lon
 		alien = l3->alien;
 		l3->alien = NULL;
 
-		local_spin_unlock_irq(slab_lock, &l3->list_lock);
+		unlock_l3_and_free_delayed(&l3->list_lock);
 
 		kfree(shared);
 		if (alien) {
@@ -1635,6 +1679,8 @@ void __init kmem_cache_init(void)
 		use_alien_caches = 0;
 
 	local_irq_lock_init(slab_lock);
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
 
 	for (i = 0; i < NUM_INIT_LISTS; i++) {
 		kmem_list3_init(&initkmem_list3[i]);
@@ -1973,12 +2019,14 @@ static void *kmem_getpages(struct kmem_c
 /*
  * Interface to system's page release.
  */
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
 {
 	unsigned long i = (1 << cachep->gfporder);
-	struct page *page = virt_to_page(addr);
+	struct page *page, *basepage = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
+	page = basepage;
+
 	kmemcheck_free_shadow(page, cachep->gfporder);
 
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1995,7 +2043,13 @@ static void kmem_freepages(struct kmem_c
 	}
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
-	free_pages((unsigned long)addr, cachep->gfporder);
+
+	if (!delayed) {
+		free_pages((unsigned long)addr, cachep->gfporder);
+	} else {
+		basepage->index = cachep->gfporder;
+		list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
+	}
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
@@ -2003,7 +2057,7 @@ static void kmem_rcu_free(struct rcu_hea
 	struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
 	struct kmem_cache *cachep = slab_rcu->cachep;
 
-	kmem_freepages(cachep, slab_rcu->addr);
+	kmem_freepages(cachep, slab_rcu->addr, false);
 	if (OFF_SLAB(cachep))
 		kmem_cache_free(cachep->slabp_cache, slab_rcu);
 }
@@ -2222,7 +2276,8 @@ static void slab_destroy_debugcheck(stru
  * Before calling the slab must have been unlinked from the cache. The
  * cache-lock is not held/needed.
  */
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
+			 bool delayed)
 {
 	void *addr = slabp->s_mem - slabp->colouroff;
 
@@ -2235,7 +2290,7 @@ static void slab_destroy(struct kmem_cac
 		slab_rcu->addr = addr;
 		call_rcu(&slab_rcu->head, kmem_rcu_free);
 	} else {
-		kmem_freepages(cachep, addr);
+		kmem_freepages(cachep, addr, delayed);
 		if (OFF_SLAB(cachep))
 			kmem_cache_free(cachep->slabp_cache, slabp);
 	}
@@ -2700,9 +2755,15 @@ static void do_drain(void *arg)
 	__do_drain(arg, smp_processor_id());
 }
 #else
-static void do_drain(void *arg, int this_cpu)
+static void do_drain(void *arg, int cpu)
 {
-	__do_drain(arg, this_cpu);
+	LIST_HEAD(tmp);
+
+	lock_slab_on(cpu);
+	__do_drain(arg, cpu);
+	list_splice_init(&per_cpu(slab_free_list, cpu), &tmp);
+	unlock_slab_on(cpu);
+	free_delayed(&tmp);
 }
 #endif
 
@@ -2760,7 +2821,7 @@ static int drain_freelist(struct kmem_ca
 		 */
 		l3->free_objects -= cache->num;
 		local_spin_unlock_irq(slab_lock, &l3->list_lock);
-		slab_destroy(cache, slabp);
+		slab_destroy(cache, slabp, false);
 		nr_freed++;
 	}
 out:
@@ -3095,7 +3156,7 @@ static int cache_grow(struct kmem_cache
 	spin_unlock(&l3->list_lock);
 	return 1;
 opps1:
-	kmem_freepages(cachep, objp);
+	kmem_freepages(cachep, objp, false);
 failed:
 	if (local_flags & __GFP_WAIT)
 		local_lock_irq(slab_lock);
@@ -3772,7 +3833,7 @@ static void free_block(struct kmem_cache
 			 * a different cache, refer to comments before
 			 * alloc_slabmgmt.
 			 */
-			slab_destroy(cachep, slabp);
+			slab_destroy(cachep, slabp, true);
 		} else {
 			list_add(&slabp->list, &l3->slabs_free);
 		}
@@ -4039,7 +4100,7 @@ void kmem_cache_free(struct kmem_cache *
 	debug_check_no_obj_freed(objp, cachep->object_size);
 	local_lock_irqsave(slab_lock, flags);
 	__cache_free(cachep, objp, __builtin_return_address(0));
-	local_unlock_irqrestore(slab_lock, flags);
+	unlock_slab_and_free_delayed(flags);
 
 	trace_kmem_cache_free(_RET_IP_, objp);
 }
@@ -4070,7 +4131,7 @@ void kfree(const void *objp)
 	debug_check_no_obj_freed(objp, c->object_size);
 	local_lock_irqsave(slab_lock, flags);
 	__cache_free(c, (void *)objp, __builtin_return_address(0));
-	local_unlock_irqrestore(slab_lock, flags);
+	unlock_slab_and_free_delayed(flags);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -4126,7 +4187,8 @@ static int alloc_kmemlist(struct kmem_ca
 		}
 		l3->free_limit = (1 + nr_cpus_node(node)) *
 					cachep->batchcount + cachep->num;
-		local_spin_unlock_irq(slab_lock, &l3->list_lock);
+		unlock_l3_and_free_delayed(&l3->list_lock);
+
 		kfree(shared);
 		free_alien_cache(new_alien);
 		continue;
@@ -4192,7 +4254,9 @@ static void do_ccupdate_local(void *info
 #else
 static void do_ccupdate_local(void *info, int cpu)
 {
+	lock_slab_on(cpu);
 	__do_ccupdate_local(info, cpu);
+	unlock_slab_on(cpu);
 }
 #endif
 
@@ -4234,8 +4298,8 @@ static int do_tune_cpucache(struct kmem_
 		local_spin_lock_irq(slab_lock,
 				    &cachep->nodelists[cpu_to_mem(i)]->list_lock);
 		free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
-		local_spin_unlock_irq(slab_lock,
-				      &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+
+		unlock_l3_and_free_delayed(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
 		kfree(ccold);
 	}
 	kfree(new);