From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 3 Jul 2009 08:44:43 -0500
Subject: mm: More lock breaks in slab.c

Handle __free_pages outside of the locked regions. This reduces the
lock contention on the percpu slab locks in -rt significantly.
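
The idea: while the per-cpu slab lock is held, pages that become free are
only queued on a per-cpu list; once the lock has been dropped the list is
drained and the pages are handed to __free_pages(). Below is a self-contained
userspace sketch of that deferred-free pattern; the pthread mutex, the
intrusive list and all of the names in it are made up for illustration only —
the patch itself queues pages via page->lru on the per-cpu slab_free_list and
drains it from the unlock helpers.

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t hot_lock = PTHREAD_MUTEX_INITIALIZER;
static void *deferred_list;		/* blocks queued while hot_lock is held */

/*
 * Queue a block (>= sizeof(void *)) for later freeing instead of calling
 * free() under the lock; its first bytes serve as the list link, much like
 * the patch reuses page->lru of the page that is about to be freed.
 */
static void queue_free(void *mem)		/* called with hot_lock held */
{
	*(void **)mem = deferred_list;
	deferred_list = mem;
}

static void unlock_and_free_delayed(void)	/* called with hot_lock held */
{
	void *list = deferred_list;		/* splice the queue ... */

	deferred_list = NULL;
	pthread_mutex_unlock(&hot_lock);	/* ... drop the contended lock ... */

	while (list) {				/* ... and do the slow frees unlocked */
		void *next = *(void **)list;

		free(list);
		list = next;
	}
}

free_delayed(), unlock_l3_and_free_delayed() and unlock_slab_and_free_delayed()
below implement exactly this splice-then-free step for the real slab locks.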

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 mm/slab.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 64 insertions(+), 18 deletions(-)

--- a/mm/slab.c
+++ b/mm/slab.c
@@ -697,6 +697,7 @@ static void slab_set_debugobj_lock_class
 #endif
 
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
+static DEFINE_PER_CPU(struct list_head, slab_free_list);
 static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
 
 #ifndef CONFIG_PREEMPT_RT_BASE
@@ -729,6 +730,34 @@ static void unlock_slab_on(unsigned int
 }
 #endif
 
+static void free_delayed(struct list_head *h)
+{
+	while(!list_empty(h)) {
+		struct page *page = list_first_entry(h, struct page, lru);
+
+		list_del(&page->lru);
+		__free_pages(page, page->index);
+	}
+}
+
+static void unlock_l3_and_free_delayed(spinlock_t *list_lock)
+{
+	LIST_HEAD(tmp);
+
+	list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+	local_spin_unlock_irq(slab_lock, list_lock);
+	free_delayed(&tmp);
+}
+
+static void unlock_slab_and_free_delayed(unsigned long flags)
+{
+	LIST_HEAD(tmp);
+
+	list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+	local_unlock_irqrestore(slab_lock, flags);
+	free_delayed(&tmp);
+}
+
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
 	return cachep->array[smp_processor_id()];
@@ -1345,7 +1374,7 @@ static void __cpuinit cpuup_canceled(lon
 			free_block(cachep, nc->entry, nc->avail, node);
 
 		if (!cpumask_empty(mask)) {
-			local_spin_unlock_irq(slab_lock, &l3->list_lock);
+			unlock_l3_and_free_delayed(&l3->list_lock);
 			goto free_array_cache;
 		}
 
@@ -1359,7 +1388,7 @@ static void __cpuinit cpuup_canceled(lon
 		alien = l3->alien;
 		l3->alien = NULL;
 
-		local_spin_unlock_irq(slab_lock, &l3->list_lock);
+		unlock_l3_and_free_delayed(&l3->list_lock);
 
 		kfree(shared);
 		if (alien) {
@@ -1652,6 +1681,8 @@ void __init kmem_cache_init(void)
 		use_alien_caches = 0;
 
 	local_irq_lock_init(slab_lock);
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
 
 	for (i = 0; i < NUM_INIT_LISTS; i++)
 		kmem_list3_init(&initkmem_list3[i]);
@@ -1953,12 +1984,14 @@ static void *kmem_getpages(struct kmem_c
 /*
  * Interface to system's page release.
  */
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
 {
 	unsigned long i = (1 << cachep->gfporder);
-	struct page *page = virt_to_page(addr);
+	struct page *page, *basepage = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
+	page = basepage;
+
 	kmemcheck_free_shadow(page, cachep->gfporder);
 
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1977,7 +2010,12 @@ static void kmem_freepages(struct kmem_c
 	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
-	free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
+	if (!delayed) {
+		free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
+	} else {
+		basepage->index = cachep->gfporder;
+		list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
+	}
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
@@ -1985,7 +2023,7 @@ static void kmem_rcu_free(struct rcu_hea
 	struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
 	struct kmem_cache *cachep = slab_rcu->cachep;
 
-	kmem_freepages(cachep, slab_rcu->addr);
+	kmem_freepages(cachep, slab_rcu->addr, false);
 	if (OFF_SLAB(cachep))
 		kmem_cache_free(cachep->slabp_cache, slab_rcu);
 }
@@ -2204,7 +2242,8 @@ static void slab_destroy_debugcheck(stru
  * Before calling the slab must have been unlinked from the cache. The
  * cache-lock is not held/needed.
  */
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
+			 bool delayed)
 {
 	void *addr = slabp->s_mem - slabp->colouroff;
 
@@ -2217,7 +2256,7 @@ static void slab_destroy(struct kmem_cac
 		slab_rcu->addr = addr;
 		call_rcu(&slab_rcu->head, kmem_rcu_free);
 	} else {
-		kmem_freepages(cachep, addr);
+		kmem_freepages(cachep, addr, delayed);
 		if (OFF_SLAB(cachep))
 			kmem_cache_free(cachep->slabp_cache, slabp);
 	}
@@ -2628,9 +2667,15 @@ static void do_drain(void *arg)
 	__do_drain(arg, smp_processor_id());
 }
 #else
-static void do_drain(void *arg, int this_cpu)
+static void do_drain(void *arg, int cpu)
 {
-	__do_drain(arg, this_cpu);
+	LIST_HEAD(tmp);
+
+	lock_slab_on(cpu);
+	__do_drain(arg, cpu);
+	list_splice_init(&per_cpu(slab_free_list, cpu), &tmp);
+	unlock_slab_on(cpu);
+	free_delayed(&tmp);
 }
 #endif
 
@@ -2688,7 +2733,7 @@ static int drain_freelist(struct kmem_ca
 		 */
 		l3->free_objects -= cache->num;
 		local_spin_unlock_irq(slab_lock, &l3->list_lock);
-		slab_destroy(cache, slabp);
+		slab_destroy(cache, slabp, false);
 		nr_freed++;
 	}
 out:
@@ -3003,7 +3048,7 @@ static int cache_grow(struct kmem_cache
 	spin_unlock(&l3->list_lock);
 	return 1;
 opps1:
-	kmem_freepages(cachep, objp);
+	kmem_freepages(cachep, objp, false);
 failed:
 	if (local_flags & __GFP_WAIT)
 		local_lock_irq(slab_lock);
@@ -3684,7 +3729,7 @@ static void free_block(struct kmem_cache
 				 * a different cache, refer to comments before
 				 * alloc_slabmgmt.
 				 */
-				slab_destroy(cachep, slabp);
+				slab_destroy(cachep, slabp, true);
 			} else {
 				list_add(&slabp->list, &l3->slabs_free);
 			}
@@ -3952,7 +3997,7 @@ void kmem_cache_free(struct kmem_cache *
 	debug_check_no_obj_freed(objp, cachep->object_size);
 	local_lock_irqsave(slab_lock, flags);
 	__cache_free(cachep, objp, _RET_IP_);
-	local_unlock_irqrestore(slab_lock, flags);
+	unlock_slab_and_free_delayed(flags);
 
 	trace_kmem_cache_free(_RET_IP_, objp);
 }
@@ -3983,7 +4028,7 @@ void kfree(const void *objp)
 	debug_check_no_obj_freed(objp, c->object_size);
 	local_lock_irqsave(slab_lock, flags);
 	__cache_free(c, (void *)objp, _RET_IP_);
-	local_unlock_irqrestore(slab_lock, flags);
+	unlock_slab_and_free_delayed(flags);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -4033,7 +4078,8 @@ static int alloc_kmemlist(struct kmem_ca
 			}
 			l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
-			local_spin_unlock_irq(slab_lock, &l3->list_lock);
+			unlock_l3_and_free_delayed(&l3->list_lock);
+
 			kfree(shared);
 			free_alien_cache(new_alien);
 			continue;
@@ -4141,8 +4187,8 @@ static int __do_tune_cpucache(struct kme
 		local_spin_lock_irq(slab_lock,
				    &cachep->nodelists[cpu_to_mem(i)]->list_lock);
 		free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
-		local_spin_unlock_irq(slab_lock,
-				      &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+
+		unlock_l3_and_free_delayed(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
 		kfree(ccold);
 	}
 	kfree(new);