| From 1684df2ad25964a7fa192cefac10ea28b7f6946a Mon Sep 17 00:00:00 2001 |
| From: Ingo Molnar <mingo@elte.hu> |
| Date: Fri, 3 Jul 2009 08:44:11 -0500 |
| Subject: [PATCH] mm: Restructure slab to use percpu locked data structures |
| |
| commit a20e0cb87d49f65bea34026c7dfe364c035a1123 in tip. |
| |
| Instead of relying on preemption being disabled, RT protects the per-CPU |
| slab data structures with per-CPU locks. The irq disable/enable helpers |
| therefore grow a 'cpu' argument that tracks which CPU's lock is held. As |
| a side effect, cache_reap() now reports whether a pass did any work and |
| doubles its reap interval when it did not. |
| |
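| A minimal sketch of the resulting calling convention (the function |
| below is hypothetical, for illustration only; slab_irq_save()/restore() |
| and the cpu_cache_get() signature are the ones this patch introduces): |
| |
| 	static void *example_peek(struct kmem_cache *cachep) |
| 	{ |
| 		struct array_cache *ac; |
| 		unsigned long flags; |
| 		int this_cpu; |
| 		void *objp = NULL; |
| |
| 		/* !RT: local_irq_save(); RT: take this CPU's slab lock */ |
| 		slab_irq_save(flags, this_cpu); |
| 		ac = cpu_cache_get(cachep, this_cpu); |
| 		if (ac->avail) |
| 			objp = ac->entry[--ac->avail]; |
| 		/* !RT: local_irq_restore(); RT: drop the lock again */ |
| 		slab_irq_restore(flags, this_cpu); |
| 		return objp; |
| 	} |
| |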
| This patch contains fixes from |
| Andi Kleen <ak@suse.de> |
| Peter Zijlstra <a.p.zijlstra@chello.nl> |
| Thomas Gleixner <tglx@linutronix.de> |
| |
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/mm/slab.c b/mm/slab.c |
| index a9f325b..3164b8d 100644 |
| --- a/mm/slab.c |
| +++ b/mm/slab.c |
| @@ -121,6 +121,120 @@ |
| #include <asm/page.h> |
| |
| /* |
| + * On !PREEMPT_RT, disabling interrupts acts as the per-CPU locking |
| + * mechanism. |
| + * |
| + * On PREEMPT_RT, we use per-CPU locks for this. That's why the |
| + * calling convention is changed slightly: a new 'cpu' argument |
| + * is passed to 'irq disable/enable' - the PREEMPT_RT code stores |
| + * the CPU number of the lock there. |
| + */ |
| +#ifndef CONFIG_PREEMPT_RT |
| + |
| +# define slab_irq_disable(cpu) \ |
| + do { local_irq_disable(); (cpu) = smp_processor_id(); } while (0) |
| +# define slab_irq_enable(cpu) local_irq_enable() |
| + |
| +static inline void slab_irq_disable_this_rt(int cpu) |
| +{ |
| +} |
| + |
| +static inline void slab_irq_enable_rt(int cpu) |
| +{ |
| +} |
| + |
| +# define slab_irq_save(flags, cpu) \ |
| + do { local_irq_save(flags); (cpu) = smp_processor_id(); } while (0) |
| +# define slab_irq_restore(flags, cpu) local_irq_restore(flags) |
| + |
| +/* |
| + * In the __GFP_WAIT case we enable/disable interrupts on !PREEMPT_RT, |
| + * which has no per-CPU locking effect since we are holding the cache |
| + * lock in that case already. |
| + */ |
| +static void slab_irq_enable_GFP_WAIT(gfp_t flags, int *cpu) |
| +{ |
| + if (flags & __GFP_WAIT) |
| + local_irq_enable(); |
| +} |
| + |
| +static void slab_irq_disable_GFP_WAIT(gfp_t flags, int *cpu) |
| +{ |
| + if (flags & __GFP_WAIT) |
| + local_irq_disable(); |
| +} |
| + |
| +# define slab_spin_lock_irq(lock, cpu) \ |
| + do { spin_lock_irq(lock); (cpu) = smp_processor_id(); } while (0) |
| +# define slab_spin_unlock_irq(lock, cpu) spin_unlock_irq(lock) |
| + |
| +# define slab_spin_lock_irqsave(lock, flags, cpu) \ |
| + do { spin_lock_irqsave(lock, flags); (cpu) = smp_processor_id(); } while (0) |
| +# define slab_spin_unlock_irqrestore(lock, flags, cpu) \ |
| + do { spin_unlock_irqrestore(lock, flags); } while (0) |
| + |
| +#else /* CONFIG_PREEMPT_RT */ |
| + |
| +/* |
| + * Instead of serializing the per-cpu state by disabling interrupts we do so |
| + * by a lock. This keeps the code preemptible - albeit at the cost of remote |
| + * memory access when the task does get migrated away. |
| + */ |
| +DEFINE_PER_CPU_LOCKED(int, slab_irq_locks) = { 0, }; |
| + |
| +static void _slab_irq_disable(int *cpu) |
| +{ |
| + get_cpu_var_locked(slab_irq_locks, cpu); |
| +} |
| + |
| +#define slab_irq_disable(cpu) _slab_irq_disable(&(cpu)) |
| + |
| +static inline void slab_irq_enable(int cpu) |
| +{ |
| + put_cpu_var_locked(slab_irq_locks, cpu); |
| +} |
| + |
| +static inline void slab_irq_disable_this_rt(int cpu) |
| +{ |
| + spin_lock(&__get_cpu_lock(slab_irq_locks, cpu)); |
| +} |
| + |
| +static inline void slab_irq_enable_rt(int cpu) |
| +{ |
| + spin_unlock(&__get_cpu_lock(slab_irq_locks, cpu)); |
| +} |
| + |
| +# define slab_irq_save(flags, cpu) \ |
| + do { slab_irq_disable(cpu); (void) (flags); } while (0) |
| +# define slab_irq_restore(flags, cpu) \ |
| + do { slab_irq_enable(cpu); (void) (flags); } while (0) |
| + |
| +/* |
| + * On PREEMPT_RT we have to drop the locks unconditionally to avoid lock |
| + * recursion on the cache_grow()->alloc_slabmgmt() path. |
| + */ |
| +static void slab_irq_enable_GFP_WAIT(gfp_t flags, int *cpu) |
| +{ |
| + slab_irq_enable(*cpu); |
| +} |
| + |
| +static void slab_irq_disable_GFP_WAIT(gfp_t flags, int *cpu) |
| +{ |
| + slab_irq_disable(*cpu); |
| +} |
| + |
| +# define slab_spin_lock_irq(lock, cpu) \ |
| + do { slab_irq_disable(cpu); spin_lock(lock); } while (0) |
| +# define slab_spin_unlock_irq(lock, cpu) \ |
| + do { spin_unlock(lock); slab_irq_enable(cpu); } while (0) |
| +# define slab_spin_lock_irqsave(lock, flags, cpu) \ |
| + do { slab_irq_disable(cpu); spin_lock_irqsave(lock, flags); } while (0) |
| +# define slab_spin_unlock_irqrestore(lock, flags, cpu) \ |
| + do { spin_unlock_irqrestore(lock, flags); slab_irq_enable(cpu); } while (0) |
| + |
| +#endif /* CONFIG_PREEMPT_RT */ |
| + |
| +/* |
| * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. |
| * 0 for faster, smaller code (especially in the critical paths). |
| * |
| @@ -316,7 +430,7 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; |
| static int drain_freelist(struct kmem_cache *cache, |
| struct kmem_list3 *l3, int tofree); |
| static void free_block(struct kmem_cache *cachep, void **objpp, int len, |
| - int node); |
| + int node, int *this_cpu); |
| static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); |
| static void cache_reap(struct work_struct *unused); |
| |
| @@ -699,9 +813,10 @@ static struct list_head cache_chain; |
| |
| static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); |
| |
| -static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) |
| +static inline struct array_cache * |
| +cpu_cache_get(struct kmem_cache *cachep, int this_cpu) |
| { |
| - return cachep->array[smp_processor_id()]; |
| + return cachep->array[this_cpu]; |
| } |
| |
| static inline struct kmem_cache *__find_general_cachep(size_t size, |
| @@ -941,7 +1056,7 @@ static int transfer_objects(struct array_cache *to, |
| #ifndef CONFIG_NUMA |
| |
| #define drain_alien_cache(cachep, alien) do { } while (0) |
| -#define reap_alien(cachep, l3) do { } while (0) |
| +#define reap_alien(cachep, l3, this_cpu) 0 |
| |
| static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) |
| { |
| @@ -952,27 +1067,28 @@ static inline void free_alien_cache(struct array_cache **ac_ptr) |
| { |
| } |
| |
| -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) |
| +static inline int |
| +cache_free_alien(struct kmem_cache *cachep, void *objp, int *this_cpu) |
| { |
| return 0; |
| } |
| |
| static inline void *alternate_node_alloc(struct kmem_cache *cachep, |
| - gfp_t flags) |
| + gfp_t flags, int *this_cpu) |
| { |
| return NULL; |
| } |
| |
| static inline void *____cache_alloc_node(struct kmem_cache *cachep, |
| - gfp_t flags, int nodeid) |
| + gfp_t flags, int nodeid, int *this_cpu) |
| { |
| return NULL; |
| } |
| |
| #else /* CONFIG_NUMA */ |
| |
| -static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); |
| -static void *alternate_node_alloc(struct kmem_cache *, gfp_t); |
| +static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int, int *); |
| +static void *alternate_node_alloc(struct kmem_cache *, gfp_t, int *); |
| |
| static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) |
| { |
| @@ -1011,7 +1127,8 @@ static void free_alien_cache(struct array_cache **ac_ptr) |
| } |
| |
| static void __drain_alien_cache(struct kmem_cache *cachep, |
| - struct array_cache *ac, int node) |
| + struct array_cache *ac, int node, |
| + int *this_cpu) |
| { |
| struct kmem_list3 *rl3 = cachep->nodelists[node]; |
| |
| @@ -1025,7 +1142,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep, |
| if (rl3->shared) |
| transfer_objects(rl3->shared, ac, ac->limit); |
| |
| - free_block(cachep, ac->entry, ac->avail, node); |
| + free_block(cachep, ac->entry, ac->avail, node, this_cpu); |
| ac->avail = 0; |
| spin_unlock(&rl3->list_lock); |
| } |
| @@ -1034,38 +1151,42 @@ static void __drain_alien_cache(struct kmem_cache *cachep, |
| /* |
| * Called from cache_reap() to regularly drain alien caches round robin. |
| */ |
| -static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) |
| +static int |
| +reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3, int *this_cpu) |
| { |
| - int node = __get_cpu_var(slab_reap_node); |
| + int node = per_cpu(slab_reap_node, *this_cpu); |
| |
| if (l3->alien) { |
| struct array_cache *ac = l3->alien[node]; |
| |
| if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { |
| - __drain_alien_cache(cachep, ac, node); |
| + __drain_alien_cache(cachep, ac, node, this_cpu); |
| spin_unlock_irq(&ac->lock); |
| + return 1; |
| } |
| } |
| + return 0; |
| } |
| |
| static void drain_alien_cache(struct kmem_cache *cachep, |
| struct array_cache **alien) |
| { |
| - int i = 0; |
| + int this_cpu, i = 0; |
| struct array_cache *ac; |
| unsigned long flags; |
| |
| for_each_online_node(i) { |
| ac = alien[i]; |
| if (ac) { |
| - spin_lock_irqsave(&ac->lock, flags); |
| - __drain_alien_cache(cachep, ac, i); |
| - spin_unlock_irqrestore(&ac->lock, flags); |
| + slab_spin_lock_irqsave(&ac->lock, flags, this_cpu); |
| + __drain_alien_cache(cachep, ac, i, &this_cpu); |
| + slab_spin_unlock_irqrestore(&ac->lock, flags, this_cpu); |
| } |
| } |
| } |
| |
| -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) |
| +static inline int |
| +cache_free_alien(struct kmem_cache *cachep, void *objp, int *this_cpu) |
| { |
| struct slab *slabp = virt_to_slab(objp); |
| int nodeid = slabp->nodeid; |
| @@ -1073,7 +1194,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) |
| struct array_cache *alien = NULL; |
| int node; |
| |
| - node = numa_node_id(); |
| + node = cpu_to_node(*this_cpu); |
| |
| /* |
| * Make sure we are not freeing a object from another node to the array |
| @@ -1089,20 +1210,20 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) |
| spin_lock(&alien->lock); |
| if (unlikely(alien->avail == alien->limit)) { |
| STATS_INC_ACOVERFLOW(cachep); |
| - __drain_alien_cache(cachep, alien, nodeid); |
| + __drain_alien_cache(cachep, alien, nodeid, this_cpu); |
| } |
| alien->entry[alien->avail++] = objp; |
| spin_unlock(&alien->lock); |
| } else { |
| spin_lock(&(cachep->nodelists[nodeid])->list_lock); |
| - free_block(cachep, &objp, 1, nodeid); |
| + free_block(cachep, &objp, 1, nodeid, this_cpu); |
| spin_unlock(&(cachep->nodelists[nodeid])->list_lock); |
| } |
| return 1; |
| } |
| #endif |
| |
| -static void __cpuinit cpuup_canceled(long cpu) |
| +static void __cpuinit cpuup_canceled(int cpu) |
| { |
| struct kmem_cache *cachep; |
| struct kmem_list3 *l3 = NULL; |
| @@ -1113,6 +1234,7 @@ static void __cpuinit cpuup_canceled(long cpu) |
| struct array_cache *nc; |
| struct array_cache *shared; |
| struct array_cache **alien; |
| + int orig_cpu = cpu; |
| |
| /* cpu is dead; no one can alloc from it. */ |
| nc = cachep->array[cpu]; |
| @@ -1127,7 +1249,7 @@ static void __cpuinit cpuup_canceled(long cpu) |
| /* Free limit for this kmem_list3 */ |
| l3->free_limit -= cachep->batchcount; |
| if (nc) |
| - free_block(cachep, nc->entry, nc->avail, node); |
| + free_block(cachep, nc->entry, nc->avail, node, &cpu); |
| |
| if (!cpumask_empty(mask)) { |
| spin_unlock_irq(&l3->list_lock); |
| @@ -1137,7 +1259,7 @@ static void __cpuinit cpuup_canceled(long cpu) |
| shared = l3->shared; |
| if (shared) { |
| free_block(cachep, shared->entry, |
| - shared->avail, node); |
| + shared->avail, node, &cpu); |
| l3->shared = NULL; |
| } |
| |
| @@ -1153,6 +1275,7 @@ static void __cpuinit cpuup_canceled(long cpu) |
| } |
| free_array_cache: |
| kfree(nc); |
| + BUG_ON(cpu != orig_cpu); |
| } |
| /* |
| * In the previous loop, all the objects were freed to |
| @@ -1167,7 +1290,7 @@ free_array_cache: |
| } |
| } |
| |
| -static int __cpuinit cpuup_prepare(long cpu) |
| +static int __cpuinit cpuup_prepare(int cpu) |
| { |
| struct kmem_cache *cachep; |
| struct kmem_list3 *l3 = NULL; |
| @@ -1277,10 +1400,19 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, |
| long cpu = (long)hcpu; |
| int err = 0; |
| |
| + |
| switch (action) { |
| case CPU_UP_PREPARE: |
| case CPU_UP_PREPARE_FROZEN: |
| mutex_lock(&cache_chain_mutex); |
| + /* |
| + * lock/unlock cycle to push any holders away -- no new ones |
| + * can come in due to the cpu still being offline. |
| + * |
| + * XXX -- weird case anyway, can it happen? |
| + */ |
| + slab_irq_disable_this_rt(cpu); |
| + slab_irq_enable_rt(cpu); |
| err = cpuup_prepare(cpu); |
| mutex_unlock(&cache_chain_mutex); |
| break; |
| @@ -1320,10 +1452,14 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, |
| case CPU_UP_CANCELED: |
| case CPU_UP_CANCELED_FROZEN: |
| mutex_lock(&cache_chain_mutex); |
| + slab_irq_disable_this_rt(cpu); |
| cpuup_canceled(cpu); |
| + slab_irq_enable_rt(cpu); |
| mutex_unlock(&cache_chain_mutex); |
| break; |
| } |
| + |
| + |
| return err ? NOTIFY_BAD : NOTIFY_OK; |
| } |
| |
| @@ -1510,32 +1646,34 @@ void __init kmem_cache_init(void) |
| /* 4) Replace the bootstrap head arrays */ |
| { |
| struct array_cache *ptr; |
| + int cpu = smp_processor_id(); |
| |
| ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); |
| |
| - BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); |
| - memcpy(ptr, cpu_cache_get(&cache_cache), |
| + BUG_ON(cpu_cache_get(&cache_cache, cpu) != |
| + &initarray_cache.cache); |
| + memcpy(ptr, cpu_cache_get(&cache_cache, cpu), |
| sizeof(struct arraycache_init)); |
| /* |
| * Do not assume that spinlocks can be initialized via memcpy: |
| */ |
| spin_lock_init(&ptr->lock); |
| |
| - cache_cache.array[smp_processor_id()] = ptr; |
| + cache_cache.array[cpu] = ptr; |
| |
| ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); |
| |
| - BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) |
| + BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep, cpu) |
| != &initarray_generic.cache); |
| - memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), |
| + memcpy(ptr, |
| + cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep, cpu), |
| sizeof(struct arraycache_init)); |
| /* |
| * Do not assume that spinlocks can be initialized via memcpy: |
| */ |
| spin_lock_init(&ptr->lock); |
| |
| - malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = |
| - ptr; |
| + malloc_sizes[INDEX_AC].cs_cachep->array[cpu] = ptr; |
| } |
| /* 5) Replace the bootstrap kmem_list3's */ |
| { |
| @@ -1702,7 +1840,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, |
| |
| *addr++ = 0x12345678; |
| *addr++ = caller; |
| - *addr++ = smp_processor_id(); |
| + *addr++ = raw_smp_processor_id(); |
| size -= 3 * sizeof(unsigned long); |
| { |
| unsigned long *sptr = &caller; |
| @@ -1892,6 +2030,10 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab |
| } |
| #endif |
| |
| +static void |
| +__cache_free(struct kmem_cache *cachep, void *objp, int *this_cpu); |
| + |
| + |
| /** |
| * slab_destroy - destroy and release all objects in a slab |
| * @cachep: cache pointer being destroyed |
| @@ -1901,7 +2043,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab |
| * Before calling the slab must have been unlinked from the cache. The |
| * cache-lock is not held/needed. |
| */ |
| -static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) |
| +static void |
| +slab_destroy(struct kmem_cache *cachep, struct slab *slabp, int *this_cpu) |
| { |
| void *addr = slabp->s_mem - slabp->colouroff; |
| |
| @@ -1915,8 +2058,12 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) |
| call_rcu(&slab_rcu->head, kmem_rcu_free); |
| } else { |
| kmem_freepages(cachep, addr); |
| - if (OFF_SLAB(cachep)) |
| - kmem_cache_free(cachep->slabp_cache, slabp); |
| + if (OFF_SLAB(cachep)) { |
| + if (this_cpu) |
| + __cache_free(cachep->slabp_cache, slabp, this_cpu); |
| + else |
| + kmem_cache_free(cachep->slabp_cache, slabp); |
| + } |
| } |
| } |
| |
| @@ -2013,6 +2160,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, |
| |
| static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) |
| { |
| + int this_cpu; |
| + |
| if (g_cpucache_up == FULL) |
| return enable_cpucache(cachep, gfp); |
| |
| @@ -2056,10 +2205,12 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) |
| jiffies + REAPTIMEOUT_LIST3 + |
| ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
| |
| - cpu_cache_get(cachep)->avail = 0; |
| - cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; |
| - cpu_cache_get(cachep)->batchcount = 1; |
| - cpu_cache_get(cachep)->touched = 0; |
| + this_cpu = raw_smp_processor_id(); |
| + |
| + cpu_cache_get(cachep, this_cpu)->avail = 0; |
| + cpu_cache_get(cachep, this_cpu)->limit = BOOT_CPUCACHE_ENTRIES; |
| + cpu_cache_get(cachep, this_cpu)->batchcount = 1; |
| + cpu_cache_get(cachep, this_cpu)->touched = 0; |
| cachep->batchcount = 1; |
| cachep->limit = BOOT_CPUCACHE_ENTRIES; |
| return 0; |
| @@ -2371,19 +2522,19 @@ EXPORT_SYMBOL(kmem_cache_create); |
| #if DEBUG |
| static void check_irq_off(void) |
| { |
| +/* |
| + * On PREEMPT_RT we use locks to protect the per-CPU lists, |
| + * and keep interrupts enabled. |
| + */ |
| +#ifndef CONFIG_PREEMPT_RT |
| BUG_ON(!irqs_disabled()); |
| +#endif |
| } |
| |
| static void check_irq_on(void) |
| { |
| +#ifndef CONFIG_PREEMPT_RT |
| BUG_ON(irqs_disabled()); |
| -} |
| - |
| -static void check_spinlock_acquired(struct kmem_cache *cachep) |
| -{ |
| -#ifdef CONFIG_SMP |
| - check_irq_off(); |
| - assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock); |
| #endif |
| } |
| |
| @@ -2398,34 +2549,67 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) |
| #else |
| #define check_irq_off() do { } while(0) |
| #define check_irq_on() do { } while(0) |
| -#define check_spinlock_acquired(x) do { } while(0) |
| #define check_spinlock_acquired_node(x, y) do { } while(0) |
| #endif |
| |
| -static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
| +static int drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
| struct array_cache *ac, |
| int force, int node); |
| |
| -static void do_drain(void *arg) |
| +static void __do_drain(void *arg, int this_cpu) |
| { |
| struct kmem_cache *cachep = arg; |
| + int node = cpu_to_node(this_cpu); |
| struct array_cache *ac; |
| - int node = numa_node_id(); |
| |
| check_irq_off(); |
| - ac = cpu_cache_get(cachep); |
| + ac = cpu_cache_get(cachep, this_cpu); |
| spin_lock(&cachep->nodelists[node]->list_lock); |
| - free_block(cachep, ac->entry, ac->avail, node); |
| + free_block(cachep, ac->entry, ac->avail, node, &this_cpu); |
| spin_unlock(&cachep->nodelists[node]->list_lock); |
| ac->avail = 0; |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +static void do_drain(void *arg, int this_cpu) |
| +{ |
| + __do_drain(arg, this_cpu); |
| +} |
| +#else |
| +static void do_drain(void *arg) |
| +{ |
| + __do_drain(arg, smp_processor_id()); |
| +} |
| +#endif |
| + |
| +#ifdef CONFIG_PREEMPT_RT |
| +/* |
| + * execute func() for all CPUs. On PREEMPT_RT we don't actually have |
| + * to run on the remote CPUs - we only have to take their CPU-locks. |
| + * (This is a rare operation, so cacheline bouncing is not an issue.) |
| + */ |
| +static void |
| +slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg) |
| +{ |
| + unsigned int i; |
| + |
| + check_irq_on(); |
| + for_each_online_cpu(i) { |
| + spin_lock(&__get_cpu_lock(slab_irq_locks, i)); |
| + func(arg, i); |
| + spin_unlock(&__get_cpu_lock(slab_irq_locks, i)); |
| + } |
| +} |
| +#else |
| +# define slab_on_each_cpu(func, cachep) on_each_cpu(func, cachep, 1) |
| +#endif |
| + |
| static void drain_cpu_caches(struct kmem_cache *cachep) |
| { |
| struct kmem_list3 *l3; |
| int node; |
| |
| - on_each_cpu(do_drain, cachep, 1); |
| + slab_on_each_cpu(do_drain, cachep); |
| check_irq_on(); |
| for_each_online_node(node) { |
| l3 = cachep->nodelists[node]; |
| @@ -2450,16 +2634,16 @@ static int drain_freelist(struct kmem_cache *cache, |
| struct kmem_list3 *l3, int tofree) |
| { |
| struct list_head *p; |
| - int nr_freed; |
| + int nr_freed, this_cpu; |
| struct slab *slabp; |
| |
| nr_freed = 0; |
| while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { |
| |
| - spin_lock_irq(&l3->list_lock); |
| + slab_spin_lock_irq(&l3->list_lock, this_cpu); |
| p = l3->slabs_free.prev; |
| if (p == &l3->slabs_free) { |
| - spin_unlock_irq(&l3->list_lock); |
| + slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
| goto out; |
| } |
| |
| @@ -2468,13 +2652,9 @@ static int drain_freelist(struct kmem_cache *cache, |
| BUG_ON(slabp->inuse); |
| #endif |
| list_del(&slabp->list); |
| - /* |
| - * Safe to drop the lock. The slab is no longer linked |
| - * to the cache. |
| - */ |
| l3->free_objects -= cache->num; |
| - spin_unlock_irq(&l3->list_lock); |
| - slab_destroy(cache, slabp); |
| + slab_destroy(cache, slabp, &this_cpu); |
| + slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
| nr_freed++; |
| } |
| out: |
| @@ -2738,8 +2918,8 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, |
| * Grow (by 1) the number of slabs within a cache. This is called by |
| * kmem_cache_alloc() when there are no active objs left in a cache. |
| */ |
| -static int cache_grow(struct kmem_cache *cachep, |
| - gfp_t flags, int nodeid, void *objp) |
| +static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid, |
| + void *objp, int *this_cpu) |
| { |
| struct slab *slabp; |
| size_t offset; |
| @@ -2767,8 +2947,7 @@ static int cache_grow(struct kmem_cache *cachep, |
| |
| offset *= cachep->colour_off; |
| |
| - if (local_flags & __GFP_WAIT) |
| - local_irq_enable(); |
| + slab_irq_enable_GFP_WAIT(local_flags, this_cpu); |
| |
| /* |
| * The test for missing atomic flag is performed here, rather than |
| @@ -2797,8 +2976,8 @@ static int cache_grow(struct kmem_cache *cachep, |
| |
| cache_init_objs(cachep, slabp); |
| |
| - if (local_flags & __GFP_WAIT) |
| - local_irq_disable(); |
| + slab_irq_disable_GFP_WAIT(local_flags, this_cpu); |
| + |
| check_irq_off(); |
| spin_lock(&l3->list_lock); |
| |
| @@ -2811,8 +2990,7 @@ static int cache_grow(struct kmem_cache *cachep, |
| opps1: |
| kmem_freepages(cachep, objp); |
| failed: |
| - if (local_flags & __GFP_WAIT) |
| - local_irq_disable(); |
| + slab_irq_disable_GFP_WAIT(local_flags, this_cpu); |
| return 0; |
| } |
| |
| @@ -2934,7 +3112,8 @@ bad: |
| #define check_slabp(x,y) do { } while(0) |
| #endif |
| |
| -static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) |
| +static void * |
| +cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) |
| { |
| int batchcount; |
| struct kmem_list3 *l3; |
| @@ -2944,7 +3123,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) |
| retry: |
| check_irq_off(); |
| node = numa_node_id(); |
| - ac = cpu_cache_get(cachep); |
| + ac = cpu_cache_get(cachep, *this_cpu); |
| batchcount = ac->batchcount; |
| if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { |
| /* |
| @@ -2954,7 +3133,7 @@ retry: |
| */ |
| batchcount = BATCHREFILL_LIMIT; |
| } |
| - l3 = cachep->nodelists[node]; |
| + l3 = cachep->nodelists[cpu_to_node(*this_cpu)]; |
| |
| BUG_ON(ac->avail > 0 || !l3); |
| spin_lock(&l3->list_lock); |
| @@ -2979,7 +3158,7 @@ retry: |
| |
| slabp = list_entry(entry, struct slab, list); |
| check_slabp(cachep, slabp); |
| - check_spinlock_acquired(cachep); |
| + check_spinlock_acquired_node(cachep, cpu_to_node(*this_cpu)); |
| |
| /* |
| * The slab was either on partial or free list so |
| @@ -2993,8 +3172,9 @@ retry: |
| STATS_INC_ACTIVE(cachep); |
| STATS_SET_HIGH(cachep); |
| |
| - ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, |
| - node); |
| + ac->entry[ac->avail++] = |
| + slab_get_obj(cachep, slabp, |
| + cpu_to_node(*this_cpu)); |
| } |
| check_slabp(cachep, slabp); |
| |
| @@ -3013,10 +3193,10 @@ alloc_done: |
| |
| if (unlikely(!ac->avail)) { |
| int x; |
| - x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); |
| + x = cache_grow(cachep, flags | GFP_THISNODE, cpu_to_node(*this_cpu), NULL, this_cpu); |
| |
| /* cache_grow can reenable interrupts, then ac could change. */ |
| - ac = cpu_cache_get(cachep); |
| + ac = cpu_cache_get(cachep, *this_cpu); |
| if (!x && ac->avail == 0) /* no objects in sight? abort */ |
| return NULL; |
| |
| @@ -3103,26 +3283,27 @@ static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) |
| return should_failslab(obj_size(cachep), flags, cachep->flags); |
| } |
| |
| -static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
| +static inline void * |
| +____cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) |
| { |
| void *objp; |
| struct array_cache *ac; |
| |
| check_irq_off(); |
| |
| - ac = cpu_cache_get(cachep); |
| + ac = cpu_cache_get(cachep, *this_cpu); |
| if (likely(ac->avail)) { |
| STATS_INC_ALLOCHIT(cachep); |
| ac->touched = 1; |
| objp = ac->entry[--ac->avail]; |
| } else { |
| STATS_INC_ALLOCMISS(cachep); |
| - objp = cache_alloc_refill(cachep, flags); |
| + objp = cache_alloc_refill(cachep, flags, this_cpu); |
| /* |
| * the 'ac' may be updated by cache_alloc_refill(), |
| * and kmemleak_erase() requires its correct value. |
| */ |
| - ac = cpu_cache_get(cachep); |
| + ac = cpu_cache_get(cachep, *this_cpu); |
| } |
| /* |
| * To avoid a false negative, if an object that is in one of the |
| @@ -3141,7 +3322,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
| * If we are in_interrupt, then process context, including cpusets and |
| * mempolicy, may not apply and should not be used for allocation policy. |
| */ |
| -static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) |
| +static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags, |
| + int *this_cpu) |
| { |
| int nid_alloc, nid_here; |
| |
| @@ -3153,7 +3335,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) |
| else if (current->mempolicy) |
| nid_alloc = slab_node(current->mempolicy); |
| if (nid_alloc != nid_here) |
| - return ____cache_alloc_node(cachep, flags, nid_alloc); |
| + return ____cache_alloc_node(cachep, flags, nid_alloc, this_cpu); |
| return NULL; |
| } |
| |
| @@ -3165,7 +3347,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) |
| * allocator to do its reclaim / fallback magic. We then insert the |
| * slab into the proper nodelist and then allocate from it. |
| */ |
| -static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) |
| +static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu) |
| { |
| struct zonelist *zonelist; |
| gfp_t local_flags; |
| @@ -3193,7 +3375,8 @@ retry: |
| cache->nodelists[nid] && |
| cache->nodelists[nid]->free_objects) { |
| obj = ____cache_alloc_node(cache, |
| - flags | GFP_THISNODE, nid); |
| + flags | GFP_THISNODE, nid, |
| + this_cpu); |
| if (obj) |
| break; |
| } |
| @@ -3206,20 +3389,21 @@ retry: |
| * We may trigger various forms of reclaim on the allowed |
| * set and go into memory reserves if necessary. |
| */ |
| - if (local_flags & __GFP_WAIT) |
| - local_irq_enable(); |
| + slab_irq_enable_GFP_WAIT(local_flags, this_cpu); |
| + |
| kmem_flagcheck(cache, flags); |
| - obj = kmem_getpages(cache, local_flags, numa_node_id()); |
| - if (local_flags & __GFP_WAIT) |
| - local_irq_disable(); |
| + obj = kmem_getpages(cache, local_flags, cpu_to_node(*this_cpu)); |
| + |
| + slab_irq_disable_GFP_WAIT(local_flags, this_cpu); |
| + |
| if (obj) { |
| /* |
| * Insert into the appropriate per node queues |
| */ |
| nid = page_to_nid(virt_to_page(obj)); |
| - if (cache_grow(cache, flags, nid, obj)) { |
| + if (cache_grow(cache, flags, nid, obj, this_cpu)) { |
| obj = ____cache_alloc_node(cache, |
| - flags | GFP_THISNODE, nid); |
| + flags | GFP_THISNODE, nid, this_cpu); |
| if (!obj) |
| /* |
| * Another processor may allocate the |
| @@ -3240,7 +3424,7 @@ retry: |
| * A interface to enable slab creation on nodeid |
| */ |
| static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, |
| - int nodeid) |
| + int nodeid, int *this_cpu) |
| { |
| struct list_head *entry; |
| struct slab *slabp; |
| @@ -3288,11 +3472,11 @@ retry: |
| |
| must_grow: |
| spin_unlock(&l3->list_lock); |
| - x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); |
| + x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL, this_cpu); |
| if (x) |
| goto retry; |
| |
| - return fallback_alloc(cachep, flags); |
| + return fallback_alloc(cachep, flags, this_cpu); |
| |
| done: |
| return obj; |
| @@ -3315,6 +3499,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, |
| void *caller) |
| { |
| unsigned long save_flags; |
| + int this_cpu, this_node; |
| void *ptr; |
| |
| flags &= gfp_allowed_mask; |
| @@ -3325,32 +3510,33 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, |
| return NULL; |
| |
| cache_alloc_debugcheck_before(cachep, flags); |
| - local_irq_save(save_flags); |
| + slab_irq_save(save_flags, this_cpu); |
| |
| + this_node = cpu_to_node(this_cpu); |
| if (nodeid == -1) |
| - nodeid = numa_node_id(); |
| + nodeid = this_node; |
| |
| if (unlikely(!cachep->nodelists[nodeid])) { |
| /* Node not bootstrapped yet */ |
| - ptr = fallback_alloc(cachep, flags); |
| + ptr = fallback_alloc(cachep, flags, &this_cpu); |
| goto out; |
| } |
| |
| - if (nodeid == numa_node_id()) { |
| + if (nodeid == this_node) { |
| /* |
| * Use the locally cached objects if possible. |
| * However ____cache_alloc does not allow fallback |
| * to other nodes. It may fail while we still have |
| * objects on other nodes available. |
| */ |
| - ptr = ____cache_alloc(cachep, flags); |
| + ptr = ____cache_alloc(cachep, flags, &this_cpu); |
| if (ptr) |
| goto out; |
| } |
| /* ___cache_alloc_node can fall back to other nodes */ |
| - ptr = ____cache_alloc_node(cachep, flags, nodeid); |
| + ptr = ____cache_alloc_node(cachep, flags, nodeid, &this_cpu); |
| out: |
| - local_irq_restore(save_flags); |
| + slab_irq_restore(save_flags, this_cpu); |
| ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); |
| kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, |
| flags); |
| @@ -3365,33 +3551,33 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, |
| } |
| |
| static __always_inline void * |
| -__do_cache_alloc(struct kmem_cache *cache, gfp_t flags) |
| +__do_cache_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu) |
| { |
| void *objp; |
| |
| if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { |
| - objp = alternate_node_alloc(cache, flags); |
| + objp = alternate_node_alloc(cache, flags, this_cpu); |
| if (objp) |
| goto out; |
| } |
| - objp = ____cache_alloc(cache, flags); |
| |
| + objp = ____cache_alloc(cache, flags, this_cpu); |
| /* |
| * We may just have run out of memory on the local node. |
| * ____cache_alloc_node() knows how to locate memory on other nodes |
| */ |
| - if (!objp) |
| - objp = ____cache_alloc_node(cache, flags, numa_node_id()); |
| - |
| + if (!objp) |
| + objp = ____cache_alloc_node(cache, flags, |
| + cpu_to_node(*this_cpu), this_cpu); |
| out: |
| return objp; |
| } |
| #else |
| |
| static __always_inline void * |
| -__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
| +__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) |
| { |
| - return ____cache_alloc(cachep, flags); |
| + return ____cache_alloc(cachep, flags, this_cpu); |
| } |
| |
| #endif /* CONFIG_NUMA */ |
| @@ -3400,6 +3586,7 @@ static __always_inline void * |
| __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) |
| { |
| unsigned long save_flags; |
| + int this_cpu; |
| void *objp; |
| |
| flags &= gfp_allowed_mask; |
| @@ -3410,9 +3597,9 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) |
| return NULL; |
| |
| cache_alloc_debugcheck_before(cachep, flags); |
| - local_irq_save(save_flags); |
| - objp = __do_cache_alloc(cachep, flags); |
| - local_irq_restore(save_flags); |
| + slab_irq_save(save_flags, this_cpu); |
| + objp = __do_cache_alloc(cachep, flags, &this_cpu); |
| + slab_irq_restore(save_flags, this_cpu); |
| objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); |
| kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, |
| flags); |
| @@ -3431,7 +3618,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) |
| * Caller needs to acquire correct kmem_list's list_lock |
| */ |
| static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, |
| - int node) |
| + int node, int *this_cpu) |
| { |
| int i; |
| struct kmem_list3 *l3; |
| @@ -3460,7 +3647,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, |
| * a different cache, refer to comments before |
| * alloc_slabmgmt. |
| */ |
| - slab_destroy(cachep, slabp); |
| + slab_destroy(cachep, slabp, this_cpu); |
| } else { |
| list_add(&slabp->list, &l3->slabs_free); |
| } |
| @@ -3474,11 +3661,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, |
| } |
| } |
| |
| -static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) |
| +static void |
| +cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac, int *this_cpu) |
| { |
| int batchcount; |
| struct kmem_list3 *l3; |
| - int node = numa_node_id(); |
| + int node = cpu_to_node(*this_cpu); |
| |
| batchcount = ac->batchcount; |
| #if DEBUG |
| @@ -3500,7 +3688,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) |
| } |
| } |
| |
| - free_block(cachep, ac->entry, batchcount, node); |
| + free_block(cachep, ac->entry, batchcount, node, this_cpu); |
| free_done: |
| #if STATS |
| { |
| @@ -3529,9 +3717,9 @@ free_done: |
| * Release an obj back to its cache. If the obj has a constructed state, it must |
| * be in this state _before_ it is released. Called with disabled ints. |
| */ |
| -static inline void __cache_free(struct kmem_cache *cachep, void *objp) |
| +static void __cache_free(struct kmem_cache *cachep, void *objp, int *this_cpu) |
| { |
| - struct array_cache *ac = cpu_cache_get(cachep); |
| + struct array_cache *ac = cpu_cache_get(cachep, *this_cpu); |
| |
| check_irq_off(); |
| kmemleak_free_recursive(objp, cachep->flags); |
| @@ -3546,7 +3734,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) |
| * variable to skip the call, which is mostly likely to be present in |
| * the cache. |
| */ |
| - if (nr_online_nodes > 1 && cache_free_alien(cachep, objp)) |
| + if (nr_online_nodes > 1 && cache_free_alien(cachep, objp, this_cpu)) |
| return; |
| |
| if (likely(ac->avail < ac->limit)) { |
| @@ -3555,7 +3743,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) |
| return; |
| } else { |
| STATS_INC_FREEMISS(cachep); |
| - cache_flusharray(cachep, ac); |
| + cache_flusharray(cachep, ac, this_cpu); |
| ac->entry[ac->avail++] = objp; |
| } |
| } |
| @@ -3754,13 +3942,14 @@ EXPORT_SYMBOL(__kmalloc); |
| void kmem_cache_free(struct kmem_cache *cachep, void *objp) |
| { |
| unsigned long flags; |
| + int this_cpu; |
| |
| - local_irq_save(flags); |
| + slab_irq_save(flags, this_cpu); |
| debug_check_no_locks_freed(objp, obj_size(cachep)); |
| if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) |
| debug_check_no_obj_freed(objp, obj_size(cachep)); |
| - __cache_free(cachep, objp); |
| - local_irq_restore(flags); |
| + __cache_free(cachep, objp, &this_cpu); |
| + slab_irq_restore(flags, this_cpu); |
| |
| trace_kmem_cache_free(_RET_IP_, objp); |
| } |
| @@ -3779,18 +3968,19 @@ void kfree(const void *objp) |
| { |
| struct kmem_cache *c; |
| unsigned long flags; |
| + int this_cpu; |
| |
| trace_kfree(_RET_IP_, objp); |
| |
| if (unlikely(ZERO_OR_NULL_PTR(objp))) |
| return; |
| - local_irq_save(flags); |
| + slab_irq_save(flags, this_cpu); |
| kfree_debugcheck(objp); |
| c = virt_to_cache(objp); |
| debug_check_no_locks_freed(objp, obj_size(c)); |
| debug_check_no_obj_freed(objp, obj_size(c)); |
| - __cache_free(c, (void *)objp); |
| - local_irq_restore(flags); |
| + __cache_free(c, (void *)objp, &this_cpu); |
| + slab_irq_restore(flags, this_cpu); |
| } |
| EXPORT_SYMBOL(kfree); |
| |
| @@ -3811,7 +4001,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); |
| */ |
| static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) |
| { |
| - int node; |
| + int node, this_cpu; |
| struct kmem_list3 *l3; |
| struct array_cache *new_shared; |
| struct array_cache **new_alien = NULL; |
| @@ -3839,11 +4029,11 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) |
| if (l3) { |
| struct array_cache *shared = l3->shared; |
| |
| - spin_lock_irq(&l3->list_lock); |
| + slab_spin_lock_irq(&l3->list_lock, this_cpu); |
| |
| if (shared) |
| free_block(cachep, shared->entry, |
| - shared->avail, node); |
| + shared->avail, node, &this_cpu); |
| |
| l3->shared = new_shared; |
| if (!l3->alien) { |
| @@ -3852,7 +4042,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) |
| } |
| l3->free_limit = (1 + nr_cpus_node(node)) * |
| cachep->batchcount + cachep->num; |
| - spin_unlock_irq(&l3->list_lock); |
| + slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
| kfree(shared); |
| free_alien_cache(new_alien); |
| continue; |
| @@ -3899,24 +4089,36 @@ struct ccupdate_struct { |
| struct array_cache *new[NR_CPUS]; |
| }; |
| |
| -static void do_ccupdate_local(void *info) |
| +static void __do_ccupdate_local(void *info, int this_cpu) |
| { |
| struct ccupdate_struct *new = info; |
| struct array_cache *old; |
| |
| check_irq_off(); |
| - old = cpu_cache_get(new->cachep); |
| + old = cpu_cache_get(new->cachep, this_cpu); |
| |
| - new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; |
| - new->new[smp_processor_id()] = old; |
| + new->cachep->array[this_cpu] = new->new[this_cpu]; |
| + new->new[this_cpu] = old; |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +static void do_ccupdate_local(void *arg, int this_cpu) |
| +{ |
| + __do_ccupdate_local(arg, this_cpu); |
| +} |
| +#else |
| +static void do_ccupdate_local(void *arg) |
| +{ |
| + __do_ccupdate_local(arg, smp_processor_id()); |
| +} |
| +#endif |
| + |
| /* Always called with the cache_chain_mutex held */ |
| static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
| int batchcount, int shared, gfp_t gfp) |
| { |
| struct ccupdate_struct *new; |
| - int i; |
| + int i, this_cpu; |
| |
| new = kzalloc(sizeof(*new), gfp); |
| if (!new) |
| @@ -3934,7 +4136,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
| } |
| new->cachep = cachep; |
| |
| - on_each_cpu(do_ccupdate_local, (void *)new, 1); |
| + slab_on_each_cpu(do_ccupdate_local, (void *)new); |
| |
| check_irq_on(); |
| cachep->batchcount = batchcount; |
| @@ -3945,9 +4147,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
| struct array_cache *ccold = new->new[i]; |
| if (!ccold) |
| continue; |
| - spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); |
| - free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); |
| - spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); |
| + slab_spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock, |
| + this_cpu); |
| + free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), |
| + &this_cpu); |
| + slab_spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock, |
| + this_cpu); |
| kfree(ccold); |
| } |
| kfree(new); |
| @@ -4012,29 +4217,31 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) |
| * Drain an array if it contains any elements taking the l3 lock only if |
| * necessary. Note that the l3 listlock also protects the array_cache |
| * if drain_array() is used on the shared array. |
| + * Returns non-zero if some work was done. |
| */ |
| -void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
| - struct array_cache *ac, int force, int node) |
| +int drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
| + struct array_cache *ac, int force, int node) |
| { |
| - int tofree; |
| + int tofree, this_cpu; |
| |
| if (!ac || !ac->avail) |
| - return; |
| + return 0; |
| if (ac->touched && !force) { |
| ac->touched = 0; |
| } else { |
| - spin_lock_irq(&l3->list_lock); |
| + slab_spin_lock_irq(&l3->list_lock, this_cpu); |
| if (ac->avail) { |
| tofree = force ? ac->avail : (ac->limit + 4) / 5; |
| if (tofree > ac->avail) |
| tofree = (ac->avail + 1) / 2; |
| - free_block(cachep, ac->entry, tofree, node); |
| + free_block(cachep, ac->entry, tofree, node, &this_cpu); |
| ac->avail -= tofree; |
| memmove(ac->entry, &(ac->entry[tofree]), |
| sizeof(void *) * ac->avail); |
| } |
| - spin_unlock_irq(&l3->list_lock); |
| + slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
| } |
| + return 1; |
| } |
| |
| /** |
| @@ -4051,10 +4258,11 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
| */ |
| static void cache_reap(struct work_struct *w) |
| { |
| + int this_cpu = raw_smp_processor_id(), node = cpu_to_node(this_cpu); |
| struct kmem_cache *searchp; |
| struct kmem_list3 *l3; |
| - int node = numa_node_id(); |
| struct delayed_work *work = to_delayed_work(w); |
| + int work_done = 0; |
| |
| if (!mutex_trylock(&cache_chain_mutex)) |
| /* Give up. Setup the next iteration. */ |
| @@ -4070,9 +4278,12 @@ static void cache_reap(struct work_struct *w) |
| */ |
| l3 = searchp->nodelists[node]; |
| |
| - reap_alien(searchp, l3); |
| + work_done += reap_alien(searchp, l3, &this_cpu); |
| + |
| + node = cpu_to_node(this_cpu); |
| |
| - drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); |
| + work_done += drain_array(searchp, l3, |
| + cpu_cache_get(searchp, this_cpu), 0, node); |
| |
| /* |
| * These are racy checks but it does not matter |
| @@ -4083,7 +4294,7 @@ static void cache_reap(struct work_struct *w) |
| |
| l3->next_reap = jiffies + REAPTIMEOUT_LIST3; |
| |
| - drain_array(searchp, l3, l3->shared, 0, node); |
| + work_done += drain_array(searchp, l3, l3->shared, 0, node); |
| |
| if (l3->free_touched) |
| l3->free_touched = 0; |
| @@ -4102,7 +4313,8 @@ next: |
| next_reap_node(); |
| out: |
| /* Set up the next iteration */ |
| - schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); |
| + schedule_delayed_work(work, |
| + round_jiffies_relative((1+!work_done) * REAPTIMEOUT_CPUC)); |
| } |
| |
| #ifdef CONFIG_SLABINFO |
| @@ -4161,7 +4373,7 @@ static int s_show(struct seq_file *m, void *p) |
| unsigned long num_slabs, free_objects = 0, shared_avail = 0; |
| const char *name; |
| char *error = NULL; |
| - int node; |
| + int this_cpu, node; |
| struct kmem_list3 *l3; |
| |
| active_objs = 0; |
| @@ -4172,7 +4384,7 @@ static int s_show(struct seq_file *m, void *p) |
| continue; |
| |
| check_irq_on(); |
| - spin_lock_irq(&l3->list_lock); |
| + slab_spin_lock_irq(&l3->list_lock, this_cpu); |
| |
| list_for_each_entry(slabp, &l3->slabs_full, list) { |
| if (slabp->inuse != cachep->num && !error) |
| @@ -4197,7 +4409,7 @@ static int s_show(struct seq_file *m, void *p) |
| if (l3->shared) |
| shared_avail += l3->shared->avail; |
| |
| - spin_unlock_irq(&l3->list_lock); |
| + slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
| } |
| num_slabs += active_slabs; |
| num_objs = num_slabs * cachep->num; |
| @@ -4407,7 +4619,7 @@ static int leaks_show(struct seq_file *m, void *p) |
| struct kmem_list3 *l3; |
| const char *name; |
| unsigned long *n = m->private; |
| - int node; |
| + int node, this_cpu; |
| int i; |
| |
| if (!(cachep->flags & SLAB_STORE_USER)) |
| @@ -4425,13 +4637,13 @@ static int leaks_show(struct seq_file *m, void *p) |
| continue; |
| |
| check_irq_on(); |
| - spin_lock_irq(&l3->list_lock); |
| + slab_spin_lock_irq(&l3->list_lock, this_cpu); |
| |
| list_for_each_entry(slabp, &l3->slabs_full, list) |
| handle_slab(n, cachep, slabp); |
| list_for_each_entry(slabp, &l3->slabs_partial, list) |
| handle_slab(n, cachep, slabp); |
| - spin_unlock_irq(&l3->list_lock); |
| + slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
| } |
| name = cachep->name; |
| if (n[0] == n[1]) { |
| -- |
| 1.7.1.1 |
| |