Subject: mm: Enable SLUB for RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Oct 2012 10:32:35 +0100

Make SLUB RT aware by converting its locks to raw locks and by using
per-CPU free lists to move the actual freeing out of the lock-held region.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 mm/slab.h |   4 +
 mm/slub.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 111 insertions(+), 30 deletions(-)

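The free-list part of the change can be sketched outside the kernel. The
following is a minimal user-space model of the defer-and-drain pattern only,
not kernel code: the pthread mutex and the hand-rolled singly linked list
stand in for the raw_spinlock_t and struct list_head of the per-CPU
struct slub_free_list introduced below, and the made-up helpers defer_free()
and drain_deferred() play the roles of the irqs_disabled() branch in
free_slab() and of free_delayed().

#include <pthread.h>
#include <stdlib.h>

struct deferred_page {
        struct deferred_page *next;
        void *mem;                      /* stands in for the slab page */
};

struct free_list {
        pthread_mutex_t lock;           /* stands in for raw_spinlock_t */
        struct deferred_page *head;
};

static struct free_list fl = { PTHREAD_MUTEX_INITIALIZER, NULL };

/* Context where freeing must not happen directly: just queue the page. */
static void defer_free(struct deferred_page *p)
{
        pthread_mutex_lock(&fl.lock);
        p->next = fl.head;
        fl.head = p;
        pthread_mutex_unlock(&fl.lock);
}

/* Safe context: splice the list out under the lock, free outside of it. */
static void drain_deferred(void)
{
        struct deferred_page *p;

        pthread_mutex_lock(&fl.lock);
        p = fl.head;
        fl.head = NULL;
        pthread_mutex_unlock(&fl.lock);

        while (p) {
                struct deferred_page *next = p->next;

                free(p->mem);
                free(p);
                p = next;
        }
}

int main(void)
{
        struct deferred_page *p = malloc(sizeof(*p));

        if (!p)
                return 1;
        p->mem = malloc(64);
        defer_free(p);          /* "cannot free here" path */
        drain_deferred();       /* later, from a safe context */
        return 0;
}

As in the patch, the drain side splices the whole list while holding the lock
and does the actual freeing afterwards, so the lock is only held for
constant-time list manipulation and never across the real free operation.
Build the sketch with "cc -pthread" to try it.
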
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -452,7 +452,11 @@ static inline void slab_post_alloc_hook(
* The slab lists for all objects.
*/
struct kmem_cache_node {
+#ifdef CONFIG_SLUB
+ raw_spinlock_t list_lock;
+#else
spinlock_t list_lock;
+#endif

#ifdef CONFIG_SLAB
struct list_head slabs_partial; /* partial list first, better asm code */
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1183,7 +1183,7 @@ static noinline int free_debug_processin
unsigned long uninitialized_var(flags);
int ret = 0;

- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
slab_lock(page);

if (s->flags & SLAB_CONSISTENCY_CHECKS) {
@@ -1218,7 +1218,7 @@ static noinline int free_debug_processin
bulk_cnt, cnt);

slab_unlock(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
if (!ret)
slab_fix(s, "Object at 0x%p not freed", object);
return ret;
@@ -1346,6 +1346,12 @@ static inline void dec_slabs_node(struct

#endif /* CONFIG_SLUB_DEBUG */

+struct slub_free_list {
+ raw_spinlock_t lock;
+ struct list_head list;
+};
+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
+
/*
* Hooks for other subsystems that check memory allocations. In a typical
* production configuration these hooks all should produce no code at all.
@@ -1568,7 +1574,11 @@ static struct page *allocate_slab(struct

flags &= gfp_allowed_mask;

+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (system_state > SYSTEM_BOOTING)
+#else
if (gfpflags_allow_blocking(flags))
+#endif
local_irq_enable();

flags |= s->allocflags;
@@ -1627,7 +1637,11 @@ static struct page *allocate_slab(struct
page->frozen = 1;

out:
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (system_state > SYSTEM_BOOTING)
+#else
if (gfpflags_allow_blocking(flags))
+#endif
local_irq_disable();
if (!page)
return NULL;
@@ -1685,6 +1699,16 @@ static void __free_slab(struct kmem_cach
__free_pages(page, order);
}

+static void free_delayed(struct list_head *h)
+{
+ while(!list_empty(h)) {
+ struct page *page = list_first_entry(h, struct page, lru);
+
+ list_del(&page->lru);
+ __free_slab(page->slab_cache, page);
+ }
+}
+
#define need_reserve_slab_rcu \
(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))

@@ -1716,6 +1740,12 @@ static void free_slab(struct kmem_cache
}

call_rcu(head, rcu_free_slab);
+ } else if (irqs_disabled()) {
+ struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
+
+ raw_spin_lock(&f->lock);
+ list_add(&page->lru, &f->list);
+ raw_spin_unlock(&f->lock);
} else
__free_slab(s, page);
}
@@ -1823,7 +1853,7 @@ static void *get_partial_node(struct kme
if (!n || !n->nr_partial)
return NULL;

- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
void *t;

@@ -1848,7 +1878,7 @@ static void *get_partial_node(struct kme
break;

}
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
return object;
}

@@ -2094,7 +2124,7 @@ static void deactivate_slab(struct kmem_
* that acquire_slab() will see a slab page that
* is frozen
*/
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
} else {
m = M_FULL;
@@ -2105,7 +2135,7 @@ static void deactivate_slab(struct kmem_
* slabs from diagnostic functions will not see
* any frozen slabs.
*/
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
}

@@ -2140,7 +2170,7 @@ static void deactivate_slab(struct kmem_
goto redo;

if (lock)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);

if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
@@ -2175,10 +2205,10 @@ static void unfreeze_partials(struct kme
n2 = get_node(s, page_to_nid(page));
if (n != n2) {
if (n)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);

n = n2;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}

do {
@@ -2207,7 +2237,7 @@ static void unfreeze_partials(struct kme
}

if (n)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);

while (discard_page) {
page = discard_page;
@@ -2244,14 +2274,21 @@ static void put_cpu_partial(struct kmem_
pobjects = oldpage->pobjects;
pages = oldpage->pages;
if (drain && pobjects > s->cpu_partial) {
+ struct slub_free_list *f;
unsigned long flags;
+ LIST_HEAD(tofree);
/*
* partial array is full. Move the existing
* set to the per node partial list.
*/
local_irq_save(flags);
unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+ f = this_cpu_ptr(&slub_free_list);
+ raw_spin_lock(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock(&f->lock);
local_irq_restore(flags);
+ free_delayed(&tofree);
oldpage = NULL;
pobjects = 0;
pages = 0;
@@ -2321,7 +2358,22 @@ static bool has_cpu_slab(int cpu, void *

static void flush_all(struct kmem_cache *s)
{
+ LIST_HEAD(tofree);
+ int cpu;
+
on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
+ for_each_online_cpu(cpu) {
+ struct slub_free_list *f;
+
+ if (!has_cpu_slab(cpu, s))
+ continue;
+
+ f = &per_cpu(slub_free_list, cpu);
+ raw_spin_lock_irq(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock_irq(&f->lock);
+ free_delayed(&tofree);
+ }
}

/*
@@ -2376,10 +2428,10 @@ static unsigned long count_partial(struc
unsigned long x = 0;
struct page *page;

- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
x += get_count(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
@@ -2517,8 +2569,10 @@ static inline void *get_freelist(struct
* already disabled (which is the case for bulk allocation).
*/
static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
- unsigned long addr, struct kmem_cache_cpu *c)
+ unsigned long addr, struct kmem_cache_cpu *c,
+ struct list_head *to_free)
{
+ struct slub_free_list *f;
void *freelist;
struct page *page;

@@ -2574,6 +2628,13 @@ static void *___slab_alloc(struct kmem_c
VM_BUG_ON(!c->page->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
+
+out:
+ f = this_cpu_ptr(&slub_free_list);
+ raw_spin_lock(&f->lock);
+ list_splice_init(&f->list, to_free);
+ raw_spin_unlock(&f->lock);
+
return freelist;

new_slab:
@@ -2589,7 +2650,7 @@ static void *___slab_alloc(struct kmem_c

if (unlikely(!freelist)) {
slab_out_of_memory(s, gfpflags, node);
- return NULL;
+ goto out;
}

page = c->page;
@@ -2602,7 +2663,7 @@ static void *___slab_alloc(struct kmem_c
goto new_slab; /* Slab failed checks. Next slab needed */

deactivate_slab(s, page, get_freepointer(s, freelist), c);
- return freelist;
+ goto out;
}

/*
@@ -2614,6 +2675,7 @@ static void *__slab_alloc(struct kmem_ca
{
void *p;
unsigned long flags;
+ LIST_HEAD(tofree);

local_irq_save(flags);
#ifdef CONFIG_PREEMPT
@@ -2625,8 +2687,9 @@ static void *__slab_alloc(struct kmem_ca
c = this_cpu_ptr(s->cpu_slab);
#endif

- p = ___slab_alloc(s, gfpflags, node, addr, c);
+ p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
local_irq_restore(flags);
+ free_delayed(&tofree);
return p;
}

@@ -2812,7 +2875,7 @@ static void __slab_free(struct kmem_cach

do {
if (unlikely(n)) {
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
n = NULL;
}
prior = page->freelist;
@@ -2844,7 +2907,7 @@ static void __slab_free(struct kmem_cach
* Otherwise the list_lock will synchronize with
* other processors updating the list of slabs.
*/
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);

}
}
@@ -2886,7 +2949,7 @@ static void __slab_free(struct kmem_cach
add_partial(n, page, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return;

slab_empty:
@@ -2901,7 +2964,7 @@ static void __slab_free(struct kmem_cach
remove_full(s, n, page);
}

- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
}
@@ -3106,6 +3169,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
void **p)
{
struct kmem_cache_cpu *c;
+ LIST_HEAD(to_free);
int i;

/* memcg and kmem_cache debug support */
@@ -3129,7 +3193,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
* of re-populating per CPU c->freelist
*/
p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
- _RET_IP_, c);
+ _RET_IP_, c, &to_free);
if (unlikely(!p[i]))
goto error;

@@ -3141,6 +3205,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
}
c->tid = next_tid(c->tid);
local_irq_enable();
+ free_delayed(&to_free);

/* Clear memory outside IRQ disabled fastpath loop */
if (unlikely(flags & __GFP_ZERO)) {
@@ -3155,6 +3220,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
return i;
error:
local_irq_enable();
+ free_delayed(&to_free);
slab_post_alloc_hook(s, flags, i, p);
__kmem_cache_free_bulk(s, i, p);
return 0;
@@ -3288,7 +3354,7 @@ static void
init_kmem_cache_node(struct kmem_cache_node *n)
{
n->nr_partial = 0;
- spin_lock_init(&n->list_lock);
+ raw_spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
atomic_long_set(&n->nr_slabs, 0);
@@ -3642,6 +3708,10 @@ static void list_slab_objects(struct kme
const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
+#ifdef CONFIG_PREEMPT_RT_BASE
+ /* XXX move out of irq-off section */
+ slab_err(s, page, text, s->name);
+#else
void *addr = page_address(page);
void *p;
unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
@@ -3662,6 +3732,7 @@ static void list_slab_objects(struct kme
slab_unlock(page);
kfree(map);
#endif
+#endif
}

/*
@@ -3675,7 +3746,7 @@ static void free_partial(struct kmem_cac
struct page *page, *h;

BUG_ON(irqs_disabled());
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
list_for_each_entry_safe(page, h, &n->partial, lru) {
if (!page->inuse) {
remove_partial(n, page);
@@ -3685,7 +3756,7 @@ static void free_partial(struct kmem_cac
"Objects remaining in %s on __kmem_cache_shutdown()");
}
}
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);

list_for_each_entry_safe(page, h, &discard, lru)
discard_slab(s, page);
@@ -3947,7 +4018,7 @@ int __kmem_cache_shrink(struct kmem_cach
for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
INIT_LIST_HEAD(promote + i);

- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);

/*
* Build lists of slabs to discard or promote.
@@ -3978,7 +4049,7 @@ int __kmem_cache_shrink(struct kmem_cach
for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
list_splice(promote + i, &n->partial);

- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);

/* Release empty slabs */
list_for_each_entry_safe(page, t, &discard, lru)
@@ -4191,6 +4262,12 @@ void __init kmem_cache_init(void)
{
static __initdata struct kmem_cache boot_kmem_cache,
boot_kmem_cache_node;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
+ }

if (debug_guardpage_minorder())
slub_max_order = 0;
@@ -4399,7 +4476,7 @@ static int validate_slab_node(struct kme
struct page *page;
unsigned long flags;

- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);

list_for_each_entry(page, &n->partial, lru) {
validate_slab_slab(s, page, map);
@@ -4421,7 +4498,7 @@ static int validate_slab_node(struct kme
s->name, count, atomic_long_read(&n->nr_slabs));

out:
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return count;
}

@@ -4609,12 +4686,12 @@ static int list_locations(struct kmem_ca
if (!atomic_long_read(&n->nr_slabs))
continue;

- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
process_slab(&t, s, page, alloc, map);
list_for_each_entry(page, &n->full, lru)
process_slab(&t, s, page, alloc, map);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
}

for (i = 0; i < t.count; i++) {