blob: c2962f641ae6d5f5134c6071b6c0b1687707e919 [file] [log] [blame]
Subject: mm: Enable SLUB for RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Oct 2012 10:32:35 +0100
Make SLUB RT aware by converting locks to raw and using free lists to
move the freeing out of the lock held region.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/slab.h | 4 +
mm/slub.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
2 files changed, 109 insertions(+), 29 deletions(-)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -465,7 +465,11 @@ static inline void slab_post_alloc_hook(
* The slab lists for all objects.
*/
struct kmem_cache_node {
+#ifdef CONFIG_SLUB
+ raw_spinlock_t list_lock;
+#else
spinlock_t list_lock;
+#endif
#ifdef CONFIG_SLAB
struct list_head slabs_partial; /* partial list first, better asm code */
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1146,7 +1146,7 @@ static noinline int free_debug_processin
unsigned long uninitialized_var(flags);
int ret = 0;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
slab_lock(page);
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
@@ -1181,7 +1181,7 @@ static noinline int free_debug_processin
bulk_cnt, cnt);
slab_unlock(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
if (!ret)
slab_fix(s, "Object at 0x%p not freed", object);
return ret;
@@ -1309,6 +1309,12 @@ static inline void dec_slabs_node(struct
#endif /* CONFIG_SLUB_DEBUG */
+struct slub_free_list {
+ raw_spinlock_t lock;
+ struct list_head list;
+};
+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
+
/*
* Hooks for other subsystems that check memory allocations. In a typical
* production configuration these hooks all should produce no code at all.
@@ -1535,7 +1541,11 @@ static struct page *allocate_slab(struct
flags &= gfp_allowed_mask;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (system_state > SYSTEM_BOOTING)
+#else
if (gfpflags_allow_blocking(flags))
+#endif
local_irq_enable();
flags |= s->allocflags;
@@ -1610,7 +1620,11 @@ static struct page *allocate_slab(struct
page->frozen = 1;
out:
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (system_state > SYSTEM_BOOTING)
+#else
if (gfpflags_allow_blocking(flags))
+#endif
local_irq_disable();
if (!page)
return NULL;
@@ -1670,6 +1684,16 @@ static void __free_slab(struct kmem_cach
__free_pages(page, order);
}
+static void free_delayed(struct list_head *h)
+{
+ while(!list_empty(h)) {
+ struct page *page = list_first_entry(h, struct page, lru);
+
+ list_del(&page->lru);
+ __free_slab(page->slab_cache, page);
+ }
+}
+
#define need_reserve_slab_rcu \
(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
@@ -1701,6 +1725,12 @@ static void free_slab(struct kmem_cache
}
call_rcu(head, rcu_free_slab);
+ } else if (irqs_disabled()) {
+ struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
+
+ raw_spin_lock(&f->lock);
+ list_add(&page->lru, &f->list);
+ raw_spin_unlock(&f->lock);
} else
__free_slab(s, page);
}
@@ -1808,7 +1838,7 @@ static void *get_partial_node(struct kme
if (!n || !n->nr_partial)
return NULL;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
void *t;
@@ -1833,7 +1863,7 @@ static void *get_partial_node(struct kme
break;
}
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
return object;
}
@@ -2079,7 +2109,7 @@ static void deactivate_slab(struct kmem_
* that acquire_slab() will see a slab page that
* is frozen
*/
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
} else {
m = M_FULL;
@@ -2090,7 +2120,7 @@ static void deactivate_slab(struct kmem_
* slabs from diagnostic functions will not see
* any frozen slabs.
*/
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
}
@@ -2125,7 +2155,7 @@ static void deactivate_slab(struct kmem_
goto redo;
if (lock)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
@@ -2157,10 +2187,10 @@ static void unfreeze_partials(struct kme
n2 = get_node(s, page_to_nid(page));
if (n != n2) {
if (n)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
n = n2;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
do {
@@ -2189,7 +2219,7 @@ static void unfreeze_partials(struct kme
}
if (n)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
while (discard_page) {
page = discard_page;
@@ -2228,14 +2258,21 @@ static void put_cpu_partial(struct kmem_
pobjects = oldpage->pobjects;
pages = oldpage->pages;
if (drain && pobjects > s->cpu_partial) {
+ struct slub_free_list *f;
unsigned long flags;
+ LIST_HEAD(tofree);
/*
* partial array is full. Move the existing
* set to the per node partial list.
*/
local_irq_save(flags);
unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+ f = this_cpu_ptr(&slub_free_list);
+ raw_spin_lock(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock(&f->lock);
local_irq_restore(flags);
+ free_delayed(&tofree);
oldpage = NULL;
pobjects = 0;
pages = 0;
@@ -2307,7 +2344,22 @@ static bool has_cpu_slab(int cpu, void *
static void flush_all(struct kmem_cache *s)
{
+ LIST_HEAD(tofree);
+ int cpu;
+
on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
+ for_each_online_cpu(cpu) {
+ struct slub_free_list *f;
+
+ if (!has_cpu_slab(cpu, s))
+ continue;
+
+ f = &per_cpu(slub_free_list, cpu);
+ raw_spin_lock_irq(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock_irq(&f->lock);
+ free_delayed(&tofree);
+ }
}
/*
@@ -2362,10 +2414,10 @@ static unsigned long count_partial(struc
unsigned long x = 0;
struct page *page;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
x += get_count(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
@@ -2503,8 +2555,10 @@ static inline void *get_freelist(struct
* already disabled (which is the case for bulk allocation).
*/
static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
- unsigned long addr, struct kmem_cache_cpu *c)
+ unsigned long addr, struct kmem_cache_cpu *c,
+ struct list_head *to_free)
{
+ struct slub_free_list *f;
void *freelist;
struct page *page;
@@ -2564,6 +2618,13 @@ static void *___slab_alloc(struct kmem_c
VM_BUG_ON(!c->page->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
+
+out:
+ f = this_cpu_ptr(&slub_free_list);
+ raw_spin_lock(&f->lock);
+ list_splice_init(&f->list, to_free);
+ raw_spin_unlock(&f->lock);
+
return freelist;
new_slab:
@@ -2595,7 +2656,7 @@ static void *___slab_alloc(struct kmem_c
deactivate_slab(s, page, get_freepointer(s, freelist));
c->page = NULL;
c->freelist = NULL;
- return freelist;
+ goto out;
}
/*
@@ -2607,6 +2668,7 @@ static void *__slab_alloc(struct kmem_ca
{
void *p;
unsigned long flags;
+ LIST_HEAD(tofree);
local_irq_save(flags);
#ifdef CONFIG_PREEMPT
@@ -2618,8 +2680,9 @@ static void *__slab_alloc(struct kmem_ca
c = this_cpu_ptr(s->cpu_slab);
#endif
- p = ___slab_alloc(s, gfpflags, node, addr, c);
+ p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
local_irq_restore(flags);
+ free_delayed(&tofree);
return p;
}
@@ -2805,7 +2868,7 @@ static void __slab_free(struct kmem_cach
do {
if (unlikely(n)) {
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
n = NULL;
}
prior = page->freelist;
@@ -2837,7 +2900,7 @@ static void __slab_free(struct kmem_cach
* Otherwise the list_lock will synchronize with
* other processors updating the list of slabs.
*/
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
}
}
@@ -2879,7 +2942,7 @@ static void __slab_free(struct kmem_cach
add_partial(n, page, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return;
slab_empty:
@@ -2894,7 +2957,7 @@ static void __slab_free(struct kmem_cach
remove_full(s, n, page);
}
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
}
@@ -3099,6 +3162,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
void **p)
{
struct kmem_cache_cpu *c;
+ LIST_HEAD(to_free);
int i;
/* memcg and kmem_cache debug support */
@@ -3122,7 +3186,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
* of re-populating per CPU c->freelist
*/
p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
- _RET_IP_, c);
+ _RET_IP_, c, &to_free);
if (unlikely(!p[i]))
goto error;
@@ -3134,6 +3198,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
}
c->tid = next_tid(c->tid);
local_irq_enable();
+ free_delayed(&to_free);
/* Clear memory outside IRQ disabled fastpath loop */
if (unlikely(flags & __GFP_ZERO)) {
@@ -3281,7 +3346,7 @@ static void
init_kmem_cache_node(struct kmem_cache_node *n)
{
n->nr_partial = 0;
- spin_lock_init(&n->list_lock);
+ raw_spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
atomic_long_set(&n->nr_slabs, 0);
@@ -3625,6 +3690,10 @@ static void list_slab_objects(struct kme
const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
+#ifdef CONFIG_PREEMPT_RT_BASE
+ /* XXX move out of irq-off section */
+ slab_err(s, page, text, s->name);
+#else
void *addr = page_address(page);
void *p;
unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
@@ -3645,6 +3714,7 @@ static void list_slab_objects(struct kme
slab_unlock(page);
kfree(map);
#endif
+#endif
}
/*
@@ -3658,7 +3728,7 @@ static void free_partial(struct kmem_cac
struct page *page, *h;
BUG_ON(irqs_disabled());
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
list_for_each_entry_safe(page, h, &n->partial, lru) {
if (!page->inuse) {
remove_partial(n, page);
@@ -3668,7 +3738,7 @@ static void free_partial(struct kmem_cac
"Objects remaining in %s on __kmem_cache_shutdown()");
}
}
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
list_for_each_entry_safe(page, h, &discard, lru)
discard_slab(s, page);
@@ -3912,7 +3982,7 @@ int __kmem_cache_shrink(struct kmem_cach
for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
INIT_LIST_HEAD(promote + i);
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
/*
* Build lists of slabs to discard or promote.
@@ -3943,7 +4013,7 @@ int __kmem_cache_shrink(struct kmem_cach
for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
list_splice(promote + i, &n->partial);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
/* Release empty slabs */
list_for_each_entry_safe(page, t, &discard, lru)
@@ -4156,6 +4226,12 @@ void __init kmem_cache_init(void)
{
static __initdata struct kmem_cache boot_kmem_cache,
boot_kmem_cache_node;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
+ }
if (debug_guardpage_minorder())
slub_max_order = 0;
@@ -4364,7 +4440,7 @@ static int validate_slab_node(struct kme
struct page *page;
unsigned long flags;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru) {
validate_slab_slab(s, page, map);
@@ -4386,7 +4462,7 @@ static int validate_slab_node(struct kme
s->name, count, atomic_long_read(&n->nr_slabs));
out:
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return count;
}
@@ -4574,12 +4650,12 @@ static int list_locations(struct kmem_ca
if (!atomic_long_read(&n->nr_slabs))
continue;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
process_slab(&t, s, page, alloc, map);
list_for_each_entry(page, &n->full, lru)
process_slab(&t, s, page, alloc, map);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
}
for (i = 0; i < t.count; i++) {