Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
Pull slab update from Pekka Enberg:
"Highlights:
- Fix for boot-time problems on some architectures due to
init_lock_keys() not respecting kmalloc_caches boundaries
(Christoph Lameter)
- CONFIG_SLUB_CPU_PARTIAL requested by RT folks (Joonsoo Kim)
- Fix for excessive slab freelist draining (Wanpeng Li)
- SLUB and SLOB cleanups and fixes (various people)"
I ended up editing the branch: this avoids two commits at the end that
were immediately reverted, and instead I just applied the one-line fix
in between myself.
* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
slub: Check for page NULL before doing the node_match check
mm/slab: Give s_next and s_stop slab-specific names
slob: Check for NULL pointer before calling ctor()
slub: Make cpu partial slab support configurable
slab: add kmalloc() to kernel API documentation
slab: fix init_lock_keys
slob: use DIV_ROUND_UP where possible
slub: do not put a slab to cpu partial list when cpu_partial is 0
mm/slub: Use node_nr_slabs and node_nr_objs in get_slabinfo
mm/slub: Drop unnecessary nr_partials
mm/slab: Fix /proc/slabinfo unwriteable for slab
mm/slab: Sharing s_next and s_stop between slab and slub
mm/slab: Fix drain freelist excessively
slob: Rework #ifdeffery in slab.h
mm, slab: moved kmem_cache_alloc_node comment to correct place
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0c62175..6c5cc0e 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -169,11 +169,7 @@
struct list_head list; /* List of all slab caches on the system */
};
-#define KMALLOC_MAX_SIZE (1UL << 30)
-
-#include <linux/slob_def.h>
-
-#else /* CONFIG_SLOB */
+#endif /* CONFIG_SLOB */
/*
* Kmalloc array related definitions
@@ -195,7 +191,9 @@
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 5
#endif
-#else
+#endif
+
+#ifdef CONFIG_SLUB
/*
* SLUB allocates up to order 2 pages directly and otherwise
* passes the request to the page allocator.
@@ -207,6 +205,19 @@
#endif
#endif
+#ifdef CONFIG_SLOB
+/*
+ * SLOB passes all page size and larger requests to the page allocator.
+ * No kmalloc array is necessary since objects of different sizes can
+ * be allocated from the same page.
+ */
+#define KMALLOC_SHIFT_MAX 30
+#define KMALLOC_SHIFT_HIGH PAGE_SHIFT
+#ifndef KMALLOC_SHIFT_LOW
+#define KMALLOC_SHIFT_LOW 3
+#endif
+#endif
+
/* Maximum allocatable size */
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
/* Maximum size for which we actually use a slab cache */
@@ -221,6 +232,7 @@
#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
#endif
+#ifndef CONFIG_SLOB
extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
#ifdef CONFIG_ZONE_DMA
extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
@@ -275,13 +287,18 @@
/* Will never be reached. Needed because the compiler may complain */
return -1;
}
+#endif /* !CONFIG_SLOB */
#ifdef CONFIG_SLAB
#include <linux/slab_def.h>
-#elif defined(CONFIG_SLUB)
+#endif
+
+#ifdef CONFIG_SLUB
#include <linux/slub_def.h>
-#else
-#error "Unknown slab allocator"
+#endif
+
+#ifdef CONFIG_SLOB
+#include <linux/slob_def.h>
#endif
/*
@@ -291,6 +308,7 @@
*/
static __always_inline int kmalloc_size(int n)
{
+#ifndef CONFIG_SLOB
if (n > 2)
return 1 << n;
@@ -299,10 +317,9 @@
if (n == 2 && KMALLOC_MIN_SIZE <= 64)
return 192;
-
+#endif
return 0;
}
-#endif /* !CONFIG_SLOB */
/*
* Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
@@ -356,9 +373,8 @@
void print_slabinfo_header(struct seq_file *m);
/**
- * kmalloc_array - allocate memory for an array.
- * @n: number of elements.
- * @size: element size.
+ * kmalloc - allocate memory
+ * @size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
*
* The @flags argument may be one of:
@@ -405,6 +421,17 @@
* There are other flags available as well, but these are not intended
* for general use, and so are not documented here. For a full list of
* potential flags, always refer to linux/gfp.h.
+ *
+ * kmalloc is the normal method of allocating memory
+ * in the kernel.
+ */
+static __always_inline void *kmalloc(size_t size, gfp_t flags);
+
+/**
+ * kmalloc_array - allocate memory for an array.
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate (see kmalloc).
*/
static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
{
@@ -428,7 +455,7 @@
/**
* kmalloc_node - allocate memory from a specific node
* @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kcalloc).
+ * @flags: the type of memory to allocate (see kmalloc).
* @node: node to allocate from.
*
* kmalloc() for non-local nodes, used to allocate from a specific node
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index f28e14a..095a5a4 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -18,14 +18,6 @@
return __kmalloc_node(size, flags, node);
}
-/**
- * kmalloc - allocate memory
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kcalloc).
- *
- * kmalloc is the normal method of allocating memory
- * in the kernel.
- */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
return __kmalloc_node(size, flags, NUMA_NO_NODE);
diff --git a/init/Kconfig b/init/Kconfig
index 54d3fa5..247084b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1596,6 +1596,17 @@
endchoice
+config SLUB_CPU_PARTIAL
+ default y
+ depends on SLUB
+ bool "SLUB per cpu partial cache"
+ help
+ Per cpu partial caches accelerate object allocation and freeing
+ that is local to a processor at the price of more indeterminism
+ in the latency of the free. On overflow these caches will be cleared
+ which requires the taking of locks that may cause latency spikes.
+ Typically one would choose no for a realtime system.
+
config MMAP_ALLOW_UNINITIALIZED
bool "Allow mmapped anonymous memory to be uninitialized"
depends on EXPERT && !MMU
diff --git a/mm/slab.c b/mm/slab.c
index 8ccd296..35cb0c8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -565,7 +565,7 @@
if (slab_state < UP)
return;
- for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) {
+ for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
struct kmem_cache_node *n;
struct kmem_cache *cache = kmalloc_caches[i];
@@ -1180,6 +1180,12 @@
return 0;
}
+static inline int slabs_tofree(struct kmem_cache *cachep,
+ struct kmem_cache_node *n)
+{
+ return (n->free_objects + cachep->num - 1) / cachep->num;
+}
+
static void __cpuinit cpuup_canceled(long cpu)
{
struct kmem_cache *cachep;
@@ -1241,7 +1247,7 @@
n = cachep->node[node];
if (!n)
continue;
- drain_freelist(cachep, n, n->free_objects);
+ drain_freelist(cachep, n, slabs_tofree(cachep, n));
}
}
@@ -1408,7 +1414,7 @@
if (!n)
continue;
- drain_freelist(cachep, n, n->free_objects);
+ drain_freelist(cachep, n, slabs_tofree(cachep, n));
if (!list_empty(&n->slabs_full) ||
!list_empty(&n->slabs_partial)) {
@@ -2532,7 +2538,7 @@
if (!n)
continue;
- drain_freelist(cachep, n, n->free_objects);
+ drain_freelist(cachep, n, slabs_tofree(cachep, n));
ret += !list_empty(&n->slabs_full) ||
!list_empty(&n->slabs_partial);
@@ -3338,18 +3344,6 @@
return obj;
}
-/**
- * kmem_cache_alloc_node - Allocate an object on the specified node
- * @cachep: The cache to allocate from.
- * @flags: See kmalloc().
- * @nodeid: node number of the target node.
- * @caller: return address of caller, used for debug information
- *
- * Identical to kmem_cache_alloc but it will allocate memory on the given
- * node, which can improve the performance for cpu bound structures.
- *
- * Fallback to other node is possible if __GFP_THISNODE is not set.
- */
static __always_inline void *
slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
unsigned long caller)
@@ -3643,6 +3637,17 @@
#endif
#ifdef CONFIG_NUMA
+/**
+ * kmem_cache_alloc_node - Allocate an object on the specified node
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ * @nodeid: node number of the target node.
+ *
+ * Identical to kmem_cache_alloc but it will allocate memory on the given
+ * node, which can improve the performance for cpu bound structures.
+ *
+ * Fallback to other node is possible if __GFP_THISNODE is not set.
+ */
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
@@ -4431,20 +4436,10 @@
return 0;
}
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
-{
- return seq_list_next(p, &slab_caches, pos);
-}
-
-static void s_stop(struct seq_file *m, void *p)
-{
- mutex_unlock(&slab_mutex);
-}
-
static const struct seq_operations slabstats_op = {
.start = leaks_start,
- .next = s_next,
- .stop = s_stop,
+ .next = slab_next,
+ .stop = slab_stop,
.show = leaks_show,
};
diff --git a/mm/slab.h b/mm/slab.h
index f96b49e..620ceed 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -271,3 +271,6 @@
#endif
};
+
+void *slab_next(struct seq_file *m, void *p, loff_t *pos);
+void slab_stop(struct seq_file *m, void *p);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2d41450..538bade 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -497,6 +497,13 @@
#ifdef CONFIG_SLABINFO
+
+#ifdef CONFIG_SLAB
+#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
+#else
+#define SLABINFO_RIGHTS S_IRUSR
+#endif
+
void print_slabinfo_header(struct seq_file *m)
{
/*
@@ -531,12 +538,12 @@
return seq_list_start(&slab_caches, *pos);
}
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
return seq_list_next(p, &slab_caches, pos);
}
-static void s_stop(struct seq_file *m, void *p)
+void slab_stop(struct seq_file *m, void *p)
{
mutex_unlock(&slab_mutex);
}
@@ -613,8 +620,8 @@
*/
static const struct seq_operations slabinfo_op = {
.start = s_start,
- .next = s_next,
- .stop = s_stop,
+ .next = slab_next,
+ .stop = slab_stop,
.show = s_show,
};
@@ -633,7 +640,8 @@
static int __init slab_proc_init(void)
{
- proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
+ proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
+ &proc_slabinfo_operations);
return 0;
}
module_init(slab_proc_init);
diff --git a/mm/slob.c b/mm/slob.c
index eeed4a0..91bd3f2 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -122,7 +122,7 @@
}
#define SLOB_UNIT sizeof(slob_t)
-#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
+#define SLOB_UNITS(size) DIV_ROUND_UP(size, SLOB_UNIT)
/*
* struct slob_rcu is inserted at the tail of allocated slob blocks, which
@@ -554,7 +554,7 @@
flags, node);
}
- if (c->ctor)
+ if (b && c->ctor)
c->ctor(b);
kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
diff --git a/mm/slub.c b/mm/slub.c
index 57707f0..3b482c8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -123,6 +123,15 @@
#endif
}
+static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ return !kmem_cache_debug(s);
+#else
+ return false;
+#endif
+}
+
/*
* Issues still to be resolved:
*
@@ -1573,7 +1582,8 @@
put_cpu_partial(s, page, 0);
stat(s, CPU_PARTIAL_NODE);
}
- if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
+ if (!kmem_cache_has_cpu_partial(s)
+ || available > s->cpu_partial / 2)
break;
}
@@ -1884,6 +1894,7 @@
static void unfreeze_partials(struct kmem_cache *s,
struct kmem_cache_cpu *c)
{
+#ifdef CONFIG_SLUB_CPU_PARTIAL
struct kmem_cache_node *n = NULL, *n2 = NULL;
struct page *page, *discard_page = NULL;
@@ -1938,6 +1949,7 @@
discard_slab(s, page);
stat(s, FREE_SLAB);
}
+#endif
}
/*
@@ -1951,10 +1963,14 @@
*/
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
+#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *oldpage;
int pages;
int pobjects;
+ if (!s->cpu_partial)
+ return;
+
do {
pages = 0;
pobjects = 0;
@@ -1987,6 +2003,7 @@
page->next = oldpage;
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+#endif
}
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
@@ -2358,7 +2375,7 @@
object = c->freelist;
page = c->page;
- if (unlikely(!object || !node_match(page, node)))
+ if (unlikely(!object || !page || !node_match(page, node)))
object = __slab_alloc(s, gfpflags, node, addr, c);
else {
@@ -2495,7 +2512,7 @@
new.inuse--;
if ((!new.inuse || !prior) && !was_frozen) {
- if (!kmem_cache_debug(s) && !prior)
+ if (kmem_cache_has_cpu_partial(s) && !prior)
/*
* Slab was on no list before and will be partially empty
@@ -2550,8 +2567,9 @@
* Objects left in the slab. If it was not on the partial list before
* then add it.
*/
- if (kmem_cache_debug(s) && unlikely(!prior)) {
- remove_full(s, page);
+ if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
+ if (kmem_cache_debug(s))
+ remove_full(s, page);
add_partial(n, page, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
@@ -3059,7 +3077,7 @@
* per node list when we run out of per cpu objects. We only fetch 50%
* to keep some capacity around for frees.
*/
- if (kmem_cache_debug(s))
+ if (!kmem_cache_has_cpu_partial(s))
s->cpu_partial = 0;
else if (s->size >= PAGE_SIZE)
s->cpu_partial = 2;
@@ -4456,7 +4474,7 @@
err = strict_strtoul(buf, 10, &objects);
if (err)
return err;
- if (objects && kmem_cache_debug(s))
+ if (objects && !kmem_cache_has_cpu_partial(s))
return -EINVAL;
s->cpu_partial = objects;
@@ -5269,7 +5287,6 @@
#ifdef CONFIG_SLABINFO
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
{
- unsigned long nr_partials = 0;
unsigned long nr_slabs = 0;
unsigned long nr_objs = 0;
unsigned long nr_free = 0;
@@ -5281,9 +5298,8 @@
if (!n)
continue;
- nr_partials += n->nr_partial;
- nr_slabs += atomic_long_read(&n->nr_slabs);
- nr_objs += atomic_long_read(&n->total_objects);
+ nr_slabs += node_nr_slabs(n);
+ nr_objs += node_nr_objs(n);
nr_free += count_partial(n, count_free);
}