usercopy: split user-controlled slabs to separate caches
Some userspace APIs (e.g. ipc, seq_file) give userspace precise control
over the size of kernel kmalloc allocations, offering a trivial way to
perform heap overflow attacks in which the attacker must control
neighboring allocations of a specific size. Instead, move these APIs into
their own cache so they cannot interfere with standard kmallocs. This is
enabled with CONFIG_HARDENED_USERCOPY_SPLIT_KMALLOC.
This would frustrate common methods of heap grooming. As an example
http://cyseclabs.com/blog/cve-2016-6187-heap-off-by-one-exploit
recognizes this common method, saying "the standard msgget()
technique". Having the separate caches doesn't strictly _stop_ some
attacks, but it changes the nature of what the attacker has to do:
instead of having a universal way to groom the heap, attackers are
forced onto other paths, which may narrow the range of possible
methods. Generally speaking this can make a given attack impossible,
more expensive to develop, or less reliable.
This patch is modified from Brad Spengler/PaX Team's PAX_USERCOPY_SLABS
code in the last public patch of grsecurity/PaX based on my understanding
of the code. Changes or omissions from the original code are mine and
don't reflect the original grsecurity/PaX code.
Co-developed-by: David Windsor <dave@nullcore.net>
Signed-off-by: David Windsor <dave@nullcore.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
diff --git a/fs/seq_file.c b/fs/seq_file.c
index c6c27f1..902f749 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -29,7 +29,7 @@
static void *seq_buf_alloc(unsigned long size)
{
- return kvmalloc(size, GFP_KERNEL_ACCOUNT);
+ return kvmalloc(size, GFP_KERNEL_ACCOUNT | GFP_USERCOPY);
}
/**
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b..49e0486 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -39,8 +39,9 @@
#define ___GFP_DIRECT_RECLAIM 0x400000u
#define ___GFP_WRITE 0x800000u
#define ___GFP_KSWAPD_RECLAIM 0x1000000u
+#define ___GFP_USERCOPY 0x2000000u
#ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP 0x2000000u
+#define ___GFP_NOLOCKDEP 0x4000000u
#else
#define ___GFP_NOLOCKDEP 0
#endif
@@ -82,12 +83,17 @@
* node with no fallbacks or placement policy enforcements.
*
* __GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
+ *
+ * __GFP_USERCOPY indicates that the page will be explicitly copied to/from
+ * userspace, and may be allocated from a separate kmalloc pool.
+ *
*/
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT)
+#define __GFP_USERCOPY ((__force gfp_t)___GFP_USERCOPY)
/*
* Watermark modifiers -- controls access to emergency reserves
@@ -205,7 +211,7 @@
#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
/* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP))
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/*
@@ -283,6 +289,7 @@
#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
__GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+#define GFP_USERCOPY __GFP_USERCOPY
/* Convert GFP flags to their corresponding migrate type */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 81ebd71..a2e1c28 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -31,6 +31,8 @@
#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U)
/* Use GFP_DMA memory */
#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U)
+/* Keep this cache unmerged */
+#define SLAB_NO_MERGE ((slab_flags_t __force)0x00008000U)
/* DEBUG: Store the last owner for bug hunting */
#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U)
/* Panic if kmem_cache_create() fails */
@@ -301,6 +303,17 @@
#endif
/*
+ * Some userspace APIs (ipc, seq_file) provide precise control over
+ * the size of kernel kmallocs, which provides a trivial way to perform
+ * heap overflow attacks where the attacker must control neighboring
+ * allocations. Instead, move these APIs into their own cache so they
+ * cannot interfere with standard kmallocs.
+ */
+#ifdef CONFIG_HARDENED_USERCOPY_SPLIT_KMALLOC
+extern struct kmem_cache *kmalloc_usersized_caches[KMALLOC_SHIFT_HIGH + 1];
+#endif
+
+/*
* Figure out which kmalloc slab an allocation of a certain size
* belongs to.
* 0 = zero alloc
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 8459802..d35aeb7 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -53,7 +53,7 @@
size_t alen;
alen = min(len, DATALEN_MSG);
- msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT);
+ msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT | GFP_USERCOPY);
if (msg == NULL)
return NULL;
@@ -65,7 +65,8 @@
while (len > 0) {
struct msg_msgseg *seg;
alen = min(len, DATALEN_SEG);
- seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT);
+ seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT |
+ GFP_USERCOPY);
if (seg == NULL)
goto out_err;
*pseg = seg;
diff --git a/mm/slab.h b/mm/slab.h
index 68bdf49..c05f834 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -128,7 +128,8 @@
/* Legal flag mask for kmem_cache_create(), for various configurations */
#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
- SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
+ SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS | \
+ SLAB_NO_MERGE)
#if defined(CONFIG_DEBUG_SLAB)
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 98dcdc3..9d56cf2 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -50,7 +50,7 @@
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
- SLAB_FAILSLAB | SLAB_KASAN)
+ SLAB_FAILSLAB | SLAB_KASAN | SLAB_NO_MERGE)
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
SLAB_ACCOUNT)
@@ -946,6 +946,11 @@
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif
+#ifdef CONFIG_HARDENED_USERCOPY_SPLIT_KMALLOC
+struct kmem_cache *kmalloc_usersized_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
+EXPORT_SYMBOL(kmalloc_usersized_caches);
+#endif
+
/*
* Conversion table for small slabs sizes / 8 to the index in the
* kmalloc array. This is necessary for slabs < 192 since we have non power
@@ -1010,6 +1015,12 @@
return kmalloc_dma_caches[index];
#endif
+
+#ifdef CONFIG_HARDENED_USERCOPY_SPLIT_KMALLOC
+ if (unlikely((flags & GFP_USERCOPY)))
+ return kmalloc_usersized_caches[index];
+#endif
+
return kmalloc_caches[index];
}
@@ -1131,6 +1142,22 @@
}
}
#endif
+
+#ifdef CONFIG_HARDENED_USERCOPY_SPLIT_KMALLOC
+ for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
+ struct kmem_cache *s = kmalloc_caches[i];
+
+ if (s) {
+ int size = kmalloc_size(i);
+ char *n = kasprintf(GFP_NOWAIT,
+ "usersized-kmalloc-%d", size);
+
+ BUG_ON(!n);
+ kmalloc_usersized_caches[i] = create_kmalloc_cache(n,
+ size, SLAB_NO_MERGE | flags, 0, size);
+ }
+ }
+#endif /* CONFIG_HARDENED_USERCOPY_SPLIT_KMALLOC */
}
#endif /* !CONFIG_SLOB */
diff --git a/security/Kconfig b/security/Kconfig
index c430206..5904f70 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -189,6 +189,18 @@
been removed. This config is intended to be used only while
trying to find such users.
+config HARDENED_USERCOPY_SPLIT_KMALLOC
+ bool "Isolate kernel caches from user-controlled allocations"
+ default HARDENED_USERCOPY
+ help
+ This option creates a separate set of kmalloc caches used to
+ satisfy allocations from userspace APIs that allow for
+ fine-grained control over the size of kernel allocations.
+ Without this, it is much easier for attackers to precisely
+ size and attack heap overflows. If their allocations are
+ confined to a separate cache, attackers must find other ways
+ to prepare heap attacks that will be near their desired target.
+
config FORTIFY_SOURCE
bool "Harden common str/mem functions against buffer overflows"
depends on ARCH_HAS_FORTIFY_SOURCE