seccomp: Benchmark shenanigans
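This introduces a terrible seccomp bypass, SECCOMP_FILTER_FLAG_BENCHMARK:
a filter attached with this flag is never evaluated and the syscall is
simply allowed. This lets the benchmark selftest do some more detailed
benchmarking, reporting seccomp entry overhead, per-filter overhead and
BPF overhead separately.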
Signed-off-by: Kees Cook <keescook@chromium.org>
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 4192369..7f45935 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -8,7 +8,8 @@
SECCOMP_FILTER_FLAG_LOG | \
SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
SECCOMP_FILTER_FLAG_NEW_LISTENER | \
- SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH | \
+ SECCOMP_FILTER_FLAG_BENCHMARK)
#ifdef CONFIG_SECCOMP
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index c173545..56e58a2 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -23,6 +23,7 @@
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
+/* Benchmarking only: a filter attached with this flag is skipped and the syscall allowed. */
+#define SECCOMP_FILTER_FLAG_BENCHMARK (1UL << 5)
/*
* All BPF programs must return a 32-bit value.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 55a6184..10479fb 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -111,6 +111,7 @@ struct notification {
* outside of a lifetime-guarded section. In general, this
* is only needed for handling filters shared across tasks.
* @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
+ * @benchmark: true to skip filter evaluation and return SECCOMP_RET_ALLOW
* @prev: points to a previously installed, or inherited, filter
* @prog: the BPF program to evaluate
* @notif: the struct that holds all notification related information
@@ -129,6 +130,7 @@ struct notification {
struct seccomp_filter {
refcount_t usage;
bool log;
+ bool benchmark;
struct seccomp_filter *prev;
struct bpf_prog *prog;
struct notification *notif;
@@ -242,6 +244,12 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
return 0;
}
+/*
+ * __seccomp_benchmark - stand-in for running a filter's BPF program
+ * @prog: the filter's BPF program (unused)
+ * @sd: seccomp data for the syscall (unused)
+ *
+ * Called by seccomp_run_filters() instead of evaluating the filter when
+ * the filter has ->benchmark set. Unconditionally returns SECCOMP_RET_ALLOW.
+ *
+ * NOTE(review): presumably noinline, and given the same arguments as a real
+ * filter run, so that the call itself is still part of what gets measured --
+ * confirm intent.
+ */
+static noinline u32 __seccomp_benchmark(struct bpf_prog *prog,
+ const struct seccomp_data *sd)
+{
+ return SECCOMP_RET_ALLOW;
+}
+
/**
* seccomp_run_filters - evaluates all seccomp filters against @sd
* @sd: optional seccomp data to be passed to filters
@@ -264,6 +272,11 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
if (WARN_ON(f == NULL))
return SECCOMP_RET_KILL_PROCESS;
+ if (f->benchmark) {
+ *match = f;
+ return __seccomp_benchmark(f->prog, sd);
+ }
+
/*
* All filters in the list are evaluated and the lowest BPF return
* value always takes priority (ignoring the DATA).
@@ -538,6 +551,9 @@ static long seccomp_attach_filter(unsigned int flags,
if (flags & SECCOMP_FILTER_FLAG_LOG)
filter->log = true;
+ if (flags & SECCOMP_FILTER_FLAG_BENCHMARK)
+ filter->benchmark = true;
+
/*
* If there is an existing filter, make it the prev and don't drop its
* task reference.
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index 5838c86..4b19029 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
+#include <errno.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/prctl.h>
@@ -57,6 +58,16 @@ unsigned long long calibrate(void)
}
}
+/*
+ * Same value as the kernel uapi definition of this flag.
+ * NOTE(review): presumably duplicated here so the test builds against
+ * installed headers that lack the flag -- confirm.
+ */
+#define SECCOMP_FILTER_FLAG_BENCHMARK (1UL << 5)
+
+#ifndef seccomp
+/*
+ * Thin wrapper around the raw seccomp syscall: clears errno, then passes
+ * op, flags and args straight to syscall(__NR_seccomp, ...) and returns
+ * its result.
+ *
+ * NOTE(review): the "#ifndef seccomp" guard only suppresses this definition
+ * when seccomp is a preprocessor macro; it does not detect a seccomp()
+ * function declared by the C library -- confirm this guard is sufficient.
+ */
+int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+ errno = 0;
+ return syscall(__NR_seccomp, op, flags, args);
+}
+#endif
+
int main(int argc, char *argv[])
{
struct sock_filter filter[] = {
@@ -68,7 +79,7 @@ int main(int argc, char *argv[])
};
long ret;
unsigned long long samples;
- unsigned long long native, filtered;
+ unsigned long long native, filter1, filter2, no_bpf;
if (argc > 1)
samples = strtoull(argv[1], NULL, 0);
@@ -77,23 +88,49 @@ int main(int argc, char *argv[])
printf("Benchmarking %llu samples...\n", samples);
+ /* Native call */
native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
printf("getpid native: %llu ns\n", native);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
assert(ret == 0);
+ /* One filter */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
assert(ret == 0);
- filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
- printf("getpid RET_ALLOW: %llu ns\n", filtered);
+ filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid RET_ALLOW 1 filter: %llu ns\n", filter1);
- printf("Estimated seccomp overhead per syscall: %llu ns\n",
- filtered - native);
+ /* Two filters */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ assert(ret == 0);
- if (filtered == native)
- printf("Trying running again with more samples.\n");
+ filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid RET_ALLOW 2 filters: %llu ns\n", filter2);
+
+ /* Benchmark bypass filter */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_BENCHMARK, &prog);
+ assert(ret == 0);
+
+ no_bpf = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid BPF-less allow: %llu ns\n", no_bpf);
+
+ /* Calculations */
+ printf("Estimated total seccomp overhead for 1 filter: %llu ns\n",
+ filter1 - native);
+
+ printf("Estimated total seccomp overhead for 2 filters: %llu ns\n",
+ filter2 - native);
+
+ printf("Estimated seccomp per-filter overhead: %llu ns\n",
+ filter2 - filter1);
+
+ printf("Estimated seccomp entry overhead: %llu ns\n",
+ filter1 - native - (filter2 - filter1));
+
+ printf("Estimated BPF overhead per filter: %llu ns\n",
+ filter1 - no_bpf);
return 0;
}