#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <assert.h>
#include <string.h>
#include <errno.h>
#include <sched.h>
#include <pthread.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/prctl.h>
#ifndef PR_SET_TASK_ISOLATION // Not in system headers yet?
# define PR_SET_TASK_ISOLATION 48
# define PR_GET_TASK_ISOLATION 49
# define PR_TASK_ISOLATION_ENABLE (1 << 0)
# define PR_TASK_ISOLATION_USERSIG (1 << 1)
# define PR_TASK_ISOLATION_SET_SIG(sig) (((sig) & 0x7f) << 8)
# define PR_TASK_ISOLATION_GET_SIG(bits) (((bits) >> 8) & 0x7f)
# define PR_TASK_ISOLATION_NOSIG \
(PR_TASK_ISOLATION_USERSIG | PR_TASK_ISOLATION_SET_SIG(0))
#endif
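// As the tests below exercise them: PR_TASK_ISOLATION_ENABLE turns task
// isolation on for the calling task; adding PR_TASK_ISOLATION_USERSIG with
// PR_TASK_ISOLATION_SET_SIG(sig) requests that SIG (rather than the default
// SIGKILL) be delivered on an isolation violation; PR_TASK_ISOLATION_NOSIG
// suppresses the signal entirely so the task can re-enter the kernel freely.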
// The cpu we are using for isolation tests.
static int task_isolation_cpu;
// Overall status, maintained as tests run.
static int exit_status = EXIT_SUCCESS;
// Set affinity to a single cpu or die if trying to do so fails.
void set_my_cpu(int cpu)
{
cpu_set_t set;
CPU_ZERO(&set);
CPU_SET(cpu, &set);
int rc = sched_setaffinity(0, sizeof(cpu_set_t), &set);
assert(rc == 0);
}
// Run a child process in task isolation mode and report its status.
// The child does mlockall() and moves itself to the task isolation cpu.
// It then runs SETUP_FUNC (if specified), calls prctl(PR_SET_TASK_ISOLATION)
// with FLAGS (if non-zero), and then invokes TEST_FUNC and exits
// with its status.
static int run_test(void (*setup_func)(), int (*test_func)(), int flags)
{
fflush(stdout);
int pid = fork();
assert(pid >= 0);
if (pid != 0) {
// In parent; wait for child and return its status.
int status;
waitpid(pid, &status, 0);
return status;
}
// In child.
int rc = mlockall(MCL_CURRENT);
assert(rc == 0);
set_my_cpu(task_isolation_cpu);
if (setup_func)
setup_func();
if (flags) {
int rc;
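// prctl() may fail transiently with EAGAIN (presumably while the cpu
// is not yet fully quiesced), so keep retrying; any other failure is fatal.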
do
rc = prctl(PR_SET_TASK_ISOLATION, flags);
while (rc != 0 && errno == EAGAIN);
if (rc != 0) {
printf("couldn't enable isolation (%d): FAIL\n", errno);
exit(EXIT_FAILURE);
}
}
rc = test_func();
exit(rc);
}
// Run a test and ensure it is killed with SIGKILL by default,
// for whatever misdemeanor is committed in TEST_FUNC.
// Also run it with SIGUSR1 to make sure user-specified signals work.
static void test_killed(const char *testname, void (*setup_func)(),
int (*test_func)())
{
int status = run_test(setup_func, test_func, PR_TASK_ISOLATION_ENABLE);
if (WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) {
printf("%s: OK\n", testname);
} else {
printf("%s: FAIL (%#x)\n", testname, status);
exit_status = EXIT_FAILURE;
}
status = run_test(setup_func, test_func,
PR_TASK_ISOLATION_ENABLE | PR_TASK_ISOLATION_USERSIG |
PR_TASK_ISOLATION_SET_SIG(SIGUSR1));
if (WIFSIGNALED(status) && WTERMSIG(status) == SIGUSR1) {
printf("%s (SIGUSR1): OK\n", testname);
} else {
printf("%s (SIGUSR1): FAIL (%#x)\n", testname, status);
exit_status = EXIT_FAILURE;
}
}
// Run a test and make sure it exits with success.
static void test_ok(const char *testname, void (*setup_func)(),
int (*test_func)())
{
int status = run_test(setup_func, test_func, PR_TASK_ISOLATION_ENABLE);
if (status == EXIT_SUCCESS) {
printf("%s: OK\n", testname);
} else {
printf("%s: FAIL (%#x)\n", testname, status);
exit_status = EXIT_FAILURE;
}
}
// Run a test with no signals and make sure it exits with success.
static void test_nosig(const char *testname, void (*setup_func)(),
int (*test_func)())
{
int status =
run_test(setup_func, test_func,
PR_TASK_ISOLATION_ENABLE | PR_TASK_ISOLATION_NOSIG);
if (status == EXIT_SUCCESS) {
printf("%s: OK\n", testname);
} else {
printf("%s: FAIL (%#x)\n", testname, status);
exit_status = EXIT_FAILURE;
}
}
// Mapping address passed from setup function to test function.
static char *fault_file_mapping;
// mmap() a file so we can test touching a page that has not yet been faulted in.
static void setup_fault(void)
{
char fault_file[] = "/tmp/isolation_XXXXXX";
int fd = mkstemp(fault_file);
assert(fd >= 0);
int rc = ftruncate(fd, getpagesize());
assert(rc == 0);
fault_file_mapping = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
assert(fault_file_mapping != MAP_FAILED);
close(fd);
unlink(fault_file);
}
// Now touch the not-yet-faulted-in page (and be killed).
static int do_fault(void)
{
*fault_file_mapping = 1;
return EXIT_FAILURE;
}
// Make a syscall (and be killed).
static int do_syscall(void)
{
write(STDOUT_FILENO, "goodbye, world\n", 13);
return EXIT_FAILURE;
}
// Turn isolation back off and don't be killed.
static int do_syscall_off(void)
{
prctl(PR_SET_TASK_ISOLATION, 0);
write(STDOUT_FILENO, "==> hello, world\n", 17);
return EXIT_SUCCESS;
}
// If we're not getting a signal, make sure we can do multiple system calls.
static int do_syscall_multi(void)
{
write(STDOUT_FILENO, "==> hello, world 1\n", 19);
write(STDOUT_FILENO, "==> hello, world 2\n", 19);
return EXIT_SUCCESS;
}
#ifdef __aarch64__
// ARM64 uses tlbi instructions, so it doesn't need to interrupt the remote core.
static void test_munmap(void) {}
#else
// Fork a thread that will munmap() after a short while.
// It will deliver a TLB flush to the task isolation core.
static void *start_munmap(void *p)
{
usleep(500000); // 0.5s
munmap(p, getpagesize());
return 0;
}
static void setup_munmap(void)
{
// First, go back to cpu 0 and allocate some memory.
set_my_cpu(0);
void *p = mmap(0, getpagesize(), PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_POPULATE|MAP_PRIVATE, 0, 0);
assert(p != MAP_FAILED);
// Now fire up a thread that will wait half a second on cpu 0
// and then munmap the mapping.
pthread_t thr;
int rc = pthread_create(&thr, NULL, start_munmap, p);
assert(rc == 0);
// Back to the task-isolation cpu.
set_my_cpu(task_isolation_cpu);
}
// Global variable to avoid the compiler outsmarting us.
volatile int munmap_spin;
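// Spin on the isolated cpu; the remote munmap() should deliver a TLB-flush
// interrupt here, which violates isolation and kills us before the loop ends.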
static int do_munmap(void)
{
while (munmap_spin < 1000000000)
++munmap_spin;
return EXIT_FAILURE;
}
static void test_munmap(void)
{
test_killed("test_munmap", setup_munmap, do_munmap);
}
#endif
#ifdef __tilegx__
// Make an unaligned access (and be killed).
// Only for tilegx, since other platforms don't do in-kernel fixups.
static int do_unaligned(void)
{
static int buf[2];
volatile int* addr = (volatile int *)((char *)buf + 1);
*addr;
asm("nop");
return EXIT_FAILURE;
}
static void test_unaligned(void)
{
test_killed("test_unaligned", NULL, do_unaligned);
}
#else
static void test_unaligned(void) {}
#endif
// Fork a process that will spin annoyingly on the same core
// for half a second.  Since prctl() won't succeed while that task is
// actively running on the core, we follow this handshake sequence:
//
// 1. Child (forked in setup_quiesce, below) starts up on cpu 0 and
//    busy-waits for the parent to change the shared state word from 0.
// 2. Parent (in do_quiesce, below) moves to the task isolation cpu,
//    enables isolation in "nosig" mode, then sets the state word to 1.
//    At that point it is in isolation mode and the child is still on
//    cpu 0.  Now, as soon as the parent is scheduled out, it won't
//    schedule back in until the child stops spinning.
// 3. Child sees the state change to 1, migrates to the task isolation
//    cpu, sets the state to 2, and spins there for half a second
//    before exiting.
// 4. Parent spins waiting for the state to reach 2, then makes one
//    syscall.  The syscall should not return until the child finishes
//    spinning, so the whole test should take roughly half a second.
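// *statep is the shared handshake word described above; *childstate tracks
// the child's progress so the timeout path can report where it got stuck.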
volatile int *statep, *childstate;
struct timeval quiesce_start, quiesce_end;
int child_pid;
static void setup_quiesce(void)
{
// First, go back to cpu 0 and allocate some shared memory.
set_my_cpu(0);
statep = mmap(0, getpagesize(), PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_SHARED, 0, 0);
assert(statep != MAP_FAILED);
childstate = statep + 1;
gettimeofday(&quiesce_start, NULL);
// Fork and fault in all memory in both.
child_pid = fork();
assert(child_pid >= 0);
if (child_pid == 0)
*childstate = 1;
int rc = mlockall(MCL_CURRENT);
assert(rc == 0);
if (child_pid != 0) {
set_my_cpu(task_isolation_cpu);
return;
}
// In child. Wait until parent notifies us that it has completed
// its prctl, then jump to its cpu and let it know.
*childstate = 2;
while (*statep == 0)
;
*childstate = 3;
// printf("child: jumping to cpu %d\n", task_isolation_cpu);
set_my_cpu(task_isolation_cpu);
// printf("child: jumped to cpu %d\n", task_isolation_cpu);
*statep = 2;
*childstate = 4;
// Now we are competing for the runqueue on task_isolation_cpu.
// Spin for half a second to ensure the parent gets caught in kernel space.
struct timeval start, tv;
gettimeofday(&start, NULL);
while (1) {
gettimeofday(&tv, NULL);
double time = (tv.tv_sec - start.tv_sec) +
(tv.tv_usec - start.tv_usec) / 1000000.0;
if (time >= 0.5)
exit(0);
}
}
static int do_quiesce(void)
{
double time;
int rc;
rc = prctl(PR_SET_TASK_ISOLATION,
PR_TASK_ISOLATION_ENABLE | PR_TASK_ISOLATION_NOSIG);
if (rc != 0) {
prctl(PR_SET_TASK_ISOLATION, 0);
printf("prctl failed: rc %d", rc);
goto fail;
}
*statep = 1;
// Wait for child to come disturb us.
while (*statep == 1) {
gettimeofday(&quiesce_end, NULL);
time = (quiesce_end.tv_sec - quiesce_start.tv_sec) +
(quiesce_end.tv_usec - quiesce_start.tv_usec)/1000000.0;
if (time > 0.1 && *statep == 1) {
prctl(PR_SET_TASK_ISOLATION, 0);
printf("timed out at %gs in child migrate loop (%d)\n",
time, *childstate);
char buf[100];
sprintf(buf, "cat /proc/%d/stack", child_pid);
system(buf);
goto fail;
}
}
assert(*statep == 2);
// At this point the child is spinning, so any interrupt will keep us
// in kernel space. Make a syscall to make sure it happens at least
// once during the half second that the child is spinning.
kill(0, 0);
gettimeofday(&quiesce_end, NULL);
prctl(PR_SET_TASK_ISOLATION, 0);
time = (quiesce_end.tv_sec - quiesce_start.tv_sec) +
(quiesce_end.tv_usec - quiesce_start.tv_usec) / 1000000.0;
if (time < 0.4 || time > 0.6) {
printf("expected 1s wait after quiesce: was %g\n", time);
goto fail;
}
kill(child_pid, SIGKILL);
return EXIT_SUCCESS;
fail:
kill(child_pid, SIGKILL);
return EXIT_FAILURE;
}
#ifdef __tile__
#include <arch/spr_def.h>
#endif
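// Read a per-cpu cycle/timer counter directly from userspace so the jitter
// loop below never has to enter the kernel.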
static inline unsigned long get_cycle_count(void)
{
#ifdef __x86_64__
unsigned int lower, upper;
__asm__ __volatile__("rdtsc" : "=a"(lower), "=d"(upper));
return lower | ((unsigned long)upper << 32);
#elif defined(__tile__)
return __insn_mfspr(SPR_CYCLE);
#elif defined(__aarch64__)
unsigned long vtick;
__asm__ volatile("mrs %0, cntvct_el0" : "=r" (vtick));
return vtick;
#else
#error Unsupported architecture
#endif
}
// Histogram of cycle counts up to HISTSIZE cycles.
#define HISTSIZE 500
long hist[HISTSIZE];
// Information on loss of control of the cpu (more than HISTSIZE cycles).
struct jitter_info {
unsigned long at; // cycle of jitter event
long cycles; // how long we lost the cpu for
};
#define MAX_EVENTS 100
volatile struct jitter_info jitter[MAX_EVENTS];
unsigned int count; // index into jitter[]
void jitter_summarize(void)
{
printf("INFO: loop times:\n");
unsigned int i;
for (i = 0; i < HISTSIZE; ++i)
if (hist[i])
printf(" %d x %ld\n", i, hist[i]);
if (count)
printf("ERROR: jitter:\n");
for (i = 0; i < count; ++i)
printf(" %ld: %ld cycles\n", jitter[i].at, jitter[i].cycles);
if (count == sizeof(jitter)/sizeof(jitter[0]))
printf(" ... more\n");
}
void jitter_sigint(int sig)
{
(void)sig;
printf("\n");
jitter_summarize();
exit(exit_status);
}
void test_jitter(unsigned long waitticks)
{
printf("testing task isolation jitter for %ld ticks\n", waitticks);
signal(SIGINT, jitter_sigint);
set_my_cpu(task_isolation_cpu);
int rc = mlockall(MCL_CURRENT);
assert(rc == 0);
do
rc = prctl(PR_SET_TASK_ISOLATION, PR_TASK_ISOLATION_ENABLE);
while (rc != 0 && errno == EAGAIN);
if (rc != 0) {
printf("couldn't enable isolation (%d): FAIL\n", errno);
exit(EXIT_FAILURE);
}
unsigned long start = get_cycle_count();
unsigned long last = start;
unsigned long elapsed;
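// Tight loop reading the cycle counter: gaps of less than HISTSIZE cycles
// are histogrammed as normal loop iterations; anything longer means we lost
// the cpu (an interrupt or context switch) and is recorded as a jitter event.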
do {
unsigned long next = get_cycle_count();
unsigned long delta = next - last;
elapsed = next - start;
if (__builtin_expect(delta >= HISTSIZE, 0)) {
exit_status = EXIT_FAILURE;
if (count < sizeof(jitter)/sizeof(jitter[0])) {
jitter[count].cycles = delta;
jitter[count].at = elapsed;
++count;
}
} else {
hist[delta]++;
}
last = next;
} while (elapsed < waitticks);
prctl(PR_SET_TASK_ISOLATION, 0);
jitter_summarize();
}
int main(int argc, char **argv)
{
// How many billion ticks to wait after running the other tests?
unsigned long waitticks;
if (argc == 1)
waitticks = 10;
else if (argc == 2)
waitticks = strtol(argv[1], NULL, 10);
else {
printf("syntax: isolation [gigaticks]\n");
exit(EXIT_FAILURE);
}
waitticks *= 1000000000;
// Test that the /sys device is present and pick a cpu.
FILE *f = fopen("/sys/devices/system/cpu/task_isolation", "r");
if (f == NULL) {
printf("/sys device: FAIL (%s)\n", strerror(errno));
exit(EXIT_FAILURE);
}
char buf[100];
char *result = fgets(buf, sizeof(buf), f);
assert(result == buf);
fclose(f);
if (*buf == '\n') {
printf("No task_isolation cores configured; please reboot with task_isolation=NNN\n");
exit(EXIT_FAILURE);
}
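// The file holds a cpulist (e.g. "4" or "4-7,9"); use the first cpu listed.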
char *end;
task_isolation_cpu = strtol(buf, &end, 10);
assert(end != buf);
assert(*end == ',' || *end == '-' || *end == '\n');
assert(task_isolation_cpu >= 0);
printf("/sys device : OK (using task isolation cpu %d)\n",
task_isolation_cpu);
// Test to see if with no mask set, we fail.
if (prctl(PR_SET_TASK_ISOLATION, PR_TASK_ISOLATION_ENABLE) == 0 ||
errno != EINVAL) {
printf("prctl unaffinitized: FAIL\n");
exit_status = EXIT_FAILURE;
} else {
printf("prctl unaffinitized: OK\n");
}
// Or if affinitized to the wrong cpu.
set_my_cpu(0);
if (prctl(PR_SET_TASK_ISOLATION, PR_TASK_ISOLATION_ENABLE) == 0 ||
errno != EINVAL) {
printf("prctl on cpu 0: FAIL\n");
exit_status = EXIT_FAILURE;
} else {
printf("prctl on cpu 0: OK\n");
}
// Run the tests.
test_killed("test_fault", setup_fault, do_fault);
test_killed("test_syscall", NULL, do_syscall);
test_munmap();
test_unaligned();
test_ok("test_off", NULL, do_syscall_off);
test_nosig("test_multi", NULL, do_syscall_multi);
test_nosig("test_quiesce", setup_quiesce, do_quiesce);
// Exit failure if any test failed.
if (exit_status != EXIT_SUCCESS) {
printf("Skipping jitter testing due to test failures\n");
return exit_status;
}
test_jitter(waitticks);
return exit_status;
}