| #define _GNU_SOURCE |
| #include <stdio.h> |
| #include <unistd.h> |
| #include <stdlib.h> |
| #include <fcntl.h> |
| #include <assert.h> |
| #include <string.h> |
| #include <errno.h> |
| #include <sched.h> |
| #include <pthread.h> |
| #include <sys/wait.h> |
| #include <sys/mman.h> |
| #include <sys/time.h> |
| #include <sys/prctl.h> |
| |
| #ifndef PR_SET_TASK_ISOLATION // Not in system headers yet? |
| # define PR_SET_TASK_ISOLATION 48 |
| # define PR_GET_TASK_ISOLATION 49 |
| # define PR_TASK_ISOLATION_ENABLE (1 << 0) |
| # define PR_TASK_ISOLATION_USERSIG (1 << 1) |
| # define PR_TASK_ISOLATION_SET_SIG(sig) (((sig) & 0x7f) << 8) |
| # define PR_TASK_ISOLATION_GET_SIG(bits) (((bits) >> 8) & 0x7f) |
| # define PR_TASK_ISOLATION_NOSIG \ |
| (PR_TASK_ISOLATION_USERSIG | PR_TASK_ISOLATION_SET_SIG(0)) |
| #endif |
| |
| // The cpu we are using for isolation tests. |
| static int task_isolation_cpu; |
| |
| // Overall status, maintained as tests run. |
| static int exit_status = EXIT_SUCCESS; |
| |
| // Set affinity to a single cpu or die if trying to do so fails. |
| void set_my_cpu(int cpu) |
| { |
| cpu_set_t set; |
| CPU_ZERO(&set); |
| CPU_SET(cpu, &set); |
| int rc = sched_setaffinity(0, sizeof(cpu_set_t), &set); |
| assert(rc == 0); |
| } |
| |
| // Run a child process in task isolation mode and report its status. |
| // The child does mlockall() and moves itself to the task isolation cpu. |
| // It then runs SETUP_FUNC (if specified), calls prctl(PR_SET_TASK_ISOLATION, ) |
| // with FLAGS (if non-zero), and then invokes TEST_FUNC and exits |
| // with its status. |
| static int run_test(void (*setup_func)(), int (*test_func)(), int flags) |
| { |
| fflush(stdout); |
| int pid = fork(); |
| assert(pid >= 0); |
| if (pid != 0) { |
| // In parent; wait for child and return its status. |
| int status; |
| waitpid(pid, &status, 0); |
| return status; |
| } |
| |
| // In child. |
| int rc = mlockall(MCL_CURRENT); |
| assert(rc == 0); |
| set_my_cpu(task_isolation_cpu); |
| if (setup_func) |
| setup_func(); |
| if (flags) { |
| int rc; |
| do |
| rc = prctl(PR_SET_TASK_ISOLATION, flags); |
| while (rc != 0 && errno == EAGAIN); |
| if (rc != 0) { |
| printf("couldn't enable isolation (%d): FAIL\n", errno); |
| exit(EXIT_FAILURE); |
| } |
| } |
| rc = test_func(); |
| exit(rc); |
| } |
| |
| // Run a test and ensure it is killed with SIGKILL by default, |
| // for whatever misdemeanor is committed in TEST_FUNC. |
| // Also test it with SIGUSR1 as well to make sure that works. |
| static void test_killed(const char *testname, void (*setup_func)(), |
| int (*test_func)()) |
| { |
| int status = run_test(setup_func, test_func, PR_TASK_ISOLATION_ENABLE); |
| if (WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) { |
| printf("%s: OK\n", testname); |
| } else { |
| printf("%s: FAIL (%#x)\n", testname, status); |
| exit_status = EXIT_FAILURE; |
| } |
| |
| status = run_test(setup_func, test_func, |
| PR_TASK_ISOLATION_ENABLE | PR_TASK_ISOLATION_USERSIG | |
| PR_TASK_ISOLATION_SET_SIG(SIGUSR1)); |
| if (WIFSIGNALED(status) && WTERMSIG(status) == SIGUSR1) { |
| printf("%s (SIGUSR1): OK\n", testname); |
| } else { |
| printf("%s (SIGUSR1): FAIL (%#x)\n", testname, status); |
| exit_status = EXIT_FAILURE; |
| } |
| } |
| |
| // Run a test and make sure it exits with success. |
| static void test_ok(const char *testname, void (*setup_func)(), |
| int (*test_func)()) |
| { |
| int status = run_test(setup_func, test_func, PR_TASK_ISOLATION_ENABLE); |
| if (status == EXIT_SUCCESS) { |
| printf("%s: OK\n", testname); |
| } else { |
| printf("%s: FAIL (%#x)\n", testname, status); |
| exit_status = EXIT_FAILURE; |
| } |
| } |
| |
| // Run a test with no signals and make sure it exits with success. |
| static void test_nosig(const char *testname, void (*setup_func)(), |
| int (*test_func)()) |
| { |
| int status = |
| run_test(setup_func, test_func, |
| PR_TASK_ISOLATION_ENABLE | PR_TASK_ISOLATION_NOSIG); |
| if (status == EXIT_SUCCESS) { |
| printf("%s: OK\n", testname); |
| } else { |
| printf("%s: FAIL (%#x)\n", testname, status); |
| exit_status = EXIT_FAILURE; |
| } |
| } |
| |
| // Mapping address passed from setup function to test function. |
| static char *fault_file_mapping; |
| |
| // mmap() a file in so we can test touching an unmapped page. |
| static void setup_fault(void) |
| { |
| char fault_file[] = "/tmp/isolation_XXXXXX"; |
| int fd = mkstemp(fault_file); |
| assert(fd >= 0); |
| int rc = ftruncate(fd, getpagesize()); |
| assert(rc == 0); |
| fault_file_mapping = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, |
| MAP_SHARED, fd, 0); |
| assert(fault_file_mapping != MAP_FAILED); |
| close(fd); |
| unlink(fault_file); |
| } |
| |
| // Now touch the unmapped page (and be killed). |
| static int do_fault(void) |
| { |
| *fault_file_mapping = 1; |
| return EXIT_FAILURE; |
| } |
| |
| // Make a syscall (and be killed). |
| static int do_syscall(void) |
| { |
| write(STDOUT_FILENO, "goodbye, world\n", 13); |
| return EXIT_FAILURE; |
| } |
| |
| // Turn isolation back off and don't be killed. |
| static int do_syscall_off(void) |
| { |
| prctl(PR_SET_TASK_ISOLATION, 0); |
| write(STDOUT_FILENO, "==> hello, world\n", 17); |
| return EXIT_SUCCESS; |
| } |
| |
| // If we're not getting a signal, make sure we can do multiple system calls. |
| static int do_syscall_multi(void) |
| { |
| write(STDOUT_FILENO, "==> hello, world 1\n", 19); |
| write(STDOUT_FILENO, "==> hello, world 2\n", 19); |
| return EXIT_SUCCESS; |
| } |
| |
| #ifdef __aarch64__ |
| // ARM64 uses tlbi instructions so doesn't need to interrupt the remote core. |
| static void test_munmap(void) {} |
| #else |
| |
| // Fork a thread that will munmap() after a short while. |
| // It will deliver a TLB flush to the task isolation core. |
| |
| static void *start_munmap(void *p) |
| { |
| usleep(500000); // 0.5s |
| munmap(p, getpagesize()); |
| return 0; |
| } |
| |
| static void setup_munmap(void) |
| { |
| // First, go back to cpu 0 and allocate some memory. |
| set_my_cpu(0); |
| void *p = mmap(0, getpagesize(), PROT_READ|PROT_WRITE, |
| MAP_ANONYMOUS|MAP_POPULATE|MAP_PRIVATE, 0, 0); |
| assert(p != MAP_FAILED); |
| |
| // Now fire up a thread that will wait half a second on cpu 0 |
| // and then munmap the mapping. |
| pthread_t thr; |
| int rc = pthread_create(&thr, NULL, start_munmap, p); |
| assert(rc == 0); |
| |
| // Back to the task-isolation cpu. |
| set_my_cpu(task_isolation_cpu); |
| } |
| |
| // Global variable to avoid the compiler outsmarting us. |
| volatile int munmap_spin; |
| |
| static int do_munmap(void) |
| { |
| while (munmap_spin < 1000000000) |
| ++munmap_spin; |
| return EXIT_FAILURE; |
| } |
| |
| static void test_munmap(void) |
| { |
| test_killed("test_munmap", setup_munmap, do_munmap); |
| } |
| #endif |
| |
| #ifdef __tilegx__ |
| // Make an unaligned access (and be killed). |
| // Only for tilegx, since other platforms don't do in-kernel fixups. |
| static int |
| do_unaligned(void) |
| { |
| static int buf[2]; |
| volatile int* addr = (volatile int *)((char *)buf + 1); |
| |
| *addr; |
| |
| asm("nop"); |
| return EXIT_FAILURE; |
| } |
| |
| static void test_unaligned(void) |
| { |
| test_killed("test_unaligned", NULL, do_unaligned); |
| } |
| #else |
| static void test_unaligned(void) {} |
| #endif |
| |
| // Fork a process that will spin annoyingly on the same core |
| // for a second. Since prctl() won't work if this task is actively |
| // running, we following this handshake sequence: |
| // |
| // 1. Child (in setup_quiesce, here) starts up, sets state 1 to let the |
| // parent know it's running, and starts doing short sleeps waiting on a |
| // state change. |
| // 2. Parent (in do_quiesce, below) starts up, spins waiting for state 1, |
| // then spins waiting on prctl() to succeed. At that point it is in |
| // isolation mode and the child is completing its most recent sleep. |
| // Now, as soon as the parent is scheduled out, it won't schedule back |
| // in until the child stops spinning. |
| // 3. Child sees the state change to 2, sets it to 3, and starts spinning |
| // waiting for a second to elapse, at which point it exits. |
| // 4. Parent spins waiting for the state to get to 3, then makes one |
| // syscall. This should take about a second even though the child |
| // was spinning for a whole second after changing the state to 3. |
| |
| volatile int *statep, *childstate; |
| struct timeval quiesce_start, quiesce_end; |
| int child_pid; |
| |
| static void setup_quiesce(void) |
| { |
| // First, go back to cpu 0 and allocate some shared memory. |
| set_my_cpu(0); |
| statep = mmap(0, getpagesize(), PROT_READ|PROT_WRITE, |
| MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
| assert(statep != MAP_FAILED); |
| childstate = statep + 1; |
| |
| gettimeofday(&quiesce_start, NULL); |
| |
| // Fork and fault in all memory in both. |
| child_pid = fork(); |
| assert(child_pid >= 0); |
| if (child_pid == 0) |
| *childstate = 1; |
| int rc = mlockall(MCL_CURRENT); |
| assert(rc == 0); |
| if (child_pid != 0) { |
| set_my_cpu(task_isolation_cpu); |
| return; |
| } |
| |
| // In child. Wait until parent notifies us that it has completed |
| // its prctl, then jump to its cpu and let it know. |
| *childstate = 2; |
| while (*statep == 0) |
| ; |
| *childstate = 3; |
| // printf("child: jumping to cpu %d\n", task_isolation_cpu); |
| set_my_cpu(task_isolation_cpu); |
| // printf("child: jumped to cpu %d\n", task_isolation_cpu); |
| *statep = 2; |
| *childstate = 4; |
| |
| // Now we are competing for the runqueue on task_isolation_cpu. |
| // Spin for one second to ensure the parent gets caught in kernel space. |
| struct timeval start, tv; |
| gettimeofday(&start, NULL); |
| while (1) { |
| gettimeofday(&tv, NULL); |
| double time = (tv.tv_sec - start.tv_sec) + |
| (tv.tv_usec - start.tv_usec) / 1000000.0; |
| if (time >= 0.5) |
| exit(0); |
| } |
| } |
| |
| static int do_quiesce(void) |
| { |
| double time; |
| int rc; |
| |
| rc = prctl(PR_SET_TASK_ISOLATION, |
| PR_TASK_ISOLATION_ENABLE | PR_TASK_ISOLATION_NOSIG); |
| if (rc != 0) { |
| prctl(PR_SET_TASK_ISOLATION, 0); |
| printf("prctl failed: rc %d", rc); |
| goto fail; |
| } |
| *statep = 1; |
| |
| // Wait for child to come disturb us. |
| while (*statep == 1) { |
| gettimeofday(&quiesce_end, NULL); |
| time = (quiesce_end.tv_sec - quiesce_start.tv_sec) + |
| (quiesce_end.tv_usec - quiesce_start.tv_usec)/1000000.0; |
| if (time > 0.1 && *statep == 1) { |
| prctl(PR_SET_TASK_ISOLATION, 0); |
| printf("timed out at %gs in child migrate loop (%d)\n", |
| time, *childstate); |
| char buf[100]; |
| sprintf(buf, "cat /proc/%d/stack", child_pid); |
| system(buf); |
| goto fail; |
| } |
| } |
| assert(*statep == 2); |
| |
| // At this point the child is spinning, so any interrupt will keep us |
| // in kernel space. Make a syscall to make sure it happens at least |
| // once during the second that the child is spinning. |
| kill(0, 0); |
| gettimeofday(&quiesce_end, NULL); |
| prctl(PR_SET_TASK_ISOLATION, 0); |
| time = (quiesce_end.tv_sec - quiesce_start.tv_sec) + |
| (quiesce_end.tv_usec - quiesce_start.tv_usec) / 1000000.0; |
| if (time < 0.4 || time > 0.6) { |
| printf("expected 1s wait after quiesce: was %g\n", time); |
| goto fail; |
| } |
| kill(child_pid, SIGKILL); |
| return EXIT_SUCCESS; |
| |
| fail: |
| kill(child_pid, SIGKILL); |
| return EXIT_FAILURE; |
| } |
| |
| #ifdef __tile__ |
| #include <arch/spr_def.h> |
| #endif |
| |
| static inline unsigned long get_cycle_count(void) |
| { |
| #ifdef __x86_64__ |
| unsigned int lower, upper; |
| __asm__ __volatile__("rdtsc" : "=a"(lower), "=d"(upper)); |
| return lower | ((unsigned long)upper << 32); |
| #elif defined(__tile__) |
| return __insn_mfspr(SPR_CYCLE); |
| #elif defined(__aarch64__) |
| unsigned long vtick; |
| __asm__ volatile("mrs %0, cntvct_el0" : "=r" (vtick)); |
| return vtick; |
| #else |
| #error Unsupported architecture |
| #endif |
| } |
| |
| // Histogram of cycle counts up to HISTSIZE cycles. |
| #define HISTSIZE 500 |
| long hist[HISTSIZE]; |
| |
| // Information on loss of control of the cpu (more than HISTSIZE cycles). |
| struct jitter_info { |
| unsigned long at; // cycle of jitter event |
| long cycles; // how long we lost the cpu for |
| }; |
| #define MAX_EVENTS 100 |
| volatile struct jitter_info jitter[MAX_EVENTS]; |
| unsigned int count; // index into jitter[] |
| |
| void jitter_summarize(void) |
| { |
| printf("INFO: loop times:\n"); |
| unsigned int i; |
| for (i = 0 ;i < HISTSIZE; ++i) |
| if (hist[i]) |
| printf(" %d x %ld\n", i, hist[i]); |
| |
| if (count) |
| printf("ERROR: jitter:\n"); |
| for (i = 0; i < count; ++i) |
| printf(" %ld: %ld cycles\n", jitter[i].at, jitter[i].cycles); |
| if (count == sizeof(jitter)/sizeof(jitter[0])) |
| printf(" ... more\n"); |
| } |
| |
| void jitter_sigint(int sig) |
| { |
| (void)sig; |
| printf("\n"); |
| jitter_summarize(); |
| exit(exit_status); |
| } |
| |
| void test_jitter(unsigned long waitticks) |
| { |
| printf("testing task isolation jitter for %ld ticks\n", waitticks); |
| |
| signal(SIGINT, jitter_sigint); |
| set_my_cpu(task_isolation_cpu); |
| int rc = mlockall(MCL_CURRENT); |
| assert(rc == 0); |
| |
| do |
| rc = prctl(PR_SET_TASK_ISOLATION, PR_TASK_ISOLATION_ENABLE); |
| while (rc != 0 && errno == EAGAIN); |
| if (rc != 0) { |
| printf("couldn't enable isolation (%d): FAIL\n", errno); |
| exit(EXIT_FAILURE); |
| } |
| |
| unsigned long start = get_cycle_count(); |
| unsigned long last = start; |
| unsigned long elapsed; |
| do { |
| unsigned long next = get_cycle_count(); |
| unsigned long delta = next - last; |
| elapsed = next - start; |
| if (__builtin_expect(delta > HISTSIZE, 0)) { |
| exit_status = EXIT_FAILURE; |
| if (count < sizeof(jitter)/sizeof(jitter[0])) { |
| jitter[count].cycles = delta; |
| jitter[count].at = elapsed; |
| ++count; |
| } |
| } else { |
| hist[delta]++; |
| } |
| last = next; |
| |
| } while (elapsed < waitticks); |
| |
| prctl(PR_SET_TASK_ISOLATION, 0); |
| jitter_summarize(); |
| } |
| |
| int main(int argc, char **argv) |
| { |
| // How many billion ticks to wait after running the other tests? |
| unsigned long waitticks; |
| if (argc == 1) |
| waitticks = 10; |
| else if (argc == 2) |
| waitticks = strtol(argv[1], NULL, 10); |
| else { |
| printf("syntax: isolation [gigaticks]\n"); |
| exit(EXIT_FAILURE); |
| } |
| waitticks *= 1000000000; |
| |
| // Test that the /sys device is present and pick a cpu. |
| FILE *f = fopen("/sys/devices/system/cpu/task_isolation", "r"); |
| if (f == NULL) { |
| printf("/sys device: FAIL (%s)\n", strerror(errno)); |
| exit(EXIT_FAILURE); |
| } |
| char buf[100]; |
| char *result = fgets(buf, sizeof(buf), f); |
| assert(result == buf); |
| fclose(f); |
| if (*buf == '\n') { |
| printf("No task_isolation cores configured; please reboot with task_isolation=NNN\n"); |
| exit(EXIT_FAILURE); |
| } |
| char *end; |
| task_isolation_cpu = strtol(buf, &end, 10); |
| assert(end != buf); |
| assert(*end == ',' || *end == '-' || *end == '\n'); |
| assert(task_isolation_cpu >= 0); |
| printf("/sys device : OK (using task isolation cpu %d)\n", |
| task_isolation_cpu); |
| |
| // Test to see if with no mask set, we fail. |
| if (prctl(PR_SET_TASK_ISOLATION, PR_TASK_ISOLATION_ENABLE) == 0 || |
| errno != EINVAL) { |
| printf("prctl unaffinitized: FAIL\n"); |
| exit_status = EXIT_FAILURE; |
| } else { |
| printf("prctl unaffinitized: OK\n"); |
| } |
| |
| // Or if affinitized to the wrong cpu. |
| set_my_cpu(0); |
| if (prctl(PR_SET_TASK_ISOLATION, PR_TASK_ISOLATION_ENABLE) == 0 || |
| errno != EINVAL) { |
| printf("prctl on cpu 0: FAIL\n"); |
| exit_status = EXIT_FAILURE; |
| } else { |
| printf("prctl on cpu 0: OK\n"); |
| } |
| |
| // Run the tests. |
| test_killed("test_fault", setup_fault, do_fault); |
| test_killed("test_syscall", NULL, do_syscall); |
| test_munmap(); |
| test_unaligned(); |
| test_ok("test_off", NULL, do_syscall_off); |
| test_nosig("test_multi", NULL, do_syscall_multi); |
| test_nosig("test_quiesce", setup_quiesce, do_quiesce); |
| |
| // Exit failure if any test failed. |
| if (exit_status != EXIT_SUCCESS) { |
| printf("Skipping jitter testing due to test failures\n"); |
| return exit_status; |
| } |
| |
| test_jitter(waitticks); |
| |
| return exit_status; |
| } |