/* blob: fe39a7abdf67c70ff0775988bdb9d2398e4514b2 [file] [log] [blame] */
#define _GNU_SOURCE
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include <sys/eventfd.h>
int to_thread, from_thread;
volatile int state; // 0 = warmup. 1 = benchmark. 2 = exit.
int use_xstate = 0;
int thread_cpu;
/*
 * Optionally touch an XMM register.
 *
 * Does nothing unless the global use_xstate flag is non-zero. When the flag
 * is set, a single "pxor xmm0, xmm0" is executed so that the calling thread
 * has used the xmm0 register (presumably so the kernel has extended state to
 * handle for this thread on a context switch - confirm against the intended
 * measurement).
 */
void maybe_use_xstate(void)
{
	if (!use_xstate)
		return;

	/* volatile + clobber list keep the compiler from dropping the insn. */
	asm volatile ("pxor %%xmm0, %%xmm0" : : : "xmm0");
}
/*
 * Worker thread entry point (pthread start routine).
 *
 * Pins the calling thread to thread_cpu (printing a warning on stdout if that
 * fails), then serves as the far end of the ping-pong: it blocks reading an
 * 8-byte counter from to_thread and answers by writing 1 to from_thread.
 *
 * x is unused. Returns a null pointer once it wakes up and finds state == 2.
 * Any short or failed read/write aborts the process.
 */
void *threadproc(void *x)
{
	cpu_set_t mask;
	uint64_t token;

	CPU_ZERO(&mask);
	CPU_SET(thread_cpu, &mask);
	if (sched_setaffinity(0, sizeof(mask), &mask) != 0)
		printf("[WARN]\tsched_setaffinity failed\n");

	for (;;) {
		/* Sleep until the main thread pokes us. */
		if (read(to_thread, &token, sizeof token) != (ssize_t)sizeof token)
			abort();

		/* killit() sets state to 2 before its final wake-up. */
		if (state == 2)
			break;

		/*
		 * The original guard here was "1 || s == 0", which is always
		 * true, so the call is made on every round trip regardless of
		 * the warm-up/benchmark phase.
		 */
		maybe_use_xstate();

		/* Hand control back to the main thread. */
		token = 1;
		if (write(from_thread, &token, sizeof token) != (ssize_t)sizeof token)
			abort();
	}

	return NULL;
}
/*
 * One round trip with the worker thread: write 1 to to_thread to wake it,
 * then block reading from_thread until it answers. Aborts the process if
 * either transfer is not exactly 8 bytes.
 */
void bounce()
{
	uint64_t token = 1;
	const ssize_t want = sizeof token;

	/* Short-circuit keeps the order: the read only happens after a good write. */
	if (write(to_thread, &token, sizeof token) != want ||
	    read(from_thread, &token, sizeof token) != want)
		abort();
}
/*
 * Ask the worker thread to exit: set state to 2 first, then wake the worker
 * through to_thread so it observes the new state. No reply is waited for.
 * Aborts the process if the write is not exactly 8 bytes.
 */
void killit()
{
	uint64_t wake = 1;

	/* Must be stored before the wake-up write below. */
	state = 2;

	if (write(to_thread, &wake, sizeof wake) != (ssize_t)sizeof wake)
		abort();
}
int main(int argc, char **argv)
{
struct timespec start, end;
pthread_t thread;
uint64_t i, iters = 100000;
uint64_t ns;
int main_cpu;
if (argc != 3) {
printf("Usage: %s <0|1> <same|different>\n\nSet the parameter to 1 to use xstate\nUse 'same' for same CPU or 'different' for cross-CPU\n",
argv[0]);
return 1;
} else {
if (!strcmp(argv[1], "1"))
use_xstate = 1;
else if (strcmp(argv[1], "0"))
abort();
if (!strcmp(argv[2], "same")) {
main_cpu = 0;
thread_cpu = 0;
} else if (!strcmp(argv[2], "different")) {
main_cpu = 0;
thread_cpu = 1;
} else {
abort();
}
}
printf("use_xstate = %d\n", use_xstate);
to_thread = eventfd(0, 0);
if (to_thread < 0)
abort();
from_thread = eventfd(0, 0);
if (from_thread < 0)
abort();
if (pthread_create(&thread, 0, threadproc, 0) != 0)
abort();
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(main_cpu, &cpuset);
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
printf("[WARN]\tsched_setaffinity failed\n");
/* Warm up (and burn the xstate heuristic) */
for(i = 0; i < 10000; i++) {
bounce();
maybe_use_xstate();
}
/* Clear out the xstate hack. */
state = 1;
bounce();
clock_gettime(CLOCK_MONOTONIC, &start);
for (i = 0; i < iters; i++) {
bounce();
maybe_use_xstate();
}
clock_gettime(CLOCK_MONOTONIC, &end);
killit();
ns = 1000000000ULL * (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec);
printf("%llu iters at %.1f ns/switch\n",
(unsigned long long)iters, (double)ns / (2*iters));
pthread_join(thread, 0);
return 0;
}