| #define __STDC_FORMAT_MACROS |
| |
| #include <sys/time.h> |
| #include <time.h> |
| #include <stdlib.h> |
| #include <sys/syscall.h> |
| #include <unistd.h> |
| #include <dlfcn.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <inttypes.h> |
| #include <atomic> |
| #include <signal.h> |
| #include <pthread.h> |
| #include <err.h> |
| |
/* Entry point looked up as "__vdso_clock_gettime": clock id in, timespec out. */
using vgettime_t = int (*)(clockid_t, timespec *);

/* Entry point looked up as "__vdso_getcpu". */
using vgetcpu_t = long (*)(unsigned *cpu, unsigned *node, void *unused);

/*
 * Entry point looked up as "__vdso_clock_gettime_monotonic": a getter that
 * takes only the output timespec.
 */
using vgettime_specific_t = int (*)(timespec *);
| |
/*
 * Print one line describing a POSIX clock.
 *
 * name: human-readable label printed in parentheses.
 * id:   numeric clock id passed to clock_getres().
 *
 * On success the line shows the resolution as seconds followed by a
 * nine-digit nanosecond fraction; if clock_getres() reports failure the
 * line says so instead.
 */
void describe_clock(const char *name, int id)
{
	struct timespec resolution;

	if (clock_getres(id, &resolution) >= 0) {
		printf(" %d (%s) resolution = %" PRIu64 ".%09u\n",
		       id, name,
		       (uint64_t)resolution.tv_sec,
		       (unsigned)resolution.tv_nsec);
		return;
	}

	printf(" %d (%s) [failed to query resolution]\n", id, name);
}
| |
/*
 * Install `handler` as the three-argument (SA_SIGINFO) handler for `sig`.
 *
 * sig:     signal number to hook.
 * handler: function invoked with (signo, siginfo, ucontext).
 * flags:   extra sa_flags bits, OR-ed with SA_SIGINFO.
 *
 * No additional signals are blocked while the handler runs.  If sigaction()
 * fails the process exits with status 1 through err().
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction action;

	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_flags = flags | SA_SIGINFO;
	action.sa_sigaction = handler;

	if (sigaction(sig, &action, NULL) != 0)
		err(1, "sigaction");
}
| |
/*
 * Signal handler that deliberately does nothing: it ignores all three
 * arguments and simply returns.
 */
static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
{
	(void)sig;
	(void)info;
	(void)ctx_void;
}
| |
/*
 * Thread start routine that does no work: the argument is ignored and the
 * thread's result is always NULL.
 */
static void *empty_thread_proc(void *unused)
{
	(void)unused;
	return NULL;
}
| |
/*
 * Execute an IRET that returns to the instruction immediately following it.
 *
 * The asm builds the frame IRET pops -- SS, stack pointer, flags, CS and the
 * address of local label 1 -- and then issues iret/iretq.  After pushing the
 * stack pointer, the saved copy is bumped by one slot so that it equals the
 * value the stack pointer had before the first push; the stack is therefore
 * balanced when execution resumes at label 1.
 *
 * The stack pointer is listed as an in/out operand so the compiler knows the
 * asm touches it.
 */
static void iret_to_self(void)
{
#ifdef __x86_64__
	register void *stack_ptr asm("rsp");
	unsigned long scratch;

	asm volatile (
		"movq %%ss, %0\n\t"
		"pushq %0\n\t"
		"pushq %%rsp\n\t"
		"addq $8, (%%rsp)\n\t"
		"pushfq\n\t"
		"movq %%cs, %0\n\t"
		"pushq %0\n\t"
		/*
		 * Take the resume address RIP-relative.  "pushq $1f" needs
		 * an absolute 32-bit relocation against .text, which the
		 * linker rejects when producing a position-independent
		 * executable.
		 */
		"leaq 1f(%%rip), %0\n\t"
		"pushq %0\n\t"
		"iretq\n\t"
		"1:"
		: "=r" (scratch), "+r" (stack_ptr) : : "cc");
#else
	register void *stack_ptr asm("esp");

	/*
	 * NOTE(review): this path still pushes the absolute address of
	 * label 1; confirm the 32-bit build is not linked as PIE.
	 */
	asm volatile (
		"pushl %%ss\n\t"
		"pushl %%esp\n\t"
		"addl $4, (%%esp)\n\t"
		"pushfl\n\t"
		"pushl %%cs\n\t"
		"pushl $1f\n\t"
		"iret\n\t"
		"1:"
		: "+r" (stack_ptr) : : "cc");
#endif
}
| |
| int main(int argc, char **argv) |
| { |
| if (argc < 3) { |
| printf("Usage: time <iters> <mode> [POSIX clock id]\n"); |
| printf("\nClocks are:\n"); |
| describe_clock("CLOCK_REALTIME", CLOCK_REALTIME); |
| describe_clock("CLOCK_MONOTONIC", CLOCK_MONOTONIC); |
| describe_clock("CLOCK_REALTIME_COARSE", CLOCK_REALTIME_COARSE); |
| describe_clock("CLOCK_MONOTONIC_COARSE", CLOCK_MONOTONIC_COARSE); |
| return 1; |
| } |
| |
| void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); |
| if (!vdso) |
| vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); |
| if (!vdso) |
| printf("dlopen failed\n");; |
| |
| vgettime_t vgettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); |
| if (!vgettime) |
| printf("dlsym failed: %s", dlerror()); |
| |
| vgettime_specific_t vgettime_monotonic = (vgettime_specific_t)dlsym(vdso, "__vdso_clock_gettime_monotonic"); |
| |
| vgetcpu_t vgetcpu = (vgetcpu_t)dlsym(vdso, "__vdso_getcpu"); |
| if (!vgetcpu) |
| printf("dlsym failed: %s", dlerror()); |
| |
| long loop_mult = 1; |
| char *loop_arg = strdup(argv[1]); |
| size_t loop_arg_len = strlen(loop_arg); |
| if (loop_arg_len && loop_arg[loop_arg_len-1] == 'k') { |
| loop_mult = 1000; |
| loop_arg[loop_arg_len-1] = '\0'; |
| } else if (loop_arg_len && loop_arg[loop_arg_len-1] == 'M') { |
| loop_mult = 1000000; |
| loop_arg[loop_arg_len-1] = '\0'; |
| } |
| size_t loops = (size_t)atol(argv[1]) * loop_mult; |
| free(loop_arg); |
| |
| clockid_t c = argc > 3 ? atoi(argv[3]) : 0; |
| const char *mode = argv[2]; |
| |
| sethandler(SIGUSR1, sigusr1, 0); |
| |
| timespec start; |
| clock_gettime(CLOCK_MONOTONIC, &start); |
| |
| timespec t; |
| if (!strcmp(mode, "clock_gettime")) { |
| for (size_t i = 0; i < loops; ++i) |
| clock_gettime(c, &t); |
| } else if (!strcmp(mode, "rdtsc")) { |
| for (size_t i = 0; i < loops; ++i) { |
| unsigned int a, d; |
| asm volatile ("rdtsc" : "=a" (a), "=d" (d)); |
| } |
| } else if (!strcmp(mode, "lfence_rdtsc")) { |
| for (size_t i = 0; i < loops; ++i) { |
| unsigned int a, d; |
| asm volatile ("lfence;rdtsc" : "=a" (a), "=d" (d)); |
| } |
| } else if (!strcmp(mode, "lfence_rdtsc_lfence")) { |
| for (size_t i = 0; i < loops; ++i) { |
| unsigned int a, d; |
| asm volatile (""); |
| asm volatile ("lfence;rdtsc;lfence" : "=a" (a), "=d" (d)); |
| } |
| } else if (!strcmp(mode, "mfence_rdtsc_mfence")) { |
| for (size_t i = 0; i < loops; ++i) { |
| unsigned int a, d; |
| asm volatile ("mfence;rdtsc;mfence" : "=a" (a), "=d" (d)); |
| } |
| } else if (!strcmp(mode, "mfence")) { |
| for (size_t i = 0; i < loops; ++i) { |
| unsigned int a, d; |
| asm volatile ("mfence" : "=a" (a), "=d" (d)); |
| } |
| } else if (!strcmp(mode, "sfence")) { |
| for (size_t i = 0; i < loops; ++i) { |
| unsigned int a, d; |
| asm volatile ("sfence" : "=a" (a), "=d" (d)); |
| } |
| } else if (!strcmp(mode, "lock_addl")) { |
| std::atomic<unsigned int> x; |
| for (size_t i = 0; i < loops; ++i) |
| x += 2; |
| } else if (!strcmp(mode, "lock_xchg")) { |
| std::atomic<unsigned int> x; |
| for (size_t i = 0; i < loops; ++i) |
| x.exchange(2); |
| } else if (!strcmp(mode, "cmpxchg_mismatch")) { |
| std::atomic<unsigned long> x; |
| for (size_t i = 0; i < loops; ++i) |
| asm volatile ("cmpxchg %[newval], %[mem]" |
| : [mem] "+m" (x) |
| : "a" (1), [newval] "r" (2) : "flags"); |
| } else if (!strcmp(mode, "cmpxchg_match")) { |
| std::atomic<unsigned long> x; |
| for (size_t i = 0; i < loops/2; ++i) { |
| asm volatile ("cmpxchg %[newval], %[mem]" |
| : [mem] "+m" (x) |
| : "a" (0), [newval] "r" (2) : "flags"); |
| asm volatile ("cmpxchg %[newval], %[mem]" |
| : [mem] "+m" (x) |
| : "a" (2), [newval] "r" (0) : "flags"); |
| } |
| } else if (!strcmp(mode, "rdtscp")) { |
| for (size_t i = 0; i < loops; ++i) { |
| unsigned int a, c, d; |
| asm volatile ("rdtscp" : "=a" (a), "=c" (c), "=d" (d)); |
| } |
| } else if (!strcmp(mode, "lsl15")) { |
| for (size_t i = 0; i < loops; ++i) { |
| uint16_t index = (15 << 3) + 3; |
| uint32_t limit; |
| asm volatile ("lsl %[index], %[limit]" |
| : [limit] "=r" (limit) |
| : [index] "r" (index) : "cc"); |
| } |
| } else if (!strcmp(mode, "lsl100")) { |
| for (size_t i = 0; i < loops; ++i) { |
| uint16_t index = (100 << 3) + 3; |
| uint32_t limit; |
| asm volatile ("lsl %[index], %[limit]" |
| : [limit] "=r" (limit) |
| : [index] "r" (index) : "cc"); |
| } |
| } else if (!strcmp(mode, "mov_to_ds")) { |
| for (size_t i = 0; i < loops; ++i) |
| asm volatile ("mov %0, %%ds" : : "rm" (0)); |
| } else if (!strcmp(mode, "zero_gs")) { |
| for (size_t i = 0; i < loops; ++i) |
| asm volatile ("mov %0, %%gs" : : "rm" (0)); |
| } else if (!strcmp(mode, "nonzero_gs")) { |
| unsigned short sel; |
| asm ("mov %%ss, %0" : "=rm" (sel)); |
| for (size_t i = 0; i < loops; ++i) |
| asm volatile ("mov %0, %%gs" : : "rm" (sel)); |
| } else if (!strcmp(mode, "rdgsbase")) { |
| asm volatile ("mov %0, %%gs" : : "rm" (0)); |
| for (size_t i = 0; i < loops; ++i) { |
| unsigned long base; |
| asm volatile ("rdgsbase %0" : "=rm" (base)); |
| } |
| } else if (!strcmp(mode, "wrgsbase")) { |
| asm volatile ("mov %0, %%gs" : : "rm" (0)); |
| for (size_t i = 0; i < loops; ++i) |
| asm volatile ("wrgsbase %0" : : "rm" (1)); |
| } else if (!strcmp(mode, "rdwrgsbase")) { |
| asm volatile ("mov %0, %%gs" : : "rm" (0)); |
| for (size_t i = 0; i < loops; ++i) { |
| unsigned long base; |
| asm volatile ("rdgsbase %0" : "=rm" (base)); |
| asm volatile ("wrgsbase %0" : : "rm" (base)); |
| } |
| } else if (!strcmp(mode, "xsave_legacy")) { |
| struct state { |
| unsigned char buf[65536] __attribute__ ((aligned (64))); |
| } state; |
| for (size_t i = 0; i < loops; ++i) { |
| asm volatile ("xsave %0" |
| : "+m" (state) : "a" (0x3), "d" (0)); |
| } |
| } else if (!strcmp(mode, "xsave_all")) { |
| struct state { |
| unsigned char buf[65536] __attribute__ ((aligned (64))); |
| } state; |
| unsigned long eax, edx; |
| asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); |
| for (size_t i = 0; i < loops; ++i) { |
| asm volatile ("xsave %0" |
| : "+m" (state) : "a" (eax), "d" (edx)); |
| } |
| } else if (!strcmp(mode, "xsave_bndcsr")) { |
| struct state { |
| unsigned char buf[65536] __attribute__ ((aligned (64))); |
| } state; |
| for (size_t i = 0; i < loops; ++i) { |
| asm volatile ("xsave %0" |
| : "+m" (state) : "a" (0x10), "d" (0)); |
| } |
| } else if (!strcmp(mode, "xsavec_bndcsr")) { |
| struct state { |
| unsigned char buf[65536] __attribute__ ((aligned (64))); |
| } state; |
| for (size_t i = 0; i < loops; ++i) { |
| asm volatile ("xsavec %0" |
| : "+m" (state) : "a" (0x10), "d" (0)); |
| } |
| } else if (!strcmp(mode, "sgdt")) { |
| struct { |
| unsigned short limit; |
| unsigned long base; |
| } __attribute__((packed)) val; |
| |
| for (size_t i = 0; i < loops; ++i) |
| asm volatile ("sgdt %0" : "=m" (val)); |
| } else if (!strcmp(mode, "gettimeofday")) { |
| struct timeval tv; |
| for (size_t i = 0; i < loops; ++i) |
| gettimeofday(&tv, 0); |
| } else if (!strcmp(mode, "sys_clock_gettime")) { |
| for (size_t i = 0; i < loops; ++i) |
| syscall(__NR_clock_gettime, c, &t); |
| } else if (!strcmp(mode, "vclock_gettime")) { |
| for (size_t i = 0; i < loops; ++i) |
| vgettime(c, &t); |
| } else if (!strcmp(mode, "vclock_gettime_monotonic")) { |
| for (size_t i = 0; i < loops; ++i) |
| vgettime_monotonic(&t); |
| } else if (!strcmp(mode, "vgetcpu")) { |
| unsigned cpu; |
| for (size_t i = 0; i < loops; ++i) |
| vgetcpu(&cpu, NULL, NULL); |
| } else if (!strcmp(mode, "getpid")) { |
| for (size_t i = 0; i < loops; ++i) |
| syscall(SYS_getpid); |
| } else if (!strcmp(mode, "sys_enosys")) { |
| for (size_t i = 0; i < loops; ++i) |
| syscall(0xffffffff, c, &t); |
| } else if (!strcmp(mode, "rdpmc")) { |
| // Unlikely to work. |
| unsigned int eax, edx; |
| unsigned int ecx = 0; |
| for (size_t i = 0; i < loops; ++i) |
| asm volatile ("rdpmc" : "=a" (eax), "=d" (edx) : "c" (ecx)); |
| } else if (!strcmp(mode, "memcpy_2k")) { |
| unsigned char src[2048] = {}, dst[2048]; |
| for (size_t i = 0; i < loops; ++i) { |
| asm volatile ("" : "=m" (*src) : "m" (*dst) : "memory"); |
| memcpy(dst, src, 2048); |
| } |
| #ifdef __x86_64__ |
| } else if (!strcmp(mode, "vsyscall_time")) { |
| auto vsyscall_time = (long (*)(long *))0xffffffffff600400; |
| for (size_t i = 0; i < loops; ++i) |
| vsyscall_time(nullptr); |
| #endif |
| } else if (!strcmp(mode, "raise")) { |
| for (size_t i = 0; i < loops; ++i) |
| raise(SIGUSR1); |
| #ifdef __x86_64__ |
| } else if (!strcmp(mode, "arch_prctl_42")) { |
| for (size_t i = 0; i < loops; ++i) |
| syscall(SYS_arch_prctl, 42, 0); |
| #endif |
| } else if (!strcmp(mode, "pthread_create")) { |
| pthread_t thread; |
| for (size_t i = 0; i < loops; ++i) { |
| if (pthread_create(&thread, NULL, |
| empty_thread_proc, NULL)) |
| err(1, "pthread_create"); |
| pthread_join(thread, NULL); |
| } |
| } else if (!strcmp(mode, "iret_to_self")) { |
| for (size_t i = 0; i < loops; ++i) |
| iret_to_self(); |
| } else if (!strcmp(mode, "cpuid")) { |
| for (size_t i = 0; i < loops; ++i) { |
| unsigned int ax = 1, cx = 0; |
| asm volatile ("cpuid" : "+a" (ax), "+c" (cx) : : "ebx", "edx"); |
| } |
| } else { |
| printf("Unknown mode %s\n", mode); |
| return 1; |
| } |
| |
| timespec end; |
| clock_gettime(CLOCK_MONOTONIC, &end); |
| unsigned long long duration = (end.tv_nsec - start.tv_nsec) + 1000000000ULL * (end.tv_sec - start.tv_sec); |
| printf("%ld loops in %.5fs = %.2f nsec / loop\n", |
| (long)loops, float(duration) * 1e-9, |
| float(duration) / loops); |
| if (duration == 0) |
| printf("[WARN]\tThe apparent elapsed time was exactly 0. You have precision issues.\n"); |
| return 0; |
| } |