blob: 51c33384ffe638963042677bc950cfe96c0989f0 [file] [log] [blame]
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/user.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <err.h>
/*
 * Handle for one hardware cycle counter opened via perf_event_open(2),
 * plus its mmapped metadata page (used for userspace RDPMC reads).
 */
struct psm_counter {
	int fd;		/* perf_event_open file descriptor */
	struct perf_event_mmap_page *metadata;	/* mmapped metadata page */
};
/*
 * Opens a per-task PERF_COUNT_HW_CPU_CYCLES counter and maps its
 * metadata page so the counter can be read from userspace with RDPMC.
 * Exits via err()/errx() on any failure.  The returned object must be
 * released with psm_counter_destroy().
 */
struct psm_counter *psm_counter_create(void)
{
	struct psm_counter *counter = malloc(sizeof(*counter));
	struct perf_event_attr attr;

	if (!counter)
		err(1, "malloc");

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(struct perf_event_attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	counter->fd = syscall(
		SYS_perf_event_open,
		&attr,			/* attributes */
		0,			/* monitor me */
		-1,			/* all CPUs */
		-1,			/* group leader */
		PERF_FLAG_FD_CLOEXEC	/* flags */
	);
	if (counter->fd == -1)
		err(1, "perf_event_open");

	/* The first page of a perf mmap is the metadata page. */
	counter->metadata = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
				 counter->fd, 0);
	if (counter->metadata == MAP_FAILED)
		err(1, "mmap");

	/* errx() appends its own newline, so no "\n" in the message. */
	if (!counter->metadata->cap_user_rdpmc)
		errx(1, "RDPMC not supported (cap_user_rdpmc == 0)");
	/* index == 0 means no hardware counter slot is assigned. */
	if (!counter->metadata->index)
		errx(1, "RDPMC not supported (no index assigned)");
	return counter;
}
/*
 * Releases everything psm_counter_create() acquired: the metadata
 * mapping, the perf event fd, and the counter struct itself (the
 * original version leaked the malloc'd struct).
 */
void psm_counter_destroy(struct psm_counter *counter)
{
	munmap(counter->metadata, PAGE_SIZE);
	counter->metadata = MAP_FAILED;
	close(counter->fd);
	counter->fd = -1;
	free(counter);
}
/*
 * Compiler-only barrier: the "memory" clobber stops the compiler from
 * reordering memory accesses across this point.  Emits no instructions.
 */
static inline void psm_barrier(void)
{
	asm volatile ("" : : : "memory");
}
/*
 * Serializes the instruction stream using CPUID (leaf 0), a
 * serializing instruction on x86.  EBX/EDX are clobbered because
 * CPUID writes them.
 * NOTE(review): unused in the code shown here — presumably kept for
 * external callers; confirm before removing.
 */
static inline void psm_serialize(void)
{
	unsigned int eax = 0, ecx = 0;

	asm volatile ("cpuid"
		      : "+a" (eax), "+c" (ecx) : : "ebx", "edx", "memory");
}
/*
 * Reads hardware performance counter number `ecx` with RDPMC and
 * returns the raw EDX:EAX result as a 64-bit value.  The caller
 * derives `ecx` from the perf metadata page (index - 1); only the low
 * pmc_width bits are meaningful.
 */
static inline uint64_t psm_rdpmc(unsigned int ecx)
{
	unsigned int eax, edx;

	asm volatile ("rdpmc" : "=a" (eax), "=d" (edx) : "c" (ecx));
	return (((uint64_t)edx << 32) | eax);
}
/*
 * Sampling callback: on success writes one measured count to *count
 * and returns true; returns false when the sample must be discarded
 * (e.g. the measurement interval was interrupted) and the caller
 * should retry.
 */
typedef bool (*psm_sample_fn)(uint64_t *count,
			      const struct psm_counter *counter,
			      void *opaque);
/*
 * psm_atomic is a mechanism for very precise counter measurement of
 * very short operations. A psm_atomic interval will fail if the
 * process is preempted during the measurement interval.
 */
struct psm_atomic {
	/* Copied from metadata in psm_atomic_start. */
	uint32_t perf_lock; /* If odd, this sample is bad. */
	uint64_t perf_time_offset;
	/* Raw RDPMC value captured at the start of the interval. */
	uint64_t initial_raw_count;
	/*
	 * This is here to improve code generation. struct psm_atomic
	 * is unlikely to be referenced by an escaped pointer and hence
	 * be affected by a "memory" clobber.
	 */
	unsigned int rdpmc_ecx;
};
/*
 * Starts measuring an uninterrupted duration.
 *
 * Snapshots the metadata page's lock word and time_offset; if either
 * has changed by the time psm_atomic_elapsed() runs, the sample is
 * rejected.  The RDPMC read is done last so that the bookkeeping
 * loads are not counted in the measured interval.
 */
static inline struct psm_atomic psm_atomic_start(const struct psm_counter *ctr)
{
	struct psm_atomic state;

	state.perf_lock = ctr->metadata->lock;
	psm_barrier();
	state.perf_time_offset = ctr->metadata->time_offset;
	/* RDPMC counter number is metadata->index - 1 (index 0 = none). */
	state.rdpmc_ecx = ctr->metadata->index - 1;
	psm_barrier(); /* Do rdpmc last to reduce noise */
	state.initial_raw_count = psm_rdpmc(state.rdpmc_ecx);
	return state;
}
/*
 * Ends the measurement interval started by psm_atomic_start().
 * Writes the elapsed counter delta to *elapsed_count and returns true,
 * or returns false if the sample must be discarded because the
 * interval was interrupted (metadata changed under us).
 * *elapsed_count is written even on failure; ignore it then.
 */
static inline bool psm_atomic_elapsed(uint64_t *elapsed_count,
				      const struct psm_atomic *state,
				      const struct psm_counter *ctr)
{
	/* Do the RDPMC first to reduce noise. */
	uint64_t count_now = psm_rdpmc(state->rdpmc_ecx);

	psm_barrier(); /* No, really, do it first. */
	/*
	 * The hardware counter is only pmc_width bits wide.  Shift both
	 * raw values up to the top of 64 bits so the subtraction wraps
	 * at the counter's true width, then shift the delta back down.
	 */
	unsigned int shift = 64 - ctr->metadata->pmc_width;

	count_now <<= shift;
	uint64_t initial_count = state->initial_raw_count;

	initial_count <<= shift;
	*elapsed_count = (count_now - initial_count) >> shift;
	psm_barrier();
	/* Reject if the kernel moved the event (time_offset changed). */
	if (ctr->metadata->time_offset != state->perf_time_offset)
		return false; /* We were interrupted. */
	/* Now check the lock. */
	psm_barrier();
	/* An odd lock value means a metadata update was in progress. */
	if (ctr->metadata->lock != state->perf_lock || (state->perf_lock & 1))
		return false;
	return true;
}
bool psm_atomic_sample_empty(uint64_t *count,
const struct psm_counter *ctr, void *opaque)
{
struct psm_atomic duration = psm_atomic_start(ctr);
return psm_atomic_elapsed(count, &duration, ctr);
}
/*
 * Measures one invalid system call (entry + ENOSYS return + exit).
 * On x86-64 the SYSCALL instruction is issued directly with a bogus
 * syscall number so libc's syscall() wrapper is not part of the
 * measurement; SYSCALL clobbers RCX (return RIP) and R11 (RFLAGS).
 */
bool psm_atomic_sample_enosys(uint64_t *count,
			      const struct psm_counter *ctr, void *opaque)
{
	struct psm_atomic duration = psm_atomic_start(ctr);

#ifdef __x86_64__
	unsigned long rax;

	rax = 0xbfffffff; /* out-of-range syscall number */
	asm volatile ("syscall" : "+a" (rax) : : "rcx", "r11");
#else
	syscall(0x3fffffff); /* bogus syscall number */
#endif
	return psm_atomic_elapsed(count, &duration, ctr);
}
/*
 * Measures 1000 invalid prctl(-1) calls in one interval, to amortize
 * the measurement overhead over many syscalls.  The x86-64 path pins
 * the argument in RDI and issues SYSCALL directly, bypassing libc.
 */
bool psm_atomic_sample_bad_prctl_x1000(uint64_t *count,
				       const struct psm_counter *ctr, void *opaque)
{
	struct psm_atomic duration = psm_atomic_start(ctr);

#ifdef __x86_64__
	unsigned long rax;
	register unsigned long rdi asm("rdi"); /* first syscall argument */

	for (int i = 0; i < 1000; i++) {
		rax = SYS_prctl;
		rdi = -1; /* invalid option -> EINVAL */
		/* SYSCALL clobbers RCX and R11; RAX/RDI may be written. */
		asm volatile ("syscall" : "+a" (rax), "+r" (rdi) : : "rcx", "r11");
	}
#else
	for (int i = 0; i < 1000; i++)
		syscall(SYS_prctl, -1);
#endif
	return psm_atomic_elapsed(count, &duration, ctr);
}
/*
 * qsort() comparator for uint64_t values, ascending.  Compares instead
 * of subtracting (a subtraction narrowed to int would be wrong), and
 * keeps the const qualifier through the casts instead of discarding it.
 */
static int compare_ui64(const void *a_void, const void *b_void)
{
	const uint64_t a = *(const uint64_t *)a_void;
	const uint64_t b = *(const uint64_t *)b_void;

	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}
/*
 * Collects n accepted samples from fn and returns the q'th smallest
 * (0-based), i.e. the q/n quantile.  Rejected samples (fn returning
 * false) are retried and do not count toward n.  Aborts if q is out
 * of range or the sample array cannot be allocated (the original
 * dereferenced a NULL array on allocation failure).
 */
uint64_t psm_integer_quantile(const struct psm_counter *ctr, psm_sample_fn fn,
			      void *opaque, size_t q, size_t n)
{
	if (q >= n)
		abort();
	/* calloc(count, size): conventional order, overflow-checked. */
	uint64_t *array = calloc(n, sizeof(uint64_t));

	if (!array)
		abort();
	for (size_t i = 0; i < n; ) {
		uint64_t sample;

		if (!fn(&sample, ctr, opaque))
			continue; /* Rejected sample: retry. */
		array[i++] = sample;
	}
	qsort(array, n, sizeof(uint64_t), compare_ui64);

	uint64_t ret = array[q];

	free(array);
	return ret;
}
/*
 * Samples until the measurement stabilizes: takes batches of 10
 * accepted samples, tracks the minimum of each batch, and returns
 * once the same minimum has been seen in 10 consecutive batches.
 * Gives up (returns anyway) after 500 batches.
 */
void psm_settle(const struct psm_counter *ctr, psm_sample_fn fn, void *opaque)
{
	uint64_t last_min = UINT64_MAX;
	int streak = 0;

	for (int batch = 0; batch < 500; batch++) {
		uint64_t batch_min = UINT64_MAX;
		int accepted = 0;

		while (accepted < 10) {
			uint64_t sample;

			if (!fn(&sample, ctr, opaque))
				continue; /* Rejected sample */
			if (sample < batch_min)
				batch_min = sample;
			accepted++;
		}

		if (batch_min == last_min) {
			if (++streak == 10)
				return;
		} else {
			last_min = batch_min;
			streak = 1;
		}
	}
}
/*
 * Measures the baseline (empty-interval) cost, then the median cost of
 * one invalid syscall and of 1000 bad prctl() calls, 20 rounds each.
 */
int main(void)
{
	/* QUANTILE of NSAMPLES sorted samples = the median of 500. */
	enum { QUANTILE = 250, NSAMPLES = 500, NROUNDS = 20 };
	struct psm_counter *ctr = psm_counter_create();

	/* Let the counter and caches settle before measuring. */
	psm_settle(ctr, psm_atomic_sample_empty, NULL);

	uint64_t baseline =
		psm_integer_quantile(ctr, psm_atomic_sample_empty,
				     NULL, QUANTILE, NSAMPLES);

	printf("An empty sample takes %llu cycles\n",
	       (unsigned long long)baseline);
	for (int i = 0; i < NROUNDS; i++) {
		uint64_t cost =
			psm_integer_quantile(ctr, psm_atomic_sample_enosys,
					     NULL, QUANTILE, NSAMPLES) - baseline;

		printf("1x ENOSYS: %llu\n", (unsigned long long)cost);
	}
	for (int i = 0; i < NROUNDS; i++) {
		uint64_t cost =
			psm_integer_quantile(ctr, psm_atomic_sample_bad_prctl_x1000,
					     NULL, QUANTILE, NSAMPLES) - baseline;

		/* Amortize: the sample covers 1000 calls. */
		printf("1000x bad prctl: %llu\n",
		       (unsigned long long)cost / 1000);
	}
	psm_counter_destroy(ctr);
	return 0;
}