#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/user.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <err.h>
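/*
 * A self-monitoring perf counter: a perf_event fd plus the kernel's
 * mmapped metadata page, which exposes the RDPMC index and the
 * seqlock/scaling fields used below.
 */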
struct psm_counter {
int fd;
struct perf_event_mmap_page *metadata;
};
struct psm_counter *psm_counter_create(void)
{
	struct psm_counter *counter = malloc(sizeof(struct psm_counter));
	if (!counter)
		err(1, "malloc");
struct perf_event_attr attr;
memset(&attr, 0, sizeof(attr));
attr.type = PERF_TYPE_HARDWARE;
attr.size = sizeof(struct perf_event_attr);
attr.config = PERF_COUNT_HW_CPU_CYCLES;
counter->fd = syscall(
SYS_perf_event_open,
&attr, /* attributes */
0, /* monitor me */
-1, /* all CPUs */
-1, /* group leader */
PERF_FLAG_FD_CLOEXEC /* flags */
);
if (counter->fd == -1)
err(1, "perf_event_open");
counter->metadata = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
counter->fd, 0);
if (counter->metadata == MAP_FAILED) {
err(1, "mmap");
}
	if (!counter->metadata->cap_user_rdpmc)
		errx(1, "RDPMC not supported (cap_user_rdpmc == 0)");
	if (!counter->metadata->index)
		errx(1, "RDPMC not supported (no index assigned)");
return counter;
}
void psm_counter_destroy(struct psm_counter *counter)
{
munmap(counter->metadata, PAGE_SIZE);
counter->metadata = MAP_FAILED;
	close(counter->fd);
	counter->fd = -1;
	free(counter);
}
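/* Compiler-only barrier: forbids reordering of memory accesses across it. */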
static inline void psm_barrier(void)
{
asm volatile ("" : : : "memory");
}
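/*
 * Full serialization: CPUID is an architecturally serializing instruction
 * on x86. Unused by the samplers below, but handy when experimenting.
 */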
static inline void psm_serialize(void)
{
unsigned int eax = 0, ecx = 0;
asm volatile ("cpuid"
: "+a" (eax), "+c" (ecx) : : "ebx", "edx", "memory");
}
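/* Reads the hardware PMC selected by ECX; returns the raw pmc_width-bit count. */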
static inline uint64_t psm_rdpmc(unsigned int ecx)
{
unsigned int eax, edx;
asm volatile ("rdpmc" : "=a" (eax), "=d" (edx) : "c" (ecx));
return (((uint64_t)edx << 32) | eax);
}
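/*
 * A sample function measures `reps` repetitions of some operation and
 * stores the elapsed count; it returns false if the sample was disturbed
 * (e.g. by preemption) and should be retried.
 */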
typedef bool (*psm_sample_fn)(uint64_t *count,
const struct psm_counter *counter,
int reps,
void *opaque);
/*
* psm_atomic is a mechanism for very precise counter measurement of
* very short operations. A psm_atomic interval will fail if the
* process is preempted during the measurement interval.
*/
struct psm_atomic {
/* Copied from metadata in psm_atomic_start. */
uint32_t perf_lock; /* If odd, this sample is bad. */
uint64_t perf_time_offset;
uint64_t initial_raw_count;
/*
* This is here to improve code generation. struct psm_atomic
 * is unlikely to be referenced by an escaped pointer and hence
* be affected by a "memory" clobber.
*/
unsigned int rdpmc_ecx;
};
/*
* Starts measuring an uninterrupted duration.
*/
static inline struct psm_atomic psm_atomic_start(const struct psm_counter *ctr)
{
struct psm_atomic state;
state.perf_lock = ctr->metadata->lock;
psm_barrier();
state.perf_time_offset = ctr->metadata->time_offset;
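	/* The perf ABI publishes the hardware index biased by one. */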
state.rdpmc_ecx = ctr->metadata->index - 1;
psm_barrier(); /* Do rdpmc last to reduce noise */
state.initial_raw_count = psm_rdpmc(state.rdpmc_ecx);
return state;
}
static inline bool psm_atomic_elapsed(uint64_t *elapsed_count,
const struct psm_atomic *state,
const struct psm_counter *ctr)
{
/* Do the RDPMC first to reduce noise. */
uint64_t count_now = psm_rdpmc(state->rdpmc_ecx);
psm_barrier(); /* No, really, do it first. */
unsigned int shift = 64 - ctr->metadata->pmc_width;
count_now <<= shift;
uint64_t initial_count = state->initial_raw_count;
initial_count <<= shift;
*elapsed_count = (count_now - initial_count) >> shift;
psm_barrier();
if (ctr->metadata->time_offset != state->perf_time_offset)
return false; /* We were interrupted. */
/* Now check the lock. */
psm_barrier();
if (ctr->metadata->lock != state->perf_lock || (state->perf_lock & 1))
return false;
return true;
}
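/*
 * Typical usage is a retry loop. A minimal sketch (operation_under_test
 * is hypothetical, not one of the samplers below):
 *
 *	uint64_t cycles;
 *	do {
 *		struct psm_atomic a = psm_atomic_start(ctr);
 *		operation_under_test();
 *	} while (!psm_atomic_elapsed(&cycles, &a, ctr));
 */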
bool psm_atomic_sample_empty(uint64_t *count,
const struct psm_counter *ctr,
int reps, void *opaque)
{
struct psm_atomic duration = psm_atomic_start(ctr);
return psm_atomic_elapsed(count, &duration, ctr);
}
bool psm_atomic_sample_enosys(uint64_t *count,
const struct psm_counter *ctr,
int reps, void *opaque)
{
struct psm_atomic duration = psm_atomic_start(ctr);
#ifdef __x86_64__
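	/*
	 * 0xbfffffff is far outside the valid syscall range, so the kernel
	 * takes the full entry/exit path and returns -ENOSYS.
	 */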
unsigned long rax;
for (int i = 0; i < reps; i++) {
rax = 0xbfffffff;
asm volatile ("syscall" : "+a" (rax) : : "rcx", "r11");
}
#else
for (int i = 0; i < reps; i++)
syscall(0x3fffffff);
#endif
return psm_atomic_elapsed(count, &duration, ctr);
}
bool psm_atomic_sample_bad_prctl(uint64_t *count,
const struct psm_counter *ctr,
int reps, void *opaque)
{
struct psm_atomic duration = psm_atomic_start(ctr);
#ifdef __x86_64__
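	/*
	 * prctl(-1, ...) is a real syscall with an invalid option: it
	 * exercises full syscall dispatch and fails with -EINVAL.
	 */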
unsigned long rax;
register unsigned long rdi asm("rdi");
for (int i = 0; i < reps; i++) {
rax = SYS_prctl;
rdi = -1;
asm volatile ("syscall" : "+a" (rax), "+r" (rdi) : : "rcx", "r11");
}
#else
for (int i = 0; i < reps; i++)
syscall(SYS_prctl, -1);
#endif
return psm_atomic_elapsed(count, &duration, ctr);
}
static int compare_ui64(const void *a_void, const void *b_void)
{
	const uint64_t a = *(const uint64_t *)a_void;
	const uint64_t b = *(const uint64_t *)b_void;
if (a < b)
return -1;
else if (a > b)
return 1;
else
return 0;
}
uint64_t psm_integer_quantile(const struct psm_counter *ctr, psm_sample_fn fn,
int reps, void *opaque,
size_t q, size_t n)
{
if (q >= n)
abort();
	uint64_t *array = calloc(n, sizeof(uint64_t));
	if (!array)
		err(1, "calloc");
for (size_t i = 0; i < n; ) {
uint64_t sample;
if (!fn(&sample, ctr, reps, opaque))
continue;
array[i++] = sample;
}
qsort(array, n, sizeof(uint64_t), compare_ui64);
uint64_t ret = array[q];
free(array);
return ret;
}
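/*
 * Repeats best-of-10 sampling until the minimum is stable across 10
 * consecutive rounds; returns ~0ULL if it never settles.
 */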
uint64_t psm_settle(const struct psm_counter *ctr, psm_sample_fn fn,
int reps, void *opaque)
{
uint64_t val = UINT64_MAX;
int good_iters = 0;
for (int total_iters = 0; total_iters < 500; total_iters++) {
uint64_t best = UINT64_MAX;
for (int inner = 0; inner < 10; ) {
uint64_t count;
if (!fn(&count, ctr, reps, opaque))
continue; /* Rejected sample */
if (count < best)
best = count;
inner++;
}
if (best != val) {
good_iters = 1;
val = best;
} else {
good_iters++;
if (good_iters == 10)
return val;
}
}
return ~0ULL;
}
int reparray[] = {1, 200};
struct psm_costestimate
{
double baseline;
double per_rep;
};
struct pair
{
int x;
double y;
};
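/*
 * Estimates cost by sampling at randomized rep counts and fitting a
 * least-squares line: baseline is the fixed self-monitoring overhead
 * (intercept), per_rep the marginal cost of each repetition (slope).
 */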
bool psm_estimate_cost(struct psm_costestimate *est,
const struct psm_counter *ctr,
psm_sample_fn fn, void *opaque)
{
int rounds = 50;
int scale = 1;
int steps = 100;
int n = rounds * steps;
int i = 0;
uint64_t sample;
	struct pair *array = calloc(n, sizeof(struct pair));
	if (!array)
		err(1, "calloc");
for (int r = 0; r < rounds; r++) {
for (int s = 0; s < steps; s++) {
array[i].x = s * scale;
i++;
}
}
	/* Now randomly permute it (Fisher-Yates shuffle). */
	for (i = n - 1; i >= 1; i--) {
		int j = rand() % (i + 1);
int tmp = array[i].x;
array[i].x = array[j].x;
array[j].x = tmp;
}
	/* Warm up by burning one big sample before measuring. */
	fn(&sample, ctr, scale * steps * (rounds / 2), opaque);
/* Now get all the samples and accumulate. */
double sum_xy = 0.0, sum_x = 0.0, sum_xx = 0.0, sum_y = 0.0;
for (i = 0; i < n; i++) {
while (!fn(&sample, ctr, array[i].x, opaque))
;
array[i].y = sample;
sum_x += array[i].x;
sum_xy += array[i].x * array[i].y;
sum_xx += (double)array[i].x * (double)array[i].x;
sum_y += array[i].y;
}
/* Calculate a simple linear regression. */
est->per_rep = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x);
est->baseline = (sum_y - est->per_rep * sum_x) / n;
free(array);
return true;
}
int main(void)
{
struct psm_counter *ctr = psm_counter_create();
uint64_t baseline = psm_settle(ctr, psm_atomic_sample_empty, 1, NULL);
if (baseline == (uint64_t)~0ULL)
printf("Self-monitoring warm-up didn't settle down\n");
else
printf("Self-monitoring warmed up: overhead is %llu cycles\n",
(unsigned long long)baseline);
/*
for (int repidx = 0; repidx < 2; repidx++) {
int reps = reparray[repidx];
for (int i = 0; i < 10; i++) {
uint64_t cost = psm_integer_quantile(ctr, psm_atomic_sample_enosys, reps, NULL, 250, 500) - baseline;
printf("%dx ENOSYS: %llu\n", reps, (unsigned long long)cost/reps);
}
}
for (int repidx = 0; repidx < 2; repidx++) {
int reps = reparray[repidx];
for (int i = 0; i < 10; i++) {
uint64_t cost = psm_integer_quantile(ctr, psm_atomic_sample_bad_prctl, reps, NULL, 250, 500) - baseline;
printf("%dx bad prctl: %llu\n", reps, (unsigned long long)cost/reps);
}
}
*/
struct psm_costestimate est;
psm_estimate_cost(&est, ctr, psm_atomic_sample_enosys, NULL);
printf("ENOSYS:\t\t%.2f cycles (plus %.2f cycles self-monitoring overhead)\n", est.per_rep, est.baseline);
psm_estimate_cost(&est, ctr, psm_atomic_sample_bad_prctl, NULL);
printf("bad prctl:\t%.2f cycles (plus %.2f cycles self-monitoring overhead)\n", est.per_rep, est.baseline);
psm_counter_destroy(ctr);
}