// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/bit_spinlock.h>
#include <linux/gfp.h>
#include <linux/local_lock.h>
#include <linux/page-flags.h>
#include <linux/percpu_counter.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <asm/tsc.h>

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
* On SMP, spin_trylock is sufficient protection.
* On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
*/
#define pcp_trylock_prepare(flags)	do { } while (0)
#define pcp_trylock_finish(flags)	do { } while (0)
#else
/* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
#define pcp_trylock_prepare(flags) local_irq_save(flags)
#define pcp_trylock_finish(flags) local_irq_restore(flags)
#endif
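
/*
 * Typical pairing (a minimal sketch; the pcpu_spin_trylock benchmark
 * below is the real thing):
 *
 *	pcp_trylock_prepare(UP_flags);
 *	pcp = pcpu_spin_trylock(type, member, ptr);
 *	if (pcp) {
 *		...
 *		pcpu_spin_unlock(member, pcp);
 *	}
 *	pcp_trylock_finish(UP_flags);
 */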
/*
* Locking a pcp requires a PCP lookup followed by a spinlock. To avoid
* a migration causing the wrong PCP to be locked and remote memory being
* potentially allocated, pin the task to the CPU for the lookup+lock.
* preempt_disable is used on !RT because it is faster than migrate_disable.
* migrate_disable is used on RT because otherwise RT spinlock usage is
* interfered with and a high priority task cannot preempt the allocator.
*/
#ifndef CONFIG_PREEMPT_RT
#define pcpu_task_pin() preempt_disable()
#define pcpu_task_unpin() preempt_enable()
#else
#define pcpu_task_pin() migrate_disable()
#define pcpu_task_unpin() migrate_enable()
#endif
/*
 * Generic helper to look up and lock a per-cpu variable with an embedded
 * spinlock. The return value should be passed to the matching unlock helper.
*/
#define pcpu_spin_lock(type, member, ptr) \
({ \
type *_ret; \
pcpu_task_pin(); \
_ret = this_cpu_ptr(ptr); \
spin_lock(&_ret->member); \
_ret; \
})
#define pcpu_spin_trylock(type, member, ptr) \
({ \
type *_ret; \
pcpu_task_pin(); \
_ret = this_cpu_ptr(ptr); \
if (!spin_trylock(&_ret->member)) { \
pcpu_task_unpin(); \
_ret = NULL; \
} \
_ret; \
})
#define pcpu_spin_unlock(member, ptr) \
({ \
spin_unlock(&ptr->member); \
pcpu_task_unpin(); \
})
/* struct per_cpu_pages specific helpers. */
#define pcp_spin_lock(ptr) \
pcpu_spin_lock(struct per_cpu_pages, lock, ptr)
#define pcp_spin_trylock(ptr) \
pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)
#define pcp_spin_unlock(ptr) \
pcpu_spin_unlock(lock, ptr)
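
/*
 * Example usage of the generic helpers (a sketch; test_pcps below embeds
 * the 'slock' spinlock):
 *
 *	struct test_pcp *pcp;
 *
 *	pcp = pcpu_spin_lock(struct test_pcp, slock, &test_pcps);
 *	pcp->counter++;
 *	pcpu_spin_unlock(slock, pcp);
 *
 * The pcp_spin_* wrappers are unused here; they are kept to mirror the
 * page allocator's helpers for struct per_cpu_pages.
 */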
typedef union {
struct {
unsigned long counter;
void *dummy;
};
u128 full;
} counter_ptr_t;
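
/*
 * A counter packed next to a pointer so the pair can be updated as a single
 * 128-bit quantity with cmpxchg128, in the style of SLUB's freelist/tid pair.
 */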
struct test_pcp {
local_lock_t llock;
localtry_lock_t ltlock;
spinlock_t slock;
unsigned long counter;
counter_ptr_t counter_ptr;
};
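
/* Sink for the result checks, so the compiler cannot elide the timed loops. */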
static bool __dummy;
static DEFINE_PER_CPU(struct test_pcp, test_pcps) = {
.llock = INIT_LOCAL_LOCK(llock),
.ltlock = INIT_LOCALTRY_LOCK(ltlock),
.slock = __SPIN_LOCK_UNLOCKED(slock),
};
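
/* Global (non-per-cpu) pair for the plain try_cmpxchg128 benchmark. */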
static counter_ptr_t counter_ptr;
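
/*
 * Stand-in for a struct page: a flags word to take bit_spin_lock() on
 * PG_locked, plus the counter it protects.
 */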
struct test_bsl {
unsigned long page_flags;
unsigned long counter;
};
static struct test_bsl bsl = {};
#define TIMING_ITERATIONS 1000000000
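
/* Reads the 'before'/'after' TSC samples from the enclosing scope. */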
#define print_result(name) \
pr_info("%-35s %12llu cycles\n", name, after - before)
static int __init locking_bench(void)
{
unsigned long long before, after;
unsigned long __maybe_unused UP_flags;
struct test_pcp *pcp;
struct percpu_counter pcpc;
unsigned long flags;
	if (percpu_counter_init(&pcpc, 0, GFP_KERNEL))
		return -ENOMEM;
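
	/*
	 * Baseline: a plain per-cpu increment. The result check feeds the
	 * __dummy sink so the loop cannot be optimized away.
	 */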
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
if (this_cpu_inc_return(test_pcps.counter) == 0)
__dummy = true;
}
after = rdtsc_ordered();
cond_resched();
print_result("this_cpu_inc_return");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
unsigned long old, new;
do {
old = this_cpu_read(test_pcps.counter);
new = old + 1;
} while (!this_cpu_try_cmpxchg(test_pcps.counter, &old, new));
}
after = rdtsc_ordered();
cond_resched();
print_result("this_cpu_try_cmpxchg");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
unsigned long old, new;
do {
old = raw_cpu_read(test_pcps.counter);
new = old + 1;
} while (!this_cpu_try_cmpxchg(test_pcps.counter, &old, new));
}
after = rdtsc_ordered();
cond_resched();
print_result("raw+this_cpu_try_cmpxchg");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
counter_ptr_t old, new;
do {
			struct test_pcp *tp = raw_cpu_ptr(&test_pcps);
			old.full = tp->counter_ptr.full;
new.counter = old.counter + 1;
new.dummy = old.dummy;
} while (!this_cpu_try_cmpxchg128(test_pcps.counter_ptr.full,
&old.full, new.full));
}
after = rdtsc_ordered();
cond_resched();
print_result("this_cpu_try_cmpxchg128");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
counter_ptr_t *test;
counter_ptr_t old, new;
do {
test = &counter_ptr;
old.full = test->full;
new.counter = old.counter + 1;
new.dummy = old.dummy;
} while (!try_cmpxchg128(&test->full,
&old.full, new.full));
}
after = rdtsc_ordered();
cond_resched();
print_result("try_cmpxchg128");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
struct test_bsl *test = &bsl;
bit_spin_lock(PG_locked, &test->page_flags);
test->counter++;
bit_spin_unlock(PG_locked, &test->page_flags);
}
after = rdtsc_ordered();
cond_resched();
print_result("bit_spin_lock");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
percpu_counter_inc(&pcpc);
}
after = rdtsc_ordered();
cond_resched();
print_result("percpu_counter_inc");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
percpu_counter_add_local(&pcpc, 1);
}
after = rdtsc_ordered();
cond_resched();
print_result("percpu_counter_inc_local");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
local_lock(&test_pcps.llock);
pcp = this_cpu_ptr(&test_pcps);
pcp->counter++;
local_unlock(&test_pcps.llock);
}
after = rdtsc_ordered();
cond_resched();
print_result("local_lock");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
local_lock_irq(&test_pcps.llock);
pcp = this_cpu_ptr(&test_pcps);
pcp->counter++;
local_unlock_irq(&test_pcps.llock);
}
after = rdtsc_ordered();
cond_resched();
print_result("local_lock_irq");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
local_lock_irqsave(&test_pcps.llock, flags);
pcp = this_cpu_ptr(&test_pcps);
pcp->counter++;
local_unlock_irqrestore(&test_pcps.llock, flags);
}
after = rdtsc_ordered();
cond_resched();
print_result("local_lock_irqsave");
before = rdtsc_ordered();
	for (unsigned int j = 0; j < 10; j++) {
local_irq_disable();
for (unsigned long i = 0; i < TIMING_ITERATIONS/10; i++) {
local_lock_irqsave(&test_pcps.llock, flags);
pcp = this_cpu_ptr(&test_pcps);
pcp->counter++;
local_unlock_irqrestore(&test_pcps.llock, flags);
}
local_irq_enable();
}
after = rdtsc_ordered();
cond_resched();
print_result("irq_dis(local_lock_irqsave)");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
localtry_lock(&test_pcps.ltlock);
pcp = this_cpu_ptr(&test_pcps);
pcp->counter++;
localtry_unlock(&test_pcps.ltlock);
}
after = rdtsc_ordered();
cond_resched();
print_result("localtry_lock");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
localtry_lock_irq(&test_pcps.ltlock);
pcp = this_cpu_ptr(&test_pcps);
pcp->counter++;
localtry_unlock_irq(&test_pcps.ltlock);
}
after = rdtsc_ordered();
cond_resched();
print_result("localtry_lock_irq");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
localtry_lock_irqsave(&test_pcps.ltlock, flags);
pcp = this_cpu_ptr(&test_pcps);
pcp->counter++;
localtry_unlock_irqrestore(&test_pcps.ltlock, flags);
}
after = rdtsc_ordered();
cond_resched();
print_result("localtry_lock_irqsave");
before = rdtsc_ordered();
	for (unsigned int j = 0; j < 10; j++) {
local_irq_disable();
for (unsigned long i = 0; i < TIMING_ITERATIONS/10; i++) {
localtry_lock_irqsave(&test_pcps.ltlock, flags);
pcp = this_cpu_ptr(&test_pcps);
pcp->counter++;
localtry_unlock_irqrestore(&test_pcps.ltlock, flags);
}
local_irq_enable();
}
after = rdtsc_ordered();
cond_resched();
print_result("irq_dis(localtry_lock_irqsave)");
before = rdtsc_ordered();
for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
pcp_trylock_prepare(UP_flags);
		pcp = pcpu_spin_trylock(struct test_pcp, slock, &test_pcps);
		if (unlikely(!pcp)) {
			pcp_trylock_finish(UP_flags);
			continue;
		}
		pcp->counter++;
pcpu_spin_unlock(slock, pcp);
pcp_trylock_finish(UP_flags);
}
after = rdtsc_ordered();
cond_resched();
print_result("pcpu_spin_trylock");
percpu_counter_destroy(&pcpc);
/*
 * Everything is OK. Return an error anyway so the module is never kept
 * loaded and the benchmark can be rerun without an intervening rmmod.
*/
return -EINVAL;
}
module_init(locking_bench);
MODULE_DESCRIPTION("Benchmark for (pcp) locking schemes");
MODULE_LICENSE("GPL");