| From: Thomas Gleixner <tglx@linutronix.de> |
| Date: Thu, 12 Oct 2017 17:18:06 +0200 |
| Subject: rtmutex: add rwlock implementation based on rtmutex |
| |
| The implementation is bias-based, similar to the rwsem implementation: |
| readers take the lock through an atomic fast path as long as the reader |
| bias is set, while a writer acquires the underlying rtmutex, removes the |
| bias to force readers into the slow path and waits until all readers have |
| left the critical region. As a consequence rwlocks on RT are not writer |
| fair, but blocked writers are subject to the rtmutex priority/DL |
| inheritance mechanism. |
| |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| --- |
| include/linux/rwlock_rt.h | 119 ++++++++++++ |
| include/linux/rwlock_types_rt.h | 55 +++++ |
| kernel/locking/rwlock-rt.c | 368 ++++++++++++++++++++++++++++++++++++++++ |
| 3 files changed, 542 insertions(+) |
| create mode 100644 include/linux/rwlock_rt.h |
| create mode 100644 include/linux/rwlock_types_rt.h |
| create mode 100644 kernel/locking/rwlock-rt.c |
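| |
| As a reading aid for the code below, the per-lock ->readers counter |
| encodes the lock state (signed view of the atomic_t) roughly as follows: |
| |
|   readers == READER_BIAS           unlocked, no readers, no writer |
|   READER_BIAS < readers < 0        n readers hold the lock (READER_BIAS + n) |
|   0 <= readers, != WRITER_BIAS     a writer holds the rtmutex, has removed |
|                                    the bias and waits for readers to drain |
|   readers == WRITER_BIAS           write locked |
| |
| New readers may still enter through the slow path until the writer has |
| observed zero readers and set WRITER_BIAS, which is why the lock is not |
| writer fair. |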
| |
| --- /dev/null |
| +++ b/include/linux/rwlock_rt.h |
| @@ -0,0 +1,119 @@ |
| +#ifndef __LINUX_RWLOCK_RT_H |
| +#define __LINUX_RWLOCK_RT_H |
| + |
| +#ifndef __LINUX_SPINLOCK_H |
| +#error Do not include directly. Use spinlock.h |
| +#endif |
| + |
| +extern void __lockfunc rt_write_lock(rwlock_t *rwlock); |
| +extern void __lockfunc rt_read_lock(rwlock_t *rwlock); |
| +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); |
| +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); |
| +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); |
| +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); |
| +extern int __lockfunc rt_read_can_lock(rwlock_t *rwlock); |
| +extern int __lockfunc rt_write_can_lock(rwlock_t *rwlock); |
| +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); |
| + |
| +#define read_can_lock(rwlock) rt_read_can_lock(rwlock) |
| +#define write_can_lock(rwlock) rt_write_can_lock(rwlock) |
| + |
| +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) |
| +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) |
| + |
| +static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags) |
| +{ |
| + /* XXX ARCH_IRQ_ENABLED */ |
| + *flags = 0; |
| + return rt_write_trylock(lock); |
| +} |
| + |
| +#define write_trylock_irqsave(lock, flags) \ |
| + __cond_lock(lock, __write_trylock_rt_irqsave(lock, &(flags))) |
| + |
| +#define read_lock_irqsave(lock, flags) \ |
| + do { \ |
| + typecheck(unsigned long, flags); \ |
| + rt_read_lock(lock); \ |
| + flags = 0; \ |
| + } while (0) |
| + |
| +#define write_lock_irqsave(lock, flags) \ |
| + do { \ |
| + typecheck(unsigned long, flags); \ |
| + rt_write_lock(lock); \ |
| + flags = 0; \ |
| + } while (0) |
| + |
| +#define read_lock(lock) rt_read_lock(lock) |
| + |
| +#define read_lock_bh(lock) \ |
| + do { \ |
| + local_bh_disable(); \ |
| + rt_read_lock(lock); \ |
| + } while (0) |
| + |
| +#define read_lock_irq(lock) read_lock(lock) |
| + |
| +#define write_lock(lock) rt_write_lock(lock) |
| + |
| +#define write_lock_bh(lock) \ |
| + do { \ |
| + local_bh_disable(); \ |
| + rt_write_lock(lock); \ |
| + } while (0) |
| + |
| +#define write_lock_irq(lock) write_lock(lock) |
| + |
| +#define read_unlock(lock) rt_read_unlock(lock) |
| + |
| +#define read_unlock_bh(lock) \ |
| + do { \ |
| + rt_read_unlock(lock); \ |
| + local_bh_enable(); \ |
| + } while (0) |
| + |
| +#define read_unlock_irq(lock) read_unlock(lock) |
| + |
| +#define write_unlock(lock) rt_write_unlock(lock) |
| + |
| +#define write_unlock_bh(lock) \ |
| + do { \ |
| + rt_write_unlock(lock); \ |
| + local_bh_enable(); \ |
| + } while (0) |
| + |
| +#define write_unlock_irq(lock) write_unlock(lock) |
| + |
| +#define read_unlock_irqrestore(lock, flags) \ |
| + do { \ |
| + typecheck(unsigned long, flags); \ |
| + (void) flags; \ |
| + rt_read_unlock(lock); \ |
| + } while (0) |
| + |
| +#define write_unlock_irqrestore(lock, flags) \ |
| + do { \ |
| + typecheck(unsigned long, flags); \ |
| + (void) flags; \ |
| + rt_write_unlock(lock); \ |
| + } while (0) |
| + |
| +#define rwlock_init(rwl) \ |
| +do { \ |
| + static struct lock_class_key __key; \ |
| + \ |
| + __rt_rwlock_init(rwl, #rwl, &__key); \ |
| +} while (0) |
| + |
| +/* |
| + * Internal functions made global for CPU pinning |
| + */ |
| +void __read_rt_lock(struct rt_rw_lock *lock); |
| +int __read_rt_trylock(struct rt_rw_lock *lock); |
| +void __write_rt_lock(struct rt_rw_lock *lock); |
| +int __write_rt_trylock(struct rt_rw_lock *lock); |
| +void __read_rt_unlock(struct rt_rw_lock *lock); |
| +void __write_rt_unlock(struct rt_rw_lock *lock); |
| + |
| +#endif |
| --- /dev/null |
| +++ b/include/linux/rwlock_types_rt.h |
| @@ -0,0 +1,55 @@ |
| +#ifndef __LINUX_RWLOCK_TYPES_RT_H |
| +#define __LINUX_RWLOCK_TYPES_RT_H |
| + |
| +#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#error "Do not include directly. Include spinlock_types.h instead" |
| +#endif |
| + |
| +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
| +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } |
| +#else |
| +# define RW_DEP_MAP_INIT(lockname) |
| +#endif |
| + |
| +typedef struct rt_rw_lock rwlock_t; |
| + |
| +#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name) |
| + |
| +#define DEFINE_RWLOCK(name) \ |
| + rwlock_t name = __RW_LOCK_UNLOCKED(name) |
| + |
| +/* |
| + * A reader biased implementation primarily for CPU pinning. |
| + * |
| + * Can be selected as general replacement for the single reader RT rwlock |
| + * variant. |
| + */ |
| +struct rt_rw_lock { |
| + struct rt_mutex rtmutex; |
| + atomic_t readers; |
| +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
| + struct lockdep_map dep_map; |
| +#endif |
| +}; |
| + |
| +#define READER_BIAS (1U << 31) |
| +#define WRITER_BIAS (1U << 30) |
| + |
| +#define __RWLOCK_RT_INITIALIZER(name) \ |
| +{ \ |
| + .readers = ATOMIC_INIT(READER_BIAS), \ |
| + .rtmutex = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.rtmutex), \ |
| + RW_DEP_MAP_INIT(name) \ |
| +} |
| + |
| +void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, |
| + struct lock_class_key *key); |
| + |
| +#define rwlock_biased_rt_init(rwlock) \ |
| + do { \ |
| + static struct lock_class_key __key; \ |
| + \ |
| + __rwlock_biased_rt_init((rwlock), #rwlock, &__key); \ |
| + } while (0) |
| + |
| +#endif |
| --- /dev/null |
| +++ b/kernel/locking/rwlock-rt.c |
| @@ -0,0 +1,368 @@ |
| +/* |
| + */ |
| +#include <linux/sched/debug.h> |
| +#include <linux/export.h> |
| + |
| +#include "rtmutex_common.h" |
| +#include <linux/rwlock_types_rt.h> |
| + |
| +/* |
| + * RT-specific reader/writer locks |
| + * |
| + * write_lock() |
| + * 1) Lock lock->rtmutex |
| + * 2) Remove the reader BIAS to force readers into the slow path |
| + * 3) Wait until all readers have left the critical region |
| + * 4) Mark it write locked |
| + * |
| + * write_unlock() |
| + * 1) Remove the write locked marker |
| + * 2) Set the reader BIAS so readers can use the fast path again |
| + * 3) Unlock lock->rtmutex to release blocked readers |
| + * |
| + * read_lock() |
| + * 1) Try fast path acquisition (reader BIAS is set) |
| + * 2) Take lock->rtmutex.wait_lock which protects the writelocked flag |
| + * 3) If !writelocked, acquire it for read |
| + * 4) If writelocked, block on lock->rtmutex |
| + * 5) unlock lock->rtmutex, goto 1) |
| + * |
| + * read_unlock() |
| + * 1) Try fast path release (reader count != 1) |
| + * 2) Wake the writer waiting in write_lock()#3 |
| + * |
| + * read_lock()#3 has the consequence that rw locks on RT are not writer |
| + * fair, but writers, which should be avoided in RT tasks (think tasklist |
| + * lock), are subject to the rtmutex priority/DL inheritance mechanism. |
| + * |
| + * It's possible to make the rw locks writer fair by keeping a list of |
| + * active readers. A blocked writer would force all newly incoming readers |
| + * to block on the rtmutex, but the rtmutex would have to be proxy locked |
| + * for one reader after the other. We can't use multi-reader inheritance |
| + * because there is no way to support that with |
| + * SCHED_DEADLINE. Implementing the one by one reader boosting/handover |
| + * mechanism would be major surgery for very dubious value. |
| + * |
| + * The risk of writer starvation is there, but the pathological use cases |
| + * which trigger it are not necessarily the typical RT workloads. |
| + */ |
| + |
| +void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, |
| + struct lock_class_key *key) |
| +{ |
| +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
| + /* |
| + * Make sure we are not reinitializing a held lock: |
| + */ |
| + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); |
| + lockdep_init_map(&lock->dep_map, name, key, 0); |
| +#endif |
| + atomic_set(&lock->readers, READER_BIAS); |
| + rt_mutex_init(&lock->rtmutex); |
| + lock->rtmutex.save_state = 1; |
| +} |
| + |
| +int __read_rt_trylock(struct rt_rw_lock *lock) |
| +{ |
| + int r, old; |
| + |
| + /* |
| + * Increment reader count, if lock->readers < 0, i.e. READER_BIAS is |
| + * set. |
| + */ |
| + for (r = atomic_read(&lock->readers); r < 0;) { |
| + old = atomic_cmpxchg(&lock->readers, r, r + 1); |
| + if (likely(old == r)) |
| + return 1; |
| + r = old; |
| + } |
| + return 0; |
| +} |
| + |
| +void __sched __read_rt_lock(struct rt_rw_lock *lock) |
| +{ |
| + struct rt_mutex *m = &lock->rtmutex; |
| + struct rt_mutex_waiter waiter; |
| + unsigned long flags; |
| + |
| + if (__read_rt_trylock(lock)) |
| + return; |
| + |
| + raw_spin_lock_irqsave(&m->wait_lock, flags); |
| + /* |
| + * Allow readers as long as the writer has not completely |
| + * acquired the lock for write. |
| + */ |
| + if (atomic_read(&lock->readers) != WRITER_BIAS) { |
| + atomic_inc(&lock->readers); |
| + raw_spin_unlock_irqrestore(&m->wait_lock, flags); |
| + return; |
| + } |
| + |
| + /* |
| + * Call into the slow lock path with the rtmutex->wait_lock |
| + * held, so this can't result in the following race: |
| + * |
| + * Reader1                 Reader2              Writer |
| + *                         read_lock() |
| + *                                              write_lock() |
| + *                                              rtmutex_lock(m) |
| + *                                              swait() |
| + * read_lock() |
| + * unlock(m->wait_lock) |
| + *                         read_unlock() |
| + *                         swake() |
| + *                                              lock(m->wait_lock) |
| + *                                              lock->writelocked=true |
| + *                                              unlock(m->wait_lock) |
| + * |
| + *                                              write_unlock() |
| + *                                              lock->writelocked=false |
| + *                                              rtmutex_unlock(m) |
| + *                         read_lock() |
| + *                                              write_lock() |
| + *                                              rtmutex_lock(m) |
| + *                                              swait() |
| + * rtmutex_lock(m) |
| + * |
| + * That would put Reader1 behind the writer waiting on |
| + * Reader2 to call read_unlock() which might be unbound. |
| + */ |
| + rt_mutex_init_waiter(&waiter, true); |
| + rt_spin_lock_slowlock_locked(m, &waiter, flags); |
| + /* |
| + * The slowlock() above is guaranteed to return with the rtmutex now |
| + * held, so there can't be a writer active. Increment the reader |
| + * count and immediately drop the rtmutex again. |
| + */ |
| + atomic_inc(&lock->readers); |
| + raw_spin_unlock_irqrestore(&m->wait_lock, flags); |
| + rt_spin_lock_slowunlock(m); |
| + |
| + debug_rt_mutex_free_waiter(&waiter); |
| +} |
| + |
| +void __read_rt_unlock(struct rt_rw_lock *lock) |
| +{ |
| + struct rt_mutex *m = &lock->rtmutex; |
| + struct task_struct *tsk; |
| + |
| + /* |
| + * lock->readers can only hit 0 when a writer is waiting for the |
| + * active readers to leave the critical region. |
| + */ |
| + if (!atomic_dec_and_test(&lock->readers)) |
| + return; |
| + |
| + raw_spin_lock_irq(&m->wait_lock); |
| + /* |
| + * Wake the writer, i.e. the rtmutex owner. It might release the |
| + * rtmutex concurrently in the fast path, but to clean up the rw |
| + * lock it needs to acquire m->wait_lock. The worst case which can |
| + * happen is a spurious wakeup. |
| + */ |
| + tsk = rt_mutex_owner(m); |
| + if (tsk) |
| + wake_up_process(tsk); |
| + |
| + raw_spin_unlock_irq(&m->wait_lock); |
| +} |
| + |
| +static void __write_unlock_common(struct rt_rw_lock *lock, int bias, |
| + unsigned long flags) |
| +{ |
| + struct rt_mutex *m = &lock->rtmutex; |
| + |
| + atomic_add(READER_BIAS - bias, &lock->readers); |
| + raw_spin_unlock_irqrestore(&m->wait_lock, flags); |
| + rt_spin_lock_slowunlock(m); |
| +} |
| + |
| +void __sched __write_rt_lock(struct rt_rw_lock *lock) |
| +{ |
| + struct rt_mutex *m = &lock->rtmutex; |
| + struct task_struct *self = current; |
| + unsigned long flags; |
| + |
| + /* Take the rtmutex as a first step */ |
| + __rt_spin_lock(m); |
| + |
| + /* Force readers into slow path */ |
| + atomic_sub(READER_BIAS, &lock->readers); |
| + |
| + raw_spin_lock_irqsave(&m->wait_lock, flags); |
| + |
| + raw_spin_lock(&self->pi_lock); |
| + self->saved_state = self->state; |
| + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); |
| + raw_spin_unlock(&self->pi_lock); |
| + |
| + for (;;) { |
| + /* Have all readers left the critical region? */ |
| + if (!atomic_read(&lock->readers)) { |
| + atomic_set(&lock->readers, WRITER_BIAS); |
| + raw_spin_lock(&self->pi_lock); |
| + __set_current_state_no_track(self->saved_state); |
| + self->saved_state = TASK_RUNNING; |
| + raw_spin_unlock(&self->pi_lock); |
| + raw_spin_unlock_irqrestore(&m->wait_lock, flags); |
| + return; |
| + } |
| + |
| + raw_spin_unlock_irqrestore(&m->wait_lock, flags); |
| + |
| + if (atomic_read(&lock->readers) != 0) |
| + schedule(); |
| + |
| + raw_spin_lock_irqsave(&m->wait_lock, flags); |
| + |
| + raw_spin_lock(&self->pi_lock); |
| + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); |
| + raw_spin_unlock(&self->pi_lock); |
| + } |
| +} |
| + |
| +int __write_rt_trylock(struct rt_rw_lock *lock) |
| +{ |
| + struct rt_mutex *m = &lock->rtmutex; |
| + unsigned long flags; |
| + |
| + if (!__rt_mutex_trylock(m)) |
| + return 0; |
| + |
| + atomic_sub(READER_BIAS, &lock->readers); |
| + |
| + raw_spin_lock_irqsave(&m->wait_lock, flags); |
| + if (!atomic_read(&lock->readers)) { |
| + atomic_set(&lock->readers, WRITER_BIAS); |
| + raw_spin_unlock_irqrestore(&m->wait_lock, flags); |
| + return 1; |
| + } |
| + __write_unlock_common(lock, 0, flags); |
| + return 0; |
| +} |
| + |
| +void __write_rt_unlock(struct rt_rw_lock *lock) |
| +{ |
| + struct rt_mutex *m = &lock->rtmutex; |
| + unsigned long flags; |
| + |
| + raw_spin_lock_irqsave(&m->wait_lock, flags); |
| + __write_unlock_common(lock, WRITER_BIAS, flags); |
| +} |
| + |
| +/* Map the reader biased implementation */ |
| +static inline int do_read_rt_trylock(rwlock_t *rwlock) |
| +{ |
| + return __read_rt_trylock(rwlock); |
| +} |
| + |
| +static inline int do_write_rt_trylock(rwlock_t *rwlock) |
| +{ |
| + return __write_rt_trylock(rwlock); |
| +} |
| + |
| +static inline void do_read_rt_lock(rwlock_t *rwlock) |
| +{ |
| + __read_rt_lock(rwlock); |
| +} |
| + |
| +static inline void do_write_rt_lock(rwlock_t *rwlock) |
| +{ |
| + __write_rt_lock(rwlock); |
| +} |
| + |
| +static inline void do_read_rt_unlock(rwlock_t *rwlock) |
| +{ |
| + __read_rt_unlock(rwlock); |
| +} |
| + |
| +static inline void do_write_rt_unlock(rwlock_t *rwlock) |
| +{ |
| + __write_rt_unlock(rwlock); |
| +} |
| + |
| +static inline void do_rwlock_rt_init(rwlock_t *rwlock, const char *name, |
| + struct lock_class_key *key) |
| +{ |
| + __rwlock_biased_rt_init(rwlock, name, key); |
| +} |
| + |
| +int __lockfunc rt_read_can_lock(rwlock_t *rwlock) |
| +{ |
| + return atomic_read(&rwlock->readers) < 0; |
| +} |
| + |
| +int __lockfunc rt_write_can_lock(rwlock_t *rwlock) |
| +{ |
| + return atomic_read(&rwlock->readers) == READER_BIAS; |
| +} |
| + |
| +/* |
| + * The common functions which get wrapped into the rwlock API. |
| + */ |
| +int __lockfunc rt_read_trylock(rwlock_t *rwlock) |
| +{ |
| + int ret; |
| + |
| + migrate_disable(); |
| + ret = do_read_rt_trylock(rwlock); |
| + if (ret) |
| + rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); |
| + else |
| + migrate_enable(); |
| + return ret; |
| +} |
| +EXPORT_SYMBOL(rt_read_trylock); |
| + |
| +int __lockfunc rt_write_trylock(rwlock_t *rwlock) |
| +{ |
| + int ret; |
| + |
| + migrate_disable(); |
| + ret = do_write_rt_trylock(rwlock); |
| + if (ret) |
| + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); |
| + else |
| + migrate_enable(); |
| + return ret; |
| +} |
| +EXPORT_SYMBOL(rt_write_trylock); |
| + |
| +void __lockfunc rt_read_lock(rwlock_t *rwlock) |
| +{ |
| + migrate_disable(); |
| + rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); |
| + do_read_rt_lock(rwlock); |
| +} |
| +EXPORT_SYMBOL(rt_read_lock); |
| + |
| +void __lockfunc rt_write_lock(rwlock_t *rwlock) |
| +{ |
| + migrate_disable(); |
| + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); |
| + do_write_rt_lock(rwlock); |
| +} |
| +EXPORT_SYMBOL(rt_write_lock); |
| + |
| +void __lockfunc rt_read_unlock(rwlock_t *rwlock) |
| +{ |
| + rwlock_release(&rwlock->dep_map, _RET_IP_); |
| + do_read_rt_unlock(rwlock); |
| + migrate_enable(); |
| +} |
| +EXPORT_SYMBOL(rt_read_unlock); |
| + |
| +void __lockfunc rt_write_unlock(rwlock_t *rwlock) |
| +{ |
| + rwlock_release(&rwlock->dep_map, _RET_IP_); |
| + do_write_rt_unlock(rwlock); |
| + migrate_enable(); |
| +} |
| +EXPORT_SYMBOL(rt_write_unlock); |
| + |
| +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) |
| +{ |
| + do_rwlock_rt_init(rwlock, name, key); |
| +} |
| +EXPORT_SYMBOL(__rt_rwlock_init); |
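| |
| For illustration, callers keep using the generic rwlock API; the macros in |
| rwlock_rt.h above map it onto the rt_* entry points. A minimal usage sketch |
| follows (the example_* identifiers are made up for this sketch). Note that |
| on RT the _irq/_irqsave variants neither disable interrupts nor save flags, |
| and both the read and the write side may block on the underlying rtmutex: |
| |
|   static DEFINE_RWLOCK(example_lock); |
|   static int example_value; |
| |
|   static int example_read(void) |
|   { |
|           unsigned long flags; |
|           int val; |
| |
|           /* On RT this does not disable interrupts, flags is set to 0 */ |
|           read_lock_irqsave(&example_lock, flags); |
|           val = example_value; |
|           read_unlock_irqrestore(&example_lock, flags); |
| |
|           return val; |
|   } |
| |
|   static void example_write(int val) |
|   { |
|           /* Excludes readers and other writers */ |
|           write_lock(&example_lock); |
|           example_value = val; |
|           write_unlock(&example_lock); |
|   } |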