| From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| Date: Thu, 12 Oct 2017 17:34:38 +0200 |
| Subject: rtmutex: add ww_mutex addon for mutex-rt |
| |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| --- |
| kernel/locking/rtmutex.c | 269 ++++++++++++++++++++++++++++++++++++++-- |
| kernel/locking/rtmutex_common.h | 2 |
| kernel/locking/rwsem-rt.c | 2 |
| 3 files changed, 259 insertions(+), 14 deletions(-) |
| |
| --- a/kernel/locking/rtmutex.c |
| +++ b/kernel/locking/rtmutex.c |
| @@ -24,6 +24,7 @@ |
| #include <linux/sched/wake_q.h> |
| #include <linux/sched/debug.h> |
| #include <linux/timer.h> |
| +#include <linux/ww_mutex.h> |
| |
| #include "rtmutex_common.h" |
| |
| @@ -1244,6 +1245,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); |
| |
| #endif /* PREEMPT_RT */ |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +static inline int __sched |
| +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) |
| +{ |
| + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); |
| + struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); |
| + |
| + if (!hold_ctx) |
| + return 0; |
| + |
| + if (unlikely(ctx == hold_ctx)) |
| + return -EALREADY; |
| + |
| + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && |
| + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { |
| +#ifdef CONFIG_DEBUG_MUTEXES |
| + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); |
| + ctx->contending_lock = ww; |
| +#endif |
| + return -EDEADLK; |
| + } |
| + |
| + return 0; |
| +} |
| +#else |
| +static inline int __sched |
| +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) |
| +{ |
| + BUG(); |
| + return 0; |
| +} |
| + |
| +#endif |
| + |
| static inline int |
| try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, |
| struct rt_mutex_waiter *waiter) |
| @@ -1522,7 +1557,8 @@ void rt_mutex_init_waiter(struct rt_mute |
| static int __sched |
| __rt_mutex_slowlock(struct rt_mutex *lock, int state, |
| struct hrtimer_sleeper *timeout, |
| - struct rt_mutex_waiter *waiter) |
| + struct rt_mutex_waiter *waiter, |
| + struct ww_acquire_ctx *ww_ctx) |
| { |
| int ret = 0; |
| |
| @@ -1540,6 +1576,12 @@ static int __sched |
| break; |
| } |
| |
| + if (ww_ctx && ww_ctx->acquired > 0) { |
| + ret = __mutex_lock_check_stamp(lock, ww_ctx); |
| + if (ret) |
| + break; |
| + } |
| + |
| raw_spin_unlock_irq(&lock->wait_lock); |
| |
| debug_rt_mutex_print_deadlock(waiter); |
| @@ -1574,16 +1616,106 @@ static void rt_mutex_handle_deadlock(int |
| } |
| } |
| |
| +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, |
| + struct ww_acquire_ctx *ww_ctx) |
| +{ |
| +#ifdef CONFIG_DEBUG_MUTEXES |
| + /* |
| + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, |
| + * but released with a normal mutex_unlock in this call. |
| + * |
| + * This should never happen, always use ww_mutex_unlock. |
| + */ |
| + DEBUG_LOCKS_WARN_ON(ww->ctx); |
| + |
| + /* |
| + * Not quite done after calling ww_acquire_done() ? |
| + */ |
| + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); |
| + |
| + if (ww_ctx->contending_lock) { |
| + /* |
| + * After -EDEADLK you tried to |
| + * acquire a different ww_mutex? Bad! |
| + */ |
| + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); |
| + |
| + /* |
| + * You called ww_mutex_lock after receiving -EDEADLK, |
| + * but 'forgot' to unlock everything else first? |
| + */ |
| + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); |
| + ww_ctx->contending_lock = NULL; |
| + } |
| + |
| + /* |
| + * Naughty, using a different class will lead to undefined behavior! |
| + */ |
| + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); |
| +#endif |
| + ww_ctx->acquired++; |
| +} |
| + |
| +#ifdef CONFIG_PREEMPT_RT |
| +static void ww_mutex_account_lock(struct rt_mutex *lock, |
| + struct ww_acquire_ctx *ww_ctx) |
| +{ |
| + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); |
| + struct rt_mutex_waiter *waiter, *n; |
| + |
| + /* |
| + * This branch gets optimized out for the common case, |
| + * and is only important for ww_mutex_lock. |
| + */ |
| + ww_mutex_lock_acquired(ww, ww_ctx); |
| + ww->ctx = ww_ctx; |
| + |
| + /* |
| + * Give any possible sleeping processes the chance to wake up, |
| + * so they can recheck if they have to back off. |
| + */ |
| + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root, |
| + tree_entry) { |
| + /* XXX debug rt mutex waiter wakeup */ |
| + |
| + BUG_ON(waiter->lock != lock); |
| + rt_mutex_wake_waiter(waiter); |
| + } |
| +} |
| + |
| +#else |
| + |
| +static void ww_mutex_account_lock(struct rt_mutex *lock, |
| + struct ww_acquire_ctx *ww_ctx) |
| +{ |
| + BUG(); |
| +} |
| +#endif |
| + |
| int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, |
| struct hrtimer_sleeper *timeout, |
| enum rtmutex_chainwalk chwalk, |
| + struct ww_acquire_ctx *ww_ctx, |
| struct rt_mutex_waiter *waiter) |
| { |
| int ret; |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| + if (ww_ctx) { |
| + struct ww_mutex *ww; |
| + |
| + ww = container_of(lock, struct ww_mutex, base.lock); |
| + if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) |
| + return -EALREADY; |
| + } |
| +#endif |
| + |
| /* Try to acquire the lock again: */ |
| - if (try_to_take_rt_mutex(lock, current, NULL)) |
| + if (try_to_take_rt_mutex(lock, current, NULL)) { |
| + if (ww_ctx) |
| + ww_mutex_account_lock(lock, ww_ctx); |
| return 0; |
| + } |
| |
| set_current_state(state); |
| |
| @@ -1593,14 +1725,24 @@ int __sched rt_mutex_slowlock_locked(str |
| |
| ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); |
| |
| - if (likely(!ret)) |
| + if (likely(!ret)) { |
| /* sleep on the mutex */ |
| - ret = __rt_mutex_slowlock(lock, state, timeout, waiter); |
| + ret = __rt_mutex_slowlock(lock, state, timeout, waiter, |
| + ww_ctx); |
| + } else if (ww_ctx) { |
| + /* ww_mutex received EDEADLK, let it become EALREADY */ |
| + ret = __mutex_lock_check_stamp(lock, ww_ctx); |
| + BUG_ON(!ret); |
| + } |
| |
| if (unlikely(ret)) { |
| __set_current_state(TASK_RUNNING); |
| remove_waiter(lock, waiter); |
| - rt_mutex_handle_deadlock(ret, chwalk, waiter); |
| + /* ww_mutex wants to report EDEADLK/EALREADY, let it */ |
| + if (!ww_ctx) |
| + rt_mutex_handle_deadlock(ret, chwalk, waiter); |
| + } else if (ww_ctx) { |
| + ww_mutex_account_lock(lock, ww_ctx); |
| } |
| |
| /* |
| @@ -1617,7 +1759,8 @@ int __sched rt_mutex_slowlock_locked(str |
| static int __sched |
| rt_mutex_slowlock(struct rt_mutex *lock, int state, |
| struct hrtimer_sleeper *timeout, |
| - enum rtmutex_chainwalk chwalk) |
| + enum rtmutex_chainwalk chwalk, |
| + struct ww_acquire_ctx *ww_ctx) |
| { |
| struct rt_mutex_waiter waiter; |
| unsigned long flags; |
| @@ -1635,7 +1778,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, |
| */ |
| raw_spin_lock_irqsave(&lock->wait_lock, flags); |
| |
| - ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); |
| + ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx, |
| + &waiter); |
| |
| raw_spin_unlock_irqrestore(&lock->wait_lock, flags); |
| |
| @@ -1765,29 +1909,33 @@ static bool __sched rt_mutex_slowunlock( |
| */ |
| static inline int |
| rt_mutex_fastlock(struct rt_mutex *lock, int state, |
| + struct ww_acquire_ctx *ww_ctx, |
| int (*slowfn)(struct rt_mutex *lock, int state, |
| struct hrtimer_sleeper *timeout, |
| - enum rtmutex_chainwalk chwalk)) |
| + enum rtmutex_chainwalk chwalk, |
| + struct ww_acquire_ctx *ww_ctx)) |
| { |
| if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) |
| return 0; |
| |
| - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); |
| + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); |
| } |
| |
| static inline int |
| rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, |
| struct hrtimer_sleeper *timeout, |
| enum rtmutex_chainwalk chwalk, |
| + struct ww_acquire_ctx *ww_ctx, |
| int (*slowfn)(struct rt_mutex *lock, int state, |
| struct hrtimer_sleeper *timeout, |
| - enum rtmutex_chainwalk chwalk)) |
| + enum rtmutex_chainwalk chwalk, |
| + struct ww_acquire_ctx *ww_ctx)) |
| { |
| if (chwalk == RT_MUTEX_MIN_CHAINWALK && |
| likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) |
| return 0; |
| |
| - return slowfn(lock, state, timeout, chwalk); |
| + return slowfn(lock, state, timeout, chwalk, ww_ctx); |
| } |
| |
| static inline int |
| @@ -1832,7 +1980,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc |
| int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) |
| { |
| might_sleep(); |
| - return rt_mutex_fastlock(lock, state, rt_mutex_slowlock); |
| + return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock); |
| } |
| |
| /** |
| @@ -1952,6 +2100,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc |
| mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
| ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, |
| RT_MUTEX_MIN_CHAINWALK, |
| + NULL, |
| rt_mutex_slowlock); |
| if (ret) |
| mutex_release(&lock->dep_map, _RET_IP_); |
| @@ -2329,7 +2478,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m |
| raw_spin_lock_irq(&lock->wait_lock); |
| /* sleep on the mutex */ |
| set_current_state(TASK_INTERRUPTIBLE); |
| - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); |
| + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); |
| /* |
| * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might |
| * have to fix that up. |
| @@ -2399,3 +2548,97 @@ bool rt_mutex_cleanup_proxy_lock(struct |
| |
| return cleanup; |
| } |
| + |
| +static inline int |
| +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) |
| +{ |
| +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH |
| + unsigned tmp; |
| + |
| + if (ctx->deadlock_inject_countdown-- == 0) { |
| + tmp = ctx->deadlock_inject_interval; |
| + if (tmp > UINT_MAX/4) |
| + tmp = UINT_MAX; |
| + else |
| + tmp = tmp*2 + tmp + tmp/2; |
| + |
| + ctx->deadlock_inject_interval = tmp; |
| + ctx->deadlock_inject_countdown = tmp; |
| + ctx->contending_lock = lock; |
| + |
| + ww_mutex_unlock(lock); |
| + |
| + return -EDEADLK; |
| + } |
| +#endif |
| + |
| + return 0; |
| +} |
| + |
| +#ifdef CONFIG_PREEMPT_RT |
| +int __sched |
| +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) |
| +{ |
| + int ret; |
| + |
| + might_sleep(); |
| + |
| + mutex_acquire_nest(&lock->base.dep_map, 0, 0, |
| + ctx ? &ctx->dep_map : NULL, _RET_IP_); |
| + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, |
| + ctx); |
| + if (ret) |
| + mutex_release(&lock->base.dep_map, _RET_IP_); |
| + else if (!ret && ctx && ctx->acquired > 1) |
| + return ww_mutex_deadlock_injection(lock, ctx); |
| + |
| + return ret; |
| +} |
| +EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); |
| + |
| +int __sched |
| +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) |
| +{ |
| + int ret; |
| + |
| + might_sleep(); |
| + |
| + mutex_acquire_nest(&lock->base.dep_map, 0, 0, |
| + ctx ? &ctx->dep_map : NULL, _RET_IP_); |
| + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, |
| + ctx); |
| + if (ret) |
| + mutex_release(&lock->base.dep_map, _RET_IP_); |
| + else if (!ret && ctx && ctx->acquired > 1) |
| + return ww_mutex_deadlock_injection(lock, ctx); |
| + |
| + return ret; |
| +} |
| +EXPORT_SYMBOL_GPL(ww_mutex_lock); |
| + |
| +void __sched ww_mutex_unlock(struct ww_mutex *lock) |
| +{ |
| + /* |
| + * The unlocking fastpath is the 0->1 transition from 'locked' |
| + * into 'unlocked' state: |
| + */ |
| + if (lock->ctx) { |
| +#ifdef CONFIG_DEBUG_MUTEXES |
| + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); |
| +#endif |
| + if (lock->ctx->acquired > 0) |
| + lock->ctx->acquired--; |
| + lock->ctx = NULL; |
| + } |
| + |
| + mutex_release(&lock->base.dep_map, _RET_IP_); |
| + __rt_mutex_unlock(&lock->base.lock); |
| +} |
| +EXPORT_SYMBOL(ww_mutex_unlock); |
| + |
| +int __rt_mutex_owner_current(struct rt_mutex *lock) |
| +{ |
| + return rt_mutex_owner(lock) == current; |
| +} |
| +EXPORT_SYMBOL(__rt_mutex_owner_current); |
| +#endif |
| --- a/kernel/locking/rtmutex_common.h |
| +++ b/kernel/locking/rtmutex_common.h |
| @@ -165,6 +165,7 @@ extern void rt_mutex_postunlock(struct w |
| struct wake_q_head *wake_sleeper_q); |
| |
| /* RW semaphore special interface */ |
| +struct ww_acquire_ctx; |
| |
| extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); |
| extern int __rt_mutex_trylock(struct rt_mutex *lock); |
| @@ -172,6 +173,7 @@ extern void __rt_mutex_unlock(struct rt_ |
| int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, |
| struct hrtimer_sleeper *timeout, |
| enum rtmutex_chainwalk chwalk, |
| + struct ww_acquire_ctx *ww_ctx, |
| struct rt_mutex_waiter *waiter); |
| void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, |
| struct rt_mutex_waiter *waiter, |
| --- a/kernel/locking/rwsem-rt.c |
| +++ b/kernel/locking/rwsem-rt.c |
| @@ -131,7 +131,7 @@ static int __sched __down_read_common(st |
| */ |
| rt_mutex_init_waiter(&waiter, false); |
| ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, |
| - &waiter); |
| + NULL, &waiter); |
| /* |
| * The slowlock() above is guaranteed to return with the rtmutex (for |
| * ret = 0) is now held, so there can't be a writer active. Increment |