From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Jan 2019 13:52:31 +0100
Subject: [PATCH] sched/fair: Robustify CFS-bandwidth timer locking

Traditionally hrtimer callbacks were run with IRQs disabled, but with
the introduction of HRTIMER_MODE_SOFT it is possible they run from
SoftIRQ context, which does _NOT_ have IRQs disabled.

Allow for the CFS bandwidth timers (period_timer and slack_timer) to
be run from SoftIRQ context; this entails removing the assumption that
IRQs are already disabled from the locking.

While mainline doesn't strictly need this, -RT forces all timers not
explicitly marked with MODE_HARD into MODE_SOFT and trips over this.
And marking these timers as MODE_HARD doesn't make sense as they're
not required for RT operation and can potentially be quite expensive.
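
For illustration only, a minimal sketch (not part of the patch) of the
locking pattern the timer callbacks switch to; the callback skeleton and
its name are made up, only the cfs_b->lock calls mirror the change:

  static enum hrtimer_restart example_cfs_timer(struct hrtimer *timer)
  {
          struct cfs_bandwidth *cfs_b =
                  container_of(timer, struct cfs_bandwidth, period_timer);
          unsigned long flags;

          /*
           * A bare raw_spin_lock() would rely on IRQs already being off,
           * which only holds when the hrtimer expires in hard-IRQ context.
           * The _irqsave variant disables IRQs itself and remembers the
           * previous state in 'flags', so the same code stays correct when
           * the callback runs from SoftIRQ context with IRQs enabled.
           */
          raw_spin_lock_irqsave(&cfs_b->lock, flags);
          /* ... timer work ... */
          raw_spin_unlock_irqrestore(&cfs_b->lock, flags);

          return HRTIMER_NORESTART;
  }

The rq_lock_irqsave()/rq_unlock_irqrestore() conversion in
distribute_cfs_runtime() follows the same pattern for the per-runqueue
lock.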

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reported-by: Tom Putzeys <tom.putzeys@be.atlascopco.com>
Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190107125231.GE14122@hirez.programming.kicks-ass.net
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/sched/fair.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4569,7 +4569,7 @@ static u64 distribute_cfs_runtime(struct
 		struct rq *rq = rq_of(cfs_rq);
 		struct rq_flags rf;

-		rq_lock(rq, &rf);
+		rq_lock_irqsave(rq, &rf);
 		if (!cfs_rq_throttled(cfs_rq))
 			goto next;

@@ -4586,7 +4586,7 @@ static u64 distribute_cfs_runtime(struct
 			unthrottle_cfs_rq(cfs_rq);

 next:
-		rq_unlock(rq, &rf);
+		rq_unlock_irqrestore(rq, &rf);

 		if (!remaining)
 			break;
@@ -4602,7 +4602,7 @@ static u64 distribute_cfs_runtime(struct
  * period the timer is deactivated until scheduling resumes; cfs_b->idle is
  * used to track this state.
  */
-static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
 {
 	u64 runtime, runtime_expires;
 	int throttled;
@@ -4644,11 +4644,11 @@ static int do_sched_cfs_period_timer(str
 	while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
 		runtime = cfs_b->runtime;
 		cfs_b->distribute_running = 1;
-		raw_spin_unlock(&cfs_b->lock);
+		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 		/* we can't nest cfs_b->lock while distributing bandwidth */
 		runtime = distribute_cfs_runtime(cfs_b, runtime,
 						 runtime_expires);
-		raw_spin_lock(&cfs_b->lock);
+		raw_spin_lock_irqsave(&cfs_b->lock, flags);

 		cfs_b->distribute_running = 0;
 		throttled = !list_empty(&cfs_b->throttled_cfs_rq);
@@ -4757,17 +4757,18 @@ static __always_inline void return_cfs_r
 static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
 {
 	u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
+	unsigned long flags;
 	u64 expires;

 	/* confirm we're still not at a refresh boundary */
-	raw_spin_lock(&cfs_b->lock);
+	raw_spin_lock_irqsave(&cfs_b->lock, flags);
 	if (cfs_b->distribute_running) {
-		raw_spin_unlock(&cfs_b->lock);
+		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 		return;
 	}

 	if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
-		raw_spin_unlock(&cfs_b->lock);
+		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 		return;
 	}

@@ -4778,18 +4779,18 @@ static void do_sched_cfs_slack_timer(str
 	if (runtime)
 		cfs_b->distribute_running = 1;

-	raw_spin_unlock(&cfs_b->lock);
+	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);

 	if (!runtime)
 		return;

 	runtime = distribute_cfs_runtime(cfs_b, runtime, expires);

-	raw_spin_lock(&cfs_b->lock);
+	raw_spin_lock_irqsave(&cfs_b->lock, flags);
 	if (expires == cfs_b->runtime_expires)
 		lsub_positive(&cfs_b->runtime, runtime);
 	cfs_b->distribute_running = 0;
-	raw_spin_unlock(&cfs_b->lock);
+	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 }

 /*
@@ -4869,11 +4870,12 @@ static enum hrtimer_restart sched_cfs_pe
 {
 	struct cfs_bandwidth *cfs_b =
 		container_of(timer, struct cfs_bandwidth, period_timer);
+	unsigned long flags;
 	int overrun;
 	int idle = 0;
 	int count = 0;

-	raw_spin_lock(&cfs_b->lock);
+	raw_spin_lock_irqsave(&cfs_b->lock, flags);
 	for (;;) {
 		overrun = hrtimer_forward_now(timer, cfs_b->period);
 		if (!overrun)
@@ -4901,11 +4903,11 @@ static enum hrtimer_restart sched_cfs_pe
 			count = 0;
 		}

-		idle = do_sched_cfs_period_timer(cfs_b, overrun);
+		idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
 	}
 	if (idle)
 		cfs_b->period_active = 0;
-	raw_spin_unlock(&cfs_b->lock);
+	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);

 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
 }