Merge tag 'sched-urgent-2025-12-06' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:
"Miscellaneous scheduler fixes/cleanups:
- Fix psi_dequeue() for Proxy Execution
- Fix hrtick() vs. scheduling context bug
- Fix unfairness caused by stalled tg_load_avg_contrib when the last
task migrates out
- Fix whitespace noise in headers
- Remove a preempt-disable section in rt_mutex_setprio()"
* tag 'sched-urgent-2025-12-06' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/core: Fix psi_dequeue() for Proxy Execution
sched/fair: Fix unfairness caused by stalled tg_load_avg_contrib when the last task migrates out
sched/rt: Remove a preempt-disable section in rt_mutex_setprio()
sched/hrtick: Fix hrtick() vs. scheduling context
sched/headers: Remove whitespace noise from kernel/sched/sched.h
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b7801cd..41ba0be 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -878,7 +878,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
rq_lock(rq, &rf);
update_rq_clock(rq);
- rq->donor->sched_class->task_tick(rq, rq->curr, 1);
+ rq->donor->sched_class->task_tick(rq, rq->donor, 1);
rq_unlock(rq, &rf);
return HRTIMER_NORESTART;
@@ -7360,15 +7360,12 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
p->prio = prio;
}
out_unlock:
- /* Avoid rq from going away on us: */
- preempt_disable();
+ /* Caller holds task_struct::pi_lock, IRQs are still disabled */
rq_unpin_lock(rq, &rf);
__balance_callbacks(rq);
rq_repin_lock(rq, &rf);
__task_rq_unlock(rq, p, &rf);
-
- preempt_enable();
}
#endif /* CONFIG_RT_MUTEXES */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 769d7b7..da46c31 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4034,6 +4034,9 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
if (child_cfs_rq_on_list(cfs_rq))
return false;
+ if (cfs_rq->tg_load_avg_contrib)
+ return false;
+
return true;
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index bbf513b..d30cca6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1167,7 +1167,7 @@ struct rq {
* one CPU and if it got migrated afterwards it may decrease
* it on another CPU. Always updated under the runqueue lock:
*/
- unsigned long nr_uninterruptible;
+ unsigned long nr_uninterruptible;
#ifdef CONFIG_SCHED_PROXY_EXEC
struct task_struct __rcu *donor; /* Scheduling context */
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index cbf7206..c903f1a 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -180,8 +180,13 @@ static inline void psi_dequeue(struct task_struct *p, int flags)
* avoid walking all ancestors twice, psi_task_switch() handles
* TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
* Do nothing here.
+ *
+ * With CONFIG_SCHED_PROXY_EXEC we may do sleeping dequeues
+ * that are not followed by a task switch, so check that
+ * TSK_ONCPU is set to confirm the task switch is imminent.
+ * Otherwise clear the flags as usual.
*/
- if (flags & DEQUEUE_SLEEP)
+ if ((flags & DEQUEUE_SLEEP) && (p->psi_flags & TSK_ONCPU))
return;
/*