| From 60588bfa223ff675b95f866249f90616613fbe31 Mon Sep 17 00:00:00 2001 |
| From: Cheng Jian <cj.chengjian@huawei.com> |
| Date: Fri, 13 Dec 2019 10:45:30 +0800 |
| Subject: sched/fair: Optimize select_idle_cpu |
| |
| From: Cheng Jian <cj.chengjian@huawei.com> |
| |
| commit 60588bfa223ff675b95f866249f90616613fbe31 upstream. |
| |
| select_idle_cpu() will scan the LLC domain for idle CPUs, |
| it's always expensive. So the following commit: |
| |
| 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()") |
| |
| introduces a way to limit how many CPUs we scan. |
| |
| But it consumes some CPUs out of 'nr' that are not allowed |
| for the task and thus wastes our attempts. The function |
| always returns nr_cpumask_bits, and we can't find a CPU |
| on which our task is allowed to run. |
| |
| The cpumask may be too big, so, similar to select_idle_core(), |
| use the per-CPU 'select_idle_mask' to prevent stack overflow. |
| |
| Fixes: 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()") |
| Signed-off-by: Cheng Jian <cj.chengjian@huawei.com> |
| Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> |
| Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> |
| Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org> |
| Reviewed-by: Valentin Schneider <valentin.schneider@arm.com> |
| Link: https://lkml.kernel.org/r/20191213024530.28052-1-cj.chengjian@huawei.com |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| kernel/sched/fair.c | 7 ++++--- |
| 1 file changed, 4 insertions(+), 3 deletions(-) |
| |
| --- a/kernel/sched/fair.c |
| +++ b/kernel/sched/fair.c |
| @@ -5828,6 +5828,7 @@ static inline int select_idle_smt(struct |
| */ |
| static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target) |
| { |
| + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); |
| struct sched_domain *this_sd; |
| u64 avg_cost, avg_idle; |
| u64 time, cost; |
| @@ -5859,11 +5860,11 @@ static int select_idle_cpu(struct task_s |
| |
| time = cpu_clock(this); |
| |
| - for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { |
| + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); |
| + |
| + for_each_cpu_wrap(cpu, cpus, target) { |
| if (!--nr) |
| return si_cpu; |
| - if (!cpumask_test_cpu(cpu, p->cpus_ptr)) |
| - continue; |
| if (available_idle_cpu(cpu)) |
| break; |
| if (si_cpu == -1 && sched_idle_cpu(cpu)) |