| From 7eb9d157e7299b16d47ed56cb9458bfc1f1aeb3a Mon Sep 17 00:00:00 2001 |
| From: Cheng Jian <cj.chengjian@huawei.com> |
| Date: Fri, 13 Dec 2019 10:45:30 +0800 |
| Subject: [PATCH] sched/fair: Optimize select_idle_cpu |
| |
| commit 60588bfa223ff675b95f866249f90616613fbe31 upstream. |
| |
| select_idle_cpu() will scan the LLC domain for idle CPUs, |
| it's always expensive. So the next commit: |
| |
| 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()") |
| |
| introduces a way to limit how many CPUs we scan. |
| |
| But it consumes some CPUs out of 'nr' that are not allowed |
| for the task and thus wastes our attempts. The function |
| can then return nr_cpumask_bits without finding a CPU |
| on which our task is allowed to run. |
| |
| The cpumask may be too big; similar to select_idle_core(), use |
| the per-CPU 'select_idle_mask' to prevent stack overflow. |
| |
| Fixes: 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()") |
| Signed-off-by: Cheng Jian <cj.chengjian@huawei.com> |
| Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> |
| Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> |
| Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org> |
| Reviewed-by: Valentin Schneider <valentin.schneider@arm.com> |
| Link: https://lkml.kernel.org/r/20191213024530.28052-1-cj.chengjian@huawei.com |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c |
| index fa7ecd667259..46198a9adf55 100644 |
| --- a/kernel/sched/fair.c |
| +++ b/kernel/sched/fair.c |
| @@ -6187,6 +6187,7 @@ static inline int select_idle_smt(struct task_struct *p, int target) |
| */ |
| static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target) |
| { |
| + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); |
| struct sched_domain *this_sd; |
| u64 avg_cost, avg_idle; |
| u64 time, cost; |
| @@ -6217,11 +6218,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t |
| |
| time = local_clock(); |
| |
| - for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { |
| + cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); |
| + |
| + for_each_cpu_wrap(cpu, cpus, target) { |
| if (!--nr) |
| return -1; |
| - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) |
| - continue; |
| if (available_idle_cpu(cpu)) |
| break; |
| } |
| -- |
| 2.7.4 |
| |