| From 9b66ef88fabba97473f4925cb1e18546c4ac8531 Mon Sep 17 00:00:00 2001 |
| From: Oleg Nesterov <oleg@redhat.com> |
| Date: Mon, 15 Mar 2010 10:10:27 +0100 |
| Subject: [PATCH] sched: Make select_fallback_rq() cpuset friendly |
| |
| commit 111abc89fbfb6b34de96d95bf90bcfdfcfac901d in tip. |
| |
| Introduce cpuset_cpus_allowed_fallback() helper to fix the cpuset problems |
| with select_fallback_rq(). It can be called from any context and can't use |
| any cpuset locks including task_lock(). It is called when the task doesn't |
| have online cpus in ->cpus_allowed but ttwu/etc must be able to find a |
| suitable cpu. |
| |
| I am not proud of this patch. Everything which needs such a fat comment |
| can't be good even if correct. But I'd prefer to not change the locking |
| rules in the code I hardly understand, and in any case I believe this |
| simple change make the code much more correct compared to deadlocks we |
| currently have. |
| |
| [ upstream commit: 9084bb8246ea935b98320554229e2f371f7f52fa ] |
| |
| Signed-off-by: Oleg Nesterov <oleg@redhat.com> |
| Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> |
| LKML-Reference: <20100315091027.GA9155@redhat.com> |
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| |
| diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h |
| index eeaaee7..a73454a 100644 |
| --- a/include/linux/cpuset.h |
| +++ b/include/linux/cpuset.h |
| @@ -21,6 +21,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */ |
| extern int cpuset_init(void); |
| extern void cpuset_init_smp(void); |
| extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); |
| +extern int cpuset_cpus_allowed_fallback(struct task_struct *p); |
| extern nodemask_t cpuset_mems_allowed(struct task_struct *p); |
| #define cpuset_current_mems_allowed (current->mems_allowed) |
| void cpuset_init_current_mems_allowed(void); |
| @@ -101,6 +102,12 @@ static inline void cpuset_cpus_allowed(struct task_struct *p, |
| cpumask_copy(mask, cpu_possible_mask); |
| } |
| |
| +static inline int cpuset_cpus_allowed_fallback(struct task_struct *p) |
| +{ |
| + cpumask_copy(&p->cpus_allowed, cpu_possible_mask); |
| + return cpumask_any(cpu_active_mask); |
| +} |
| + |
| static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) |
| { |
| return node_possible_map; |
| diff --git a/kernel/cpuset.c b/kernel/cpuset.c |
| index 4eb24cf..4a7676c 100644 |
| --- a/kernel/cpuset.c |
| +++ b/kernel/cpuset.c |
| @@ -2146,6 +2146,48 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) |
| mutex_unlock(&callback_mutex); |
| } |
| |
| +int cpuset_cpus_allowed_fallback(struct task_struct *tsk) |
| +{ |
| + const struct cpuset *cs; |
| + int cpu; |
| + |
| + rcu_read_lock(); |
| + cs = task_cs(tsk); |
| + if (cs) |
| + cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed); |
| + rcu_read_unlock(); |
| + |
| + /* |
| + * We own tsk->cpus_allowed, nobody can change it under us. |
| + * |
| + * But we used cs && cs->cpus_allowed lockless and thus can |
| + * race with cgroup_attach_task() or update_cpumask() and get |
| + * the wrong tsk->cpus_allowed. However, both cases imply the |
| + * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr() |
| + * which takes task_rq_lock(). |
| + * |
| + * If we are called after it dropped the lock we must see all |
| + * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary |
| + * set any mask even if it is not right from task_cs() pov, |
| + * the pending set_cpus_allowed_ptr() will fix things. |
| + */ |
| + |
| + cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask); |
| + if (cpu >= nr_cpu_ids) { |
| + /* |
| + * Either tsk->cpus_allowed is wrong (see above) or it |
| + * is actually empty. The latter case is only possible |
| + * if we are racing with remove_tasks_in_empty_cpuset(). |
| + * Like above we can temporary set any mask and rely on |
| + * set_cpus_allowed_ptr() as synchronization point. |
| + */ |
| + cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask); |
| + cpu = cpumask_any(cpu_active_mask); |
| + } |
| + |
| + return cpu; |
| +} |
| + |
| void cpuset_init_current_mems_allowed(void) |
| { |
| nodes_setall(current->mems_allowed); |
| diff --git a/kernel/sched.c b/kernel/sched.c |
| index c38298d..c36457d 100644 |
| --- a/kernel/sched.c |
| +++ b/kernel/sched.c |
| @@ -2399,9 +2399,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) |
| |
| /* No more Mr. Nice Guy. */ |
| if (unlikely(dest_cpu >= nr_cpu_ids)) { |
| - cpumask_copy(&p->cpus_allowed, cpu_possible_mask); |
| - dest_cpu = cpumask_any(cpu_active_mask); |
| - |
| + dest_cpu = cpuset_cpus_allowed_fallback(p); |
| /* |
| * Don't tell them about moving exiting tasks or |
| * kernel threads (both mm NULL), since they never |
| -- |
| 1.7.1.1 |
| |