| From: Mike Galbraith <efault@gmx.de> |
| Date: Sun, 8 Jan 2017 09:32:25 +0100 |
| Subject: [PATCH] cpuset: Convert callback_lock to raw_spinlock_t |
| |
| The two commits below add up to a cpuset might_sleep() splat for RT: |
| |
| 8447a0fee974 cpuset: convert callback_mutex to a spinlock |
| 344736f29b35 cpuset: simplify cpuset_node_allowed API |
| |
| BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:995 |
| in_atomic(): 0, irqs_disabled(): 1, pid: 11718, name: cset |
| CPU: 135 PID: 11718 Comm: cset Tainted: G E 4.10.0-rt1-rt #4 |
| Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRHSXSD1.86B.0056.R01.1409242327 09/24/2014 |
| Call Trace: |
| ? dump_stack+0x5c/0x81 |
| ? ___might_sleep+0xf4/0x170 |
| ? rt_spin_lock+0x1c/0x50 |
| ? __cpuset_node_allowed+0x66/0xc0 |
| ? ___slab_alloc+0x390/0x570 <disables IRQs> |
| ? anon_vma_fork+0x8f/0x140 |
| ? copy_page_range+0x6cf/0xb00 |
| ? anon_vma_fork+0x8f/0x140 |
| ? __slab_alloc.isra.74+0x5a/0x81 |
| ? anon_vma_fork+0x8f/0x140 |
| ? kmem_cache_alloc+0x1b5/0x1f0 |
| ? anon_vma_fork+0x8f/0x140 |
| ? copy_process.part.35+0x1670/0x1ee0 |
| ? _do_fork+0xdd/0x3f0 |
| ? _do_fork+0xdd/0x3f0 |
| ? do_syscall_64+0x61/0x170 |
| ? entry_SYSCALL64_slow_path+0x25/0x25 |
| |
| The latter ensured that a NUMA box WILL take callback_lock in atomic |
| context by removing the allocator and reclaim path __GFP_HARDWALL |
| usage which prevented such contexts from taking callback_mutex. |
| |
| One option would be to reinstate __GFP_HARDWALL protections for |
| RT, however, as the 8447a0fee974 changelog states: |
| |
| The callback_mutex is only used to synchronize reads/updates of cpusets' |
| flags and cpu/node masks. These operations should always proceed fast so |
| there's no reason why we can't use a spinlock instead of the mutex. |
| |
| Cc: stable-rt@vger.kernel.org |
| Signed-off-by: Mike Galbraith <efault@gmx.de> |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| --- |
| kernel/cgroup/cpuset.c | 66 ++++++++++++++++++++++++------------------------- |
| 1 file changed, 33 insertions(+), 33 deletions(-) |
| |
| --- a/kernel/cgroup/cpuset.c |
| +++ b/kernel/cgroup/cpuset.c |
| @@ -286,7 +286,7 @@ static struct cpuset top_cpuset = { |
| */ |
| |
| static DEFINE_MUTEX(cpuset_mutex); |
| -static DEFINE_SPINLOCK(callback_lock); |
| +static DEFINE_RAW_SPINLOCK(callback_lock); |
| |
| static struct workqueue_struct *cpuset_migrate_mm_wq; |
| |
| @@ -909,9 +909,9 @@ static void update_cpumasks_hier(struct |
| continue; |
| rcu_read_unlock(); |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cpumask_copy(cp->effective_cpus, new_cpus); |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && |
| !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); |
| @@ -976,9 +976,9 @@ static int update_cpumask(struct cpuset |
| if (retval < 0) |
| return retval; |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| /* use trialcs->cpus_allowed as a temp variable */ |
| update_cpumasks_hier(cs, trialcs->cpus_allowed); |
| @@ -1178,9 +1178,9 @@ static void update_nodemasks_hier(struct |
| continue; |
| rcu_read_unlock(); |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cp->effective_mems = *new_mems; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && |
| !nodes_equal(cp->mems_allowed, cp->effective_mems)); |
| @@ -1248,9 +1248,9 @@ static int update_nodemask(struct cpuset |
| if (retval < 0) |
| goto done; |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cs->mems_allowed = trialcs->mems_allowed; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| /* use trialcs->mems_allowed as a temp variable */ |
| update_nodemasks_hier(cs, &trialcs->mems_allowed); |
| @@ -1341,9 +1341,9 @@ static int update_flag(cpuset_flagbits_t |
| spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) |
| || (is_spread_page(cs) != is_spread_page(trialcs))); |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cs->flags = trialcs->flags; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) |
| rebuild_sched_domains_locked(); |
| @@ -1758,7 +1758,7 @@ static int cpuset_common_seq_show(struct |
| cpuset_filetype_t type = seq_cft(sf)->private; |
| int ret = 0; |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| |
| switch (type) { |
| case FILE_CPULIST: |
| @@ -1777,7 +1777,7 @@ static int cpuset_common_seq_show(struct |
| ret = -EINVAL; |
| } |
| |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| return ret; |
| } |
| |
| @@ -1991,12 +1991,12 @@ static int cpuset_css_online(struct cgro |
| |
| cpuset_inc(); |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { |
| cpumask_copy(cs->effective_cpus, parent->effective_cpus); |
| cs->effective_mems = parent->effective_mems; |
| } |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) |
| goto out_unlock; |
| @@ -2023,12 +2023,12 @@ static int cpuset_css_online(struct cgro |
| } |
| rcu_read_unlock(); |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cs->mems_allowed = parent->mems_allowed; |
| cs->effective_mems = parent->mems_allowed; |
| cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); |
| cpumask_copy(cs->effective_cpus, parent->cpus_allowed); |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| out_unlock: |
| mutex_unlock(&cpuset_mutex); |
| return 0; |
| @@ -2067,7 +2067,7 @@ static void cpuset_css_free(struct cgrou |
| static void cpuset_bind(struct cgroup_subsys_state *root_css) |
| { |
| mutex_lock(&cpuset_mutex); |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| |
| if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { |
| cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); |
| @@ -2078,7 +2078,7 @@ static void cpuset_bind(struct cgroup_su |
| top_cpuset.mems_allowed = top_cpuset.effective_mems; |
| } |
| |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| mutex_unlock(&cpuset_mutex); |
| } |
| |
| @@ -2179,12 +2179,12 @@ hotplug_update_tasks_legacy(struct cpuse |
| { |
| bool is_empty; |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cpumask_copy(cs->cpus_allowed, new_cpus); |
| cpumask_copy(cs->effective_cpus, new_cpus); |
| cs->mems_allowed = *new_mems; |
| cs->effective_mems = *new_mems; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| /* |
| * Don't call update_tasks_cpumask() if the cpuset becomes empty, |
| @@ -2221,10 +2221,10 @@ hotplug_update_tasks(struct cpuset *cs, |
| if (nodes_empty(*new_mems)) |
| *new_mems = parent_cs(cs)->effective_mems; |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cpumask_copy(cs->effective_cpus, new_cpus); |
| cs->effective_mems = *new_mems; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| if (cpus_updated) |
| update_tasks_cpumask(cs); |
| @@ -2310,21 +2310,21 @@ static void cpuset_hotplug_workfn(struct |
| |
| /* synchronize cpus_allowed to cpu_active_mask */ |
| if (cpus_updated) { |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| if (!on_dfl) |
| cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); |
| cpumask_copy(top_cpuset.effective_cpus, &new_cpus); |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| /* we don't mess with cpumasks of tasks in top_cpuset */ |
| } |
| |
| /* synchronize mems_allowed to N_MEMORY */ |
| if (mems_updated) { |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| if (!on_dfl) |
| top_cpuset.mems_allowed = new_mems; |
| top_cpuset.effective_mems = new_mems; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| update_tasks_nodemask(&top_cpuset); |
| } |
| |
| @@ -2422,11 +2422,11 @@ void cpuset_cpus_allowed(struct task_str |
| { |
| unsigned long flags; |
| |
| - spin_lock_irqsave(&callback_lock, flags); |
| + raw_spin_lock_irqsave(&callback_lock, flags); |
| rcu_read_lock(); |
| guarantee_online_cpus(task_cs(tsk), pmask); |
| rcu_read_unlock(); |
| - spin_unlock_irqrestore(&callback_lock, flags); |
| + raw_spin_unlock_irqrestore(&callback_lock, flags); |
| } |
| |
| void cpuset_cpus_allowed_fallback(struct task_struct *tsk) |
| @@ -2474,11 +2474,11 @@ nodemask_t cpuset_mems_allowed(struct ta |
| nodemask_t mask; |
| unsigned long flags; |
| |
| - spin_lock_irqsave(&callback_lock, flags); |
| + raw_spin_lock_irqsave(&callback_lock, flags); |
| rcu_read_lock(); |
| guarantee_online_mems(task_cs(tsk), &mask); |
| rcu_read_unlock(); |
| - spin_unlock_irqrestore(&callback_lock, flags); |
| + raw_spin_unlock_irqrestore(&callback_lock, flags); |
| |
| return mask; |
| } |
| @@ -2570,14 +2570,14 @@ bool __cpuset_node_allowed(int node, gfp |
| return true; |
| |
| /* Not hardwall and node outside mems_allowed: scan up cpusets */ |
| - spin_lock_irqsave(&callback_lock, flags); |
| + raw_spin_lock_irqsave(&callback_lock, flags); |
| |
| rcu_read_lock(); |
| cs = nearest_hardwall_ancestor(task_cs(current)); |
| allowed = node_isset(node, cs->mems_allowed); |
| rcu_read_unlock(); |
| |
| - spin_unlock_irqrestore(&callback_lock, flags); |
| + raw_spin_unlock_irqrestore(&callback_lock, flags); |
| return allowed; |
| } |
| |