| From: Mike Galbraith <efault@gmx.de> |
| Date: Sun, 8 Jan 2017 09:32:25 +0100 |
| Subject: [PATCH] cpuset: Convert callback_lock to raw_spinlock_t |
| |
| The two commits below add up to a cpuset might_sleep() splat for RT: |
| |
| 8447a0fee974 cpuset: convert callback_mutex to a spinlock |
| 344736f29b35 cpuset: simplify cpuset_node_allowed API |
| |
| BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:995 |
| in_atomic(): 0, irqs_disabled(): 1, pid: 11718, name: cset |
| CPU: 135 PID: 11718 Comm: cset Tainted: G E 4.10.0-rt1-rt #4 |
| Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRHSXSD1.86B.0056.R01.1409242327 09/24/2014 |
| Call Trace: |
| ? dump_stack+0x5c/0x81 |
| ? ___might_sleep+0xf4/0x170 |
| ? rt_spin_lock+0x1c/0x50 |
| ? __cpuset_node_allowed+0x66/0xc0 |
| ? ___slab_alloc+0x390/0x570 <disables IRQs> |
| ? anon_vma_fork+0x8f/0x140 |
| ? copy_page_range+0x6cf/0xb00 |
| ? anon_vma_fork+0x8f/0x140 |
| ? __slab_alloc.isra.74+0x5a/0x81 |
| ? anon_vma_fork+0x8f/0x140 |
| ? kmem_cache_alloc+0x1b5/0x1f0 |
| ? anon_vma_fork+0x8f/0x140 |
| ? copy_process.part.35+0x1670/0x1ee0 |
| ? _do_fork+0xdd/0x3f0 |
| ? _do_fork+0xdd/0x3f0 |
| ? do_syscall_64+0x61/0x170 |
| ? entry_SYSCALL64_slow_path+0x25/0x25 |
| |
| The latter ensured that a NUMA box WILL take callback_lock in atomic
| context by removing the allocator and reclaim path __GFP_HARDWALL
| usage which had prevented such contexts from taking callback_mutex.
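|
| To see why this splats only on RT, here is a minimal standalone
| sketch (not code from this patch; atomic_path() is a made-up name):
| under PREEMPT_RT a spinlock_t is backed by an rtmutex and may sleep,
| so taking one while interrupts are disabled, as ___slab_alloc() does,
| trips ___might_sleep(). A raw_spinlock_t keeps the non-sleeping
| behaviour on both RT and !RT kernels:
|
|   #include <linux/spinlock.h>
|
|   static DEFINE_SPINLOCK(sleeps_on_rt);      /* rtmutex under RT */
|   static DEFINE_RAW_SPINLOCK(always_spins);  /* true spinlock everywhere */
|
|   static void atomic_path(void)              /* hypothetical example */
|   {
|           unsigned long flags;
|
|           local_irq_save(flags);             /* as ___slab_alloc() does */
|
|           spin_lock(&sleeps_on_rt);          /* RT: might_sleep() splat */
|           spin_unlock(&sleeps_on_rt);
|
|           raw_spin_lock(&always_spins);      /* RT: fine, never sleeps */
|           raw_spin_unlock(&always_spins);
|
|           local_irq_restore(flags);
|   }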
| |
| One option would be to reinstate the __GFP_HARDWALL protections for
| RT; however, as the 8447a0fee974 changelog states:
| |
| The callback_mutex is only used to synchronize reads/updates of cpusets' |
| flags and cpu/node masks. These operations should always proceed fast so |
| there's no reason why we can't use a spinlock instead of the mutex.
|
| Convert callback_lock to a raw_spinlock_t, which remains a genuine
| spinning lock under PREEMPT_RT and can therefore be taken safely in
| the atomic contexts shown above.
| |
| Cc: stable-rt@vger.kernel.org |
| Signed-off-by: Mike Galbraith <efault@gmx.de> |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| --- |
| kernel/cgroup/cpuset.c | 70 ++++++++++++++++++++++++------------------------- |
| 1 file changed, 35 insertions(+), 35 deletions(-) |
| |
| --- a/kernel/cgroup/cpuset.c |
| +++ b/kernel/cgroup/cpuset.c |
| @@ -345,7 +345,7 @@ void cpuset_read_unlock(void) |
| percpu_up_read(&cpuset_rwsem); |
| } |
| |
| -static DEFINE_SPINLOCK(callback_lock); |
| +static DEFINE_RAW_SPINLOCK(callback_lock); |
| |
| static struct workqueue_struct *cpuset_migrate_mm_wq; |
| |
| @@ -1253,7 +1253,7 @@ static int update_parent_subparts_cpumas |
| * Newly added CPUs will be removed from effective_cpus and |
| * newly deleted ones will be added back to effective_cpus. |
| */ |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| if (adding) { |
| cpumask_or(parent->subparts_cpus, |
| parent->subparts_cpus, tmp->addmask); |
| @@ -1272,7 +1272,7 @@ static int update_parent_subparts_cpumas |
| } |
| |
| parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus); |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| return cmd == partcmd_update; |
| } |
| @@ -1377,7 +1377,7 @@ static void update_cpumasks_hier(struct |
| continue; |
| rcu_read_unlock(); |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| |
| cpumask_copy(cp->effective_cpus, tmp->new_cpus); |
| if (cp->nr_subparts_cpus && |
| @@ -1408,7 +1408,7 @@ static void update_cpumasks_hier(struct |
| = cpumask_weight(cp->subparts_cpus); |
| } |
| } |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| WARN_ON(!is_in_v2_mode() && |
| !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); |
| @@ -1526,7 +1526,7 @@ static int update_cpumask(struct cpuset |
| return -EINVAL; |
| } |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); |
| |
| /* |
| @@ -1537,7 +1537,7 @@ static int update_cpumask(struct cpuset |
| cs->cpus_allowed); |
| cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); |
| } |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| update_cpumasks_hier(cs, &tmp); |
| |
| @@ -1731,9 +1731,9 @@ static void update_nodemasks_hier(struct |
| continue; |
| rcu_read_unlock(); |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cp->effective_mems = *new_mems; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| WARN_ON(!is_in_v2_mode() && |
| !nodes_equal(cp->mems_allowed, cp->effective_mems)); |
| @@ -1801,9 +1801,9 @@ static int update_nodemask(struct cpuset |
| if (retval < 0) |
| goto done; |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cs->mems_allowed = trialcs->mems_allowed; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| /* use trialcs->mems_allowed as a temp variable */ |
| update_nodemasks_hier(cs, &trialcs->mems_allowed); |
| @@ -1894,9 +1894,9 @@ static int update_flag(cpuset_flagbits_t |
| spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) |
| || (is_spread_page(cs) != is_spread_page(trialcs))); |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cs->flags = trialcs->flags; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) |
| rebuild_sched_domains_locked(); |
| @@ -2405,7 +2405,7 @@ static int cpuset_common_seq_show(struct |
| cpuset_filetype_t type = seq_cft(sf)->private; |
| int ret = 0; |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| |
| switch (type) { |
| case FILE_CPULIST: |
| @@ -2427,7 +2427,7 @@ static int cpuset_common_seq_show(struct |
| ret = -EINVAL; |
| } |
| |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| return ret; |
| } |
| |
| @@ -2740,14 +2740,14 @@ static int cpuset_css_online(struct cgro |
| |
| cpuset_inc(); |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| if (is_in_v2_mode()) { |
| cpumask_copy(cs->effective_cpus, parent->effective_cpus); |
| cs->effective_mems = parent->effective_mems; |
| cs->use_parent_ecpus = true; |
| parent->child_ecpus_count++; |
| } |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) |
| goto out_unlock; |
| @@ -2774,12 +2774,12 @@ static int cpuset_css_online(struct cgro |
| } |
| rcu_read_unlock(); |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cs->mems_allowed = parent->mems_allowed; |
| cs->effective_mems = parent->mems_allowed; |
| cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); |
| cpumask_copy(cs->effective_cpus, parent->cpus_allowed); |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| out_unlock: |
| percpu_up_write(&cpuset_rwsem); |
| put_online_cpus(); |
| @@ -2835,7 +2835,7 @@ static void cpuset_css_free(struct cgrou |
| static void cpuset_bind(struct cgroup_subsys_state *root_css) |
| { |
| percpu_down_write(&cpuset_rwsem); |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| |
| if (is_in_v2_mode()) { |
| cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); |
| @@ -2846,7 +2846,7 @@ static void cpuset_bind(struct cgroup_su |
| top_cpuset.mems_allowed = top_cpuset.effective_mems; |
| } |
| |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| percpu_up_write(&cpuset_rwsem); |
| } |
| |
| @@ -2943,12 +2943,12 @@ hotplug_update_tasks_legacy(struct cpuse |
| { |
| bool is_empty; |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cpumask_copy(cs->cpus_allowed, new_cpus); |
| cpumask_copy(cs->effective_cpus, new_cpus); |
| cs->mems_allowed = *new_mems; |
| cs->effective_mems = *new_mems; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| /* |
| * Don't call update_tasks_cpumask() if the cpuset becomes empty, |
| @@ -2985,10 +2985,10 @@ hotplug_update_tasks(struct cpuset *cs, |
| if (nodes_empty(*new_mems)) |
| *new_mems = parent_cs(cs)->effective_mems; |
| |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| cpumask_copy(cs->effective_cpus, new_cpus); |
| cs->effective_mems = *new_mems; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| |
| if (cpus_updated) |
| update_tasks_cpumask(cs); |
| @@ -3143,7 +3143,7 @@ static void cpuset_hotplug_workfn(struct |
| |
| /* synchronize cpus_allowed to cpu_active_mask */ |
| if (cpus_updated) { |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| if (!on_dfl) |
| cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); |
| /* |
| @@ -3163,17 +3163,17 @@ static void cpuset_hotplug_workfn(struct |
| } |
| } |
| cpumask_copy(top_cpuset.effective_cpus, &new_cpus); |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| /* we don't mess with cpumasks of tasks in top_cpuset */ |
| } |
| |
| /* synchronize mems_allowed to N_MEMORY */ |
| if (mems_updated) { |
| - spin_lock_irq(&callback_lock); |
| + raw_spin_lock_irq(&callback_lock); |
| if (!on_dfl) |
| top_cpuset.mems_allowed = new_mems; |
| top_cpuset.effective_mems = new_mems; |
| - spin_unlock_irq(&callback_lock); |
| + raw_spin_unlock_irq(&callback_lock); |
| update_tasks_nodemask(&top_cpuset); |
| } |
| |
| @@ -3274,11 +3274,11 @@ void cpuset_cpus_allowed(struct task_str |
| { |
| unsigned long flags; |
| |
| - spin_lock_irqsave(&callback_lock, flags); |
| + raw_spin_lock_irqsave(&callback_lock, flags); |
| rcu_read_lock(); |
| guarantee_online_cpus(task_cs(tsk), pmask); |
| rcu_read_unlock(); |
| - spin_unlock_irqrestore(&callback_lock, flags); |
| + raw_spin_unlock_irqrestore(&callback_lock, flags); |
| } |
| |
| /** |
| @@ -3339,11 +3339,11 @@ nodemask_t cpuset_mems_allowed(struct ta |
| nodemask_t mask; |
| unsigned long flags; |
| |
| - spin_lock_irqsave(&callback_lock, flags); |
| + raw_spin_lock_irqsave(&callback_lock, flags); |
| rcu_read_lock(); |
| guarantee_online_mems(task_cs(tsk), &mask); |
| rcu_read_unlock(); |
| - spin_unlock_irqrestore(&callback_lock, flags); |
| + raw_spin_unlock_irqrestore(&callback_lock, flags); |
| |
| return mask; |
| } |
| @@ -3435,14 +3435,14 @@ bool __cpuset_node_allowed(int node, gfp |
| return true; |
| |
| /* Not hardwall and node outside mems_allowed: scan up cpusets */ |
| - spin_lock_irqsave(&callback_lock, flags); |
| + raw_spin_lock_irqsave(&callback_lock, flags); |
| |
| rcu_read_lock(); |
| cs = nearest_hardwall_ancestor(task_cs(current)); |
| allowed = node_isset(node, cs->mems_allowed); |
| rcu_read_unlock(); |
| |
| - spin_unlock_irqrestore(&callback_lock, flags); |
| + raw_spin_unlock_irqrestore(&callback_lock, flags); |
| return allowed; |
| } |
| |