| /* |
| * Copyright (C) 2012 Red Hat, Inc. |
| * |
| * This work is licensed under the terms of the GNU GPL, version 2. See |
| * the COPYING file in the top-level directory. |
| */ |
| |
| #include <linux/sched.h> |
| #include <linux/autonuma_sched.h> |
| #include <asm/tlb.h> |
| |
| #include "sched.h" |
| |
/* Define to enable the blind placement fallback below. */
/* #define AUTONUMA_BALANCE_BLIND */
| #ifdef AUTONUMA_BALANCE_BLIND |
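/*
 * Blind placement fallback, used before any NUMA fault statistics
 * exist for a task: pick the node where the largest number of allowed
 * CPUs already runs this task's mm, and move there only if one of
 * those CPUs is idle.
 */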
| static int autonuma_balance_blind(struct task_struct *p, int this_cpu, |
| int cpu_nid, struct cpumask *allowed, |
| int *selected_nid_p) |
| { |
| int nid, cpu, nr_mm, nr_mm_max, selected_nid; |
| struct mm_struct *mm; |
| DECLARE_BITMAP(nodes, MAX_NUMNODES); |
| |
	if (p->autonuma_node >= 0)
		return -1;
| |
| bitmap_zero(nodes, MAX_NUMNODES); |
| __set_bit(cpu_nid, nodes); |
| mm = p->mm; |
| |
| for (;;) { |
| selected_nid = cpu_nid; |
| |
| nr_mm_max = 0; |
| for_each_cpu_and(cpu, cpumask_of_node(cpu_nid), allowed) { |
| struct rq *rq = cpu_rq(cpu); |
| if (rq->curr->mm == mm) |
| nr_mm_max++; |
| } |
| for_each_online_node(nid) { |
| if (test_bit(nid, nodes)) |
| continue; |
| nr_mm = 0; |
| for_each_cpu_and(cpu, cpumask_of_node(nid), allowed) { |
| struct rq *rq = cpu_rq(cpu); |
| if (rq->curr->mm == mm) |
| nr_mm++; |
| } |
| if (nr_mm > nr_mm_max) { |
| nr_mm_max = nr_mm; |
| selected_nid = nid; |
| } |
| } |
| |
| if (selected_nid == cpu_nid) { |
| *selected_nid_p = selected_nid; |
| return this_cpu; |
| } |
| |
| for_each_cpu_and(cpu, cpumask_of_node(selected_nid), allowed) { |
| struct rq *rq = cpu_rq(cpu); |
| if (idle_cpu(cpu) && |
| rq->avg_idle > sysctl_sched_migration_cost) { |
| *selected_nid_p = selected_nid; |
| return cpu; |
| } |
| } |
| __set_bit(selected_nid, nodes); |
| } |
| } |
| #endif /* AUTONUMA_BALANCE_BLIND */ |
| |
| #define AUTONUMA_BALANCE_SCALE 1000 |
| |
| /* |
| * node |
| * 90 10 task |
| * 95 5 current |
| * 75 20 task |
| * 0 0 idle |
| */ |
| void sched_autonuma_balance(void) |
| { |
| int cpu, nid, selected_cpu, selected_nid; |
| int cpu_nid = numa_node_id(); |
| int this_cpu = smp_processor_id(); |
| unsigned long p_w, p_t, m_w, m_t; |
| unsigned long weight_delta_max, weight; |
| struct cpumask *allowed; |
| struct migration_arg arg; |
| struct task_struct *p = current; |
| struct sched_autonuma *sched_autonuma = p->sched_autonuma; |
| |
	/*
	 * Scratch arrays, statically allocated in the runqueues:
	 * weight_current[nid]    - this task's weight on each node
	 * weight_current_mm[nid] - this mm's weight on each node
	 * weight_others[cpu]     - weight of whatever runs on each
	 *			    candidate CPU
	 */
| long *weight_others; |
| long *weight_current; |
| long *weight_current_mm; |
| unsigned long *mm_mask; |
| |
| if (!sched_autonuma || sched_autonuma->autonuma_stop_one_cpu || !p->mm) |
| return; |
| |
| if (!autonuma_enabled()) { |
| if (sched_autonuma->autonuma_node != -1) |
| sched_autonuma->autonuma_node = -1; |
| return; |
| } |
| |
| allowed = tsk_cpus_allowed(p); |
| |
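	/*
	 * Without NUMA fault statistics for both the mm and the task
	 * there is nothing to compute a placement from: fall back to
	 * the blind pass if it is compiled in, otherwise bail out.
	 */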
| m_t = ACCESS_ONCE(p->mm->mm_autonuma->numa_fault_tot); |
| p_t = sched_autonuma->numa_fault_tot; |
| if (!m_t || !p_t) { |
| #ifdef AUTONUMA_BALANCE_BLIND |
| selected_cpu = autonuma_balance_blind(p, this_cpu, cpu_nid, |
| allowed, &selected_nid); |
| if (selected_cpu < 0) |
| return; |
| goto selected; |
| #else |
| return; |
| #endif |
| } |
| |
| weight_others = cpu_rq(this_cpu)->weight_others; |
| weight_current = cpu_rq(this_cpu)->weight_current; |
| weight_current_mm = cpu_rq(this_cpu)->weight_current_mm; |
| mm_mask = cpu_rq(this_cpu)->mm_mask; |
| |
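	/*
	 * Convert this task's (p_*) and this mm's (m_*) per-node fault
	 * counts to AUTONUMA_BALANCE_SCALE fixed point. The totals are
	 * raised whenever a per-node count races past them, so a weight
	 * can never exceed AUTONUMA_BALANCE_SCALE.
	 */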
| for_each_online_node(nid) { |
| m_w = ACCESS_ONCE(p->mm->mm_autonuma->numa_fault[nid]); |
| p_w = sched_autonuma->numa_fault[nid]; |
| if (m_w > m_t) |
| m_t = m_w; |
| weight_current_mm[nid] = m_w*AUTONUMA_BALANCE_SCALE/m_t; |
| if (p_w > p_t) |
| p_t = p_w; |
| weight_current[nid] = p_w*AUTONUMA_BALANCE_SCALE/p_t; |
| } |
| |
| bitmap_zero(mm_mask, NR_CPUS); |
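	/*
	 * Weigh whatever currently runs on each allowed CPU of every
	 * remote node, defaulting to LONG_MAX (never attractive): an
	 * idle CPU gets -1 when our weights favor its node, a thread of
	 * our own mm is marked in mm_mask and weighed by its per-task
	 * statistics, and any other task is weighed by its mm-wide
	 * statistics.
	 */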
| for_each_online_node(nid) { |
| if (nid == cpu_nid) |
| continue; |
| for_each_cpu_and(cpu, cpumask_of_node(nid), allowed) { |
| struct mm_struct *mm; |
| struct rq *rq = cpu_rq(cpu); |
| if (!cpu_online(cpu)) |
| continue; |
| weight_others[cpu] = LONG_MAX; |
| if (idle_cpu(cpu) && |
| rq->avg_idle > sysctl_sched_migration_cost) { |
| if (weight_current[nid] > |
| weight_current[cpu_nid] && |
| weight_current_mm[nid] > |
| weight_current_mm[cpu_nid]) |
| weight_others[cpu] = -1; |
| continue; |
| } |
| mm = rq->curr->mm; |
| if (!mm) |
| continue; |
| raw_spin_lock_irq(&rq->lock); |
| /* recheck after implicit barrier() */ |
| mm = rq->curr->mm; |
| if (!mm) { |
| raw_spin_unlock_irq(&rq->lock); |
| continue; |
| } |
| m_t = ACCESS_ONCE(mm->mm_autonuma->numa_fault_tot); |
| p_t = rq->curr->sched_autonuma->numa_fault_tot; |
| if (!m_t || !p_t) { |
| raw_spin_unlock_irq(&rq->lock); |
| continue; |
| } |
| m_w = ACCESS_ONCE(mm->mm_autonuma->numa_fault[nid]); |
| p_w = rq->curr->sched_autonuma->numa_fault[nid]; |
| raw_spin_unlock_irq(&rq->lock); |
| if (m_w > m_t) |
| m_t = m_w; |
| weight_others[cpu] = m_w*AUTONUMA_BALANCE_SCALE/m_t; |
| if (p_w > p_t) |
| p_t = p_w; |
| if (mm == p->mm) { |
| __set_bit(cpu, mm_mask); |
| weight_others[cpu] = p_w* |
| AUTONUMA_BALANCE_SCALE/p_t; |
| } |
| } |
| } |
| |
| selected_cpu = this_cpu; |
| selected_nid = cpu_nid; |
| weight_delta_max = 0; |
| |
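	/*
	 * Select the candidate CPU maximizing the combined gain
	 * (w_nid - weight_others[cpu]) + (w_nid - w_cpu_nid): how much
	 * more the remote node is worth to us than to the task running
	 * there, plus how much more it is worth than our current node.
	 * Same-mm candidates compare task weights, other processes mm
	 * weights.
	 */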
| for_each_online_node(nid) { |
| if (nid == cpu_nid) |
| continue; |
| for_each_cpu_and(cpu, cpumask_of_node(nid), allowed) { |
| long w_nid, w_cpu_nid; |
| if (!cpu_online(cpu)) |
| continue; |
| if (test_bit(cpu, mm_mask)) { |
| w_nid = weight_current[nid]; |
| w_cpu_nid = weight_current[cpu_nid]; |
| } else { |
| w_nid = weight_current_mm[nid]; |
| w_cpu_nid = weight_current_mm[cpu_nid]; |
| } |
| if (w_nid > weight_others[cpu] && |
| w_nid > w_cpu_nid) { |
| weight = w_nid - |
| weight_others[cpu] + |
| w_nid - |
| w_cpu_nid; |
| if (weight > weight_delta_max) { |
| weight_delta_max = weight; |
| selected_cpu = cpu; |
| selected_nid = nid; |
| } |
| } |
| } |
| } |
| |
| #ifdef AUTONUMA_BALANCE_BLIND |
| selected: |
| #endif |
| if (sched_autonuma->autonuma_node != selected_nid) |
| sched_autonuma->autonuma_node = selected_nid; |
| if (selected_cpu != this_cpu) { |
| if (autonuma_debug()) |
			printk(KERN_DEBUG "%p %d - %dto%d - %dto%d - %ld %ld %ld - %s\n",
| p->mm, p->pid, cpu_nid, selected_nid, |
| this_cpu, selected_cpu, |
| weight_others[selected_cpu], |
| test_bit(selected_cpu, mm_mask) ? |
| weight_current[selected_nid] : |
| weight_current_mm[selected_nid], |
| test_bit(selected_cpu, mm_mask) ? |
| weight_current[cpu_nid] : |
| weight_current_mm[cpu_nid], |
| test_bit(selected_cpu, mm_mask) ? |
| "thread" : "process"); |
| BUG_ON(cpu_nid == selected_nid); |
| goto found; |
| } |
| |
| return; |
| |
| found: |
| arg = (struct migration_arg) { p, selected_cpu }; |
| /* Need help from migration thread: drop lock and wait. */ |
| sched_autonuma->autonuma_stop_one_cpu = true; |
| preempt_enable_no_resched(); |
| stop_one_cpu(this_cpu, migration_cpu_stop, &arg); |
| preempt_disable(); |
| sched_autonuma->autonuma_stop_one_cpu = false; |
| tlb_migrate_finish(p->mm); |
| } |
| |
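/*
 * Tell the load balancer whether it may move @p to @this_cpu when that
 * would take @p off its preferred autonuma_node: allow it only when the
 * preferred node offers no better placement, i.e. every allowed CPU
 * there is busy running a task that also belongs to that node.
 */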
| bool sched_autonuma_can_migrate_task(struct task_struct *p, int this_cpu, |
| enum cpu_idle_type idle, |
| struct cpumask *allowed) |
| { |
| if (!task_autonuma_cpu(p, this_cpu)) { |
| int cpu; |
| int autonuma_node; |
| |
| autonuma_node = ACCESS_ONCE(p->sched_autonuma->autonuma_node); |
| if (autonuma_load_balance_strict() && |
| idle != CPU_NEWLY_IDLE && idle != CPU_IDLE) |
| return false; |
| if (idle == CPU_NUMA) |
| return false; |
| for_each_cpu_and(cpu, cpumask_of_node(autonuma_node), |
| allowed) { |
| struct rq *rq = cpu_rq(cpu); |
| int _autonuma_node; |
| struct sched_autonuma *sa; |
| if (!cpu_online(cpu)) |
| continue; |
| sa = rq->curr->sched_autonuma; |
| _autonuma_node = ACCESS_ONCE(sa->autonuma_node); |
| if (_autonuma_node != autonuma_node) |
| return false; |
| if (idle_cpu(cpu) && rq->avg_idle >= |
| sysctl_sched_migration_cost) |
| return false; |
| } |
| } |
| return true; |
| } |
| |
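/*
 * Debug helper: for every node print, for each mm with a task currently
 * running there, how many CPUs of that node are running tasks of that
 * mm (each mm is reported once per node).
 */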
| void sched_autonuma_dump_mm(void) |
| { |
| int nid, cpu; |
| struct cpumask x; |
| cpumask_setall(&x); |
| for_each_online_node(nid) { |
| for_each_cpu(cpu, cpumask_of_node(nid)) { |
| struct rq *rq = cpu_rq(cpu); |
| struct mm_struct *mm = rq->curr->mm; |
| int nr = 0, cpux; |
| if (!cpumask_test_cpu(cpu, &x)) |
| continue; |
| for_each_cpu(cpux, cpumask_of_node(nid)) { |
| struct rq *rqx = cpu_rq(cpux); |
| if (rqx->curr->mm == mm) { |
| nr++; |
| cpumask_clear_cpu(cpux, &x); |
| } |
| } |
			printk(KERN_DEBUG "nid %d mm %p nr %d\n",
			       nid, mm, nr);
| } |
| } |
| } |