| Subject: mm, rt: kmap_atomic scheduling |
| From: Peter Zijlstra <peterz@infradead.org> |
| Date: Thu, 28 Jul 2011 10:43:51 +0200 |
| |
| In fact, with migrate_disable() existing one could play games with |
| kmap_atomic. You could save/restore the kmap_atomic slots on context |
| switch (if there are any in use of course), this should be especially easy now |
| that we have a kmap_atomic stack. |
| |
| Something like the below.. it wants replacing all the preempt_disable() |
| stuff with pagefault_disable() && migrate_disable() of course, but then |
| you can flip kmaps around like below. |
| |
| Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> |
| [dvhart@linux.intel.com: build fix] |
| Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins |
| |
| [tglx@linutronix.de: Get rid of the per cpu variable and store the idx |
| and the pte content right away in the task struct. |
| Shortens the context switch code. ] |
| --- |
| arch/x86/kernel/process_32.c | 32 ++++++++++++++++++++++++++++++++ |
| arch/x86/mm/highmem_32.c | 9 ++++++++- |
| arch/x86/mm/iomap_32.c | 9 ++++++++- |
| include/linux/highmem.h | 27 +++++++++++++++++++++++---- |
| include/linux/sched.h | 7 +++++++ |
| mm/highmem.c | 6 ++++-- |
| mm/memory.c | 2 ++ |
| 7 files changed, 84 insertions(+), 8 deletions(-) |
| |
| --- a/arch/x86/kernel/process_32.c |
| +++ b/arch/x86/kernel/process_32.c |
| @@ -36,6 +36,7 @@ |
| #include <linux/uaccess.h> |
| #include <linux/io.h> |
| #include <linux/kdebug.h> |
| +#include <linux/highmem.h> |
| |
| #include <asm/pgtable.h> |
| #include <asm/ldt.h> |
| @@ -214,6 +215,35 @@ start_thread(struct pt_regs *regs, unsig |
| } |
| EXPORT_SYMBOL_GPL(start_thread); |
| |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) |
| +{ |
| + int i; |
| + |
| + /* |
| + * Clear @prev's kmap_atomic mappings |
| + */ |
| + for (i = 0; i < prev_p->kmap_idx; i++) { |
| + int idx = i + KM_TYPE_NR * smp_processor_id(); |
| + pte_t *ptep = kmap_pte - idx; |
| + |
| + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); |
| + } |
| + /* |
| + * Restore @next_p's kmap_atomic mappings |
| + */ |
| + for (i = 0; i < next_p->kmap_idx; i++) { |
| + int idx = i + KM_TYPE_NR * smp_processor_id(); |
| + |
| + if (!pte_none(next_p->kmap_pte[i])) |
| + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); |
| + } |
| +} |
| +#else |
| +static inline void |
| +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } |
| +#endif |
| + |
| |
| /* |
| * switch_to(x,y) should switch tasks from x to y. |
| @@ -293,6 +323,8 @@ __switch_to(struct task_struct *prev_p, |
| task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) |
| __switch_to_xtra(prev_p, next_p, tss); |
| |
| + switch_kmaps(prev_p, next_p); |
| + |
| /* |
| * Leave lazy mode, flushing any hypercalls made here. |
| * This must be done before restoring TLS segments so |
| --- a/arch/x86/mm/highmem_32.c |
| +++ b/arch/x86/mm/highmem_32.c |
| @@ -31,6 +31,7 @@ EXPORT_SYMBOL(kunmap); |
| */ |
| void *kmap_atomic_prot(struct page *page, pgprot_t prot) |
| { |
| + pte_t pte = mk_pte(page, prot); |
| unsigned long vaddr; |
| int idx, type; |
| |
| @@ -44,7 +45,10 @@ void *kmap_atomic_prot(struct page *page |
| idx = type + KM_TYPE_NR*smp_processor_id(); |
| vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
| BUG_ON(!pte_none(*(kmap_pte-idx))); |
| - set_pte(kmap_pte-idx, mk_pte(page, prot)); |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + current->kmap_pte[type] = pte; |
| +#endif |
| + set_pte(kmap_pte-idx, pte); |
| arch_flush_lazy_mmu_mode(); |
| |
| return (void *)vaddr; |
| @@ -87,6 +91,9 @@ void __kunmap_atomic(void *kvaddr) |
| * is a bad idea also, in case the page changes cacheability |
| * attributes or becomes a protected page in a hypervisor. |
| */ |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + current->kmap_pte[type] = __pte(0); |
| +#endif |
| kpte_clear_flush(kmap_pte-idx, vaddr); |
| kmap_atomic_idx_pop(); |
| arch_flush_lazy_mmu_mode(); |
| --- a/arch/x86/mm/iomap_32.c |
| +++ b/arch/x86/mm/iomap_32.c |
| @@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free); |
| |
| void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) |
| { |
| + pte_t pte = pfn_pte(pfn, prot); |
| unsigned long vaddr; |
| int idx, type; |
| |
| @@ -64,7 +65,10 @@ void *kmap_atomic_prot_pfn(unsigned long |
| type = kmap_atomic_idx_push(); |
| idx = type + KM_TYPE_NR * smp_processor_id(); |
| vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
| - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + current->kmap_pte[type] = pte; |
| +#endif |
| + set_pte(kmap_pte - idx, pte); |
| arch_flush_lazy_mmu_mode(); |
| |
| return (void *)vaddr; |
| @@ -110,6 +114,9 @@ iounmap_atomic(void __iomem *kvaddr) |
| * is a bad idea also, in case the page changes cacheability |
| * attributes or becomes a protected page in a hypervisor. |
| */ |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + current->kmap_pte[type] = __pte(0); |
| +#endif |
| kpte_clear_flush(kmap_pte-idx, vaddr); |
| kmap_atomic_idx_pop(); |
| } |
| --- a/include/linux/highmem.h |
| +++ b/include/linux/highmem.h |
| @@ -85,32 +85,51 @@ static inline void __kunmap_atomic(void |
| |
| #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) |
| |
| +#ifndef CONFIG_PREEMPT_RT_FULL |
| DECLARE_PER_CPU(int, __kmap_atomic_idx); |
| +#endif |
| |
| static inline int kmap_atomic_idx_push(void) |
| { |
| +#ifndef CONFIG_PREEMPT_RT_FULL |
| int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; |
| |
| -#ifdef CONFIG_DEBUG_HIGHMEM |
| +# ifdef CONFIG_DEBUG_HIGHMEM |
| WARN_ON_ONCE(in_irq() && !irqs_disabled()); |
| BUG_ON(idx > KM_TYPE_NR); |
| -#endif |
| +# endif |
| return idx; |
| +#else |
| + current->kmap_idx++; |
| + BUG_ON(current->kmap_idx > KM_TYPE_NR); |
| + return current->kmap_idx - 1; |
| +#endif |
| } |
| |
| static inline int kmap_atomic_idx(void) |
| { |
| +#ifndef CONFIG_PREEMPT_RT_FULL |
| return __this_cpu_read(__kmap_atomic_idx) - 1; |
| +#else |
| + return current->kmap_idx - 1; |
| +#endif |
| } |
| |
| static inline void kmap_atomic_idx_pop(void) |
| { |
| -#ifdef CONFIG_DEBUG_HIGHMEM |
| +#ifndef CONFIG_PREEMPT_RT_FULL |
| +# ifdef CONFIG_DEBUG_HIGHMEM |
| int idx = __this_cpu_dec_return(__kmap_atomic_idx); |
| |
| BUG_ON(idx < 0); |
| -#else |
| +# else |
| __this_cpu_dec(__kmap_atomic_idx); |
| +# endif |
| +#else |
| + current->kmap_idx--; |
| +# ifdef CONFIG_DEBUG_HIGHMEM |
| + BUG_ON(current->kmap_idx < 0); |
| +# endif |
| #endif |
| } |
| |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -23,6 +23,7 @@ struct sched_param { |
| #include <linux/nodemask.h> |
| #include <linux/mm_types.h> |
| |
| +#include <asm/kmap_types.h> |
| #include <asm/page.h> |
| #include <asm/ptrace.h> |
| #include <asm/cputime.h> |
| @@ -1449,6 +1450,12 @@ struct task_struct { |
| struct rcu_head put_rcu; |
| int softirq_nestcnt; |
| #endif |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32 |
| + int kmap_idx; |
| + pte_t kmap_pte[KM_TYPE_NR]; |
| +# endif |
| +#endif |
| }; |
| |
| #ifdef CONFIG_NUMA_BALANCING |
| --- a/mm/highmem.c |
| +++ b/mm/highmem.c |
| @@ -29,10 +29,11 @@ |
| #include <linux/kgdb.h> |
| #include <asm/tlbflush.h> |
| |
| - |
| +#ifndef CONFIG_PREEMPT_RT_FULL |
| #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) |
| DEFINE_PER_CPU(int, __kmap_atomic_idx); |
| #endif |
| +#endif |
| |
| /* |
| * Virtual_count is not a pure "count". |
| @@ -47,8 +48,9 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx); |
| unsigned long totalhigh_pages __read_mostly; |
| EXPORT_SYMBOL(totalhigh_pages); |
| |
| - |
| +#ifndef CONFIG_PREEMPT_RT_FULL |
| EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); |
| +#endif |
| |
| unsigned int nr_free_highpages (void) |
| { |
| --- a/mm/memory.c |
| +++ b/mm/memory.c |
| @@ -3754,6 +3754,7 @@ unlock: |
| #ifdef CONFIG_PREEMPT_RT_FULL |
| void pagefault_disable(void) |
| { |
| + migrate_disable(); |
| current->pagefault_disabled++; |
| /* |
| * make sure to have issued the store before a pagefault |
| @@ -3771,6 +3772,7 @@ void pagefault_enable(void) |
| */ |
| barrier(); |
| current->pagefault_disabled--; |
| + migrate_enable(); |
| } |
| EXPORT_SYMBOL(pagefault_enable); |
| #endif |