From 523033e3bc5966318ba09c8064c22e5b2ccb04bf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:30:02 -0500
Subject: [PATCH] x86: preempt-rt preparatory patches (32bit)

commit 12fdd3645bfacc8fb96c633030ab85153e1ec3f4 in tip.

[PG: add additional EXPORT_SYMBOL found in the two big
 tip merges, 570f410b and 5f854cfc]
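
On PREEMPT_RT the kmap_atomic() family becomes a wrapper around the
sleeping kmap()/kunmap_virt() interfaces, while the real atomic
implementations are renamed to __kmap_atomic*() and still used when
PREEMPT_RT is off. The caller-visible effect can be sketched with a
hypothetical helper (illustration only, not part of the patch;
copy_from_page(), buf and len are made-up names and KM_USER0 is just
an example slot):

	/*
	 * With CONFIG_PREEMPT_RT=y the calls below expand to kmap() and
	 * kunmap_virt(), so this may sleep; without it they expand to the
	 * renamed __kmap_atomic()/__kunmap_atomic().
	 */
	static void copy_from_page(struct page *page, void *buf, size_t len)
	{
		void *vaddr = kmap_atomic(page, KM_USER0);	/* kmap(page) on -rt */

		memcpy(buf, vaddr, len);
		kunmap_atomic(vaddr, KM_USER0);			/* kunmap_virt(vaddr) on -rt */
	}

On -rt the km_type argument is accepted but unused, and such sections
are preemptible.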

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>

diff --git a/arch/Kconfig b/arch/Kconfig
index 9d055b4..4c3c06a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -46,6 +46,11 @@ config OPROFILE_EVENT_MULTIPLEX
 config HAVE_OPROFILE
 	bool
 
+config PROFILE_NMI
+	bool
+	depends on OPROFILE
+	default y
+
 config KPROBES
 	bool "Kprobes"
 	depends on KALLSYMS && MODULES
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bc01e3e..4fd6ace 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -126,6 +126,7 @@ config DEBUG_NX_TEST
 config 4KSTACKS
 	bool "Use 4Kb for kernel stacks instead of 8Kb"
 	depends on X86_32
+	default y
 	---help---
 	  If you say Y here the kernel will use a 4Kb stacksize for the
 	  kernel stack attached to each process/thread. This facilitates
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 014c2b8..d1c2886 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -58,6 +58,16 @@ extern void *kmap_high(struct page *page);
 extern void kunmap_high(struct page *page);
 
 void *kmap(struct page *page);
+extern void kunmap_virt(void *ptr);
+extern struct page *kmap_to_page(void *ptr);
+void kunmap(struct page *page);
+
+void *__kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
+void *__kmap_atomic(struct page *page, enum km_type type);
+void __kunmap_atomic(void *kvaddr, enum km_type type);
+void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type);
+struct page *__kmap_atomic_to_page(void *ptr);
+
 void kunmap(struct page *page);
 void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
 void *kmap_atomic(struct page *page, enum km_type type);
@@ -75,6 +85,23 @@ struct page *kmap_atomic_to_page(void *ptr);
 extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 
+/*
+ * on PREEMPT_RT kmap_atomic() is a wrapper that uses kmap():
+ */
+#ifdef CONFIG_PREEMPT_RT
+# define kmap_atomic_prot(page, type, prot)	kmap(page)
+# define kmap_atomic(page, type)		kmap(page)
+# define kmap_atomic_pfn(pfn, type)		kmap(pfn_to_page(pfn))
+# define kunmap_atomic(kvaddr, type)		kunmap_virt(kvaddr)
+# define kmap_atomic_to_page(kvaddr)		kmap_to_page(kvaddr)
+#else
+# define kmap_atomic_prot(page, type, prot)	__kmap_atomic_prot(page, type, prot)
+# define kmap_atomic(page, type)		__kmap_atomic(page, type)
+# define kmap_atomic_pfn(pfn, type)		__kmap_atomic_pfn(pfn, type)
+# define kunmap_atomic(kvaddr, type)		__kunmap_atomic(kvaddr, type)
+# define kmap_atomic_to_page(kvaddr)		__kmap_atomic_to_page(kvaddr)
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_HIGHMEM_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 2601600..1c77e81 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -7,6 +7,21 @@
 #include <asm/processor.h>
 #include <asm/system.h>
 
+/*
+ * TLB-flush needs to be nonpreemptible on PREEMPT_RT due to the
+ * following complex race scenario:
+ *
+ * if the current task is lazy-TLB and does a TLB flush and
+ * gets preempted after the movl %%cr3, %0 but before the
+ * movl %0, %%cr3 then its ->active_mm might change and it will
+ * install the wrong cr3 when it switches back. This is not a
+ * problem for the lazy-TLB task itself, but if the next task it
+ * switches to has an ->mm that is also the lazy-TLB task's
+ * new ->active_mm, then the scheduler will assume that cr3 is
+ * the new one, while we overwrote it with the old one. The result
+ * is the wrong cr3 in the new (non-lazy-TLB) task, which typically
+ * causes an infinite pagefault upon the next userspace access.
+ */
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -97,6 +112,13 @@ static inline void __flush_tlb_one(unsigned long addr)
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
+	/*
+	 * This is safe on PREEMPT_RT because if we preempt
+	 * right after the check but before the __flush_tlb(),
+	 * and if ->active_mm changes, then we might miss a
+	 * TLB flush, but that TLB flush happened already when
+	 * ->active_mm was changed:
+	 */
 	if (mm == current->active_mm)
 		__flush_tlb();
 }
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 133b40a..7a6aa68 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -865,7 +865,21 @@ static struct xor_block_template xor_block_pIII_sse = {
 #include <asm-generic/xor.h>
 
 #undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES				\
+/*
+ * MMX/SSE ops disable preemption for long periods of time,
+ * so on PREEMPT_RT use the register-based ops only:
+ */
+#ifdef CONFIG_PREEMPT_RT
+# define XOR_TRY_TEMPLATES				\
+	do {						\
+		xor_speed(&xor_block_8regs);		\
+		xor_speed(&xor_block_8regs_p);		\
+		xor_speed(&xor_block_32regs);		\
+		xor_speed(&xor_block_32regs_p);		\
+	} while (0)
+# define XOR_SELECT_TEMPLATE(FASTEST) (FASTEST)
+#else
+# define XOR_TRY_TEMPLATES				\
 do {							\
 	xor_speed(&xor_block_8regs);			\
 	xor_speed(&xor_block_8regs_p);			\
@@ -882,7 +896,8 @@ do { \
 /* We force the use of the SSE xor block because it can write around L2.
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched.  */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
+# define XOR_SELECT_TEMPLATE(FASTEST) \
 	(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
+#endif /* CONFIG_PREEMPT_RT */
 
 #endif /* _ASM_X86_XOR_32_H */
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index ae775ca..04078f1 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -98,6 +98,12 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 }
 
 
+#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_EVENT_TRACE)
+extern unsigned long worst_stack_left;
+#else
+# define worst_stack_left -1L
+#endif
+
 void show_registers(struct pt_regs *regs)
 {
 	int i;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 7fd318b..4fabb36 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -596,6 +596,7 @@ ignore_int:
 	call dump_stack
 
 	addl $(5*4),%esp
+	call dump_stack
 	popl %ds
 	popl %es
 	popl %edx
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index e1cd13c..525df46 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -148,8 +148,10 @@ void __show_regs(struct pt_regs *regs, int all)
 		regs->ax, regs->bx, regs->cx, regs->dx);
 	printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
 		regs->si, regs->di, regs->bp, sp);
-	printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
-	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
+	printk(KERN_DEFAULT
+	       " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x preempt:%08x\n",
+	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss,
+	       preempt_count());
 
 	if (!all)
 		return;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5ffb562..8ea7e48 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -137,6 +137,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
 	local_irq_enable();
 
 	if (!current->thread.vm86_info) {
+		local_irq_disable();
 		printk("no vm86_info: BAD\n");
 		do_exit(SIGSEGV);
 	}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ed046ed..60a0aa5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -554,6 +554,7 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 	nr = (address - idt_descr.address) >> 3;
 
 	if (nr == 6) {
+		zap_rt_locks();
 		do_invalid_op(regs, 0);
 		return 1;
 	}
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 76e0544..856a2a3 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -19,6 +19,27 @@ void kunmap(struct page *page)
 	kunmap_high(page);
 }
 
+void kunmap_virt(void *ptr)
+{
+	struct page *page;
+
+	if ((unsigned long)ptr < PKMAP_ADDR(0))
+		return;
+	page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]);
+	kunmap(page);
+}
+
+struct page *kmap_to_page(void *ptr)
+{
+	struct page *page;
+
+	if ((unsigned long)ptr < PKMAP_ADDR(0))
+		return virt_to_page(ptr);
+	page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]);
+	return page;
+}
+EXPORT_SYMBOL_GPL(kmap_to_page); /* PREEMPT_RT converts some modules to use this */
+
 /*
  * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
  * no global lock is needed and because the kmap code must perform a global TLB
@@ -27,7 +48,7 @@ void kunmap(struct page *page)
  * However when holding an atomic kmap it is not legal to sleep, so atomic
  * kmaps are appropriate for short, tight code paths only.
  */
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
+void *__kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 {
 	enum fixed_addresses idx;
 	unsigned long vaddr;
@@ -49,12 +70,12 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 	return (void *)vaddr;
 }
 
-void *kmap_atomic(struct page *page, enum km_type type)
+void *__kmap_atomic(struct page *page, enum km_type type)
 {
 	return kmap_atomic_prot(page, type, kmap_prot);
 }
 
-void kunmap_atomic(void *kvaddr, enum km_type type)
+void __kunmap_atomic(void *kvaddr, enum km_type type)
 {
 	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
 	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
@@ -82,13 +103,13 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 * This is the same as kmap_atomic() but can map memory that doesn't
 * have a struct page associated with it.
 */
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type)
 {
 	return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
 }
-EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
+EXPORT_SYMBOL_GPL(__kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
 
-struct page *kmap_atomic_to_page(void *ptr)
+struct page *__kmap_atomic_to_page(void *ptr)
 {
 	unsigned long idx, vaddr = (unsigned long)ptr;
 	pte_t *pte;
@@ -103,10 +124,11 @@ struct page *kmap_atomic_to_page(void *ptr)
 
 EXPORT_SYMBOL(kmap);
 EXPORT_SYMBOL(kunmap);
-EXPORT_SYMBOL(kmap_atomic);
-EXPORT_SYMBOL(kunmap_atomic);
-EXPORT_SYMBOL(kmap_atomic_prot);
-EXPORT_SYMBOL(kmap_atomic_to_page);
+EXPORT_SYMBOL(kunmap_virt);
+EXPORT_SYMBOL(__kmap_atomic);
+EXPORT_SYMBOL(__kunmap_atomic);
+EXPORT_SYMBOL(__kmap_atomic_prot);
+EXPORT_SYMBOL(__kmap_atomic_to_page);
 
 void __init set_highmem_pages_init(void)
 {
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index bd33620..2cc4115 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -224,16 +224,23 @@ static int __init pci_check_type1(void)
 	unsigned int tmp;
 	int works = 0;
 
-	local_irq_save(flags);
+	raw_spin_lock_irqsave(&pci_config_lock, flags);
 
 	outb(0x01, 0xCFB);
 	tmp = inl(0xCF8);
 	outl(0x80000000, 0xCF8);
-	if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) {
-		works = 1;
+
+	if (inl(0xCF8) == 0x80000000) {
+		raw_spin_unlock_irqrestore(&pci_config_lock, flags);
+
+		if (pci_sanity_check(&pci_direct_conf1))
+			works = 1;
+
+		raw_spin_lock_irqsave(&pci_config_lock, flags);
 	}
 	outl(tmp, 0xCF8);
-	local_irq_restore(flags);
+
+	raw_spin_unlock_irqrestore(&pci_config_lock, flags);
 
 	return works;
 }
@@ -243,17 +250,19 @@ static int __init pci_check_type2(void)
 	unsigned long flags;
 	int works = 0;
 
-	local_irq_save(flags);
+	raw_spin_lock_irqsave(&pci_config_lock, flags);
 
 	outb(0x00, 0xCFB);
 	outb(0x00, 0xCF8);
 	outb(0x00, 0xCFA);
-	if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 &&
-	    pci_sanity_check(&pci_direct_conf2)) {
-		works = 1;
-	}
 
-	local_irq_restore(flags);
+	if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) {
+		raw_spin_unlock_irqrestore(&pci_config_lock, flags);
+
+		if (pci_sanity_check(&pci_direct_conf2))
+			works = 1;
+	} else
+		raw_spin_unlock_irqrestore(&pci_config_lock, flags);
 
 	return works;
 }
-- 
1.7.1.1
