| From c651e0c06f3ea64d302837cc3b576d6993694fd2 Mon Sep 17 00:00:00 2001 |
| From: Ingo Molnar <mingo@elte.hu> |
| Date: Fri, 3 Jul 2009 08:30:07 -0500 |
| Subject: [PATCH] rt: core implementation |
| |
| commit e9888fb95225bb3b786d79fd983eb67e1acad338 in tip. |
| |
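| At its core, PREEMPT_RT converts almost every spinlock_t/rwlock_t |
| section into a sleeping, priority-inheriting lock and pushes soft and |
| hard interrupt handling into threads; the hunks below adapt core code |
| to that model. As a rough sketch of the consequence for ordinary lock |
| users (illustrative only; the lock name is hypothetical): |
| |
| 	static DEFINE_SPINLOCK(example_lock); |
| |
| 	void example(void) |
| 	{ |
| 		spin_lock(&example_lock);	/* may now sleep on PREEMPT_RT */ |
| 		/* the critical section itself stays preemptible */ |
| 		spin_unlock(&example_lock); |
| 	} |
| |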
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h |
| index cee2da4..3223348 100644 |
| --- a/include/linux/hardirq.h |
| +++ b/include/linux/hardirq.h |
| @@ -83,9 +83,9 @@ |
| * Are we doing bottom half or hardware interrupt processing? |
| * Are we in a softirq context? Interrupt context? |
| */ |
| -#define in_irq() (hardirq_count()) |
| -#define in_softirq() (softirq_count()) |
| -#define in_interrupt() (irq_count()) |
| +#define in_irq() (hardirq_count() || (current->flags & PF_HARDIRQ)) |
| +#define in_softirq() (softirq_count() || (current->flags & PF_SOFTIRQ)) |
| +#define in_interrupt() (irq_count()) |
| |
| /* |
| * Are we in NMI context? |
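| |
| The PF_HARDIRQ/PF_SOFTIRQ checks above are needed because, once |
| interrupt handlers run in threads, the hardirq/softirq preempt counts |
| are no longer elevated while a handler executes. A minimal sketch of |
| how an irq thread would mark itself (the actual flag management lives |
| in the irq-threading patches; this is only illustrative): |
| |
| 	static void run_threaded_handler(struct irqaction *action, int irq) |
| 	{ |
| 		current->flags |= PF_HARDIRQ;	/* in_irq() is now true */ |
| 		action->handler(irq, action->dev_id); |
| 		current->flags &= ~PF_HARDIRQ; |
| 	} |
| |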
| diff --git a/include/linux/kernel.h b/include/linux/kernel.h |
| index 1221d23..3489c31 100644 |
| --- a/include/linux/kernel.h |
| +++ b/include/linux/kernel.h |
| @@ -123,7 +123,7 @@ extern int _cond_resched(void); |
| # define might_resched() do { } while (0) |
| #endif |
| |
| -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP |
| +#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT) |
| void __might_sleep(const char *file, int line, int preempt_offset); |
| /** |
| * might_sleep - annotation for functions that can sleep |
| @@ -287,6 +287,12 @@ extern void printk_tick(void); |
| extern void asmlinkage __attribute__((format(printf, 1, 2))) |
| early_printk(const char *fmt, ...); |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +extern void zap_rt_locks(void); |
| +#else |
| +# define zap_rt_locks() do { } while (0) |
| +#endif |
| + |
| unsigned long int_sqrt(unsigned long); |
| |
| static inline void console_silent(void) |
| @@ -316,6 +322,7 @@ extern int root_mountflags; |
| /* Values used for system_state */ |
| extern enum system_states { |
| SYSTEM_BOOTING, |
| + SYSTEM_BOOTING_SCHEDULER_OK, |
| SYSTEM_RUNNING, |
| SYSTEM_HALT, |
| SYSTEM_POWER_OFF, |
| diff --git a/include/linux/profile.h b/include/linux/profile.h |
| index a0fc322..5b72082 100644 |
| --- a/include/linux/profile.h |
| +++ b/include/linux/profile.h |
| @@ -8,10 +8,11 @@ |
| |
| #include <asm/errno.h> |
| |
| -#define CPU_PROFILING 1 |
| -#define SCHED_PROFILING 2 |
| -#define SLEEP_PROFILING 3 |
| -#define KVM_PROFILING 4 |
| +#define CPU_PROFILING 1 |
| +#define SCHED_PROFILING 2 |
| +#define SLEEP_PROFILING 3 |
| +#define KVM_PROFILING 4 |
| +#define PREEMPT_PROFILING 5 |
| |
| struct proc_dir_entry; |
| struct pt_regs; |
| @@ -36,6 +37,8 @@ enum profile_type { |
| PROFILE_MUNMAP |
| }; |
| |
| +extern int prof_pid; |
| + |
| #ifdef CONFIG_PROFILING |
| |
| extern int prof_on __read_mostly; |
| diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h |
| index c5da749..9eb17f9 100644 |
| --- a/include/linux/radix-tree.h |
| +++ b/include/linux/radix-tree.h |
| @@ -169,7 +169,18 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root, |
| unsigned long index, unsigned long max_scan); |
| unsigned long radix_tree_prev_hole(struct radix_tree_root *root, |
| unsigned long index, unsigned long max_scan); |
| +/* |
| + * On a mutex-based kernel we can freely schedule within the radix code: |
| + */ |
| +#ifdef CONFIG_PREEMPT_RT |
| +static inline int radix_tree_preload(gfp_t gfp_mask) |
| +{ |
| + return 0; |
| +} |
| +#else |
| int radix_tree_preload(gfp_t gfp_mask); |
| +#endif |
| + |
| void radix_tree_init(void); |
| void *radix_tree_tag_set(struct radix_tree_root *root, |
| unsigned long index, unsigned int tag); |
| @@ -189,7 +200,9 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); |
| |
| static inline void radix_tree_preload_end(void) |
| { |
| +#ifndef CONFIG_PREEMPT_RT |
| preempt_enable(); |
| +#endif |
| } |
| |
| #endif /* _LINUX_RADIX_TREE_H */ |
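| |
| Note on the radix_tree_preload() stub above: on mainline the preload |
| pins the CPU via preempt_disable() so that a later insertion can |
| always take a node from the per-CPU pool. On PREEMPT_RT the insertion |
| path runs under sleeping locks and may allocate (and schedule) |
| directly, so the preload/preload_end pair degenerates into a no-op. |
| The usual caller pattern is unchanged (sketch; the lock and tree |
| names are hypothetical): |
| |
| 	if (radix_tree_preload(GFP_KERNEL) == 0) { |
| 		spin_lock(&tree_lock);	/* a sleeping lock on RT */ |
| 		error = radix_tree_insert(&tree, index, item); |
| 		spin_unlock(&tree_lock); |
| 		radix_tree_preload_end(); |
| 	} |
| |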
| diff --git a/include/linux/smp.h b/include/linux/smp.h |
| index 7a0570e..c55f2ca 100644 |
| --- a/include/linux/smp.h |
| +++ b/include/linux/smp.h |
| @@ -50,6 +50,11 @@ extern void smp_send_stop(void); |
| */ |
| extern void smp_send_reschedule(int cpu); |
| |
| +/* |
| + * trigger a reschedule on all other CPUs: |
| + */ |
| +extern void smp_send_reschedule_allbutself(void); |
| + |
| |
| /* |
| * Prepare machine for booting other CPUs. |
| @@ -136,6 +146,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) |
| 0; \ |
| }) |
| static inline void smp_send_reschedule(int cpu) { } |
| +static inline void smp_send_reschedule_allbutself(void) { } |
| #define num_booting_cpus() 1 |
| #define smp_prepare_boot_cpu() do {} while (0) |
| #define smp_call_function_many(mask, func, info, wait) \ |
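| |
| smp_send_reschedule_allbutself() extends the existing per-CPU |
| smp_send_reschedule() facility to all remote CPUs at once; a |
| hypothetical call site (the real users arrive with the RT scheduler |
| changes): |
| |
| 	/* |
| 	 * A batch of tasks changed priority; make every other CPU |
| 	 * re-evaluate its runqueue: |
| 	 */ |
| 	smp_send_reschedule_allbutself(); |
| |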
| diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h |
| index 9466e86..ec218ce 100644 |
| --- a/include/linux/workqueue.h |
| +++ b/include/linux/workqueue.h |
| @@ -211,6 +211,9 @@ __create_workqueue_key(const char *name, int singlethread, |
| #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0) |
| #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0) |
| |
| +extern void set_workqueue_prio(struct workqueue_struct *wq, int policy, |
| + int rt_priority, int nice); |
| + |
| extern void destroy_workqueue(struct workqueue_struct *wq); |
| |
| extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); |
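| |
| set_workqueue_prio() lets a subsystem give its worker threads an |
| explicit scheduling class, which matters once most work runs in |
| threads competing with RT tasks. A hypothetical user ("mydrv" and the |
| SCHED_FIFO priority 50 are made up for illustration): |
| |
| 	struct workqueue_struct *wq = create_workqueue("mydrv"); |
| |
| 	if (wq) |
| 		set_workqueue_prio(wq, SCHED_FIFO, 50, 0); |
| |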
| diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt |
| index 7c20f45..f4602f8 100644 |
| --- a/kernel/Kconfig.preempt |
| +++ b/kernel/Kconfig.preempt |
| @@ -1,14 +1,13 @@ |
| - |
| choice |
| - prompt "Preemption Model" |
| - default PREEMPT_NONE |
| + prompt "Preemption Mode" |
| + default PREEMPT_RT |
| |
| config PREEMPT_NONE |
| bool "No Forced Preemption (Server)" |
| help |
| - This is the traditional Linux preemption model, geared towards |
| + This is the traditional Linux preemption model geared towards |
| throughput. It will still provide good latencies most of the |
| - time, but there are no guarantees and occasional longer delays |
| + time but there are no guarantees and occasional long delays |
| are possible. |
| |
| Select this option if you are building a kernel for a server or |
| @@ -21,7 +20,7 @@ config PREEMPT_VOLUNTARY |
| help |
| This option reduces the latency of the kernel by adding more |
| "explicit preemption points" to the kernel code. These new |
| - preemption points have been selected to reduce the maximum |
| + preemption points have been selected to minimize the maximum |
| latency of rescheduling, providing faster application reactions, |
| at the cost of slightly lower throughput. |
| |
| @@ -33,38 +32,73 @@ config PREEMPT_VOLUNTARY |
| |
| Select this if you are building a kernel for a desktop system. |
| |
| -config PREEMPT |
| +config PREEMPT_DESKTOP |
| bool "Preemptible Kernel (Low-Latency Desktop)" |
| help |
| This option reduces the latency of the kernel by making |
| - all kernel code (that is not executing in a critical section) |
| + all kernel code that is not executing in a critical section |
| preemptible. This allows reaction to interactive events by |
| permitting a low priority process to be preempted involuntarily |
| even if it is in kernel mode executing a system call and would |
| - otherwise not be about to reach a natural preemption point. |
| - This allows applications to run more 'smoothly' even when the |
| - system is under load, at the cost of slightly lower throughput |
| - and a slight runtime overhead to kernel code. |
| + otherwise not be about to reach a preemption point. This allows |
| + applications to run more 'smoothly' even when the system is |
| + under load, at the cost of slightly lower throughput and a |
| + slight runtime overhead to kernel code. |
| + |
| + (According to profiles, when this mode is selected, even |
| + during kernel-intensive workloads the system is in an immediately |
| + preemptible state more than 50% of the time.) |
| |
| Select this if you are building a kernel for a desktop or |
| embedded system with latency requirements in the milliseconds |
| range. |
| |
| +config PREEMPT_RT |
| + bool "Complete Preemption (Real-Time)" |
| + select PREEMPT_SOFTIRQS |
| + select PREEMPT_HARDIRQS |
| + select PREEMPT_RCU |
| + select RT_MUTEXES |
| + help |
| + This option further reduces the scheduling latency of the |
| + kernel by replacing almost every spinlock used by the kernel |
| + with preemptible mutexes and thus making all but the most |
| + critical kernel code involuntarily preemptible. The remaining |
| + handful of low-level non-preemptible codepaths are short and |
| + have a deterministic latency of a few tens of |
| + microseconds (depending on the hardware). This also allows |
| + applications to run more 'smoothly' even when the system is |
| + under load, at the cost of lower throughput and runtime |
| + overhead to kernel code. |
| + |
| + (According to profiles, when this mode is selected, even |
| + during kernel-intensive workloads the system is in an immediately |
| + preemptible state more than 95% of the time.) |
| + |
| + Select this if you are building a kernel for a desktop, |
| + embedded or real-time system with guaranteed latency |
| + requirements of 100 usecs or lower. |
| + |
| endchoice |
| |
| +config PREEMPT |
| + bool |
| + default y |
| + depends on PREEMPT_DESKTOP || PREEMPT_RT |
| + |
| config PREEMPT_SOFTIRQS |
| bool "Thread Softirqs" |
| default n |
| # depends on PREEMPT |
| help |
| This option reduces the latency of the kernel by 'threading' |
| - soft interrupts. This means that all softirqs will execute |
| - in softirqd's context. While this helps latency, it can also |
| - reduce performance. |
| + soft interrupts. This means that all softirqs will execute |
| + in softirqd's context. While this helps latency, it can also |
| + reduce performance. |
| |
| - The threading of softirqs can also be controlled via |
| - /proc/sys/kernel/softirq_preemption runtime flag and the |
| - sofirq-preempt=0/1 boot-time option. |
| + The threading of softirqs can also be controlled via |
| + /proc/sys/kernel/softirq_preemption runtime flag and the |
| + softirq-preempt=0/1 boot-time option. |
| |
| Say N if you are unsure. |
| |
| @@ -75,14 +109,14 @@ config PREEMPT_HARDIRQS |
| select PREEMPT_SOFTIRQS |
| help |
| This option reduces the latency of the kernel by 'threading' |
| - hardirqs. This means that all (or selected) hardirqs will run |
| - in their own kernel thread context. While this helps latency, |
| - this feature can also reduce performance. |
| - |
| - The threading of hardirqs can also be controlled via the |
| - /proc/sys/kernel/hardirq_preemption runtime flag and the |
| - hardirq-preempt=0/1 boot-time option. Per-irq threading can |
| - be enabled/disable via the /proc/irq/<IRQ>/<handler>/threaded |
| - runtime flags. |
| + hardirqs. This means that all (or selected) hardirqs will run |
| + in their own kernel thread context. While this helps latency, |
| + this feature can also reduce performance. |
| + |
| + The threading of hardirqs can also be controlled via the |
| + /proc/sys/kernel/hardirq_preemption runtime flag and the |
| + hardirq-preempt=0/1 boot-time option. Per-irq threading can |
| + be enabled/disabled via the /proc/irq/<IRQ>/<handler>/threaded |
| + runtime flags. |
| |
| Say N if you are unsure. |
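| |
| Note that CONFIG_PREEMPT itself becomes a non-interactive option that |
| is switched on by both PREEMPT_DESKTOP and PREEMPT_RT, so existing |
| conditionally compiled code keeps working unchanged: |
| |
| 	#ifdef CONFIG_PREEMPT |
| 	/* built for PREEMPT_DESKTOP as well as PREEMPT_RT */ |
| 	#endif |
| |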
| diff --git a/kernel/exit.c b/kernel/exit.c |
| index 309df57..adeffd2 100644 |
| --- a/kernel/exit.c |
| +++ b/kernel/exit.c |
| @@ -69,7 +69,9 @@ static void __unhash_process(struct task_struct *p) |
| |
| list_del_rcu(&p->tasks); |
| list_del_init(&p->sibling); |
| + preempt_disable(); |
| __get_cpu_var(process_counts)--; |
| + preempt_enable(); |
| } |
| list_del_rcu(&p->thread_group); |
| } |
| @@ -694,9 +696,11 @@ static void exit_mm(struct task_struct * tsk) |
| task_lock(tsk); |
| tsk->mm = NULL; |
| up_read(&mm->mmap_sem); |
| + preempt_disable(); /* FIXME */ |
| enter_lazy_tlb(mm, current); |
| /* We don't want this task to be frozen prematurely */ |
| clear_freeze_flag(tsk); |
| + preempt_enable(); |
| task_unlock(tsk); |
| mm_update_next_owner(mm); |
| mmput(mm); |
| @@ -1501,6 +1505,9 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, |
| struct task_struct *p) |
| { |
| int ret = eligible_child(wo, p); |
| + |
| + BUG_ON(!atomic_read(&p->usage)); |
| + |
| if (!ret) |
| return ret; |
| |
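| The preempt_disable()/preempt_enable() pairs added above follow the |
| standard migration-safe per-CPU access pattern, which becomes |
| load-bearing once the surrounding locks no longer disable preemption: |
| |
| 	preempt_disable(); |
| 	__get_cpu_var(process_counts)--;	/* CPU cannot change here */ |
| 	preempt_enable(); |
| |
| 	/* equivalently, using the get/put helpers: */ |
| 	get_cpu_var(process_counts)--; |
| 	put_cpu_var(process_counts); |
| |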
| diff --git a/kernel/fork.c b/kernel/fork.c |
| index c49f839..30086f9 100644 |
| --- a/kernel/fork.c |
| +++ b/kernel/fork.c |
| @@ -186,6 +186,15 @@ void __put_task_struct(struct task_struct *tsk) |
| free_task(tsk); |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +void __put_task_struct_cb(struct rcu_head *rhp) |
| +{ |
| + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); |
| + |
| + __put_task_struct(tsk); |
| +} |
| +#endif |
| + |
| /* |
| * macro override instead of weak attribute alias, to workaround |
| * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions. |
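| |
| __put_task_struct_cb() gives PREEMPT_RT an RCU-deferred path for the |
| final task_struct put, so it can be driven from contexts that must |
| not block. A sketch of the intended use (the callers come with the |
| RT scheduler patches; task_struct already carries an rcu_head named |
| 'rcu'): |
| |
| 	call_rcu(&tsk->rcu, __put_task_struct_cb); |
| |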
| diff --git a/kernel/notifier.c b/kernel/notifier.c |
| index 2488ba7..88d65e6 100644 |
| --- a/kernel/notifier.c |
| +++ b/kernel/notifier.c |
| @@ -71,7 +71,7 @@ static int notifier_chain_unregister(struct notifier_block **nl, |
| * @returns: notifier_call_chain returns the value returned by the |
| * last notifier function called. |
| */ |
| -static int __kprobes notifier_call_chain(struct notifier_block **nl, |
| +static int __kprobes notrace notifier_call_chain(struct notifier_block **nl, |
| unsigned long val, void *v, |
| int nr_to_call, int *nr_calls) |
| { |
| @@ -217,7 +217,7 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, |
| * not yet working and interrupts must remain disabled. At |
| * such times we must not call down_write(). |
| */ |
| - if (unlikely(system_state == SYSTEM_BOOTING)) |
| + if (unlikely(system_state < SYSTEM_RUNNING)) |
| return notifier_chain_register(&nh->head, n); |
| |
| down_write(&nh->rwsem); |
| diff --git a/kernel/signal.c b/kernel/signal.c |
| index 5c2181b..9dda83b 100644 |
| --- a/kernel/signal.c |
| +++ b/kernel/signal.c |
| @@ -949,7 +949,9 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, |
| |
| trace_signal_generate(sig, info, t); |
| |
| +#ifdef CONFIG_SMP |
| assert_spin_locked(&t->sighand->siglock); |
| +#endif |
| |
| if (!prepare_signal(sig, t, from_ancestor_ns)) |
| return 0; |
| @@ -1710,15 +1712,8 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) |
| read_lock(&tasklist_lock); |
| if (may_ptrace_stop()) { |
| do_notify_parent_cldstop(current, CLD_TRAPPED); |
| - /* |
| - * Don't want to allow preemption here, because |
| - * sys_ptrace() needs this task to be inactive. |
| - * |
| - * XXX: implement read_unlock_no_resched(). |
| - */ |
| - preempt_disable(); |
| read_unlock(&tasklist_lock); |
| - preempt_enable_and_schedule(); |
| + schedule(); |
| } else { |
| /* |
| * By the time we got the lock, our tracer went away. |
| diff --git a/kernel/softirq.c b/kernel/softirq.c |
| index 31db011..b021c2d 100644 |
| --- a/kernel/softirq.c |
| +++ b/kernel/softirq.c |
| @@ -20,6 +20,7 @@ |
| #include <linux/kernel_stat.h> |
| #include <linux/interrupt.h> |
| #include <linux/init.h> |
| +#include <linux/delay.h> |
| #include <linux/mm.h> |
| #include <linux/notifier.h> |
| #include <linux/percpu.h> |
| @@ -106,6 +107,8 @@ static void trigger_softirqs(void) |
| } |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| + |
| /* |
| * This one is for softirq.c-internal use, |
| * where hardirqs are disabled legitimately: |
| @@ -207,6 +210,8 @@ void local_bh_enable_ip(unsigned long ip) |
| } |
| EXPORT_SYMBOL(local_bh_enable_ip); |
| |
| +#endif |
| + |
| /* |
| * We restart softirq processing MAX_SOFTIRQ_RESTART times, |
| * and we fall back to softirqd after that. |
| @@ -606,7 +611,7 @@ void tasklet_kill(struct tasklet_struct *t) |
| |
| while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { |
| do { |
| - yield(); |
| + msleep(1); |
| } while (test_bit(TASKLET_STATE_SCHED, &t->state)); |
| } |
| tasklet_unlock_wait(t); |
| @@ -1057,6 +1062,11 @@ int softirq_preemption = 1; |
| |
| EXPORT_SYMBOL(softirq_preemption); |
| |
| +/* |
| + * Real-Time Preemption depends on softirq threading: |
| + */ |
| +#ifndef CONFIG_PREEMPT_RT |
| + |
| static int __init softirq_preempt_setup (char *str) |
| { |
| if (!strncmp(str, "off", 3)) |
| @@ -1070,7 +1080,7 @@ static int __init softirq_preempt_setup (char *str) |
| } |
| |
| __setup("softirq-preempt=", softirq_preempt_setup); |
| - |
| +#endif |
| #endif |
| |
| #ifdef CONFIG_SMP |
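| |
| The yield() -> msleep(1) change in tasklet_kill() above matters once |
| callers can be SCHED_FIFO tasks: yield() only rotates among runnable |
| tasks of the same priority, so a lower-priority ksoftirqd would never |
| get to clear TASKLET_STATE_SCHED and the loop could livelock. |
| Sleeping unconditionally lets any thread run: |
| |
| 	do { |
| 		msleep(1);	/* block; lets ksoftirqd make progress */ |
| 	} while (test_bit(TASKLET_STATE_SCHED, &t->state)); |
| |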
| diff --git a/kernel/workqueue.c b/kernel/workqueue.c |
| index dee4865..fb74031 100644 |
| --- a/kernel/workqueue.c |
| +++ b/kernel/workqueue.c |
| @@ -26,6 +26,7 @@ |
| #include <linux/slab.h> |
| #include <linux/cpu.h> |
| #include <linux/notifier.h> |
| +#include <linux/syscalls.h> |
| #include <linux/kthread.h> |
| #include <linux/hardirq.h> |
| #include <linux/mempolicy.h> |
| @@ -36,6 +37,8 @@ |
| #define CREATE_TRACE_POINTS |
| #include <trace/events/workqueue.h> |
| |
| +#include <asm/uaccess.h> |
| + |
| /* |
| * The per-CPU workqueue (if single thread, we always use the first |
| * possible cpu). |
| @@ -270,13 +273,14 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, |
| * |
| * We queue the work to the CPU on which it was submitted, but if the CPU dies |
| * it can be processed by another CPU. |
| + * |
| + * In particular, there is no such guarantee on PREEMPT_RT. |
| */ |
| int queue_work(struct workqueue_struct *wq, struct work_struct *work) |
| { |
| - int ret; |
| + int ret = 0, cpu = raw_smp_processor_id(); |
| |
| - ret = queue_work_on(get_cpu(), wq, work); |
| - put_cpu(); |
| + ret = queue_work_on(cpu, wq, work); |
| |
| return ret; |
| } |
| @@ -774,9 +778,9 @@ void flush_delayed_work(struct delayed_work *dwork) |
| { |
| if (del_timer_sync(&dwork->timer)) { |
| struct cpu_workqueue_struct *cwq; |
| - cwq = wq_per_cpu(keventd_wq, get_cpu()); |
| + int cpu = raw_smp_processor_id(); |
| + cwq = wq_per_cpu(keventd_wq, cpu); |
| __queue_work(cwq, &dwork->work); |
| - put_cpu(); |
| } |
| flush_work(&dwork->work); |
| } |
| @@ -1044,6 +1048,49 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) |
| cwq->thread = NULL; |
| } |
| |
| +void set_workqueue_thread_prio(struct workqueue_struct *wq, int cpu, |
| + int policy, int rt_priority, int nice) |
| +{ |
| + struct sched_param param = { .sched_priority = rt_priority }; |
| + struct cpu_workqueue_struct *cwq; |
| + mm_segment_t oldfs = get_fs(); |
| + struct task_struct *p; |
| + unsigned long flags; |
| + int ret; |
| + |
| + cwq = per_cpu_ptr(wq->cpu_wq, cpu); |
| + spin_lock_irqsave(&cwq->lock, flags); |
| + p = cwq->thread; |
| + spin_unlock_irqrestore(&cwq->lock, flags); |
| + |
| + set_user_nice(p, nice); |
| + |
| + set_fs(KERNEL_DS); |
| + ret = sys_sched_setscheduler(p->pid, policy, ¶m); |
| + set_fs(oldfs); |
| + |
| + WARN_ON(ret); |
| +} |
| + |
| +void set_workqueue_prio(struct workqueue_struct *wq, int policy, |
| + int rt_priority, int nice) |
| +{ |
| + int cpu; |
| + |
| + /* We don't need the distraction of CPUs appearing and vanishing. */ |
| + get_online_cpus(); |
| + spin_lock(&workqueue_lock); |
| + if (is_wq_single_threaded(wq)) |
| + set_workqueue_thread_prio(wq, 0, policy, rt_priority, nice); |
| + else { |
| + for_each_online_cpu(cpu) |
| + set_workqueue_thread_prio(wq, cpu, policy, |
| + rt_priority, nice); |
| + } |
| + spin_unlock(&workqueue_lock); |
| + put_online_cpus(); |
| +} |
| + |
| /** |
| * destroy_workqueue - safely terminate a workqueue |
| * @wq: target workqueue |
| @@ -1176,4 +1223,5 @@ void __init init_workqueues(void) |
| hotcpu_notifier(workqueue_cpu_callback, 0); |
| keventd_wq = create_workqueue("events"); |
| BUG_ON(!keventd_wq); |
| + set_workqueue_prio(keventd_wq, SCHED_FIFO, 1, -20); |
| } |
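| |
| queue_work() above drops the get_cpu()/put_cpu() pair because |
| disabling preemption around the queueing is exactly what RT wants to |
| avoid; raw_smp_processor_id() is then only a placement hint, which is |
| fine since work items have no hard CPU-affinity guarantee on RT. The |
| keventd_wq setup above keeps the default queue ahead of SCHED_OTHER |
| load but below all higher-priority RT tasks; spelled out with its |
| parameters labelled (illustrative restatement): |
| |
| 	set_workqueue_prio(keventd_wq, SCHED_FIFO /* policy */, |
| 			   1 /* rt_priority */, -20 /* nice */); |
| |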
| diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug |
| index 5e3407d..aa0142b 100644 |
| --- a/lib/Kconfig.debug |
| +++ b/lib/Kconfig.debug |
| @@ -415,6 +415,8 @@ config DEBUG_RT_MUTEXES |
| help |
| This allows rt mutex semantics violations and rt mutex related |
| deadlocks (lockups) to be detected and reported automatically. |
| + When realtime preemption is enabled, this includes spinlocks, |
| + rwlocks, mutexes and (rw)semaphores. |
| |
| config DEBUG_PI_LIST |
| bool |
| @@ -438,7 +440,7 @@ config DEBUG_SPINLOCK |
| |
| config DEBUG_MUTEXES |
| bool "Mutex debugging: basic checks" |
| - depends on DEBUG_KERNEL |
| + depends on DEBUG_KERNEL && !PREEMPT_RT |
| help |
| This feature allows mutex semantics violations to be detected and |
| reported. |
| diff --git a/lib/Makefile b/lib/Makefile |
| index 3b0b4a6..2d21722 100644 |
| --- a/lib/Makefile |
| +++ b/lib/Makefile |
| @@ -34,7 +34,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o |
| obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o |
| obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o |
| obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o |
| -lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
| +obj-$(CONFIG_PREEMPT_RT) += plist.o |
| +obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
| lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o |
| lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o |
| lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o |
| diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c |
| index 79f9fac..a77306a 100644 |
| --- a/lib/kernel_lock.c |
| +++ b/lib/kernel_lock.c |
| @@ -38,6 +38,8 @@ struct semaphore kernel_sem; |
| * about recursion, both due to the down() and due to the enabling of |
| * preemption. schedule() will re-check the preemption flag after |
| * reacquiring the semaphore. |
| + * |
| + * Called with interrupts disabled. |
| */ |
| int __lockfunc __reacquire_kernel_lock(void) |
| { |
| @@ -76,7 +78,11 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line) |
| * No recursion worries - we set up lock_depth _after_ |
| */ |
| down(&kernel_sem); |
| +#ifdef CONFIG_DEBUG_RT_MUTEXES |
| + current->last_kernel_lock = __builtin_return_address(0); |
| +#endif |
| } |
| + |
| current->lock_depth = depth; |
| } |
| |
| @@ -84,9 +90,12 @@ void __lockfunc _unlock_kernel(const char *func, const char *file, int line) |
| { |
| BUG_ON(current->lock_depth < 0); |
| |
| - if (likely(--current->lock_depth < 0)) |
| + if (likely(--current->lock_depth < 0)) { |
| +#ifdef CONFIG_DEBUG_RT_MUTEXES |
| + current->last_kernel_lock = NULL; |
| +#endif |
| up(&kernel_sem); |
| - |
| + } |
| trace_unlock_kernel(func, file, line); |
| } |
| |
| diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c |
| index 619313e..65e7eab 100644 |
| --- a/lib/locking-selftest.c |
| +++ b/lib/locking-selftest.c |
| @@ -158,7 +158,7 @@ static void init_shared_classes(void) |
| local_bh_disable(); \ |
| local_irq_disable(); \ |
| lockdep_softirq_enter(); \ |
| - WARN_ON(!in_softirq()); |
| + /* FIXME: preemptible softirqs. WARN_ON(!in_softirq()); */ |
| |
| #define SOFTIRQ_EXIT() \ |
| lockdep_softirq_exit(); \ |
| @@ -550,6 +550,11 @@ GENERATE_TESTCASE(init_held_rsem) |
| #undef E |
| |
| /* |
| + * FIXME: turn these into raw-spinlock tests on -rt |
| + */ |
| +#ifndef CONFIG_PREEMPT_RT |
| + |
| +/* |
| * locking an irq-safe lock with irqs enabled: |
| */ |
| #define E1() \ |
| @@ -890,6 +895,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) |
| #include "locking-selftest-softirq.h" |
| // GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft) |
| |
| +#endif /* !CONFIG_PREEMPT_RT */ |
| + |
| #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| # define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map) |
| # define I_RWLOCK(x) lockdep_reset_lock(&rwlock_##x.dep_map) |
| @@ -998,7 +1005,7 @@ static inline void print_testname(const char *testname) |
| |
| #define DO_TESTCASE_1(desc, name, nr) \ |
| print_testname(desc"/"#nr); \ |
| - dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ |
| + dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ |
| printk("\n"); |
| |
| #define DO_TESTCASE_1B(desc, name, nr) \ |
| @@ -1006,17 +1013,17 @@ static inline void print_testname(const char *testname) |
| dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \ |
| printk("\n"); |
| |
| -#define DO_TESTCASE_3(desc, name, nr) \ |
| - print_testname(desc"/"#nr); \ |
| - dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ |
| - dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ |
| +#define DO_TESTCASE_3(desc, name, nr) \ |
| + print_testname(desc"/"#nr); \ |
| + dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ |
| + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ |
| dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ |
| printk("\n"); |
| |
| -#define DO_TESTCASE_3RW(desc, name, nr) \ |
| - print_testname(desc"/"#nr); \ |
| +#define DO_TESTCASE_3RW(desc, name, nr) \ |
| + print_testname(desc"/"#nr); \ |
| dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\ |
| - dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ |
| + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ |
| dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ |
| printk("\n"); |
| |
| @@ -1047,7 +1054,7 @@ static inline void print_testname(const char *testname) |
| print_testname(desc); \ |
| dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \ |
| dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \ |
| - dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ |
| + dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ |
| dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \ |
| dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \ |
| dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \ |
| @@ -1179,6 +1186,7 @@ void locking_selftest(void) |
| /* |
| * irq-context testcases: |
| */ |
| +#ifndef CONFIG_PREEMPT_RT |
| DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1); |
| DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A); |
| DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B); |
| @@ -1188,6 +1196,7 @@ void locking_selftest(void) |
| |
| DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); |
| // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); |
| +#endif |
| |
| if (unexpected_testcase_failures) { |
| printk("-----------------------------------------------------------------\n"); |
| diff --git a/lib/radix-tree.c b/lib/radix-tree.c |
| index 6b9670d..149f285 100644 |
| --- a/lib/radix-tree.c |
| +++ b/lib/radix-tree.c |
| @@ -157,12 +157,13 @@ radix_tree_node_alloc(struct radix_tree_root *root) |
| * succeed in getting a node here (and never reach |
| * kmem_cache_alloc) |
| */ |
| + rtp = &get_cpu_var(radix_tree_preloads); |
| - rtp = &__get_cpu_var(radix_tree_preloads); |
| if (rtp->nr) { |
| ret = rtp->nodes[rtp->nr - 1]; |
| rtp->nodes[rtp->nr - 1] = NULL; |
| rtp->nr--; |
| } |
| + put_cpu_var(radix_tree_preloads); |
| } |
| if (ret == NULL) |
| ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); |
| @@ -195,6 +197,8 @@ radix_tree_node_free(struct radix_tree_node *node) |
| call_rcu(&node->rcu_head, radix_tree_node_rcu_free); |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| + |
| /* |
| * Load up this CPU's radix_tree_node buffer with sufficient objects to |
| * ensure that the addition of a single element in the tree cannot fail. On |
| @@ -230,6 +234,8 @@ out: |
| } |
| EXPORT_SYMBOL(radix_tree_preload); |
| |
| +#endif |
| + |
| /* |
| * Return the maximum key which can be store into a |
| * radix tree with height HEIGHT. |
| -- |
| 1.7.1.1 |
| |