| From 68f202e4e87cfab4439568bf397fcc5c7cf8d729 Mon Sep 17 00:00:00 2001 |
| From: Suresh Siddha <suresh.b.siddha@intel.com> |
| Date: Fri, 30 Jul 2010 11:46:42 -0700 |
| Subject: x86, mtrr: Use stop machine context to rendezvous all the CPUs |
| |
| From: Suresh Siddha <suresh.b.siddha@intel.com> |
| |
| commit 68f202e4e87cfab4439568bf397fcc5c7cf8d729 upstream. |
| |
| Use the stop machine context rather than IPIs to rendezvous all the CPUs for |
| the MTRR initialization that happens during CPU bringup and for MTRR |
| modifications at runtime. |
| |
| This avoids a deadlock scenario (reported by Prarit) like the following: |
| |
| CPU A holds a read_lock (tasklist_lock, for example) with irqs enabled |
| CPU B waits for the same lock with irqs disabled, using write_lock_irq |
| CPU C runs set_mtrr() (during AP bringup, for example), which tries to |
| rendezvous all the CPUs using IPIs |
| |
| This results in C and A reaching the rendezvous point and waiting for B. |
| B is stuck forever waiting for the lock (A cannot release it while it |
| waits in the rendezvous) and, with irqs disabled, never services the |
| IPI, so it never reaches the rendezvous point. |
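| |
| As an illustration only (not kernel code): a minimal userspace analogue |
| of this deadlock, with a pthread rwlock standing in for tasklist_lock, a |
| pthread barrier standing in for the IPI rendezvous, and hypothetical |
| thread names. The program hangs by design: |
| |
|   #include <pthread.h> |
|   #include <stdatomic.h> |
| |
|   static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER; |
|   static pthread_barrier_t rendezvous; |
|   static atomic_int a_has_lock; |
| |
|   static void *cpu_a(void *arg) |
|   { |
|           pthread_rwlock_rdlock(&lock);       /* A holds the read lock...  */ |
|           atomic_store(&a_has_lock, 1); |
|           pthread_barrier_wait(&rendezvous);  /* ...while it answers the   */ |
|           pthread_rwlock_unlock(&lock);       /* "IPI" and waits for B     */ |
|           return NULL; |
|   } |
| |
|   static void *cpu_b(void *arg) |
|   { |
|           while (!atomic_load(&a_has_lock))   /* make sure A goes first    */ |
|                   ; |
|           pthread_rwlock_wrlock(&lock);       /* B blocks behind A's read  */ |
|           pthread_rwlock_unlock(&lock);       /* lock and, "irqs disabled",*/ |
|           pthread_barrier_wait(&rendezvous);  /* never reaches the barrier */ |
|           return NULL; |
|   } |
| |
|   int main(void) |
|   { |
|           pthread_t a, b; |
| |
|           pthread_barrier_init(&rendezvous, NULL, 3); |
|           pthread_create(&a, NULL, cpu_a, NULL); |
|           pthread_create(&b, NULL, cpu_b, NULL); |
|           pthread_barrier_wait(&rendezvous);  /* C: waits for A and B      */ |
|           pthread_join(a, NULL); |
|           pthread_join(b, NULL); |
|           return 0; |
|   } |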
| |
| Using the cpu stop machinery (whose handler runs in the process context |
| of a per-CPU worker thread) to do this rendezvous avoids this deadlock |
| scenario. |
| |
| Also make sure all the CPUs are in the rendezvous handler before we proceed |
| with the local_irq_save() on each CPU. Disabling irqs on all the CPUs in |
| lock step avoids other deadlock scenarios (for example, ones involving |
| blocking smp_call_function() calls); a sketch of this lock-step protocol |
| follows. |
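| |
| A minimal userspace sketch of that count/gate lock step, using C11 |
| atomics and sched_yield() in place of cpu_relax() (illustrative names, |
| not the kernel implementation; the real code also uses smp_wmb() and |
| local_irq_save()): |
| |
|   #include <pthread.h> |
|   #include <sched.h> |
|   #include <stdatomic.h> |
|   #include <stdio.h> |
| |
|   #define NWORKERS 3 |
| |
|   static atomic_int count = NWORKERS; |
|   static atomic_int gate;                  /* toggles as each phase opens */ |
| |
|   /* Worker: check in, then spin until the master flips the gate. */ |
|   static void worker_wait(int open) |
|   { |
|           atomic_fetch_sub(&count, 1); |
|           while (atomic_load(&gate) != open) |
|                   sched_yield();           /* the kernel spins cpu_relax() */ |
|   } |
| |
|   static void *worker(void *arg) |
|   { |
|           worker_wait(1);                  /* kernel: local_irq_save() next */ |
|           worker_wait(0);                  /* kernel: mtrr_if->set() next   */ |
|           return NULL; |
|   } |
| |
|   /* Master: wait for every worker, re-arm count, open the next phase. */ |
|   static void master_release(int open) |
|   { |
|           while (atomic_load(&count)) |
|                   sched_yield(); |
|           atomic_store(&count, NWORKERS);  /* kernel: smp_wmb() before */ |
|           atomic_store(&gate, open);       /* the gate flips           */ |
|   } |
| |
|   int main(void) |
|   { |
|           pthread_t t[NWORKERS]; |
| |
|           for (int i = 0; i < NWORKERS; i++) |
|                   pthread_create(&t[i], NULL, worker, NULL); |
| |
|           master_release(1);               /* all checked in: "disable irqs" */ |
|           master_release(0);               /* irqs off everywhere: "update"  */ |
| |
|           for (int i = 0; i < NWORKERS; i++) |
|                   pthread_join(t[i], NULL); |
|           puts("lock-step rendezvous completed"); |
|           return 0; |
|   } |
| |
| The point of the extra first phase is visible here: the master does not |
| touch its own irqs until every worker has checked in, which is exactly |
| the ordering this patch adds to set_mtrr(). |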
| |
| [ This problem is very old. Marking -stable only for 2.6.35, as the |
| stop_one_cpu_nowait() API is present only in 2.6.35. Any older |
| kernel interested in this fix needs some more work to backport |
| this patch. ] |
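| |
| The smpboot.c hunk below also adds a schedule() call to the loop that |
| waits for the AP to call in: the MTRR stop-machine work queued on the |
| waiting CPU can only run once that CPU yields. A minimal userspace |
| analogue of such a yielding poll loop (hypothetical names; sched_yield() |
| stands in for schedule()): |
| |
|   #include <pthread.h> |
|   #include <sched.h> |
|   #include <stdatomic.h> |
| |
|   static atomic_int callin;            /* stand-in for cpu_callin_mask   */ |
| |
|   static void *other_work(void *arg)   /* stand-in for the queued work   */ |
|   { |
|           atomic_store(&callin, 1); |
|           return NULL; |
|   } |
| |
|   int main(void) |
|   { |
|           pthread_t t; |
| |
|           pthread_create(&t, NULL, other_work, NULL); |
|           while (!atomic_load(&callin)) |
|                   sched_yield();       /* like schedule(): let work run  */ |
|           pthread_join(t, NULL); |
|           return 0; |
|   } |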
| |
| Reported-by: Prarit Bhargava <prarit@redhat.com> |
| Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> |
| LKML-Reference: <1280515602.2682.10.camel@sbsiddha-MOBL3.sc.intel.com> |
| Acked-by: Prarit Bhargava <prarit@redhat.com> |
| Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| |
| --- |
| arch/x86/kernel/cpu/mtrr/main.c | 56 ++++++++++++++++++++++++++++++---------- |
| arch/x86/kernel/smpboot.c | 7 +++++ |
| 2 files changed, 50 insertions(+), 13 deletions(-) |
| |
| --- a/arch/x86/kernel/cpu/mtrr/main.c |
| +++ b/arch/x86/kernel/cpu/mtrr/main.c |
| @@ -35,6 +35,7 @@ |
| |
| #include <linux/types.h> /* FIXME: kvm_para.h needs this */ |
| |
| +#include <linux/stop_machine.h> |
| #include <linux/kvm_para.h> |
| #include <linux/uaccess.h> |
| #include <linux/module.h> |
| @@ -143,22 +144,28 @@ struct set_mtrr_data { |
| mtrr_type smp_type; |
| }; |
| |
| +static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work); |
| + |
| /** |
| - * ipi_handler - Synchronisation handler. Executed by "other" CPUs. |
| + * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs. |
| * @info: pointer to mtrr configuration data |
| * |
| * Returns nothing. |
| */ |
| -static void ipi_handler(void *info) |
| +static int mtrr_work_handler(void *info) |
| { |
| #ifdef CONFIG_SMP |
| struct set_mtrr_data *data = info; |
| unsigned long flags; |
| |
| + atomic_dec(&data->count); |
| + while (!atomic_read(&data->gate)) |
| + cpu_relax(); |
| + |
| local_irq_save(flags); |
| |
| atomic_dec(&data->count); |
| - while (!atomic_read(&data->gate)) |
| + while (atomic_read(&data->gate)) |
| cpu_relax(); |
| |
| /* The master has cleared me to execute */ |
| @@ -173,12 +180,13 @@ static void ipi_handler(void *info) |
| } |
| |
| atomic_dec(&data->count); |
| - while (atomic_read(&data->gate)) |
| + while (!atomic_read(&data->gate)) |
| cpu_relax(); |
| |
| atomic_dec(&data->count); |
| local_irq_restore(flags); |
| #endif |
| + return 0; |
| } |
| |
| static inline int types_compatible(mtrr_type type1, mtrr_type type2) |
| @@ -198,7 +206,7 @@ static inline int types_compatible(mtrr_ |
| * |
| * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: |
| * |
| - * 1. Send IPI to do the following: |
| + * 1. Queue work to do the following on all processors: |
| * 2. Disable Interrupts |
| * 3. Wait for all procs to do so |
| * 4. Enter no-fill cache mode |
| @@ -215,14 +223,17 @@ static inline int types_compatible(mtrr_ |
| * 15. Enable interrupts. |
| * |
| * What does that mean for us? Well, first we set data.count to the number |
| - * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait |
| - * until it hits 0 and proceed. We set the data.gate flag and reset data.count. |
| - * Meanwhile, they are waiting for that flag to be set. Once it's set, each |
| + * of CPUs. As each CPU announces that it has entered the rendezvous handler |
| + * by decrementing the count, we reset data.count and set the data.gate flag, |
| + * allowing all the CPUs to proceed with the work. As each CPU disables |
| + * interrupts, it'll decrement data.count once. We wait until it hits 0 and |
| + * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they |
| + * are waiting for that flag to be cleared. Once it's cleared, each |
| * CPU goes through the transition of updating MTRRs. |
| * The CPU vendors may each do it differently, |
| * so we call mtrr_if->set() callback and let them take care of it. |
| * When they're done, they again decrement data->count and wait for data.gate |
| - * to be reset. |
| + * to be set. |
| * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag |
| * Everyone then enables interrupts and we all continue on. |
| * |
| @@ -234,6 +245,9 @@ set_mtrr(unsigned int reg, unsigned long |
| { |
| struct set_mtrr_data data; |
| unsigned long flags; |
| + int cpu; |
| + |
| + preempt_disable(); |
| |
| data.smp_reg = reg; |
| data.smp_base = base; |
| @@ -246,10 +260,15 @@ set_mtrr(unsigned int reg, unsigned long |
| atomic_set(&data.gate, 0); |
| |
| /* Start the ball rolling on other CPUs */ |
| - if (smp_call_function(ipi_handler, &data, 0) != 0) |
| - panic("mtrr: timed out waiting for other CPUs\n"); |
| + for_each_online_cpu(cpu) { |
| + struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu); |
| + |
| + if (cpu == smp_processor_id()) |
| + continue; |
| + |
| + stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work); |
| + } |
| |
| - local_irq_save(flags); |
| |
| while (atomic_read(&data.count)) |
| cpu_relax(); |
| @@ -259,6 +278,16 @@ set_mtrr(unsigned int reg, unsigned long |
| smp_wmb(); |
| atomic_set(&data.gate, 1); |
| |
| + local_irq_save(flags); |
| + |
| + while (atomic_read(&data.count)) |
| + cpu_relax(); |
| + |
| + /* Ok, reset count and toggle gate */ |
| + atomic_set(&data.count, num_booting_cpus() - 1); |
| + smp_wmb(); |
| + atomic_set(&data.gate, 0); |
| + |
| /* Do our MTRR business */ |
| |
| /* |
| @@ -279,7 +308,7 @@ set_mtrr(unsigned int reg, unsigned long |
| |
| atomic_set(&data.count, num_booting_cpus() - 1); |
| smp_wmb(); |
| - atomic_set(&data.gate, 0); |
| + atomic_set(&data.gate, 1); |
| |
| /* |
| * Wait here for everyone to have seen the gate change |
| @@ -289,6 +318,7 @@ set_mtrr(unsigned int reg, unsigned long |
| cpu_relax(); |
| |
| local_irq_restore(flags); |
| + preempt_enable(); |
| } |
| |
| /** |
| --- a/arch/x86/kernel/smpboot.c |
| +++ b/arch/x86/kernel/smpboot.c |
| @@ -816,6 +816,13 @@ do_rest: |
| if (cpumask_test_cpu(cpu, cpu_callin_mask)) |
| break; /* It has booted */ |
| udelay(100); |
| + /* |
| + * Allow other tasks to run while we wait for the |
| + * AP to come online. This also gives a chance |
| + * for the MTRR work (triggered by the AP coming online) |
| + * to be completed in the stop machine context. |
| + */ |
| + schedule(); |
| } |
| |
| if (cpumask_test_cpu(cpu, cpu_callin_mask)) |