| From 9466f4fe2a3429fa51f4566769228ba4127402d2 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Sat, 22 May 2021 00:56:23 +0900 |
| Subject: rcu/tree: Handle VM stoppage in stall detection |
| |
| From: Sergey Senozhatsky <senozhatsky@chromium.org> |
| |
| [ Upstream commit ccfc9dd6914feaa9a81f10f9cce56eb0f7712264 ] |
| |
| The soft watchdog timer function checks whether a virtual machine |
| was suspended, in which case what looks like a lockup is in fact |
| a false positive. |
| |
| This is what kvm_check_and_clear_guest_paused() does: it |
| tests guest PVCLOCK_GUEST_STOPPED (which is set by the host) |
| and if it's set then we need to touch all watchdogs and bail |
| out. |
| |
| Watchdog timer function runs from IRQ, so PVCLOCK_GUEST_STOPPED |
| check works fine. |
| |
| There is, however, one more watchdog that runs from IRQ and is |
| not aware of PVCLOCK_GUEST_STOPPED: the RCU stall detector. The |
| watchdog timer function races with it. |
| |
| apic_timer_interrupt() |
| smp_apic_timer_interrupt() |
| hrtimer_interrupt() |
| __hrtimer_run_queues() |
| tick_sched_timer() |
| tick_sched_handle() |
| update_process_times() |
| rcu_sched_clock_irq() |
| |
| This triggers RCU stalls on our devices during VM resume. |
| |
| If tick_sched_handle()->rcu_sched_clock_irq() runs on a VCPU |
| before watchdog_timer_fn()->kvm_check_and_clear_guest_paused() |
| then there is nothing on this VCPU that touches watchdogs, so |
| RCU reads a stale gp stall timestamp and a new jiffies value, |
| which makes it think that RCU has stalled. |
| |
| Make RCU stall watchdog aware of PVCLOCK_GUEST_STOPPED and |
| don't report RCU stalls when we resume the VM. |
| |
| Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org> |
| Signed-off-by: Paul E. McKenney <paulmck@kernel.org> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| kernel/rcu/tree_stall.h | 18 ++++++++++++++++++ |
| 1 file changed, 18 insertions(+) |
| |
| diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h |
| index 6c76988cc019..3d11155e0033 100644 |
| --- a/kernel/rcu/tree_stall.h |
| +++ b/kernel/rcu/tree_stall.h |
| @@ -7,6 +7,8 @@ |
| * Author: Paul E. McKenney <paulmck@linux.ibm.com> |
| */ |
| |
| +#include <linux/kvm_para.h> |
| + |
| ////////////////////////////////////////////////////////////////////////////// |
| // |
| // Controlling CPU stall warnings, including delay calculation. |
| @@ -696,6 +698,14 @@ static void check_cpu_stall(struct rcu_data *rdp) |
| (READ_ONCE(rnp->qsmask) & rdp->grpmask) && |
| cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { |
| |
| + /* |
| + * If a virtual machine is stopped by the host it can look to |
| + * the watchdog like an RCU stall. Check to see if the host |
| + * stopped the vm. |
| + */ |
| + if (kvm_check_and_clear_guest_paused()) |
| + return; |
| + |
| /* We haven't checked in, so go dump stack. */ |
| print_cpu_stall(gps); |
| if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) |
| @@ -705,6 +715,14 @@ static void check_cpu_stall(struct rcu_data *rdp) |
| ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && |
| cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { |
| |
| + /* |
| + * If a virtual machine is stopped by the host it can look to |
| + * the watchdog like an RCU stall. Check to see if the host |
| + * stopped the vm. |
| + */ |
| + if (kvm_check_and_clear_guest_paused()) |
| + return; |
| + |
| /* They had a few time units to dump stack, so complain. */ |
| print_other_cpu_stall(gs2, gps); |
| if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) |
| -- |
| 2.30.2 |
| |