rcu/exp: Warn on CPU lagging within hotplug IPI's blindspot
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6058a73..cf5fede 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -406,8 +406,18 @@ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) {
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
unsigned long mask = rdp->grpmask;
+ int nr_retries = 0;
retry_ipi:
+ /*
+ * Too many retries mean the CPU is lagging either:
+ *
+ * - between CPUHP_TEARDOWN_CPU and rcutree_report_cpu_dead()
+ * or:
+ * - between rcutree_report_cpu_starting() and set_cpu_online()
+ */
+ WARN_ON_ONCE(nr_retries > 5);
+
if (rcu_watching_snap_stopped_since(rdp, rdp->exp_watching_snap)) {
mask_ofl_test |= mask;
continue;
@@ -431,6 +441,7 @@ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("selectofl"));
schedule_timeout_idle(1);
+ nr_retries++;
goto retry_ipi;
}
/* CPU really is offline, so we must report its QS. */