rcu/trace: Add tracing for how segcb list changes
Track how the segcb list changes before/after acceleration, during
queuing and during dequeuing.
This has proved useful for discovering an optimization that avoids
unwanted GP requests when no callbacks have been accelerated.
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 02dcd11..a6d4986 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -507,6 +507,31 @@
__entry->qlen)
);
+TRACE_EVENT_RCU(rcu_segcb,
+
+ TP_PROTO(const char *ctx, int *cb_count, unsigned long *gp_seq),
+
+ TP_ARGS(ctx, cb_count, gp_seq),
+
+ TP_STRUCT__entry(
+ __field(const char *, ctx)
+ __array(int, cb_count, 4)
+ __array(unsigned long, gp_seq, 4)
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ memcpy(__entry->cb_count, cb_count, 4 * sizeof(int));
+ memcpy(__entry->gp_seq, gp_seq, 4 * sizeof(unsigned long));
+ ),
+
+ TP_printk("%s cb_count: (DONE=%d, WAIT=%d, NEXT_READY=%d, NEXT=%d) "
+ "gp_seq: (DONE=%lu, WAIT=%lu, NEXT_READY=%lu, NEXT=%lu)", __entry->ctx,
+ __entry->cb_count[0], __entry->cb_count[1], __entry->cb_count[2], __entry->cb_count[3],
+ __entry->gp_seq[0], __entry->gp_seq[1], __entry->gp_seq[2], __entry->gp_seq[3])
+
+);
+
/*
* Tracepoint for the registration of a single RCU callback of the special
* kfree() form. The first argument is the RCU type, the second argument
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 4782cf1..036d4ab 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -317,6 +317,43 @@
}
/*
+ * Return the number of CBs in each segment along with their gp_seq values.
+ *
+ * This function is O(N) where N is the number of callbacks. Only used from
+ * tracing code which is usually disabled in production.
+ */
+#ifdef CONFIG_RCU_TRACE
+void rcu_segcblist_countseq(struct rcu_segcblist *rsclp,
+ int cbcount[RCU_CBLIST_NSEGS],
+ unsigned long gpseq[RCU_CBLIST_NSEGS])
+{
+ struct rcu_head **cur_tail, *h;
+ int i, c;
+
+ for (i = 0; i < RCU_CBLIST_NSEGS; i++)
+ cbcount[i] = 0;
+
+ cur_tail = &(rsclp->head);
+
+ for (i = 0; i < RCU_CBLIST_NSEGS; i++) {
+ c = 0;
+ // List empty?
+ if (rsclp->tails[i] != cur_tail) {
+ // Start at 1: the walk below stops at the segment's last node.
+ c = 1;
+ for (h = *cur_tail; h->next != *(rsclp->tails[i]); h = h->next) {
+ c++;
+ }
+ }
+
+ cbcount[i] = c;
+ gpseq[i] = rsclp->gp_seq[i];
+ cur_tail = rsclp->tails[i];
+ }
+}
+#endif
+
+/*
* Extract only those callbacks still pending (not yet ready to be
* invoked) from the specified rcu_segcblist structure and place them in
* the specified rcu_cblist structure. Note that this loses information
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 5c293af..0a8dbac 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -104,3 +104,10 @@
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
struct rcu_segcblist *src_rsclp);
+#ifdef CONFIG_RCU_TRACE
+void rcu_segcblist_countseq(struct rcu_segcblist *rsclp,
+ int cbcount[RCU_CBLIST_NSEGS],
+ unsigned long gpseq[RCU_CBLIST_NSEGS]);
+#else
+#define rcu_segcblist_countseq(...)
+#endif
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index ebce14e..c61af6a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1407,6 +1407,8 @@
{
unsigned long gp_seq_req;
bool ret = false;
+ int cbs[RCU_CBLIST_NSEGS];
+ unsigned long gps[RCU_CBLIST_NSEGS];
rcu_lockdep_assert_cblist_protected(rdp);
raw_lockdep_assert_held_rcu_node(rnp);
@@ -1415,6 +1417,10 @@
if (!rcu_segcblist_pend_cbs(&rdp->cblist))
return false;
+ /* Count CBs for tracing. */
+ rcu_segcblist_countseq(&rdp->cblist, cbs, gps);
+	trace_rcu_segcb(TPS("SegCbPreAcc"), cbs, gps);
+
/*
* Callbacks are often registered with incomplete grace-period
* information. Something about the fact that getting exact
@@ -1434,6 +1440,11 @@
trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("AccWaitCB"));
else
trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("AccReadyCB"));
+
+ /* Count CBs for tracing. */
+ rcu_segcblist_countseq(&rdp->cblist, cbs, gps);
+	trace_rcu_segcb(TPS("SegCbPostAcc"), cbs, gps);
+
return ret;
}
@@ -2316,6 +2327,8 @@
struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
long bl, count;
long pending, tlimit = 0;
+ int cbs[RCU_CBLIST_NSEGS];
+ unsigned long gps[RCU_CBLIST_NSEGS];
/* If no callbacks are ready, just return. */
if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
@@ -2350,6 +2363,11 @@
/* Invoke callbacks. */
tick_dep_set_task(current, TICK_DEP_BIT_RCU);
rhp = rcu_cblist_dequeue(&rcl);
+
+ /* Count CBs for tracing. */
+ rcu_segcblist_countseq(&rdp->cblist, cbs, gps);
+	trace_rcu_segcb(TPS("SegCbDequeued"), cbs, gps);
+
for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
rcu_callback_t f;
@@ -2808,6 +2826,8 @@
unsigned long flags;
struct rcu_data *rdp;
bool was_alldone;
+ int cbs[RCU_CBLIST_NSEGS];
+ unsigned long gps[RCU_CBLIST_NSEGS];
/* Misaligned rcu_head! */
WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
@@ -2852,6 +2872,10 @@
trace_rcu_callback(rcu_state.name, head,
rcu_segcblist_n_cbs(&rdp->cblist));
+ /* Count CBs for tracing. */
+ rcu_segcblist_countseq(&rdp->cblist, cbs, gps);
+	trace_rcu_segcb(TPS("SegCbQueued"), cbs, gps);
+
/* Go handle any RCU core processing required. */
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {