debug: add trace_printk instrumentation for core-sched unsafe nesting

Instrument the syscall/irq user entry and exit-to-user paths, irq_work
queueing, and the core scheduling unsafe-nest enter/exit paths with
trace_printk(). Track a per-rq pause_pending flag that is set when
core_irq_work is queued on a sibling and cleared in its handler, stop
tracing if a nest counter underflows, and bound the wait-till-safe spin
loop so an apparent hang panics instead of spinning forever.

Signed-off-by: Joel Fernandes <joelaf@google.com>
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 42b1d48..dab71ce 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -92,6 +92,7 @@
 {
 	long ret;
 
+	trace_printk("syscall: Enter from user\n");
 	enter_from_user_mode(regs);
 
 	instrumentation_begin();
@@ -287,11 +288,14 @@
 	local_irq_disable_exit_to_user();
 	exit_to_user_mode_prepare(regs);
 	instrumentation_end();
+	trace_printk("syscall: Entering exit_to_user_mode\n");
 	exit_to_user_mode();
+	trace_printk("syscall: Leaving exit_to_user_mode\n");
 }
 
 noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
 {
+	trace_printk("irq: Enter from user\n");
 	enter_from_user_mode(regs);
 }
 
@@ -300,7 +304,9 @@
 	instrumentation_begin();
 	exit_to_user_mode_prepare(regs);
 	instrumentation_end();
+	trace_printk("irq: Entering exit_to_user_mode\n");
 	exit_to_user_mode();
+	trace_printk("irq: Leaving exit_to_user_mode\n");
 }
 
 noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index fbff25a..cd18de2 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -102,8 +102,10 @@
 	if (cpu != smp_processor_id()) {
 		/* Arch remote IPI send/receive backend aren't NMI safe */
 		WARN_ON_ONCE(in_nmi());
+		trace_printk("Queuing on cpu %d\n", cpu);
 		__smp_call_single_queue(cpu, &work->node.llist);
 	} else {
+		trace_printk("Queuing local\n");
 		__irq_work_queue_local(work);
 	}
 	preempt_enable();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 22fad1a..fba4dd6e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4695,6 +4695,17 @@
  */
 static void sched_core_irq_work(struct irq_work *work)
 {
+	struct rq *rq;
+	int cpu;
+
+	cpu = smp_processor_id();
+	rq = cpu_rq(cpu);
+	WARN_ON_ONCE(!rq->pause_pending);
+
+	trace_printk("Entering irq work hnd\n");
+	trace_printk("Exit irq work hnd\n");
+
+	rq->pause_pending = false;
 	return;
 }
 
@@ -4719,6 +4730,7 @@
 	bool restart = false;
 	struct rq *rq;
 	int cpu;
+	int loops = 0;
 
 	/* We clear the thread flag only at the end, so need to check for it. */
 	ti_check &= ~_TIF_UNSAFE_RET;
@@ -4733,12 +4745,14 @@
 	preempt_disable();
 	local_irq_enable();
 
+	trace_printk("Enter wait\n");
+
 	/*
 	 * Wait till the core of this HT is not in an unsafe state.
 	 *
 	 * Pair with smp_store_release() in sched_core_unsafe_exit().
 	 */
-	while (smp_load_acquire(&rq->core->core_unsafe_nest) > 0) {
+	while (smp_load_acquire(&rq->core->core_unsafe_nest) > 0 && loops++ < 100000000) {
 		cpu_relax();
 		if (READ_ONCE(current_thread_info()->flags) & ti_check) {
 			restart = true;
@@ -4750,6 +4764,11 @@
 	local_irq_disable();
 	preempt_enable();
 
+	trace_printk("Exit wait\n");
+
+	if (WARN_ON_ONCE(loops >= 100000000))
+		panic("excessive spinning\n");
+
 ret:
 	if (!restart)
 		clear_tsk_thread_flag(current, TIF_UNSAFE_RET);
@@ -4785,6 +4804,11 @@
 
 	/* Count unsafe_enter() calls received without unsafe_exit() on this CPU. */
 	rq->core_this_unsafe_nest++;
+	trace_printk("enter: unsafe this nest now: %d\n", rq->core_this_unsafe_nest);
+	if (rq->core_this_unsafe_nest < 0) {
+		trace_printk("issue stop\n");
+		tracing_stop();
+	}
 
 	/* Should not nest: enter() should only pair with exit(). */
 	if (WARN_ON_ONCE(rq->core_this_unsafe_nest != 1))
@@ -4795,16 +4819,23 @@
 
 	/* Contribute this CPU's unsafe_enter() to core-wide unsafe_enter() count. */
 	WRITE_ONCE(rq->core->core_unsafe_nest, rq->core->core_unsafe_nest + 1);
+	trace_printk("enter: unsafe nest now: %d\n", rq->core->core_unsafe_nest);
+	if (rq->core->core_unsafe_nest < 0) {
+		trace_printk("issue stop core-wide\n");
+		tracing_stop();
+	}
 
 	if (WARN_ON_ONCE(rq->core->core_unsafe_nest == UINT_MAX))
 		goto unlock;
 
 	if (irq_work_is_busy(&rq->core_irq_work)) {
+		WARN_ON_ONCE(!rq->pause_pending);
 		/*
 		 * Do nothing more since we are in an IPI sent from another
 		 * sibling to enforce safety. That sibling would have sent IPIs
 		 * to all of the HTs.
 		 */
+		trace_printk("We are in IPI, do nothing more.\n");
 		goto unlock;
 	}
 
@@ -4812,8 +4843,10 @@
 	 * If we are not the first ones on the core to enter core-wide unsafe
 	 * state, do nothing.
 	 */
-	if (rq->core->core_unsafe_nest > 1)
+	if (rq->core->core_unsafe_nest > 1) {
+		trace_printk("Inner core-wide nest.\n");
 		goto unlock;
+	}
 
 	/* Do nothing more if the core is not tagged. */
 	if (!rq->core->core_cookie)
@@ -4837,6 +4870,8 @@
 		 * pending, no new IPIs are sent. This is Ok since the receiver
 		 * would already be in the kernel, or on its way to it.
 		 */
+		trace_printk("Queuing irq_work on %d\n", i);
+		srq->pause_pending = true;
 		irq_work_queue_on(&srq->core_irq_work, i);
 	}
 unlock:
@@ -4877,6 +4912,11 @@
 		goto ret;
 
 	rq->core_this_unsafe_nest--;
+	trace_printk("exit: unsafe this nest now: %d\n", rq->core_this_unsafe_nest);
+	if (rq->core_this_unsafe_nest < 0) {
+		trace_printk("issue stop\n");
+		tracing_stop();
+	}
 
 	/* enter() should be paired with exit() only. */
 	if (WARN_ON_ONCE(rq->core_this_unsafe_nest != 0))
@@ -4892,6 +4932,7 @@
 
 	/* Pair with smp_load_acquire() in sched_core_wait_till_safe(). */
 	smp_store_release(&rq->core->core_unsafe_nest, nest - 1);
+	trace_printk("exit: unsafe nest now: %d\n", rq->core->core_unsafe_nest);
 	raw_spin_unlock(rq_lockp(rq));
 ret:
 	local_irq_restore(flags);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fcafe2b..dff9078 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1068,6 +1068,7 @@
 	struct rb_root		core_tree;
 	struct irq_work		core_irq_work; /* To force HT into kernel */
 	unsigned int		core_this_unsafe_nest;
+	bool			pause_pending; /* core_irq_work queued, not yet handled */
 
 	/* shared state */
 	unsigned int		core_task_seq;