WIP: arm64: implement HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS

Enable HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS on arm64, so that each ftrace
callsite can invoke a specific ftrace_ops directly, rather than always
bouncing through the generic list dispatcher.

A pointer to an ftrace_ops is placed in an 8-byte aligned literal
immediately before each instrumented function. The ftrace_caller
trampoline recovers this pointer from its return address (the `bic` +
`ldr` sequence in entry-ftrace.S), then branches to ops->func. When
exactly one ops is attached to a function, that ops is installed in
the literal; otherwise the literal points at ftrace_list_ops, which
iterates over all attached ops.

Since the common `ftrace_call` callsite is no longer patched,
ftrace_update_ftrace_func() becomes a no-op when CALL_OPS is enabled,
and changes of ops are handled by rewriting the per-callsite literal
in ftrace_modify_call().

The literal is written with a naturally-aligned 64-bit store via a
fixmap alias; as the trampoline only ever reads it as data, no
instruction cache maintenance is required.
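
For illustration, the expected callsite layout is sketched below. This
is a sketch only: it assumes the compiler reserves two pre-entry
padding NOPs to hold the literal (as the ALIGN_DOWN(rec->ip - 12, 8)
computation implies) and that the function entry is 8-byte aligned;
the labels are illustrative, not real symbols.

		.p2align 3
		.quad	<ftrace_ops ptr>	// literal (8-byte aligned)
	func:
		mov	x9, x30			// patched from NOP
		bl	ftrace_caller		// patched from NOP; rec->ip
	func_body:
		...

In ftrace_caller, x30 points at func_body; `bic x2, x30, 0x7` rounds
down to the 8-byte boundary and the literal is loaded from 16 bytes
(4 * AARCH64_INSN_SIZE) below that. The `bic` also covers the case
where the entry point is only 4-byte aligned.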

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 56c2a19..b1f7d05 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -183,6 +183,8 @@
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_ARGS \
 		if $(cc-option,-fpatchable-function-entry=2)
+	select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \
+		if DYNAMIC_FTRACE_WITH_ARGS
 	select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
 		if DYNAMIC_FTRACE_WITH_ARGS
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 2234624..ae345b0 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -9,6 +9,7 @@
 
 #include <linux/arm_sdei.h>
 #include <linux/sched.h>
+#include <linux/ftrace.h>
 #include <linux/kexec.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
@@ -194,5 +195,8 @@
   DEFINE(KIMAGE_START,			offsetof(struct kimage, start));
   BLANK();
 #endif
+#ifdef CONFIG_FUNCTION_TRACER
+  DEFINE(FTRACE_OPS_FUNC,		offsetof(struct ftrace_ops, func));
+#endif
   return 0;
 }
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index d28446d..c5b9926 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -64,13 +64,20 @@
 	stp	x29, x30, [sp, #FREGS_SIZE]
 	add	x29, sp, #FREGS_SIZE
 
-	sub	x0, x30, #AARCH64_INSN_SIZE	// ip (callsite's BL insn)
-	mov	x1, x9				// parent_ip (callsite's LR)
-	ldr_l	x2, function_trace_op		// op
-	mov	x3, sp				// regs
+	sub	x0, x30, #AARCH64_INSN_SIZE		// ip (callsite's BL insn)
+	mov	x1, x9					// parent_ip (callsite's LR)
 
-SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
-	bl	ftrace_stub
+	/*
+	 * The ops pointer is in an 8-byte aligned literal, 12 or 16 bytes
+	 * before the callsite's BL. See ftrace_call_adjust().
+	 */
+	bic	x2, x30, 0x7
+	ldr	x2, [x2, #-(4 * AARCH64_INSN_SIZE)]	// op
+
+	mov	x3, sp					// regs
+
+	ldr	x4, [x2, #FTRACE_OPS_FUNC]		// op->func
+	blr	x4					// op->func(ip, parent_ip, op, regs)
 
 /*
  * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index f635750..81f0a8a 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -17,6 +17,26 @@
 #include <asm/insn.h>
 #include <asm/patching.h>
 
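+/*
+ * Find the ftrace_ops to hand to a callsite: if exactly one ops is
+ * attached to the function, use it directly; otherwise fall back to
+ * the generic list dispatcher, ftrace_list_ops.
+ */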
+static const struct ftrace_ops *arm64_rec_get_ops(struct dyn_ftrace *rec)
+{
+	const struct ftrace_ops *ops = NULL;
+
+	if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+		ops = ftrace_find_unique_ops(rec);
+		WARN_ON_ONCE(!ops);
+	}
+
+	if (!ops)
+		ops = &ftrace_list_ops;
+
+	return ops;
+}
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 /*
  * Replace a single instruction, which may be a branch or NOP.
@@ -53,6 +68,13 @@
  */
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
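+	/*
+	 * With CALL_OPS, each callsite loads its ops pointer and branches
+	 * to ops->func directly; there is no common callsite to update.
+	 */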
+	return 0;
+#else
 	unsigned long pc;
 	u32 new;
 
@@ -61,6 +79,7 @@
 					  AARCH64_INSN_BRANCH_LINK);
 
 	return ftrace_modify_code(pc, 0, new, false);
+#endif
 }
 
 static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
@@ -134,14 +153,39 @@
 	return true;
 }
 
+/* TODO: rework this API, place something in a header */
+int aarch64_insn_write_u64(void *addr, u64 val);
+
+static int ftrace_rec_set_ops(const struct dyn_ftrace *rec,
+			      const struct ftrace_ops *ops)
+{
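+	/*
+	 * The ops pointer lives in an 8-byte aligned literal within the
+	 * NOPs placed before the function entry. rec->ip points at the
+	 * BL, so the literal sits at ALIGN_DOWN(rec->ip - 12, 8),
+	 * matching the `bic` + `ldr` sequence in ftrace_caller.
+	 */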
+	unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8);
+
+	return aarch64_insn_write_u64((void *)literal, (unsigned long)ops);
+}
+
 /*
  * Turn on the call to ftrace_caller() in instrumented function
  */
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 	unsigned long pc = rec->ip;
 	u32 old, new;
 
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) {
+		/* Install the ops pointer before the BL is enabled */
+		int ret = ftrace_rec_set_ops(rec, arm64_rec_get_ops(rec));
+
+		if (ret)
+			return ret;
+	}
+
 	if (!ftrace_find_callable_addr(rec, NULL, &addr))
 		return -EINVAL;
 
@@ -152,6 +187,23 @@
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
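+	/*
+	 * On arm64 every enabled callsite branches to the common
+	 * ftrace_caller trampoline, so the branch target never changes;
+	 * only the ops pointer literal needs updating.
+	 */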
+	if (WARN_ON_ONCE(addr != old_addr))
+		return -EINVAL;
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+		return ftrace_rec_set_ops(rec, arm64_rec_get_ops(rec));
+
+	return -EINVAL;
+}
+
 unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/*
@@ -233,6 +280,13 @@
 {
 	unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
 	u32 old, new;
+	int ret;
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) {
+		ret = ftrace_rec_set_ops(rec, &ftrace_list_ops);
+		if (ret)
+			return ret;
+	}
 
 	old = aarch64_insn_gen_nop();
 	new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
@@ -250,9 +302,21 @@
 {
 	unsigned long pc = rec->ip;
 	u32 old = 0, new;
+	int ret;
 
 	new = aarch64_insn_gen_nop();
 
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) {
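+		/*
+		 * The BL is still live until it is patched to a NOP;
+		 * point the literal back at the list dispatcher so a
+		 * concurrent call sees a valid ops.
+		 */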
+		ret = ftrace_rec_set_ops(rec, &ftrace_list_ops);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * When using mcount, callsites in modules may have been initalized to
 	 * call an arbitrary module PLT (which redirects to the _mcount stub)
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index 33e0fab..2e3f3ba 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -88,6 +88,29 @@
 	return __aarch64_insn_write(addr, cpu_to_le32(insn));
 }
 
+noinstr int aarch64_insn_write_u64(void *addr, u64 val)
+{
+	u64 *waddr;
+	unsigned long flags;
+	int ret;
+
+	raw_spin_lock_irqsave(&patch_lock, flags);
+	waddr = patch_map(addr, FIX_TEXT_POKE0);
+
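+	/*
+	 * The literal is only ever read as data by ftrace_caller, so no
+	 * I-cache maintenance is needed. A naturally-aligned 64-bit store
+	 * is single-copy atomic, so concurrent readers will see either
+	 * the old or the new ops pointer.
+	 */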
+	ret = copy_to_kernel_nofault(waddr, &val, sizeof(val));
+
+	patch_unmap(FIX_TEXT_POKE0);
+	raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+	return ret;
+}
+
 int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
 {
 	u32 *tp = addr;