WIP: instrument all functions, including notrace ones, via calldepth_hook
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 14caa9d..c5404f9 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -5,7 +5,7 @@
 #include <linux/stringify.h>
 
 #undef notrace
-#define notrace __attribute__((no_instrument_function))
+#define notrace __attribute__((fentry_name("calldepth_hook")))
 
 #ifdef CONFIG_X86_32
 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 30df295..0c57ffd 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -142,7 +142,7 @@
 }
 
 /* Can be uninlined because referenced by paravirt */
-static inline void notrace
+static inline void force_notrace
 native_write_msr(unsigned int msr, u32 low, u32 high)
 {
 	__wrmsr(msr, low, high);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ae13bc9..e4fa79d 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -102,7 +102,7 @@
  * executable.)
  */
 #define RESERVE_BRK(name,sz)						\
-	static void __section(.discard.text) __used notrace		\
+	static void __section(.discard.text) __used force_notrace	\
 	__brk_reservation_fn_##name##__(void) {				\
 		asm volatile (						\
 			".pushsection .brk_reservation,\"aw\",@nobits;" \
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7e2baf7..9e134cf 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -13,11 +13,19 @@
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
+ifeq (1,1)
+CFLAGS_tsc.o += -mfentry-name=calldepth_hook
+CFLAGS_paravirt-spinlocks.o += -mfentry-name=calldepth_hook
+CFLAGS_pvclock.o += -mfentry-name=calldepth_hook
+CFLAGS_kvmclock.o += -mfentry-name=calldepth_hook
+CFLAGS_ftrace.o += -mfentry-name=calldepth_hook
+else
 CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_pvclock.o = -pg
 CFLAGS_REMOVE_kvmclock.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
+endif
 CFLAGS_REMOVE_early_printk.o = -pg
 CFLAGS_REMOVE_head64.o = -pg
 endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d63f4b57..1a213e0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -536,7 +536,7 @@
  * Current gdt points %fs at the "master" per-cpu area: after this,
  * it's on the real one.
  */
-void switch_to_new_gdt(int cpu)
+force_notrace void switch_to_new_gdt(int cpu)
 {
 	/* Load the original GDT */
 	load_direct_gdt(cpu);
diff --git a/arch/x86/kernel/cpu/deepasm.S b/arch/x86/kernel/cpu/deepasm.S
index ee06c22..71c7979 100644
--- a/arch/x86/kernel/cpu/deepasm.S
+++ b/arch/x86/kernel/cpu/deepasm.S
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/linkage.h>
 #include <linux/compiler.h>
+#include <asm/export.h>
 #include <asm/nospec-branch.h>
 #include <asm/types.h>
 
@@ -22,6 +23,7 @@
 	popq	%rbx
 	ret
 END(calldepth_hook)
+EXPORT_SYMBOL(calldepth_hook)
 
 /*
  * Return hook to maintain call depth count.
diff --git a/arch/x86/kernel/cpu/deepchain.c b/arch/x86/kernel/cpu/deepchain.c
index 3b36c52..4da725e 100644
--- a/arch/x86/kernel/cpu/deepchain.c
+++ b/arch/x86/kernel/cpu/deepchain.c
@@ -54,7 +54,7 @@
 		call[0] = 0xe9;	/* jmp */
 		offset = (unsigned long)__return__ - (unsigned long)insnp - 5;
 		memcpy(call + 1, &offset, 4);
-		memcpy(insnp, call, 5);
+		text_poke_early(insnp, call, 5);
 	}
 }
 
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 6b79a9b..9ab692e 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -59,10 +59,11 @@
 #endif
 
 #if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
-#define notrace __attribute__((hotpatch(0,0)))
+#define notrace __attribute__((hotpatch(0,0))) POISON1
 #else
-#define notrace __attribute__((no_instrument_function))
+#define notrace __attribute__((fentry_name("calldepth_hook")))
 #endif
+#define force_notrace __attribute__((no_instrument_function))
 
 /* Intel compiler defines __GNUC__. So we will overwrite implementations
  * coming from above header files here
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 392c7f2..6b8283f 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -6,11 +6,18 @@
 obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
 
 ifdef CONFIG_FUNCTION_TRACER
+ifeq (0,0)
+CFLAGS_lockdep.o += -mfentry-name=calldepth_hook
+CFLAGS_lockdep_proc.o += -mfentry-name=calldepth_hook
+CFLAGS_mutex-debug.o += -mfentry-name=calldepth_hook
+CFLAGS_rtmutex-debug.o += -mfentry-name=calldepth_hook
+else
 CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_lockdep_proc.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
 endif
+endif
 
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_LOCKDEP) += lockdep.o
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index e2f9d4f..845f8a3 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -1,7 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
 ifdef CONFIG_FUNCTION_TRACER
+ifeq (0,0)
+CFLAGS_clock.o += -mfentry-name=calldepth_hook
+else
 CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE)
 endif
+endif
 
 # These files are disabled because they produce non-interesting flaky coverage
 # that is not a function of syscall inputs. E.g. involuntary context switches.
diff --git a/lib/Makefile b/lib/Makefile
index d11c48e..ab41b92 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -4,8 +4,7 @@
 #
 
 ifdef CONFIG_FUNCTION_TRACER
-ORIG_CFLAGS := $(KBUILD_CFLAGS)
-KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
+KBUILD_CFLAGS += -mfentry-name=calldepth_hook
 endif
 
 # These files are disabled because they produce lots of non-interesting and/or