kbuild: distributed build support for Clang ThinLTO
Add distributed ThinLTO build support for the Linux kernel.
This new mode offers several advantages: (1) increased
flexibility in handling user-specified build options,
(2) improved user-friendliness for developers, and (3) greater
convenience for integrating with objtool and livepatch.
Note that "distributed" here is the term that distinguishes
this mode from in-process ThinLTO builds, which invoke backend
compilation through the linker; it does not necessarily mean
building in a distributed environment.
Distributed ThinLTO is enabled via the
`CONFIG_LTO_CLANG_THIN_DIST` Kconfig option. For example:
> make LLVM=1 defconfig
> scripts/config -e LTO_CLANG_THIN_DIST
> make LLVM=1 oldconfig
> make LLVM=1 vmlinux -j <..>
The implementation changes the top-level Makefile with a
macro for generating `vmlinux.o` for distributed ThinLTO
builds. It uses the existing Kbuild infrastructure to
perform two recursive passes through the subdirectories.
The first pass generates LLVM IR object files, similar to
in-process ThinLTO. Following the thin-link stage, a second
pass compiles these IR files into the final native object
files. The build rules and actions for this two-pass process
are primarily implemented in `scripts/Makefile.build`.
Currently, this patch focuses on building the main kernel
image (`vmlinux`) only. Support for building kernel modules
using this method is planned for a subsequent patch.
Tested on the following architectures: x86, arm64, loongarch,
and riscv.
Some implementation details can be found here:
https://discourse.llvm.org/t/rfc-distributed-thinlto-build-for-kernel/85934
Signed-off-by: Rong Xu <xur@google.com>
Co-developed-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
diff --git a/.gitignore b/.gitignore
index f2f63e4..dc1dfd6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,6 +54,7 @@
*.zst
Module.symvers
dtbs-list
+builtin.order
modules.order
#
@@ -65,6 +66,7 @@
/vmlinux.32
/vmlinux.map
/vmlinux.symvers
+/vmlinux.thinlto-index
/vmlinux.unstripped
/vmlinux-gdb.py
/vmlinuz
diff --git a/Makefile b/Makefile
index ef9409c..61f4e76 100644
--- a/Makefile
+++ b/Makefile
@@ -991,10 +991,10 @@
endif
ifdef CONFIG_LTO_CLANG
-ifdef CONFIG_LTO_CLANG_THIN
-CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit
-else
+ifdef CONFIG_LTO_CLANG_FULL
CC_FLAGS_LTO := -flto
+else
+CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit
endif
CC_FLAGS_LTO += -fvisibility=hidden
@@ -1561,6 +1561,7 @@
CLEAN_FILES += vmlinux.symvers modules-only.symvers \
modules.builtin modules.builtin.modinfo modules.nsdeps \
modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \
+ vmlinux.thinlto-index builtin.order \
compile_commands.json rust/test \
rust-project.json .vmlinux.objs .vmlinux.export.c \
.builtin-dtbs-list .builtin-dtb.S
@@ -2002,7 +2003,7 @@
$(call cmd,rmfiles)
@find . $(RCS_FIND_IGNORE) \
\( -name '*.[aios]' -o -name '*.rsi' -o -name '*.ko' -o -name '.*.cmd' \
- -o -name '*.ko.*' \
+ -o -name '*.ko.*' -o -name '*.o.thinlto.bc' \
-o -name '*.dtb' -o -name '*.dtbo' \
-o -name '*.dtb.S' -o -name '*.dtbo.S' \
-o -name '*.dt.yaml' -o -name 'dtbs-list' \
diff --git a/arch/Kconfig b/arch/Kconfig
index b0adb66..1c2ddb1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -810,6 +810,25 @@
https://clang.llvm.org/docs/ThinLTO.html
If unsure, say Y.
+
+config LTO_CLANG_THIN_DIST
+ bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)"
+ depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN
+ select LTO_CLANG
+ help
+ This option enables Clang's ThinLTO in distributed build mode.
+ In this mode, the linker performs the thin-link, generating
+ ThinLTO index files. Subsequently, the build system explicitly
+ invokes ThinLTO backend compilation using these index files
+ and pre-linked IR objects. The resulting native object files
+	  have the .thinlto-native.o suffix.
+
+ This build mode offers improved visibility into the ThinLTO
+ process through explicit subcommand exposure. It also makes
+ final native object files directly available, benefiting
+ tools like objtool and kpatch. Additionally, it provides
+ crucial granular control over back-end options, enabling
+ module-specific compiler options, and simplifies debugging.
endchoice
config ARCH_SUPPORTS_AUTOFDO_CLANG
diff --git a/scripts/Makefile.thinlto b/scripts/Makefile.thinlto
new file mode 100644
index 0000000..ec98fa2
--- /dev/null
+++ b/scripts/Makefile.thinlto
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Backend compilation step for distributed ThinLTO (CONFIG_LTO_CLANG_THIN_DIST):
+# compile every object listed in vmlinux.thinlto-index, together with its
+# .o.thinlto.bc file, into a .thinlto-native.o object.
+
+PHONY := __default
+__default:
+
+include include/config/auto.conf
+include $(srctree)/scripts/Kbuild.include
+include $(srctree)/scripts/Makefile.lib
+
+# Each .o path read from vmlinux.thinlto-index maps to one native object.
+native-objs := $(patsubst %.o,%.thinlto-native.o,$(call read-file, vmlinux.thinlto-index))
+
+__default: $(native-objs)
+
+# Generate .thinlto-native.o (obj) from .o (bitcode) and .thinlto.bc (summary) files
+# ---------------------------------------------------------------------------
+# $< is the .o file; $(word 2, $^) is the matching .o.thinlto.bc file.
+quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@
+      cmd_cc_o_bc = \
+	$(CC) $(_c_flags) -fno-lto -Wno-unused-command-line-argument \
+	-fthinlto-index=$(word 2, $^) -c -o $@ $<
+
+targets += $(native-objs)
+$(native-objs): %.thinlto-native.o: %.o %.o.thinlto.bc FORCE
+	$(call if_changed,cc_o_bc)
+
+# Add FORCE to the prerequisites of a target to force it to be always rebuilt.
+# ---------------------------------------------------------------------------
+
+PHONY += FORCE
+FORCE:
+
+# Read all saved command lines and dependencies for the $(targets) we
+# may be building above, using $(if_changed{,_dep}). As an
+# optimization, we don't need to read them if the target does not
+# exist, we will rebuild anyway in that case.
+
+existing-targets := $(wildcard $(sort $(targets)))
+
+-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd)
+
+.PHONY: $(PHONY)
diff --git a/scripts/Makefile.vmlinux_a b/scripts/Makefile.vmlinux_a
index 9774f02..382cebe 100644
--- a/scripts/Makefile.vmlinux_a
+++ b/scripts/Makefile.vmlinux_a
@@ -21,6 +21,52 @@
built-in-fixup.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE
$(call if_changed,ar_builtin_fixup)
+ifdef CONFIG_LTO_CLANG_THIN_DIST
+
+# builtin.order: the members of built-in-fixup.a as printed by "$(AR) t",
+# one per line.
+quiet_cmd_builtin.order = GEN $@
+      cmd_builtin.order = $(AR) t $< > $@
+
+targets += builtin.order
+builtin.order: built-in-fixup.a FORCE
+	$(call if_changed,builtin.order)
+
+# Thin-link: builtin.order is handed to the linker as a response file and
+# the --thinlto-index-only output is written to vmlinux.thinlto-index.
+quiet_cmd_ld_thinlto_index = LD $@
+      cmd_ld_thinlto_index = \
+	$(LD) $(KBUILD_LDFLAGS) -r --thinlto-index-only=$@ @$<
+
+targets += vmlinux.thinlto-index
+vmlinux.thinlto-index: builtin.order FORCE
+	$(call if_changed,ld_thinlto_index)
+
+# vmlinux.a: archive the objects from builtin.order, replacing every object
+# that is listed in vmlinux.thinlto-index with its .thinlto-native.o file.
+# grep -x -F matches an object against a whole index line literally, so that
+# kernel/foo.o is not mistaken for arch/x86/kernel/foo.o and '.' is not a
+# wildcard. This assumes the index spells each path exactly as builtin.order
+# does, one per line.
+quiet_cmd_ar_vmlinux.a = GEN $@
+      cmd_ar_vmlinux.a = \
+	rm -f $@; \
+	while read -r obj; do \
+		if grep -qxF -e "$${obj}" $(word 2, $^); then \
+			echo "$${obj%.o}.thinlto-native.o"; \
+		else \
+			echo "$${obj}"; \
+		fi; \
+	done < $< | xargs $(AR) cDPrS --thin $@
+
+targets += vmlinux.a
+# The native objects are built by scripts/Makefile.thinlto before archiving.
+vmlinux.a: builtin.order vmlinux.thinlto-index FORCE
+	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.thinlto
+	$(call if_changed,ar_vmlinux.a)
+
+else
+
# vmlinux.a
# ---------------------------------------------------------------------------
@@ -28,6 +74,8 @@
vmlinux.a: built-in-fixup.a FORCE
$(call if_changed,copy)
+endif
+
# Add FORCE to the prerequisites of a target to force it to be always rebuilt.
# ---------------------------------------------------------------------------
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 5ca7c26..8b01746 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1473,13 +1473,22 @@ static void extract_crcs_for_object(const char *object, struct module *mod)
char cmd_file[PATH_MAX];
char *buf, *p;
const char *base;
- int dirlen, ret;
+ int dirlen, baselen_without_suffix, ret;
base = get_basename(object);
dirlen = base - object;
- ret = snprintf(cmd_file, sizeof(cmd_file), "%.*s.%s.cmd",
- dirlen, object, base);
+ baselen_without_suffix = strlen(object) - dirlen - strlen(".o");
+
+ /*
+ * When CONFIG_LTO_CLANG_THIN_DIST=y, the ELF is *.thinlto-native.o
+ * but the symbol CRCs are recorded in *.o.cmd file.
+ */
+ if (strends(object, ".thinlto-native.o"))
+ baselen_without_suffix -= strlen(".thinlto-native");
+
+ ret = snprintf(cmd_file, sizeof(cmd_file), "%.*s.%.*s.o.cmd",
+ dirlen, object, baselen_without_suffix, base);
if (ret >= sizeof(cmd_file)) {
error("%s: too long path was truncated\n", cmd_file);
return;