| From: Alexander Graf <graf@amazon.com> |
| Subject: kexec: add Kexec HandOver (KHO) generation helpers |
| Date: Fri, 9 May 2025 00:46:22 -0700 |
| |
| Add the infrastructure to generate Kexec HandOver metadata. Kexec |
| HandOver is a mechanism that allows Linux to preserve state - arbitrary |
| properties as well as memory locations - across kexec. |
| |
| It does so using two concepts: |
| |
| 1) KHO FDT - Every KHO kexec carries a KHO-specific flattened device tree |
|    blob that describes preserved memory regions. Device drivers can |
|    register with KHO to serialize and preserve their state before kexec |
|    (see the notifier sketch below). |
| |
| 2) Scratch Regions - CMA regions that we allocate in the first kernel. |
|    CMA guarantees that no handover pages land in those regions, because |
|    handover pages must stay at a fixed physical memory location. We use |
|    these regions as the place to load future kexec images so that they |
|    won't collide with any handover data (see the usage example below). |
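| |
|    As a notifier sketch - a hypothetical driver, not part of this patch; |
|    the "foo" names and the pre-built sub-FDT blob are assumptions - |
|    serialization via the KHO notifier chain could look like: |
| |
|      #include <linux/init.h> |
|      #include <linux/kexec_handover.h> |
|      #include <linux/notifier.h> |
| |
|      static void *foo_fdt; /* an already-built, KHO-preserved sub-FDT */ |
| |
|      static int foo_kho_notifier(struct notifier_block *self, |
|                                  unsigned long cmd, void *v) |
|      { |
|              struct kho_serialization *ser = v; |
| |
|              switch (cmd) { |
|              case KEXEC_KHO_FINALIZE: |
|                      /* record the sub-FDT in the KHO root tree */ |
|                      return notifier_from_errno(kho_add_subtree(ser, |
|                                                      "foo", foo_fdt)); |
|              case KEXEC_KHO_ABORT: |
|                      /* undo anything serialized at FINALIZE time */ |
|                      return NOTIFY_DONE; |
|              default: |
|                      return NOTIFY_DONE; |
|              } |
|      } |
| |
|      static struct notifier_block foo_nb = { |
|              .notifier_call = foo_kho_notifier, |
|      }; |
| |
|      static int __init foo_init(void) |
|      { |
|              return register_kho_notifier(&foo_nb); |
|      } |
|      late_initcall(foo_init); |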
| |
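| As a usage example, KHO and its scratch sizes are controlled from the |
| kernel command line; the values below are illustrative only: |
| |
|      kho=on kho_scratch=256M,1G,1G |
| |
| where the three sizes select the lowmem, global and per-node scratch |
| areas (a percentage form, kho_scratch=N%, is also accepted). After boot, |
| writing 1 to /sys/kernel/debug/kho/out/finalize runs the |
| KEXEC_KHO_FINALIZE notifier chain and serializes the KHO FDT, while |
| writing 0 aborts the finalization again. |
| |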
| Link: https://lkml.kernel.org/r/20250509074635.3187114-5-changyuanl@google.com |
| Signed-off-by: Alexander Graf <graf@amazon.com> |
| Co-developed-by: Mike Rapoport (Microsoft) <rppt@kernel.org> |
| Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> |
| Co-developed-by: Pratyush Yadav <ptyadav@amazon.de> |
| Signed-off-by: Pratyush Yadav <ptyadav@amazon.de> |
| Co-developed-by: Changyuan Lyu <changyuanl@google.com> |
| Signed-off-by: Changyuan Lyu <changyuanl@google.com> |
| Cc: Andy Lutomirski <luto@kernel.org> |
| Cc: Anthony Yznaga <anthony.yznaga@oracle.com> |
| Cc: Arnd Bergmann <arnd@arndb.de> |
| Cc: Ashish Kalra <ashish.kalra@amd.com> |
| Cc: Ben Herrenschmidt <benh@kernel.crashing.org> |
| Cc: Borislav Petkov <bp@alien8.de> |
| Cc: Catalin Marinas <catalin.marinas@arm.com> |
| Cc: Dave Hansen <dave.hansen@linux.intel.com> |
| Cc: David Woodhouse <dwmw2@infradead.org> |
| Cc: Eric Biederman <ebiederm@xmission.com> |
| Cc: "H. Peter Anvin" <hpa@zytor.com> |
| Cc: Ingo Molnar <mingo@redhat.com> |
| Cc: James Gowans <jgowans@amazon.com> |
| Cc: Jason Gunthorpe <jgg@nvidia.com> |
| Cc: Jonathan Corbet <corbet@lwn.net> |
| Cc: Krzysztof Kozlowski <krzk@kernel.org> |
| Cc: Mark Rutland <mark.rutland@arm.com> |
| Cc: Paolo Bonzini <pbonzini@redhat.com> |
| Cc: Pasha Tatashin <pasha.tatashin@soleen.com> |
| Cc: Peter Zijlstra <peterz@infradead.org> |
| Cc: Rob Herring <robh@kernel.org> |
| Cc: Saravana Kannan <saravanak@google.com> |
| Cc: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com> |
| Cc: Steven Rostedt <rostedt@goodmis.org> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Cc: Thomas Lendacky <thomas.lendacky@amd.com> |
| Cc: Will Deacon <will@kernel.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| MAINTAINERS | 9 |
| include/linux/kexec_handover.h | 59 +++ |
| kernel/Makefile | 1 |
| kernel/kexec_handover.c | 557 +++++++++++++++++++++++++++++++ |
| mm/mm_init.c | 8 |
| 5 files changed, 634 insertions(+) |
| |
| diff --git a/include/linux/kexec_handover.h a/include/linux/kexec_handover.h |
| new file mode 100644 |
| --- /dev/null |
| +++ a/include/linux/kexec_handover.h |
| @@ -0,0 +1,59 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +#ifndef LINUX_KEXEC_HANDOVER_H |
| +#define LINUX_KEXEC_HANDOVER_H |
| + |
| +#include <linux/types.h> |
| +#include <linux/errno.h> |
| + |
| +struct kho_scratch { |
| + phys_addr_t addr; |
| + phys_addr_t size; |
| +}; |
| + |
| +/* KHO Notifier index */ |
| +enum kho_event { |
| + KEXEC_KHO_FINALIZE = 0, |
| + KEXEC_KHO_ABORT = 1, |
| +}; |
| + |
| +struct notifier_block; |
| + |
| +struct kho_serialization; |
| + |
| +#ifdef CONFIG_KEXEC_HANDOVER |
| +bool kho_is_enabled(void); |
| + |
| +int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); |
| + |
| +int register_kho_notifier(struct notifier_block *nb); |
| +int unregister_kho_notifier(struct notifier_block *nb); |
| + |
| +void kho_memory_init(void); |
| +#else |
| +static inline bool kho_is_enabled(void) |
| +{ |
| + return false; |
| +} |
| + |
| +static inline int kho_add_subtree(struct kho_serialization *ser, |
| + const char *name, void *fdt) |
| +{ |
| + return -EOPNOTSUPP; |
| +} |
| + |
| +static inline int register_kho_notifier(struct notifier_block *nb) |
| +{ |
| + return -EOPNOTSUPP; |
| +} |
| + |
| +static inline int unregister_kho_notifier(struct notifier_block *nb) |
| +{ |
| + return -EOPNOTSUPP; |
| +} |
| + |
| +static inline void kho_memory_init(void) |
| +{ |
| +} |
| +#endif /* CONFIG_KEXEC_HANDOVER */ |
| + |
| +#endif /* LINUX_KEXEC_HANDOVER_H */ |
| diff --git a/kernel/kexec_handover.c a/kernel/kexec_handover.c |
| new file mode 100644 |
| --- /dev/null |
| +++ a/kernel/kexec_handover.c |
| @@ -0,0 +1,557 @@ |
| +// SPDX-License-Identifier: GPL-2.0-only |
| +/* |
| + * kexec_handover.c - kexec handover metadata processing |
| + * Copyright (C) 2023 Alexander Graf <graf@amazon.com> |
| + * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <rppt@kernel.org> |
| + * Copyright (C) 2025 Google LLC, Changyuan Lyu <changyuanl@google.com> |
| + */ |
| + |
| +#define pr_fmt(fmt) "KHO: " fmt |
| + |
| +#include <linux/cma.h> |
| +#include <linux/debugfs.h> |
| +#include <linux/kexec.h> |
| +#include <linux/kexec_handover.h> |
| +#include <linux/libfdt.h> |
| +#include <linux/list.h> |
| +#include <linux/memblock.h> |
| +#include <linux/notifier.h> |
| +#include <linux/page-isolation.h> |
| +/* |
| + * KHO is tightly coupled with mm init and needs access to some of the |
| + * mm-internal APIs. |
| + */ |
| +#include "../mm/internal.h" |
| + |
| +#define KHO_FDT_COMPATIBLE "kho-v1" |
| +#define PROP_PRESERVED_MEMORY_MAP "preserved-memory-map" |
| +#define PROP_SUB_FDT "fdt" |
| + |
| +static bool kho_enable __ro_after_init; |
| + |
| +bool kho_is_enabled(void) |
| +{ |
| + return kho_enable; |
| +} |
| +EXPORT_SYMBOL_GPL(kho_is_enabled); |
| + |
| +static int __init kho_parse_enable(char *p) |
| +{ |
| + return kstrtobool(p, &kho_enable); |
| +} |
| +early_param("kho", kho_parse_enable); |
| + |
| +struct kho_serialization { |
| + struct page *fdt; |
| + struct list_head fdt_list; |
| + struct dentry *sub_fdt_dir; |
| +}; |
| + |
| +/* |
| + * With KHO enabled, memory can become fragmented because KHO regions may |
| + * be anywhere in physical address space. The scratch regions give us |
| + * safe zones that will never see KHO allocations. This is where we |
| + * can later safely load new kexec images and then use the scratch |
| + * area for early allocations that happen before the page allocator is |
| + * initialized. |
| + */ |
| +static struct kho_scratch *kho_scratch; |
| +static unsigned int kho_scratch_cnt; |
| + |
| +/* |
| + * The scratch areas are scaled by default as percent of memory allocated from |
| + * memblock. A user can override the scale with command line parameter: |
| + * |
| + * kho_scratch=N% |
| + * |
| + * It is also possible to explicitly define sizes for the lowmem, global |
| + * and per-node scratch areas: |
| + * |
| + * kho_scratch=ll[KMG],mm[KMG],nn[KMG] |
| + * |
| + * The explicit size definition takes precedence over scale definition. |
| + */ |
| +static unsigned int scratch_scale __initdata = 200; |
| +static phys_addr_t scratch_size_global __initdata; |
| +static phys_addr_t scratch_size_pernode __initdata; |
| +static phys_addr_t scratch_size_lowmem __initdata; |
| + |
| +static int __init kho_parse_scratch_size(char *p) |
| +{ |
| + size_t len; |
| + unsigned long sizes[3]; |
| + int i; |
| + |
| + if (!p) |
| + return -EINVAL; |
| + |
| + len = strlen(p); |
| + if (!len) |
| + return -EINVAL; |
| + |
| + /* parse nn% */ |
| + if (p[len - 1] == '%') { |
| + /* unsigned int max is 4,294,967,295, 10 chars */ |
| + char s_scale[11] = {}; |
| + int ret = 0; |
| + |
| + if (len > ARRAY_SIZE(s_scale)) |
| + return -EINVAL; |
| + |
| + memcpy(s_scale, p, len - 1); |
| + ret = kstrtouint(s_scale, 10, &scratch_scale); |
| + if (!ret) |
| + pr_notice("scratch scale is %d%%\n", scratch_scale); |
| + return ret; |
| + } |
| + |
| + /* parse ll[KMG],mm[KMG],nn[KMG] */ |
| + for (i = 0; i < ARRAY_SIZE(sizes); i++) { |
| + char *endp = p; |
| + |
| + if (i > 0) { |
| + if (*p != ',') |
| + return -EINVAL; |
| + p += 1; |
| + } |
| + |
| + sizes[i] = memparse(p, &endp); |
| + if (!sizes[i] || endp == p) |
| + return -EINVAL; |
| + p = endp; |
| + } |
| + |
| + scratch_size_lowmem = sizes[0]; |
| + scratch_size_global = sizes[1]; |
| + scratch_size_pernode = sizes[2]; |
| + scratch_scale = 0; |
| + |
| +	pr_notice("scratch areas: lowmem: %lluMiB global: %lluMiB pernode: %lluMiB\n", |
| + (u64)(scratch_size_lowmem >> 20), |
| + (u64)(scratch_size_global >> 20), |
| + (u64)(scratch_size_pernode >> 20)); |
| + |
| + return 0; |
| +} |
| +early_param("kho_scratch", kho_parse_scratch_size); |
| + |
| +static void __init scratch_size_update(void) |
| +{ |
| + phys_addr_t size; |
| + |
| + if (!scratch_scale) |
| + return; |
| + |
| + size = memblock_reserved_kern_size(ARCH_LOW_ADDRESS_LIMIT, |
| + NUMA_NO_NODE); |
| + size = size * scratch_scale / 100; |
| + scratch_size_lowmem = round_up(size, CMA_MIN_ALIGNMENT_BYTES); |
| + |
| + size = memblock_reserved_kern_size(MEMBLOCK_ALLOC_ANYWHERE, |
| + NUMA_NO_NODE); |
| + size = size * scratch_scale / 100 - scratch_size_lowmem; |
| + scratch_size_global = round_up(size, CMA_MIN_ALIGNMENT_BYTES); |
| +} |
| + |
| +static phys_addr_t __init scratch_size_node(int nid) |
| +{ |
| + phys_addr_t size; |
| + |
| + if (scratch_scale) { |
| + size = memblock_reserved_kern_size(MEMBLOCK_ALLOC_ANYWHERE, |
| + nid); |
| + size = size * scratch_scale / 100; |
| + } else { |
| + size = scratch_size_pernode; |
| + } |
| + |
| + return round_up(size, CMA_MIN_ALIGNMENT_BYTES); |
| +} |
| + |
| +/** |
| + * kho_reserve_scratch - Reserve a contiguous chunk of memory for kexec |
| + * |
| + * With KHO we can preserve arbitrary pages in the system. To ensure we still |
| + * have a large contiguous region of memory when we search the physical address |
| + * space for target memory, let's make sure we always have a large CMA region |
| + * active. This CMA region will only be used for movable pages which are not a |
| + * problem for us during KHO because we can just move them somewhere else. |
| + */ |
| +static void __init kho_reserve_scratch(void) |
| +{ |
| + phys_addr_t addr, size; |
| + int nid, i = 0; |
| + |
| + if (!kho_enable) |
| + return; |
| + |
| + scratch_size_update(); |
| + |
| + /* FIXME: deal with node hot-plug/remove */ |
| + kho_scratch_cnt = num_online_nodes() + 2; |
| + size = kho_scratch_cnt * sizeof(*kho_scratch); |
| + kho_scratch = memblock_alloc(size, PAGE_SIZE); |
| + if (!kho_scratch) |
| + goto err_disable_kho; |
| + |
| + /* |
| + * reserve scratch area in low memory for lowmem allocations in the |
| + * next kernel |
| + */ |
| + size = scratch_size_lowmem; |
| + addr = memblock_phys_alloc_range(size, CMA_MIN_ALIGNMENT_BYTES, 0, |
| + ARCH_LOW_ADDRESS_LIMIT); |
| + if (!addr) |
| + goto err_free_scratch_desc; |
| + |
| + kho_scratch[i].addr = addr; |
| + kho_scratch[i].size = size; |
| + i++; |
| + |
| + /* reserve large contiguous area for allocations without nid */ |
| + size = scratch_size_global; |
| + addr = memblock_phys_alloc(size, CMA_MIN_ALIGNMENT_BYTES); |
| + if (!addr) |
| + goto err_free_scratch_areas; |
| + |
| + kho_scratch[i].addr = addr; |
| + kho_scratch[i].size = size; |
| + i++; |
| + |
| + for_each_online_node(nid) { |
| + size = scratch_size_node(nid); |
| + addr = memblock_alloc_range_nid(size, CMA_MIN_ALIGNMENT_BYTES, |
| + 0, MEMBLOCK_ALLOC_ACCESSIBLE, |
| + nid, true); |
| + if (!addr) |
| + goto err_free_scratch_areas; |
| + |
| + kho_scratch[i].addr = addr; |
| + kho_scratch[i].size = size; |
| + i++; |
| + } |
| + |
| + return; |
| + |
| +err_free_scratch_areas: |
| + for (i--; i >= 0; i--) |
| + memblock_phys_free(kho_scratch[i].addr, kho_scratch[i].size); |
| +err_free_scratch_desc: |
| + memblock_free(kho_scratch, kho_scratch_cnt * sizeof(*kho_scratch)); |
| +err_disable_kho: |
| + kho_enable = false; |
| +} |
| + |
| +struct fdt_debugfs { |
| + struct list_head list; |
| + struct debugfs_blob_wrapper wrapper; |
| + struct dentry *file; |
| +}; |
| + |
| +static int kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir, |
| + const char *name, const void *fdt) |
| +{ |
| + struct fdt_debugfs *f; |
| + struct dentry *file; |
| + |
| + f = kmalloc(sizeof(*f), GFP_KERNEL); |
| + if (!f) |
| + return -ENOMEM; |
| + |
| + f->wrapper.data = (void *)fdt; |
| + f->wrapper.size = fdt_totalsize(fdt); |
| + |
| + file = debugfs_create_blob(name, 0400, dir, &f->wrapper); |
| + if (IS_ERR(file)) { |
| + kfree(f); |
| + return PTR_ERR(file); |
| + } |
| + |
| + f->file = file; |
| + list_add(&f->list, list); |
| + |
| + return 0; |
| +} |
| + |
| +/** |
| + * kho_add_subtree - record the physical address of a sub FDT in the KHO root tree. |
| + * @ser: serialization control object passed by KHO notifiers. |
| + * @name: name of the sub tree. |
| + * @fdt: the sub tree blob. |
| + * |
| + * Creates a new child node named @name in the KHO root FDT and records |
| + * the physical address of @fdt. The pages of @fdt must also be preserved |
| + * by KHO for the new kernel to retrieve it after kexec. |
| + * |
| + * A debugfs blob entry is also created at |
| + * ``/sys/kernel/debug/kho/out/sub_fdts/@name``. |
| + * |
| + * Return: 0 on success, error code on failure |
| + */ |
| +int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt) |
| +{ |
| + int err = 0; |
| + u64 phys = (u64)virt_to_phys(fdt); |
| + void *root = page_to_virt(ser->fdt); |
| + |
| + err |= fdt_begin_node(root, name); |
| + err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys)); |
| + err |= fdt_end_node(root); |
| + |
| + if (err) |
| + return err; |
| + |
| + return kho_debugfs_fdt_add(&ser->fdt_list, ser->sub_fdt_dir, name, fdt); |
| +} |
| +EXPORT_SYMBOL_GPL(kho_add_subtree); |
| + |
| +struct kho_out { |
| + struct blocking_notifier_head chain_head; |
| + |
| + struct dentry *dir; |
| + |
| + struct mutex lock; /* protects KHO FDT finalization */ |
| + |
| + struct kho_serialization ser; |
| + bool finalized; |
| +}; |
| + |
| +static struct kho_out kho_out = { |
| + .chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head), |
| + .lock = __MUTEX_INITIALIZER(kho_out.lock), |
| + .ser = { |
| + .fdt_list = LIST_HEAD_INIT(kho_out.ser.fdt_list), |
| + }, |
| + .finalized = false, |
| +}; |
| + |
| +int register_kho_notifier(struct notifier_block *nb) |
| +{ |
| + return blocking_notifier_chain_register(&kho_out.chain_head, nb); |
| +} |
| +EXPORT_SYMBOL_GPL(register_kho_notifier); |
| + |
| +int unregister_kho_notifier(struct notifier_block *nb) |
| +{ |
| + return blocking_notifier_chain_unregister(&kho_out.chain_head, nb); |
| +} |
| +EXPORT_SYMBOL_GPL(unregister_kho_notifier); |
| + |
| +/* Handling for debug/kho/out */ |
| + |
| +static struct dentry *debugfs_root; |
| + |
| +static int kho_out_update_debugfs_fdt(void) |
| +{ |
| + int err = 0; |
| + struct fdt_debugfs *ff, *tmp; |
| + |
| + if (kho_out.finalized) { |
| + err = kho_debugfs_fdt_add(&kho_out.ser.fdt_list, kho_out.dir, |
| + "fdt", page_to_virt(kho_out.ser.fdt)); |
| + } else { |
| + list_for_each_entry_safe(ff, tmp, &kho_out.ser.fdt_list, list) { |
| + debugfs_remove(ff->file); |
| + list_del(&ff->list); |
| + kfree(ff); |
| + } |
| + } |
| + |
| + return err; |
| +} |
| + |
| +static int kho_abort(void) |
| +{ |
| + int err; |
| + |
| + err = blocking_notifier_call_chain(&kho_out.chain_head, KEXEC_KHO_ABORT, |
| + NULL); |
| + err = notifier_to_errno(err); |
| + |
| + if (err) |
| + pr_err("Failed to abort KHO finalization: %d\n", err); |
| + |
| + return err; |
| +} |
| + |
| +static int kho_finalize(void) |
| +{ |
| + int err = 0; |
| + void *fdt = page_to_virt(kho_out.ser.fdt); |
| + |
| + err |= fdt_create(fdt, PAGE_SIZE); |
| + err |= fdt_finish_reservemap(fdt); |
| + err |= fdt_begin_node(fdt, ""); |
| + err |= fdt_property_string(fdt, "compatible", KHO_FDT_COMPATIBLE); |
| + if (err) |
| + goto abort; |
| + |
| + err = blocking_notifier_call_chain(&kho_out.chain_head, |
| + KEXEC_KHO_FINALIZE, &kho_out.ser); |
| + err = notifier_to_errno(err); |
| + if (err) |
| + goto abort; |
| + |
| + err |= fdt_end_node(fdt); |
| + err |= fdt_finish(fdt); |
| + |
| +abort: |
| + if (err) { |
| + pr_err("Failed to convert KHO state tree: %d\n", err); |
| + kho_abort(); |
| + } |
| + |
| + return err; |
| +} |
| + |
| +static int kho_out_finalize_get(void *data, u64 *val) |
| +{ |
| + mutex_lock(&kho_out.lock); |
| + *val = kho_out.finalized; |
| + mutex_unlock(&kho_out.lock); |
| + |
| + return 0; |
| +} |
| + |
| +static int kho_out_finalize_set(void *data, u64 _val) |
| +{ |
| + int ret = 0; |
| + bool val = !!_val; |
| + |
| + mutex_lock(&kho_out.lock); |
| + |
| + if (val == kho_out.finalized) { |
| + if (kho_out.finalized) |
| + ret = -EEXIST; |
| + else |
| + ret = -ENOENT; |
| + goto unlock; |
| + } |
| + |
| + if (val) |
| + ret = kho_finalize(); |
| + else |
| + ret = kho_abort(); |
| + |
| + if (ret) |
| + goto unlock; |
| + |
| + kho_out.finalized = val; |
| + ret = kho_out_update_debugfs_fdt(); |
| + |
| +unlock: |
| + mutex_unlock(&kho_out.lock); |
| + return ret; |
| +} |
| + |
| +DEFINE_DEBUGFS_ATTRIBUTE(fops_kho_out_finalize, kho_out_finalize_get, |
| + kho_out_finalize_set, "%llu\n"); |
| + |
| +static int scratch_phys_show(struct seq_file *m, void *v) |
| +{ |
| + for (int i = 0; i < kho_scratch_cnt; i++) |
| + seq_printf(m, "0x%llx\n", kho_scratch[i].addr); |
| + |
| + return 0; |
| +} |
| +DEFINE_SHOW_ATTRIBUTE(scratch_phys); |
| + |
| +static int scratch_len_show(struct seq_file *m, void *v) |
| +{ |
| + for (int i = 0; i < kho_scratch_cnt; i++) |
| + seq_printf(m, "0x%llx\n", kho_scratch[i].size); |
| + |
| + return 0; |
| +} |
| +DEFINE_SHOW_ATTRIBUTE(scratch_len); |
| + |
| +static __init int kho_out_debugfs_init(void) |
| +{ |
| + struct dentry *dir, *f, *sub_fdt_dir; |
| + |
| + dir = debugfs_create_dir("out", debugfs_root); |
| + if (IS_ERR(dir)) |
| + return -ENOMEM; |
| + |
| + sub_fdt_dir = debugfs_create_dir("sub_fdts", dir); |
| + if (IS_ERR(sub_fdt_dir)) |
| + goto err_rmdir; |
| + |
| + f = debugfs_create_file("scratch_phys", 0400, dir, NULL, |
| + &scratch_phys_fops); |
| + if (IS_ERR(f)) |
| + goto err_rmdir; |
| + |
| + f = debugfs_create_file("scratch_len", 0400, dir, NULL, |
| + &scratch_len_fops); |
| + if (IS_ERR(f)) |
| + goto err_rmdir; |
| + |
| + f = debugfs_create_file("finalize", 0600, dir, NULL, |
| + &fops_kho_out_finalize); |
| + if (IS_ERR(f)) |
| + goto err_rmdir; |
| + |
| + kho_out.dir = dir; |
| + kho_out.ser.sub_fdt_dir = sub_fdt_dir; |
| + return 0; |
| + |
| +err_rmdir: |
| + debugfs_remove_recursive(dir); |
| + return -ENOENT; |
| +} |
| + |
| +static __init int kho_init(void) |
| +{ |
| + int err = 0; |
| + |
| + if (!kho_enable) |
| + return 0; |
| + |
| + kho_out.ser.fdt = alloc_page(GFP_KERNEL); |
| + if (!kho_out.ser.fdt) { |
| + err = -ENOMEM; |
| + goto err_free_scratch; |
| + } |
| + |
| + debugfs_root = debugfs_create_dir("kho", NULL); |
| + if (IS_ERR(debugfs_root)) { |
| + err = -ENOENT; |
| + goto err_free_fdt; |
| + } |
| + |
| + err = kho_out_debugfs_init(); |
| + if (err) |
| + goto err_free_fdt; |
| + |
| + for (int i = 0; i < kho_scratch_cnt; i++) { |
| + unsigned long base_pfn = PHYS_PFN(kho_scratch[i].addr); |
| + unsigned long count = kho_scratch[i].size >> PAGE_SHIFT; |
| + unsigned long pfn; |
| + |
| + for (pfn = base_pfn; pfn < base_pfn + count; |
| + pfn += pageblock_nr_pages) |
| + init_cma_reserved_pageblock(pfn_to_page(pfn)); |
| + } |
| + |
| + return 0; |
| + |
| +err_free_fdt: |
| + put_page(kho_out.ser.fdt); |
| + kho_out.ser.fdt = NULL; |
| +err_free_scratch: |
| + for (int i = 0; i < kho_scratch_cnt; i++) { |
| + void *start = __va(kho_scratch[i].addr); |
| + void *end = start + kho_scratch[i].size; |
| + |
| + free_reserved_area(start, end, -1, ""); |
| + } |
| + kho_enable = false; |
| + return err; |
| +} |
| +late_initcall(kho_init); |
| + |
| +void __init kho_memory_init(void) |
| +{ |
| + kho_reserve_scratch(); |
| +} |
| --- a/kernel/Makefile~kexec-add-kexec-handover-kho-generation-helpers |
| +++ a/kernel/Makefile |
| @@ -80,6 +80,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_core.o |
| obj-$(CONFIG_KEXEC) += kexec.o |
| obj-$(CONFIG_KEXEC_FILE) += kexec_file.o |
| obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o |
| +obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o |
| obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o |
| obj-$(CONFIG_COMPAT) += compat.o |
| obj-$(CONFIG_CGROUPS) += cgroup/ |
| --- a/MAINTAINERS~kexec-add-kexec-handover-kho-generation-helpers |
| +++ a/MAINTAINERS |
| @@ -13139,6 +13139,15 @@ F: include/linux/kexec.h |
| F: include/uapi/linux/kexec.h |
| F: kernel/kexec* |
| |
| +KEXEC HANDOVER (KHO) |
| +M: Alexander Graf <graf@amazon.com> |
| +M: Mike Rapoport <rppt@kernel.org> |
| +M: Changyuan Lyu <changyuanl@google.com> |
| +L: kexec@lists.infradead.org |
| +S: Maintained |
| +F: include/linux/kexec_handover.h |
| +F: kernel/kexec_handover.c |
| + |
| KEYS-ENCRYPTED |
| M: Mimi Zohar <zohar@linux.ibm.com> |
| L: linux-integrity@vger.kernel.org |
| --- a/mm/mm_init.c~kexec-add-kexec-handover-kho-generation-helpers |
| +++ a/mm/mm_init.c |
| @@ -30,6 +30,7 @@ |
| #include <linux/crash_dump.h> |
| #include <linux/execmem.h> |
| #include <linux/vmstat.h> |
| +#include <linux/kexec_handover.h> |
| #include <linux/hugetlb.h> |
| #include "internal.h" |
| #include "slab.h" |
| @@ -2770,6 +2771,13 @@ void __init mm_core_init(void) |
| report_meminit(); |
| kmsan_init_shadow(); |
| stack_depot_early_init(); |
| + |
| + /* |
| + * KHO memory setup must happen while memblock is still active, but |
| +	 * as close as possible to buddy initialization. |
| + */ |
| + kho_memory_init(); |
| + |
| memblock_free_all(); |
| mem_init(); |
| kmem_cache_init(); |
| _ |