blob: ec6df4b8beca05924e3940ebc7509a122c645d45 [file] [log] [blame]
/*
* ARM64 kexec.
*/
#define _GNU_SOURCE
#include <assert.h>
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
#include <libfdt.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <linux/elf-em.h>
#include <elf.h>
#include <elf_info.h>
#include <unistd.h>
#include <syscall.h>
#include <errno.h>
#include <linux/random.h>
#include "kexec.h"
#include "kexec-arm64.h"
#include "crashdump.h"
#include "crashdump-arm64.h"
#include "dt-ops.h"
#include "fs2dt.h"
#include "iomem.h"
#include "kexec-syscall.h"
#include "mem_regions.h"
#include "arch/options.h"
#define ROOT_NODE_ADDR_CELLS_DEFAULT 1
#define ROOT_NODE_SIZE_CELLS_DEFAULT 1
#define PROP_ADDR_CELLS "#address-cells"
#define PROP_SIZE_CELLS "#size-cells"
#define PROP_ELFCOREHDR "linux,elfcorehdr"
#define PROP_USABLE_MEM_RANGE "linux,usable-memory-range"
#define PAGE_OFFSET_36 ((0xffffffffffffffffUL) << 36)
#define PAGE_OFFSET_39 ((0xffffffffffffffffUL) << 39)
#define PAGE_OFFSET_42 ((0xffffffffffffffffUL) << 42)
#define PAGE_OFFSET_47 ((0xffffffffffffffffUL) << 47)
#define PAGE_OFFSET_48 ((0xffffffffffffffffUL) << 48)
/* Global flag which indicates that we have tried reading
 * PHYS_OFFSET from 'kcore' already (the read is attempted at most once,
 * from get_memory_ranges()).
 */
static bool try_read_phys_offset_from_kcore = false;

/* Machine specific details. */
static int va_bits = -1;		/* -1 means "not determined yet" */
static unsigned long page_offset;

/* Global variables the core kexec routines expect. */
unsigned char reuse_initrd;
off_t initrd_base;
off_t initrd_size;

/* Map of `uname -m` machine strings to the kexec architecture flag. */
const struct arch_map_entry arches[] = {
	{ "aarch64", KEXEC_ARCH_ARM64 },
	{ "aarch64_be", KEXEC_ARCH_ARM64 },
	{ NULL, 0 },
};

/* Supported kernel image formats; the core loader probes them in order. */
struct file_type file_type[] = {
	{"vmlinux", elf_arm64_probe, elf_arm64_load, elf_arm64_usage},
	{"Image", image_arm64_probe, image_arm64_load, image_arm64_usage},
	{"uImage", uImage_arm64_probe, uImage_arm64_load, uImage_arm64_usage},
	{"zImage", zImage_arm64_probe, zImage_arm64_load, zImage_arm64_usage},
};
int file_types = sizeof(file_type) / sizeof(file_type[0]);

/* arm64 global variables. */
struct arm64_opts arm64_opts;
struct arm64_mem arm64_mem = {
	/* arm64_mem_ngv is the "no good value" sentinel; the accessors
	 * below assert that a real value was recorded before use. */
	.phys_offset = arm64_mem_ngv,
	.vp_offset = arm64_mem_ngv,
};
/**
 * get_phys_offset - Return the recorded physical RAM base (PHYS_OFFSET).
 *
 * Asserts that a value has actually been discovered (the sentinel
 * arm64_mem_ngv has been replaced) before handing it out.
 */
uint64_t get_phys_offset(void)
{
	uint64_t offset = arm64_mem.phys_offset;

	assert(offset != arm64_mem_ngv);
	return offset;
}
/**
 * get_vp_offset - Return the recorded virtual-to-physical offset.
 *
 * Asserts that the value has been initialized (sentinel replaced).
 */
uint64_t get_vp_offset(void)
{
	uint64_t offset = arm64_mem.vp_offset;

	assert(offset != arm64_mem_ngv);
	return offset;
}
/**
 * arm64_process_image_header - Process the arm64 image header.
 *
 * Records text_offset and image_size from the header into arm64_mem.
 * For pre-3.17 kernels (image_size == 0 in the header) fall back to a
 * fixed text offset and a guessed KERNEL_IMAGE_SIZE, with a warning.
 *
 * Returns 0 on success, EFAILED if the header magic does not match.
 */
int arm64_process_image_header(const struct arm64_image_header *h)
{
#if !defined(KERNEL_IMAGE_SIZE)
# define KERNEL_IMAGE_SIZE MiB(16)
#endif

	if (!arm64_header_check_magic(h))
		return EFAILED;

	if (h->image_size) {
		arm64_mem.text_offset = arm64_header_text_offset(h);
		arm64_mem.image_size = arm64_header_image_size(h);
	} else {
		/* For 3.16 and older kernels. */
		arm64_mem.text_offset = 0x80000;
		arm64_mem.image_size = KERNEL_IMAGE_SIZE;
		/* Fixed typos in the user-facing warning:
		 * "compatability" -> "compatibility", "lodaed" -> "loaded". */
		fprintf(stderr,
			"kexec: %s: Warning: Kernel image size set to %lu MiB.\n"
			"  Please verify compatibility with loaded kernel.\n",
			__func__, KERNEL_IMAGE_SIZE / 1024UL / 1024UL);
	}

	return 0;
}
/**
 * arch_usage - Print the arm64-specific option help text.
 *
 * Use fputs() rather than printf(): the usage text is data, not a
 * format string, so any '%' in it must not be interpreted.
 */
void arch_usage(void)
{
	fputs(arm64_opts_usage, stdout);
}
/**
 * arch_process_options - Parse arm64-specific command line options.
 *
 * Fills in arm64_opts (dtb, initrd, console, command_line) from the
 * command line. When --kexec-file-syscall is given, any user-supplied
 * dtb is ignored (the kernel builds its own).
 *
 * Returns 0.
 */
int arch_process_options(int argc, char **argv)
{
	static const char short_options[] = KEXEC_OPT_STR "";
	static const struct option options[] = {
		KEXEC_ARCH_OPTIONS
		{ 0 }
	};
	int opt;
	char *cmdline = NULL;
	const char *append = NULL;
	int do_kexec_file_syscall = 0;

	for (opt = 0; opt != -1; ) {
		opt = getopt_long(argc, argv, short_options, options, 0);

		switch (opt) {
		case OPT_APPEND:
			append = optarg;
			break;
		case OPT_REUSE_CMDLINE:
			cmdline = get_command_line();
			break;
		case OPT_DTB:
			arm64_opts.dtb = optarg;
			break;
		case OPT_INITRD:
			arm64_opts.initrd = optarg;
			break;
		case OPT_KEXEC_FILE_SYSCALL:
			do_kexec_file_syscall = 1;
			/* Bug fix: a missing break here fell through into
			 * OPT_SERIAL and clobbered arm64_opts.console with
			 * a stale or NULL optarg. */
			break;
		case OPT_SERIAL:
			arm64_opts.console = optarg;
			break;
		default:
			break; /* Ignore core and unknown options. */
		}
	}

	arm64_opts.command_line = concat_cmdline(cmdline, append);

	dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__,
		arm64_opts.command_line);
	dbgprintf("%s:%d: initrd: %s\n", __func__, __LINE__,
		arm64_opts.initrd);
	dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__,
		(do_kexec_file_syscall && arm64_opts.dtb ? "(ignored)" :
			arm64_opts.dtb));
	dbgprintf("%s:%d: console: %s\n", __func__, __LINE__,
		arm64_opts.console);

	if (do_kexec_file_syscall)
		arm64_opts.dtb = NULL;

	return 0;
}
/**
 * find_purgatory_sink - Find a sink for purgatory output.
 *
 * Looks up @console under /sys/class/tty and reads its iomem_base
 * attribute. Returns the console's MMIO base address, or 0 when no
 * usable console was found (all failures are reported and tolerated).
 */
static uint64_t find_purgatory_sink(const char *console)
{
	int fd, ret;
	char device[255], mem[255];
	struct stat sb;
	char buffer[10];
	uint64_t iomem = 0x0;

	if (!console)
		return 0;

	ret = snprintf(device, sizeof(device), "/sys/class/tty/%s", console);
	if (ret < 0 || ret >= (int)sizeof(device)) {
		fprintf(stderr, "snprintf failed: %s\n", strerror(errno));
		return 0;
	}

	if (stat(device, &sb) || !S_ISDIR(sb.st_mode)) {
		fprintf(stderr, "kexec: %s: No valid console found for %s\n",
			__func__, device);
		return 0;
	}

	ret = snprintf(mem, sizeof(mem), "%s%s", device, "/iomem_base");
	if (ret < 0 || ret >= (int)sizeof(mem)) {
		fprintf(stderr, "snprintf failed: %s\n", strerror(errno));
		return 0;
	}

	printf("console memory read from %s\n", mem);

	fd = open(mem, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "kexec: %s: Not able to open %s\n",
			__func__, mem);
		return 0;
	}

	memset(buffer, '\0', sizeof(buffer));
	/* Read one byte less than the buffer: a full-size read would leave
	 * no NUL terminator and let sscanf() run past the end. */
	ret = read(fd, buffer, sizeof(buffer) - 1);
	if (ret < 0) {
		fprintf(stderr, "kexec: %s: not able to read fd\n", __func__);
		close(fd);
		return 0;
	}

	/* SCNx64/PRIx64: iomem is uint64_t, "%lx" is wrong on ILP32. */
	sscanf(buffer, "%" SCNx64, &iomem);
	printf("console memory is at %#" PRIx64 "\n", iomem);

	close(fd);
	return iomem;
}
/**
 * struct dtb - Info about a binary device tree.
 *
 * @buf:  Device tree data (heap- or slurp-allocated; ownership follows
 *        whoever last (re)allocated it, see setup_2nd_dtb()).
 * @size: Device tree data size in bytes.
 * @name: Shorthand name of this dtb for messages.
 * @path: Filesystem path it was read from (diagnostics only).
 */
struct dtb {
	char *buf;
	off_t size;
	const char *name;
	const char *path;
};
/**
 * dump_reservemap - Dump the dtb's memory reservation map (debug only).
 *
 * Walks the reserve entries until a zero-sized one terminates the list.
 */
static void dump_reservemap(const struct dtb *dtb)
{
	int entry;

	for (entry = 0; ; entry++) {
		uint64_t addr;
		uint64_t len;

		fdt_get_mem_rsv(dtb->buf, entry, &addr, &len);
		if (!len)
			return;
		dbgprintf("%s: %s {%" PRIx64 ", %" PRIx64 "}\n", __func__,
			dtb->name, addr, len);
	}
}
/**
 * set_bootargs - Set the dtb's bootargs.
 *
 * A NULL or empty command line is a no-op success. Returns 0 on
 * success, EFAILED if the dtb could not be updated.
 */
static int set_bootargs(struct dtb *dtb, const char *command_line)
{
	if (!command_line || command_line[0] == '\0')
		return 0;

	if (dtb_set_bootargs(&dtb->buf, &dtb->size, command_line)) {
		fprintf(stderr,
			"kexec: Set device tree bootargs failed.\n");
		return EFAILED;
	}

	return 0;
}
/**
 * read_proc_dtb - Read /proc/device-tree.
 *
 * Flattens the procfs device-tree into dtb->buf. Returns 0 on success,
 * EFAILED when /proc/device-tree is absent.
 */
static int read_proc_dtb(struct dtb *dtb)
{
	static const char path[] = "/proc/device-tree";
	struct stat s;

	if (stat(path, &s) != 0) {
		dbgprintf("%s: %s\n", __func__, strerror(errno));
		return EFAILED;
	}

	dtb->path = path;
	create_flatten_tree((char **)&dtb->buf, &dtb->size, NULL);

	return 0;
}
/**
 * read_sys_dtb - Read /sys/firmware/fdt.
 *
 * Slurps the firmware-provided flattened device tree. Returns 0 on
 * success, EFAILED when the file does not exist.
 */
static int read_sys_dtb(struct dtb *dtb)
{
	static const char path[] = "/sys/firmware/fdt";
	struct stat s;

	if (stat(path, &s) != 0) {
		dbgprintf("%s: %s\n", __func__, strerror(errno));
		return EFAILED;
	}

	dtb->path = path;
	dtb->buf = slurp_file(path, &dtb->size);

	return 0;
}
/**
 * read_1st_dtb - Read the 1st stage kernel's dtb.
 *
 * Prefers /sys/firmware/fdt, falling back to /proc/device-tree.
 * Returns 0 when either source was read, EFAILED otherwise.
 */
static int read_1st_dtb(struct dtb *dtb)
{
	dtb->name = "dtb_sys";
	if (read_sys_dtb(dtb) == 0) {
		dbgprintf("%s: found %s\n", __func__, dtb->path);
		return 0;
	}

	dtb->name = "dtb_proc";
	if (read_proc_dtb(dtb) == 0) {
		dbgprintf("%s: found %s\n", __func__, dtb->path);
		return 0;
	}

	dbgprintf("%s: not found\n", __func__);
	return EFAILED;
}
/**
 * get_cells_size - Read #address-cells / #size-cells from the fdt root.
 *
 * Missing properties fall back to the historical defaults (1 cell).
 * Returns 0 on success, EFAILED on a malformed tree or property.
 */
static int get_cells_size(void *fdt, uint32_t *address_cells,
	uint32_t *size_cells)
{
	const uint32_t *prop;
	int nodeoffset;
	int prop_len;

	/* default values */
	*address_cells = ROOT_NODE_ADDR_CELLS_DEFAULT;
	*size_cells = ROOT_NODE_SIZE_CELLS_DEFAULT;

	/* under root node */
	nodeoffset = fdt_path_offset(fdt, "/");
	if (nodeoffset < 0)
		return EFAILED;

	prop = fdt_getprop(fdt, nodeoffset, PROP_ADDR_CELLS, &prop_len);
	if (prop) {
		if (prop_len != sizeof(*prop))
			return EFAILED;
		*address_cells = fdt32_to_cpu(*prop);
	}

	prop = fdt_getprop(fdt, nodeoffset, PROP_SIZE_CELLS, &prop_len);
	if (prop) {
		if (prop_len != sizeof(*prop))
			return EFAILED;
		*size_cells = fdt32_to_cpu(*prop);
	}

	dbgprintf("%s: #address-cells:%d #size-cells:%d\n", __func__,
		*address_cells, *size_cells);

	return 0;
}
/**
 * cells_size_fitted - Check that a memory range fits in the cell widths.
 *
 * With only one cell (32 bits) neither the start address nor the span
 * may reach 2^32; two or more cells always hold a 64-bit value.
 */
static bool cells_size_fitted(uint32_t address_cells, uint32_t size_cells,
	struct memory_range *range)
{
	unsigned long long span = range->end - range->start + 1;

	dbgprintf("%s: %llx-%llx\n", __func__, range->start, range->end);

	if (address_cells == 1 && range->start >= (1ULL << 32))
		return false;
	if (size_cells == 1 && span >= (1ULL << 32))
		return false;

	return true;
}
/**
 * fill_property - Encode @val into @buf as @cells big-endian fdt cells.
 *
 * One cell stores the truncated 32-bit value; for two or more cells the
 * leading cells are zeroed and the 64-bit value occupies the last 8 bytes.
 */
static void fill_property(void *buf, uint64_t val, uint32_t cells)
{
	char *out = buf;

	if (cells == 1) {
		uint32_t val32 = cpu_to_fdt32((uint32_t)val);

		memcpy(out, &val32, sizeof(val32));
		return;
	}

	/* Zero-pad the leading cells, then append the 64-bit value. */
	memset(out, 0, cells * sizeof(uint32_t) - sizeof(uint64_t));
	out += cells * sizeof(uint32_t) - sizeof(uint64_t);

	val = cpu_to_fdt64(val);
	memcpy(out, &val, sizeof(val));
}
/**
 * fdt_setprop_ranges - Set @name on @nodeoffset to a list of ranges.
 *
 * Encodes @nr_ranges (start, size) pairs using the given cell widths.
 * When @reverse is true the ranges are emitted last-to-first.
 *
 * Returns the libfdt fdt_setprop() result, or -ENOMEM on allocation
 * failure. Uses char pointers for the cursor: arithmetic on void * is
 * a GCC extension, not standard C.
 */
static int fdt_setprop_ranges(void *fdt, int nodeoffset, const char *name,
	struct memory_range *ranges, int nr_ranges, bool reverse,
	uint32_t address_cells, uint32_t size_cells)
{
	char *buf, *prop;
	size_t buf_size;
	int i, result;
	struct memory_range *range;

	buf_size = (address_cells + size_cells) * sizeof(uint32_t) * nr_ranges;
	prop = buf = xmalloc(buf_size);
	if (!buf)
		return -ENOMEM;

	for (i = 0; i < nr_ranges; i++) {
		if (reverse)
			range = ranges + (nr_ranges - 1 - i);
		else
			range = ranges + i;

		fill_property(prop, range->start, address_cells);
		prop += address_cells * sizeof(uint32_t);

		fill_property(prop, range->end - range->start + 1, size_cells);
		prop += size_cells * sizeof(uint32_t);
	}

	result = fdt_setprop(fdt, nodeoffset, name, buf, buf_size);

	free(buf);

	return result;
}
/**
 * setup_2nd_dtb - Setup the 2nd stage kernel's dtb.
 *
 * Validates @dtb, merges @command_line into /chosen/bootargs, refreshes
 * the kaslr-seed property with fresh entropy, and — for crash kernels —
 * adds linux,elfcorehdr and linux,usable-memory-range. On success
 * @dtb->buf points at a newly allocated, packed blob.
 *
 * Returns 0 on success, EFAILED / negative errno on failure.
 */
static int setup_2nd_dtb(struct dtb *dtb, char *command_line, int on_crash)
{
	uint32_t address_cells, size_cells;
	uint64_t fdt_val64;
	uint64_t *prop;
	/* Bug fix: was declared int, which truncated the 64-bit property
	 * read below — a seed whose low 32 bits are zero would wrongly
	 * pass the "wiped to 0" check. */
	uint64_t kaslr_seed;
	char *new_buf = NULL;
	int len, range_len;
	int nodeoffset;
	int new_size;
	int i, result;

	result = fdt_check_header(dtb->buf);
	if (result) {
		fprintf(stderr, "kexec: Invalid 2nd device tree.\n");
		return EFAILED;
	}

	result = set_bootargs(dtb, command_line);
	if (result) {
		fprintf(stderr, "kexec: cannot set bootargs.\n");
		result = -EINVAL;
		goto on_error;
	}

	/* determine #address-cells and #size-cells */
	result = get_cells_size(dtb->buf, &address_cells, &size_cells);
	if (result) {
		fprintf(stderr, "kexec: cannot determine cells-size.\n");
		result = -EINVAL;
		goto on_error;
	}

	if (!cells_size_fitted(address_cells, size_cells,
				&elfcorehdr_mem)) {
		fprintf(stderr, "kexec: elfcorehdr doesn't fit cells-size.\n");
		result = -EINVAL;
		goto on_error;
	}

	for (i = 0; i < usablemem_rgns.size; i++) {
		if (!cells_size_fitted(address_cells, size_cells,
					&crash_reserved_mem[i])) {
			fprintf(stderr, "kexec: usable memory range doesn't fit cells-size.\n");
			result = -EINVAL;
			goto on_error;
		}
	}

	/* duplicate dt blob, leaving room for the new properties */
	range_len = sizeof(uint32_t) * (address_cells + size_cells);
	new_size = fdt_totalsize(dtb->buf)
		+ fdt_prop_len(PROP_ELFCOREHDR, range_len)
		+ fdt_prop_len(PROP_USABLE_MEM_RANGE,
				range_len * usablemem_rgns.size);

	new_buf = xmalloc(new_size);
	result = fdt_open_into(dtb->buf, new_buf, new_size);
	if (result) {
		dbgprintf("%s: fdt_open_into failed: %s\n", __func__,
			fdt_strerror(result));
		result = -ENOSPC;
		goto on_error;
	}

	/* fixup 'kaslr-seed' with a random value, if supported */
	nodeoffset = fdt_path_offset(new_buf, "/chosen");
	prop = fdt_getprop_w(new_buf, nodeoffset, "kaslr-seed", &len);
	if (!prop || len != sizeof(uint64_t)) {
		dbgprintf("%s: no kaslr-seed found\n", __func__);

		/* for kexec warm reboot case, we don't need to fixup
		 * other dtb properties
		 */
		if (!on_crash) {
			dump_reservemap(dtb);
			if (new_buf)
				free(new_buf);
			return result;
		}
	} else {
		kaslr_seed = fdt64_to_cpu(*prop);

		/* kaslr_seed must be wiped clean by primary
		 * kernel during boot
		 */
		if (kaslr_seed != 0) {
			dbgprintf("%s: kaslr-seed is not wiped to 0.\n",
					__func__);
			result = -EINVAL;
			goto on_error;
		}

		/*
		 * Invoke the getrandom system call with
		 * GRND_NONBLOCK, to make sure we
		 * have a valid random seed to pass to the
		 * secondary kernel.
		 */
		result = syscall(SYS_getrandom, &fdt_val64,
				sizeof(fdt_val64),
				GRND_NONBLOCK);
		if (result == -1) {
			fprintf(stderr, "%s: Reading random bytes failed.\n",
					__func__);

			/* Currently on some arm64 platforms this
			 * 'getrandom' system call fails while booting
			 * the platform.
			 *
			 * In case, this happens at best we can set
			 * the 'kaslr_seed' as 0, indicating that the
			 * 2nd kernel will be booted with a 'nokaslr'
			 * like behaviour.
			 */
			fdt_val64 = 0UL;
			dbgprintf("%s: Disabling KASLR in secondary kernel.\n",
					__func__);
		}

		nodeoffset = fdt_path_offset(new_buf, "/chosen");
		result = fdt_setprop_inplace(new_buf,
				nodeoffset, "kaslr-seed",
				&fdt_val64, sizeof(fdt_val64));
		if (result) {
			dbgprintf("%s: fdt_setprop failed: %s\n",
					__func__, fdt_strerror(result));
			result = -EINVAL;
			goto on_error;
		}
	}

	if (on_crash) {
		/* add linux,elfcorehdr */
		nodeoffset = fdt_path_offset(new_buf, "/chosen");
		result = fdt_setprop_ranges(new_buf, nodeoffset,
				PROP_ELFCOREHDR, &elfcorehdr_mem, 1, false,
				address_cells, size_cells);
		if (result) {
			dbgprintf("%s: fdt_setprop failed: %s\n", __func__,
					fdt_strerror(result));
			result = -EINVAL;
			goto on_error;
		}

		/*
		 * add linux,usable-memory-range
		 *
		 * crash dump kernel support one or two regions, to make
		 * compatibility with existing user-space and older kdump, the
		 * low region is always the last one.
		 */
		nodeoffset = fdt_path_offset(new_buf, "/chosen");
		result = fdt_setprop_ranges(new_buf, nodeoffset,
				PROP_USABLE_MEM_RANGE,
				usablemem_rgns.ranges, usablemem_rgns.size, true,
				address_cells, size_cells);
		if (result) {
			dbgprintf("%s: fdt_setprop failed: %s\n", __func__,
					fdt_strerror(result));
			result = -EINVAL;
			goto on_error;
		}
	}

	fdt_pack(new_buf);

	dtb->buf = new_buf;
	dtb->size = fdt_totalsize(new_buf);

	dump_reservemap(dtb);

	return result;

on_error:
	fprintf(stderr, "kexec: %s failed.\n", __func__);
	if (new_buf)
		free(new_buf);

	return result;
}
unsigned long arm64_locate_kernel_segment(struct kexec_info *info)
{
unsigned long hole;
if (info->kexec_flags & KEXEC_ON_CRASH) {
unsigned long hole_end;
hole = (crash_reserved_mem[usablemem_rgns.size - 1].start < mem_min ?
mem_min : crash_reserved_mem[usablemem_rgns.size - 1].start);
hole = _ALIGN_UP(hole, MiB(2));
hole_end = hole + arm64_mem.text_offset + arm64_mem.image_size;
if ((hole_end > mem_max) ||
(hole_end > crash_reserved_mem[usablemem_rgns.size - 1].end)) {
dbgprintf("%s: Crash kernel out of range\n", __func__);
hole = ULONG_MAX;
}
} else {
hole = locate_hole(info,
arm64_mem.text_offset + arm64_mem.image_size,
MiB(2), 0, ULONG_MAX, 1);
if (hole == ULONG_MAX)
dbgprintf("%s: locate_hole failed\n", __func__);
}
return hole;
}
/**
 * arm64_load_other_segments - Prepare the dtb, initrd and purgatory segments.
 *
 * Builds the second-stage dtb (user-supplied or copied from the running
 * kernel), places the optional initrd and the dtb after the kernel image,
 * loads purgatory, and wires purgatory's entry symbols. Returns 0 on
 * success, EFAILED on any error.
 */
int arm64_load_other_segments(struct kexec_info *info,
	unsigned long image_base)
{
	int result;
	unsigned long dtb_base;
	unsigned long hole_min;
	unsigned long hole_max;
	unsigned long initrd_end;
	uint64_t purgatory_sink;
	char *initrd_buf = NULL;
	struct dtb dtb;
	char command_line[COMMAND_LINE_SIZE] = "";

	if (arm64_opts.command_line) {
		/* Reject rather than silently truncate an over-long line. */
		if (strlen(arm64_opts.command_line) >
			sizeof(command_line) - 1) {
			fprintf(stderr,
				"Kernel command line too long for kernel!\n");
			return EFAILED;
		}

		strncpy(command_line, arm64_opts.command_line,
			sizeof(command_line) - 1);
		command_line[sizeof(command_line) - 1] = 0;
	}

	purgatory_sink = find_purgatory_sink(arm64_opts.console);

	dbgprintf("%s:%d: purgatory sink: 0x%" PRIx64 "\n", __func__, __LINE__,
		purgatory_sink);

	if (arm64_opts.dtb) {
		/* NOTE(review): dtb.buf is not NULL-checked here — this
		 * assumes slurp_file() aborts on failure; confirm. */
		dtb.name = "dtb_user";
		dtb.buf = slurp_file(arm64_opts.dtb, &dtb.size);
	} else {
		result = read_1st_dtb(&dtb);
		if (result) {
			fprintf(stderr,
				"kexec: Error: No device tree available.\n");
			return EFAILED;
		}
	}

	result = setup_2nd_dtb(&dtb, command_line,
		info->kexec_flags & KEXEC_ON_CRASH);
	if (result)
		return EFAILED;

	/* Put the other segments after the image. */
	hole_min = image_base + arm64_mem.image_size;
	if (info->kexec_flags & KEXEC_ON_CRASH)
		/* Crash segments must stay inside the reserved region. */
		hole_max = crash_reserved_mem[usablemem_rgns.size - 1].end;
	else
		hole_max = ULONG_MAX;

	if (arm64_opts.initrd) {
		initrd_buf = slurp_file(arm64_opts.initrd, &initrd_size);
		if (!initrd_buf)
			fprintf(stderr, "kexec: Empty ramdisk file.\n");
		else {
			/* Put the initrd after the kernel. */
			initrd_base = add_buffer_phys_virt(info, initrd_buf,
				initrd_size, initrd_size, 0,
				hole_min, hole_max, 1, 0);

			initrd_end = initrd_base + initrd_size;

			/* Check limits as specified in booting.txt.
			 * The kernel may have as little as 32 GB of address space to map
			 * system memory and both kernel and initrd must be 1GB aligend.
			 */
			if (_ALIGN_UP(initrd_end, GiB(1)) - _ALIGN_DOWN(image_base, GiB(1)) > GiB(32)) {
				fprintf(stderr, "kexec: Error: image + initrd too big.\n");
				return EFAILED;
			}

			dbgprintf("initrd: base %lx, size %lxh (%ld)\n",
				initrd_base, initrd_size, initrd_size);

			result = dtb_set_initrd((char **)&dtb.buf,
				&dtb.size, initrd_base,
				initrd_base + initrd_size);
			if (result)
				return EFAILED;
		}
	}

	if (!initrd_buf) {
		/* Don't reuse the initrd addresses from 1st DTB */
		dtb_clear_initrd((char **)&dtb.buf, &dtb.size);
	}

	/* Check size limit as specified in booting.txt. */
	if (dtb.size > MiB(2)) {
		fprintf(stderr, "kexec: Error: dtb too big.\n");
		return EFAILED;
	}

	dtb_base = add_buffer_phys_virt(info, dtb.buf, dtb.size, dtb.size,
		0, hole_min, hole_max, 1, 0);

	/* dtb_base is valid if we got here. */

	dbgprintf("dtb: base %lx, size %lxh (%ld)\n", dtb_base, dtb.size,
		dtb.size);

	elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size,
		hole_min, hole_max, 1, 0);

	info->entry = (void *)elf_rel_get_addr(&info->rhdr, "purgatory_start");

	/* Tell purgatory where to log, jump, and find the dtb. */
	elf_rel_set_symbol(&info->rhdr, "arm64_sink", &purgatory_sink,
		sizeof(purgatory_sink));

	elf_rel_set_symbol(&info->rhdr, "arm64_kernel_entry", &image_base,
		sizeof(image_base));

	elf_rel_set_symbol(&info->rhdr, "arm64_dtb_addr", &dtb_base,
		sizeof(dtb_base));

	return 0;
}
/**
 * virt_to_phys - For processing elf file values.
 *
 * Translate a kernel virtual address to physical using the discovered
 * vp_offset and phys_offset.
 */
unsigned long virt_to_phys(unsigned long v)
{
	return v - get_vp_offset() + get_phys_offset();
}
/**
 * phys_to_virt - For crashdump setup.
 *
 * Translate a physical address to the linear-map virtual address using
 * the discovered phys_offset and the elf_info's page_offset.
 */
unsigned long phys_to_virt(struct crash_elf_info *elf_info,
	unsigned long long p)
{
	return p - get_phys_offset() + elf_info->page_offset;
}
/**
 * add_segment - Use virt_to_phys when loading elf files.
 *
 * Thin wrapper around add_segment_phys_virt() with phys=1 so the base
 * address is translated via virt_to_phys().
 */
void add_segment(struct kexec_info *info, const void *buf, size_t bufsz,
	unsigned long base, size_t memsz)
{
	add_segment_phys_virt(info, buf, bufsz, base, memsz, 1);
}
/**
 * set_phys_offset - Record a PHYS_OFFSET candidate.
 *
 * Keeps the lowest offset seen across the different discovery methods
 * (vmcoreinfo, PT_LOADs, /proc/iomem). @set_method is only used for
 * the debug message.
 */
static inline void set_phys_offset(int64_t v, char *set_method)
{
	if (arm64_mem.phys_offset == arm64_mem_ngv
		|| v < arm64_mem.phys_offset) {
		arm64_mem.phys_offset = v;
		/* PRIx64: "%lx" with a 64-bit value is wrong on ILP32. */
		dbgprintf("%s: phys_offset : %016" PRIx64 " (method : %s)\n",
				__func__, arm64_mem.phys_offset,
				set_method);
	}
}
/**
 * get_va_bits - Helper for getting VA_BITS
 *
 * If the vmcoreinfo scan already set va_bits, keep it; otherwise derive
 * it from the _stext symbol address as per arch/arm64/Kconfig.
 * Returns 0 on success, -1 on failure.
 */
static int get_va_bits(void)
{
	unsigned long long stext;

	/*
	 * if already got from kcore
	 */
	if (va_bits != -1)
		goto out;

	/* For kernel older than v4.19 */
	fprintf(stderr, "Warning, can't get the VA_BITS from kcore\n");
	stext = get_kernel_sym("_stext");
	if (!stext) {
		fprintf(stderr, "Can't get the symbol of _stext.\n");
		return -1;
	}

	/* Derive va_bits as per arch/arm64/Kconfig */
	if ((stext & PAGE_OFFSET_36) == PAGE_OFFSET_36)
		va_bits = 36;
	else if ((stext & PAGE_OFFSET_39) == PAGE_OFFSET_39)
		va_bits = 39;
	else if ((stext & PAGE_OFFSET_42) == PAGE_OFFSET_42)
		va_bits = 42;
	else if ((stext & PAGE_OFFSET_47) == PAGE_OFFSET_47)
		va_bits = 47;
	else if ((stext & PAGE_OFFSET_48) == PAGE_OFFSET_48)
		va_bits = 48;
	else {
		fprintf(stderr,
			"Cannot find a proper _stext for calculating VA_BITS\n");
		return -1;
	}

out:
	dbgprintf("va_bits : %d\n", va_bits);

	return 0;
}
/**
 * get_page_offset - Helper for getting PAGE_OFFSET
 *
 * Determines the linear-map base from va_bits and the _text symbol:
 * kernels since 5.4 place the image above UINT64_MAX << (va_bits - 1),
 * which changes where the linear map starts. Returns 0 on success.
 */
int get_page_offset(unsigned long *page_offset)
{
	unsigned long long text_addr, half_point;
	int ret;

	text_addr = get_kernel_sym("_text");
	if (!text_addr) {
		fprintf(stderr, "Can't get the symbol of _text to calculate page_offset.\n");
		return -1;
	}

	ret = get_va_bits();
	if (ret < 0)
		return ret;

	/* Since kernel 5.4, kernel image is put above
	 * UINT64_MAX << (va_bits - 1)
	 */
	half_point = UINT64_MAX << (va_bits - 1);

	if (text_addr < half_point)
		*page_offset = UINT64_MAX << (va_bits - 1); /* older kernel */
	else
		*page_offset = UINT64_MAX << va_bits;

	dbgprintf("page_offset : %lx\n", *page_offset);

	return 0;
}
/* Parse a VMCOREINFO line; record VA_BITS when the matching key is seen. */
static void arm64_scan_vmcoreinfo(char *pos)
{
	static const char key[] = "NUMBER(VA_BITS)=";
	const size_t keylen = strlen(key);

	if (memcmp(key, pos, keylen) == 0)
		va_bits = strtoul(pos + keylen, NULL, 10);
}
/**
 * get_phys_offset_from_vmcoreinfo_pt_note - Helper for getting PHYS_OFFSET (and va_bits)
 * from VMCOREINFO note inside 'kcore'.
 *
 * Installs arm64_scan_vmcoreinfo as the per-line scanner (which also
 * captures VA_BITS as a side effect) and delegates the ELF note parsing
 * to read_phys_offset_elf_kcore(). Returns its result, or EFAILED if
 * /proc/kcore cannot be opened.
 */
static int get_phys_offset_from_vmcoreinfo_pt_note(long *phys_offset)
{
	int fd, ret = 0;

	if ((fd = open("/proc/kcore", O_RDONLY)) < 0) {
		fprintf(stderr, "Can't open (%s).\n", "/proc/kcore");
		return EFAILED;
	}

	arch_scan_vmcoreinfo = arm64_scan_vmcoreinfo;
	ret = read_phys_offset_elf_kcore(fd, phys_offset);

	close(fd);
	return ret;
}
/**
 * get_phys_base_from_pt_load - Helper for getting PHYS_OFFSET
 * from PT_LOADs inside 'kcore'.
 *
 * Walks every PT_LOAD of /proc/kcore and, for linear-map segments,
 * derives phys_offset = phys_start - (virt_start & ~page_offset).
 * Returns 0 on success, EFAILED/negative on error.
 */
int get_phys_base_from_pt_load(long *phys_offset)
{
	unsigned long long phys_start, virt_start;
	int i, fd, ret;

	ret = get_page_offset(&page_offset);
	if (ret < 0)
		return ret;

	fd = open("/proc/kcore", O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "Can't open (%s).\n", "/proc/kcore");
		return EFAILED;
	}

	read_elf(fd);

	for (i = 0; get_pt_load(i, &phys_start, NULL, &virt_start, NULL);
			i++) {
		/* Skip segments that are not usable linear-map mappings. */
		if (virt_start == NOT_KV_ADDR)
			continue;
		if (virt_start < page_offset)
			continue;
		if (phys_start == NOT_PADDR)
			continue;
		*phys_offset = phys_start - (virt_start & ~page_offset);
	}

	close(fd);
	return 0;
}
/* Decide whether a /proc/iomem resource line should be carved out of the
 * RAM map. The crash kernel region is kept only when it exactly matches
 * the range the kernel reports for loading; System RAM and kernel
 * code/data lines are never excluded.
 */
static bool to_be_excluded(char *str, unsigned long long start, unsigned long long end)
{
	if (!strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL))) {
		uint64_t load_start, load_end;

		if (get_crash_kernel_load_range(&load_start, &load_end))
			return true;
		return !(load_start == start && load_end == end);
	}

	if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)))
		return false;
	if (!strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)))
		return false;
	if (!strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)))
		return false;

	return true;
}
/**
 * get_memory_ranges - Try to get the memory ranges from
 * /proc/iomem.
 *
 * Also opportunistically determines PHYS_OFFSET (once per process):
 * first from the kcore VMCOREINFO note, then from kcore PT_LOADs, and
 * finally falling back to the first /proc/iomem RAM entry.
 *
 * On success *range/*ranges receive a heap-allocated array of RAM
 * ranges (caller keeps it for the program's lifetime). Returns 0 on
 * success, -ENOMEM on allocation failure; dies if iomem can't be opened.
 */
int get_memory_ranges(struct memory_range **range, int *ranges,
	unsigned long kexec_flags)
{
	long phys_offset = -1;
	FILE *fp;
	const char *iomem = proc_iomem();
	char line[MAX_LINE], *str;
	unsigned long long start, end;
	int n, consumed;
	struct memory_ranges memranges;
	struct memory_range *last, excl_range;
	int ret;

	if (!try_read_phys_offset_from_kcore) {
		/* Since kernel version 4.19, 'kcore' contains
		 * a new PT_NOTE which carries the VMCOREINFO
		 * information.
		 * If the same is available, one should prefer the
		 * same to retrieve 'PHYS_OFFSET' value exported by
		 * the kernel as this is now the standard interface
		 * exposed by kernel for sharing machine specific
		 * details with the userland.
		 */
		ret = get_phys_offset_from_vmcoreinfo_pt_note(&phys_offset);
		if (!ret) {
			if (phys_offset != -1)
				set_phys_offset(phys_offset,
						"vmcoreinfo pt_note");
		} else {
			/* If we are running on a older kernel,
			 * try to retrieve the 'PHYS_OFFSET' value
			 * exported by the kernel in the 'kcore'
			 * file by reading the PT_LOADs and determining
			 * the correct combination.
			 */
			ret = get_phys_base_from_pt_load(&phys_offset);
			if (!ret)
				if (phys_offset != -1)
					set_phys_offset(phys_offset,
							"pt_load");
		}

		/* Only attempt the kcore read once, whatever the outcome. */
		try_read_phys_offset_from_kcore = true;
	}

	fp = fopen(iomem, "r");
	if (!fp)
		die("Cannot open %s\n", iomem);

	memranges.ranges = NULL;
	memranges.size = memranges.max_size = 0;

	while (fgets(line, sizeof(line), fp) != 0) {
		/* Lines look like "start-end : name"; %n records where
		 * the resource name begins. */
		n = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed);
		if (n != 2)
			continue;
		str = line + consumed;

		if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) {
			ret = mem_regions_alloc_and_add(&memranges,
					start, end - start + 1, RANGE_RAM);
			if (ret) {
				fprintf(stderr,
					"Cannot allocate memory for ranges\n");
				fclose(fp);
				return -ENOMEM;
			}

			dbgprintf("%s:+[%d] %016llx - %016llx\n", __func__,
				memranges.size - 1,
				memranges.ranges[memranges.size - 1].start,
				memranges.ranges[memranges.size - 1].end);
		} else if (to_be_excluded(str, start, end)) {
			if (!memranges.size)
				continue;

			/*
			 * Note: mem_regions_exclude() doesn't guarantee
			 * that the ranges are sorted out, but as long as
			 * we cope with /proc/iomem, we only operate on
			 * the last entry and so it is safe.
			 */

			/* The last System RAM range */
			last = &memranges.ranges[memranges.size - 1];

			if (last->end < start)
				/* New resource outside of System RAM */
				continue;
			if (end < last->start)
				/* Already excluded by parent resource */
				continue;

			excl_range.start = start;
			excl_range.end = end;
			ret = mem_regions_alloc_and_exclude(&memranges, &excl_range);
			if (ret) {
				fprintf(stderr,
					"Cannot allocate memory for ranges (exclude)\n");
				fclose(fp);
				return -ENOMEM;
			}
			dbgprintf("%s:- %016llx - %016llx\n",
					__func__, start, end);
		}
	}

	fclose(fp);

	*range = memranges.ranges;
	*ranges = memranges.size;

	/* As a fallback option, we can try determining the PHYS_OFFSET
	 * value from the '/proc/iomem' entries as well.
	 *
	 * But note that this can be flaky, as on certain arm64
	 * platforms, it has been noticed that due to a hole at the
	 * start of physical ram exposed to kernel
	 * (i.e. it doesn't start from address 0), the kernel still
	 * calculates the 'memstart_addr' kernel variable as 0.
	 *
	 * Whereas the SYSTEM_RAM or IOMEM_RESERVED range in
	 * '/proc/iomem' would carry a first entry whose start address
	 * is non-zero (as the physical ram exposed to the kernel
	 * starts from a non-zero address).
	 *
	 * In such cases, if we rely on '/proc/iomem' entries to
	 * calculate the phys_offset, then we will have mismatch
	 * between the user-space and kernel space 'PHYS_OFFSET'
	 * value.
	 */
	if (memranges.size)
		set_phys_offset(memranges.ranges[0].start, "iomem");

	dbgprint_mem_range("System RAM ranges;",
				memranges.ranges, memranges.size);

	return 0;
}
/**
 * arch_compat_trampoline - No compatibility trampoline is needed on arm64.
 */
int arch_compat_trampoline(struct kexec_info *info)
{
	(void)info;
	return 0;
}
/* Accept relocatable purgatory objects only when built for AArch64. */
int machine_verify_elf_rel(struct mem_ehdr *ehdr)
{
	if (ehdr->e_machine == EM_AARCH64)
		return 1;
	return 0;
}
enum aarch64_rel_type {
R_AARCH64_NONE = 0,
R_AARCH64_ABS64 = 257,
R_AARCH64_PREL32 = 261,
R_AARCH64_MOVW_UABS_G0_NC = 264,
R_AARCH64_MOVW_UABS_G1_NC = 266,
R_AARCH64_MOVW_UABS_G2_NC = 268,
R_AARCH64_MOVW_UABS_G3 =269,
R_AARCH64_LD_PREL_LO19 = 273,
R_AARCH64_ADR_PREL_LO21 = 274,
R_AARCH64_ADR_PREL_PG_HI21 = 275,
R_AARCH64_ADD_ABS_LO12_NC = 277,
R_AARCH64_JUMP26 = 282,
R_AARCH64_CALL26 = 283,
R_AARCH64_LDST64_ABS_LO12_NC = 286,
R_AARCH64_LDST128_ABS_LO12_NC = 299
};
/**
 * get_bits - Extract the inclusive bit field [start, end] from value.
 *
 * Returns the field right-aligned. Operates on uint64_t: the callers
 * pass full 64-bit relocation values and request fields up to bit 63
 * (e.g. MOVW_UABS_G3), so the previous uint32_t parameter truncated the
 * value and made "value >> 48" shift by more than the type width (UB).
 */
static uint64_t get_bits(uint64_t value, int start, int end)
{
	uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1;

	return (value >> start) & mask;
}
/**
 * machine_apply_elf_rel - Apply one AArch64 relocation to purgatory.
 *
 * @ptr points at the instruction/data word to patch, @address is its
 * run-time address, @value the resolved symbol value. Instruction words
 * are read and written little-endian; the new immediate is ADDED into
 * the existing encoding, which assumes the assembled field is zero.
 * Dies on unknown relocation types or misaligned values.
 */
void machine_apply_elf_rel(struct mem_ehdr *ehdr, struct mem_sym *UNUSED(sym),
	unsigned long r_type, void *ptr, unsigned long address,
	unsigned long value)
{
	uint64_t *loc64;
	uint32_t *loc32;
	uint64_t *location = (uint64_t *)ptr;
	uint64_t data = *location;	/* saved only for the debug print */
	uint64_t imm;
	const char *type = NULL;

	switch((enum aarch64_rel_type)r_type) {
	case R_AARCH64_ABS64:
		/* 64-bit data word: S + A. */
		type = "ABS64";
		loc64 = ptr;
		*loc64 = cpu_to_elf64(ehdr, value);
		break;
	case R_AARCH64_PREL32:
		/* 32-bit data word: S + A - P. */
		type = "PREL32";
		loc32 = ptr;
		*loc32 = cpu_to_elf32(ehdr, value - address);
		break;
	/* Set a MOV[KZ] immediate field to bits [15:0] of X. No overflow check */
	case R_AARCH64_MOVW_UABS_G0_NC:
		type = "MOVW_UABS_G0_NC";
		loc32 = ptr;
		imm = get_bits(value, 0, 15);
		/* MOV[KZ] imm16 occupies instruction bits [20:5]. */
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
		break;
	/* Set a MOV[KZ] immediate field to bits [31:16] of X. No overflow check */
	case R_AARCH64_MOVW_UABS_G1_NC:
		type = "MOVW_UABS_G1_NC";
		loc32 = ptr;
		imm = get_bits(value, 16, 31);
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
		break;
	/* Set a MOV[KZ] immediate field to bits [47:32] of X. No overflow check */
	case R_AARCH64_MOVW_UABS_G2_NC:
		type = "MOVW_UABS_G2_NC";
		loc32 = ptr;
		imm = get_bits(value, 32, 47);
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
		break;
	/* Set a MOV[KZ] immediate field to bits [63:48] of X */
	case R_AARCH64_MOVW_UABS_G3:
		type = "MOVW_UABS_G3";
		loc32 = ptr;
		imm = get_bits(value, 48, 63);
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
		break;
	case R_AARCH64_LD_PREL_LO19:
		/* imm19 = offset >> 2 placed at bits [23:5]:
		 * (offset << 3) & 0xffffe0 is the same shift/mask combined. */
		type = "LD_PREL_LO19";
		loc32 = ptr;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ (((value - address) << 3) & 0xffffe0));
		break;
	case R_AARCH64_ADR_PREL_LO21:
		if (value & 3)
			die("%s: ERROR Unaligned value: %lx\n", __func__,
				value);
		type = "ADR_PREL_LO21";
		loc32 = ptr;
		/* With value 4-aligned, ADR's immlo bits [30:29] stay 0 and
		 * immhi takes offset[20:2] at bits [23:5] — same field math
		 * as LO19 above. */
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ (((value - address) << 3) & 0xffffe0));
		break;
	case R_AARCH64_ADR_PREL_PG_HI21:
		type = "ADR_PREL_PG_HI21";
		/* Page delta: Page(S+A) - Page(P), in 4 KiB units. */
		imm = ((value & ~0xfff) - (address & ~0xfff)) >> 12;
		loc32 = ptr;
		/* ADRP: immlo -> bits [30:29], immhi -> bits [23:5]. */
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ ((imm & 3) << 29) + ((imm & 0x1ffffc) << (5 - 2)));
		break;
	case R_AARCH64_ADD_ABS_LO12_NC:
		type = "ADD_ABS_LO12_NC";
		loc32 = ptr;
		/* ADD imm12 occupies bits [21:10]. */
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ ((value & 0xfff) << 10));
		break;
	case R_AARCH64_JUMP26:
		type = "JUMP26";
		loc32 = ptr;
		/* B: imm26 = (S + A - P) >> 2 in bits [25:0]. */
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ (((value - address) >> 2) & 0x3ffffff));
		break;
	case R_AARCH64_CALL26:
		type = "CALL26";
		loc32 = ptr;
		/* BL: same imm26 field as JUMP26. */
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ (((value - address) >> 2) & 0x3ffffff));
		break;
	/* encode imm field with bits [11:3] of value */
	case R_AARCH64_LDST64_ABS_LO12_NC:
		if (value & 7)
			die("%s: ERROR Unaligned value: %lx\n", __func__,
				value);
		type = "LDST64_ABS_LO12_NC";
		loc32 = ptr;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ ((value & 0xff8) << (10 - 3)));
		break;
	/* encode imm field with bits [11:4] of value */
	case R_AARCH64_LDST128_ABS_LO12_NC:
		if (value & 15)
			die("%s: ERROR Unaligned value: %lx\n", __func__,
				value);
		type = "LDST128_ABS_LO12_NC";
		loc32 = ptr;
		imm = value & 0xff0;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << (10 - 4)));
		break;
	default:
		die("%s: ERROR Unknown type: %lu\n", __func__, r_type);
		break;
	}

	dbgprintf("%s: %s %016lx->%016lx\n", __func__, type, data, *location);
}
/* Mark the first kernel's initrd for reuse (set by --reuse-initrd). */
void arch_reuse_initrd(void)
{
	reuse_initrd = 1;
}
/* No arch-specific purgatory fixups are needed on arm64; symbols are
 * set directly in arm64_load_other_segments(). */
void arch_update_purgatory(struct kexec_info *UNUSED(info))
{
}