blob: 26f9a01a8174c75657fdd28cd5bfc65282e60a6e [file] [log] [blame]
/*
* kexec: Linux boots Linux
*
* Created by: R Sharada (sharada@in.ibm.com)
* Copyright (C) IBM Corporation, 2005. All rights reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation (version 2 of the License).
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <elf.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "../../kexec.h"
#include "../../kexec-elf.h"
#include "../../kexec-syscall.h"
#include "../../crashdump.h"
#include "kexec-ppc64.h"
#include "../../fs2dt.h"
#include "crashdump-ppc64.h"
#define DEVTREE_CRASHKERNEL_BASE "/proc/device-tree/chosen/linux,crashkernel-base"
#define DEVTREE_CRASHKERNEL_SIZE "/proc/device-tree/chosen/linux,crashkernel-size"
unsigned int num_of_lmb_sets;
unsigned int is_dyn_mem_v2;
uint64_t lmb_size;
static struct crash_elf_info elf_info64 =
{
class: ELFCLASS64,
#if BYTE_ORDER == LITTLE_ENDIAN
data: ELFDATA2LSB,
#else
data: ELFDATA2MSB,
#endif
machine: EM_PPC64,
page_offset: PAGE_OFFSET,
lowmem_limit: MAXMEM,
};
static struct crash_elf_info elf_info32 =
{
class: ELFCLASS32,
data: ELFDATA2MSB,
machine: EM_PPC64,
page_offset: PAGE_OFFSET,
lowmem_limit: MAXMEM,
};
extern struct arch_options_t arch_options;
/* Stores a sorted list of RAM memory ranges for which to create elf headers.
* A separate program header is created for backup region
*/
static struct memory_range *crash_memory_range = NULL;
/* Define a variable to replace the CRASH_MAX_MEMORY_RANGES macro */
static int crash_max_memory_ranges;
/*
* Used to save various memory ranges/regions needed for the captured
* kernel to boot. (lime memmap= option in other archs)
*/
mem_rgns_t usablemem_rgns = {0, NULL};
static unsigned long long cstart, cend;
static int memory_ranges;
/*
* Exclude the region that lies within crashkernel and above the memory
* limit which is reflected by mem= kernel option.
*/
static void exclude_crash_region(uint64_t start, uint64_t end)
{
/* If memory_limit is set then exclude the memory region above it. */
if (memory_limit) {
if (start >= memory_limit)
return;
if (end > memory_limit)
end = memory_limit;
}
if (cstart < end && cend > start) {
if (start < cstart && end > cend) {
crash_memory_range[memory_ranges].start = start;
crash_memory_range[memory_ranges].end = cstart;
crash_memory_range[memory_ranges].type = RANGE_RAM;
memory_ranges++;
crash_memory_range[memory_ranges].start = cend;
crash_memory_range[memory_ranges].end = end;
crash_memory_range[memory_ranges].type = RANGE_RAM;
memory_ranges++;
} else if (start < cstart) {
crash_memory_range[memory_ranges].start = start;
crash_memory_range[memory_ranges].end = cstart;
crash_memory_range[memory_ranges].type = RANGE_RAM;
memory_ranges++;
} else if (end > cend) {
crash_memory_range[memory_ranges].start = cend;
crash_memory_range[memory_ranges].end = end;
crash_memory_range[memory_ranges].type = RANGE_RAM;
memory_ranges++;
}
} else {
crash_memory_range[memory_ranges].start = start;
crash_memory_range[memory_ranges].end = end;
crash_memory_range[memory_ranges].type = RANGE_RAM;
memory_ranges++;
}
}
static int get_dyn_reconf_crash_memory_ranges(void)
{
uint64_t start, end;
uint64_t startrange, endrange;
uint64_t size;
char fname[128], buf[32];
FILE *file;
unsigned int i;
int n;
uint32_t flags;
strcpy(fname, "/proc/device-tree/");
strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory");
if (is_dyn_mem_v2)
strcat(fname, "-v2");
if ((file = fopen(fname, "r")) == NULL) {
perror(fname);
return -1;
}
fseek(file, 4, SEEK_SET);
startrange = endrange = 0;
size = lmb_size;
for (i = 0; i < num_of_lmb_sets; i++) {
if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) {
perror(fname);
fclose(file);
return -1;
}
if (memory_ranges >= (max_memory_ranges + 1)) {
/* No space to insert another element. */
fprintf(stderr,
"Error: Number of crash memory ranges"
" excedeed the max limit\n");
return -1;
}
/*
* If the property is ibm,dynamic-memory-v2, the first 4 bytes
* tell the number of sequential LMBs in this entry.
*/
if (is_dyn_mem_v2)
size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size;
start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR]));
end = start + size;
if (start == 0 && end >= (BACKUP_SRC_END + 1))
start = BACKUP_SRC_END + 1;
flags = be32_to_cpu((*((uint32_t *)&buf[DRCONF_FLAGS])));
/* skip this block if the reserved bit is set in flags (0x80)
or if the block is not assigned to this partition (0x8) */
if ((flags & 0x80) || !(flags & 0x8))
continue;
if (start != endrange) {
if (startrange != endrange)
exclude_crash_region(startrange, endrange);
startrange = start;
}
endrange = end;
}
if (startrange != endrange)
exclude_crash_region(startrange, endrange);
fclose(file);
return 0;
}
/*
* For a given memory node, check if it is mapped to system RAM or
* to onboard memory on accelerator device like GPU card or such.
*/
static int is_coherent_device_mem(const char *fname)
{
char fpath[PATH_LEN];
char buf[32];
DIR *dmem;
FILE *file;
struct dirent *mentry;
int cnt, ret = 0;
strcpy(fpath, fname);
if ((dmem = opendir(fpath)) == NULL) {
perror(fpath);
return -1;
}
while ((mentry = readdir(dmem)) != NULL) {
if (strcmp(mentry->d_name, "compatible"))
continue;
strcat(fpath, "/compatible");
if ((file = fopen(fpath, "r")) == NULL) {
perror(fpath);
ret = -1;
break;
}
if ((cnt = fread(buf, 1, 32, file)) < 0) {
perror(fpath);
fclose(file);
ret = -1;
break;
}
if (!strncmp(buf, "ibm,coherent-device-memory", 26)) {
fclose(file);
ret = 1;
break;
}
fclose(file);
}
closedir(dmem);
return ret;
}
/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to
* create Elf headers. Keeping it separate from get_memory_ranges() as
* requirements are different in the case of normal kexec and crashdumps.
*
* Normal kexec needs to look at all of available physical memory irrespective
* of the fact how much of it is being used by currently running kernel.
* Crashdumps need to have access to memory regions actually being used by
* running kernel. Expecting a different file/data structure than /proc/iomem
* to look into down the line. May be something like /proc/kernelmem or may
* be zone data structures exported from kernel.
*/
static int get_crash_memory_ranges(struct memory_range **range, int *ranges)
{
char device_tree[256] = "/proc/device-tree/";
char fname[PATH_LEN];
char buf[MAXBYTES];
DIR *dir, *dmem;
FILE *file;
struct dirent *dentry, *mentry;
int n, ret, crash_rng_len = 0;
unsigned long long start, end;
int page_size;
crash_max_memory_ranges = max_memory_ranges + 6;
crash_rng_len = sizeof(struct memory_range) * crash_max_memory_ranges;
crash_memory_range = (struct memory_range *) malloc(crash_rng_len);
if (!crash_memory_range) {
fprintf(stderr, "Allocation for crash memory range failed\n");
return -1;
}
memset(crash_memory_range, 0, crash_rng_len);
/* create a separate program header for the backup region */
crash_memory_range[0].start = BACKUP_SRC_START;
crash_memory_range[0].end = BACKUP_SRC_END + 1;
crash_memory_range[0].type = RANGE_RAM;
memory_ranges++;
if ((dir = opendir(device_tree)) == NULL) {
perror(device_tree);
goto err;
}
cstart = crash_base;
cend = crash_base + crash_size;
while ((dentry = readdir(dir)) != NULL) {
if (!strncmp(dentry->d_name,
"ibm,dynamic-reconfiguration-memory", 35)){
get_dyn_reconf_crash_memory_ranges();
continue;
}
if (strncmp(dentry->d_name, "memory@", 7) &&
strcmp(dentry->d_name, "memory"))
continue;
strcpy(fname, device_tree);
strcat(fname, dentry->d_name);
ret = is_coherent_device_mem(fname);
if (ret == -1) {
closedir(dir);
goto err;
} else if (ret == 1) {
/*
* Avoid adding this memory region as it is not
* mapped to system RAM.
*/
continue;
}
if ((dmem = opendir(fname)) == NULL) {
perror(fname);
closedir(dir);
goto err;
}
while ((mentry = readdir(dmem)) != NULL) {
if (strcmp(mentry->d_name, "reg"))
continue;
strcat(fname, "/reg");
if ((file = fopen(fname, "r")) == NULL) {
perror(fname);
closedir(dmem);
closedir(dir);
goto err;
}
if ((n = fread(buf, 1, MAXBYTES, file)) < 0) {
perror(fname);
fclose(file);
closedir(dmem);
closedir(dir);
goto err;
}
if (memory_ranges >= (max_memory_ranges + 1)) {
/* No space to insert another element. */
fprintf(stderr,
"Error: Number of crash memory ranges"
" excedeed the max limit\n");
goto err;
}
start = be64_to_cpu(((unsigned long long *)buf)[0]);
end = start +
be64_to_cpu(((unsigned long long *)buf)[1]);
if (start == 0 && end >= (BACKUP_SRC_END + 1))
start = BACKUP_SRC_END + 1;
exclude_crash_region(start, end);
fclose(file);
}
closedir(dmem);
}
closedir(dir);
/*
* If RTAS region is overlapped with crashkernel, need to create ELF
* Program header for the overlapped memory.
*/
if (crash_base < rtas_base + rtas_size &&
rtas_base < crash_base + crash_size) {
page_size = getpagesize();
cstart = rtas_base;
cend = rtas_base + rtas_size;
if (cstart < crash_base)
cstart = crash_base;
if (cend > crash_base + crash_size)
cend = crash_base + crash_size;
/*
* The rtas section created here is formed by reading rtas-base
* and rtas-size from /proc/device-tree/rtas. Unfortunately
* rtas-size is not required to be a multiple of PAGE_SIZE
* The remainder of the page it ends on is just garbage, and is
* safe to read, its just not accounted in rtas-size. Since
* we're creating an elf section here though, lets round it up
* to the next page size boundary though, so makedumpfile can
* read it safely without going south on us.
*/
cend = _ALIGN(cend, page_size);
crash_memory_range[memory_ranges].start = cstart;
crash_memory_range[memory_ranges++].end = cend;
}
/*
* If OPAL region is overlapped with crashkernel, need to create ELF
* Program header for the overlapped memory.
*/
if (crash_base < opal_base + opal_size &&
opal_base < crash_base + crash_size) {
page_size = getpagesize();
cstart = opal_base;
cend = opal_base + opal_size;
if (cstart < crash_base)
cstart = crash_base;
if (cend > crash_base + crash_size)
cend = crash_base + crash_size;
/*
* The opal section created here is formed by reading opal-base
* and opal-size from /proc/device-tree/ibm,opal. Unfortunately
* opal-size is not required to be a multiple of PAGE_SIZE
* The remainder of the page it ends on is just garbage, and is
* safe to read, its just not accounted in opal-size. Since
* we're creating an elf section here though, lets round it up
* to the next page size boundary though, so makedumpfile can
* read it safely without going south on us.
*/
cend = _ALIGN(cend, page_size);
crash_memory_range[memory_ranges].start = cstart;
crash_memory_range[memory_ranges++].end = cend;
}
*range = crash_memory_range;
*ranges = memory_ranges;
int j;
dbgprintf("CRASH MEMORY RANGES\n");
for(j = 0; j < *ranges; j++) {
start = crash_memory_range[j].start;
end = crash_memory_range[j].end;
dbgprintf("%016Lx-%016Lx\n", start, end);
}
return 0;
err:
if (crash_memory_range)
free(crash_memory_range);
return -1;
}
/* Converts unsigned long to ascii string. */
static void ultoa(uint64_t i, char *str)
{
int j = 0, k;
char tmp;
do {
str[j++] = i % 10 + '0';
} while ((i /=10) > 0);
str[j] = '\0';
/* Reverse the string. */
for (j = 0, k = strlen(str) - 1; j < k; j++, k--) {
tmp = str[k];
str[k] = str[j];
str[j] = tmp;
}
}
static int add_cmdline_param(char *cmdline, uint64_t addr, char *cmdstr,
char *byte)
{
int cmdline_size, cmdlen, len, align = 1024;
char str[COMMAND_LINE_SIZE], *ptr;
/* Passing in =xxxK / =xxxM format. Saves space required in cmdline.*/
switch (byte[0]) {
case 'K':
if (addr%align)
return -1;
addr = addr/align;
break;
case 'M':
addr = addr/(align *align);
break;
}
ptr = str;
strcpy(str, cmdstr);
ptr += strlen(str);
ultoa(addr, ptr);
strcat(str, byte);
len = strlen(str);
cmdlen = strlen(cmdline) + len;
cmdline_size = (kernel_version() < KERNEL_VERSION(3, 15, 0) ?
512 : COMMAND_LINE_SIZE);
if (cmdlen > (cmdline_size - 1))
die("Command line overflow\n");
strcat(cmdline, str);
dbgprintf("Command line after adding elfcorehdr: %s\n", cmdline);
return 0;
}
/* Loads additional segments in case of a panic kernel is being loaded.
* One segment for backup region, another segment for storing elf headers
* for crash memory image.
*/
int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline,
uint64_t max_addr, unsigned long min_base)
{
void *tmp;
unsigned long sz;
uint64_t elfcorehdr;
int nr_ranges, align = 1024, i;
unsigned long long end;
struct memory_range *mem_range;
if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0)
return -1;
info->backup_src_start = BACKUP_SRC_START;
info->backup_src_size = BACKUP_SRC_SIZE;
/* Create a backup region segment to store backup data*/
sz = _ALIGN(BACKUP_SRC_SIZE, align);
tmp = xmalloc(sz);
memset(tmp, 0, sz);
info->backup_start = add_buffer(info, tmp, sz, sz, align,
0, max_addr, 1);
reserve(info->backup_start, sz);
/* On ppc64 memory ranges in device-tree is denoted as start
* and size rather than start and end, as is the case with
* other architectures like i386 . Because of this when loading
* the memory ranges in crashdump-elf.c the filesz calculation
* [ end - start + 1 ] goes for a toss.
*
* To be in sync with other archs adjust the end value for
* every crash memory range before calling the generic function
*/
for (i = 0; i < nr_ranges; i++) {
end = crash_memory_range[i].end - 1;
crash_memory_range[i].end = end;
}
/* Create elf header segment and store crash image data. */
if (arch_options.core_header_type == CORE_TYPE_ELF64) {
if (crash_create_elf64_headers(info, &elf_info64,
crash_memory_range, nr_ranges,
&tmp, &sz,
ELF_CORE_HEADER_ALIGN) < 0) {
free (tmp);
return -1;
}
}
else {
if (crash_create_elf32_headers(info, &elf_info32,
crash_memory_range, nr_ranges,
&tmp, &sz,
ELF_CORE_HEADER_ALIGN) < 0) {
free(tmp);
return -1;
}
}
elfcorehdr = add_buffer(info, tmp, sz, sz, align, min_base,
max_addr, 1);
reserve(elfcorehdr, sz);
/* modify and store the cmdline in a global array. This is later
* read by flatten_device_tree and modified if required
*/
add_cmdline_param(mod_cmdline, elfcorehdr, " elfcorehdr=", "K");
return 0;
}
/*
* Used to save various memory regions needed for the captured kernel.
*/
void add_usable_mem_rgns(unsigned long long base, unsigned long long size)
{
unsigned int i;
unsigned long long end = base + size;
unsigned long long ustart, uend;
base = _ALIGN_DOWN(base, getpagesize());
end = _ALIGN_UP(end, getpagesize());
for (i=0; i < usablemem_rgns.size; i++) {
ustart = usablemem_rgns.ranges[i].start;
uend = usablemem_rgns.ranges[i].end;
if (base < uend && end > ustart) {
if ((base >= ustart) && (end <= uend))
return;
if (base < ustart && end > uend) {
usablemem_rgns.ranges[i].start = base;
usablemem_rgns.ranges[i].end = end;
#ifdef DEBUG
fprintf(stderr, "usable memory rgn %u: new base:%llx new size:%llx\n",
i, base, size);
#endif
return;
} else if (base < ustart) {
usablemem_rgns.ranges[i].start = base;
#ifdef DEBUG
fprintf(stderr, "usable memory rgn %u: new base:%llx new size:%llx",
i, base, usablemem_rgns.ranges[i].end - base);
#endif
return;
} else if (end > uend){
usablemem_rgns.ranges[i].end = end;
#ifdef DEBUG
fprintf(stderr, "usable memory rgn %u: new end:%llx, new size:%llx",
i, end, end - usablemem_rgns.ranges[i].start);
#endif
return;
}
}
}
usablemem_rgns.ranges[usablemem_rgns.size].start = base;
usablemem_rgns.ranges[usablemem_rgns.size++].end = end;
dbgprintf("usable memory rgns size:%u base:%llx size:%llx\n",
usablemem_rgns.size, base, size);
}
int get_crash_kernel_load_range(uint64_t *start, uint64_t *end)
{
unsigned long long value;
if (!get_devtree_value(DEVTREE_CRASHKERNEL_BASE, &value))
*start = be64_to_cpu(value);
else
return -1;
if (!get_devtree_value(DEVTREE_CRASHKERNEL_SIZE, &value))
*end = *start + be64_to_cpu(value) - 1;
else
return -1;
return 0;
}
int is_crashkernel_mem_reserved(void)
{
int fd;
fd = open(DEVTREE_CRASHKERNEL_BASE, O_RDONLY);
if (fd < 0)
return 0;
close(fd);
return 1;
}
#if 0
static int sort_regions(mem_rgns_t *rgn)
{
int i, j;
unsigned long long tstart, tend;
for (i = 0; i < rgn->size; i++) {
for (j = 0; j < rgn->size - i - 1; j++) {
if (rgn->ranges[j].start > rgn->ranges[j+1].start) {
tstart = rgn->ranges[j].start;
tend = rgn->ranges[j].end;
rgn->ranges[j].start = rgn->ranges[j+1].start;
rgn->ranges[j].end = rgn->ranges[j+1].end;
rgn->ranges[j+1].start = tstart;
rgn->ranges[j+1].end = tend;
}
}
}
return 0;
}
#endif