/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright (C) 2020 Red Hat Inc, Clark Williams
 */
#define _GNU_SOURCE
#include <ctype.h>
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
#include <linux/sched.h>
#include <sys/sysinfo.h>
#include "stalld.h"
/*
 * Parse a base-10 long from the beginning of @start.
 *
 * Leading whitespace is skipped by strtol() and any text following the
 * number is ignored, so @start may point into the middle of a longer line.
 *
 * Returns the parsed value, or -1 (after printing a warning) when no digits
 * could be converted or strtol() reported an error.
 */
long get_long_from_str(char *start)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(start, &end, 10);
	if (errno || start == end) {
		/*
		 * Print the offending text: 'value' is a long and must not
		 * be passed to a %s conversion.
		 */
		warn("Invalid ID '%s'", start);
		return -1;
	}

	return value;
}
/*
 * Parse the long that follows the first ':' found in @start.
 *
 * Returns the parsed value, or -1 when there is no ':' in the string or
 * get_long_from_str() could not parse what follows it.
 */
long get_long_after_colon(char *start)
{
	/*
	 * Find the ":"
	 */
	start = strstr(start, ":");
	if (!start)
		return -1;

	/*
	 * skip ":" so that the number parser does not start on it
	 */
	start++;

	return get_long_from_str(start);
}
/*
 * Look up @variable inside @buffer and return the long found after the
 * ':' that follows it, e.g. for a line such as:
 *
 *     ' .nr_running : 0'
 *
 * with @variable ".nr_running".
 *
 * Returns -1 when @variable does not occur in @buffer; otherwise whatever
 * get_long_after_colon() returns for the text starting at the match.
 */
long get_variable_long_value(char *buffer, const char *variable)
{
	char *match = strstr(buffer, variable);

	return match ? get_long_after_colon(match) : -1;
}
* SIGINT handler for main
static void inthandler (int signo, siginfo_t *info, void *extra)
log_msg("received signal %d, starting shutdown\n", signo);
running = 0;
static void set_sig_handler()
struct sigaction action;
action.sa_flags = SA_SIGINFO;
action.sa_sigaction = inthandler;
if (sigaction(SIGINT, &action, NULL) == -1) {
warn("error setting SIGINT handler: %s\n",
int setup_signal_handling(void)
int status;
sigset_t sigset;
/* mask off all signals */
status = sigfillset(&sigset);
if (status) {
warn("setting up full signal set %s\n", strerror(status));
return status;
status = pthread_sigmask(SIG_BLOCK, &sigset, NULL);
if (status) {
warn("setting signal mask: %s\n", strerror(status));
return status;
/* now allow SIGINT and SIGTERM to be delivered */
status = sigemptyset(&sigset);
if (status) {
warn("creating empty signal set: %s\n", strerror(status));
return status;
status = sigaddset(&sigset, SIGINT);
if (status) {
warn("adding SIGINT to signal set: %s\n", strerror(status));
return status;
status = sigaddset(&sigset, SIGTERM);
if (status) {
warn("adding SIGTERM to signal set: %s\n", strerror(status));
return status;
status = pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
if (status) {
warn("unblocking signals: %s\n", strerror(status));
return status;
/* now register our signal handler */
return 0;
* print any error messages and exit
void die(const char *fmt, ...)
volatile int zero = 0;
va_list ap;
int ret = errno;
if (errno)
perror("stalld: ");
ret = -1;
va_start(ap, fmt);
fprintf(stderr, " ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
* Die with a divizion by zero to keep the stack on GDB.
if (config_verbose)
zero = 10 / zero;
/*
 * print the error messages but do not exit.
 *
 * When errno is set on entry, perror() output is emitted first. The
 * printf-style message described by @fmt then goes to stderr, followed
 * by a newline.
 */
void warn(const char *fmt, ...)
{
	va_list ap;

	if (errno)
		perror("stalld: ");

	va_start(ap, fmt);
	fprintf(stderr, " ");
	vfprintf(stderr, fmt, ap);
	/* every va_start() must be paired with va_end() before returning */
	va_end(ap);

	fprintf(stderr, "\n");
}
void log_msg(const char *fmt, ...)
const char *log_prefix = "stalld: ";
char message[1024];
char *log;
int kmesg_fd;
va_list ap;
log = message + strlen(log_prefix);
sprintf(message, log_prefix, strlen(log_prefix));
va_start(ap, fmt);
vsprintf(log, fmt, ap);
* print the entire message (including PREFIX).
if (config_verbose)
fprintf(stderr, "%s", message);
* print the entire message (including PREFIX).
if (config_write_kmesg) {
kmesg_fd = open("/dev/kmsg", O_WRONLY);
* Log iff possible.
if (kmesg_fd) {
if (write(kmesg_fd, message, strlen(message)) < 0)
warn("write to klog failed");
* print the log (syslog adds PREFIX).
if (config_log_syslog)
syslog(LOG_INFO, "%s", log);
/*
 * Detach the process from its parent and controlling session.
 *
 * Sequence: fork (parent exits), setsid(), fork again (parent exits),
 * reset the file mode creation mask and change the working directory
 * to "/". Any failure is reported through die().
 */
void deamonize(void)
{
	pid_t pid;

	/*
	 * Fork off the parent process.
	 */
	pid = fork();

	/*
	 * An error occurred.
	 */
	if (pid < 0)
		die("Error while forking the deamon");

	/*
	 * Success: Let the parent terminate.
	 */
	if (pid > 0)
		exit(EXIT_SUCCESS);

	/*
	 * On success: The child process becomes session leader.
	 */
	if (setsid() < 0)
		die("Error while creating the deamon (setsid)");

	/*
	 * Catch, ignore and handle signals.
	 * XXX: Implement a working signal handler (SIGCHLD and SIGHUP are
	 * not ignored yet).
	 */

	/*
	 * Fork off for the second time.
	 */
	pid = fork();

	/*
	 * An error occurred.
	 */
	if (pid < 0)
		die("Error while forking the deamon (the second)");

	/*
	 * Success: Let the parent terminate.
	 */
	if (pid > 0)
		exit(EXIT_SUCCESS);

	/*
	 * Set new file permissions.
	 * NOTE(review): a mask of 0 is assumed here - confirm the intended
	 * value.
	 */
	umask(0);

	/*
	 * Change the working directory to the root directory.
	 */
	if (chdir("/"))
		die("Cannot change directory to '/'");
}
/*
 * Set HRTICK and friends: Based on cyclicdeadline by Steven Rostedt.
 */

/*
 * STR(x) expands x first and then turns the result into a string literal;
 * _STR(x) stringifies its argument without expanding it.
 */
#define _STR(x) #x
#define STR(x) _STR(x)

/* Maximum path length handled below, unless already defined. */
#ifndef MAXPATH
#define MAXPATH 1024
#endif
static int find_mount(const char *mount, char *debugfs)
char type[100];
FILE *fp;
if ((fp = fopen("/proc/mounts","r")) == NULL)
return 0;
while (fscanf(fp, "%*s %"
"s %99s %*s %*d %*d\n",
debugfs, type) == 2) {
if (strcmp(type, mount) == 0)
if (strcmp(type, mount) != 0)
return 0;
return 1;
/*
 * Return the mount point of debugfs.
 *
 * The lookup is done through find_mount() and the result is kept in a
 * static buffer, so later calls return the cached path. An empty string
 * is returned when find_mount() reports no debugfs mount; in that case
 * nothing is cached and the next call searches again.
 */
static const char *find_debugfs(void)
{
	static char mount_point[MAXPATH+1];
	static int cached;

	if (!cached) {
		if (!find_mount("debugfs", mount_point))
			return "";
		cached = 1;
	}

	return mount_point;
}
/*
 * Enable the HRTICK scheduler feature through <debugfs>/sched_features.
 *
 * Only the first call does any work; every later call returns 1.
 *
 * Returns 0 when debugfs is not found, the sched_features file cannot be
 * stat'ed, opened or read, or writing "HRTICK" fails. Returns 1 when the
 * feature was switched on or "NO_HRTICK" was not present in the data read.
 */
int setup_hr_tick(void)
{
	const char *debugfs = find_debugfs();
	char files[strlen(debugfs) + strlen("/sched_features") + 1];
	char buf[500];
	struct stat st;
	static int set = 0;
	ssize_t nbytes;
	char *p;
	int ret;
	int len;
	int fd;

	if (set)
		return 1;

	set = 1;

	if (strlen(debugfs) == 0)
		return 0;

	/* 'files' is sized above for exactly this string */
	sprintf(files, "%s/sched_features", debugfs);
	ret = stat(files, &st);
	if (ret < 0)
		return 0;

	fd = open(files, O_RDWR);
	if (fd < 0) {
		log_msg("could not open %s to set HRTICK: %s", files, strerror(errno));
		return 0;
	}

	len = sizeof(buf);

	nbytes = read(fd, buf, len);
	if (nbytes < 0) {
		close(fd);
		return 0;
	}

	/* keep room for the terminating NUL */
	if (nbytes >= len)
		nbytes = len - 1;
	buf[nbytes] = 0;

	ret = 1;

	/*
	 * "NO_HRTICK" starts three characters before "HRTICK": step back
	 * only when a match exists and those characters are inside buf.
	 */
	p = strstr(buf, "HRTICK");
	if (p && p - buf >= 3) {
		p -= 3;
		if (strncmp(p, "NO_HRTICK", 9) == 0) {
			log_msg("dl_runtime is shorter than 1ms, setting HRTICK\n");
			nbytes = write(fd, "HRTICK", 6);
			ret = (nbytes == 6) ? 1 : 0;
		}
	}

	close(fd);

	return ret;
}
int should_monitor(int cpu)
if (config_monitor_all_cpus)
return 1;
if (config_monitored_cpus[cpu])
return 1;
return 0;
/*
 * path to file for storing daemon pid
 */
char pidfile[PATH_MAX];

/*
 * Write the pid of the calling process to 'pidfile'.
 *
 * Nothing is done when 'pidfile' is an empty string. die() is called
 * when the file cannot be opened for writing.
 */
void write_pidfile(void)
{
	FILE *f;

	if (!strlen(pidfile))
		return;

	f = fopen(pidfile, "w");
	if (f != NULL) {
		fprintf(f, "%d", (int)getpid());
		/* flush and release the stream so the pid reaches the file */
		fclose(f);
	} else {
		die("unable to open pidfile %s: %s\n", pidfile, strerror(errno));
	}
}
/*
 * Print the command line help text to stderr, one line per entry.
 */
static void print_usage(void)
{
	int i;

	/* the NULL entry ends the list; the loop below depends on it */
	static const char *const msg[] = {
		"stalld: starvation detection and avoidance (with bounds)",
		" usage: stalld [-l] [-v] [-k] [-s] [-f] [-h] \\",
		" [-c cpu-list] \\",
		" [-p time in ns] [-r time in ns] \\",
		" [-d time in seconds] [-t time in seconds]",
		" logging options:",
		" -l/--log_only: only log information (do not boost)",
		" -v/--verbose: print info to the std output",
		" -k/--log_kmsg: print log to the kernel buffer",
		" -s/--log_syslog: print log to syslog",
		" -f/--foreground: run in foreground [implict when -v]",
		" boosting options:",
		" -p/--boost_period: SCHED_DEADLINE period [ns] that the starving task will receive",
		" -r/--boost_runtime: SCHED_DEADLINE runtime [ns] that the starving task will receive",
		" -d/--boost_duration: how long [s] the starving task will run with SCHED_DEADLINE",
		" -F/--force_fifo: use SCHED_FIFO for boosting",
		" monitoring options:",
		" -t/--starving_threshold: how long [s] the starving task will wait before being boosted",
		" -A/--aggressive_mode: dispatch one thread per run queue, even when there is no starving",
		" threads on all CPU (uses more CPU/power).",
		" misc:",
		" --pidfile: write daemon pid to specified file",
		" -h/--help: print this menu",
		NULL,
	};

	for (i = 0; msg[i]; i++)
		fprintf(stderr, "%s\n", msg[i]);
}
/*
 * Print the printf-style message described by @fmt to stderr, followed
 * by a newline.
 *
 * NOTE(review): callers invoke this on invalid options and then carry
 * on; it looks like this function was meant to also show the help text
 * and terminate the program - confirm against the complete source.
 */
void usage(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	/* every va_start() must be paired with va_end() before returning */
	va_end(ap);

	fprintf(stderr, "\n");
}
static void parse_cpu_list(char *cpulist)
const char *p;
int end_cpu;
int nr_cpus;
int cpu;
int i;
nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
config_monitored_cpus = malloc(nr_cpus * sizeof(char));
memset(config_monitored_cpus, 0, (nr_cpus * sizeof(char)));
for (p = cpulist; *p; ) {
cpu = atoi(p);
if (cpu < 0 || (!cpu && *p != '0') || cpu > nr_cpus)
goto err;
while (isdigit(*p))
if (*p == '-') {
end_cpu = atoi(p);
if (end_cpu < cpu || (!end_cpu && *p != '0'))
goto err;
while (isdigit(*p))
} else
end_cpu = cpu;
if (cpu == end_cpu) {
if (config_verbose)
printf("cpulist: adding cpu %d\n", cpu);
config_monitored_cpus[cpu] = 1;
} else {
for (i = cpu; i <= end_cpu; i++) {
if (config_verbose)
printf("cpulist: adding cpu %d\n", i);
config_monitored_cpus[i] = 1;
if (*p == ',')
die("Error parsing the cpu list %s", cpulist);
int parse_args(int argc, char **argv)
int c;
/* ensure the pidfile is an empty string */
pidfile[0] = '\0';
while (1) {
static struct option long_options[] = {
{"cpu", required_argument, 0, 'c'},
{"log_only", no_argument, 0, 'l'},
{"verbose", no_argument, 0, 'v'},
{"log_kmsg", no_argument, 0, 'k'},
{"log_syslog", no_argument, 0, 's'},
{"foreground", no_argument, 0, 'f'},
{"aggressive_mode", no_argument, 0, 'A'},
{"help", no_argument, 0, 'h'},
{"boost_period", required_argument, 0, 'p'},
{"boost_runtime", required_argument, 0, 'r'},
{"boost_duration", required_argument, 0, 'd'},
{"starving_threshold", required_argument, 0, 't'},
{"pidfile", required_argument, 0, 'P'},
{"force_fifo", no_argument, 0, 'F'},
{0, 0, 0, 0}
/* getopt_long stores the option index here. */
int option_index = 0;
c = getopt_long(argc, argv, "lvkfAhsp:r:d:t:c:F",
long_options, &option_index);
/* Detect the end of the options. */
if (c == -1)
switch (c) {
case 'c':
config_monitor_all_cpus = 0;
case 'l':
config_log_only = 1;
case 'v':
config_verbose = 1;
config_foreground = 1;
case 'k':
config_write_kmesg = 1;
case 's':
config_log_syslog = 1;
case 'f':
config_foreground = 1;
case 'A':
config_aggressive = 1;
case 'p':
config_dl_period = get_long_from_str(optarg);
if (config_dl_period < 200000000)
usage("boost_period should be at least 200 ms");
if (config_dl_period > 4000000000)
usage("boost_period should be at most 4 s");
case 'r':
config_dl_runtime = get_long_from_str(optarg);
if (config_dl_period < 200000000)
usage("boost_period should be at least 200 ms");
if (config_dl_period > 4000000000)
usage("boost_period should be at most 4 seconds");
case 'd':
config_boost_duration = get_long_from_str(optarg);
if (config_boost_duration < 1)
usage("boost_duration should be at least 1 second");
if (config_boost_duration > 60)
usage("boost_duration should be at most 60 seconds");
case 't':
config_starving_threshold = get_long_from_str(optarg);
if (config_starving_threshold < 1)
usage("starving_threshold should be at least 1 second");
if (config_starving_threshold > 3600)
usage("boost_duration should be at most one hour");
case 'h':
case 'P':
strncpy(pidfile, optarg, sizeof(pidfile)-1);
case 'F':
config_force_fifo = 1;
case '?':
usage("Invalid option");
usage("Invalid option");
if (config_dl_period < config_dl_runtime)
usage("runtime is longer than the period");
if (config_dl_period > (config_boost_duration * NS_PER_SEC))
usage("the period is longer than the boost_duration: the boosted task might not be able to run");
if (config_boost_duration > config_starving_threshold)
usage("the boost duration cannot be longer than the starving threshold ");
if (config_dl_runtime < 1000000)