blob: d4d236f804c9310661e4cca424d8e269907d9d1b [file] [log] [blame]
/* CacheFiles userspace management daemon
*
* Copyright (C) 2006-2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*
* Configuration file goes in /etc/cachefiles.conf and is of the form:
*
* dir /var/cache/fscache
* tag mycache
* brun 10%
* bcull 7%
* bstop 3%
* frun 10%
* fcull 7%
* fstop 3%
*
* Only "dir" is mandatory
* Blank lines and lines beginning with a hash are comments
* Trailing spaces are significant
* There is no character escaping mechanism
* NUL characters are cause for error
*/
#define CACHEFILESD_VERSION "0.10.10"
#define _GNU_SOURCE
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <syslog.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <syslog.h>
#include <dirent.h>
#include <time.h>
#include <poll.h>
#include <limits.h>
#include <grp.h>
#include <sys/inotify.h>
#include <sys/time.h>
#include <sys/vfs.h>
#include <sys/stat.h>
typedef enum objtype {
OBJTYPE_INDEX,
OBJTYPE_DATA,
OBJTYPE_SPECIAL,
OBJTYPE_INTERMEDIATE,
} objtype_t;
struct object {
struct object *parent; /* parent dir of this object (or NULL) */
struct object *children; /* children of this object */
struct object *next; /* next child of parent */
struct object *prev; /* previous child of parent */
DIR *dir; /* this object's directory (or NULL for data obj) */
ino_t ino; /* inode number of this object */
int usage; /* number of users of this object */
bool empty; /* T if directory empty */
bool new; /* T if object new */
bool cullable; /* T if object now cullable */
objtype_t type; /* type of object */
time_t atime; /* last access time on this object */
char name[1]; /* name of this object */
};
/* cache root representation */
static struct object root = {
.parent = NULL,
.usage = 2,
.type = OBJTYPE_INDEX,
};
static int nobjects = 1;
static int nopendir;
/* current scan point */
static struct object *scan_cursor;
static bool scan_signalled, stop_signalled, reap_signalled;
/* ranked order of cullable objects
* - we have two tables: one we're building and one that's full of ready to be
* culled objects
*/
static unsigned culltable_size = 4096;
static struct object **cullbuild;
static struct object **cullready;
static unsigned nr_in_build_table;
static unsigned nr_in_ready_table;
static int ncullable;
static bool kernel_wants_cull;
static bool have_nr_releases;
static unsigned long long f_released_since_last_scan;
static unsigned long long b_released_since_last_scan;
static const char *configfile = "/etc/cachefilesd.conf";
static const char *devfile = "/dev/cachefiles";
static const char *procfile = "/proc/fs/cachefiles";
static const char *pidfile = "/var/run/cachefilesd.pid";
static char *cacheroot, *graveyardpath;
static bool culling_disabled;
static bool xnolog, xopenedlog;
static int xdebug;
static int graveyardfd;
static unsigned long long brun, bcull, bstop, frun, fcull, fstop;
static unsigned long long b_resume_threshold = ULLONG_MAX;
static unsigned long long f_resume_threshold = 5;
static const gid_t group_list[0];
#define cachefd 3
static __attribute__((noreturn))
void version(void)
{
printf("cachefilesd version " CACHEFILESD_VERSION "\n");
exit(0);
}
static __attribute__((noreturn))
void help(void)
{
fprintf(stderr,
"Format:\n"
" /sbin/cachefilesd [-d]* [-s] [-n] [-p <pidfile>] [-f <configfile>]\n"
" /sbin/cachefilesd -v\n"
"\n"
"Options:\n"
" -d\tIncrease debugging level (cumulative)\n"
" -n\tDon't daemonise the process\n"
" -s\tMessage output to stderr instead of syslog\n"
" -p <pidfile>\tWrite the PID into the file\n"
" -f <configfile>\n"
" -v\tPrint version and exit\n"
"\tRead the specified configuration file instead of"
" /etc/cachefiles.conf\n");
exit(2);
}
static __attribute__((noreturn, format(printf, 2, 3)))
void __error(int excode, const char *fmt, ...)
{
va_list va;
if (xnolog) {
va_start(va, fmt);
vfprintf(stderr, fmt, va);
va_end(va);
}
else {
if (!xopenedlog) {
openlog("cachefilesd", LOG_PID, LOG_DAEMON);
xopenedlog = true;
}
va_start(va, fmt);
vsyslog(LOG_ERR, fmt, va);
va_end(va);
closelog();
}
exit(excode);
}
#define error(FMT,...) __error(3, "Internal error: "FMT"\n" ,##__VA_ARGS__)
#define oserror(FMT,...) __error(1, FMT": errno %d (%m)\n" ,##__VA_ARGS__ ,errno)
#define cfgerror(FMT,...) __error(2, "%s:%d:"FMT"\n", configfile, lineno ,##__VA_ARGS__)
#define opterror(FMT,...) __error(2, FMT"\n" ,##__VA_ARGS__)
static __attribute__((format(printf, 3, 4)))
void __message(int dlevel, int level, const char *fmt, ...)
{
va_list va;
if (dlevel <= xdebug) {
if (xnolog) {
va_start(va, fmt);
vfprintf(stderr, fmt, va);
va_end(va);
}
else if (!xnolog) {
if (!xopenedlog) {
openlog("cachefilesd", LOG_PID, LOG_DAEMON);
xopenedlog = true;
}
va_start(va, fmt);
vsyslog(level, fmt, va);
va_end(va);
closelog();
}
}
}
#define info(FMT,...) __message(0, LOG_INFO, FMT"\n" ,##__VA_ARGS__)
#define debug(DL, FMT,...) __message(DL, LOG_DEBUG, FMT"\n" ,##__VA_ARGS__)
#define notice(FMT,...) __message(0, LOG_NOTICE, FMT"\n" ,##__VA_ARGS__)
static void open_cache(void);
static void cachefilesd(void) __attribute__((noreturn));
static void reap_graveyard(void);
static void reap_graveyard_aux(const char *dirname);
static void read_cache_state(void);
static int is_object_in_use(const char *filename);
static void cull_file(const char *filename);
static void begin_building_cull_table(void);
static bool build_cull_table(void);
static void decant_cull_table(void);
static void insert_into_cull_table(struct object *object);
static void put_object(struct object *object);
static struct object *create_object(struct object *parent, const char *name, struct stat64 *st);
static void destroy_unexpected_object(struct object *parent, struct dirent *de);
static int get_dir_fd(struct object *dir);
static void cull_object(struct object *object);
static void cull_objects(void);
/*****************************************************************************/
/*
* termination request
*/
static void sigterm(int sig)
{
stop_signalled = true;
}
/*****************************************************************************/
/*
* the graveyard was populated
*/
static void sigio(int sig)
{
reap_signalled = true;
}
/*****************************************************************************/
/*
* redo scan after a time since the last scan turned up no results
*/
static void sigalrm(int sig)
{
scan_signalled = true;
}
/*****************************************************************************/
/*
* write the PID file
*/
static void write_pidfile(void)
{
FILE *pf;
pf = fopen(pidfile, "w");
if (!pf)
oserror("Unable to open PID file: %s", pidfile);
if (fprintf(pf, "%d\n", getpid()) < 0 ||
fclose(pf) == EOF)
oserror("Unable to write PID file: %s", pidfile);
}
/*****************************************************************************/
/*
* start up the cache and go
*/
int main(int argc, char *argv[])
{
struct stat st;
unsigned lineno;
ssize_t n;
size_t m;
FILE *config;
char *line, *cp;
long page_size;
int _cachefd, nullfd, opt, loop, open_max;
bool nodaemon = false;
/* handle help request */
if (argc == 2 && strcmp(argv[1], "--help") == 0)
help();
if (argc == 2 && strcmp(argv[1], "--version") == 0)
version();
/* parse the arguments */
while (opt = getopt(argc, argv, "dsnNf:p:v"),
opt != EOF
) {
switch (opt) {
case 'd':
/* turn on debugging */
xdebug++;
break;
case 's':
/* disable syslog writing */
xnolog = true;
break;
case 'n':
/* don't daemonise */
nodaemon = true;
break;
case 'N':
/* disable culling */
culling_disabled = true;
break;
case 'f':
/* use a specific config file */
configfile = optarg;
break;
case 'p':
/* use a specific PID file */
pidfile = optarg;
break;
case 'v':
/* print the version and exit */
version();
default:
opterror("Unknown commandline option '%c'", optopt);
}
}
/* read various parameters */
page_size = sysconf(_SC_PAGESIZE);
if (page_size < 0)
oserror("Unable to get page size");
open_max = sysconf(_SC_OPEN_MAX);
if (open_max < 0)
oserror("Unable to get max open files");
/* become owned by root */
if (setgroups(sizeof(group_list) / sizeof(gid_t), group_list) < 0)
oserror("Unable to clear the supplementary groups");
if (setresuid(0, 0, 0) < 0)
oserror("Unable to set UID to 0");
if (setresgid(0, 0, 0) < 0)
oserror("Unable to set GID to 0");
/* just in case... */
sync();
/* open the devfile or the procfile on fd 3 */
_cachefd = open(devfile, O_RDWR);
if (_cachefd < 0) {
if (errno != ENOENT)
oserror("Unable to open %s", devfile);
_cachefd = open(procfile, O_RDWR);
if (_cachefd < 0) {
if (errno == ENOENT)
oserror("Unable to open %s", devfile);
oserror("Unable to open %s", procfile);
}
}
if (_cachefd != cachefd) {
if (dup2(_cachefd, cachefd) < 0)
oserror("Unable to transfer cache fd to 3");
if (close(_cachefd) < 0)
oserror("Close of original cache fd failed");
}
/* open /dev/null */
nullfd = open("/dev/null", O_RDWR);
if (nullfd < 0)
oserror("Unable to open /dev/null");
/* open the config file */
config = fopen(configfile, "r");
if (!config)
oserror("Unable to open %s", configfile);
/* read the configuration */
m = 0;
line = NULL;
lineno = 0;
while (n = getline(&line, &m, config),
n != EOF
) {
lineno++;
if (n >= page_size)
cfgerror("Line too long");
if (memchr(line, 0, n) != 0)
cfgerror("Line contains a NUL character");
/* eat blank lines, leading white space and trailing NL */
cp = strchr(line, '\n');
if (!cp)
cfgerror("Unterminated line");
if (cp == line)
continue;
*cp = '\0';
for (cp = line; isspace(*cp); cp++) {;}
if (!*cp)
continue;
/* eat full line comments */
if (*cp == '#')
continue;
/* allow culling to be disabled */
if (memcmp(cp, "nocull", 6) == 0 &&
(!cp[6] || isspace(cp[6]))) {
culling_disabled = true;
}
/* note the cull table size command */
if (memcmp(cp, "culltable", 9) == 0 && isspace(cp[9])) {
unsigned long cts;
char *sp;
for (sp = cp + 10; isspace(*sp); sp++) {;}
cts = strtoul(sp, &sp, 10);
if (*sp)
cfgerror("Invalid cull table size number");
if (cts < 12 || cts > 20)
cfgerror("Log2 of cull table size must be 12 <= N <= 20");
culltable_size = 1 << cts;
continue;
}
/* Note the suspension resume released file count thresholds
* ("-" to disable a threshold).
*/
if (memcmp(cp, "resume_thresholds", 18) == 0 && isspace(cp[18])) {
unsigned long long b_thresh, f_thresh;
char *sp;
for (sp = cp + 18; isspace(*sp); sp++) {;}
if (*sp == '-') {
sp++;
b_thresh = ULLONG_MAX;
} else {
b_thresh = strtoul(sp, &sp, 10);
}
if (!*sp || !isspace(*sp))
cfgerror("Error parsing resume threshold (blocks)");
if (b_thresh == 0)
cfgerror("Invalid resume threshold (blocks)");
for (; isspace(*sp); sp++) {;}
if (*sp == '-') {
sp++;
f_thresh = ULLONG_MAX;
} else {
f_thresh = strtoul(sp, &sp, 10);
if (*sp)
cfgerror("Error parsing resume threshold (files)");
if (f_thresh == 0)
cfgerror("Invalid resume threshold (files)");
}
b_resume_threshold = b_thresh;
f_resume_threshold = f_thresh;
continue;
}
/* note the dir command */
if (memcmp(cp, "dir", 3) == 0 && isspace(cp[3])) {
char *sp;
for (sp = cp + 4; isspace(*sp); sp++) {;}
if (strlen(sp) > PATH_MAX - 10)
cfgerror("Cache pathname is too long");
if (stat(sp, &st) < 0)
oserror("Can't confirm cache location");
cacheroot = strdup(sp);
if (!cacheroot)
oserror("Can't copy cache name");
}
/* object to the bind command */
if (memcmp(cp, "bind", 4) == 0 &&
(!cp[4] || isspace(cp[4])))
cfgerror("'bind' command not permitted");
/* pass the config options over to the kernel module */
if (write(cachefd, line, strlen(line)) < 0) {
if (errno == -ENOMEM || errno == -EIO)
oserror("CacheFiles");
cfgerror("CacheFiles gave config error: %m");
}
}
if (line)
free(line);
if (!feof(config))
oserror("Unable to read %s", configfile);
if (fclose(config) == EOF)
oserror("Unable to close %s", configfile);
/* allocate the cull tables */
if (!culling_disabled) {
cullbuild = calloc(culltable_size, sizeof(cullbuild[0]));
if (!cullbuild)
oserror("calloc");
cullready = calloc(culltable_size, sizeof(cullready[0]));
if (!cullready)
oserror("calloc");
}
/* leave stdin, stdout, stderr and cachefd open only */
if (nullfd != 0)
dup2(nullfd, 0);
if (nullfd != 1)
dup2(nullfd, 1);
for (loop = 4; loop < open_max; loop++)
close(loop);
/* set up a connection to syslog whilst we still can (the bind command
* will give us our own namespace with no /dev/log */
openlog("cachefilesd", LOG_PID, LOG_DAEMON);
xopenedlog = true;
info("About to bind cache");
/* now issue the bind command */
if (write(cachefd, "bind", 4) < 0)
oserror("CacheFiles bind failed");
info("Bound cache");
/* we now have a live cache - daemonise the process */
if (!nodaemon) {
if (!xdebug)
dup2(1, 2);
switch (fork()) {
case -1:
oserror("fork");
case 0:
if (xdebug)
fprintf(stderr, "Daemon PID %d\n", getpid());
signal(SIGTTIN, SIG_IGN);
signal(SIGTTOU, SIG_IGN);
signal(SIGTSTP, SIG_IGN);
setsid();
write_pidfile();
cachefilesd();
default:
break;
}
}
else {
cachefilesd();
}
exit(0);
}
/*****************************************************************************/
/*
* open the cache directories
*/
static void open_cache(void)
{
struct statfs sfs;
char buffer[PATH_MAX + 1];
/* open the cache directory so we can scan it */
snprintf(buffer, PATH_MAX, "%s/cache", cacheroot);
root.dir = opendir(buffer);
if (!root.dir)
oserror("Unable to open cache directory");
nopendir++;
/* open the graveyard so we can set a notification on it */
if (asprintf(&graveyardpath, "%s/graveyard", cacheroot) < 0)
oserror("Unable to copy graveyard name");
graveyardfd = open(graveyardpath, O_DIRECTORY);
if (graveyardfd < 0)
oserror("Unable to open graveyard directory");
if (fstatfs(graveyardfd, &sfs) < 0)
oserror("Unable to stat cache filesystem");
if (sfs.f_bsize + 1 == 0 ||
sfs.f_blocks + 1 == 0 ||
sfs.f_bfree + 1 == 0 ||
sfs.f_bavail + 1 == 0)
error("Backing filesystem returns unusable statistics through fstatfs()");
}
/*****************************************************************************/
/*
* manage the cache
*/
static void cachefilesd(void)
{
sigset_t sigs, osigs;
bool scanning_suspended = false;
bool scan_in_progress = false;
struct pollfd pollfds[1] = {
[0] = {
.fd = cachefd,
.events = POLLIN,
},
};
notice("Daemon Started");
/* open the cache directories */
open_cache();
/* We need to be able to disable signals that we need to check for
* before calling poll so that we don't race and miss something.
*/
sigemptyset(&sigs);
sigaddset(&sigs, SIGIO);
sigaddset(&sigs, SIGINT);
sigaddset(&sigs, SIGTERM);
sigaddset(&sigs, SIGALRM);
signal(SIGTERM, sigterm);
signal(SIGINT, sigterm);
/* check the graveyard for graves */
reap_graveyard();
while (!stop_signalled) {
bool do_cull = false;
debug(3, "Loop %sbuild=%d ready=%d susp=%u scan=%u",
culling_disabled ? "NOCULL " : "",
nr_in_build_table, nr_in_ready_table,
scanning_suspended, scan_in_progress);
read_cache_state();
if (!culling_disabled) {
/* Determine if we're going to need to start a new scan
* to refill the cull table. We want to do this if the
* secondary cull table is less than half full - but
* overriding that, we don't want to do this if we know
* there's insufficient cullables to make it worth
* while.
*/
if (!scan_in_progress) {
bool begin_scan = false;
debug(1, "Consider scan %d/%d",
nr_in_build_table, culltable_size / 2);
if (nr_in_build_table < culltable_size / 2) {
debug(1, "Want to scan");
begin_scan = true;
}
if (begin_scan && scanning_suspended) {
debug(1, "Scanning suspended");
if (have_nr_releases) {
if (f_released_since_last_scan <
f_resume_threshold &&
b_released_since_last_scan <
b_resume_threshold)
begin_scan = false;
} else {
begin_scan = scan_signalled;
}
}
if (begin_scan) {
debug(1, "Beginning a scan");
begin_building_cull_table();
scan_in_progress = true;
scanning_suspended = false;
scan_signalled = false;
f_released_since_last_scan = 0;
b_released_since_last_scan = 0;
}
}
/* Determine if there's anything we can actually cull yet if
* the kernel is calling for space.
*/
if (kernel_wants_cull) {
debug(1, "Want to cull");
if (nr_in_ready_table > 0)
do_cull = true;
}
}
/* We block the signals across the checks for reap, cull and
* scan initiation before polling so that we sleep without
* racing against the signal handlers.
*/
if (!scan_in_progress && !reap_signalled && !do_cull) {
if (sigprocmask(SIG_BLOCK, &sigs, &osigs) < 0)
oserror("Unable to block signals");
if (!reap_signalled &&
!stop_signalled &&
!scan_signalled) {
debug(1, "Poll");
if (ppoll(pollfds, 1, NULL, &osigs) < 0 &&
errno != EINTR)
oserror("Unable to suspend process");
}
if (sigprocmask(SIG_UNBLOCK, &sigs, NULL) < 0)
oserror("Unable to unblock signals");
continue;
}
if (!culling_disabled) {
if (do_cull)
cull_objects();
if (scan_in_progress) {
scan_in_progress = build_cull_table();
if (!scan_in_progress) {
/* Scan complete.
*
* If the scan didn't produce a full
* table then don't repeat the scan
* until something gets released by the
* kernel.
*/
if (nr_in_build_table < culltable_size) {
debug(1, "Suspend scanning");
scanning_suspended = true;
if (!have_nr_releases) {
signal(SIGALRM, sigalrm);
alarm(30);
}
}
}
}
if (!scan_in_progress) {
if (nr_in_ready_table <= culltable_size / 2 + 2 &&
nr_in_build_table > 0) {
debug(1, "Decant");
decant_cull_table();
}
}
}
if (reap_signalled)
reap_graveyard();
}
notice("Daemon Terminated");
exit(0);
}
/*****************************************************************************/
/*
* check the graveyard directory for graves to delete
*/
static void reap_graveyard(void)
{
/* set a one-shot notification to catch more graves appearing */
reap_signalled = false;
signal(SIGIO, sigio);
if (fcntl(graveyardfd, F_NOTIFY, DN_CREATE) < 0)
oserror("unable to set notification on graveyard");
reap_graveyard_aux(graveyardpath);
}
/*****************************************************************************/
/*
* recursively remove dead stuff from the graveyard
*/
static void reap_graveyard_aux(const char *dirname)
{
struct dirent *de;
size_t len;
char name[NAME_MAX + 1];
bool deleted;
DIR *dir;
if (chdir(dirname) < 0)
oserror("chdir failed");
dir = opendir(".");
if (!dir)
oserror("Unable to open grave dir %s", dirname);
do {
/* removing directory entries may cause us to skip when reading
* them */
rewinddir(dir);
deleted = false;
while (errno = 0,
de = readdir(dir),
de != NULL
) {
/* ignore "." and ".." */
if (de->d_name[0] == '.') {
if (de->d_name[1] == '\0')
continue;
if (de->d_name[1] == '.' ||
de->d_name[1] == '\0')
continue;
}
deleted = true;
/* attempt to unlink non-directory files */
if (de->d_type != DT_DIR) {
debug(1, "unlink %s", de->d_name);
if (unlink(de->d_name) == 0)
continue;
if (errno != EISDIR)
oserror("Unable to unlink file %s",
de->d_name);
}
/* recurse into directories */
len = strlen(de->d_name) + 1;
if (len > sizeof(name)) {
errno = ENAMETOOLONG;
oserror("Name longer than NAME_MAX \"%s\"",
de->d_name);
}
memcpy(name, de->d_name, len);
reap_graveyard_aux(name);
/* which we then attempt to remove */
debug(1, "rmdir %s", name);
if (rmdir(name) < 0)
oserror("Unable to remove dir %s", name);
}
if (errno != 0)
oserror("Unable to read dir %s", dirname);
} while (deleted);
closedir(dir);
if (chdir("..") < 0)
oserror("Unable to chdir to ..");
}
/*****************************************************************************/
/*
* read the cache state
*/
static void read_cache_state(void)
{
char buffer[4096 + 1], *tok, *next, *arg;
int n;
n = read(cachefd, buffer, sizeof(buffer) - 1);
if (n < 0)
oserror("Unable to read cache state");
buffer[n] = '\0';
debug(3, "KERNEL: %s", buffer);
tok = buffer;
do {
next = strpbrk(tok, " \t");
if (next)
*next++ = '\0';
arg = strchr(tok, '=');
if (arg) {
*arg++ = '\0';
} else {
debug(0, "Warning: malformed output from kernel, missing arg to [%s]", tok);
continue;
}
if (strcmp(tok, "cull") == 0) {
kernel_wants_cull = (strtoul(arg, NULL, 0) != 0);
} else if (strcmp(tok, "brun") == 0) {
brun = strtoull(arg, NULL, 16);
} else if (strcmp(tok, "bcull") == 0) {
bcull = strtoull(arg, NULL, 16);
} else if (strcmp(tok, "bstop") == 0) {
bstop = strtoull(arg, NULL, 16);
} else if (strcmp(tok, "frun") == 0) {
frun = strtoull(arg, NULL, 16);
} else if (strcmp(tok, "fcull") == 0) {
fcull = strtoull(arg, NULL, 16);
} else if (strcmp(tok, "fstop") == 0) {
fstop = strtoull(arg, NULL, 16);
} else if (strcmp(tok, "breleased") == 0) {
b_released_since_last_scan += strtoull(arg, NULL, 16);
have_nr_releases = true;
} else if (strcmp(tok, "freleased") == 0) {
f_released_since_last_scan += strtoull(arg, NULL, 16);
have_nr_releases = true;
}
} while ((tok = next));
}
/*****************************************************************************/
/*
* find out if an object in the current working directory is in use
*/
static int is_object_in_use(const char *filename)
{
char buffer[NAME_MAX + 30];
int ret, n;
n = sprintf(buffer, "inuse %s", filename);
/* command the module */
ret = write(cachefd, buffer, n);
if (ret < 0 && errno != ESTALE && errno != ENOENT && errno != EBUSY)
oserror("Failed to check object's in-use state");
return ret < 0 && errno == EBUSY ? 1 : 0;
}
/*****************************************************************************/
/*
* cull a file representing an object in the current working directory
* - requests CacheFiles rename the object "<cwd>/filename" to the graveyard
*/
static void cull_file(const char *filename)
{
char buffer[NAME_MAX + 30];
int ret, n;
n = sprintf(buffer, "cull %s", filename);
/* command the module */
ret = write(cachefd, buffer, n);
if (ret < 0 && errno != ESTALE && errno != ENOENT && errno != EBUSY)
oserror("Failed to cull object");
}
/*****************************************************************************/
/*
* create an object from a name and stat details and attach to the parent, if
* it doesn't already exist
*/
static struct object *create_object(struct object *parent,
const char *name,
struct stat64 *st)
{
struct object *object, *p, *pr;
int len;
/* see if the parent object already holds a representation of this
* one */
pr = NULL;
for (p = parent->children; p; pr = p, p = p->next) {
if (p->ino <= st->st_ino) {
if (p->ino == st->st_ino) {
/* it does */
p->usage++;
return p;
}
break;
}
}
/* allocate the object
* - note that struct object reserves space for NUL directly
*/
len = strlen(name);
object = calloc(1, sizeof(struct object) + len);
if (!object)
oserror("Unable to alloc object");
object->usage = 1;
object->new = true;
object->ino = st->st_ino;
object->atime = st->st_atime;
memcpy(object->name, name, len + 1);
switch (object->name[0]) {
case 'I':
case 'J':
object->type = OBJTYPE_INDEX;
break;
case 'D':
case 'E':
object->type = OBJTYPE_DATA;
break;
case 'S':
case 'T':
object->type = OBJTYPE_SPECIAL;
break;
case '+':
case '@':
object->type = OBJTYPE_INTERMEDIATE;
break;
default:
error("Unexpected file type '%c'", object->name[0]);
}
/* link into the parent's list */
parent->usage++;
object->parent = parent;
object->prev = pr;
object->next = p;
if (pr)
pr->next = object;
else
parent->children = object;
if (p)
p->prev = object;
nobjects++;
return object;
}
/*****************************************************************************/
/*
* free up an object, unlinking it from its parent
*/
static void put_object(struct object *object)
{
struct object *parent;
if (--object->usage > 0)
return;
nobjects--;
if (object->cullable)
ncullable--;
/* destroy the object */
if (object == &root)
error("Can't destroy root object representation");
if (object->children)
error("Destroying object with children: '%s'", object->name);
if (object->dir) {
closedir(object->dir);
nopendir--;
}
if (object->prev)
object->prev->next = object->next;
else
object->parent->children = object->next;
if (object->next)
object->next->prev = object->prev;
parent = object->parent;
memset(object, 0x6d, sizeof(struct object));
free(object);
if (parent)
put_object(parent);
}
/*****************************************************************************/
/*
* destroy an unexpected object
*/
static void destroy_unexpected_object(struct object *parent, struct dirent *de)
{
static unsigned uniquifier;
struct timeval tv;
char namebuf[40];
int fd;
fd = dirfd(parent->dir);
if (de->d_type != DT_DIR) {
if (unlinkat(fd, de->d_name, 0) < 0 &&
errno != ENOENT)
oserror("Unable to unlink unexpectedly named file: %s",
de->d_name);
}
else {
gettimeofday(&tv, NULL);
sprintf(namebuf, "x%lxx%xx", tv.tv_sec, uniquifier++);
if (renameat(fd, de->d_name, graveyardfd, namebuf) < 0 &&
errno != ENOENT)
oserror("Unable to rename unexpectedly named file: %s",
de->d_name);
}
}
/*****************************************************************************/
/*
* insert an object into the cull table if its old enough
*/
static void insert_into_cull_table(struct object *object)
{
int y, o, m;
if (!object)
error("NULL object pointer");
/* just insert if table is empty */
if (nr_in_build_table == 0) {
object->usage++;
cullbuild[0] = object;
nr_in_build_table++;
return;
}
/* insert somewhere if table is not full */
if (nr_in_build_table < culltable_size) {
object->usage++;
/* just insert at end if new oldest object */
if (object->atime <= cullbuild[nr_in_build_table - 1]->atime) {
cullbuild[nr_in_build_table] = object;
nr_in_build_table++;
return;
}
/* insert at front if new newest object */
if (object->atime > cullbuild[0]->atime) {
memmove(&cullbuild[1],
&cullbuild[0],
nr_in_build_table * sizeof(cullbuild[0]));
cullbuild[0] = object;
nr_in_build_table++;
return;
}
/* if only two objects in list then insert between them */
if (nr_in_build_table == 2) {
cullbuild[2] = cullbuild[1];
cullbuild[1] = object;
nr_in_build_table++;
return;
}
/* insert somewhere in between front and back elements
* of a three-plus object list
* - oldest_build == #objects_currently_in_list
*/
y = 1;
o = nr_in_build_table - 1;
do {
m = (y + o) / 2;
if (object->atime > cullbuild[m]->atime)
o = m;
else
y = m + 1;
} while (y < o);
memmove(&cullbuild[y + 1],
&cullbuild[y],
(nr_in_build_table - y) * sizeof(cullbuild[0]));
cullbuild[y] = object;
nr_in_build_table++;
return;
}
/* if table is full then insert only if older than newest */
if (nr_in_build_table > culltable_size)
error("Cull table overfull");
if (object->atime >= cullbuild[0]->atime)
return;
/* newest object in table will be displaced by this one */
put_object(cullbuild[0]);
cullbuild[0] = (void *)(0x6b000000 | __LINE__);
object->usage++;
/* place directly in first slot if second is older */
if (object->atime >= cullbuild[1]->atime) {
cullbuild[0] = object;
return;
}
/* shift everything up one if older than oldest */
if (object->atime <= cullbuild[culltable_size - 1]->atime) {
memmove(&cullbuild[0],
&cullbuild[1],
(culltable_size - 1) * sizeof(cullbuild[0]));
cullbuild[culltable_size - 1] = object;
return;
}
/* search the table to find the insertion point
* - it will be between the first and last the slots
* - we know second is younger
*/
cullbuild[0] = cullbuild[1];
y = 2;
o = culltable_size - 1;
do {
m = (y + o) / 2;
if (object->atime >= cullbuild[m]->atime)
o = m;
else
y = m + 1;
} while (y < o);
if (y == 2) {
cullbuild[1] = object;
return;
}
memmove(&cullbuild[1],
&cullbuild[2],
(y - 2) * sizeof(cullbuild[0]));
cullbuild[y - 1] = object;
}
/*****************************************************************************/
/*
* Begin a scan to build a cull table.
*/
static void begin_building_cull_table(void)
{
debug(1, "Refilling cull table");
root.usage++;
scan_cursor = &root;
}
/*****************************************************************************/
/*
* Do the next step in building up the cull table. Returns false upon
* completion of a scan.
*/
static bool build_cull_table(void)
{
struct dirent *de;
struct object *curr, *child;
struct stat64 st;
unsigned loop;
int fd;
curr = scan_cursor;
if (!curr->dir) {
curr->empty = true;
fd = openat(dirfd(curr->parent->dir), curr->name, O_DIRECTORY);
if (fd < 0) {
if (errno != ENOENT)
oserror("Failed to open directory");
goto dir_read_complete;
}
curr->dir = fdopendir(fd);
if (!curr->dir)
oserror("Failed to open directory");
nopendir++;
}
debug(2, "--> build_cull_table({%s})", curr->name);
if (fchdir(dirfd(curr->dir)) < 0)
oserror("Failed to change current directory");
next:
/* read the next directory entry */
errno = 0;
de = readdir(curr->dir);
if (!de) {
if (errno == 0 || errno == ENOENT)
goto dir_read_complete;
oserror("Unable to read directory");
}
if (de->d_name[0] == '.') {
if (!de->d_name[1] ||
(de->d_name[1] == '.' && !de->d_name[2]))
goto next;
}
debug(2, "readdir '%s'", de->d_name);
switch (de->d_type) {
case DT_UNKNOWN:
case DT_DIR:
case DT_REG:
break;
default:
oserror("readdir returned unsupported type %d", de->d_type);
}
/* delete any funny looking files */
if (memchr("IDSJET+@", de->d_name[0], 8) == NULL)
goto found_unexpected_object;
/* see if this object is already known to us */
if (fstatat64(dirfd(curr->dir), de->d_name, &st, 0) < 0) {
if (errno == ENOENT)
goto next;
oserror("Failed to stat directory");
}
if (!S_ISDIR(st.st_mode) &&
(!S_ISREG(st.st_mode) ||
de->d_name[0] == 'I' ||
de->d_name[0] == 'J' ||
de->d_name[0] == '@' ||
de->d_name[0] == '+'))
goto found_unexpected_object;
/* create a representation for this object */
child = create_object(curr, de->d_name, &st);
if (!child && errno == ENOENT)
goto next;
curr->empty = false;
if (!child)
oserror("Unable to create object");
/* we consider culling objects at the transition from index object to
* non-index object */
switch (child->type) {
case OBJTYPE_DATA:
case OBJTYPE_SPECIAL:
if (!child->new) {
/* the child appears to have been retained in the
* culling table already, so we see if it should be
* removed therefrom
*/
debug(2, "- old child");
if (st.st_atime <= child->atime) {
/* file on disk hasn't been touched */
put_object(child);
goto next;
}
for (loop = 0; loop < nr_in_ready_table; loop++)
if (cullready[loop] == child)
break;
if (loop == nr_in_ready_table - 1) {
/* child was oldest object */
cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__);
put_object(child);
goto removed;
}
else if (loop < nr_in_ready_table - 1) {
/* child was somewhere in between */
memmove(&cullready[loop],
&cullready[loop + 1],
(nr_in_ready_table - (loop + 1)) * sizeof(cullready[0]));
cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__);
put_object(child);
goto removed;
}
for (loop = 0; loop < nr_in_build_table; loop++)
if (cullbuild[loop] == child)
break;
if (loop == nr_in_build_table - 1) {
/* child was oldest object */
cullbuild[--nr_in_build_table] = (void *)(0x6b000000 | __LINE__);
put_object(child);
}
else if (loop < nr_in_build_table - 1) {
/* child was somewhere in between */
memmove(&cullbuild[loop],
&cullbuild[loop + 1],
(nr_in_build_table - (loop + 1)) * sizeof(cullbuild[0]));
cullbuild[--nr_in_build_table] = (void *)(0x6b000000 | __LINE__);
put_object(child);
}
removed:
;
}
/* add objects that aren't in use to the cull table */
if (!is_object_in_use(de->d_name)) {
debug(2, "- insert");
child->new = false;
insert_into_cull_table(child);
}
put_object(child);
goto next;
/* investigate all index and index-intermediate directories */
case OBJTYPE_INDEX:
case OBJTYPE_INTERMEDIATE:
debug(2, "- descend");
child->new = false;
scan_cursor = child;
debug(2, "<-- build_cull_table({%s})", curr->name);
return true;
default:
error("Unexpected type");
}
/* we've finished reading a directory - see if we can cull it */
dir_read_complete:
debug(2, "dir_read_complete: u=%d e=%d %s",
curr->usage, curr->empty, curr->name);
if (curr->dir) {
if (curr != &root) {
closedir(curr->dir);
curr->dir = NULL;
nopendir--;
}
else {
rewinddir(curr->dir);
}
}
if (curr->usage == 1 && curr->empty) {
/* attempt to cull unpinned empty intermediate and index
* objects */
if (fchdir(dirfd(curr->parent->dir)) < 0)
oserror("Failed to change current directory");
switch (curr->type) {
case OBJTYPE_INDEX:
cull_file(curr->name);
break;
case OBJTYPE_INTERMEDIATE:
unlinkat(dirfd(curr->parent->dir), curr->name,
AT_REMOVEDIR);
break;
default:
break;
}
}
scan_cursor = curr->parent;
if (!scan_cursor)
debug(1, "Scan complete");
debug(2, "<-- build_cull_table({%s})", curr->name);
put_object(curr);
return scan_cursor != NULL;
/* delete unexpected objects that we've found */
found_unexpected_object:
debug(2, "found_unexpected_object");
destroy_unexpected_object(curr, de);
goto next;
}
/*****************************************************************************/
/*
* decant cull entries from the build table to the ready table and enable them
*/
static void decant_cull_table(void)
{
unsigned loop, avail, copy, leave, space, n;
if (scan_cursor)
error("Can't decant cull table whilst scanning");
/* mark the new entries cullable */
for (loop = 0; loop < nr_in_build_table; loop++) {
if (!cullbuild[loop]->cullable) {
cullbuild[loop]->cullable = true;
ncullable++;
}
}
/* if the ready table is empty, copy the whole lot across */
if (nr_in_ready_table == 0) {
copy = nr_in_build_table;
debug(1, "Decant (all %d)", copy);
n = copy * sizeof(cullready[0]);
memcpy(cullready, cullbuild, n);
memset(cullbuild, 0x6e, n);
nr_in_ready_table = nr_in_build_table;
nr_in_build_table = 0;
goto check;
}
/* decant some of the build table if there's space */
if (culltable_size < nr_in_ready_table)
error("Less than zero space in ready table");
space = culltable_size - nr_in_ready_table;
if (space == 0)
goto check;
/* work out how much of the build table we can copy */
copy = avail = nr_in_build_table;
if (copy > space)
copy = space;
leave = avail - copy;
debug(1, "Decant (%u/%u to %u)", copy, avail, space);
/* make a hole in the ready table transfer "copy" elements from the end
* of cullbuild (oldest) to the beginning of cullready (youngest)
*/
memmove(&cullready[copy], &cullready[0], nr_in_ready_table * sizeof(cullready[0]));
nr_in_ready_table += copy;
memcpy(&cullready[0], &cullbuild[leave], copy * sizeof(cullready[0]));
memset(&cullbuild[leave], 0x6b, copy * sizeof(cullbuild[0]));
nr_in_build_table = leave;
if (copy + leave > culltable_size)
error("Scan table exceeded (%d+%d)", copy, leave);
check:
for (loop = 0; loop < nr_in_ready_table; loop++)
if (((long)cullready[loop] & 0xf0000000) == 0x60000000)
abort();
}
/*****************************************************************************/
/*
* get the directory handle for the given directory
*/
static int get_dir_fd(struct object *dir)
{
int parentfd, fd;
debug(1, "get_dir_fd(%s)", dir->name);
if (dir->dir) {
fd = dup(dirfd(dir->dir));
if (fd < 0)
oserror("Failed to dup fd");
debug(1, "cache fd to %d", fd);
return fd;
}
parentfd = get_dir_fd(dir->parent);
fd = openat(parentfd, dir->name, O_DIRECTORY);
if (fd < 0 && errno != ENOENT)
oserror("Failed to open directory");
/* return parent fd or -1 if ENOENT */
debug(1, "<%d>/%s to %d", parentfd, dir->name, fd);
close(parentfd);
return fd;
}
/*****************************************************************************/
/*
* cull an object
*/
static void cull_object(struct object *object)
{
struct stat64 st;
int dirfd;
debug(1, "CULL %s", object->name);
dirfd = get_dir_fd(object->parent);
if (dirfd >= 0) {
if (fstatat64(dirfd, object->name, &st, 0) < 0) {
if (errno != ENOENT)
oserror("Failed to re-stat object");
close(dirfd);
goto object_already_gone;
}
if (fchdir(dirfd) < 0)
oserror("Failed to change current directory");
if (object->atime >= st.st_atime)
cull_file(object->name);
close(dirfd);
}
object_already_gone:
put_object(object);
}
/*****************************************************************************/
/*
* consider starting a cull
*/
static void cull_objects(void)
{
if (ncullable <= 0)
error("Cullable object count is inconsistent");
if (cullready[nr_in_ready_table - 1]->cullable) {
cull_object(cullready[nr_in_ready_table - 1]);
cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__);
}
}