Add a notification facility for watching for key changes

Add keyctl commands and library functions to handle the setting and removal
of watches on keys for notifications of change events.  Five keyctl
commands are defined:

 (1) keyctl watch <key>

     Watch the specified key for changes, logging notifications to stdout.

 (2) keyctl watch_session [-n <name>] <notifylog> <gclog> <fd> prog [<arg>...]

     Create a new session keyring and attach a watch to it that an
     auxiliary logging process monitors.  The nominated program is run with
     the new session keyring and the arguments given.

     The session keyring can be given a name.  The logging process will log
     synchronous events to file notifylog and asynchronous events to file
     gclog.

     The specified file descriptor will be attached to the watch_queue and
     left open across the exec.  This can be made use of by the next few
     commands.

 (3) keyctl watch_add <fd> <key>
 (4) keyctl watch_rm <fd> <key>

     Add/remove a watch on the specified key to/from the given watch_queue
     derived from watch_session.

 (5) keyctl watch_sync <fd>

     Wait for the logging process that's watching the given watch_queue to
     synchronise.

Commands (2) to (5) are primarily provided for the testsuite's purposes.

Signed-off-by: David Howells <dhowells@redhat.com>
diff --git a/Makefile b/Makefile
index 4d05570..18a7231 100644
--- a/Makefile
+++ b/Makefile
@@ -149,9 +149,11 @@
 %.o: %.c keyutils.h Makefile
 	$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $<
 
-keyctl: keyctl.o keyctl_testing.o $(LIB_DEPENDENCY)
-	$(CC) -L. $(CFLAGS) $(LDFLAGS) $(RPATH) -o $@ keyctl.o keyctl_testing.o -lkeyutils
-keyctl.o keyctl_testing.o: keyctl.h
+keyctl: keyctl.o keyctl_testing.o keyctl_watch.o $(LIB_DEPENDENCY)
+	$(CC) -L. $(CFLAGS) $(LDFLAGS) $(RPATH) -o $@ \
+		keyctl.o keyctl_testing.o keyctl_watch.o -lkeyutils
+keyctl.o keyctl_testing.o keyctl_watch.o: keyctl.h
+keyctl_watch.o: watch_queue.h
 
 request-key: request-key.o $(LIB_DEPENDENCY)
 	$(CC) -L. $(CFLAGS) $(LDFLAGS) $(RPATH) -o $@ $< -lkeyutils
diff --git a/keyctl.c b/keyctl.c
index b17ceb4..cc25ac0 100644
--- a/keyctl.c
+++ b/keyctl.c
@@ -139,14 +139,16 @@
 	{ act_keyctl_timeout,	"timeout",	"<key> <timeout>" },
 	{ act_keyctl_unlink,	"unlink",	"<key> [<keyring>]" },
 	{ act_keyctl_update,	"update",	"[-x] <key> <data>" },
+	{ act_keyctl_watch,	"watch",	"<key>" },
+	{ act_keyctl_watch_add,	"watch_add",	"<fd> <key>" },
+	{ act_keyctl_watch_rm,	"watch_rm",	"<fd> <key>" },
+	{ act_keyctl_watch_session, "watch_session", "[-n <name>] <notifylog> <gclog> <fd> <prog> [<arg1> <arg2> ...]" },
+	{ act_keyctl_watch_sync, "watch_sync",	"<fd>" },
 	{ act_keyctl_test,	"--test",	"..." },
 	{ NULL,			NULL,		NULL }
 };
 
 static int dump_key_tree(key_serial_t keyring, const char *name, int hex_key_IDs);
-static void format(void) __attribute__((noreturn));
-void error(const char *msg) __attribute__((noreturn));
-static key_serial_t get_key_id(char *arg);
 static void *read_file(const char *name, size_t *_size);
 
 static uid_t myuid;
@@ -227,7 +229,7 @@
 /*
  * display command format information
  */
-static void format(void)
+void format(void)
 {
 	const struct command *cmd;
 
@@ -2288,6 +2290,7 @@
 	{ "move_key",			0,	KEYCTL_CAPS0_MOVE },
 	{ "ns_keyring_name",		1,	KEYCTL_CAPS1_NS_KEYRING_NAME },
 	{ "ns_key_tag",			1,	KEYCTL_CAPS1_NS_KEY_TAG },
+	{ "notify",			1,	KEYCTL_CAPS1_NOTIFICATIONS },
 	{}
 };
 
@@ -2332,7 +2335,7 @@
 /*
  * parse a key identifier
  */
-static key_serial_t get_key_id(char *arg)
+key_serial_t get_key_id(char *arg)
 {
 	key_serial_t id;
 	char *end;
diff --git a/keyctl.h b/keyctl.h
index e061334..ee86779 100644
--- a/keyctl.h
+++ b/keyctl.h
@@ -21,9 +21,20 @@
  * keyctl.c
  */
 extern nr void do_command(int, char **, const struct command *, const char *);
-extern nr void error(const char *);
+extern nr void format(void) __attribute__((noreturn));
+extern nr void error(const char *) __attribute__((noreturn));
+extern key_serial_t get_key_id(char *);
 
 /*
  * keyctl_testing.c
  */
 extern nr void act_keyctl_test(int, char *[]);
+
+/*
+ * keyctl_watch.c
+ */
+extern nr void act_keyctl_watch(int , char *[]);
+extern nr void act_keyctl_watch_add(int , char *[]);
+extern nr void act_keyctl_watch_rm(int , char *[]);
+extern nr void act_keyctl_watch_session(int , char *[]);
+extern nr void act_keyctl_watch_sync(int , char *[]);
diff --git a/keyctl_watch.c b/keyctl_watch.c
new file mode 100644
index 0000000..191fe51
--- /dev/null
+++ b/keyctl_watch.c
@@ -0,0 +1,499 @@
+/* Key watching facility.
+ *
+ * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <errno.h>
+#include <poll.h>
+#include <getopt.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include "keyutils.h"
+#include <limits.h>
+#include "keyctl.h"
+#include "watch_queue.h"
+
+#define BUF_SIZE 4
+
+static volatile sig_atomic_t consumer_stop;	/* Set by the SIGTERM handler */
+static pid_t pid_con = -1, pid_cmd = -1;	/* Forked consumer and command (-1 = none) */
+static key_serial_t session;			/* Session keyring joined by watch_session */
+static int watch_fd;				/* Watch queue fd used by exit_cleanup() */
+static int debug;				/* Set by -d: dump each record to stderr */
+
+static inline bool after_eq(unsigned int a, unsigned int b)
+{
+        return (signed int)(a - b) >= 0;	/* Signed difference, so the test survives index wraparound */
+}
+
+static void consumer_term(int sig)
+{
+	consumer_stop = 1;	/* Only sets the flag; consumer() tests it in its loop */
+}
+
+static void saw_key_change(FILE *log, struct watch_notification *n)
+{
+	struct key_notification *k = (struct key_notification *)n;
+	unsigned int len = (n->info & WATCH_INFO_LENGTH) >> WATCH_INFO_LENGTH__SHIFT;
+
+	if (len != sizeof(struct key_notification) / WATCH_LENGTH_GRANULARITY)
+		return;	/* Not exactly key_notification-sized: ignore the record */
+
+	switch (n->subtype) {	/* Log "<key> <event> [<aux>]"; other subtypes log nothing */
+	case NOTIFY_KEY_INSTANTIATED:
+		fprintf(log, "%u inst\n", k->key_id);
+		break;
+	case NOTIFY_KEY_UPDATED:
+		fprintf(log, "%u upd\n", k->key_id);
+		break;
+	case NOTIFY_KEY_LINKED:
+		fprintf(log, "%u link %u\n", k->key_id, k->aux);
+		break;
+	case NOTIFY_KEY_UNLINKED:
+		fprintf(log, "%u unlk %u\n", k->key_id, k->aux);
+		break;
+	case NOTIFY_KEY_CLEARED:
+		fprintf(log, "%u clr\n", k->key_id);
+		break;
+	case NOTIFY_KEY_REVOKED:
+		fprintf(log, "%u rev\n", k->key_id);
+		break;
+	case NOTIFY_KEY_INVALIDATED:
+		fprintf(log, "%u inv\n", k->key_id);
+		break;
+	case NOTIFY_KEY_SETATTR:
+		fprintf(log, "%u attr\n", k->key_id);
+		break;
+	}
+}
+
+/*
+ * Log "<key> gc" for a removal record; exit(0) if it was for watch ID 1.
+ */
+static void saw_removal_notification(FILE *gc, struct watch_notification *n)
+{
+	key_serial_t key = 0;	/* Logged as 0 when the record carries no id extension */
+	unsigned int wp, l;
+
+	l = (n->info & WATCH_INFO_LENGTH) >> WATCH_INFO_LENGTH__SHIFT;
+	l *= WATCH_LENGTH_GRANULARITY;	/* Units -> bytes: the granularity is a size, not a shift count */
+	wp = (n->info & WATCH_INFO_ID) >> WATCH_INFO_ID__SHIFT;
+
+	if (l >= sizeof(struct watch_notification_removal)) {
+		struct watch_notification_removal *r = (void *)n;
+		key = r->id;
+	}
+
+	fprintf(gc, "%u gc\n", key);
+	if (wp == 1)	/* 0x01 is the ID watch and watch_session give their primary key */
+		exit(0);
+}
+
+/*
+ * Poll the watch queue and log every record in the ring until SIGTERM; exits 0.
+ */
+static __attribute__((noreturn))
+int consumer(FILE *log, FILE *gc, int fd, struct watch_queue_buffer *buf)
+{
+	struct watch_notification *n;
+	struct pollfd p[1];
+	unsigned int head, tail, mask = buf->meta.mask;
+
+	setlinebuf(log);
+	setlinebuf(gc);
+	signal(SIGTERM, consumer_term);	/* SIGTERM just sets consumer_stop */
+
+	do {
+		if (!consumer_stop) {
+			p[0].fd = fd;
+			p[0].events = POLLIN | POLLERR;
+			p[0].revents = 0;
+
+			if (poll(p, 1, -1) == -1) {
+				if (errno != EINTR)
+					error("poll");
+				/* Interrupted: fall through so a SIGTERM still drains the ring */
+			}
+		}
+
+		while (head = __atomic_load_n(&buf->meta.head, __ATOMIC_ACQUIRE),
+		       tail = buf->meta.tail,
+		       tail != head
+		       ) {
+			n = &buf->slots[tail & mask];	/* Indices are free-running; mask wraps them */
+			if (debug)
+				fprintf(stderr,
+					"NOTIFY[%08x-%08x] ty=%06x:%02x i=%08x\n",
+					head, tail, n->type, n->subtype, n->info);
+
+			if ((n->info & WATCH_INFO_LENGTH) == 0) {	/* Would never advance tail */
+				fprintf(stderr, "Zero-length watch record\n");
+				exit(4);
+			}
+
+			switch (n->type) {	/* Other types and subtypes are skipped silently */
+			case WATCH_TYPE_META:
+				switch (n->subtype) {
+				case WATCH_META_REMOVAL_NOTIFICATION:
+					saw_removal_notification(gc, n);
+					break;
+				}
+				break;
+			case WATCH_TYPE_KEY_NOTIFY:
+				saw_key_change(log, n);
+				break;
+			}
+
+			tail += (n->info & WATCH_INFO_LENGTH) >> WATCH_INFO_LENGTH__SHIFT;
+			__atomic_store_n(&buf->meta.tail, tail, __ATOMIC_RELEASE);
+		}
+	} while (!consumer_stop);
+
+	fprintf(log, "Monitoring terminated\n");
+	if (gc != log)	/* watch passes stdout for both; don't print it twice */
+		fprintf(gc, "Monitoring terminated\n");
+	exit(0);
+}
+
+static struct watch_notification_filter filter = {	/* Handed to IOC_WATCH_QUEUE_SET_FILTER */
+	.nr_filters	= 1,
+	.__reserved	= 0,
+	.filters = {
+		[0]	= {
+			.type			= WATCH_TYPE_KEY_NOTIFY,
+			.subtype_filter[0]	= UINT_MAX,	/* All 32 bits of word 0: subtypes 0-31 */
+		},
+	},
+};
+
+/* Open /dev/watch_queue, set its size to BUF_SIZE pages, install the key
+ * filter and mmap the ring.  Returns the fd and stores the mapping in *_buf;
+ * every failure is reported through error(), which does not return. */
+static int open_watch(struct watch_queue_buffer **_buf)
+{
+	struct watch_queue_buffer *buf;
+	size_t page_size;
+	int fd;
+
+	fd = open("/dev/watch_queue", O_RDWR);
+	if (fd == -1)
+		error("/dev/watch_queue");
+
+	if (ioctl(fd, IOC_WATCH_QUEUE_SET_SIZE, BUF_SIZE) == -1)
+		error("/dev/watch_queue(size)");
+
+	if (ioctl(fd, IOC_WATCH_QUEUE_SET_FILTER, &filter) == -1)
+		error("/dev/watch_queue(filter)");
+
+	page_size = sysconf(_SC_PAGESIZE);
+	buf = mmap(NULL, BUF_SIZE * page_size, PROT_READ | PROT_WRITE,
+		   MAP_SHARED, fd, 0);	/* Map the same number of pages that SET_SIZE was given */
+	if (buf == MAP_FAILED)
+		error("mmap");
+
+	*_buf = buf;
+	return fd;
+}
+
+/*
+ * keyctl watch <key>: log changes to a key or keyring on stdout (does not return).
+ */
+void act_keyctl_watch(int argc, char *argv[])
+{
+	struct watch_queue_buffer *buf;
+	key_serial_t key;
+	int wfd;
+
+	if (argc != 2)
+		format();
+
+	key = get_key_id(argv[1]);
+	wfd = open_watch(&buf);
+
+	if (keyctl_watch_key(key, wfd, 0x01) == -1)	/* ID 1: its removal record makes the consumer exit */
+		error("keyctl_watch_key");
+
+	consumer(stdout, stdout, wfd, buf);	/* Same stream for change and gc records */
+}
+
+/*
+ * keyctl watch_add <fd> <key>: add a watch on a key to watch_session's monitor.
+ */
+void act_keyctl_watch_add(int argc, char *argv[])
+{
+	key_serial_t key;
+	int fd;
+
+	if (argc != 3)
+		format();
+
+	fd = atoi(argv[1]);	/* Not validated; a bad fd surfaces as a keyctl_watch_key error */
+	key = get_key_id(argv[2]);
+
+	if (keyctl_watch_key(key, fd, 0x02) == -1)	/* ID 2, so its removal does not stop the consumer */
+		error("keyctl_watch_key");
+	exit(0);
+}
+
+/*
+ * keyctl watch_rm <fd> <key>: remove a key's watch from watch_session's monitor.
+ */
+void act_keyctl_watch_rm(int argc, char *argv[])
+{
+	key_serial_t key;
+	int fd;
+
+	if (argc != 3)
+		format();
+
+	fd = atoi(argv[1]);	/* Not validated; a bad fd surfaces as a keyctl_watch_key error */
+	key = get_key_id(argv[2]);
+
+	if (keyctl_watch_key(key, fd, -1) == -1)	/* watch_id -1 requests removal */
+		error("keyctl_watch_key");
+	exit(0);
+}
+
+static void exit_cleanup(void)	/* atexit handler registered by act_keyctl_watch_session() */
+{
+	pid_t me = getpid();
+	int w;
+
+	if (me != pid_cmd && me != pid_con) {	/* NOTE(review): children reset both pids to -1, so this looks always true */
+		keyctl_watch_key(session, watch_fd, -1);	/* Best effort: result ignored */
+		if (pid_cmd != -1) {
+			kill(pid_cmd, SIGTERM);
+			waitpid(pid_cmd, &w, 0);
+		}
+		if (pid_con != -1) {
+			kill(pid_con, SIGTERM);
+			waitpid(pid_con, &w, 0);
+		}
+	}
+}
+
+static void run_command(int argc, char *argv[], int wfd)
+{
+	char buf[16];	/* Decimal wfd for $KEYCTL_WATCH_FD */
+
+	pid_cmd = fork();
+	if (pid_cmd == -1)
+		error("fork");
+	if (pid_cmd != 0)
+		return;	/* Parent: the child's pid stays in pid_cmd */
+
+	pid_cmd = -1;	/* Child: it has no children for exit_cleanup() to kill */
+	pid_con = -1;
+
+	snprintf(buf, sizeof(buf), "%d", wfd);	/* Bounded, and %d because wfd is an int */
+	setenv("KEYCTL_WATCH_FD", buf, true);
+
+	/* run the standard shell if no arguments */
+	if (argc == 0) {
+		const char *q = getenv("SHELL");
+		if (!q)
+			q = "/bin/sh";
+		execl(q, q, NULL);
+		error(q);
+	}
+
+	/* run the command specified */
+	execvp(argv[0], argv);
+	error(argv[0]);	/* Only reached if the exec failed */
+}
+
+/*
+ * Open a logfile for appending and mark it close-on-exec.
+ */
+static FILE *open_logfile(const char *logfile)
+{
+	int flags;	/* Signed: fcntl() returns int and reports failure as -1 */
+	FILE *log;
+	int lfd;
+
+	log = fopen(logfile, "a");
+	if (!log)
+		error(logfile);
+
+	lfd = fileno(log);
+	flags = fcntl(lfd, F_GETFD);
+	if (flags == -1)
+		error("F_GETFD");
+	if (fcntl(lfd, F_SETFD, flags | FD_CLOEXEC) == -1)	/* Keep the log out of the exec'd program */
+		error("F_SETFD");
+
+	return log;
+}
+
+/* Join a new session keyring, watch it (ID 0x01) from a forked consumer and run
+ * <prog> with the watch queue left open on <fd>.  -n names the keyring, -d turns
+ * on record dumps.  Exits with the consumer's status if nonzero, else <prog>'s.
+ */
+void act_keyctl_watch_session(int argc, char *argv[])
+{
+	struct watch_queue_buffer *buf;
+	const char *session_name = NULL;
+	const char *logfile, *gcfile, *target_fd;
+	int flags;	/* Signed: fcntl() returns int and reports failure as -1 */
+	pid_t pid;
+	FILE *log, *gc;
+	int wfd, tfd, opt, w, e = 0, e2 = 0;
+
+	while (opt = getopt(argc, argv, "+dn:"),
+	       opt != -1) {
+		switch (opt) {
+		case 'd':
+			debug = 1;
+			break;
+		case 'n':
+			session_name = optarg;
+			break;
+		default:
+			fprintf(stderr, "Unknown option\n");
+			exit(2);
+		}
+	}
+
+	argv += optind;
+	argc -= optind;
+
+	if (argc < 4)	/* <notifylog> <gclog> <fd> <prog> are all mandatory */
+		format();
+
+	logfile = argv[0];
+	gcfile = argv[1];
+	target_fd = argv[2];
+	tfd = atoi(target_fd);
+	if (tfd < 3 || tfd > 9) {
+		fprintf(stderr, "The target fd must be between 3 and 9\n");
+		exit(2);
+	}
+
+	wfd = open_watch(&buf);
+	if (wfd != tfd) {	/* Move the queue onto the fd number the caller asked for */
+		if (dup2(wfd, tfd) == -1)
+			error("dup2");
+		close(wfd);
+		wfd = tfd;
+	}
+	watch_fd = wfd;
+
+	atexit(exit_cleanup);
+
+	/* We want the fd to be inherited across a fork. */
+	flags = fcntl(wfd, F_GETFD);
+	if (flags == -1)
+		error("F_GETFD");
+	if (fcntl(wfd, F_SETFD, flags & ~FD_CLOEXEC) == -1)
+		error("F_SETFD");
+
+	log = open_logfile(logfile);
+	gc = open_logfile(gcfile);
+
+	pid_con = fork();
+	if (pid_con == -1)
+		error("fork");
+	if (pid_con == 0) {
+		pid_cmd = -1;
+		pid_con = -1;
+		consumer(log, gc, wfd, buf);	/* Does not return */
+	}
+
+	/* Create a new session keyring and watch it. */
+	session = keyctl_join_session_keyring(session_name);
+	if (session == -1)
+		error("keyctl_join_session_keyring");
+
+	if (keyctl_watch_key(session, wfd, 0x01) == -1)
+		error("keyctl_watch_key/session");
+
+	fprintf(stderr, "Joined session keyring: %d\n", session);
+
+	/* Start the command and then wait for it to finish and the
+	 * notification consumer to clean up.
+	 */
+	run_command(argc - 3, argv + 3, wfd);
+	close(wfd);
+	wfd = -1;
+
+	while (pid = wait(&w),
+	       pid != -1) {
+		if (pid == pid_cmd) {
+			if (pid_con != -1)	/* Command finished: tell the consumer to stop */
+				kill(pid_con, SIGTERM);
+			if (WIFEXITED(w)) {
+				e2 = WEXITSTATUS(w);
+				pid_cmd = -1;
+			} else if (WIFSIGNALED(w)) {
+				e2 = WTERMSIG(w) + 128;
+				pid_cmd = -1;
+			} else if (WIFSTOPPED(w)) {
+				raise(WSTOPSIG(w));
+			}
+		} else if (pid == pid_con) {
+			if (pid_cmd != -1)	/* Consumer went first: stop the command too */
+				kill(pid_cmd, SIGTERM);
+			if (WIFEXITED(w)) {
+				e = WEXITSTATUS(w);
+				pid_con = -1;
+			} else if (WIFSIGNALED(w)) {
+				e = WTERMSIG(w) + 128;
+				pid_con = -1;
+			}
+		}
+	}
+
+	if (e == 0)	/* The consumer's failure takes precedence over the command's */
+		e = e2;
+	exit(e);
+}
+
+/*
+ * Wait for monitoring to synchronise (the wait loop is #if 0'd: this only mmaps <fd>).
+ */
+void act_keyctl_watch_sync(int argc, char *argv[])
+{
+	struct watch_queue_buffer *buf;
+	//unsigned int head, tail;
+	size_t page_size;
+	int wfd;
+
+	if (argc != 2)
+		format();
+
+	wfd = atoi(argv[1]);
+
+	/* We only need to see the first page. */
+	page_size = sysconf(_SC_PAGESIZE);
+	buf = mmap(NULL, 1 * page_size, PROT_READ | PROT_WRITE,
+		   MAP_SHARED, wfd, 0);
+	if (buf == MAP_FAILED)
+		error("mmap");
+
+#if 0	/* Disabled: would sleep until the consumer's tail reaches the head sampled here */
+	head = __atomic_load_n(&buf->meta.head, __ATOMIC_RELAXED);
+
+	while (tail = __atomic_load_n(&buf->meta.tail, __ATOMIC_RELAXED),
+	       !after_eq(tail, head)
+	       )
+		usleep(10000);
+#endif
+	exit(0);
+}
diff --git a/keyutils.c b/keyutils.c
index 9c37256..6f2d797 100644
--- a/keyutils.c
+++ b/keyutils.c
@@ -385,6 +385,11 @@
 	return sizeof(unsigned char);
 }
 
+long keyctl_watch_key(key_serial_t id, int watch_queue_fd, int watch_id)
+{
+	return keyctl(KEYCTL_WATCH_KEY, id, watch_queue_fd, watch_id);	/* Arguments go to keyctl() unchanged */
+}
+
 /*****************************************************************************/
 /*
  * fetch key description into an allocated buffer
diff --git a/keyutils.h b/keyutils.h
index d50119e..8570045 100644
--- a/keyutils.h
+++ b/keyutils.h
@@ -108,6 +108,7 @@
 #define KEYCTL_RESTRICT_KEYRING		29	/* Restrict keys allowed to link to a keyring */
 #define KEYCTL_MOVE			30	/* Move keys between keyrings */
 #define KEYCTL_CAPABILITIES		31	/* Find capabilities of keyrings subsystem */
+#define KEYCTL_WATCH_KEY		32	/* Watch a key or ring of keys for changes */
 
 /* keyctl structures */
 struct keyctl_dh_params {
@@ -164,6 +165,7 @@
 #define KEYCTL_CAPS0_MOVE		0x80 /* KEYCTL_MOVE supported */
 #define KEYCTL_CAPS1_NS_KEYRING_NAME	0x01 /* Keyring names are per-user_namespace */
 #define KEYCTL_CAPS1_NS_KEY_TAG		0x02 /* Key indexing can include a namespace tag */
+#define KEYCTL_CAPS1_NOTIFICATIONS	0x04 /* Keys generate watchable notifications */
 
 /*
  * syscall wrappers
@@ -251,6 +253,7 @@
 			key_serial_t to_ringid,
 			unsigned int flags);
 extern long keyctl_capabilities(unsigned char *buffer, size_t buflen);
+extern long keyctl_watch_key(key_serial_t id, int watch_queue_fd, int watch_id);
 
 /*
  * utilities
diff --git a/man/keyctl.1 b/man/keyctl.1
index 2f545bd..f18f92d 100644
--- a/man/keyctl.1
+++ b/man/keyctl.1
@@ -114,6 +114,15 @@
 \fBkeyctl\fR pkey_sign <key> <pass> <datafile> [k=v]* ><sigfile>
 .br
 \fBkeyctl\fR pkey_decrypt <key> <pass> <datafile> <sigfile> [k=v]*
+.br
+\fBkeyctl\fR watch <key>
+.br
+\fBkeyctl\fR watch_add <fd> <key>
+.br
+\fBkeyctl\fR watch_rm <fd> <key>
+.br
+\fBkeyctl\fR watch_session [-n <name>] \\
+                <notifylog> <gclog> <fd> <prog> [<arg1> <arg2> ...]
 .SH DESCRIPTION
 This program is used to control the key management facility in various ways
 using a variety of subcommands.
@@ -944,6 +953,89 @@
 .PP
 See asymmetric-key(7) for more information.
 
+.SS Change notifications
+\fBkeyctl\fR watch <key>
+.br
+\fBkeyctl\fR watch_session [-n <name>] \\
+                <notifylog> <gclog> <fd> <prog> [<arg1> <arg2> ...]
+.br
+\fBkeyctl\fR watch_add <fd> <key>
+.br
+\fBkeyctl\fR watch_rm <fd> <key>
+.PP
+The
+.B watch
+command watches a single key, printing notifications to stdout until the key
+is destroyed.
+.PP
+The output of the command looks like:
+.PP
+.RS
+.nf
+.RI < keyid "> <" event "> [<" aux ">]"
+.fi
+.RE
+.PP
+Where
+.I keyid
+is the primary subject of the notification,
+.I event
+is the event and
+.I aux
+is the secondary key if there is one (such as link, where the primary key is
+the keyring and the secondary key is the key being linked into it).  For example:
+.PP
+.RS
+.nf
+255913279 link 340681059
+255913279 clr
+.fi
+.RE
+.PP
+An additional notification is generated when a key being watched is garbage
+collected, e.g.:
+.PP
+.RS
+.nf
+255913279 gc
+.fi
+.RE
+.PP
+The
+.B watch_session
+command creates a new session keyring, with name
+.I name
+if given, watches it for notifications and runs program
+.I prog
+with it.  The program is given the specified arguments.
+.PP
+A second process is forked off to monitor the notifications.  The output from
+that is directed to the files
+.I notifylog
+for most notifications and
+.I gclog
+for key removal notifications (which are asynchronous and may be deferred).
+.PP
+The
+.BR watch_queue (7)
+device is exported to the program attached to fd number
+.IR fd .
+This can be passed to the other two commands.
+.PP
+The
+.B watch_add
+command adds a watch on
+.I key
+to the
+.B watch_queue
+attached to
+.I fd
+as exported by watch_session and the
+.B watch_rm
+command removes it.  A watch_queue can handle multiple keys and even non-key
+sources as well.
+
+
 .SH ERRORS
 There are a number of common errors returned by this program:
 
diff --git a/man/keyctl.3 b/man/keyctl.3
index b0f5fdc..3b9e889 100644
--- a/man/keyctl.3
+++ b/man/keyctl.3
@@ -104,6 +104,8 @@
 .BR keyctl_unlink (3)
 .br
 .BR keyctl_update (3)
+.br
+.BR keyctl_watch_key (3)
 .\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 .SH UTILITY FUNCTIONS
 .BR find_key_by_type_and_name (3)
diff --git a/man/keyctl_watch_key.3 b/man/keyctl_watch_key.3
new file mode 100644
index 0000000..044b7a3
--- /dev/null
+++ b/man/keyctl_watch_key.3
@@ -0,0 +1,206 @@
+.\"
+.\" Copyright (C) 2019 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\"
+.\" This program is free software; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License
+.\" as published by the Free Software Foundation; either version
+.\" 2 of the License, or (at your option) any later version.
+.\"
+.TH KEYCTL_WATCH_KEY 3 "28 Aug 2019" Linux "Linux Key Management Calls"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SH NAME
+keyctl_watch_key \- Watch for changes to a key
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SH SYNOPSIS
+.nf
+.B #include <keyutils.h>
+.sp
+.BI "long keyctl_watch_key(key_serial_t " key ,
+.BI "                      int " watch_queue_fd ,
+.BI "                      int " watch_id ");"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SH DESCRIPTION
+.BR keyctl_watch_key ()
+sets or removes a watch on
+.IR key .
+.PP
+.I watch_id
+specifies the ID for a watch that will be included in notification messages.
+It can be between 0 and 255 to add a key; it should be -1 to remove a key.
+.PP
+.I watch_queue_fd
+is a file descriptor attached to a watch_queue device instance.  Multiple
+openings of a device provide separate instances.  Each device instance can
+only have one watch on any particular key.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS Notification Record
+.PP
+Key-specific notification messages that the kernel emits into the buffer have
+the following format:
+.PP
+.in +4n
+.EX
+struct key_notification {
+	struct watch_notification watch;
+	__u32	key_id;
+	__u32	aux;
+};
+.EE
+.in
+.PP
+The
+.I watch.type
+field will be set to
+.B WATCH_TYPE_KEY_NOTIFY
+and the
+.I watch.subtype
+field will contain one of the following constants, indicating the event that
+occurred and the watch_id passed to keyctl_watch_key() will be placed in
+.I watch.info
+in the ID field.  The following events are defined:
+.TP
+.B NOTIFY_KEY_INSTANTIATED
+This indicates that a watched key got instantiated or negatively instantiated.
+.I key_id
+indicates the key that was instantiated and
+.I aux
+is unused.
+.TP
+.B NOTIFY_KEY_UPDATED
+This indicates that a watched key got updated or instantiated by update.
+.I key_id
+indicates the key that was updated and
+.I aux
+is unused.
+.TP
+.B NOTIFY_KEY_LINKED
+This indicates that a key got linked into a watched keyring.
+.I key_id
+indicates the keyring that was modified and
+.I aux
+indicates the key that was added.
+.TP
+.B NOTIFY_KEY_UNLINKED
+This indicates that a key got unlinked from a watched keyring.
+.I key_id
+indicates the keyring that was modified and
+.I aux
+indicates the key that was removed.
+.TP
+.B NOTIFY_KEY_CLEARED
+This indicates that a watched keyring got cleared.
+.I key_id
+indicates the keyring that was cleared and
+.I aux
+is unused.
+.TP
+.B NOTIFY_KEY_REVOKED
+This indicates that a watched key got revoked.
+.I key_id
+indicates the key that was revoked and
+.I aux
+is unused.
+.TP
+.B NOTIFY_KEY_INVALIDATED
+This indicates that a watched key got invalidated.
+.I key_id
+indicates the key that was invalidated and
+.I aux
+is unused.
+.TP
+.B NOTIFY_KEY_SETATTR
+This indicates that a watched key had its attributes (owner, group,
+permissions, timeout) modified.
+.I key_id
+indicates the key that was modified and
+.I aux
+is unused.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS Removal Notification
+When a watched key is garbage collected, all of its watches are automatically
+destroyed and a notification is delivered to each watcher.  This will normally
+be an extended notification of the form:
+.PP
+.in +4n
+.EX
+struct watch_notification_removal {
+	struct watch_notification watch;
+	__u64	id;
+};
+.EE
+.in
+.PP
+The
+.I watch.type
+field will be set to
+.B WATCH_TYPE_META
+and the
+.I watch.subtype
+field will contain
+.BR WATCH_META_REMOVAL_NOTIFICATION .
+If the extended notification is given, then the length will be 2 units,
+otherwise it will be 1 and only the header will be present.
+.PP
+The watch_id passed to
+.BR keyctl_watch_key ()
+will be placed in
+.I watch.info
+in the ID field.
+.PP
+If the extension is present,
+.I id
+will be set to the ID of the destroyed key.
+.PP
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SH RETURN VALUE
+On success
+.BR keyctl_watch_key ()
+returns
+.BR 0 .
+On error, the value
+.B -1
+will be returned and
+.I errno
+will have been set to an appropriate error.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SH ERRORS
+.TP
+.B ENOKEY
+The specified key does not exist.
+.TP
+.B EKEYEXPIRED
+The specified key has expired.
+.TP
+.B EKEYREVOKED
+The specified key has been revoked.
+.TP
+.B EACCES
+The named key exists, but does not grant
+.B view
+permission to the calling process.
+.TP
+.B EBUSY
+The specified key already has a watch on it for that device instance (add
+only).
+.TP
+.B EBADSLT
+The specified key doesn't have a watch on it (removal only).
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SH LINKING
+This is a library function that can be found in
+.IR libkeyutils .
+When linking,
+.B \-lkeyutils
+should be specified to the linker.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SH SEE ALSO
+.ad l
+.nh
+.BR keyctl (1),
+.BR add_key (2),
+.BR keyctl (2),
+.BR request_key (2),
+.BR keyctl (3),
+.BR keyrings (7),
+.BR keyutils (7)
diff --git a/tests/prepare.inc.sh b/tests/prepare.inc.sh
index 448e42b..a306ed0 100644
--- a/tests/prepare.inc.sh
+++ b/tests/prepare.inc.sh
@@ -74,6 +74,7 @@
 have_big_key_type=0
 have_dh_compute=0
 have_restrict_keyring=0
+have_notify=0
 
 if keyctl supports capabilities >&/dev/null
 then
diff --git a/version.lds b/version.lds
index 2a6e142..6c34adf 100644
--- a/version.lds
+++ b/version.lds
@@ -100,5 +100,7 @@
 } KEYUTILS_1.8;
 
 KEYUTILS_1.10 {
+	/* Management functions */
+	keyctl_watch_key;
 
 } KEYUTILS_1.9;
diff --git a/watch_queue.h b/watch_queue.h
new file mode 100644
index 0000000..16883d7
--- /dev/null
+++ b/watch_queue.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _KEYUTILS_LINUX_WATCH_QUEUE_H
+#define _KEYUTILS_LINUX_WATCH_QUEUE_H
+
+#include <linux/types.h>
+#include <sys/ioctl.h>
+
+#define IOC_WATCH_QUEUE_SET_SIZE	_IO('W', 0x60)	/* Set the size in pages */
+#define IOC_WATCH_QUEUE_SET_FILTER	_IO('W', 0x61)	/* Set the filter */
+
+enum watch_notification_type {
+	WATCH_TYPE_META		= 0,	/* Special record */
+	WATCH_TYPE_KEY_NOTIFY	= 1,	/* Key change event notification */
+	WATCH_TYPE___NR		= 2
+};
+
+enum watch_meta_notification_subtype {
+	WATCH_META_SKIP_NOTIFICATION	= 0,	/* Just skip this record */
+	WATCH_META_REMOVAL_NOTIFICATION	= 1,	/* Watched object was removed */
+};
+
+#define WATCH_LENGTH_GRANULARITY sizeof(__u64)
+
+/*
+ * Notification record header.  This is aligned to 64-bits so that subclasses
+ * can contain __u64 fields.
+ */
+struct watch_notification {
+	__u32			type:24;	/* enum watch_notification_type */
+	__u32			subtype:8;	/* Type-specific subtype (filterable) */
+	__u32			info;
+#define WATCH_INFO_LENGTH	0x0000003f	/* Length of record / sizeof(watch_notification) */
+#define WATCH_INFO_LENGTH__SHIFT 0
+#define WATCH_INFO_ID		0x0000ff00	/* ID of watchpoint, if type-appropriate */
+#define WATCH_INFO_ID__SHIFT	8
+#define WATCH_INFO_TYPE_INFO	0xffff0000	/* Type-specific info */
+#define WATCH_INFO_TYPE_INFO__SHIFT 16
+#define WATCH_INFO_FLAG_0	0x00010000	/* Type-specific info, flag bit 0 */
+#define WATCH_INFO_FLAG_1	0x00020000	/* ... */
+#define WATCH_INFO_FLAG_2	0x00040000
+#define WATCH_INFO_FLAG_3	0x00080000
+#define WATCH_INFO_FLAG_4	0x00100000
+#define WATCH_INFO_FLAG_5	0x00200000
+#define WATCH_INFO_FLAG_6	0x00400000
+#define WATCH_INFO_FLAG_7	0x00800000
+} __attribute__((aligned(WATCH_LENGTH_GRANULARITY)));
+
+struct watch_queue_buffer {
+	union {
+		/* The first few entries are special, containing the
+		 * ring management variables.
+		 */
+		struct {
+			struct watch_notification watch; /* WATCH_TYPE_META */
+			__u32		head;		/* Ring head index */
+			__u32		tail;		/* Ring tail index */
+			__u32		mask;		/* Ring index mask */
+			__u32		__reserved;
+		} meta;
+		struct watch_notification slots[0];
+	};
+};
+
+/*
+ * The Metadata pseudo-notification message uses a flag bit in the information
+ * field to convey the fact that messages have been lost.  We can only use a
+ * single bit in this manner per word as some arches that support SMP
+ * (eg. parisc) have no kernel<->user atomic bit ops.
+ */
+#define WATCH_INFO_NOTIFICATIONS_LOST WATCH_INFO_FLAG_0
+
+/*
+ * Notification filtering rules (IOC_WATCH_QUEUE_SET_FILTER).
+ */
+struct watch_notification_type_filter {
+	__u32	type;			/* Type to apply filter to */
+	__u32	info_filter;		/* Filter on watch_notification::info */
+	__u32	info_mask;		/* Mask of relevant bits in info_filter */
+	__u32	subtype_filter[8];	/* Bitmask of subtypes to filter on */
+};
+
+struct watch_notification_filter {
+	__u32	nr_filters;		/* Number of filters */
+	__u32	__reserved;		/* Must be 0 */
+	struct watch_notification_type_filter filters[];
+};
+
+
+/*
+ * Extended watch removal notification.  This is used optionally if the type
+ * wants to indicate an identifier for the object being watched, if there is
+ * such.  This can be distinguished by the length.
+ *
+ * type -> WATCH_TYPE_META
+ * subtype -> WATCH_META_REMOVAL_NOTIFICATION
+ * length -> 2 * gran
+ */
+struct watch_notification_removal {
+	struct watch_notification watch;
+	__u64	id;		/* Type-dependent identifier */
+};
+
+/*
+ * Type of key/keyring change notification.
+ */
+enum key_notification_subtype {
+	NOTIFY_KEY_INSTANTIATED	= 0, /* Key was instantiated (aux is error code) */
+	NOTIFY_KEY_UPDATED	= 1, /* Key was updated */
+	NOTIFY_KEY_LINKED	= 2, /* Key (aux) was added to watched keyring */
+	NOTIFY_KEY_UNLINKED	= 3, /* Key (aux) was removed from watched keyring */
+	NOTIFY_KEY_CLEARED	= 4, /* Keyring was cleared */
+	NOTIFY_KEY_REVOKED	= 5, /* Key was revoked */
+	NOTIFY_KEY_INVALIDATED	= 6, /* Key was invalidated */
+	NOTIFY_KEY_SETATTR	= 7, /* Key's attributes got changed */
+};
+
+/*
+ * Key/keyring notification record.
+ * - watch.type = WATCH_TYPE_KEY_NOTIFY
+ * - watch.subtype = enum key_notification_subtype
+ */
+struct key_notification {
+	struct watch_notification watch;
+	__u32	key_id;		/* The key/keyring affected */
+	__u32	aux;		/* Per-type auxiliary data */
+};
+
+#endif /* _KEYUTILS_LINUX_WATCH_QUEUE_H */