| .\" Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com> |
| .\" starting from a version by Davide Libenzi <davidel@xmailserver.org> |
| .\" |
| .\" %%%LICENSE_START(GPLv2+_SW_3_PARA) |
| .\" This program is free software; you can redistribute it and/or modify |
| .\" it under the terms of the GNU General Public License as published by |
| .\" the Free Software Foundation; either version 2 of the License, or |
| .\" (at your option) any later version. |
| .\" |
| .\" This program is distributed in the hope that it will be useful, |
| .\" but WITHOUT ANY WARRANTY; without even the implied warranty of |
| .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| .\" GNU General Public License for more details. |
| .\" |
| .\" You should have received a copy of the GNU General Public |
| .\" License along with this manual; if not, see |
| .\" <http://www.gnu.org/licenses/>. |
| .\" %%%LICENSE_END |
| .\" |
| .\" 2008-10-10, mtk: describe eventfd2(), and EFD_NONBLOCK and EFD_CLOEXEC |
| .\" |
| .TH EVENTFD 2 2021-03-22 Linux "Linux Programmer's Manual" |
| .SH NAME |
| eventfd \- create a file descriptor for event notification |
| .SH SYNOPSIS |
| .nf |
| .B #include <sys/eventfd.h> |
| .PP |
| .BI "int eventfd(unsigned int " initval ", int " flags ); |
| .fi |
| .SH DESCRIPTION |
| .BR eventfd () |
| creates an "eventfd object" that can be used as |
| an event wait/notify mechanism by user-space applications, |
| and by the kernel to notify user-space applications of events. |
| The object contains an unsigned 64-bit integer |
| .RI ( uint64_t ) |
| counter that is maintained by the kernel. |
| This counter is initialized with the value specified in the argument |
| .IR initval . |
| .PP |
| As its return value, |
| .BR eventfd () |
| returns a new file descriptor that can be used to refer to the |
| eventfd object. |
| .PP |
| The following values may be bitwise ORed in |
| .IR flags |
| to change the behavior of |
| .BR eventfd (): |
| .TP |
| .BR EFD_CLOEXEC " (since Linux 2.6.27)" |
| Set the close-on-exec |
| .RB ( FD_CLOEXEC ) |
| flag on the new file descriptor. |
| See the description of the |
| .B O_CLOEXEC |
| flag in |
| .BR open (2) |
| for reasons why this may be useful. |
| .TP |
| .BR EFD_NONBLOCK " (since Linux 2.6.27)" |
| Set the |
| .BR O_NONBLOCK |
| file status flag on the open file description (see |
| .BR open (2)) |
| referred to by the new file descriptor. |
| Using this flag saves extra calls to |
| .BR fcntl (2) |
| to achieve the same result. |
| .TP |
| .BR EFD_SEMAPHORE " (since Linux 2.6.30)" |
| Provide semaphore-like semantics for reads from the new file descriptor. |
| See below. |
| .PP |
| In Linux up to version 2.6.26, the |
| .I flags |
| argument is unused, and must be specified as zero. |
| .PP |
| The following operations can be performed on the file descriptor returned by |
| .BR eventfd (): |
| .TP |
| .BR read (2) |
| Each successful |
| .BR read (2) |
| returns an 8-byte integer. |
| A |
| .BR read (2) |
| fails with the error |
| .B EINVAL |
| if the size of the supplied buffer is less than 8 bytes. |
| .IP |
| The value returned by |
| .BR read (2) |
| is in host byte order\(emthat is, |
| the native byte order for integers on the host machine. |
| .IP |
| The semantics of |
| .BR read (2) |
| depend on whether the eventfd counter currently has a nonzero value |
| and whether the |
| .BR EFD_SEMAPHORE |
| flag was specified when creating the eventfd file descriptor: |
| .RS |
| .IP * 3 |
| If |
| .BR EFD_SEMAPHORE |
| was not specified and the eventfd counter has a nonzero value, then a |
| .BR read (2) |
| returns 8 bytes containing that value, |
| and the counter's value is reset to zero. |
| .IP * |
| If |
| .BR EFD_SEMAPHORE |
| was specified and the eventfd counter has a nonzero value, then a |
| .BR read (2) |
| returns 8 bytes containing the value 1, |
| and the counter's value is decremented by 1. |
| .IP * |
| If the eventfd counter is zero at the time of the call to |
| .BR read (2), |
| then the call either blocks until the counter becomes nonzero |
| (at which time, the |
| .BR read (2) |
| proceeds as described above) |
| or fails with the error |
| .B EAGAIN |
| if the file descriptor has been made nonblocking. |
| .RE |
| .TP |
| .BR write (2) |
| A |
| .BR write (2) |
| call adds the 8-byte integer value supplied in its |
| buffer to the counter. |
| The maximum value that may be stored in the counter is the largest |
| unsigned 64-bit value minus 1 (i.e., 0xfffffffffffffffe). |
| If the addition would cause the counter's value to exceed |
| the maximum, then the |
| .BR write (2) |
| either blocks until a |
| .BR read (2) |
| is performed on the file descriptor, |
| or fails with the error |
| .B EAGAIN |
| if the file descriptor has been made nonblocking. |
| .IP |
| A |
| .BR write (2) |
| fails with the error |
| .B EINVAL |
| if the size of the supplied buffer is less than 8 bytes, |
| or if an attempt is made to write the value 0xffffffffffffffff. |
| .TP |
| .BR poll "(2), " select "(2) (and similar)" |
| The returned file descriptor supports |
| .BR poll (2) |
| (and analogously |
| .BR epoll (7)) |
| and |
| .BR select (2), |
| as follows: |
| .RS |
| .IP * 3 |
| The file descriptor is readable |
| (the |
| .BR select (2) |
| .I readfds |
| argument; the |
| .BR poll (2) |
| .B POLLIN |
| flag) |
| if the counter has a value greater than 0. |
| .IP * |
| The file descriptor is writable |
| (the |
| .BR select (2) |
| .I writefds |
| argument; the |
| .BR poll (2) |
| .B POLLOUT |
| flag) |
| if it is possible to write a value of at least "1" without blocking. |
| .IP * |
| If an overflow of the counter value was detected, |
| then |
| .BR select (2) |
| indicates the file descriptor as being both readable and writable, and |
| .BR poll (2) |
| returns a |
| .B POLLERR |
| event. |
| As noted above, |
| .BR write (2) |
| can never overflow the counter. |
| However, an overflow can occur if 2^64 |
| eventfd "signal posts" were performed by the KAIO |
| subsystem (theoretically possible, but practically unlikely). |
| If an overflow has occurred, then |
| .BR read (2) |
| will return that maximum |
| .I uint64_t |
| value (i.e., 0xffffffffffffffff). |
| .RE |
| .IP |
| The eventfd file descriptor also supports the other file-descriptor |
| multiplexing APIs: |
| .BR pselect (2) |
| and |
| .BR ppoll (2). |
| .TP |
| .BR close (2) |
| When the file descriptor is no longer required, it should be closed. |
| When all file descriptors associated with the same eventfd object |
| have been closed, the resources for the object are freed by the kernel. |
| .PP |
| A copy of the file descriptor created by |
| .BR eventfd () |
| is inherited by the child produced by |
| .BR fork (2). |
| The duplicate file descriptor is associated with the same |
| eventfd object. |
| File descriptors created by |
| .BR eventfd () |
| are preserved across |
| .BR execve (2), |
| unless the close-on-exec flag has been set. |
| .SH RETURN VALUE |
| On success, |
| .BR eventfd () |
| returns a new eventfd file descriptor. |
| On error, \-1 is returned and |
| .I errno |
| is set to indicate the error. |
| .SH ERRORS |
| .TP |
| .B EINVAL |
| An unsupported value was specified in |
| .IR flags . |
| .TP |
| .B EMFILE |
| The per-process limit on the number of open file descriptors has been reached. |
| .TP |
| .B ENFILE |
| The system-wide limit on the total number of open files has been |
| reached. |
| .TP |
| .B ENODEV |
| .\" Note from Davide: |
| .\" The ENODEV error is basically never going to happen if |
| .\" the kernel boots correctly. That error happen only if during |
| .\" the kernel initialization, some error occur in the anonymous |
| .\" inode source initialization. |
| Could not mount (internal) anonymous inode device. |
| .TP |
| .B ENOMEM |
| There was insufficient memory to create a new |
| eventfd file descriptor. |
| .SH VERSIONS |
| .BR eventfd () |
| is available on Linux since kernel 2.6.22. |
| Working support is provided in glibc since version 2.8. |
| .\" eventfd() is in glibc 2.7, but reportedly does not build |
| The |
| .BR eventfd2 () |
| system call (see NOTES) is available on Linux since kernel 2.6.27. |
| Since version 2.9, the glibc |
| .BR eventfd () |
| wrapper will employ the |
| .BR eventfd2 () |
| system call, if it is supported by the kernel. |
| .SH ATTRIBUTES |
| For an explanation of the terms used in this section, see |
| .BR attributes (7). |
| .ad l |
| .nh |
| .TS |
| allbox; |
| lbx lb lb |
| l l l. |
| Interface Attribute Value |
| T{ |
| .BR eventfd () |
| T} Thread safety MT-Safe |
| .TE |
| .hy |
| .ad |
| .sp 1 |
| .SH CONFORMING TO |
| .BR eventfd () |
| and |
| .BR eventfd2 () |
| are Linux-specific. |
| .SH NOTES |
| Applications can use an eventfd file descriptor instead of a pipe (see |
| .BR pipe (2)) |
| in all cases where a pipe is used simply to signal events. |
| The kernel overhead of an eventfd file descriptor |
| is much lower than that of a pipe, |
| and only one file descriptor is |
| required (versus the two required for a pipe). |
| .PP |
| When used in the kernel, an eventfd |
| file descriptor can provide a bridge from kernel to user space, allowing, |
| for example, functionalities like KAIO (kernel AIO) |
| .\" or eventually syslets/threadlets |
| to signal to a file descriptor that some operation is complete. |
| .PP |
| A key point about an eventfd file descriptor is that it can be |
| monitored just like any other file descriptor using |
| .BR select (2), |
| .BR poll (2), |
| or |
| .BR epoll (7). |
| This means that an application can simultaneously monitor the |
| readiness of "traditional" files and the readiness of other |
| kernel mechanisms that support the eventfd interface. |
| (Without the |
| .BR eventfd () |
| interface, these mechanisms could not be multiplexed via |
| .BR select (2), |
| .BR poll (2), |
| or |
| .BR epoll (7).) |
| .PP |
| The current value of an eventfd counter can be viewed |
| via the entry for the corresponding file descriptor in the process's |
| .IR /proc/[pid]/fdinfo |
| directory. |
| See |
| .BR proc (5) |
| for further details. |
| .\" |
| .SS C library/kernel differences |
| There are two underlying Linux system calls: |
| .BR eventfd () |
| and the more recent |
| .BR eventfd2 (). |
| The former system call does not implement a |
| .I flags |
| argument. |
| The latter system call implements the |
| .I flags |
| values described above. |
| The glibc wrapper function will use |
| .BR eventfd2 () |
| where it is available. |
| .SS Additional glibc features |
| The GNU C library defines an additional type, |
| and two functions that attempt to abstract some of the details of |
| reading and writing on an eventfd file descriptor: |
| .PP |
| .in +4n |
| .EX |
| typedef uint64_t eventfd_t; |
| |
| int eventfd_read(int fd, eventfd_t *value); |
| int eventfd_write(int fd, eventfd_t value); |
| .EE |
| .in |
| .PP |
| The functions perform the read and write operations on an |
| eventfd file descriptor, |
| returning 0 if the correct number of bytes was transferred, |
| or \-1 otherwise. |
| .SH EXAMPLES |
| The following program creates an eventfd file descriptor |
| and then forks to create a child process. |
| While the parent briefly sleeps, |
| the child writes each of the integers supplied in the program's |
| command-line arguments to the eventfd file descriptor. |
| When the parent has finished sleeping, |
| it reads from the eventfd file descriptor. |
| .PP |
| The following shell session shows a sample run of the program: |
| .PP |
| .in +4n |
| .EX |
| .RB "$" " ./a.out 1 2 4 7 14" |
| Child writing 1 to efd |
| Child writing 2 to efd |
| Child writing 4 to efd |
| Child writing 7 to efd |
| Child writing 14 to efd |
| Child completed write loop |
| Parent about to read |
| Parent read 28 (0x1c) from efd |
| .EE |
| .in |
| .SS Program source |
| \& |
| .EX |
| #include <sys/eventfd.h> |
| #include <unistd.h> |
| #include <inttypes.h> /* Definition of PRIu64 & PRIx64 */ |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <stdint.h> /* Definition of uint64_t */ |
| |
| /* Report a failed call: pass msg to perror() and then terminate the |
| process with EXIT_FAILURE. The do/while (0) wrapper lets the macro |
| be used like a single statement. */ |
| #define handle_error(msg) \e |
| do { perror(msg); exit(EXIT_FAILURE); } while (0) |
| |
| /* Demonstrate eventfd(): a forked child writes each number given on the |
| command line to the eventfd, and the parent then reads the counter |
| once and prints the value it obtained. */ |
| int |
| main(int argc, char *argv[]) |
| { |
| int efd; |
| uint64_t u; |
| ssize_t s; |
| |
| /* At least one number to write is required */ |
| if (argc < 2) { |
| fprintf(stderr, "Usage: %s <num>...\en", argv[0]); |
| exit(EXIT_FAILURE); |
| } |
| |
| /* Create the eventfd with an initial counter of 0 and no flags */ |
| efd = eventfd(0, 0); |
| if (efd == \-1) |
| handle_error("eventfd"); |
| |
| switch (fork()) { |
| /* Child: add each argument to the eventfd counter, then exit */ |
| case 0: |
| for (int j = 1; j < argc; j++) { |
| printf("Child writing %s to efd\en", argv[j]); |
| u = strtoull(argv[j], NULL, 0); |
| /* strtoull() allows various bases */ |
| /* Anything other than a full 8 byte transfer is an error */ |
| s = write(efd, &u, sizeof(uint64_t)); |
| if (s != sizeof(uint64_t)) |
| handle_error("write"); |
| } |
| printf("Child completed write loop\en"); |
| |
| exit(EXIT_SUCCESS); |
| |
| /* Parent: sleep briefly while the child writes, then read once */ |
| default: |
| sleep(2); |
| |
| printf("Parent about to read\en"); |
| /* EFD_SEMAPHORE was not specified, so a single read() returns |
| the whole counter value and resets the counter to zero */ |
| s = read(efd, &u, sizeof(uint64_t)); |
| if (s != sizeof(uint64_t)) |
| handle_error("read"); |
| printf("Parent read %"PRIu64" (%#"PRIx64") from efd\en", u, u); |
| exit(EXIT_SUCCESS); |
| |
| /* fork() failed; no child was created */ |
| case \-1: |
| handle_error("fork"); |
| } |
| } |
| .EE |
| .SH SEE ALSO |
| .BR futex (2), |
| .BR pipe (2), |
| .BR poll (2), |
| .BR read (2), |
| .BR select (2), |
| .BR signalfd (2), |
| .BR timerfd_create (2), |
| .BR write (2), |
| .BR epoll (7), |
| .BR sem_overview (7) |