| .\" This manpage is copyright (C) 1992 Drew Eckhardt, |
| .\" copyright (C) 1995 Michael Shields. |
| .\" |
| .\" %%%LICENSE_START(VERBATIM) |
| .\" Permission is granted to make and distribute verbatim copies of this |
| .\" manual provided the copyright notice and this permission notice are |
| .\" preserved on all copies. |
| .\" |
| .\" Permission is granted to copy and distribute modified versions of this |
| .\" manual under the conditions for verbatim copying, provided that the |
| .\" entire resulting derived work is distributed under the terms of a |
| .\" permission notice identical to this one. |
| .\" |
| .\" Since the Linux kernel and libraries are constantly changing, this |
| .\" manual page may be incorrect or out-of-date. The author(s) assume no |
| .\" responsibility for errors or omissions, or for damages resulting from |
| .\" the use of the information contained herein. The author(s) may not |
| .\" have taken the same level of care in the production of this manual, |
| .\" which is licensed free of charge, as they might when working |
| .\" professionally. |
| .\" |
| .\" Formatted or processed versions of this manual, if unaccompanied by |
| .\" the source, must acknowledge the copyright and authors of this work. |
| .\" %%%LICENSE_END |
| .\" |
| .\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu> |
| .\" Modified 1995-05-18 by Jim Van Zandt <jrv@vanzandt.mv.com> |
| .\" Sun Feb 11 14:07:00 MET 1996 Martin Schulze <joey@linux.de> |
| .\" * layout slightly modified |
| .\" |
| .\" Modified Mon Oct 21 23:05:29 EDT 1996 by Eric S. Raymond <esr@thyrsus.com> |
| .\" Modified Thu Feb 24 01:41:09 CET 2000 by aeb |
| .\" Modified Thu Feb 9 22:32:09 CET 2001 by bert hubert <ahu@ds9a.nl>, aeb |
| .\" Modified Mon Nov 11 14:35:00 PST 2002 by Ben Woodard <ben@zork.net> |
| .\" 2005-03-11, mtk, modified pselect() text (it is now a system |
| .\" call in 2.6.16). |
| .\" |
| .TH SELECT 2 2019-03-06 "Linux" "Linux Programmer's Manual" |
| .SH NAME |
| select, pselect, FD_CLR, FD_ISSET, FD_SET, FD_ZERO \- |
| synchronous I/O multiplexing |
| .SH SYNOPSIS |
| .nf |
| /* According to POSIX.1-2001, POSIX.1-2008 */ |
| .B #include <sys/select.h> |
| .PP |
| /* According to earlier standards */ |
| .B #include <sys/time.h> |
| .B #include <sys/types.h> |
| .B #include <unistd.h> |
| .PP |
| .BI "int select(int " nfds ", fd_set *" readfds ", fd_set *" writefds , |
| .BI " fd_set *" exceptfds ", struct timeval *" timeout ); |
| .PP |
| .BI "void FD_CLR(int " fd ", fd_set *" set ); |
| .BI "int FD_ISSET(int " fd ", fd_set *" set ); |
| .BI "void FD_SET(int " fd ", fd_set *" set ); |
| .BI "void FD_ZERO(fd_set *" set ); |
| |
| .B #include <sys/select.h> |
| .PP |
| .BI "int pselect(int " nfds ", fd_set *" readfds ", fd_set *" writefds , |
| .BI " fd_set *" exceptfds ", const struct timespec *" timeout , |
| .BI " const sigset_t *" sigmask ); |
| .fi |
| .PP |
| .in -4n |
| Feature Test Macro Requirements for glibc (see |
| .BR feature_test_macros (7)): |
| .in |
| .PP |
| .BR pselect (): |
| _POSIX_C_SOURCE\ >=\ 200112L |
| .SH DESCRIPTION |
| .BR select () |
| and |
| .BR pselect () |
| allow a program to monitor multiple file descriptors, |
| waiting until one or more of the file descriptors become "ready" |
| for some class of I/O operation (e.g., input possible). |
| A file descriptor is considered ready if it is possible to |
| perform a corresponding I/O operation (e.g., |
| .BR read (2), |
| or a sufficiently small |
| .BR write (2)) |
| without blocking. |
| .PP |
| .BR select () |
| can monitor only file descriptor numbers that are less than |
| .BR FD_SETSIZE ; |
| .BR poll (2) |
| does not have this limitation. |
| See BUGS. |
| .PP |
| The operation of |
| .BR select () |
| and |
| .BR pselect () |
| is identical, other than these three differences: |
| .TP |
| (i) |
| .BR select () |
| uses a timeout that is a |
| .I struct timeval |
| (with seconds and microseconds), while |
| .BR pselect () |
| uses a |
| .I struct timespec |
| (with seconds and nanoseconds). |
| .TP |
| (ii) |
| .BR select () |
| may update the |
| .I timeout |
| argument to indicate how much time was left. |
| .BR pselect () |
| does not change this argument. |
| .TP |
| (iii) |
| .BR select () |
| has no |
| .I sigmask |
| argument, and behaves as |
| .BR pselect () |
| called with NULL |
| .IR sigmask . |
| .PP |
| Three independent sets of file descriptors are watched. |
| The file descriptors listed in |
| .I readfds |
| will be watched to see if characters become |
| available for reading (more precisely, to see if a read will not |
| block; in particular, a file descriptor is also ready on end-of-file). |
| The file descriptors in |
| .I writefds |
| will be watched to see if space is available for write (though a large |
| write may still block). |
| The file descriptors in |
| .I exceptfds |
| will be watched for exceptional conditions. |
| (For examples of some exceptional conditions, see the discussion of |
| .B POLLPRI |
| in |
| .BR poll (2).) |
| .PP |
| On exit, each of the file descriptor sets is modified in place |
| to indicate which file descriptors actually changed status. |
| (Thus, if using |
| .BR select () |
| within a loop, the sets must be reinitialized before each call.) |
| .PP |
| Each of the three file descriptor sets may be specified as NULL |
| if no file descriptors are to be watched for the corresponding class |
| of events. |
| .PP |
| Four macros are provided to manipulate the sets. |
| .BR FD_ZERO () |
| clears a set. |
| .BR FD_SET () |
| and |
| .BR FD_CLR () |
| add and remove a given file descriptor from a set. |
| .BR FD_ISSET () |
| tests to see if a file descriptor is part of the set; |
| this is useful after |
| .BR select () |
| returns. |
| .PP |
| .I nfds |
| should be set to the highest-numbered file descriptor in any |
| of the three sets, plus 1. |
| The indicated file descriptors in each set are checked, up to this limit |
| (but see BUGS). |
| .PP |
| The |
| .I timeout |
| argument specifies the interval that |
| .BR select () |
| should block waiting for a file descriptor to become ready. |
| The call will block until either: |
| .IP * 3 |
| a file descriptor becomes ready; |
| .IP * |
| the call is interrupted by a signal handler; or |
| .IP * |
| the timeout expires. |
| .PP |
| Note that the |
| .I timeout |
| interval will be rounded up to the system clock granularity, |
| and kernel scheduling delays mean that the blocking interval |
| may overrun by a small amount. |
| If both fields of the |
| .I timeval |
| structure are zero, then |
| .BR select () |
| returns immediately. |
| (This is useful for polling.) |
| If |
| .I timeout |
| is NULL (no timeout), |
| .BR select () |
| can block indefinitely. |
| .PP |
| .I sigmask |
| is a pointer to a signal mask (see |
| .BR sigprocmask (2)); |
| if it is not NULL, then |
| .BR pselect () |
| first replaces the current signal mask by the one pointed to by |
| .IR sigmask , |
| then does the "select" function, and then restores the original |
| signal mask. |
| .PP |
| Other than the difference in the precision of the |
| .I timeout |
| argument, the following |
| .BR pselect () |
| call: |
| .PP |
| .in +4n |
| .EX |
| ready = pselect(nfds, &readfds, &writefds, &exceptfds, |
| timeout, &sigmask); |
| .EE |
| .in |
| .PP |
| is equivalent to |
| .I atomically |
| executing the following calls: |
| .PP |
| .in +4n |
| .EX |
| sigset_t origmask; |
| |
| pthread_sigmask(SIG_SETMASK, &sigmask, &origmask); |
| ready = select(nfds, &readfds, &writefds, &exceptfds, timeout); |
| pthread_sigmask(SIG_SETMASK, &origmask, NULL); |
| .EE |
| .in |
| .PP |
| The reason that |
| .BR pselect () |
| is needed is that if one wants to wait for either a signal |
| or for a file descriptor to become ready, then |
| an atomic test is needed to prevent race conditions. |
| (Suppose the signal handler sets a global flag and |
| returns. |
| Then a test of this global flag followed by a call of |
| .BR select () |
| could hang indefinitely if the signal arrived just after the test |
| but just before the call. |
| By contrast, |
| .BR pselect () |
| allows one to first block signals, handle the signals that have come in, |
| then call |
| .BR pselect () |
| with the desired |
| .IR sigmask , |
| avoiding the race.) |
| .SS The timeout |
| The time structures involved are defined in |
| .I <sys/time.h> |
| and look like |
| .PP |
| .in +4n |
| .EX |
| struct timeval { |
| long tv_sec; /* seconds */ |
| long tv_usec; /* microseconds */ |
| }; |
| .EE |
| .in |
| .PP |
| and |
| .PP |
| .in +4n |
| .EX |
| struct timespec { |
| long tv_sec; /* seconds */ |
| long tv_nsec; /* nanoseconds */ |
| }; |
| .EE |
| .in |
| .PP |
| (However, see below on the POSIX.1 versions.) |
| .PP |
| Some code calls |
| .BR select () |
| with all three sets empty, |
| .I nfds |
| zero, and a non-NULL |
| .I timeout |
| as a fairly portable way to sleep with subsecond precision. |
| .PP |
| On Linux, |
| .BR select () |
| modifies |
| .I timeout |
| to reflect the amount of time not slept; most other implementations |
| do not do this. |
| (POSIX.1 permits either behavior.) |
| This causes problems both when Linux code which reads |
| .I timeout |
| is ported to other operating systems, and when code is ported to Linux |
| that reuses a \fIstruct timeval\fP for multiple |
| .BR select ()s |
| in a loop without reinitializing it. |
| Consider |
| .I timeout |
| to be undefined after |
| .BR select () |
| returns. |
| .\" .PP - it is rumored that: |
| .\" On BSD, when a timeout occurs, the file descriptor bits are not changed. |
| .\" - it is certainly true that: |
| .\" Linux follows SUSv2 and sets the bit masks to zero upon a timeout. |
| .SH RETURN VALUE |
| On success, |
| .BR select () |
| and |
| .BR pselect () |
| return the number of file descriptors contained in the three returned |
| descriptor sets (that is, the total number of bits that are set in |
| .IR readfds , |
| .IR writefds , |
| .IR exceptfds ) |
| which may be zero if the timeout expires before anything interesting happens. |
| On error, \-1 is returned, and |
| .I errno |
| is set to indicate the error; |
| the file descriptor sets are unmodified, |
| and |
| .I timeout |
| becomes undefined. |
| .SH ERRORS |
| .TP |
| .B EBADF |
| An invalid file descriptor was given in one of the sets. |
| (Perhaps a file descriptor that was already closed, |
| or one on which an error has occurred.) |
| However, see BUGS. |
| .TP |
| .B EINTR |
| A signal was caught; see |
| .BR signal (7). |
| .TP |
| .B EINVAL |
| .I nfds |
| is negative or exceeds the |
| .BR RLIMIT_NOFILE |
| resource limit (see |
| .BR getrlimit (2)). |
| .TP |
| .B EINVAL |
| The value contained within |
| .I timeout |
| is invalid. |
| .TP |
| .B ENOMEM |
| Unable to allocate memory for internal tables. |
| .SH VERSIONS |
| .BR pselect () |
| was added to Linux in kernel 2.6.16. |
| Prior to this, |
| .BR pselect () |
| was emulated in glibc (but see BUGS). |
| .SH CONFORMING TO |
| .BR select () |
| conforms to POSIX.1-2001, POSIX.1-2008, and |
| 4.4BSD |
| .RB ( select () |
| first appeared in 4.2BSD). |
| Generally portable to/from |
| non-BSD systems supporting clones of the BSD socket layer (including |
| System\ V variants). |
| However, note that the System\ V variant typically |
| sets the timeout variable before exit, but the BSD variant does not. |
| .PP |
| .BR pselect () |
| is defined in POSIX.1g, and in |
| POSIX.1-2001 and POSIX.1-2008. |
| .SH NOTES |
| An |
| .I fd_set |
| is a fixed size buffer. |
| Executing |
| .BR FD_CLR () |
| or |
| .BR FD_SET () |
| with a value of |
| .I fd |
| that is negative or is equal to or larger than |
| .B FD_SETSIZE |
| will result |
| in undefined behavior. |
| Moreover, POSIX requires |
| .I fd |
| to be a valid file descriptor. |
| .PP |
| The operation of |
| .BR select () |
| and |
| .BR pselect () |
| is not affected by the |
| .BR O_NONBLOCK |
| flag. |
| .PP |
| On some other UNIX systems, |
| .\" Darwin, according to a report by Jeremy Sequoia, relayed by Josh Triplett |
| .BR select () |
| can fail with the error |
| .B EAGAIN |
| if the system fails to allocate kernel-internal resources, rather than |
| .B ENOMEM |
| as Linux does. |
| POSIX specifies this error for |
| .BR poll (2), |
| but not for |
| .BR select (). |
| Portable programs may wish to check for |
| .B EAGAIN |
| and loop, just as with |
| .BR EINTR . |
| .PP |
| On systems that lack |
| .BR pselect (), |
| reliable (and more portable) signal trapping can be achieved |
| using the self-pipe trick. |
| In this technique, |
| a signal handler writes a byte to a pipe whose other end |
| is monitored by |
| .BR select () |
| in the main program. |
| (To avoid possibly blocking when writing to a pipe that may be full |
| or reading from a pipe that may be empty, |
| nonblocking I/O is used when reading from and writing to the pipe.) |
| .PP |
| Concerning the types involved, the classical situation is that |
| the two fields of a |
| .I timeval |
| structure are typed as |
| .I long |
| (as shown above), and the structure is defined in |
| .IR <sys/time.h> . |
| The POSIX.1 situation is |
| .PP |
| .in +4n |
| .EX |
| struct timeval { |
| time_t tv_sec; /* seconds */ |
| suseconds_t tv_usec; /* microseconds */ |
| }; |
| .EE |
| .in |
| .PP |
| where the structure is defined in |
| .I <sys/select.h> |
| and the data types |
| .I time_t |
| and |
| .I suseconds_t |
| are defined in |
| .IR <sys/types.h> . |
| .PP |
| Concerning prototypes, the classical situation is that one should |
| include |
| .I <time.h> |
| for |
| .BR select (). |
| The POSIX.1 situation is that one should include |
| .I <sys/select.h> |
| for |
| .BR select () |
| and |
| .BR pselect (). |
| .PP |
| Under glibc 2.0, |
| .I <sys/select.h> |
| gives the wrong prototype for |
| .BR pselect (). |
| Under glibc 2.1 to 2.2.1, it gives |
| .BR pselect () |
| when |
| .B _GNU_SOURCE |
| is defined. |
| Since glibc 2.2.2, the requirements are as shown in the SYNOPSIS. |
| .\" |
| .SS Correspondence between select() and poll() notifications |
| Within the Linux kernel source, |
| .\" fs/select.c |
| we find the following definitions which show the correspondence |
| between the readable, writable, and exceptional condition notifications of |
| .BR select () |
| and the event notifications provided by |
| .BR poll (2) |
| (and |
| .BR epoll (7)): |
| .PP |
| .in +4n |
| .EX |
| #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | |
| POLLERR) |
| /* Ready for reading */ |
| #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) |
| /* Ready for writing */ |
| #define POLLEX_SET (POLLPRI) |
| /* Exceptional condition */ |
| .EE |
| .in |
| .\" |
| .SS Multithreaded applications |
| If a file descriptor being monitored by |
| .BR select () |
| is closed in another thread, the result is unspecified. |
| On some UNIX systems, |
| .BR select () |
| unblocks and returns, with an indication that the file descriptor is ready |
| (a subsequent I/O operation will likely fail with an error, |
| unless another process reopens the file descriptor between the time |
| .BR select () |
| returned and the I/O operation is performed). |
| On Linux (and some other systems), |
| closing the file descriptor in another thread has no effect on |
| .BR select (). |
| In summary, any application that relies on a particular behavior |
| in this scenario must be considered buggy. |
| .\" |
| .SS C library/kernel differences |
| The Linux kernel allows file descriptor sets of arbitrary size, |
| determining the length of the sets to be checked from the value of |
| .IR nfds . |
| However, in the glibc implementation, the |
| .IR fd_set |
| type is fixed in size. |
| See also BUGS. |
| .PP |
| The |
| .BR pselect () |
| interface described in this page is implemented by glibc. |
| The underlying Linux system call is named |
| .BR pselect6 (). |
| This system call has somewhat different behavior from the glibc |
| wrapper function. |
| .PP |
| The Linux |
| .BR pselect6 () |
| system call modifies its |
| .I timeout |
| argument. |
| However, the glibc wrapper function hides this behavior |
| by using a local variable for the timeout argument that |
| is passed to the system call. |
| Thus, the glibc |
| .BR pselect () |
| function does not modify its |
| .I timeout |
| argument; |
| this is the behavior required by POSIX.1-2001. |
| .PP |
| The final argument of the |
| .BR pselect6 () |
| system call is not a |
| .I "sigset_t\ *" |
| pointer, but is instead a structure of the form: |
| .PP |
| .in +4n |
| .EX |
| struct { |
| const kernel_sigset_t *ss; /* Pointer to signal set */ |
| size_t ss_len; /* Size (in bytes) of object |
| pointed to by 'ss' */ |
| }; |
| .EE |
| .in |
| .PP |
| This allows the system call to obtain both |
| a pointer to the signal set and its size, |
| while allowing for the fact that most architectures |
| support a maximum of 6 arguments to a system call. |
| See |
| .BR sigprocmask (2) |
| for a discussion of the difference between the kernel and libc |
| notion of the signal set. |
| .SH BUGS |
| POSIX allows an implementation to define an upper limit, |
| advertised via the constant |
| .BR FD_SETSIZE , |
| on the range of file descriptors that can be specified |
| in a file descriptor set. |
| The Linux kernel imposes no fixed limit, but the glibc implementation makes |
| .IR fd_set |
| a fixed-size type, with |
| .BR FD_SETSIZE |
| defined as 1024, and the |
| .BR FD_* () |
| macros operating according to that limit. |
| To monitor file descriptors greater than 1023, use |
| .BR poll (2) |
| instead. |
| .PP |
| The implementation of the |
| .I fd_set |
| arguments as value-result arguments means that they must be |
| reinitialized on each call to |
| .BR select (). |
| This design error is avoided by |
| .BR poll (2), |
| which uses separate structure fields for the input and output of the call. |
| .PP |
| According to POSIX, |
| .BR select () |
| should check all specified file descriptors in the three file descriptor sets, |
| up to the limit |
| .IR nfds\-1 . |
| However, the current implementation ignores any file descriptor in |
| these sets that is greater than the maximum file descriptor number |
| that the process currently has open. |
| According to POSIX, any such file descriptor that is specified in one |
| of the sets should result in the error |
| .BR EBADF . |
| .PP |
| Glibc 2.0 provided a version of |
| .BR pselect () |
| that did not take a |
| .I sigmask |
| argument. |
| .PP |
| Starting with version 2.1, glibc provided an emulation of |
| .BR pselect () |
| that was implemented using |
| .BR sigprocmask (2) |
| and |
| .BR select (). |
| This implementation remained vulnerable to the very race condition that |
| .BR pselect () |
| was designed to prevent. |
| Modern versions of glibc use the (race-free) |
| .BR pselect () |
| system call on kernels where it is provided. |
| .PP |
| Under Linux, |
| .BR select () |
| may report a socket file descriptor as "ready for reading", while |
| nevertheless a subsequent read blocks. |
| This could for example |
| happen when data has arrived but upon examination has the wrong |
| checksum and is discarded. |
| There may be other circumstances |
| in which a file descriptor is spuriously reported as ready. |
| .\" Stevens discusses a case where accept can block after select |
| .\" returns successfully because of an intervening RST from the client. |
| Thus it may be safer to use |
| .B O_NONBLOCK |
| on sockets that should not block. |
| .\" Maybe the kernel should have returned EIO in such a situation? |
| .PP |
| On Linux, |
| .BR select () |
| also modifies |
| .I timeout |
| if the call is interrupted by a signal handler (i.e., the |
| .B EINTR |
| error return). |
| This is not permitted by POSIX.1. |
| The Linux |
| .BR pselect () |
| system call has the same behavior, |
| but the glibc wrapper hides this behavior by internally copying the |
| .I timeout |
| to a local variable and passing that variable to the system call. |
| .SH EXAMPLE |
| .EX |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <sys/time.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| |
| int |
| main(void) |
| { |
| fd_set rfds; |
| struct timeval tv; |
| int retval; |
| |
| /* Watch stdin (fd 0) to see when it has input. */ |
| |
| FD_ZERO(&rfds); |
| FD_SET(0, &rfds); |
| |
| /* Wait up to five seconds. */ |
| |
| tv.tv_sec = 5; |
| tv.tv_usec = 0; |
| |
| retval = select(1, &rfds, NULL, NULL, &tv); |
| /* Don't rely on the value of tv now! */ |
| |
| if (retval == \-1) |
| perror("select()"); |
| else if (retval) |
| printf("Data is available now.\en"); |
| /* FD_ISSET(0, &rfds) will be true. */ |
| else |
| printf("No data within five seconds.\en"); |
| |
| exit(EXIT_SUCCESS); |
| } |
| .EE |
| .SH SEE ALSO |
| .BR accept (2), |
| .BR connect (2), |
| .BR poll (2), |
| .BR read (2), |
| .BR recv (2), |
| .BR restart_syscall (2), |
| .BR send (2), |
| .BR sigprocmask (2), |
| .BR write (2), |
| .BR epoll (7), |
| .BR time (7) |
| .PP |
| For a tutorial with discussion and examples, see |
| .BR select_tut (2). |