| .\" Copyright (C) 2002 Robert Love |
| .\" and Copyright (C) 2006, 2015 Michael Kerrisk |
| .\" |
| .\" %%%LICENSE_START(GPLv2+_DOC_FULL) |
| .\" This is free documentation; you can redistribute it and/or |
| .\" modify it under the terms of the GNU General Public License as |
| .\" published by the Free Software Foundation; either version 2 of |
| .\" the License, or (at your option) any later version. |
| .\" |
| .\" The GNU General Public License's references to "object code" |
| .\" and "executables" are to be interpreted as the output of any |
| .\" document formatting or typesetting system, including |
| .\" intermediate and printed output. |
| .\" |
| .\" This manual is distributed in the hope that it will be useful, |
| .\" but WITHOUT ANY WARRANTY; without even the implied warranty of |
| .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| .\" GNU General Public License for more details. |
| .\" |
| .\" You should have received a copy of the GNU General Public |
| .\" License along with this manual; if not, see |
| .\" <http://www.gnu.org/licenses/>. |
| .\" %%%LICENSE_END |
| .\" |
| .\" 2002-11-19 Robert Love <rml@tech9.net> - initial version |
| .\" 2004-04-20 mtk - fixed description of return value |
| .\" 2004-04-22 aeb - added glibc prototype history |
| .\" 2005-05-03 mtk - noted that sched_setaffinity may cause thread |
| .\" migration and that CPU affinity is a per-thread attribute. |
| .\" 2006-02-03 mtk -- Major rewrite |
| .\" 2008-11-12, mtk, removed CPU_*() macro descriptions to a |
| .\" separate CPU_SET(3) page. |
| .\" |
| .TH SCHED_SETAFFINITY 2 2017-09-15 "Linux" "Linux Programmer's Manual" |
| .SH NAME |
| sched_setaffinity, sched_getaffinity \- \ |
| set and get a thread's CPU affinity mask |
| .SH SYNOPSIS |
| .nf |
| .BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */" |
| .B #include <sched.h> |
| .PP |
| .BI "int sched_setaffinity(pid_t " pid ", size_t " cpusetsize , |
| .BI " const cpu_set_t *" mask ); |
| .PP |
| .BI "int sched_getaffinity(pid_t " pid ", size_t " cpusetsize , |
| .BI " cpu_set_t *" mask ); |
| .fi |
| .SH DESCRIPTION |
| A thread's CPU affinity mask determines the set of CPUs on which |
| it is eligible to run. |
| On a multiprocessor system, setting the CPU affinity mask |
| can be used to obtain performance benefits. |
| For example, |
| by dedicating one CPU to a particular thread |
| (i.e., setting the affinity mask of that thread to specify a single CPU, |
| and setting the affinity mask of all other threads to exclude that CPU), |
| it is possible to ensure maximum execution speed for that thread. |
| Restricting a thread to run on a single CPU also avoids |
| the performance cost caused by the cache invalidation that occurs |
| when a thread ceases to execute on one CPU and then |
| recommences execution on a different CPU. |
| .PP |
| A CPU affinity mask is represented by the |
| .I cpu_set_t |
| structure, a "CPU set", pointed to by |
| .IR mask . |
| A set of macros for manipulating CPU sets is described in |
| .BR CPU_SET (3). |
| .PP |
| .BR sched_setaffinity () |
| sets the CPU affinity mask of the thread whose ID is |
| .I pid |
| to the value specified by |
| .IR mask . |
| If |
| .I pid |
| is zero, then the calling thread is used. |
| The argument |
| .I cpusetsize |
| is the length (in bytes) of the data pointed to by |
| .IR mask . |
| Normally this argument would be specified as |
| .IR "sizeof(cpu_set_t)" . |
| .PP |
| If the thread specified by |
| .I pid |
| is not currently running on one of the CPUs specified in |
| .IR mask , |
| then that thread is migrated to one of the CPUs specified in |
| .IR mask . |
| .PP |
| .BR sched_getaffinity () |
| writes the affinity mask of the thread whose ID is |
| .I pid |
| into the |
| .I cpu_set_t |
| structure pointed to by |
| .IR mask . |
| The |
| .I cpusetsize |
| argument specifies the size (in bytes) of |
| .IR mask . |
| If |
| .I pid |
| is zero, then the mask of the calling thread is returned. |
| .SH RETURN VALUE |
| On success, |
| .BR sched_setaffinity () |
| and |
| .BR sched_getaffinity () |
| return 0. |
| On error, \-1 is returned, and |
| .I errno |
| is set appropriately. |
| .SH ERRORS |
| .TP |
| .B EFAULT |
| A supplied memory address was invalid. |
| .TP |
| .B EINVAL |
| The affinity bit mask |
| .I mask |
| contains no processors that are currently physically on the system |
| and permitted to the thread according to any restrictions that |
| may be imposed by |
| .I cpuset |
| cgroups or the "cpuset" mechanism described in |
| .BR cpuset (7). |
| .TP |
| .B EINVAL |
| .RB ( sched_getaffinity () |
| and, in kernels before 2.6.9, |
| .BR sched_setaffinity ()) |
| .I cpusetsize |
| is smaller than the size of the affinity mask used by the kernel. |
| .TP |
| .B EPERM |
| .RB ( sched_setaffinity ()) |
| The calling thread does not have appropriate privileges. |
| The caller needs an effective user ID equal to the real user ID |
| or effective user ID of the thread identified by |
| .IR pid , |
| or it must possess the |
| .B CAP_SYS_NICE |
| capability in the user namespace of the thread |
| .IR pid . |
| .TP |
| .B ESRCH |
| The thread whose ID is \fIpid\fP could not be found. |
| .SH VERSIONS |
| The CPU affinity system calls were introduced in Linux kernel 2.5.8. |
| The system call wrappers were introduced in glibc 2.3. |
| Initially, the glibc interfaces included a |
| .I cpusetsize |
| argument, typed as |
| .IR "unsigned int" . |
| In glibc 2.3.3, the |
| .I cpusetsize |
| argument was removed, but was then restored in glibc 2.3.4, with type |
| .IR size_t . |
| .SH CONFORMING TO |
| These system calls are Linux-specific. |
| .SH NOTES |
| After a call to |
| .BR sched_setaffinity (), |
| the set of CPUs on which the thread will actually run is |
| the intersection of the set specified in the |
| .I mask |
| argument and the set of CPUs actually present on the system. |
| The system may further restrict the set of CPUs on which the thread |
| runs if the "cpuset" mechanism described in |
| .BR cpuset (7) |
| is being used. |
| These restrictions on the actual set of CPUs on which the thread |
| will run are silently imposed by the kernel. |
| .PP |
| There are various ways of determining the number of CPUs |
| available on the system, including: inspecting the contents of |
| .IR /proc/cpuinfo ; |
| using |
| .BR sysconf (3) |
| to obtain the values of the |
| .BR _SC_NPROCESSORS_CONF |
| and |
| .BR _SC_NPROCESSORS_ONLN |
| parameters; and inspecting the list of CPU directories under |
| .IR /sys/devices/system/cpu/ . |
| .PP |
| .BR sched (7) |
| has a description of the Linux scheduling scheme. |
| .PP |
| The affinity mask is a per-thread attribute that can be |
| adjusted independently for each of the threads in a thread group. |
| The value returned from a call to |
| .BR gettid (2) |
| can be passed in the argument |
| .IR pid . |
| Specifying |
| .I pid |
| as 0 will set the attribute for the calling thread, |
| and passing the value returned from a call to |
| .BR getpid (2) |
| will set the attribute for the main thread of the thread group. |
| (If you are using the POSIX threads API, then use |
| .BR pthread_setaffinity_np (3) |
| instead of |
| .BR sched_setaffinity ().) |
| .PP |
| The |
| .I isolcpus |
| boot option can be used to isolate one or more CPUs at boot time, |
| so that no processes are scheduled onto those CPUs. |
| Following the use of this boot option, |
| the only way to schedule processes onto the isolated CPUs is via |
| .BR sched_setaffinity () |
| or the |
| .BR cpuset (7) |
| mechanism. |
| For further information, see the kernel source file |
| .IR Documentation/admin-guide/kernel-parameters.txt . |
| As noted in that file, |
| .I isolcpus |
| is the preferred mechanism of isolating CPUs |
| (versus the alternative of manually setting the CPU affinity |
| of all processes on the system). |
| .PP |
| A child created via |
| .BR fork (2) |
| inherits its parent's CPU affinity mask. |
| The affinity mask is preserved across an |
| .BR execve (2). |
| .SS C library/kernel differences |
| This manual page describes the glibc interface for the CPU affinity calls. |
| The actual system call interface is slightly different, with the |
| .I mask |
| being typed as |
| .IR "unsigned long\ *" , |
| reflecting the fact that the underlying implementation of CPU |
| sets is a simple bit mask. |
| On success, the raw |
| .BR sched_getaffinity () |
| system call returns the size (in bytes) of the |
| .I cpumask_t |
| data type that is used internally by the kernel to |
| represent the CPU set bit mask. |
| .SS Handling systems with large CPU affinity masks |
| The underlying system calls (which represent CPU masks as bit masks of type |
| .IR "unsigned long\ *" ) |
| impose no restriction on the size of the CPU mask. |
| However, the |
| .I cpu_set_t |
| data type used by glibc has a fixed size of 128 bytes, |
| meaning that the maximum CPU number that can be represented is 1023. |
| .\" FIXME . See https://sourceware.org/bugzilla/show_bug.cgi?id=15630 |
| .\" and https://sourceware.org/ml/libc-alpha/2013-07/msg00288.html |
| If the kernel CPU affinity mask is larger than 1024 bits, |
| then calls of the form: |
| .PP |
| .in +4n |
| .EX |
| sched_getaffinity(pid, sizeof(cpu_set_t), &mask); |
| .EE |
| .in |
| .PP |
| fail with the error |
| .BR EINVAL , |
| the error produced by the underlying system call for the case where the |
| .I mask |
| size specified in |
| .I cpusetsize |
| is smaller than the size of the affinity mask used by the kernel. |
| (Depending on the system CPU topology, the kernel affinity mask can |
| be substantially larger than the number of active CPUs in the system.) |
| .PP |
| When working on systems with large kernel CPU affinity masks, |
| one must dynamically allocate the |
| .I mask |
| argument (see |
| .BR CPU_ALLOC (3)). |
| Currently, the only way to do this is by probing for the size |
| of the required mask using |
| .BR sched_getaffinity () |
| calls with increasing mask sizes (until the call does not fail with the error |
| .BR EINVAL ). |
| .PP |
| Be aware that |
| .BR CPU_ALLOC (3) |
| may allocate a slightly larger CPU set than requested |
| (because CPU sets are implemented as bit masks allocated in units of |
| .IR sizeof(long) ). |
| Consequently, |
| .BR sched_getaffinity () |
| can set bits beyond the requested allocation size, because the kernel |
| sees a few additional bits. |
| Therefore, the caller should iterate over the bits in the returned set, |
| counting those which are set, and stop upon reaching the value returned by |
| .BR CPU_COUNT (3) |
| (rather than iterating over the number of bits |
| requested to be allocated). |
| .SH EXAMPLE |
| The program below creates a child process. |
| The parent and child then each assign themselves to a specified CPU |
| and execute identical loops that consume some CPU time. |
| Before terminating, the parent waits for the child to complete. |
| The program takes three command-line arguments: |
| the CPU number for the parent, |
| the CPU number for the child, |
| and the number of loop iterations that both processes should perform. |
| .PP |
| As the sample runs below demonstrate, the amount of real and CPU time |
| consumed when running the program will depend on intra-core caching effects |
| and whether the processes are using the same CPU. |
| .PP |
| We first employ |
| .BR lscpu (1) |
| to determine that this (x86) |
| system has two cores, each with two CPUs: |
| .PP |
| .in +4n |
| .EX |
| $ \fBlscpu | grep \-E \-i \(aqcore.*:|socket\(aq\fP |
| Thread(s) per core: 2 |
| Core(s) per socket: 2 |
| Socket(s): 1 |
| .EE |
| .in |
| .PP |
| We then time the operation of the example program for three cases: |
| both processes running on the same CPU; |
| both processes running on different CPUs on the same core; |
| and both processes running on different CPUs on different cores. |
| .PP |
| .in +4n |
| .EX |
| $ \fBtime \-p ./a.out 0 0 100000000\fP |
| real 14.75 |
| user 3.02 |
| sys 11.73 |
| $ \fBtime \-p ./a.out 0 1 100000000\fP |
| real 11.52 |
| user 3.98 |
| sys 19.06 |
| $ \fBtime \-p ./a.out 0 3 100000000\fP |
| real 7.89 |
| user 3.29 |
| sys 12.07 |
| .EE |
| .in |
| .SS Program source |
| \& |
| .EX |
| #define _GNU_SOURCE |
| #include <sched.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| #include <sys/wait.h> |
| |
| #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\ |
| } while (0) |
| |
| int |
| main(int argc, char *argv[]) |
| { |
| cpu_set_t set; |
| int parentCPU, childCPU; |
| int nloops, j; |
| |
| if (argc != 4) { |
| fprintf(stderr, "Usage: %s parent\-cpu child\-cpu num\-loops\\n", |
| argv[0]); |
| exit(EXIT_FAILURE); |
| } |
| |
| parentCPU = atoi(argv[1]); |
| childCPU = atoi(argv[2]); |
| nloops = atoi(argv[3]); |
| |
| CPU_ZERO(&set); |
| |
| switch (fork()) { |
| case \-1: /* Error */ |
| errExit("fork"); |
| |
| case 0: /* Child */ |
| CPU_SET(childCPU, &set); |
| |
| if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1) |
| errExit("sched_setaffinity"); |
| |
| for (j = 0; j < nloops; j++) |
| getppid(); |
| |
| exit(EXIT_SUCCESS); |
| |
| default: /* Parent */ |
| CPU_SET(parentCPU, &set); |
| |
| if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1) |
| errExit("sched_setaffinity"); |
| |
| for (j = 0; j < nloops; j++) |
| getppid(); |
| |
| wait(NULL); /* Wait for child to terminate */ |
| exit(EXIT_SUCCESS); |
| } |
| } |
| .EE |
| .SH SEE ALSO |
| .ad l |
| .nh |
| .BR lscpu (1), |
| .BR nproc (1), |
| .BR taskset (1), |
| .BR clone (2), |
| .BR getcpu (2), |
| .BR getpriority (2), |
| .BR gettid (2), |
| .BR nice (2), |
| .BR sched_get_priority_max (2), |
| .BR sched_get_priority_min (2), |
| .BR sched_getscheduler (2), |
| .BR sched_setscheduler (2), |
| .BR setpriority (2), |
| .BR CPU_SET (3), |
| .BR get_nprocs (3), |
| .BR pthread_setaffinity_np (3), |
| .BR sched_getcpu (3), |
| .BR capabilities (7), |
| .BR cpuset (7), |
| .BR sched (7), |
| .BR numactl (8) |