| .\" Hey Emacs! This file is -*- nroff -*- source. |
| .\" |
| .\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992 |
| .\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013 |
| .\" May be distributed under the GNU General Public License. |
| .\" Modified by Michael Haardt <michael@moria.de> |
| .\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu> |
| .\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>: |
| .\" New man page (copied from 'fork.2'). |
| .\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl> |
| .\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr> |
| .\" Modified 26 Jun 2001 by Michael Kerrisk |
| .\" Mostly upgraded to 2.4.x |
| .\" Added prototype for sys_clone() plus description |
| .\" Added CLONE_THREAD with a brief description of thread groups |
| .\" Added CLONE_PARENT and revised entire page remove ambiguity |
| .\" between "calling process" and "parent process" |
| .\" Added CLONE_PTRACE and CLONE_VFORK |
| .\" Added EPERM and EINVAL error codes |
| .\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>) |
| .\" various other minor tidy ups and clarifications. |
| .\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com> |
| .\" Updated notes for 2.4.7+ behavior of CLONE_THREAD |
| .\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com> |
| .\" Added description for CLONE_NEWNS, which was added in 2.4.19 |
| .\" Slightly rephrased, aeb. |
| .\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb. |
| .\" Modified 1 Jan 2004 - various updates, aeb |
| .\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb. |
| .\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid() |
| .\" wrapper under BUGS. |
| .\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED. |
| .\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD. |
| .\" 2008-11-18, mtk, order CLONE_* flags alphabetically |
| .\" 2008-11-18, mtk, document CLONE_NEWPID |
| .\" 2008-11-19, mtk, document CLONE_NEWUTS |
| .\" 2008-11-19, mtk, document CLONE_NEWIPC |
| .\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO |
| .\" |
| .\" FIXME Document CLONE_NEWUSER, which is new in 2.6.23 |
| .\" (also supported for unshare()?) |
| .\" |
| .TH CLONE 2 2013-01-01 "Linux" "Linux Programmer's Manual" |
| .SH NAME |
| clone, __clone2 \- create a child process |
| .SH SYNOPSIS |
| .nf |
| .BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */" |
| .\" Actually _BSD_SOURCE || _SVID_SOURCE |
| .\" FIXME See http://sources.redhat.com/bugzilla/show_bug.cgi?id=4749 |
| .B #include <sched.h> |
| |
| .BI "int clone(int (*" "fn" ")(void *), void *" child_stack , |
| .BI " int " flags ", void *" "arg" ", ... " |
| .BI " /* pid_t *" ptid ", struct user_desc *" tls \ |
| ", pid_t *" ctid " */ );" |
| .fi |
| .SH DESCRIPTION |
| .BR clone () |
| creates a new process, in a manner similar to |
| .BR fork (2). |
| It is actually a library function layered on top of the underlying |
| .BR clone () |
| system call, hereinafter referred to as |
| .BR sys_clone . |
| A description of |
| .B sys_clone |
| is given toward the end of this page. |
| |
| Unlike |
| .BR fork (2), |
| these calls |
| allow the child process to share parts of its execution context with |
| the calling process, such as the memory space, the table of file |
| descriptors, and the table of signal handlers. |
| (Note that on this manual |
| page, "calling process" normally corresponds to "parent process". |
| But see the description of |
| .B CLONE_PARENT |
| below.) |
| |
| The main use of |
| .BR clone () |
| is to implement threads: multiple threads of control in a program that |
| run concurrently in a shared memory space. |
| |
| When the child process is created with |
| .BR clone (), |
| it executes the function |
| .IR fn ( arg ). |
| (This differs from |
| .BR fork (2), |
| where execution continues in the child from the point |
| of the |
| .BR fork (2) |
| call.) |
| The |
| .I fn |
| argument is a pointer to a function that is called by the child |
| process at the beginning of its execution. |
| The |
| .I arg |
| argument is passed to the |
| .I fn |
| function. |
| |
| When the |
| .IR fn ( arg ) |
| function application returns, the child process terminates. |
| The integer returned by |
| .I fn |
| is the exit code for the child process. |
| The child process may also terminate explicitly by calling |
| .BR exit (2) |
| or after receiving a fatal signal. |
| |
| The |
| .I child_stack |
| argument specifies the location of the stack used by the child process. |
| Since the child and calling process may share memory, |
| it is not possible for the child process to execute in the |
| same stack as the calling process. |
| The calling process must therefore |
| set up memory space for the child stack and pass a pointer to this |
| space to |
| .BR clone (). |
| Stacks grow downward on all processors that run Linux |
| (except the HP PA processors), so |
| .I child_stack |
| usually points to the topmost address of the memory space set up for |
| the child stack. |
| |
| The low byte of |
| .I flags |
| contains the number of the |
| .I "termination signal" |
| sent to the parent when the child dies. |
| If this signal is specified as anything other than |
| .BR SIGCHLD , |
| then the parent process must specify the |
| .B __WALL |
| or |
| .B __WCLONE |
| options when waiting for the child with |
| .BR wait (2). |
| If no signal is specified, then the parent process is not signaled |
| when the child terminates. |
| |
| .I flags |
| may also be bitwise-or'ed with zero or more of the following constants, |
| in order to specify what is shared between the calling process |
| and the child process: |
| .TP |
| .BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)" |
| Erase child thread ID at location |
| .I ctid |
| in child memory when the child exits, and do a wakeup on the futex |
| at that address. |
| The address involved may be changed by the |
| .BR set_tid_address (2) |
| system call. |
| This is used by threading libraries. |
| .TP |
| .BR CLONE_CHILD_SETTID " (since Linux 2.5.49)" |
| Store child thread ID at location |
| .I ctid |
| in child memory. |
| .TP |
| .BR CLONE_FILES " (since Linux 2.0)" |
| If |
| .B CLONE_FILES |
| is set, the calling process and the child process share the same file |
| descriptor table. |
| Any file descriptor created by the calling process or by the child |
| process is also valid in the other process. |
| Similarly, if one of the processes closes a file descriptor, |
| or changes its associated flags (using the |
| .BR fcntl (2) |
| .B F_SETFD |
| operation), the other process is also affected. |
| |
| If |
| .B CLONE_FILES |
| is not set, the child process inherits a copy of all file descriptors |
| opened in the calling process at the time of |
| .BR clone (). |
| (The duplicated file descriptors in the child refer to the |
| same open file descriptions (see |
| .BR open (2)) |
| as the corresponding file descriptors in the calling process.) |
| Subsequent operations that open or close file descriptors, |
| or change file descriptor flags, |
| performed by either the calling |
| process or the child process do not affect the other process. |
| .TP |
| .BR CLONE_FS " (since Linux 2.0)" |
| If |
| .B CLONE_FS |
| is set, the caller and the child process share the same file system |
| information. |
| This includes the root of the file system, the current |
| working directory, and the umask. |
| Any call to |
| .BR chroot (2), |
| .BR chdir (2), |
| or |
| .BR umask (2) |
| performed by the calling process or the child process also affects the |
| other process. |
| |
| If |
| .B CLONE_FS |
| is not set, the child process works on a copy of the file system |
| information of the calling process at the time of the |
| .BR clone () |
| call. |
| Calls to |
| .BR chroot (2), |
| .BR chdir (2), |
| or |
| .BR umask (2) |
| performed later by one of the processes do not affect the other process. |
| .TP |
| .BR CLONE_IO " (since Linux 2.6.25)" |
| If |
| .B CLONE_IO |
| is set, then the new process shares an I/O context with |
| the calling process. |
| If this flag is not set, then (as with |
| .BR fork (2)) |
| the new process has its own I/O context. |
| |
| .\" The following based on text from Jens Axboe |
| The I/O context is the I/O scope of the disk scheduler (i.e., |
| what the I/O scheduler uses to model scheduling of a process's I/O). |
| If processes share the same I/O context, |
| they are treated as one by the I/O scheduler. |
| As a consequence, they get to share disk time. |
| For some I/O schedulers, |
| .\" the anticipatory and CFQ scheduler |
| if two processes share an I/O context, |
| they will be allowed to interleave their disk access. |
| If several threads are doing I/O on behalf of the same process |
| .RB ( aio_read (3), |
| for instance), they should employ |
| .BR CLONE_IO |
| to get better I/O performance. |
| .\" with CFQ and AS. |
| |
| If the kernel is not configured with the |
| .B CONFIG_BLOCK |
| option, this flag is a no-op. |
| .TP |
| .BR CLONE_NEWIPC " (since Linux 2.6.19)" |
| If |
| .B CLONE_NEWIPC |
| is set, then create the process in a new IPC namespace. |
| If this flag is not set, then (as with |
| .BR fork (2)), |
| the process is created in the same IPC namespace as |
| the calling process. |
| This flag is intended for the implementation of containers. |
| |
| An IPC namespace provides an isolated view of System V IPC objects (see |
| .BR svipc (7)) |
| and (since Linux 2.6.30) |
| .\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f |
| .\" https://lwn.net/Articles/312232/ |
| POSIX message queues |
| (see |
| .BR mq_overview (7)). |
| The common characteristic of these IPC mechanisms is that IPC |
| objects are identified by mechanisms other than filesystem |
| pathnames. |
| |
| Objects created in an IPC namespace are visible to all other processes |
| that are members of that namespace, |
| but are not visible to processes in other IPC namespaces. |
| |
| When an IPC namespace is destroyed |
| (i.e., when the last process that is a member of the namespace terminates), |
| all IPC objects in the namespace are automatically destroyed. |
| |
| Use of this flag requires: a kernel configured with the |
| .B CONFIG_SYSVIPC |
| and |
| .B CONFIG_IPC_NS |
| options and that the process be privileged |
| .RB ( CAP_SYS_ADMIN ). |
| This flag can't be specified in conjunction with |
| .BR CLONE_SYSVSEM . |
| .TP |
| .BR CLONE_NEWNET " (since Linux 2.6.24)" |
| .\" FIXME Check when the implementation was completed |
| (The implementation of this flag was only completed |
| by about kernel version 2.6.29.) |
| |
| If |
| .B CLONE_NEWNET |
| is set, then create the process in a new network namespace. |
| If this flag is not set, then (as with |
| .BR fork (2)), |
| the process is created in the same network namespace as |
| the calling process. |
| This flag is intended for the implementation of containers. |
| |
| A network namespace provides an isolated view of the networking stack |
| (network device interfaces, IPv4 and IPv6 protocol stacks, |
| IP routing tables, firewall rules, the |
| .I /proc/net |
| and |
| .I /sys/class/net |
| directory trees, sockets, etc.). |
| A physical network device can live in exactly one |
| network namespace. |
| A virtual network device ("veth") pair provides a pipe-like abstraction |
| .\" FIXME Add pointer to veth(4) page when it is eventually completed |
| that can be used to create tunnels between network namespaces, |
| and can be used to create a bridge to a physical network device |
| in another namespace. |
| |
| When a network namespace is freed |
| (i.e., when the last process in the namespace terminates), |
| its physical network devices are moved back to the |
| initial network namespace (not to the parent of the process). |
| |
| Use of this flag requires: a kernel configured with the |
| .B CONFIG_NET_NS |
| option and that the process be privileged |
| .RB ( CAP_SYS_ADMIN ). |
| .TP |
| .BR CLONE_NEWNS " (since Linux 2.4.19)" |
| Start the child in a new mount namespace. |
| |
| Every process lives in a mount namespace. |
| The |
| .I namespace |
| of a process is the data (the set of mounts) describing the file hierarchy |
| as seen by that process. |
| After a |
| .BR fork (2) |
| or |
| .BR clone () |
| where the |
| .B CLONE_NEWNS |
| flag is not set, the child lives in the same mount |
| namespace as the parent. |
| The system calls |
| .BR mount (2) |
| and |
| .BR umount (2) |
| change the mount namespace of the calling process, and hence affect |
| all processes that live in the same namespace, but do not affect |
| processes in a different mount namespace. |
| |
| After a |
| .BR clone () |
| where the |
| .B CLONE_NEWNS |
| flag is set, the cloned child is started in a new mount namespace, |
| initialized with a copy of the namespace of the parent. |
| |
| Only a privileged process (one having the \fBCAP_SYS_ADMIN\fP capability) |
| may specify the |
| .B CLONE_NEWNS |
| flag. |
| It is not permitted to specify both |
| .B CLONE_NEWNS |
| and |
| .B CLONE_FS |
| in the same |
| .BR clone () |
| call. |
| .TP |
| .BR CLONE_NEWPID " (since Linux 2.6.24)" |
| .\" This explanation draws a lot of details from |
| .\" http://lwn.net/Articles/259217/ |
| .\" Authors: Pavel Emelyanov <xemul@openvz.org> |
| .\" and Kir Kolyshkin <kir@openvz.org> |
| .\" |
| .\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264 |
| .\" Author: Pavel Emelyanov <xemul@openvz.org> |
| If |
| .B CLONE_NEWPID |
| is set, then create the process in a new PID namespace. |
| If this flag is not set, then (as with |
| .BR fork (2)), |
| the process is created in the same PID namespace as |
| the calling process. |
| This flag is intended for the implementation of containers. |
| |
| A PID namespace provides an isolated environment for PIDs: |
| PIDs in a new namespace start at 1, |
| somewhat like a standalone system, and calls to |
| .BR fork (2), |
| .BR vfork (2), |
| or |
| .BR clone () |
| will produce processes with PIDs that are unique within the namespace. |
| |
| The first process created in a new namespace |
| (i.e., the process created using the |
| .BR CLONE_NEWPID |
| flag) has the PID 1, and is the "init" process for the namespace. |
| Children that are orphaned within the namespace will be reparented |
| to this process rather than |
| .BR init (8). |
| Unlike the traditional |
| .B init |
| process, the "init" process of a PID namespace can terminate, |
| and if it does, all of the processes in the namespace are terminated. |
| |
| PID namespaces form a hierarchy. |
| When a new PID namespace is created, |
| the processes in that namespace are visible |
| in the PID namespace of the process that created the new namespace; |
| analogously, if the parent PID namespace is itself |
| the child of another PID namespace, |
| then processes in the child and parent PID namespaces will both be |
| visible in the grandparent PID namespace. |
| Conversely, the processes in the "child" PID namespace do not see |
| the processes in the parent namespace. |
| The existence of a namespace hierarchy means that each process |
| may now have multiple PIDs: |
| one for each namespace in which it is visible; |
| each of these PIDs is unique within the corresponding namespace. |
| (A call to |
| .BR getpid (2) |
| always returns the PID associated with the namespace in which |
| the process lives.) |
| |
| After creating the new namespace, |
| it is useful for the child to change its root directory |
| and mount a new procfs instance at |
| .I /proc |
| so that tools such as |
| .BR ps (1) |
| work correctly. |
| .\" mount -t proc proc /proc |
| (If |
| .BR CLONE_NEWNS |
| is also included in |
| .IR flags , |
| then it isn't necessary to change the root directory: |
| a new procfs instance can be mounted directly over |
| .IR /proc .) |
| |
| Use of this flag requires: a kernel configured with the |
| .B CONFIG_PID_NS |
| option and that the process be privileged |
| .RB ( CAP_SYS_ADMIN ). |
| This flag can't be specified in conjunction with |
| .BR CLONE_THREAD . |
| .TP |
| .BR CLONE_NEWUTS " (since Linux 2.6.19)" |
| If |
| .B CLONE_NEWUTS |
| is set, then create the process in a new UTS namespace, |
| whose identifiers are initialized by duplicating the identifiers |
| from the UTS namespace of the calling process. |
| If this flag is not set, then (as with |
| .BR fork (2)), |
| the process is created in the same UTS namespace as |
| the calling process. |
| This flag is intended for the implementation of containers. |
| |
| A UTS namespace is the set of identifiers returned by |
| .BR uname (2); |
| among these, the domain name and the host name can be modified by |
| .BR setdomainname (2) |
| and |
| .BR sethostname (2), |
| respectively. |
| Changes made to the identifiers in a UTS namespace |
| are visible to all other processes in the same namespace, |
| but are not visible to processes in other UTS namespaces. |
| |
| Use of this flag requires: a kernel configured with the |
| .B CONFIG_UTS_NS |
| option and that the process be privileged |
| .RB ( CAP_SYS_ADMIN ). |
| .TP |
| .BR CLONE_PARENT " (since Linux 2.3.12)" |
| If |
| .B CLONE_PARENT |
| is set, then the parent of the new child (as returned by |
| .BR getppid (2)) |
| will be the same as that of the calling process. |
| |
| If |
| .B CLONE_PARENT |
| is not set, then (as with |
| .BR fork (2)) |
| the child's parent is the calling process. |
| |
| Note that it is the parent process, as returned by |
| .BR getppid (2), |
| which is signaled when the child terminates, so that |
| if |
| .B CLONE_PARENT |
| is set, then the parent of the calling process, rather than the |
| calling process itself, will be signaled. |
| .TP |
| .BR CLONE_PARENT_SETTID " (since Linux 2.5.49)" |
| Store child thread ID at location |
| .I ptid |
| in parent and child memory. |
| (In Linux 2.5.32-2.5.48 there was a flag |
| .B CLONE_SETTID |
| that did this.) |
| .TP |
| .BR CLONE_PID " (obsolete)" |
| If |
| .B CLONE_PID |
| is set, the child process is created with the same process ID as |
| the calling process. |
| This is good for hacking the system, but otherwise |
| of not much use. |
| Since 2.3.21 this flag can be |
| specified only by the system boot process (PID 0). |
| It disappeared in Linux 2.5.16. |
| .TP |
| .BR CLONE_PTRACE " (since Linux 2.2)" |
| If |
| .B CLONE_PTRACE |
| is specified, and the calling process is being traced, |
| then trace the child also (see |
| .BR ptrace (2)). |
| .TP |
| .BR CLONE_SETTLS " (since Linux 2.5.32)" |
| The |
| .I tls |
| argument is the new TLS (Thread Local Storage) descriptor. |
| (See |
| .BR set_thread_area (2).) |
| .TP |
| .BR CLONE_SIGHAND " (since Linux 2.0)" |
| If |
| .B CLONE_SIGHAND |
| is set, the calling process and the child process share the same table of |
| signal handlers. |
| If the calling process or child process calls |
| .BR sigaction (2) |
| to change the behavior associated with a signal, the behavior is |
| changed in the other process as well. |
| However, the calling process and child |
| processes still have distinct signal masks and sets of pending |
| signals. |
| So, one of them may block or unblock some signals using |
| .BR sigprocmask (2) |
| without affecting the other process. |
| |
| If |
| .B CLONE_SIGHAND |
| is not set, the child process inherits a copy of the signal handlers |
| of the calling process at the time |
| .BR clone () |
| is called. |
| Calls to |
| .BR sigaction (2) |
| performed later by one of the processes have no effect on the other |
| process. |
| |
| Since Linux 2.6.0-test6, |
| .I flags |
| must also include |
| .B CLONE_VM |
| if |
| .B CLONE_SIGHAND |
| is specified. |
| .TP |
| .BR CLONE_STOPPED " (since Linux 2.6.0-test2)" |
| If |
| .B CLONE_STOPPED |
| is set, then the child is initially stopped (as though it was sent a |
| .B SIGSTOP |
| signal), and must be resumed by sending it a |
| .B SIGCONT |
| signal. |
| |
| This flag was |
| .I deprecated |
| from Linux 2.6.25 onward, |
| and was |
| .I removed |
| altogether in Linux 2.6.38. |
| .\" glibc 2.8 removed this defn from bits/sched.h |
| .TP |
| .BR CLONE_SYSVSEM " (since Linux 2.5.10)" |
| If |
| .B CLONE_SYSVSEM |
| is set, then the child and the calling process share |
| a single list of System V semaphore undo values (see |
| .BR semop (2)). |
| If this flag is not set, then the child has a separate undo list, |
| which is initially empty. |
| .TP |
| .BR CLONE_THREAD " (since Linux 2.4.0-test8)" |
| If |
| .B CLONE_THREAD |
| is set, the child is placed in the same thread group as the calling process. |
| To make the remainder of the discussion of |
| .B CLONE_THREAD |
| more readable, the term "thread" is used to refer to the |
| processes within a thread group. |
| |
| Thread groups were a feature added in Linux 2.4 to support the |
| POSIX threads notion of a set of threads that share a single PID. |
| Internally, this shared PID is the so-called |
| thread group identifier (TGID) for the thread group. |
| Since Linux 2.4, calls to |
| .BR getpid (2) |
| return the TGID of the caller. |
| |
| The threads within a group can be distinguished by their (system-wide) |
| unique thread IDs (TID). |
| A new thread's TID is available as the function result |
| returned to the caller of |
| .BR clone (), |
| and a thread can obtain |
| its own TID using |
| .BR gettid (2). |
| |
| When a call is made to |
| .BR clone () |
| without specifying |
| .BR CLONE_THREAD , |
| then the resulting thread is placed in a new thread group |
| whose TGID is the same as the thread's TID. |
| This thread is the |
| .I leader |
| of the new thread group. |
| |
| A new thread created with |
| .B CLONE_THREAD |
| has the same parent process as the caller of |
| .BR clone () |
| (i.e., like |
| .BR CLONE_PARENT ), |
| so that calls to |
| .BR getppid (2) |
| return the same value for all of the threads in a thread group. |
| When a |
| .B CLONE_THREAD |
| thread terminates, the thread that created it using |
| .BR clone () |
| is not sent a |
| .B SIGCHLD |
| (or other termination) signal; |
| nor can the status of such a thread be obtained |
| using |
| .BR wait (2). |
| (The thread is said to be |
| .IR detached .) |
| |
| After all of the threads in a thread group terminate, |
| the parent process of the thread group is sent a |
| .B SIGCHLD |
| (or other termination) signal. |
| |
| If any of the threads in a thread group performs an |
| .BR execve (2), |
| then all threads other than the thread group leader are terminated, |
| and the new program is executed in the thread group leader. |
| |
| If one of the threads in a thread group creates a child using |
| .BR fork (2), |
| then any thread in the group can |
| .BR wait (2) |
| for that child. |
| |
| Since Linux 2.5.35, |
| .I flags |
| must also include |
| .B CLONE_SIGHAND |
| if |
| .B CLONE_THREAD |
| is specified. |
| |
| Signals may be sent to a thread group as a whole (i.e., a TGID) using |
| .BR kill (2), |
| or to a specific thread (i.e., TID) using |
| .BR tgkill (2). |
| |
| Signal dispositions and actions are process-wide: |
| if an unhandled signal is delivered to a thread, then |
| it will affect (terminate, stop, continue, be ignored in) |
| all members of the thread group. |
| |
| Each thread has its own signal mask, as set by |
| .BR sigprocmask (2), |
| but signals can be pending either: for the whole process |
| (i.e., deliverable to any member of the thread group), |
| when sent with |
| .BR kill (2); |
| or for an individual thread, when sent with |
| .BR tgkill (2). |
| A call to |
| .BR sigpending (2) |
| returns a signal set that is the union of the signals pending for the |
| whole process and the signals that are pending for the calling thread. |
| |
| If |
| .BR kill (2) |
| is used to send a signal to a thread group, |
| and the thread group has installed a handler for the signal, then |
| the handler will be invoked in exactly one, arbitrarily selected |
| member of the thread group that has not blocked the signal. |
| If multiple threads in a group are waiting to accept the same signal using |
| .BR sigwaitinfo (2), |
| the kernel will arbitrarily select one of these threads |
| to receive a signal sent using |
| .BR kill (2). |
| .TP |
| .BR CLONE_UNTRACED " (since Linux 2.5.46)" |
| If |
| .B CLONE_UNTRACED |
| is specified, then a tracing process cannot force |
| .B CLONE_PTRACE |
| on this child process. |
| .TP |
| .BR CLONE_VFORK " (since Linux 2.2)" |
| If |
| .B CLONE_VFORK |
| is set, the execution of the calling process is suspended |
| until the child releases its virtual memory |
| resources via a call to |
| .BR execve (2) |
| or |
| .BR _exit (2) |
| (as with |
| .BR vfork (2)). |
| |
| If |
| .B CLONE_VFORK |
| is not set then both the calling process and the child are schedulable |
| after the call, and an application should not rely on execution occurring |
| in any particular order. |
| .TP |
| .BR CLONE_VM " (since Linux 2.0)" |
| If |
| .B CLONE_VM |
| is set, the calling process and the child process run in the same memory |
| space. |
| In particular, memory writes performed by the calling process |
| or by the child process are also visible in the other process. |
| Moreover, any memory mapping or unmapping performed with |
| .BR mmap (2) |
| or |
| .BR munmap (2) |
| by the child or calling process also affects the other process. |
| |
| If |
| .B CLONE_VM |
| is not set, the child process runs in a separate copy of the memory |
| space of the calling process at the time of |
| .BR clone (). |
| Memory writes or file mappings/unmappings performed by one of the |
| processes do not affect the other, as with |
| .BR fork (2). |
| .SS "sys_clone" |
| The |
| .B sys_clone |
| system call corresponds more closely to |
| .BR fork (2) |
| in that execution in the child continues from the point of the |
| call. |
| As such, the |
| .I fn |
| and |
| .I arg |
| arguments of the |
| .BR clone () |
| wrapper function are omitted. |
| Furthermore, the argument order changes. |
| The raw system call interface is roughly: |
| .in +4 |
| .nf |
| |
| .BI "long clone(unsigned long " flags ", void *" child_stack , |
| .BI " void *" ptid ", void *" ctid , |
| .BI " struct pt_regs *" regs ); |
| |
| .fi |
| .in |
| Another difference for |
| .B sys_clone |
| is that the |
| .I child_stack |
| argument may be zero, in which case copy-on-write semantics ensure that the |
| child gets separate copies of stack pages when either process modifies |
| the stack. |
| In this case, for correct operation, the |
| .B CLONE_VM |
| option should not be specified. |
| .SS Linux 2.4 and earlier |
| In Linux 2.4 and earlier, |
| .BR clone () |
| does not take arguments |
| .IR ptid , |
| .IR tls , |
| and |
| .IR ctid . |
| .SH "RETURN VALUE" |
| .\" gettid(2) returns current->pid; |
| .\" getpid(2) returns current->tgid; |
| On success, the thread ID of the child process is returned |
| in the caller's thread of execution. |
| On failure, \-1 is returned |
| in the caller's context, no child process will be created, and |
| .I errno |
| will be set appropriately. |
| .SH ERRORS |
| .TP |
| .B EAGAIN |
| Too many processes are already running. |
| .TP |
| .B EINVAL |
| .B CLONE_SIGHAND |
| was specified, but |
| .B CLONE_VM |
| was not. |
| (Since Linux 2.6.0-test6.) |
| .TP |
| .B EINVAL |
| .B CLONE_THREAD |
| was specified, but |
| .B CLONE_SIGHAND |
| was not. |
| (Since Linux 2.5.35.) |
| .\" .TP |
| .\" .B EINVAL |
| .\" Precisely one of |
| .\" .B CLONE_DETACHED |
| .\" and |
| .\" .B CLONE_THREAD |
| .\" was specified. |
| .\" (Since Linux 2.6.0-test6.) |
| .TP |
| .B EINVAL |
| Both |
| .B CLONE_FS |
| and |
| .B CLONE_NEWNS |
| were specified in |
| .IR flags . |
| .TP |
| .B EINVAL |
| Both |
| .B CLONE_NEWIPC |
| and |
| .B CLONE_SYSVSEM |
| were specified in |
| .IR flags . |
| .TP |
| .B EINVAL |
| Both |
| .BR CLONE_NEWPID |
| and |
| .BR CLONE_THREAD |
| were specified in |
| .IR flags . |
| .TP |
| .B EINVAL |
| Returned by |
| .BR clone () |
| when a zero value is specified for |
| .IR child_stack . |
| .TP |
| .B EINVAL |
| .BR CLONE_NEWIPC |
| was specified in |
| .IR flags , |
| but the kernel was not configured with the |
| .B CONFIG_SYSVIPC |
| and |
| .BR CONFIG_IPC_NS |
| options. |
| .TP |
| .B EINVAL |
| .BR CLONE_NEWNET |
| was specified in |
| .IR flags , |
| but the kernel was not configured with the |
| .B CONFIG_NET_NS |
| option. |
| .TP |
| .B EINVAL |
| .BR CLONE_NEWPID |
| was specified in |
| .IR flags , |
| but the kernel was not configured with the |
| .B CONFIG_PID_NS |
| option. |
| .TP |
| .B EINVAL |
| .BR CLONE_NEWUTS |
| was specified in |
| .IR flags , |
| but the kernel was not configured with the |
| .B CONFIG_UTS_NS |
| option. |
| .TP |
| .B ENOMEM |
| Cannot allocate sufficient memory to allocate a task structure for the |
| child, or to copy those parts of the caller's context that need to be |
| copied. |
| .TP |
| .B EPERM |
| .BR CLONE_NEWIPC , |
| .BR CLONE_NEWNET , |
| .BR CLONE_NEWNS , |
| .BR CLONE_NEWPID , |
| or |
| .BR CLONE_NEWUTS |
| was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP). |
| .TP |
| .B EPERM |
| .B CLONE_PID |
| was specified by a process other than process 0. |
| .SH VERSIONS |
| There is no entry for |
| .BR clone () |
| in libc5. |
| glibc2 provides |
| .BR clone () |
| as described in this manual page. |
| .SH "CONFORMING TO" |
| The |
| .BR clone () |
| and |
| .B sys_clone |
| calls are Linux-specific and should not be used in programs |
| intended to be portable. |
| .SH NOTES |
| In the kernel 2.4.x series, |
| .B CLONE_THREAD |
| generally does not make the parent of the new thread the same |
| as the parent of the calling process. |
| However, for kernel versions 2.4.7 to 2.4.18 the |
| .B CLONE_THREAD |
| flag implied the |
| .B CLONE_PARENT |
| flag (as in kernel 2.6). |
| |
| For a while there was |
| .B CLONE_DETACHED |
| (introduced in 2.5.32): |
| parent wants no child-exit signal. |
| In 2.6.2 the need to give this |
| together with |
| .B CLONE_THREAD |
| disappeared. |
| This flag is still defined, but has no effect. |
| |
| On i386, |
| .BR clone () |
| should not be called through vsyscall, but directly through |
| .IR "int $0x80" . |
| |
| On ia64, a different system call is used: |
| .nf |
| |
| .BI "int __clone2(int (*" "fn" ")(void *), " |
| .BI " void *" child_stack_base ", size_t " stack_size , |
| .BI " int " flags ", void *" "arg" ", ... " |
| .BI " /* pid_t *" ptid ", struct user_desc *" tls \ |
| ", pid_t *" ctid " */ );" |
| .fi |
| .PP |
| The |
| .BR __clone2 () |
| system call operates in the same way as |
| .BR clone (), |
| except that |
| .I child_stack_base |
| points to the lowest address of the child's stack area, |
| and |
| .I stack_size |
| specifies the size of the stack pointed to by |
| .IR child_stack_base . |
| .SH BUGS |
| Versions of the GNU C library that include the NPTL threading library |
| contain a wrapper function for |
| .BR getpid (2) |
| that performs caching of PIDs. |
| This caching relies on support in the glibc wrapper for |
| .BR clone (), |
| but as currently implemented, |
| the cache may not be up to date in some circumstances. |
| In particular, |
| if a signal is delivered to the child immediately after the |
| .BR clone () |
| call, then a call to |
| .BR getpid (2) |
| in a handler for the signal may return the PID |
| of the calling process ("the parent"), |
| if the clone wrapper has not yet had a chance to update the PID |
| cache in the child. |
| (This discussion ignores the case where the child was created using |
| .BR CLONE_THREAD , |
| when |
| .BR getpid (2) |
| .I should |
| return the same value in the child and in the process that called |
| .BR clone (), |
| since the caller and the child are in the same thread group. |
| The stale-cache problem also does not occur if the |
| .I flags |
| argument includes |
| .BR CLONE_VM .) |
| To get the truth, it may be necessary to use code such as the following: |
| .nf |
| |
| #include <syscall.h> |
| |
| pid_t mypid; |
| |
| mypid = syscall(SYS_getpid); |
| .fi |
| .\" See also the following bug reports |
| .\" https://bugzilla.redhat.com/show_bug.cgi?id=417521 |
| .\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910 |
| .SH EXAMPLE |
| .SS Create a child that executes in a separate UTS namespace |
| The following program demonstrates the use of |
| .BR clone (2) |
| to create a child process that executes in a separate UTS namespace. |
| The child changes the hostname in its UTS namespace. |
| Both parent and child then display the system hostname, |
| making it possible to see that the hostname |
| differs in the UTS namespaces of the parent and child. |
| For an example of the use of this program, see |
| .BR setns (2). |
| |
| .nf |
| #define _GNU_SOURCE |
| #include <sys/wait.h> |
| #include <sys/utsname.h> |
| #include <sched.h> |
| #include <string.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| |
| #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\ |
| } while (0) |
| |
| static int /* Start function for cloned child */ |
| childFunc(void *arg) |
| { |
| struct utsname uts; |
| |
| /* Change hostname in UTS namespace of child */ |
| |
| if (sethostname(arg, strlen(arg)) == \-1) |
| errExit("sethostname"); |
| |
| /* Retrieve and display hostname */ |
| |
| if (uname(&uts) == \-1) |
| errExit("uname"); |
| printf("uts.nodename in child: %s\\n", uts.nodename); |
| |
| /* Keep the namespace open for a while, by sleeping. |
| This allows some experimentation\-\-for example, another |
| process might join the namespace. */ |
| |
| sleep(200); |
| |
| return 0; /* Child terminates now */ |
| } |
| |
| #define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */ |
| |
| int |
| main(int argc, char *argv[]) |
| { |
| char *stack; /* Start of stack buffer */ |
| char *stackTop; /* End of stack buffer */ |
| pid_t pid; |
| struct utsname uts; |
| |
| if (argc < 2) { |
| fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]); |
| exit(EXIT_SUCCESS); |
| } |
| |
| /* Allocate stack for child */ |
| |
| stack = malloc(STACK_SIZE); |
| if (stack == NULL) |
| errExit("malloc"); |
| stackTop = stack + STACK_SIZE; /* Assume stack grows downward */ |
| |
| /* Create child that has its own UTS namespace; |
| child commences execution in childFunc() */ |
| |
| pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]); |
| if (pid == \-1) |
| errExit("clone"); |
| printf("clone() returned %ld\\n", (long) pid); |
| |
| /* Parent falls through to here */ |
| |
| sleep(1); /* Give child time to change its hostname */ |
| |
| /* Display hostname in parent\(aqs UTS namespace. This will be |
| different from hostname in child\(aqs UTS namespace. */ |
| |
| if (uname(&uts) == \-1) |
| errExit("uname"); |
| printf("uts.nodename in parent: %s\\n", uts.nodename); |
| |
| if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */ |
| errExit("waitpid"); |
| printf("child has terminated\\n"); |
| |
| exit(EXIT_SUCCESS); |
| } |
| .fi |
| .SH "SEE ALSO" |
| .BR fork (2), |
| .BR futex (2), |
| .BR getpid (2), |
| .BR gettid (2), |
| .BR kcmp (2), |
| .BR set_thread_area (2), |
| .BR set_tid_address (2), |
| .BR setns (2), |
| .BR tkill (2), |
| .BR unshare (2), |
| .BR wait (2), |
| .BR capabilities (7), |
| .BR pthreads (7) |