| .\" Copyright (C) 2006, Janak Desai <janak@us.ibm.com> |
| .\" and Copyright (C) 2006, 2012 Michael Kerrisk <mtk.manpages@gmail.com> |
| .\" |
| .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE) |
| .\" Licensed under the GPL |
| .\" %%%LICENSE_END |
| .\" |
| .\" Patch Justification: |
| .\" unshare system call is needed to implement, using PAM, |
| .\" per-security_context and/or per-user namespace to provide |
| .\" polyinstantiated directories. Using unshare and bind mounts, a |
| .\" PAM module can create private namespace with appropriate |
| .\" directories(based on user's security context) bind mounted on |
| .\" public directories such as /tmp, thus providing an instance of |
| .\" /tmp that is based on user's security context. Without the |
| .\" unshare system call, namespace separation can only be achieved |
| .\" by clone, which would require porting and maintaining all commands |
| .\" such as login, and su, that establish a user session. |
| .\" |
| .TH UNSHARE 2 2018-02-02 "Linux" "Linux Programmer's Manual" |
| .SH NAME |
| unshare \- disassociate parts of the process execution context |
| .SH SYNOPSIS |
| .nf |
| .B #define _GNU_SOURCE |
| .B #include <sched.h> |
| .PP |
| .BI "int unshare(int " flags ); |
| .fi |
| .SH DESCRIPTION |
| .BR unshare () |
| allows a process (or thread) to disassociate parts of its execution |
| context that are currently being shared with other processes (or threads). |
| Part of the execution context, such as the mount namespace, is shared |
| implicitly when a new process is created using |
| .BR fork (2) |
| or |
| .BR vfork (2), |
| while other parts, such as virtual memory, may be |
| shared by explicit request when creating a process or thread using |
| .BR clone (2). |
| .PP |
| The main use of |
| .BR unshare () |
| is to allow a process to control its |
| shared execution context without creating a new process. |
| .PP |
| The |
| .I flags |
| argument is a bit mask that specifies which parts of |
| the execution context should be unshared. |
| This argument is specified by ORing together zero or more |
| of the following constants: |
| .TP |
| .B CLONE_FILES |
| Reverse the effect of the |
| .BR clone (2) |
| .B CLONE_FILES |
| flag. |
| Unshare the file descriptor table, so that the calling process |
| no longer shares its file descriptors with any other process. |
| .TP |
| .B CLONE_FS |
| Reverse the effect of the |
| .BR clone (2) |
| .B CLONE_FS |
| flag. |
| Unshare filesystem attributes, so that the calling process |
| no longer shares its root directory |
| .RB ( chroot (2)), |
| current directory |
| .RB ( chdir (2)), |
| or umask |
| .RB ( umask (2)) |
| attributes with any other process. |
| .TP |
| .BR CLONE_NEWCGROUP " (since Linux 4.6)" |
| This flag has the same effect as the |
| .BR clone (2) |
| .B CLONE_NEWCGROUP |
| flag. |
| Unshare the cgroup namespace. |
| Use of |
| .BR CLONE_NEWCGROUP |
| requires the |
| .BR CAP_SYS_ADMIN |
| capability. |
| .TP |
| .BR CLONE_NEWIPC " (since Linux 2.6.19)" |
| This flag has the same effect as the |
| .BR clone (2) |
| .B CLONE_NEWIPC |
| flag. |
| Unshare the IPC namespace, |
| so that the calling process has a private copy of the |
| IPC namespace which is not shared with any other process. |
| Specifying this flag automatically implies |
| .BR CLONE_SYSVSEM |
| as well. |
| Use of |
| .BR CLONE_NEWIPC |
| requires the |
| .BR CAP_SYS_ADMIN |
| capability. |
| .TP |
| .BR CLONE_NEWNET " (since Linux 2.6.24)" |
| This flag has the same effect as the |
| .BR clone (2) |
| .B CLONE_NEWNET |
| flag. |
| Unshare the network namespace, |
| so that the calling process is moved into a |
| new network namespace which is not shared |
| with any previously existing process. |
| Use of |
| .BR CLONE_NEWNET |
| requires the |
| .BR CAP_SYS_ADMIN |
| capability. |
| .TP |
| .B CLONE_NEWNS |
| .\" These flag name are inconsistent: |
| .\" CLONE_NEWNS does the same thing in clone(), but CLONE_VM, |
| .\" CLONE_FS, and CLONE_FILES reverse the action of the clone() |
| .\" flags of the same name. |
| This flag has the same effect as the |
| .BR clone (2) |
| .B CLONE_NEWNS |
| flag. |
| Unshare the mount namespace, |
| so that the calling process has a private copy of |
| its namespace which is not shared with any other process. |
| Specifying this flag automatically implies |
| .B CLONE_FS |
| as well. |
| Use of |
| .BR CLONE_NEWNS |
| requires the |
| .BR CAP_SYS_ADMIN |
| capability. |
| For further information, see |
| .BR mount_namespaces (7). |
| .TP |
| .BR CLONE_NEWPID " (since Linux 3.8)" |
| This flag has the same effect as the |
| .BR clone (2) |
| .B CLONE_NEWPID |
| flag. |
| Unshare the PID namespace, |
| so that the calling process has a new PID namespace for its children |
| which is not shared with any previously existing process. |
| The calling process is |
| .I not |
| moved into the new namespace. |
| The first child created by the calling process will have |
| the process ID 1 and will assume the role of |
| .BR init (1) |
| in the new namespace. |
| .BR CLONE_NEWPID |
| automatically implies |
| .BR CLONE_THREAD |
| as well. |
| Use of |
| .BR CLONE_NEWPID |
| requires the |
| .BR CAP_SYS_ADMIN |
| capability. |
| For further information, see |
| .BR pid_namespaces (7). |
| .TP |
| .BR CLONE_NEWUSER " (since Linux 3.8)" |
| This flag has the same effect as the |
| .BR clone (2) |
| .B CLONE_NEWUSER |
| flag. |
| Unshare the user namespace, |
| so that the calling process is moved into a new user namespace |
| which is not shared with any previously existing process. |
| As with the child process created by |
| .BR clone (2) |
| with the |
| .B CLONE_NEWUSER |
| flag, the caller obtains a full set of capabilities in the new namespace. |
| .IP |
| .BR CLONE_NEWUSER |
| requires that the calling process is not threaded; specifying |
| .BR CLONE_NEWUSER |
| automatically implies |
| .BR CLONE_THREAD . |
| Since Linux 3.9, |
| .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71 |
| .\" https://lwn.net/Articles/543273/ |
| .BR CLONE_NEWUSER |
| also automatically implies |
| .BR CLONE_FS . |
| .BR CLONE_NEWUSER |
| requires that the user ID and group ID |
| of the calling process are mapped to user IDs and group IDs in the |
| user namespace of the calling process at the time of the call. |
| .IP |
| For further information on user namespaces, see |
| .BR user_namespaces (7). |
| .TP |
| .BR CLONE_NEWUTS " (since Linux 2.6.19)" |
| This flag has the same effect as the |
| .BR clone (2) |
| .B CLONE_NEWUTS |
| flag. |
| Unshare the UTS namespace, |
| so that the calling process has a private copy of the |
| UTS namespace which is not shared with any other process. |
| Use of |
| .BR CLONE_NEWUTS |
| requires the |
| .BR CAP_SYS_ADMIN |
| capability. |
| .TP |
| .BR CLONE_SYSVSEM " (since Linux 2.6.26)" |
| .\" commit 9edff4ab1f8d82675277a04e359d0ed8bf14a7b7 |
| This flag reverses the effect of the |
| .BR clone (2) |
| .B CLONE_SYSVSEM |
| flag. |
| Unshare System\ V semaphore adjustment |
| .RI ( semadj ) |
| values, |
| so that the calling process has a new empty |
| .I semadj |
| list that is not shared with any other process. |
| If this is the last process that has a reference to the process's current |
| .I semadj |
| list, then the adjustments in that list are applied |
| to the corresponding semaphores, as described in |
| .BR semop (2). |
| .\" CLONE_NEWNS If CLONE_SIGHAND is set and signals are also being shared |
| .\" (i.e., current->signal->count > 1), force CLONE_THREAD. |
| .PP |
| In addition, |
| .BR CLONE_THREAD , |
| .BR CLONE_SIGHAND , |
| and |
| .BR CLONE_VM |
| can be specified in |
| .I flags |
| if the caller is single threaded (i.e., it is not sharing |
| its address space with another process or thread). |
| In this case, these flags have no effect. |
| (Note also that specifying |
| .BR CLONE_THREAD |
| automatically implies |
| .BR CLONE_VM , |
| and specifying |
| .BR CLONE_VM |
| automatically implies |
| .BR CLONE_SIGHAND .) |
| .\" As at 3.9, the following forced implications also apply, |
| .\" although the relevant flags are not yet implemented. |
| .\" If CLONE_THREAD is set force CLONE_VM. |
| .\" If CLONE_VM is set, force CLONE_SIGHAND. |
| .\" |
| If the process is multithreaded, then |
| the use of these flags results in an error. |
| .\" See kernel/fork.c::check_unshare_flags() |
| .PP |
| If |
| .I flags |
| is specified as zero, then |
| .BR unshare () |
| is a no-op; |
| no changes are made to the calling process's execution context. |
| .SH RETURN VALUE |
| On success, zero is returned. |
| On failure, \-1 is returned and |
| .I errno |
| is set to indicate the error. |
| .SH ERRORS |
| .TP |
| .B EINVAL |
| An invalid bit was specified in |
| .IR flags . |
| .TP |
| .B EINVAL |
| .BR CLONE_THREAD , |
| .BR CLONE_SIGHAND , |
| or |
| .BR CLONE_VM |
| was specified in |
| .IR flags , |
| and the caller is multithreaded. |
| .TP |
| .B ENOMEM |
| Cannot allocate sufficient memory to copy parts of caller's |
| context that need to be unshared. |
| .TP |
| .BR ENOSPC " (since Linux 3.7)" |
| .\" commit f2302505775fd13ba93f034206f1e2a587017929 |
| .B CLONE_NEWPID |
| was specified in |
| .IR flags , |
| but the limit on the nesting depth of PID namespaces |
| would have been exceeded; see |
| .BR pid_namespaces (7). |
| .TP |
| .BR ENOSPC " (since Linux 4.9; beforehand " EUSERS ) |
| .B CLONE_NEWUSER |
| was specified in |
| .IR flags , |
| and the call would cause the limit on the number of |
| nested user namespaces to be exceeded. |
| See |
| .BR user_namespaces (7). |
| .IP |
| From Linux 3.11 to Linux 4.8, the error diagnosed in this case was |
| .BR EUSERS . |
| .TP |
| .BR ENOSPC " (since Linux 4.9)" |
| One of the values in |
| .I flags |
| specified the creation of a new user namespace, |
| but doing so would have caused the limit defined by the corresponding file in |
| .IR /proc/sys/user |
| to be exceeded. |
| For further details, see |
| .BR namespaces (7). |
| .TP |
| .B EPERM |
| The calling process did not have the required privileges for this operation. |
| .TP |
| .B EPERM |
| .BR CLONE_NEWUSER |
| was specified in |
| .IR flags , |
| but either the effective user ID or the effective group ID of the caller |
| does not have a mapping in the parent namespace (see |
| .BR user_namespaces (7)). |
| .TP |
| .BR EPERM " (since Linux 3.9)" |
| .\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d |
| .B CLONE_NEWUSER |
| was specified in |
| .I flags |
| and the caller is in a chroot environment |
| .\" FIXME What is the rationale for this restriction? |
| (i.e., the caller's root directory does not match the root directory |
| of the mount namespace in which it resides). |
| .TP |
| .BR EUSERS " (from Linux 3.11 to Linux 4.8)" |
| .B CLONE_NEWUSER |
| was specified in |
| .IR flags , |
| and the limit on the number of nested user namespaces would be exceeded. |
| See the discussion of the |
| .BR ENOSPC |
| error above. |
| .SH VERSIONS |
| The |
| .BR unshare () |
| system call was added to Linux in kernel 2.6.16. |
| .SH CONFORMING TO |
| The |
| .BR unshare () |
| system call is Linux-specific. |
| .SH NOTES |
| Not all of the process attributes that can be shared when |
| a new process is created using |
| .BR clone (2) |
| can be unshared using |
| .BR unshare (). |
| In particular, as at kernel 3.8, |
| .\" FIXME all of the following needs to be reviewed for the current kernel |
| .BR unshare () |
| does not implement flags that reverse the effects of |
| .BR CLONE_SIGHAND , |
| .\" However, we can do unshare(CLONE_SIGHAND) if CLONE_SIGHAND |
| .\" was not specified when doing clone(); i.e., unsharing |
| .\" signal handlers is permitted if we are not actually |
| .\" sharing signal handlers. mtk |
| .BR CLONE_THREAD , |
| or |
| .BR CLONE_VM . |
| .\" However, we can do unshare(CLONE_VM) if CLONE_VM |
| .\" was not specified when doing clone(); i.e., unsharing |
| .\" virtual memory is permitted if we are not actually |
| .\" sharing virtual memory. mtk |
| Such functionality may be added in the future, if required. |
| .\" |
| .\"9) Future Work |
| .\"-------------- |
| .\"The current implementation of unshare does not allow unsharing of |
| .\"signals and signal handlers. Signals are complex to begin with and |
| .\"to unshare signals and/or signal handlers of a currently running |
| .\"process is even more complex. If in the future there is a specific |
| .\"need to allow unsharing of signals and/or signal handlers, it can |
| .\"be incrementally added to unshare without affecting legacy |
| .\"applications using unshare. |
| .\" |
| .SH EXAMPLE |
| The program below provides a simple implementation of the |
| .BR unshare (1) |
| command, which unshares one or more namespaces and executes the |
| command supplied in its command-line arguments. |
| Here's an example of the use of this program, |
| running a shell in a new mount namespace, |
| and verifying that the original shell and the |
| new shell are in separate mount namespaces: |
| .PP |
| .in +4n |
| .EX |
| $ \fBreadlink /proc/$$/ns/mnt\fP |
| mnt:[4026531840] |
| $ \fBsudo ./unshare -m /bin/bash\fP |
| # \fBreadlink /proc/$$/ns/mnt\fP |
| mnt:[4026532325] |
| .EE |
| .in |
| .PP |
| The differing output of the two |
| .BR readlink (1) |
| commands shows that the two shells are in different mount namespaces. |
| .SS Program source |
| \& |
| .EX |
| /* unshare.c |
| |
| A simple implementation of the unshare(1) command: unshare |
| namespaces and execute a command. |
| */ |
| #define _GNU_SOURCE |
| #include <sched.h> |
| #include <unistd.h> |
| #include <stdlib.h> |
| #include <stdio.h> |
| |
| /* A simple error\-handling function: print an error message based |
| on the value in \(aqerrno\(aq and terminate the calling process */ |
| |
| #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\ |
| } while (0) |
| |
| static void |
| usage(char *pname) |
| { |
| fprintf(stderr, "Usage: %s [options] program [arg...]\\n", pname); |
| fprintf(stderr, "Options can be:\\n"); |
| fprintf(stderr, " \-i unshare IPC namespace\\n"); |
| fprintf(stderr, " \-m unshare mount namespace\\n"); |
| fprintf(stderr, " \-n unshare network namespace\\n"); |
| fprintf(stderr, " \-p unshare PID namespace\\n"); |
| fprintf(stderr, " \-u unshare UTS namespace\\n"); |
| fprintf(stderr, " \-U unshare user namespace\\n"); |
| exit(EXIT_FAILURE); |
| } |
| |
| int |
| main(int argc, char *argv[]) |
| { |
| int flags, opt; |
| |
| flags = 0; |
| |
| while ((opt = getopt(argc, argv, "imnpuU")) != \-1) { |
| switch (opt) { |
| case \(aqi\(aq: flags |= CLONE_NEWIPC; break; |
| case \(aqm\(aq: flags |= CLONE_NEWNS; break; |
| case \(aqn\(aq: flags |= CLONE_NEWNET; break; |
| case \(aqp\(aq: flags |= CLONE_NEWPID; break; |
| case \(aqu\(aq: flags |= CLONE_NEWUTS; break; |
| case \(aqU\(aq: flags |= CLONE_NEWUSER; break; |
| default: usage(argv[0]); |
| } |
| } |
| |
| if (optind >= argc) |
| usage(argv[0]); |
| |
| if (unshare(flags) == \-1) |
| errExit("unshare"); |
| |
| execvp(argv[optind], &argv[optind]); |
| errExit("execvp"); |
| } |
| .EE |
| .SH SEE ALSO |
| .BR unshare (1), |
| .BR clone (2), |
| .BR fork (2), |
| .BR kcmp (2), |
| .BR setns (2), |
| .BR vfork (2), |
| .BR namespaces (7) |
| .PP |
| .I Documentation/userspace-api/unshare.rst |
| in the Linux kernel source tree |
| .\" commit f504d47be5e8fa7ecf2bf660b18b42e6960c0eb2 |
| (or |
| .I Documentation/unshare.txt |
| before Linux 4.12) |