| From: Alexey Gladkov <legion@kernel.org> |
| Subject: sysctl: allow change system v ipc sysctls inside ipc namespace |
| Date: Mon, 15 Jan 2024 15:46:41 +0000 |
| |
| Patch series "Allow to change ipc/mq sysctls inside ipc namespace", v3. |
| |
| Right now ipc and mq limits are accounted per ipc namespace, but only real |
| root can change them. By default, the current values of these limits are |
| such that they can only be reduced. Since only root can change the values, |
| it is impossible to reduce these limits in a rootless container. |
| |
| We can allow limit changes within ipc namespace because mq parameters are |
| limited by RLIMIT_MSGQUEUE and ipc parameters are not limited by anything |
| other than cgroups. |
| |
| |
| This patch (of 3): |
| |
| Rootless containers are not allowed to modify kernel IPC parameters. |
| |
| All default limits are set to such high values that in fact there are no |
| limits at all. None of the limits are inherited; they are initialized to |
| default values when a new ipc_namespace is created. |
| |
| For new ipc_namespace: |
| |
| size_t ipc_ns.shm_ctlmax = SHMMAX; // (ULONG_MAX - (1UL << 24)) |
| size_t ipc_ns.shm_ctlall = SHMALL; // (ULONG_MAX - (1UL << 24)) |
| int ipc_ns.shm_ctlmni = IPCMNI; // (1 << 15) |
| int ipc_ns.shm_rmid_forced = 0; |
| unsigned int ipc_ns.msg_ctlmax = MSGMAX; // 8192 |
| unsigned int ipc_ns.msg_ctlmni = MSGMNI; // 32000 |
| unsigned int ipc_ns.msg_ctlmnb = MSGMNB; // 16384 |
| |
| The shm_tot (total amount of shared pages) has also ceased to be global, |
| it is located in ipc_namespace and is not inherited from anywhere. |
| |
| In such conditions, it cannot be said that these limits limit anything. |
| The real limiter for them is cgroups. |
| |
| If we allow rootless containers to change these parameters, then they can |
| only be reduced. |
| |
| Link: https://lkml.kernel.org/r/cover.1705333426.git.legion@kernel.org |
| Link: https://lkml.kernel.org/r/d2f4603305cbfed58a24755aa61d027314b73a45.1705333426.git.legion@kernel.org |
| Signed-off-by: Alexey Gladkov <legion@kernel.org> |
| Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> |
| Link: https://lkml.kernel.org/r/e2d84d3ec0172cfff759e6065da84ce0cc2736f8.1663756794.git.legion@kernel.org |
| Cc: Christian Brauner <brauner@kernel.org> |
| Cc: Joel Granados <joel.granados@gmail.com> |
| Cc: Kees Cook <keescook@chromium.org> |
| Cc: Luis Chamberlain <mcgrof@kernel.org> |
| Cc: Manfred Spraul <manfred@colorfullife.com> |
| Cc: Davidlohr Bueso <dave@stgolabs.net> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| ipc/ipc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++-- |
| 1 file changed, 35 insertions(+), 2 deletions(-) |
| |
| --- a/ipc/ipc_sysctl.c~sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-namespace |
| +++ a/ipc/ipc_sysctl.c |
| @@ -14,6 +14,7 @@ |
| #include <linux/ipc_namespace.h> |
| #include <linux/msg.h> |
| #include <linux/slab.h> |
| +#include <linux/cred.h> |
| #include "util.h" |
| |
| static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, |
| @@ -190,25 +191,57 @@ static int set_is_seen(struct ctl_table_ |
| return &current->nsproxy->ipc_ns->ipc_set == set; |
| } |
| |
| +static void ipc_set_ownership(struct ctl_table_header *head, |
| + struct ctl_table *table, |
| + kuid_t *uid, kgid_t *gid) |
| +{ |
| + struct ipc_namespace *ns = |
| + container_of(head->set, struct ipc_namespace, ipc_set); |
| + |
| + kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); |
| + kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); |
| + |
| + *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; |
| + *gid = gid_valid(ns_root_gid) ? ns_root_gid : GLOBAL_ROOT_GID; |
| +} |
| + |
| static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) |
| { |
| int mode = table->mode; |
| |
| #ifdef CONFIG_CHECKPOINT_RESTORE |
| - struct ipc_namespace *ns = current->nsproxy->ipc_ns; |
| + struct ipc_namespace *ns = |
| + container_of(head->set, struct ipc_namespace, ipc_set); |
| |
| if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || |
| (table->data == &ns->ids[IPC_MSG_IDS].next_id) || |
| (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && |
| checkpoint_restore_ns_capable(ns->user_ns)) |
| mode = 0666; |
| + else |
| #endif |
| - return mode; |
| + { |
| + kuid_t ns_root_uid; |
| + kgid_t ns_root_gid; |
| + |
| + ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); |
| + |
| + if (uid_eq(current_euid(), ns_root_uid)) |
| + mode >>= 6; |
| + |
| + else if (in_egroup_p(ns_root_gid)) |
| + mode >>= 3; |
| + } |
| + |
| + mode &= 7; |
| + |
| + return (mode << 6) | (mode << 3) | mode; |
| } |
| |
| static struct ctl_table_root set_root = { |
| .lookup = set_lookup, |
| .permissions = ipc_permissions, |
| + .set_ownership = ipc_set_ownership, |
| }; |
| |
| bool setup_ipc_sysctls(struct ipc_namespace *ns) |
| _ |