| // SPDX-License-Identifier: GPL-2.0 |
| /* filescontrol.c - Cgroup controller for open file handles. |
| * |
| * Copyright 2014 Google Inc. |
| * Author: Brian Makin <merimus@google.com> |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| |
| #include <linux/page_counter.h> |
| #include <linux/filescontrol.h> |
| #include <linux/cgroup.h> |
| #include <linux/export.h> |
| #include <linux/printk.h> |
| #include <linux/slab.h> |
| #include <linux/fs.h> |
| #include <linux/seq_file.h> |
| #include <linux/fdtable.h> |
| #include <linux/sched/signal.h> |
| #include <linux/module.h> |
| |
| #define FILES_MAX D_COUNT_MAX |
| #define FILES_MAX_STR "max" |
| |
| static bool no_acct; |
| struct cgroup_subsys files_cgrp_subsys __read_mostly; |
| EXPORT_SYMBOL(files_cgrp_subsys); |
| |
| module_param(no_acct, bool, 0444); |
| |
| struct files_cgroup { |
| struct cgroup_subsys_state css; |
| struct page_counter open_handles; |
| }; |
| |
| static inline struct files_cgroup *css_fcg(struct cgroup_subsys_state *css) |
| { |
| return css ? container_of(css, struct files_cgroup, css) : NULL; |
| } |
| |
| static inline struct page_counter * |
| css_res_open_handles(struct cgroup_subsys_state *css) |
| { |
| return &css_fcg(css)->open_handles; |
| } |
| |
| static inline struct files_cgroup * |
| files_cgroup_from_files(struct files_struct *files) |
| { |
| return files->files_cgroup; |
| } |
| |
| |
| static struct cgroup_subsys_state * |
| files_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) |
| { |
| struct files_cgroup *parent_fcg; |
| struct files_cgroup *fcg; |
| |
| parent_fcg = css_fcg(parent_css); |
| fcg = kzalloc(sizeof(*fcg), GFP_KERNEL); |
| if (!fcg) |
| goto out; |
| |
| if (!parent_fcg) { |
| page_counter_init(&fcg->open_handles, NULL); |
| page_counter_set_max(&fcg->open_handles, FILES_MAX); |
| } else { |
| struct page_counter *p_counter = &parent_fcg->open_handles; |
| |
| page_counter_init(&fcg->open_handles, p_counter); |
| page_counter_set_max(&fcg->open_handles, FILES_MAX); |
| } |
| return &fcg->css; |
| |
| out: |
| return ERR_PTR(-ENOMEM); |
| } |
| |
/* Release the files_cgroup that embeds @css. */
static void files_cgroup_css_free(struct cgroup_subsys_state *css)
{
	struct files_cgroup *fcg = css_fcg(css);

	kfree(fcg);
}
| |
| u64 files_cgroup_count_fds(struct files_struct *files) |
| { |
| int i; |
| struct fdtable *fdt; |
| int retval = 0; |
| |
| fdt = files_fdtable(files); |
| for (i = 0; i < DIV_ROUND_UP(fdt->max_fds, BITS_PER_LONG); i++) |
| retval += hweight64((__u64)fdt->open_fds[i]); |
| return retval; |
| } |
| |
| /* |
| * If attaching this cgroup would overcommit the resource then deny |
| * the attach. If not, attach the file resource into new cgroup. |
| */ |
| static int files_cgroup_can_attach(struct cgroup_taskset *tset) |
| { |
| u64 num_files; |
| bool can_attach; |
| struct cgroup_subsys_state *to_css; |
| struct cgroup_subsys_state *from_css; |
| struct page_counter *from_res; |
| struct page_counter *to_res; |
| struct page_counter *fail_res; |
| struct files_struct *files; |
| struct task_struct *task = cgroup_taskset_first(tset, &to_css); |
| |
| to_res = css_res_open_handles(to_css); |
| |
| task_lock(task); |
| files = task->files; |
| if (!files || files == &init_files) { |
| task_unlock(task); |
| return 0; |
| } |
| |
| from_css = &files_cgroup_from_files(files)->css; |
| from_res = css_res_open_handles(from_css); |
| |
| spin_lock(&files->file_lock); |
| num_files = files_cgroup_count_fds(files); |
| page_counter_uncharge(from_res, num_files); |
| |
| if (!page_counter_try_charge(to_res, num_files, &fail_res)) { |
| page_counter_charge(from_res, num_files); |
| pr_err("Open files limit overcommited\n"); |
| can_attach = false; |
| } else { |
| css_put(from_css); |
| css_get(to_css); |
| task->files->files_cgroup = css_fcg(to_css); |
| can_attach = true; |
| } |
| spin_unlock(&files->file_lock); |
| task_unlock(task); |
| return can_attach ? 0 : -ENOSPC; |
| } |
| |
| int files_cgroup_alloc_fd(struct files_struct *files, u64 n) |
| { |
| /* |
| * Kernel threads which are forked by kthreadd inherited the |
| * const files_struct 'init_files', we didn't wrap it so |
| * there's no associated files_cgroup. |
| * |
| * Kernel threads always stay in root cgroup, and we don't |
| * have limit for root files cgroup, so it won't hurt if |
| * we don't charge their fds, only issue is that files.usage |
| * won't be accurate in root files cgroup. |
| */ |
| if (!no_acct && files != &init_files) { |
| struct page_counter *fail_res; |
| struct files_cgroup *files_cgroup = |
| files_cgroup_from_files(files); |
| if (!page_counter_try_charge(&files_cgroup->open_handles, |
| n, &fail_res)) |
| return -ENOMEM; |
| } |
| return 0; |
| } |
| EXPORT_SYMBOL(files_cgroup_alloc_fd); |
| |
| void files_cgroup_unalloc_fd(struct files_struct *files, u64 n) |
| { |
| /* |
| * It's not charged so no need to uncharge, see comments in |
| * files_cgroup_alloc_fd. |
| */ |
| if (!no_acct && files != &init_files) { |
| struct files_cgroup *files_cgroup = |
| files_cgroup_from_files(files); |
| page_counter_uncharge(&files_cgroup->open_handles, n); |
| } |
| } |
| EXPORT_SYMBOL(files_cgroup_unalloc_fd); |
| |
| static u64 files_disabled_read(struct cgroup_subsys_state *css, |
| struct cftype *cft) |
| { |
| return no_acct; |
| } |
| |
| static int files_disabled_write(struct cgroup_subsys_state *css, |
| struct cftype *cft, u64 val) |
| { |
| if (!val) |
| return -EINVAL; |
| no_acct = true; |
| |
| return 0; |
| } |
| |
| static int files_limit_read(struct seq_file *sf, void *v) |
| { |
| struct files_cgroup *fcg = css_fcg(seq_css(sf)); |
| struct page_counter *counter = &fcg->open_handles; |
| u64 limit = counter->max; |
| |
| if (limit >= FILES_MAX) |
| seq_printf(sf, "%s\n", FILES_MAX_STR); |
| else |
| seq_printf(sf, "%llu\n", limit); |
| |
| return 0; |
| } |
| |
| static ssize_t files_limit_write(struct kernfs_open_file *of, |
| char *buf, size_t nbytes, loff_t off) |
| { |
| struct files_cgroup *fcg = css_fcg(of_css(of)); |
| u64 limit; |
| int err; |
| |
| buf = strstrip((char *)buf); |
| if (!strcmp(buf, FILES_MAX_STR)) { |
| limit = FILES_MAX; |
| goto set_limit; |
| } |
| |
| err = kstrtoull(buf, 0, &limit); |
| if (err) |
| return err; |
| |
| set_limit: |
| /* |
| * Limit updates don't need to be mutex'd, since it isn't |
| * critical that any racing fork()s follow the new limit. |
| */ |
| page_counter_set_max(&fcg->open_handles, limit); |
| return nbytes; |
| } |
| |
| |
| static u64 files_usage_read(struct cgroup_subsys_state *css, |
| struct cftype *cft) |
| { |
| struct files_cgroup *fcg = css_fcg(css); |
| |
| return page_counter_read(&fcg->open_handles); |
| } |
| |
| static struct cftype files[] = { |
| { |
| .name = "limit", |
| .seq_show = files_limit_read, |
| .write = files_limit_write, |
| .flags = CFTYPE_NOT_ON_ROOT, |
| }, |
| { |
| .name = "usage", |
| .read_u64 = files_usage_read, |
| }, |
| { |
| .name = "no_acct", |
| .flags = CFTYPE_ONLY_ON_ROOT, |
| .read_u64 = files_disabled_read, |
| .write_u64 = files_disabled_write, |
| }, |
| { } |
| }; |
| |
| struct cgroup_subsys files_cgrp_subsys = { |
| .css_alloc = files_cgroup_css_alloc, |
| .css_free = files_cgroup_css_free, |
| .can_attach = files_cgroup_can_attach, |
| .legacy_cftypes = files, |
| .dfl_cftypes = files, |
| }; |
| |
| /* |
| * It could race against cgroup migration of current task, and |
| * using task_get_css() to get a valid css. |
| */ |
| void files_cgroup_assign(struct files_struct *files) |
| { |
| struct cgroup_subsys_state *css; |
| |
| if (files == &init_files) |
| return; |
| |
| css = task_get_css(current, files_cgrp_id); |
| files->files_cgroup = container_of(css, struct files_cgroup, css); |
| } |
| |
| void files_cgroup_remove(struct files_struct *files) |
| { |
| struct task_struct *tsk = current; |
| struct files_cgroup *fcg; |
| |
| if (files == &init_files) |
| return; |
| |
| task_lock(tsk); |
| spin_lock(&files->file_lock); |
| fcg = files_cgroup_from_files(files); |
| css_put(&fcg->css); |
| spin_unlock(&files->file_lock); |
| task_unlock(tsk); |
| } |