| From 9cfff15c098f67211ed051e4c3c3742dedfd5ff9 Mon Sep 17 00:00:00 2001 |
| From: Muchun Song <songmuchun@bytedance.com> |
| Date: Fri, 4 Sep 2020 16:36:13 -0700 |
| Subject: [PATCH] mm/hugetlb: fix a race between hugetlb sysctl handlers |
| |
| commit 17743798d81238ab13050e8e2833699b54e15467 upstream. |
| |
| There is a race between the assignment of `table->data` and write value |
| to the pointer of `table->data` in the __do_proc_doulongvec_minmax() on |
| the other thread. |
| |
| CPU0: CPU1: |
| proc_sys_write |
| hugetlb_sysctl_handler proc_sys_call_handler |
| hugetlb_sysctl_handler_common hugetlb_sysctl_handler |
| table->data = &tmp; hugetlb_sysctl_handler_common |
| table->data = &tmp; |
| proc_doulongvec_minmax |
| do_proc_doulongvec_minmax sysctl_head_finish |
| __do_proc_doulongvec_minmax unuse_table |
| i = table->data; |
| *i = val; // corrupt CPU1's stack |
| |
| Fix this by duplicating the `table`, and only update the duplicate of |
| it. And introduce a helper of proc_hugetlb_doulongvec_minmax() to |
| simplify the code. |
| |
| The following oops was seen: |
| |
| BUG: kernel NULL pointer dereference, address: 0000000000000000 |
| #PF: supervisor instruction fetch in kernel mode |
| #PF: error_code(0x0010) - not-present page |
| Code: Bad RIP value. |
| ... |
| Call Trace: |
| ? set_max_huge_pages+0x3da/0x4f0 |
| ? alloc_pool_huge_page+0x150/0x150 |
| ? proc_doulongvec_minmax+0x46/0x60 |
| ? hugetlb_sysctl_handler_common+0x1c7/0x200 |
| ? nr_hugepages_store+0x20/0x20 |
| ? copy_fd_bitmaps+0x170/0x170 |
| ? hugetlb_sysctl_handler+0x1e/0x20 |
| ? proc_sys_call_handler+0x2f1/0x300 |
| ? unregister_sysctl_table+0xb0/0xb0 |
| ? __fd_install+0x78/0x100 |
| ? proc_sys_write+0x14/0x20 |
| ? __vfs_write+0x4d/0x90 |
| ? vfs_write+0xef/0x240 |
| ? ksys_write+0xc0/0x160 |
| ? __ia32_sys_read+0x50/0x50 |
| ? __close_fd+0x129/0x150 |
| ? __x64_sys_write+0x43/0x50 |
| ? do_syscall_64+0x6c/0x200 |
| ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 |
| |
| Fixes: e5ff215941d5 ("hugetlb: multiple hstates for multiple page sizes") |
| Signed-off-by: Muchun Song <songmuchun@bytedance.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com> |
| Cc: Andi Kleen <ak@linux.intel.com> |
| Link: http://lkml.kernel.org/r/20200828031146.43035-1-songmuchun@bytedance.com |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/mm/hugetlb.c b/mm/hugetlb.c |
| index acabe9928049..1eedc7003380 100644 |
| --- a/mm/hugetlb.c |
| +++ b/mm/hugetlb.c |
| @@ -3013,6 +3013,22 @@ static unsigned int cpuset_mems_nr(unsigned int *array) |
| } |
| |
| #ifdef CONFIG_SYSCTL |
| +static int proc_hugetlb_doulongvec_minmax(struct ctl_table *table, int write, |
| + void *buffer, size_t *length, |
| + loff_t *ppos, unsigned long *out) |
| +{ |
| + struct ctl_table dup_table; |
| + |
| + /* |
| + * In order to avoid races with __do_proc_doulongvec_minmax(), we |
| + * can duplicate the @table and alter the duplicate of it. |
| + */ |
| + dup_table = *table; |
| + dup_table.data = out; |
| + |
| + return proc_doulongvec_minmax(&dup_table, write, buffer, length, ppos); |
| +} |
| + |
| static int hugetlb_sysctl_handler_common(bool obey_mempolicy, |
| struct ctl_table *table, int write, |
| void __user *buffer, size_t *length, loff_t *ppos) |
| @@ -3024,9 +3040,8 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy, |
| if (!hugepages_supported()) |
| return -EOPNOTSUPP; |
| |
| - table->data = &tmp; |
| - table->maxlen = sizeof(unsigned long); |
| - ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); |
| + ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos, |
| + &tmp); |
| if (ret) |
| goto out; |
| |
| @@ -3070,9 +3085,8 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, |
| if (write && hstate_is_gigantic(h)) |
| return -EINVAL; |
| |
| - table->data = &tmp; |
| - table->maxlen = sizeof(unsigned long); |
| - ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); |
| + ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos, |
| + &tmp); |
| if (ret) |
| goto out; |
| |
| -- |
| 2.27.0 |
| |