| From bippy-5f407fcff5a0 Mon Sep 17 00:00:00 2001 |
| From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| To: <linux-cve-announce@vger.kernel.org> |
| Reply-to: <cve@kernel.org>, <linux-kernel@vger.kernel.org> |
| Subject: CVE-2024-42243: mm/filemap: make MAX_PAGECACHE_ORDER acceptable to xarray |
| |
| Description |
| =========== |
| |
| In the Linux kernel, the following vulnerability has been resolved: |
| |
| mm/filemap: make MAX_PAGECACHE_ORDER acceptable to xarray |
| |
| Patch series "mm/filemap: Limit page cache size to that supported by |
| xarray", v2. |
| |
| Currently, xarray can't support arbitrary page cache sizes. More details |
| can be found in the WARN_ON() statement in xas_split_alloc(). In our |
| test, whose code is attached below, we hit the WARN_ON() on an ARM64 system |
| where the base page size is 64KB and the huge page size is 512MB. The issue |
| was reported a long time ago, and some discussion of it can be found here |
| [1]. |
| |
| [1] https://www.spinics.net/lists/linux-xfs/msg75404.html |
| |
| In order to fix the issue, we need to adjust MAX_PAGECACHE_ORDER to one |
| supported by xarray and avoid PMD-sized page cache if needed. The code |
| changes are suggested by David Hildenbrand. |
| |
| PATCH[1] adjusts MAX_PAGECACHE_ORDER to that supported by xarray |
| PATCH[2-3] avoids PMD-sized page cache in the synchronous readahead path |
| PATCH[4] avoids PMD-sized page cache for shmem files if needed |
| |
| Test program |
| ============ |
| # cat test.c |
| #define _GNU_SOURCE |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| #include <string.h> |
| #include <fcntl.h> |
| #include <errno.h> |
| #include <sys/syscall.h> |
| #include <sys/mman.h> |
| |
| #define TEST_XFS_FILENAME "/tmp/data" |
| #define TEST_SHMEM_FILENAME "/dev/shm/data" |
| #define TEST_MEM_SIZE 0x20000000 |
| |
| int main(int argc, char **argv) |
| { |
| const char *filename; |
| int fd = 0; |
| void *buf = (void *)-1, *p; |
| int pgsize = getpagesize(); |
| int ret; |
| |
| if (pgsize != 0x10000) { |
| fprintf(stderr, "64KB base page size is required\n"); |
| return -EPERM; |
| } |
| |
| system("echo force > /sys/kernel/mm/transparent_hugepage/shmem_enabled"); |
| system("rm -fr /tmp/data"); |
| system("rm -fr /dev/shm/data"); |
| system("echo 1 > /proc/sys/vm/drop_caches"); |
| |
| /* Open xfs or shmem file */ |
| filename = TEST_XFS_FILENAME; |
| if (argc > 1 && !strcmp(argv[1], "shmem")) |
| filename = TEST_SHMEM_FILENAME; |
| |
| fd = open(filename, O_CREAT | O_RDWR | O_TRUNC); |
| if (fd < 0) { |
| fprintf(stderr, "Unable to open <%s>\n", filename); |
| return -EIO; |
| } |
| |
| /* Extend file size */ |
| ret = ftruncate(fd, TEST_MEM_SIZE); |
| if (ret) { |
| fprintf(stderr, "Error %d to ftruncate()\n", ret); |
| goto cleanup; |
| } |
| |
| /* Create VMA */ |
| buf = mmap(NULL, TEST_MEM_SIZE, |
| PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); |
| if (buf == (void *)-1) { |
| fprintf(stderr, "Unable to mmap <%s>\n", filename); |
| goto cleanup; |
| } |
| |
| fprintf(stdout, "mapped buffer at 0x%p\n", buf); |
| ret = madvise(buf, TEST_MEM_SIZE, MADV_HUGEPAGE); |
| if (ret) { |
| fprintf(stderr, "Unable to madvise(MADV_HUGEPAGE)\n"); |
| goto cleanup; |
| } |
| |
| /* Populate VMA */ |
| ret = madvise(buf, TEST_MEM_SIZE, MADV_POPULATE_WRITE); |
| if (ret) { |
| fprintf(stderr, "Error %d to madvise(MADV_POPULATE_WRITE)\n", ret); |
| goto cleanup; |
| } |
| |
| /* Punch the file to enforce xarray split */ |
| ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, |
| TEST_MEM_SIZE - pgsize, pgsize); |
| if (ret) |
| fprintf(stderr, "Error %d to fallocate()\n", ret); |
| |
| cleanup: |
| if (buf != (void *)-1) |
| munmap(buf, TEST_MEM_SIZE); |
| if (fd > 0) |
| close(fd); |
| |
| return 0; |
| } |
| |
| # gcc test.c -o test |
| # cat /proc/1/smaps | grep KernelPageSize | head -n 1 |
| KernelPageSize: 64 kB |
| # ./test shmem |
| : |
| ------------[ cut here ]------------ |
| WARNING: CPU: 17 PID: 5253 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128 |
| Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib \ |
| nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct \ |
| nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 \ |
| ip_set nf_tables rfkill nfnetlink vfat fat virtio_balloon \ |
| drm fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 \ |
| virtio_net sha1_ce net_failover failover virtio_console virtio_blk \ |
| dimlib virtio_mmio |
| CPU: 17 PID: 5253 Comm: test Kdump: loaded Tainted: G W 6.10.0-rc5-gavin+ #12 |
| Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024 |
| pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) |
| pc : xas_split_alloc+0xf8/0x128 |
| lr : split_huge_page_to_list_to_order+0x1c4/0x720 |
| sp : ffff80008a92f5b0 |
| x29: ffff80008a92f5b0 x28: ffff80008a92f610 x27: ffff80008a92f728 |
| x26: 0000000000000cc0 x25: 000000000000000d x24: ffff0000cf00c858 |
| x23: ffff80008a92f610 x22: ffffffdfc0600000 x21: 0000000000000000 |
| x20: 0000000000000000 x19: ffffffdfc0600000 x18: 0000000000000000 |
| x17: 0000000000000000 x16: 0000018000000000 x15: 3374004000000000 |
| x14: 0000e00000000000 x13: 0000000000002000 x12: 0000000000000020 |
| x11: 3374000000000000 x10: 3374e1c0ffff6000 x9 : ffffb463a84c681c |
| x8 : 0000000000000003 x7 : 0000000000000000 x6 : ffff00011c976ce0 |
| x5 : ffffb463aa47e378 x4 : 0000000000000000 x3 : 0000000000000cc0 |
| x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000 |
| Call trace: |
| xas_split_alloc+0xf8/0x128 |
| split_huge_page_to_list_to_order+0x1c4/0x720 |
| truncate_inode_partial_folio+0xdc/0x160 |
| shmem_undo_range+0x2bc/0x6a8 |
| shmem_fallocate+0x134/0x430 |
| vfs_fallocate+0x124/0x2e8 |
| ksys_fallocate+0x4c/0xa0 |
| __arm64_sys_fallocate+0x24/0x38 |
| invoke_syscall.constprop.0+0x7c/0xd8 |
| do_el0_svc+0xb4/0xd0 |
| el0_svc+0x44/0x1d8 |
| el0t_64_sync_handler+0x134/0x150 |
| el0t_64_sync+0x17c/0x180 |
| |
| |
| This patch (of 4): |
| |
| The largest page cache order can be HPAGE_PMD_ORDER (13) on ARM64 with a |
| 64KB base page size. An xarray entry of this order can't be split, as |
| the following warning indicates. |
| |
| ------------[ cut here ]------------ |
| WARNING: CPU: 35 PID: 7484 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128 |
| Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib \ |
| nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct \ |
| nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 \ |
| ip_set rfkill nf_tables nfnetlink vfat fat virtio_balloon drm \ |
| fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 \ |
| sha1_ce virtio_net net_failover virtio_console virtio_blk failover \ |
| dimlib virtio_mmio |
| CPU: 35 PID: 7484 Comm: test Kdump: loaded Tainted: G W 6.10.0-rc5-gavin+ #9 |
| Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024 |
| pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) |
| pc : xas_split_alloc+0xf8/0x128 |
| lr : split_huge_page_to_list_to_order+0x1c4/0x720 |
| sp : ffff800087a4f6c0 |
| x29: ffff800087a4f6c0 x28: ffff800087a4f720 x27: 000000001fffffff |
| x26: 0000000000000c40 x25: 000000000000000d x24: ffff00010625b858 |
| x23: ffff800087a4f720 x22: ffffffdfc0780000 x21: 0000000000000000 |
| x20: 0000000000000000 x19: ffffffdfc0780000 x18: 000000001ff40000 |
| x17: 00000000ffffffff x16: 0000018000000000 x15: 51ec004000000000 |
| x14: 0000e00000000000 x13: 0000000000002000 x12: 0000000000000020 |
| x11: 51ec000000000000 x10: 51ece1c0ffff8000 x9 : ffffbeb961a44d28 |
| x8 : 0000000000000003 x7 : ffffffdfc0456420 x6 : ffff0000e1aa6eb8 |
| x5 : 20bf08b4fe778fca x4 : ffffffdfc0456420 x3 : 0000000000000c40 |
| x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000 |
| Call trace: |
| xas_split_alloc+0xf8/0x128 |
| split_huge_page_to_list_to_order+0x1c4/0x720 |
| truncate_inode_partial_folio+0xdc/0x160 |
| truncate_inode_pages_range+0x1b4/0x4a8 |
| truncate_pagecache_range+0x84/0xa0 |
| xfs_flush_unmap_range+0x70/0x90 [xfs] |
| xfs_file_fallocate+0xfc/0x4d8 [xfs] |
| vfs_fallocate+0x124/0x2e8 |
| ksys_fallocate+0x4c/0xa0 |
| __arm64_sys_fallocate+0x24/0x38 |
| invoke_syscall.constprop.0+0x7c/0xd8 |
| do_el0_svc+0xb4/0xd0 |
| el0_svc+0x44/0x1d8 |
| el0t_64_sync_handler+0x134/0x150 |
| el0t_64_sync+0x17c/0x180 |
| |
| Fix it by decreasing MAX_PAGECACHE_ORDER to the largest order supported |
| by xarray. For this specific case, MAX_PAGECACHE_ORDER is dropped from |
| 13 to 11 when CONFIG_BASE_SMALL is disabled. |
| |
| The Linux kernel CVE team has assigned CVE-2024-42243 to this issue. |
| |
| |
| Affected and fixed versions |
| =========================== |
| |
| Issue introduced in 5.18 with commit 793917d997df2e432f3e9ac126e4482d68256d01 and fixed in 6.6.41 with commit a0c42ddd0969fdc760a85e20e267776028a7ca4e |
| Issue introduced in 5.18 with commit 793917d997df2e432f3e9ac126e4482d68256d01 and fixed in 6.9.10 with commit 333c5539a31f48828456aa9997ec2808f06a699a |
| Issue introduced in 5.18 with commit 793917d997df2e432f3e9ac126e4482d68256d01 and fixed in 6.10 with commit 099d90642a711caae377f53309abfe27e8724a8b |
| |
| Please see https://www.kernel.org for a full list of currently supported |
| kernel versions by the kernel community. |
| |
| Unaffected versions might change over time as fixes are backported to |
| older supported kernel versions. The official CVE entry at |
| https://cve.org/CVERecord/?id=CVE-2024-42243 |
| will be updated if fixes are backported; please check it for the most |
| up-to-date information about this issue. |
| |
| |
| Affected files |
| ============== |
| |
| The file(s) affected by this issue are: |
| include/linux/pagemap.h |
| |
| |
| Mitigation |
| ========== |
| |
| The Linux kernel CVE team recommends that you update to the latest |
| stable kernel version for this and many other bugfixes. Individual |
| changes are never tested alone, but rather are part of a larger kernel |
| release. Cherry-picking individual commits is not recommended or |
| supported by the Linux kernel community at all. If, however, updating to |
| the latest release is impossible, the individual changes to resolve this |
| issue can be found at these commits: |
| https://git.kernel.org/stable/c/a0c42ddd0969fdc760a85e20e267776028a7ca4e |
| https://git.kernel.org/stable/c/333c5539a31f48828456aa9997ec2808f06a699a |
| https://git.kernel.org/stable/c/099d90642a711caae377f53309abfe27e8724a8b |