| From 2a25f099f10447d416cfb9deb6ff523b60d54446 Mon Sep 17 00:00:00 2001 |
| From: "Kirill A. Shutemov" <kirill@shutemov.name> |
| Date: Mon, 13 Jan 2020 16:29:10 -0800 |
| Subject: [PATCH] mm/huge_memory.c: thp: fix conflict of above-47bit hint |
| address and PMD alignment |
| |
| commit 97d3d0f9a1cf132c63c0b8b8bd497b8a56283dd9 upstream. |
| |
| Patch series "Fix two above-47bit hint address vs. THP bugs". |
| |
| The two get_unmapped_area() implementations have to be fixed to provide |
| THP-friendly mappings if above-47bit hint address is specified. |
| |
| This patch (of 2): |
| |
| Filesystems use thp_get_unmapped_area() to provide THP-friendly |
| mappings. For DAX in particular. |
| |
| Normally, the kernel doesn't create userspace mappings above 47-bit, |
| even if the machine allows this (such as with 5-level paging on x86-64). |
| Not all user space is ready to handle wide addresses. It's known that |
| at least some JIT compilers use higher bits in pointers to encode their |
| information. |
| |
| Userspace can ask for allocation from full address space by specifying |
| hint address (with or without MAP_FIXED) above 47-bits. If the |
| application doesn't need a particular address, but wants to allocate |
| from whole address space it can specify -1 as a hint address. |
| |
| Unfortunately, this trick breaks thp_get_unmapped_area(): the function |
| would not try to allocate PMD-aligned area if *any* hint address |
| specified. |
| |
| Modify the routine to handle it correctly: |
| |
| - Try to allocate the space at the specified hint address with length |
| padding required for PMD alignment. |
| - If failed, retry without length padding (but with the same hint |
| address); |
| - If the returned address matches the hint address return it. |
| - Otherwise, align the address as required for THP and return. |
| |
| The user specified hint address is passed down to get_unmapped_area() so |
| above-47bit hint address will be taken into account without breaking |
| alignment requirements. |
| |
| Link: http://lkml.kernel.org/r/20191220142548.7118-2-kirill.shutemov@linux.intel.com |
| Fixes: b569bab78d8d ("x86/mm: Prepare to expose larger address space to userspace") |
| Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> |
| Reported-by: Thomas Willhalm <thomas.willhalm@intel.com> |
| Tested-by: Dan Williams <dan.j.williams@intel.com> |
| Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com> |
| Cc: "Bruggeman, Otto G" <otto.g.bruggeman@intel.com> |
| Cc: <stable@vger.kernel.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/mm/huge_memory.c b/mm/huge_memory.c |
| index 83236e22a8a8..2b90d4f85df5 100644 |
| --- a/mm/huge_memory.c |
| +++ b/mm/huge_memory.c |
| @@ -508,13 +508,13 @@ void prep_transhuge_page(struct page *page) |
| set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR); |
| } |
| |
| -static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len, |
| +static unsigned long __thp_get_unmapped_area(struct file *filp, |
| + unsigned long addr, unsigned long len, |
| loff_t off, unsigned long flags, unsigned long size) |
| { |
| - unsigned long addr; |
| loff_t off_end = off + len; |
| loff_t off_align = round_up(off, size); |
| - unsigned long len_pad; |
| + unsigned long len_pad, ret; |
| |
| if (off_end <= off_align || (off_end - off_align) < size) |
| return 0; |
| @@ -523,30 +523,40 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long le |
| if (len_pad < len || (off + len_pad) < off) |
| return 0; |
| |
| - addr = current->mm->get_unmapped_area(filp, 0, len_pad, |
| + ret = current->mm->get_unmapped_area(filp, addr, len_pad, |
| off >> PAGE_SHIFT, flags); |
| - if (IS_ERR_VALUE(addr)) |
| + |
| + /* |
| + * The failure might be due to length padding. The caller will retry |
| + * without the padding. |
| + */ |
| + if (IS_ERR_VALUE(ret)) |
| return 0; |
| |
| - addr += (off - addr) & (size - 1); |
| - return addr; |
| + /* |
| + * Do not try to align to THP boundary if allocation at the address |
| + * hint succeeds. |
| + */ |
| + if (ret == addr) |
| + return addr; |
| + |
| + ret += (off - ret) & (size - 1); |
| + return ret; |
| } |
| |
| unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, |
| unsigned long len, unsigned long pgoff, unsigned long flags) |
| { |
| + unsigned long ret; |
| loff_t off = (loff_t)pgoff << PAGE_SHIFT; |
| |
| - if (addr) |
| - goto out; |
| if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD)) |
| goto out; |
| |
| - addr = __thp_get_unmapped_area(filp, len, off, flags, PMD_SIZE); |
| - if (addr) |
| - return addr; |
| - |
| - out: |
| + ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE); |
| + if (ret) |
| + return ret; |
| +out: |
| return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); |
| } |
| EXPORT_SYMBOL_GPL(thp_get_unmapped_area); |
| -- |
| 2.7.4 |
| |