| From 38b040db2052c5a188e06eb91cd75891ba501eeb Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Wed, 24 Feb 2021 12:07:32 -0800 |
| Subject: mm/pmem: avoid inserting hugepage PTE entry with fsdax if hugepage |
| support is disabled |
| |
| From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> |
| |
| [ Upstream commit bae84953815793f68ddd8edeadd3f4e32676a2c8 ] |
| |
| Differentiate between hardware not supporting hugepages and user disabling |
| THP via 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' |
| |
| For the devdax namespace, the kernel handles the above via the |
| supported_alignment attribute and by failing to initialize the namespace |
| if the namespace align value is not supported on the platform. |
| |
| For the fsdax namespace, the kernel will continue to initialize the |
| namespace. This can result in the kernel creating a huge pte entry even |
| though the hardware doesn't support it. |
| |
| We do want hugepage support with pmem even if the end-user disabled THP |
| via sysfs file (/sys/kernel/mm/transparent_hugepage/enabled). Hence |
| differentiate between hardware/firmware lacking support vs user-controlled |
| disable of THP and prevent a huge fault if the hardware lacks hugepage |
| support. |
| |
| Link: https://lkml.kernel.org/r/20210205023956.417587-1-aneesh.kumar@linux.ibm.com |
| Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> |
| Reviewed-by: Dan Williams <dan.j.williams@intel.com> |
| Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com> |
| Cc: Jan Kara <jack@suse.cz> |
| Cc: David Hildenbrand <david@redhat.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| include/linux/huge_mm.h | 15 +++++++++------ |
| mm/huge_memory.c | 6 +++++- |
| 2 files changed, 14 insertions(+), 7 deletions(-) |
| |
| diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h |
| index ff55be011739..10c7a80a0394 100644 |
| --- a/include/linux/huge_mm.h |
| +++ b/include/linux/huge_mm.h |
| @@ -84,6 +84,7 @@ static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, |
| } |
| |
| enum transparent_hugepage_flag { |
| + TRANSPARENT_HUGEPAGE_NEVER_DAX, |
| TRANSPARENT_HUGEPAGE_FLAG, |
| TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, |
| TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, |
| @@ -129,6 +130,13 @@ extern unsigned long transparent_hugepage_flags; |
| */ |
| static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) |
| { |
| + |
| + /* |
| + * If the hardware/firmware marked hugepage support disabled. |
| + */ |
| + if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX)) |
| + return false; |
| + |
| if (vma->vm_flags & VM_NOHUGEPAGE) |
| return false; |
| |
| @@ -140,12 +148,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) |
| |
| if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) |
| return true; |
| - /* |
| - * For dax vmas, try to always use hugepage mappings. If the kernel does |
| - * not support hugepages, fsdax mappings will fallback to PAGE_SIZE |
| - * mappings, and device-dax namespaces, that try to guarantee a given |
| - * mapping size, will fail to enable |
| - */ |
| + |
| if (vma_is_dax(vma)) |
| return true; |
| |
| diff --git a/mm/huge_memory.c b/mm/huge_memory.c |
| index 6301ecc1f679..f1432d4d81c7 100644 |
| --- a/mm/huge_memory.c |
| +++ b/mm/huge_memory.c |
| @@ -375,7 +375,11 @@ static int __init hugepage_init(void) |
| struct kobject *hugepage_kobj; |
| |
| if (!has_transparent_hugepage()) { |
| - transparent_hugepage_flags = 0; |
| + /* |
| + * Hardware doesn't support hugepages, hence disable |
| + * DAX PMD support. |
| + */ |
| + transparent_hugepage_flags = 1 << TRANSPARENT_HUGEPAGE_NEVER_DAX; |
| return -EINVAL; |
| } |
| |
| -- |
| 2.30.2 |
| |