| From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> |
| Date: Tue, 10 Feb 2015 14:09:59 -0800 |
| Subject: rmap: drop support of non-linear mappings |
| |
| commit 27ba0644ea9dfe6e7693abc85837b60e40583b96 upstream. |
| |
| We don't create non-linear mappings anymore: remap_file_pages() is now |
| emulated with ordinary linear mappings, so nothing sets VM_NONLINEAR. |
| Let's drop the code that handles non-linear mappings in rmap. |
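| |
| For illustration only (nothing below is part of this patch), the only |
| way userspace could get a VM_NONLINEAR vma was a remap_file_pages() |
| call that places file pages out of order inside a MAP_SHARED window; |
| assuming "fd" is an already-open file descriptor, roughly: |
| |
| 	size_t psz = sysconf(_SC_PAGESIZE); |
| 	void *win = mmap(NULL, 4 * psz, PROT_READ | PROT_WRITE, |
| 	                 MAP_SHARED, fd, 0); |
| 	/* Map file page 3 at the first page of the window: before the |
| 	 * emulation this marked the whole vma VM_NONLINEAR. */ |
| 	remap_file_pages(win, psz, 0, 3, 0); |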
| |
| Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| [bwh: Backported to 3.16: |
| - Deleted code is slightly different |
| - Adjust context] |
| Signed-off-by: Ben Hutchings <ben@decadent.org.uk> |
| --- |
| --- a/Documentation/cachetlb.txt |
| +++ b/Documentation/cachetlb.txt |
| @@ -317,10 +317,10 @@ maps this page at its virtual address. |
| about doing this. |
| |
| The idea is, first at flush_dcache_page() time, if |
| - page->mapping->i_mmap is an empty tree and ->i_mmap_nonlinear |
| - an empty list, just mark the architecture private page flag bit. |
| - Later, in update_mmu_cache(), a check is made of this flag bit, |
| - and if set the flush is done and the flag bit is cleared. |
| + page->mapping->i_mmap is an empty tree, just mark the architecture |
| + private page flag bit. Later, in update_mmu_cache(), a check is |
| + made of this flag bit, and if set the flush is done and the flag |
| + bit is cleared. |
| |
| IMPORTANT NOTE: It is often important, if you defer the flush, |
| that the actual flush occurs on the same CPU |
| --- a/fs/inode.c |
| +++ b/fs/inode.c |
| @@ -352,7 +352,6 @@ void address_space_init_once(struct addr |
| INIT_LIST_HEAD(&mapping->private_list); |
| spin_lock_init(&mapping->private_lock); |
| mapping->i_mmap = RB_ROOT; |
| - INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); |
| } |
| EXPORT_SYMBOL(address_space_init_once); |
| |
| --- a/include/linux/fs.h |
| +++ b/include/linux/fs.h |
| @@ -395,7 +395,6 @@ struct address_space { |
| spinlock_t tree_lock; /* and lock protecting it */ |
| unsigned int i_mmap_writable;/* count VM_SHARED mappings */ |
| struct rb_root i_mmap; /* tree of private and shared mappings */ |
| - struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ |
| struct mutex i_mmap_mutex; /* protect tree, count, list */ |
| /* Protected by tree_lock together with the radix tree */ |
| unsigned long nrpages; /* number of total pages */ |
| @@ -467,8 +466,7 @@ int mapping_tagged(struct address_space |
| */ |
| static inline int mapping_mapped(struct address_space *mapping) |
| { |
| - return !RB_EMPTY_ROOT(&mapping->i_mmap) || |
| - !list_empty(&mapping->i_mmap_nonlinear); |
| + return !RB_EMPTY_ROOT(&mapping->i_mmap); |
| } |
| |
| /* |
| --- a/include/linux/mm.h |
| +++ b/include/linux/mm.h |
| @@ -1728,12 +1728,6 @@ struct vm_area_struct *vma_interval_tree |
| for (vma = vma_interval_tree_iter_first(root, start, last); \ |
| vma; vma = vma_interval_tree_iter_next(vma, start, last)) |
| |
| -static inline void vma_nonlinear_insert(struct vm_area_struct *vma, |
| - struct list_head *list) |
| -{ |
| - list_add_tail(&vma->shared.nonlinear, list); |
| -} |
| - |
| void anon_vma_interval_tree_insert(struct anon_vma_chain *node, |
| struct rb_root *root); |
| void anon_vma_interval_tree_remove(struct anon_vma_chain *node, |
| --- a/include/linux/mm_types.h |
| +++ b/include/linux/mm_types.h |
| @@ -272,15 +272,13 @@ struct vm_area_struct { |
| |
| /* |
| * For areas with an address space and backing store, |
| - * linkage into the address_space->i_mmap interval tree, or |
| - * linkage of vma in the address_space->i_mmap_nonlinear list. |
| + * linkage into the address_space->i_mmap interval tree. |
| */ |
| union { |
| struct { |
| struct rb_node rb; |
| unsigned long rb_subtree_last; |
| } linear; |
| - struct list_head nonlinear; |
| } shared; |
| |
| /* |
| --- a/include/linux/rmap.h |
| +++ b/include/linux/rmap.h |
| @@ -232,7 +232,6 @@ int page_mapped_in_vma(struct page *page |
| * arg: passed to rmap_one() and invalid_vma() |
| * rmap_one: executed on each vma where page is mapped |
| * done: for checking traversing termination condition |
| - * file_nonlinear: for handling file nonlinear mapping |
| * anon_lock: for getting anon_lock by optimized way rather than default |
| * invalid_vma: for skipping uninterested vma |
| */ |
| @@ -241,7 +240,6 @@ struct rmap_walk_control { |
| int (*rmap_one)(struct page *page, struct vm_area_struct *vma, |
| unsigned long addr, void *arg); |
| int (*done)(struct page *page); |
| - int (*file_nonlinear)(struct page *, struct address_space *, void *arg); |
| struct anon_vma *(*anon_lock)(struct page *page); |
| bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); |
| }; |
| --- a/kernel/fork.c |
| +++ b/kernel/fork.c |
| @@ -430,12 +430,8 @@ static int dup_mmap(struct mm_struct *mm |
| mapping->i_mmap_writable++; |
| flush_dcache_mmap_lock(mapping); |
| /* insert tmp into the share list, just after mpnt */ |
| - if (unlikely(tmp->vm_flags & VM_NONLINEAR)) |
| - vma_nonlinear_insert(tmp, |
| - &mapping->i_mmap_nonlinear); |
| - else |
| - vma_interval_tree_insert_after(tmp, mpnt, |
| - &mapping->i_mmap); |
| + vma_interval_tree_insert_after(tmp, mpnt, |
| + &mapping->i_mmap); |
| flush_dcache_mmap_unlock(mapping); |
| mutex_unlock(&mapping->i_mmap_mutex); |
| } |
| --- a/mm/migrate.c |
| +++ b/mm/migrate.c |
| @@ -181,37 +181,6 @@ out: |
| } |
| |
| /* |
| - * Congratulations to trinity for discovering this bug. |
| - * mm/fremap.c's remap_file_pages() accepts any range within a single vma to |
| - * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then |
| - * replace the specified range by file ptes throughout (maybe populated after). |
| - * If page migration finds a page within that range, while it's still located |
| - * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem: |
| - * zap_pte() clears the temporary migration entry before mmap_sem is dropped. |
| - * But if the migrating page is in a part of the vma outside the range to be |
| - * remapped, then it will not be cleared, and remove_migration_ptes() needs to |
| - * deal with it. Fortunately, this part of the vma is of course still linear, |
| - * so we just need to use linear location on the nonlinear list. |
| - */ |
| -static int remove_linear_migration_ptes_from_nonlinear(struct page *page, |
| - struct address_space *mapping, void *arg) |
| -{ |
| - struct vm_area_struct *vma; |
| - /* hugetlbfs does not support remap_pages, so no huge pgoff worries */ |
| - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); |
| - unsigned long addr; |
| - |
| - list_for_each_entry(vma, |
| - &mapping->i_mmap_nonlinear, shared.nonlinear) { |
| - |
| - addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); |
| - if (addr >= vma->vm_start && addr < vma->vm_end) |
| - remove_migration_pte(page, vma, addr, arg); |
| - } |
| - return SWAP_AGAIN; |
| -} |
| - |
| -/* |
| * Get rid of all migration entries and replace them by |
| * references to the indicated page. |
| */ |
| @@ -220,7 +189,6 @@ static void remove_migration_ptes(struct |
| struct rmap_walk_control rwc = { |
| .rmap_one = remove_migration_pte, |
| .arg = old, |
| - .file_nonlinear = remove_linear_migration_ptes_from_nonlinear, |
| }; |
| |
| rmap_walk(new, &rwc); |
| --- a/mm/mmap.c |
| +++ b/mm/mmap.c |
| @@ -219,10 +219,7 @@ static void __remove_shared_vm_struct(st |
| mapping->i_mmap_writable--; |
| |
| flush_dcache_mmap_lock(mapping); |
| - if (unlikely(vma->vm_flags & VM_NONLINEAR)) |
| - list_del_init(&vma->shared.nonlinear); |
| - else |
| - vma_interval_tree_remove(vma, &mapping->i_mmap); |
| + vma_interval_tree_remove(vma, &mapping->i_mmap); |
| flush_dcache_mmap_unlock(mapping); |
| } |
| |
| @@ -639,10 +636,7 @@ static void __vma_link_file(struct vm_ar |
| mapping->i_mmap_writable++; |
| |
| flush_dcache_mmap_lock(mapping); |
| - if (unlikely(vma->vm_flags & VM_NONLINEAR)) |
| - vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); |
| - else |
| - vma_interval_tree_insert(vma, &mapping->i_mmap); |
| + vma_interval_tree_insert(vma, &mapping->i_mmap); |
| flush_dcache_mmap_unlock(mapping); |
| } |
| } |
| @@ -777,14 +771,11 @@ again: remove_next = 1 + (end > next-> |
| |
| if (file) { |
| mapping = file->f_mapping; |
| - if (!(vma->vm_flags & VM_NONLINEAR)) { |
| - root = &mapping->i_mmap; |
| - uprobe_munmap(vma, vma->vm_start, vma->vm_end); |
| - |
| - if (adjust_next) |
| - uprobe_munmap(next, next->vm_start, |
| - next->vm_end); |
| - } |
| + root = &mapping->i_mmap; |
| + uprobe_munmap(vma, vma->vm_start, vma->vm_end); |
| + |
| + if (adjust_next) |
| + uprobe_munmap(next, next->vm_start, next->vm_end); |
| |
| mutex_lock(&mapping->i_mmap_mutex); |
| if (insert) { |
| @@ -3187,8 +3178,7 @@ static void vm_lock_mapping(struct mm_st |
| * |
| * mmap_sem in write mode is required in order to block all operations |
| * that could modify pagetables and free pages without need of |
| - * altering the vma layout (for example populate_range() with |
| - * nonlinear vmas). It's also needed in write mode to avoid new |
| + * altering the vma layout. It's also needed in write mode to avoid new |
| * anon_vmas to be associated with existing vmas. |
| * |
| * A single task can't take more than one mm_take_all_locks() in a row |
| --- a/mm/rmap.c |
| +++ b/mm/rmap.c |
| @@ -597,9 +597,8 @@ unsigned long page_address_in_vma(struct |
| if (!vma->anon_vma || !page__anon_vma || |
| vma->anon_vma->root != page__anon_vma->root) |
| return -EFAULT; |
| - } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { |
| - if (!vma->vm_file || |
| - vma->vm_file->f_mapping != page->mapping) |
| + } else if (page->mapping) { |
| + if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) |
| return -EFAULT; |
| } else |
| return -EFAULT; |
| @@ -1286,7 +1285,6 @@ static int try_to_unmap_one(struct page |
| if (pte_soft_dirty(pteval)) |
| swp_pte = pte_swp_mksoft_dirty(swp_pte); |
| set_pte_at(mm, address, pte, swp_pte); |
| - BUG_ON(pte_file(*pte)); |
| } else if (IS_ENABLED(CONFIG_MIGRATION) && |
| (flags & TTU_MIGRATION)) { |
| /* Establish migration entry for a file page */ |
| @@ -1328,207 +1326,6 @@ out_mlock: |
| return ret; |
| } |
| |
| -/* |
| - * objrmap doesn't work for nonlinear VMAs because the assumption that |
| - * offset-into-file correlates with offset-into-virtual-addresses does not hold. |
| - * Consequently, given a particular page and its ->index, we cannot locate the |
| - * ptes which are mapping that page without an exhaustive linear search. |
| - * |
| - * So what this code does is a mini "virtual scan" of each nonlinear VMA which |
| - * maps the file to which the target page belongs. The ->vm_private_data field |
| - * holds the current cursor into that scan. Successive searches will circulate |
| - * around the vma's virtual address space. |
| - * |
| - * So as more replacement pressure is applied to the pages in a nonlinear VMA, |
| - * more scanning pressure is placed against them as well. Eventually pages |
| - * will become fully unmapped and are eligible for eviction. |
| - * |
| - * For very sparsely populated VMAs this is a little inefficient - chances are |
| - * there there won't be many ptes located within the scan cluster. In this case |
| - * maybe we could scan further - to the end of the pte page, perhaps. |
| - * |
| - * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can |
| - * acquire it without blocking. If vma locked, mlock the pages in the cluster, |
| - * rather than unmapping them. If we encounter the "check_page" that vmscan is |
| - * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN. |
| - */ |
| -#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE) |
| -#define CLUSTER_MASK (~(CLUSTER_SIZE - 1)) |
| - |
| -static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, |
| - struct vm_area_struct *vma, struct page *check_page) |
| -{ |
| - struct mm_struct *mm = vma->vm_mm; |
| - pmd_t *pmd; |
| - pte_t *pte; |
| - pte_t pteval; |
| - spinlock_t *ptl; |
| - struct page *page; |
| - unsigned long address; |
| - unsigned long mmun_start; /* For mmu_notifiers */ |
| - unsigned long mmun_end; /* For mmu_notifiers */ |
| - unsigned long end; |
| - int ret = SWAP_AGAIN; |
| - int locked_vma = 0; |
| - |
| - address = (vma->vm_start + cursor) & CLUSTER_MASK; |
| - end = address + CLUSTER_SIZE; |
| - if (address < vma->vm_start) |
| - address = vma->vm_start; |
| - if (end > vma->vm_end) |
| - end = vma->vm_end; |
| - |
| - pmd = mm_find_pmd(mm, address); |
| - if (!pmd) |
| - return ret; |
| - |
| - mmun_start = address; |
| - mmun_end = end; |
| - mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
| - |
| - /* |
| - * If we can acquire the mmap_sem for read, and vma is VM_LOCKED, |
| - * keep the sem while scanning the cluster for mlocking pages. |
| - */ |
| - if (down_read_trylock(&vma->vm_mm->mmap_sem)) { |
| - locked_vma = (vma->vm_flags & VM_LOCKED); |
| - if (!locked_vma) |
| - up_read(&vma->vm_mm->mmap_sem); /* don't need it */ |
| - } |
| - |
| - pte = pte_offset_map_lock(mm, pmd, address, &ptl); |
| - |
| - /* Update high watermark before we lower rss */ |
| - update_hiwater_rss(mm); |
| - |
| - for (; address < end; pte++, address += PAGE_SIZE) { |
| - if (!pte_present(*pte)) |
| - continue; |
| - page = vm_normal_page(vma, address, *pte); |
| - BUG_ON(!page || PageAnon(page)); |
| - |
| - if (locked_vma) { |
| - if (page == check_page) { |
| - /* we know we have check_page locked */ |
| - mlock_vma_page(page); |
| - ret = SWAP_MLOCK; |
| - } else if (trylock_page(page)) { |
| - /* |
| - * If we can lock the page, perform mlock. |
| - * Otherwise leave the page alone, it will be |
| - * eventually encountered again later. |
| - */ |
| - mlock_vma_page(page); |
| - unlock_page(page); |
| - } |
| - continue; /* don't unmap */ |
| - } |
| - |
| - if (ptep_clear_flush_young_notify(vma, address, pte)) |
| - continue; |
| - |
| - /* Nuke the page table entry. */ |
| - flush_cache_page(vma, address, pte_pfn(*pte)); |
| - pteval = ptep_clear_flush(vma, address, pte); |
| - |
| - /* If nonlinear, store the file page offset in the pte. */ |
| - if (page->index != linear_page_index(vma, address)) { |
| - pte_t ptfile = pgoff_to_pte(page->index); |
| - if (pte_soft_dirty(pteval)) |
| - ptfile = pte_file_mksoft_dirty(ptfile); |
| - set_pte_at(mm, address, pte, ptfile); |
| - } |
| - |
| - /* Move the dirty bit to the physical page now the pte is gone. */ |
| - if (pte_dirty(pteval)) |
| - set_page_dirty(page); |
| - |
| - page_remove_rmap(page); |
| - page_cache_release(page); |
| - dec_mm_counter(mm, MM_FILEPAGES); |
| - (*mapcount)--; |
| - } |
| - pte_unmap_unlock(pte - 1, ptl); |
| - mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
| - if (locked_vma) |
| - up_read(&vma->vm_mm->mmap_sem); |
| - return ret; |
| -} |
| - |
| -static int try_to_unmap_nonlinear(struct page *page, |
| - struct address_space *mapping, void *arg) |
| -{ |
| - struct vm_area_struct *vma; |
| - int ret = SWAP_AGAIN; |
| - unsigned long cursor; |
| - unsigned long max_nl_cursor = 0; |
| - unsigned long max_nl_size = 0; |
| - unsigned int mapcount; |
| - |
| - list_for_each_entry(vma, |
| - &mapping->i_mmap_nonlinear, shared.nonlinear) { |
| - |
| - cursor = (unsigned long) vma->vm_private_data; |
| - if (cursor > max_nl_cursor) |
| - max_nl_cursor = cursor; |
| - cursor = vma->vm_end - vma->vm_start; |
| - if (cursor > max_nl_size) |
| - max_nl_size = cursor; |
| - } |
| - |
| - if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */ |
| - return SWAP_FAIL; |
| - } |
| - |
| - /* |
| - * We don't try to search for this page in the nonlinear vmas, |
| - * and page_referenced wouldn't have found it anyway. Instead |
| - * just walk the nonlinear vmas trying to age and unmap some. |
| - * The mapcount of the page we came in with is irrelevant, |
| - * but even so use it as a guide to how hard we should try? |
| - */ |
| - mapcount = page_mapcount(page); |
| - if (!mapcount) |
| - return ret; |
| - |
| - cond_resched(); |
| - |
| - max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; |
| - if (max_nl_cursor == 0) |
| - max_nl_cursor = CLUSTER_SIZE; |
| - |
| - do { |
| - list_for_each_entry(vma, |
| - &mapping->i_mmap_nonlinear, shared.nonlinear) { |
| - |
| - cursor = (unsigned long) vma->vm_private_data; |
| - while (cursor < max_nl_cursor && |
| - cursor < vma->vm_end - vma->vm_start) { |
| - if (try_to_unmap_cluster(cursor, &mapcount, |
| - vma, page) == SWAP_MLOCK) |
| - ret = SWAP_MLOCK; |
| - cursor += CLUSTER_SIZE; |
| - vma->vm_private_data = (void *) cursor; |
| - if ((int)mapcount <= 0) |
| - return ret; |
| - } |
| - vma->vm_private_data = (void *) max_nl_cursor; |
| - } |
| - cond_resched(); |
| - max_nl_cursor += CLUSTER_SIZE; |
| - } while (max_nl_cursor <= max_nl_size); |
| - |
| - /* |
| - * Don't loop forever (perhaps all the remaining pages are |
| - * in locked vmas). Reset cursor on all unreserved nonlinear |
| - * vmas, now forgetting on which ones it had fallen behind. |
| - */ |
| - list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear) |
| - vma->vm_private_data = NULL; |
| - |
| - return ret; |
| -} |
| - |
| bool is_vma_temporary_stack(struct vm_area_struct *vma) |
| { |
| int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); |
| @@ -1574,7 +1371,6 @@ int try_to_unmap(struct page *page, enum |
| .rmap_one = try_to_unmap_one, |
| .arg = (void *)flags, |
| .done = page_not_mapped, |
| - .file_nonlinear = try_to_unmap_nonlinear, |
| .anon_lock = page_lock_anon_vma_read, |
| }; |
| |
| @@ -1620,12 +1416,6 @@ int try_to_munlock(struct page *page) |
| .rmap_one = try_to_unmap_one, |
| .arg = (void *)TTU_MUNLOCK, |
| .done = page_not_mapped, |
| - /* |
| - * We don't bother to try to find the munlocked page in |
| - * nonlinears. It's costly. Instead, later, page reclaim logic |
| - * may call try_to_unmap() and recover PG_mlocked lazily. |
| - */ |
| - .file_nonlinear = NULL, |
| .anon_lock = page_lock_anon_vma_read, |
| |
| }; |
| @@ -1753,14 +1543,6 @@ static int rmap_walk_file(struct page *p |
| goto done; |
| } |
| |
| - if (!rwc->file_nonlinear) |
| - goto done; |
| - |
| - if (list_empty(&mapping->i_mmap_nonlinear)) |
| - goto done; |
| - |
| - ret = rwc->file_nonlinear(page, mapping, rwc->arg); |
| - |
| done: |
| mutex_unlock(&mapping->i_mmap_mutex); |
| return ret; |
| --- a/mm/swap.c |
| +++ b/mm/swap.c |
| @@ -1103,10 +1103,8 @@ void __init swap_setup(void) |
| |
| if (bdi_init(swapper_spaces[0].backing_dev_info)) |
| panic("Failed to init swap bdi"); |
| - for (i = 0; i < MAX_SWAPFILES; i++) { |
| + for (i = 0; i < MAX_SWAPFILES; i++) |
| spin_lock_init(&swapper_spaces[i].tree_lock); |
| - INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear); |
| - } |
| #endif |
| |
| /* Use a smaller cluster for small-memory machines */ |