| From: Hugh Dickins <hughd@google.com> |
| Subject: mempolicy: mmap_lock is not needed while migrating folios |
| Date: Tue, 3 Oct 2023 02:27:47 -0700 (PDT) |
| |
| mbind(2) holds down_write of current task's mmap_lock throughout |
| (exclusive because it needs to set the new mempolicy on the vmas); |
| migrate_pages(2) holds down_read of pid's mmap_lock throughout. |
| |
| They both hold mmap_lock across the internal migrate_pages(), under which |
| all new page allocations (huge or small) are made. I'm nervous about it; |
| and migrate_pages() certainly does not need mmap_lock itself. It's done |
| this way for mbind(2), because its page allocator is vma_alloc_folio() or |
| alloc_hugetlb_folio_vma(), both of which depend on vma and address. |
| |
| Now that we have alloc_pages_mpol(), depending on (refcounted) memory |
| policy and interleave index, mbind(2) can be modified to use that or |
| alloc_hugetlb_folio_nodemask(), and then not need mmap_lock across the |
| internal migrate_pages() at all: add alloc_migration_target_by_mpol() to |
| replace mbind's new_folio(). |
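| |
| For orientation, here is the new callback in slightly condensed form (the |
| same code as the mm/mempolicy.c hunk below, just without the diff markers): |
| the target folio is chosen from the passed-in mempolicy and interleave |
| index alone, so no vma, and hence no mmap_lock, is involved. |
| |
| static struct folio *alloc_migration_target_by_mpol(struct folio *src, |
| 		unsigned long private) |
| { |
| 	struct mempolicy *pol = (struct mempolicy *)private; |
| 	pgoff_t ilx = src->index >> folio_order(src); /* improve on this later */ |
| 	int nid = numa_node_id(); |
| 	gfp_t gfp; |
| |
| 	if (folio_test_hugetlb(src)) { |
| 		struct hstate *h = folio_hstate(src); |
| 		nodemask_t *nodemask; |
| |
| 		gfp = htlb_alloc_mask(h); |
| 		nodemask = policy_nodemask(gfp, pol, ilx, &nid); |
| 		return alloc_hugetlb_folio_nodemask(h, nid, nodemask, gfp); |
| 	} |
| |
| 	gfp = folio_test_large(src) ? GFP_TRANSHUGE : |
| 	      GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL | __GFP_COMP; |
| 	return page_rmappable_folio(alloc_pages_mpol(gfp, folio_order(src), |
| 						     pol, ilx, nid)); |
| } |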
| |
| (After that change, alloc_hugetlb_folio_vma() is used by nothing but a |
| userfaultfd function: move it out of hugetlb.h, make it static, and place |
| it in mm/hugetlb.c's #ifdef CONFIG_USERFAULTFD block.) |
| |
| migrate_pages(2) has chosen its target node before migrating, so it can |
| continue to use the standard alloc_migration_target(); but let it take and |
| drop mmap_lock just around migrate_to_node()'s queue_pages_range(): |
| neither the node-to-node calculations nor the page migrations need it. |
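| |
| In sketch form (condensed from the mm/mempolicy.c hunks below, with the |
| unchanged tail of the migrate_pages() call elided), migrate_to_node() |
| becomes: |
| |
| 	mmap_read_lock(mm); |
| 	vma = find_vma(mm, 0); |
| 	nr_failed = queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, |
| 			flags | MPOL_MF_DISCONTIG_OK, &pagelist); |
| 	mmap_read_unlock(mm); |
| |
| 	if (!list_empty(&pagelist)) { |
| 		/* no mmap_lock is held across the actual migration */ |
| 		err = migrate_pages(&pagelist, alloc_migration_target, NULL, |
| 				...); |
| 	} |
| |
| and do_migrate_pages() itself no longer takes mmap_lock at all. |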
| |
| It seems unlikely, but it is conceivable that some userspace depends on |
| the kernel's mmap_lock exclusion here, instead of doing its own locking: |
| more likely in a testsuite than in real life. It is also possible, of |
| course, that some pages on the list will be munmapped by another thread |
| before they are migrated, or a newer memory policy applied to the range by |
| that time: but such races could already happen before this change, as soon |
| as mmap_lock was dropped, so they do not appear to be a concern. |
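| |
| As a purely hypothetical illustration (no such program is known, and none |
| of the code below is part of this patch), a userspace that wants that |
| exclusion would now serialize its own policy changes against unmapping, |
| for example: |
| |
| #include <numaif.h>	/* mbind(); link with -lnuma */ |
| #include <pthread.h> |
| #include <sys/mman.h> |
| |
| static pthread_mutex_t range_lock = PTHREAD_MUTEX_INITIALIZER; |
| |
| /* Hypothetical helpers: threads call these instead of relying on any |
|  * kernel-internal mmap_lock exclusion between mbind() and munmap(). |
|  */ |
| static long rebind_range(void *addr, size_t len, |
| 		const unsigned long *nodemask, unsigned long maxnode) |
| { |
| 	long ret; |
| |
| 	pthread_mutex_lock(&range_lock); |
| 	ret = mbind(addr, len, MPOL_BIND, nodemask, maxnode, |
| 			MPOL_MF_MOVE | MPOL_MF_STRICT); |
| 	pthread_mutex_unlock(&range_lock); |
| 	return ret; |
| } |
| |
| static int unmap_range(void *addr, size_t len) |
| { |
| 	int ret; |
| |
| 	pthread_mutex_lock(&range_lock); |
| 	ret = munmap(addr, len); |
| 	pthread_mutex_unlock(&range_lock); |
| 	return ret; |
| } |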
| |
| Link: https://lkml.kernel.org/r/21e564e8-269f-6a89-7ee2-fd612831c289@google.com |
| Signed-off-by: Hugh Dickins <hughd@google.com> |
| Cc: Andi Kleen <ak@linux.intel.com> |
| Cc: Christoph Lameter <cl@linux.com> |
| Cc: David Hildenbrand <david@redhat.com> |
| Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| Cc: "Huang, Ying" <ying.huang@intel.com> |
| Cc: Kefeng Wang <wangkefeng.wang@huawei.com> |
| Cc: Matthew Wilcox (Oracle) <willy@infradead.org> |
| Cc: Mel Gorman <mgorman@techsingularity.net> |
| Cc: Michal Hocko <mhocko@suse.com> |
| Cc: Mike Kravetz <mike.kravetz@oracle.com> |
| Cc: Nhat Pham <nphamcs@gmail.com> |
| Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com> |
| Cc: Suren Baghdasaryan <surenb@google.com> |
| Cc: Tejun Heo <tj@kernel.org> |
| Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com> |
| Cc: Yang Shi <shy828301@gmail.com> |
| Cc: Yosry Ahmed <yosryahmed@google.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| include/linux/hugetlb.h | 9 ---- |
| mm/hugetlb.c | 38 +++++++++-------- |
| mm/mempolicy.c | 83 +++++++++++++++++++------------------- |
| 3 files changed, 63 insertions(+), 67 deletions(-) |
| |
| --- a/include/linux/hugetlb.h~mempolicy-mmap_lock-is-not-needed-while-migrating-folios |
| +++ a/include/linux/hugetlb.h |
| @@ -748,8 +748,6 @@ struct folio *alloc_hugetlb_folio(struct |
| unsigned long addr, int avoid_reserve); |
| struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, |
| nodemask_t *nmask, gfp_t gfp_mask); |
| -struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, |
| - unsigned long address); |
| int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, |
| pgoff_t idx); |
| void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, |
| @@ -1071,13 +1069,6 @@ alloc_hugetlb_folio_nodemask(struct hsta |
| { |
| return NULL; |
| } |
| - |
| -static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h, |
| - struct vm_area_struct *vma, |
| - unsigned long address) |
| -{ |
| - return NULL; |
| -} |
| |
| static inline int __alloc_bootmem_huge_page(struct hstate *h) |
| { |
| --- a/mm/hugetlb.c~mempolicy-mmap_lock-is-not-needed-while-migrating-folios |
| +++ a/mm/hugetlb.c |
| @@ -2630,24 +2630,6 @@ struct folio *alloc_hugetlb_folio_nodema |
| return alloc_migrate_hugetlb_folio(h, gfp_mask, preferred_nid, nmask); |
| } |
| |
| -/* mempolicy aware migration callback */ |
| -struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, |
| - unsigned long address) |
| -{ |
| - struct mempolicy *mpol; |
| - nodemask_t *nodemask; |
| - struct folio *folio; |
| - gfp_t gfp_mask; |
| - int node; |
| - |
| - gfp_mask = htlb_alloc_mask(h); |
| - node = huge_node(vma, address, gfp_mask, &mpol, &nodemask); |
| - folio = alloc_hugetlb_folio_nodemask(h, node, nodemask, gfp_mask); |
| - mpol_cond_put(mpol); |
| - |
| - return folio; |
| -} |
| - |
| /* |
| * Increase the hugetlb pool such that it can accommodate a reservation |
| * of size 'delta'. |
| @@ -6560,6 +6542,26 @@ out_mutex: |
| |
| #ifdef CONFIG_USERFAULTFD |
| /* |
| + * Can probably be eliminated, but still used by hugetlb_mfill_atomic_pte(). |
| + */ |
| +static struct folio *alloc_hugetlb_folio_vma(struct hstate *h, |
| + struct vm_area_struct *vma, unsigned long address) |
| +{ |
| + struct mempolicy *mpol; |
| + nodemask_t *nodemask; |
| + struct folio *folio; |
| + gfp_t gfp_mask; |
| + int node; |
| + |
| + gfp_mask = htlb_alloc_mask(h); |
| + node = huge_node(vma, address, gfp_mask, &mpol, &nodemask); |
| + folio = alloc_hugetlb_folio_nodemask(h, node, nodemask, gfp_mask); |
| + mpol_cond_put(mpol); |
| + |
| + return folio; |
| +} |
| + |
| +/* |
| * Used by userfaultfd UFFDIO_* ioctls. Based on userfaultfd's mfill_atomic_pte |
| * with modifications for hugetlb pages. |
| */ |
| --- a/mm/mempolicy.c~mempolicy-mmap_lock-is-not-needed-while-migrating-folios |
| +++ a/mm/mempolicy.c |
| @@ -415,6 +415,8 @@ static const struct mempolicy_operations |
| |
| static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, |
| unsigned long flags); |
| +static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol, |
| + pgoff_t ilx, int *nid); |
| |
| static bool strictly_unmovable(unsigned long flags) |
| { |
| @@ -1021,6 +1023,8 @@ static long migrate_to_node(struct mm_st |
| node_set(source, nmask); |
| |
| VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); |
| + |
| + mmap_read_lock(mm); |
| vma = find_vma(mm, 0); |
| |
| /* |
| @@ -1031,6 +1035,7 @@ static long migrate_to_node(struct mm_st |
| */ |
| nr_failed = queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, |
| flags | MPOL_MF_DISCONTIG_OK, &pagelist); |
| + mmap_read_unlock(mm); |
| |
| if (!list_empty(&pagelist)) { |
| err = migrate_pages(&pagelist, alloc_migration_target, NULL, |
| @@ -1059,8 +1064,6 @@ int do_migrate_pages(struct mm_struct *m |
| |
| lru_cache_disable(); |
| |
| - mmap_read_lock(mm); |
| - |
| /* |
| * Find a 'source' bit set in 'tmp' whose corresponding 'dest' |
| * bit in 'to' is not also set in 'tmp'. Clear the found 'source' |
| @@ -1140,7 +1143,6 @@ int do_migrate_pages(struct mm_struct *m |
| if (err < 0) |
| break; |
| } |
| - mmap_read_unlock(mm); |
| |
| lru_cache_enable(); |
| if (err < 0) |
| @@ -1149,44 +1151,38 @@ int do_migrate_pages(struct mm_struct *m |
| } |
| |
| /* |
| - * Allocate a new page for page migration based on vma policy. |
| - * Start by assuming the page is mapped by the same vma as contains @start. |
| - * Search forward from there, if not. N.B., this assumes that the |
| - * list of pages handed to migrate_pages()--which is how we get here-- |
| - * is in virtual address order. |
| + * Allocate a new folio for page migration, according to NUMA mempolicy. |
| */ |
| -static struct folio *new_folio(struct folio *src, unsigned long start) |
| +static struct folio *alloc_migration_target_by_mpol(struct folio *src, |
| + unsigned long private) |
| { |
| - struct vm_area_struct *vma; |
| - unsigned long address; |
| - VMA_ITERATOR(vmi, current->mm, start); |
| - gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL; |
| - |
| - for_each_vma(vmi, vma) { |
| - address = page_address_in_vma(&src->page, vma); |
| - if (address != -EFAULT) |
| - break; |
| - } |
| + struct mempolicy *pol = (struct mempolicy *)private; |
| + pgoff_t ilx = 0; /* improve on this later */ |
| + struct page *page; |
| + unsigned int order; |
| + int nid = numa_node_id(); |
| + gfp_t gfp; |
| |
| - /* |
| - * __get_vma_policy() now expects a genuine non-NULL vma. Return NULL |
| - * when the page can no longer be located in a vma: that is not ideal |
| - * (migrate_pages() will give up early, presuming ENOMEM), but good |
| - * enough to avoid a crash by syzkaller or concurrent holepunch. |
| - */ |
| - if (!vma) |
| - return NULL; |
| + order = folio_order(src); |
| + ilx += src->index >> order; |
| |
| if (folio_test_hugetlb(src)) { |
| - return alloc_hugetlb_folio_vma(folio_hstate(src), |
| - vma, address); |
| + nodemask_t *nodemask; |
| + struct hstate *h; |
| + |
| + h = folio_hstate(src); |
| + gfp = htlb_alloc_mask(h); |
| + nodemask = policy_nodemask(gfp, pol, ilx, &nid); |
| + return alloc_hugetlb_folio_nodemask(h, nid, nodemask, gfp); |
| } |
| |
| if (folio_test_large(src)) |
| gfp = GFP_TRANSHUGE; |
| + else |
| + gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL | __GFP_COMP; |
| |
| - return vma_alloc_folio(gfp, folio_order(src), vma, address, |
| - folio_test_large(src)); |
| + page = alloc_pages_mpol(gfp, order, pol, ilx, nid); |
| + return page_rmappable_folio(page); |
| } |
| #else |
| |
| @@ -1202,7 +1198,8 @@ int do_migrate_pages(struct mm_struct *m |
| return -ENOSYS; |
| } |
| |
| -static struct folio *new_folio(struct folio *src, unsigned long start) |
| +static struct folio *alloc_migration_target_by_mpol(struct folio *src, |
| + unsigned long private) |
| { |
| return NULL; |
| } |
| @@ -1276,6 +1273,7 @@ static long do_mbind(unsigned long start |
| |
| if (nr_failed < 0) { |
| err = nr_failed; |
| + nr_failed = 0; |
| } else { |
| vma_iter_init(&vmi, mm, start); |
| prev = vma_prev(&vmi); |
| @@ -1286,19 +1284,24 @@ static long do_mbind(unsigned long start |
| } |
| } |
| |
| - if (!err) { |
| - if (!list_empty(&pagelist)) { |
| - nr_failed |= migrate_pages(&pagelist, new_folio, NULL, |
| - start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND, NULL); |
| + mmap_write_unlock(mm); |
| + |
| + if (!err && !list_empty(&pagelist)) { |
| + /* Convert MPOL_DEFAULT's NULL to task or default policy */ |
| + if (!new) { |
| + new = get_task_policy(current); |
| + mpol_get(new); |
| } |
| - if (nr_failed && (flags & MPOL_MF_STRICT)) |
| - err = -EIO; |
| + nr_failed |= migrate_pages(&pagelist, |
| + alloc_migration_target_by_mpol, NULL, |
| + (unsigned long)new, MIGRATE_SYNC, |
| + MR_MEMPOLICY_MBIND, NULL); |
| } |
| |
| + if (nr_failed && (flags & MPOL_MF_STRICT)) |
| + err = -EIO; |
| if (!list_empty(&pagelist)) |
| putback_movable_pages(&pagelist); |
| - |
| - mmap_write_unlock(mm); |
| mpol_out: |
| mpol_put(new); |
| if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) |
| _ |