| From: Oscar Salvador <osalvador@suse.de> |
| Subject: mm,page_owner: implement the tracking of the stacks count |
| Date: Thu, 15 Feb 2024 22:59:04 +0100 |
| |
| Implement {inc,dec}_stack_record_count(), which increment or decrement the |
| refcount of a stack_record on the respective allocation and free |
| operations, via __reset_page_owner() (free operation) and |
| __set_page_owner() (alloc operation). |
| |
| Newly allocated stack_record structs will be added to the list stack_list |
| via add_stack_record_to_list(). Modifications to the list are protected |
| via a spinlock with irqs disabled, since this code can also be reached |
| from IRQ context. |
| |
| Link: https://lkml.kernel.org/r/20240215215907.20121-5-osalvador@suse.de |
| Signed-off-by: Oscar Salvador <osalvador@suse.de> |
| Reviewed-by: Marco Elver <elver@google.com> |
| Reviewed-by: Vlastimil Babka <vbabka@suse.cz> |
| Acked-by: Andrey Konovalov <andreyknvl@gmail.com> |
| Cc: Alexander Potapenko <glider@google.com> |
| Cc: Michal Hocko <mhocko@suse.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/page_owner.c | 73 +++++++++++++++++++++++++++++++++++++++++++++- |
| 1 file changed, 72 insertions(+), 1 deletion(-) |
| |
| --- a/mm/page_owner.c~mmpage_owner-implement-the-tracking-of-the-stacks-count |
| +++ a/mm/page_owner.c |
| @@ -43,6 +43,7 @@ struct stack { |
| static struct stack dummy_stack; |
| static struct stack failure_stack; |
| static struct stack *stack_list; |
| +static DEFINE_SPINLOCK(stack_list_lock); |
| |
| static bool page_owner_enabled __initdata; |
| DEFINE_STATIC_KEY_FALSE(page_owner_inited); |
| @@ -150,11 +151,68 @@ static noinline depot_stack_handle_t sav |
| return handle; |
| } |
| |
| +static void add_stack_record_to_list(struct stack_record *stack_record, |
| + gfp_t gfp_mask) |
| +{ |
| + unsigned long flags; |
| + struct stack *stack; |
| + |
| + /* Filter gfp_mask the same way stackdepot does, for consistency */ |
| + gfp_mask &= ~GFP_ZONEMASK; |
| + gfp_mask &= (GFP_ATOMIC | GFP_KERNEL); |
| + gfp_mask |= __GFP_NOWARN; |
| + |
| + stack = kmalloc(sizeof(*stack), gfp_mask); |
| + if (!stack) |
| + return; |
| + |
| + stack->stack_record = stack_record; |
| + stack->next = NULL; |
| + |
| + spin_lock_irqsave(&stack_list_lock, flags); |
| + stack->next = stack_list; |
| + stack_list = stack; |
| + spin_unlock_irqrestore(&stack_list_lock, flags); |
| +} |
| + |
| +static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask) |
| +{ |
| + struct stack_record *stack_record = __stack_depot_get_stack_record(handle); |
| + |
| + if (!stack_record) |
| + return; |
| + |
| + /* |
| + * New stack_records that do not use STACK_DEPOT_FLAG_GET start |
| + * with REFCOUNT_SATURATED to catch spurious increments of their |
| + * refcount. |
| + * Since we do not use the STACK_DEPOT_FLAG_GET API, let us |
| + * set a refcount of 1 ourselves. |
| + */ |
| + if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) { |
| + int old = REFCOUNT_SATURATED; |
| + |
| + if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1)) |
| + /* Add the new stack_record to our list */ |
| + add_stack_record_to_list(stack_record, gfp_mask); |
| + } |
| + refcount_inc(&stack_record->count); |
| +} |
| + |
| +static void dec_stack_record_count(depot_stack_handle_t handle) |
| +{ |
| + struct stack_record *stack_record = __stack_depot_get_stack_record(handle); |
| + |
| + if (stack_record) |
| + refcount_dec(&stack_record->count); |
| +} |
| + |
| void __reset_page_owner(struct page *page, unsigned short order) |
| { |
| int i; |
| struct page_ext *page_ext; |
| depot_stack_handle_t handle; |
| + depot_stack_handle_t alloc_handle; |
| struct page_owner *page_owner; |
| u64 free_ts_nsec = local_clock(); |
| |
| @@ -162,17 +220,29 @@ void __reset_page_owner(struct page *pag |
| if (unlikely(!page_ext)) |
| return; |
| |
| + page_owner = get_page_owner(page_ext); |
| + alloc_handle = page_owner->handle; |
| + |
| handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); |
| for (i = 0; i < (1 << order); i++) { |
| __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); |
| - page_owner = get_page_owner(page_ext); |
| page_owner->free_handle = handle; |
| page_owner->free_ts_nsec = free_ts_nsec; |
| page_owner->free_pid = current->pid; |
| page_owner->free_tgid = current->tgid; |
| page_ext = page_ext_next(page_ext); |
| + page_owner = get_page_owner(page_ext); |
| } |
| page_ext_put(page_ext); |
| + if (alloc_handle != early_handle) |
| + /* |
| + * early_handle is being set as a handle for all those |
| + * early allocated pages. See init_pages_in_zone(). |
| + * Since their refcount is not being incremented because |
| + * the machinery is not ready yet, we cannot decrement |
| + * their refcount either. |
| + */ |
| + dec_stack_record_count(alloc_handle); |
| } |
| |
| static inline void __set_page_owner_handle(struct page_ext *page_ext, |
| @@ -214,6 +284,7 @@ noinline void __set_page_owner(struct pa |
| return; |
| __set_page_owner_handle(page_ext, handle, order, gfp_mask); |
| page_ext_put(page_ext); |
| + inc_stack_record_count(handle, gfp_mask); |
| } |
| |
| void __set_page_owner_migrate_reason(struct page *page, int reason) |
| _ |