| From: Marcelo Tosatti <mtosatti@redhat.com> |
| Subject: mm/vmstat: switch counter modification to cmpxchg |
| Date: Mon, 20 Mar 2023 15:03:40 -0300 |
| |
| In preparation for switching vmstat shepherd to flush per-CPU counters |
| remotely, switch the __{mod,inc,dec} functions that modify the counters to |
| use cmpxchg. |
| |
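| As a rough illustration of the scheme (a minimal userspace sketch, not |
| the kernel code: plain C11 atomics stand in for this_cpu_cmpxchg() and |
| the per-CPU variables, and the names and fixed threshold are made up), |
| the per-CPU differential counter is updated in a compare-and-swap retry |
| loop and folded into the global counter once it oversteps the |
| threshold: |
| |
| 	#include <stdatomic.h> |
| 	#include <stdio.h> |
| 	#include <stdlib.h> |
| |
| 	static atomic_long global_counter;	/* stands in for the zone/node counter */ |
| 	static _Atomic signed char cpu_diff;	/* stands in for pcp->vm_stat_diff[item] */ |
| 	static const signed char threshold = 32; /* stands in for pcp->stat_threshold */ |
| |
| 	static void mod_state(long delta, int overstep_mode) |
| 	{ |
| 		signed char o, n;	/* the per-CPU diff is a narrow s8 in the kernel */ |
| 		long z; |
| |
| 		do { |
| 			z = 0;		/* overflow to the global counter */ |
| 			o = atomic_load(&cpu_diff); |
| 			n = delta + o; |
| |
| 			if (abs(n) > threshold) { |
| 				int os = overstep_mode * (threshold >> 1); |
| |
| 				z = n + os;	/* fold into the global counter */ |
| 				n = -os;	/* leave headroom for further updates */ |
| 			} |
| 		} while (!atomic_compare_exchange_weak(&cpu_diff, &o, n)); |
| |
| 		if (z) |
| 			atomic_fetch_add(&global_counter, z); |
| 	} |
| |
| 	int main(void) |
| 	{ |
| 		/* global + diff always equals the number of increments */ |
| 		for (int i = 0; i < 100; i++) |
| 			mod_state(1, 1);	/* like inc_zone_page_state() */ |
| 		printf("global=%ld diff=%d\n", |
| 		       atomic_load(&global_counter), (int)atomic_load(&cpu_diff)); |
| 		return 0; |
| 	} |
| |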
| To facilitate review, the functions are ordered in the source file as: |
| |
| __{mod,inc,dec}_{zone,node}_page_state |
| #ifdef CONFIG_HAVE_CMPXCHG_LOCAL |
| {mod,inc,dec}_{zone,node}_page_state |
| #else |
| {mod,inc,dec}_{zone,node}_page_state |
| #endif |
| |
| This patch defines the "__" versions for the |
| CONFIG_HAVE_CMPXCHG_LOCAL case to be identical to their non-"__" counterparts: |
| |
| #ifdef CONFIG_HAVE_CMPXCHG_LOCAL |
| {mod,inc,dec}_{zone,node}_page_state |
| __{mod,inc,dec}_{zone,node}_page_state = {mod,inc,dec}_{zone,node}_page_state |
| #else |
| {mod,inc,dec}_{zone,node}_page_state |
| __{mod,inc,dec}_{zone,node}_page_state |
| #endif |
| |
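| With cmpxchg_local available, both variants therefore become thin |
| wrappers around the same cmpxchg-based helper; for example (excerpt |
| from the mm/vmstat.c hunk below): |
| |
| 	void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, |
| 			long delta) |
| 	{ |
| 		mod_zone_state(zone, item, delta, 0); |
| 	} |
| |
| 	void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, |
| 			long delta) |
| 	{ |
| 		mod_zone_state(zone, item, delta, 0); |
| 	} |
| |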
| To measure the performance difference, the page allocator microbenchmark |
| https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/mm/bench/page_bench01.c |
| was used with loops=1000000, on an Intel Core i7-11850H @ 2.50GHz. |
| |
| For the single_page_alloc_free test, which runs the following loop: |
| |
| 	/** Loop to measure **/ |
| 	for (i = 0; i < rec->loops; i++) { |
| 		my_page = alloc_page(gfp_mask); |
| 		if (unlikely(my_page == NULL)) |
| 			return 0; |
| 		__free_page(my_page); |
| 	} |
| |
| the results, in cycles, are: |
| |
| Vanilla    Patched    Diff |
| 115.25     117        1.4% |
| |
| Link: https://lkml.kernel.org/r/20230320180745.733575720@redhat.com |
| Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> |
| Cc: Aaron Tomlin <atomlin@atomlin.com> |
| Cc: Christoph Lameter <cl@linux.com> |
| Cc: Frederic Weisbecker <frederic@kernel.org> |
| Cc: Heiko Carstens <hca@linux.ibm.com> |
| Cc: Huacai Chen <chenhuacai@kernel.org> |
| Cc: Michal Hocko <mhocko@suse.com> |
| Cc: Peter Xu <peterx@redhat.com> |
| Cc: "Russell King (Oracle)" <linux@armlinux.org.uk> |
| Cc: Vlastimil Babka <vbabka@suse.cz> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/page_alloc.c | 3 |
| mm/vmstat.c | 325 +++++++++++++++++++++++++--------------------- |
| 2 files changed, 182 insertions(+), 146 deletions(-) |
| |
| --- a/mm/page_alloc.c~mm-vmstat-switch-counter-modification-to-cmpxchg |
| +++ a/mm/page_alloc.c |
| @@ -6240,9 +6240,6 @@ static int page_alloc_cpu_dead(unsigned |
| /* |
| * Zero the differential counters of the dead processor |
| * so that the vm statistics are consistent. |
| - * |
| - * This is only okay since the processor is dead and cannot |
| - * race with what we are doing. |
| */ |
| cpu_vm_stats_fold(cpu); |
| |
| --- a/mm/vmstat.c~mm-vmstat-switch-counter-modification-to-cmpxchg |
| +++ a/mm/vmstat.c |
| @@ -334,6 +334,188 @@ void set_pgdat_percpu_threshold(pg_data_ |
| } |
| } |
| |
| +#ifdef CONFIG_HAVE_CMPXCHG_LOCAL |
| +/* |
| + * If we have cmpxchg_local support then we do not need to incur the overhead |
| + * that comes with local_irq_save/restore if we use this_cpu_cmpxchg. |
| + * |
| + * mod_state() modifies the zone counter state through atomic per cpu |
| + * operations. |
| + * |
| + * Overstep mode specifies how overstep should be handled: |
| + * 0 No overstepping |
| + * 1 Overstepping half of threshold |
| + * -1 Overstepping minus half of threshold |
| + */ |
| +static inline void mod_zone_state(struct zone *zone, enum zone_stat_item item, |
| + long delta, int overstep_mode) |
| +{ |
| + struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; |
| + s8 __percpu *p = pcp->vm_stat_diff + item; |
| + long o, n, t, z; |
| + |
| + do { |
| + z = 0; /* overflow to zone counters */ |
| + |
| + /* |
| + * The fetching of the stat_threshold is racy. We may apply |
| + * a counter threshold to the wrong cpu if we get |
| + * rescheduled while executing here. However, the next |
| + * counter update will apply the threshold again and |
| + * therefore bring the counter under the threshold again. |
| + * |
| + * Most of the time the thresholds are the same anyways |
| + * for all cpus in a zone. |
| + */ |
| + t = this_cpu_read(pcp->stat_threshold); |
| + |
| + o = this_cpu_read(*p); |
| + n = delta + o; |
| + |
| + if (abs(n) > t) { |
| + int os = overstep_mode * (t >> 1); |
| + |
| + /* Overflow must be added to zone counters */ |
| + z = n + os; |
| + n = -os; |
| + } |
| + } while (this_cpu_cmpxchg(*p, o, n) != o); |
| + |
| + if (z) |
| + zone_page_state_add(z, zone, item); |
| +} |
| + |
| +void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, |
| + long delta) |
| +{ |
| + mod_zone_state(zone, item, delta, 0); |
| +} |
| +EXPORT_SYMBOL(mod_zone_page_state); |
| + |
| +void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, |
| + long delta) |
| +{ |
| + mod_zone_state(zone, item, delta, 0); |
| +} |
| +EXPORT_SYMBOL(__mod_zone_page_state); |
| + |
| +void inc_zone_page_state(struct page *page, enum zone_stat_item item) |
| +{ |
| + mod_zone_state(page_zone(page), item, 1, 1); |
| +} |
| +EXPORT_SYMBOL(inc_zone_page_state); |
| + |
| +void __inc_zone_page_state(struct page *page, enum zone_stat_item item) |
| +{ |
| + mod_zone_state(page_zone(page), item, 1, 1); |
| +} |
| +EXPORT_SYMBOL(__inc_zone_page_state); |
| + |
| +void dec_zone_page_state(struct page *page, enum zone_stat_item item) |
| +{ |
| + mod_zone_state(page_zone(page), item, -1, -1); |
| +} |
| +EXPORT_SYMBOL(dec_zone_page_state); |
| + |
| +void __dec_zone_page_state(struct page *page, enum zone_stat_item item) |
| +{ |
| + mod_zone_state(page_zone(page), item, -1, -1); |
| +} |
| +EXPORT_SYMBOL(__dec_zone_page_state); |
| + |
| +static inline void mod_node_state(struct pglist_data *pgdat, |
| + enum node_stat_item item, |
| + int delta, int overstep_mode) |
| +{ |
| + struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; |
| + s8 __percpu *p = pcp->vm_node_stat_diff + item; |
| + long o, n, t, z; |
| + |
| + if (vmstat_item_in_bytes(item)) { |
| + /* |
| + * Only cgroups use subpage accounting right now; at |
| + * the global level, these items still change in |
| + * multiples of whole pages. Store them as pages |
| + * internally to keep the per-cpu counters compact. |
| + */ |
| + VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); |
| + delta >>= PAGE_SHIFT; |
| + } |
| + |
| + do { |
| + z = 0; /* overflow to node counters */ |
| + |
| + /* |
| + * The fetching of the stat_threshold is racy. We may apply |
| + * a counter threshold to the wrong cpu if we get |
| + * rescheduled while executing here. However, the next |
| + * counter update will apply the threshold again and |
| + * therefore bring the counter under the threshold again. |
| + * |
| + * Most of the time the thresholds are the same anyways |
| + * for all cpus in a node. |
| + */ |
| + t = this_cpu_read(pcp->stat_threshold); |
| + |
| + o = this_cpu_read(*p); |
| + n = delta + o; |
| + |
| + if (abs(n) > t) { |
| + int os = overstep_mode * (t >> 1); |
| + |
| + /* Overflow must be added to node counters */ |
| + z = n + os; |
| + n = -os; |
| + } |
| + } while (this_cpu_cmpxchg(*p, o, n) != o); |
| + |
| + if (z) |
| + node_page_state_add(z, pgdat, item); |
| +} |
| + |
| +void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, |
| + long delta) |
| +{ |
| + mod_node_state(pgdat, item, delta, 0); |
| +} |
| +EXPORT_SYMBOL(mod_node_page_state); |
| + |
| +void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, |
| + long delta) |
| +{ |
| + mod_node_state(pgdat, item, delta, 0); |
| +} |
| +EXPORT_SYMBOL(__mod_node_page_state); |
| + |
| +void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) |
| +{ |
| + mod_node_state(pgdat, item, 1, 1); |
| +} |
| + |
| +void inc_node_page_state(struct page *page, enum node_stat_item item) |
| +{ |
| + mod_node_state(page_pgdat(page), item, 1, 1); |
| +} |
| +EXPORT_SYMBOL(inc_node_page_state); |
| + |
| +void __inc_node_page_state(struct page *page, enum node_stat_item item) |
| +{ |
| + mod_node_state(page_pgdat(page), item, 1, 1); |
| +} |
| +EXPORT_SYMBOL(__inc_node_page_state); |
| + |
| +void dec_node_page_state(struct page *page, enum node_stat_item item) |
| +{ |
| + mod_node_state(page_pgdat(page), item, -1, -1); |
| +} |
| +EXPORT_SYMBOL(dec_node_page_state); |
| + |
| +void __dec_node_page_state(struct page *page, enum node_stat_item item) |
| +{ |
| + mod_node_state(page_pgdat(page), item, -1, -1); |
| +} |
| +EXPORT_SYMBOL(__dec_node_page_state); |
| +#else |
| /* |
| * For use when we know that interrupts are disabled, |
| * or when we know that preemption is disabled and that |
| @@ -541,149 +723,6 @@ void __dec_node_page_state(struct page * |
| } |
| EXPORT_SYMBOL(__dec_node_page_state); |
| |
| -#ifdef CONFIG_HAVE_CMPXCHG_LOCAL |
| -/* |
| - * If we have cmpxchg_local support then we do not need to incur the overhead |
| - * that comes with local_irq_save/restore if we use this_cpu_cmpxchg. |
| - * |
| - * mod_state() modifies the zone counter state through atomic per cpu |
| - * operations. |
| - * |
| - * Overstep mode specifies how overstep should handled: |
| - * 0 No overstepping |
| - * 1 Overstepping half of threshold |
| - * -1 Overstepping minus half of threshold |
| -*/ |
| -static inline void mod_zone_state(struct zone *zone, |
| - enum zone_stat_item item, long delta, int overstep_mode) |
| -{ |
| - struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; |
| - s8 __percpu *p = pcp->vm_stat_diff + item; |
| - long o, n, t, z; |
| - |
| - do { |
| - z = 0; /* overflow to zone counters */ |
| - |
| - /* |
| - * The fetching of the stat_threshold is racy. We may apply |
| - * a counter threshold to the wrong the cpu if we get |
| - * rescheduled while executing here. However, the next |
| - * counter update will apply the threshold again and |
| - * therefore bring the counter under the threshold again. |
| - * |
| - * Most of the time the thresholds are the same anyways |
| - * for all cpus in a zone. |
| - */ |
| - t = this_cpu_read(pcp->stat_threshold); |
| - |
| - o = this_cpu_read(*p); |
| - n = delta + o; |
| - |
| - if (abs(n) > t) { |
| - int os = overstep_mode * (t >> 1) ; |
| - |
| - /* Overflow must be added to zone counters */ |
| - z = n + os; |
| - n = -os; |
| - } |
| - } while (this_cpu_cmpxchg(*p, o, n) != o); |
| - |
| - if (z) |
| - zone_page_state_add(z, zone, item); |
| -} |
| - |
| -void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, |
| - long delta) |
| -{ |
| - mod_zone_state(zone, item, delta, 0); |
| -} |
| -EXPORT_SYMBOL(mod_zone_page_state); |
| - |
| -void inc_zone_page_state(struct page *page, enum zone_stat_item item) |
| -{ |
| - mod_zone_state(page_zone(page), item, 1, 1); |
| -} |
| -EXPORT_SYMBOL(inc_zone_page_state); |
| - |
| -void dec_zone_page_state(struct page *page, enum zone_stat_item item) |
| -{ |
| - mod_zone_state(page_zone(page), item, -1, -1); |
| -} |
| -EXPORT_SYMBOL(dec_zone_page_state); |
| - |
| -static inline void mod_node_state(struct pglist_data *pgdat, |
| - enum node_stat_item item, int delta, int overstep_mode) |
| -{ |
| - struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; |
| - s8 __percpu *p = pcp->vm_node_stat_diff + item; |
| - long o, n, t, z; |
| - |
| - if (vmstat_item_in_bytes(item)) { |
| - /* |
| - * Only cgroups use subpage accounting right now; at |
| - * the global level, these items still change in |
| - * multiples of whole pages. Store them as pages |
| - * internally to keep the per-cpu counters compact. |
| - */ |
| - VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); |
| - delta >>= PAGE_SHIFT; |
| - } |
| - |
| - do { |
| - z = 0; /* overflow to node counters */ |
| - |
| - /* |
| - * The fetching of the stat_threshold is racy. We may apply |
| - * a counter threshold to the wrong the cpu if we get |
| - * rescheduled while executing here. However, the next |
| - * counter update will apply the threshold again and |
| - * therefore bring the counter under the threshold again. |
| - * |
| - * Most of the time the thresholds are the same anyways |
| - * for all cpus in a node. |
| - */ |
| - t = this_cpu_read(pcp->stat_threshold); |
| - |
| - o = this_cpu_read(*p); |
| - n = delta + o; |
| - |
| - if (abs(n) > t) { |
| - int os = overstep_mode * (t >> 1) ; |
| - |
| - /* Overflow must be added to node counters */ |
| - z = n + os; |
| - n = -os; |
| - } |
| - } while (this_cpu_cmpxchg(*p, o, n) != o); |
| - |
| - if (z) |
| - node_page_state_add(z, pgdat, item); |
| -} |
| - |
| -void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, |
| - long delta) |
| -{ |
| - mod_node_state(pgdat, item, delta, 0); |
| -} |
| -EXPORT_SYMBOL(mod_node_page_state); |
| - |
| -void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) |
| -{ |
| - mod_node_state(pgdat, item, 1, 1); |
| -} |
| - |
| -void inc_node_page_state(struct page *page, enum node_stat_item item) |
| -{ |
| - mod_node_state(page_pgdat(page), item, 1, 1); |
| -} |
| -EXPORT_SYMBOL(inc_node_page_state); |
| - |
| -void dec_node_page_state(struct page *page, enum node_stat_item item) |
| -{ |
| - mod_node_state(page_pgdat(page), item, -1, -1); |
| -} |
| -EXPORT_SYMBOL(dec_node_page_state); |
| -#else |
| /* |
| * Use interrupt disable to serialize counter updates |
| */ |
| _ |