| From d7da901ab7d409d7fdd76c9964b07b48675a051c Mon Sep 17 00:00:00 2001 |
| From: Ingo Molnar <mingo@elte.hu> |
| Date: Fri, 3 Jul 2009 08:29:37 -0500 |
| Subject: [PATCH] mm: page_alloc: rt-friendly per-cpu pages |
| |
| commit ff3fd6afd788760c846a2f4449487debb6c4b0ac in tip. |
| |
| rt-friendly per-cpu pages: convert the irqs-off per-cpu locking |
| method in the page allocator into a preemptible method based on |
| explicit per-CPU locks. |
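| |
| A minimal usage sketch of the new helpers (illustrative only, not |
| part of the diff below; lock_cpu_pcp()/unlock_cpu_pcp() are the |
| helpers this patch introduces in mm/page_alloc.c): |
| |
|   unsigned long flags; |
|   int this_cpu; |
| |
|   lock_cpu_pcp(&flags, &this_cpu); |
|   /* |
|    * The pcp lists of this_cpu may be touched here.  On !PREEMPT_RT |
|    * this section runs with interrupts disabled, as before; on |
|    * PREEMPT_RT it is preemptible and serialized by the per-CPU |
|    * spinlock in pcp_locks, and flags is not used. |
|    */ |
|   unlock_cpu_pcp(flags, this_cpu); |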
| |
| Contains fixes from: |
| Peter Zijlstra <a.p.zijlstra@chello.nl> |
| Thomas Gleixner <tglx@linutronix.de> |
| |
| [PG: upstream 99dcc3e5a94e muddies the waters when applying the |
| original; for example, free_zone_pagesets() is gone.] |
| |
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/mm/page_alloc.c b/mm/page_alloc.c |
| index a8182c8..0d15911 100644 |
| --- a/mm/page_alloc.c |
| +++ b/mm/page_alloc.c |
| @@ -187,6 +187,54 @@ static unsigned long __meminitdata dma_reserve; |
| EXPORT_SYMBOL(movable_zone); |
| #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +static DEFINE_PER_CPU_LOCKED(int, pcp_locks); |
| +#endif |
| + |
| +static inline void __lock_cpu_pcp(unsigned long *flags, int cpu) |
| +{ |
| +#ifdef CONFIG_PREEMPT_RT |
| + spin_lock(&__get_cpu_lock(pcp_locks, cpu)); |
| + *flags = 0; |
| +#else |
| + local_irq_save(*flags); |
| +#endif |
| +} |
| + |
| +static inline void lock_cpu_pcp(unsigned long *flags, int *this_cpu) |
| +{ |
| +#ifdef CONFIG_PREEMPT_RT |
| + (void)get_cpu_var_locked(pcp_locks, this_cpu); |
| + *flags = 0; |
| +#else |
| + local_irq_save(*flags); |
| + *this_cpu = smp_processor_id(); |
| +#endif |
| +} |
| + |
| +static inline void unlock_cpu_pcp(unsigned long flags, int this_cpu) |
| +{ |
| +#ifdef CONFIG_PREEMPT_RT |
| + put_cpu_var_locked(pcp_locks, this_cpu); |
| +#else |
| + local_irq_restore(flags); |
| +#endif |
| +} |
| + |
| +/* PG: FIXME - zone_pcp is dead (99dcc3e5a9), so kill these get/put variants? */ |
| +static struct per_cpu_pageset * |
| +get_zone_pcp(struct zone *zone, unsigned long *flags, int *this_cpu) |
| +{ |
| + lock_cpu_pcp(flags, this_cpu); |
| + return per_cpu_ptr(zone->pageset, *this_cpu); |
| +} |
| + |
| +static void |
| +put_zone_pcp(struct zone *zone, unsigned long flags, int this_cpu) |
| +{ |
| + unlock_cpu_pcp(flags, this_cpu); |
| +} |
| + |
| #if MAX_NUMNODES > 1 |
| int nr_node_ids __read_mostly = MAX_NUMNODES; |
| int nr_online_nodes __read_mostly = 1; |
| @@ -604,8 +652,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order, |
| static void __free_pages_ok(struct page *page, unsigned int order) |
| { |
| unsigned long flags; |
| - int i; |
| - int bad = 0; |
| + int i, this_cpu, bad = 0; |
| int wasMlocked = __TestClearPageMlocked(page); |
| |
| trace_mm_page_free_direct(page, order); |
| @@ -624,13 +671,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) |
| arch_free_page(page, order); |
| kernel_map_pages(page, 1 << order, 0); |
| |
| - local_irq_save(flags); |
| + lock_cpu_pcp(&flags, &this_cpu); |
| if (unlikely(wasMlocked)) |
| free_page_mlock(page); |
| - __count_vm_events(PGFREE, 1 << order); |
| + count_vm_events(PGFREE, 1 << order); |
| + unlock_cpu_pcp(flags, this_cpu); |
| free_one_page(page_zone(page), page, order, |
| get_pageblock_migratetype(page)); |
| - local_irq_restore(flags); |
| } |
| |
| /* |
| @@ -1007,15 +1054,16 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) |
| { |
| unsigned long flags; |
| int to_drain; |
| + int this_cpu; |
| |
| - local_irq_save(flags); |
| + lock_cpu_pcp(&flags, &this_cpu); |
| if (pcp->count >= pcp->batch) |
| to_drain = pcp->batch; |
| else |
| to_drain = pcp->count; |
| free_pcppages_bulk(zone, to_drain, pcp); |
| pcp->count -= to_drain; |
| - local_irq_restore(flags); |
| + unlock_cpu_pcp(flags, this_cpu); |
| } |
| #endif |
| |
| @@ -1035,13 +1083,18 @@ static void drain_pages(unsigned int cpu) |
| struct per_cpu_pageset *pset; |
| struct per_cpu_pages *pcp; |
| |
| - local_irq_save(flags); |
| + __lock_cpu_pcp(&flags, cpu); |
| pset = per_cpu_ptr(zone->pageset, cpu); |
| |
| + if (!pset) { |
| + unlock_cpu_pcp(flags, cpu); |
| + WARN_ON(1); |
| + continue; |
| + } |
| pcp = &pset->pcp; |
| free_pcppages_bulk(zone, pcp->count, pcp); |
| pcp->count = 0; |
| - local_irq_restore(flags); |
| + unlock_cpu_pcp(flags, cpu); |
| } |
| } |
| |
| @@ -1053,12 +1106,52 @@ void drain_local_pages(void *arg) |
| drain_pages(smp_processor_id()); |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +static void drain_local_pages_work(struct work_struct *wrk) |
| +{ |
| + drain_pages(smp_processor_id()); |
| +} |
| +#endif |
| + |
| /* |
| * Spill all the per-cpu pages from all CPUs back into the buddy allocator |
| */ |
| void drain_all_pages(void) |
| { |
| +#ifdef CONFIG_PREEMPT_RT |
| + /* |
| + * HACK!!!!! |
| + * For RT we can't use IPIs to run drain_local_pages, since |
| + * that code will call spin_locks that will now sleep. |
| + * But schedule_on_each_cpu() calls kzalloc(), which calls into |
| + * the page allocator, which is what got us here in the first place. |
| + * |
| + * Luckily, the only way to get here is if the order passed in |
| + * to alloc_pages() is greater than 0 (more than one page was |
| + * requested). The slabs only allocate what is needed, and the |
| + * allocation made by schedule_on_each_cpu() is only |
| + * "sizeof(void *) * nr_cpu_ids" bytes. |
| + * |
| + * So we can safely call schedule_on_each_cpu() if that size is |
| + * less than a page. Otherwise don't bother, but at least warn |
| + * about the problem. |
| + * |
| + * And yes, this is one big hack. Please fix ;-) |
| + */ |
| + if (sizeof(void *) * nr_cpu_ids < PAGE_SIZE) { |
| + schedule_on_each_cpu(drain_local_pages_work); |
| + } else { |
| + static int once; |
| + if (!once) { |
| + printk(KERN_ERR "Can't drain all CPUs due to possible recursion\n"); |
| + once = 1; |
| + } |
| + drain_local_pages(NULL); |
| + } |
| + |
| +#else |
| on_each_cpu(drain_local_pages, NULL, 1); |
| +#endif |
| } |
| |
| #ifdef CONFIG_HIBERNATION |
| @@ -1104,10 +1197,11 @@ void mark_free_pages(struct zone *zone) |
| void free_hot_cold_page(struct page *page, int cold) |
| { |
| struct zone *zone = page_zone(page); |
| + struct per_cpu_pageset *pset; |
| struct per_cpu_pages *pcp; |
| unsigned long flags; |
| int migratetype; |
| - int wasMlocked = __TestClearPageMlocked(page); |
| + int this_cpu, wasMlocked = __TestClearPageMlocked(page); |
| |
| trace_mm_page_free_direct(page, 0); |
| kmemcheck_free_shadow(page, 0); |
| @@ -1124,12 +1218,13 @@ void free_hot_cold_page(struct page *page, int cold) |
| arch_free_page(page, 0); |
| kernel_map_pages(page, 1, 0); |
| |
| + pset = get_zone_pcp(zone, &flags, &this_cpu); |
| + pcp = &pset->pcp; |
| migratetype = get_pageblock_migratetype(page); |
| set_page_private(page, migratetype); |
| - local_irq_save(flags); |
| if (unlikely(wasMlocked)) |
| free_page_mlock(page); |
| - __count_vm_event(PGFREE); |
| + count_vm_event(PGFREE); |
| |
| /* |
| * We only track unmovable, reclaimable and movable on pcp lists. |
| @@ -1146,7 +1241,6 @@ void free_hot_cold_page(struct page *page, int cold) |
| migratetype = MIGRATE_MOVABLE; |
| } |
| |
| - pcp = &this_cpu_ptr(zone->pageset)->pcp; |
| if (cold) |
| list_add_tail(&page->lru, &pcp->lists[migratetype]); |
| else |
| @@ -1158,7 +1252,7 @@ void free_hot_cold_page(struct page *page, int cold) |
| } |
| |
| out: |
| - local_irq_restore(flags); |
| + put_zone_pcp(zone, flags, this_cpu); |
| } |
| |
| /* |
| @@ -1202,15 +1296,18 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, |
| unsigned long flags; |
| struct page *page; |
| int cold = !!(gfp_flags & __GFP_COLD); |
| + struct per_cpu_pageset *pset; |
| + int this_cpu; |
| |
| again: |
| + pset = get_zone_pcp(zone, &flags, &this_cpu); |
| + |
| if (likely(order == 0)) { |
| - struct per_cpu_pages *pcp; |
| struct list_head *list; |
| + struct per_cpu_pages *pcp = &pset->pcp; |
| |
| - local_irq_save(flags); |
| - pcp = &this_cpu_ptr(zone->pageset)->pcp; |
| list = &pcp->lists[migratetype]; |
| + |
| if (list_empty(list)) { |
| pcp->count += rmqueue_bulk(zone, 0, |
| pcp->batch, list, |
| @@ -1240,7 +1337,7 @@ again: |
| */ |
| WARN_ON_ONCE(order > 1); |
| } |
| - spin_lock_irqsave(&zone->lock, flags); |
| + spin_lock(&zone->lock); |
| page = __rmqueue(zone, order, migratetype); |
| spin_unlock(&zone->lock); |
| if (!page) |
| @@ -1250,7 +1347,7 @@ again: |
| |
| __count_zone_vm_events(PGALLOC, zone, 1 << order); |
| zone_statistics(preferred_zone, zone); |
| - local_irq_restore(flags); |
| + put_zone_pcp(zone, flags, this_cpu); |
| |
| VM_BUG_ON(bad_range(zone, page)); |
| if (prep_new_page(page, order, gfp_flags)) |
| @@ -1258,7 +1355,7 @@ again: |
| return page; |
| |
| failed: |
| - local_irq_restore(flags); |
| + put_zone_pcp(zone, flags, this_cpu); |
| return NULL; |
| } |
| |
| -- |
| 1.7.1.1 |
| |