/*
* High memory handling common code and variables.
*
* (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
* Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
*
*
* Redesigned the x86 32-bit VM architecture to deal with
* 64-bit physical space. With current x86 CPUs this
* means up to 64 Gigabytes physical RAM.
*
* Rewrote high memory support to move the page cache into
* high memory. Implemented permanent (schedulable) kmaps
* based on Linus' idea.
*
* Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
*
* Largely rewritten to get rid of all global locks
*
* Copyright (C) 2006 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
*
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
#include <linux/hardirq.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
#ifdef CONFIG_HIGHMEM
static int __set_page_address(struct page *page, void *virtual, int pos);
unsigned long totalhigh_pages __read_mostly;
EXPORT_SYMBOL(totalhigh_pages);
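/*
 * Count free pages in ZONE_HIGHMEM on every node, plus ZONE_MOVABLE
 * where that zone is backed by highmem.
 */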
unsigned int nr_free_highpages(void)
{
pg_data_t *pgdat;
unsigned int pages = 0;
for_each_online_pgdat(pgdat) {
pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
NR_FREE_PAGES);
if (zone_movable_is_highmem())
pages += zone_page_state(
&pgdat->node_zones[ZONE_MOVABLE],
NR_FREE_PAGES);
}
return pages;
}
/*
 * count is not a pure "count":
 * 0 means it's owned exclusively by someone,
 * 1 means it's free for use - either mapped or not,
 * n means there are (n-1) current users of it.
 */
struct pkmap_state {
atomic_t count;
int pfn;
};
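/*
 * pkmap       - per-slot state of the persistent kmap area
 * pkmap_hand  - clock hand: the next slot to consider for reuse
 * pkmap_free  - number of slots currently in state 1 (free for use)
 * pkmap_users - kmap user accounting, see kmap_account()
 * pkmap_wait  - waiters for a free slot or for accounting headroom
 */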
static struct pkmap_state pkmap[LAST_PKMAP];
static atomic_t pkmap_hand;
static atomic_t pkmap_free;
static atomic_t pkmap_users;
pte_t *pkmap_page_table;
static DECLARE_WAIT_QUEUE_HEAD(pkmap_wait);
/*
 * Most architectures have no use for kmap_high_get(), so let's abstract
 * the IRQ disabling out of the locking in that case to avoid potentially
 * useless overhead.
 */
#ifdef ARCH_NEEDS_KMAP_HIGH_GET
/* Referenced by the lock_kmap*() helpers; only kmap_high_get() takes it. */
static DEFINE_SPINLOCK(kmap_lock);
#define lock_kmap() spin_lock_irq(&kmap_lock)
#define unlock_kmap() spin_unlock_irq(&kmap_lock)
#define lock_kmap_any(flags) spin_lock_irqsave(&kmap_lock, flags)
#define unlock_kmap_any(flags) spin_unlock_irqrestore(&kmap_lock, flags)
#else
#define lock_kmap() spin_lock(&kmap_lock)
#define unlock_kmap() spin_unlock(&kmap_lock)
#define lock_kmap_any(flags) \
do { spin_lock(&kmap_lock); (void)(flags); } while (0)
#define unlock_kmap_any(flags) \
do { spin_unlock(&kmap_lock); (void)(flags); } while (0)
#endif
/*
* Try to free a given kmap slot.
*
* Returns:
* -1 - in use
* 0 - free, no TLB flush needed
* 1 - free, needs TLB flush
*/
static int pkmap_try_free(int pos)
{
if (atomic_cmpxchg(&pkmap[pos].count, 1, 0) != 1)
return -1;
atomic_dec(&pkmap_free);
/*
* TODO: add a young bit to make it CLOCK
*/
if (!pte_none(pkmap_page_table[pos])) {
unsigned long addr = PKMAP_ADDR(pos);
pte_t *ptep = &pkmap_page_table[pos];
if (!pkmap[pos].pfn) {
struct page *page = pte_page(pkmap_page_table[pos]);
VM_BUG_ON(addr != (unsigned long)page_address(page));
if (!__set_page_address(page, NULL, pos))
BUG();
flush_kernel_dcache_page(page);
}
pte_clear(&init_mm, addr, ptep);
return 1;
}
return 0;
}
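/*
 * Drop a reference on a pkmap slot. The last user (2 -> 1) marks the
 * slot free again and wakes any waiters; decrementing an already-free
 * slot (1 -> 0) is a bug.
 */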
static inline void pkmap_put(atomic_t *counter)
{
switch (atomic_dec_return(counter)) {
case 0:
BUG();
case 1:
atomic_inc(&pkmap_free);
wake_up(&pkmap_wait);
}
}
#define TLB_BATCH 32
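/*
 * Find a free pkmap slot and take exclusive ownership of it (count 0),
 * sleeping until one becomes available if need be. On architectures
 * that flush the whole TLB anyway, reclaim up to TLB_BATCH more slots
 * per flush to amortise its cost.
 */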
static int pkmap_get_free(void)
{
int i, pos, flush;
restart:
for (i = 0; i < LAST_PKMAP; i++) {
pos = atomic_inc_return(&pkmap_hand) & LAST_PKMAP_MASK;
flush = pkmap_try_free(pos);
if (flush >= 0)
goto got_one;
}
atomic_dec(&pkmap_free);
/*
* wait for somebody else to unmap their entries
*/
if (likely(!in_interrupt()))
wait_event(pkmap_wait, atomic_read(&pkmap_free) != 0);
goto restart;
got_one:
if (flush) {
#if 0
flush_tlb_kernel_range(PKMAP_ADDR(pos), PKMAP_ADDR(pos+1));
#else
int pos2 = (pos + 1) & LAST_PKMAP_MASK;
int nr;
int entries[TLB_BATCH];
/*
* For those architectures that cannot help but flush the
* whole TLB, flush some more entries to make it worthwhile.
* Scan ahead of the hand to minimise search distances.
*/
for (i = 0, nr = 0; i < LAST_PKMAP && nr < TLB_BATCH;
i++, pos2 = (pos2 + 1) & LAST_PKMAP_MASK) {
flush = pkmap_try_free(pos2);
if (flush < 0)
continue;
if (!flush) {
atomic_t *counter = &pkmap[pos2].count;
VM_BUG_ON(atomic_read(counter) != 0);
atomic_set(counter, 2);
pkmap_put(counter);
} else
entries[nr++] = pos2;
}
flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
for (i = 0; i < nr; i++) {
atomic_t *counter = &pkmap[entries[i]].count;
VM_BUG_ON(atomic_read(counter) != 0);
atomic_set(counter, 2);
pkmap_put(counter);
}
#endif
}
return pos;
}
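/*
 * Install a pte for @pfn with protection @prot in a free pkmap slot and
 * return the new virtual address, or 0 if a concurrent pkmap_insert()
 * for the same page won the race.
 */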
static unsigned long pkmap_insert(unsigned long pfn, pgprot_t prot)
{
int pos = pkmap_get_free();
unsigned long vaddr = PKMAP_ADDR(pos);
pte_t *ptep = &pkmap_page_table[pos];
pte_t entry = pfn_pte(pfn, prot);
atomic_t *counter = &pkmap[pos].count;
VM_BUG_ON(atomic_read(counter) != 0);
set_pte_at(&init_mm, vaddr, ptep, entry);
	/*
	 * Note whether this is a "pure pfn" mapping (non-default prot or
	 * no backing struct page) - those get no page_address() entry.
	 */
	pkmap[pos].pfn =
		!(pgprot_val(prot) == pgprot_val(kmap_prot) && pfn_valid(pfn));
	if (!pkmap[pos].pfn) {
struct page *page = pfn_to_page(pfn);
if (unlikely(!__set_page_address(page, (void *)vaddr, pos))) {
/*
* concurrent pkmap_inserts for this page -
* the other won the race, release this entry.
*
* we can still clear the pte without a tlb flush since
* it couldn't have been used yet.
*/
pte_clear(&init_mm, vaddr, ptep);
VM_BUG_ON(atomic_read(counter) != 0);
atomic_set(counter, 2);
pkmap_put(counter);
return 0;
}
} else {
#ifdef ARCH_NEEDS_KMAP_HIGH_GET
		/*
		 * Non-default prot and pure pfn mappings get neither map
		 * deduplication nor a working page_address(), which also
		 * makes them incompatible with ARCH_NEEDS_KMAP_HIGH_GET.
		 */
		BUG();
#endif
}
atomic_set(counter, 2);
return vaddr;
}
/*
 * Flush all unused kmap mappings in order to remove stray mappings.
 * Not implemented for the lockless pkmap; warn if anyone calls it.
 */
void kmap_flush_unused(void)
{
WARN_ON_ONCE(1);
}
/*
* Avoid starvation deadlock by limiting the number of tasks that can obtain a
* kmap to (LAST_PKMAP - KM_TYPE_NR*NR_CPUS)/2.
*/
static void kmap_account(void)
{
int weight;
#ifndef CONFIG_PREEMPT_RT
if (in_interrupt()) {
/* irqs can always get them */
weight = -1;
} else
#endif
if (current->flags & PF_KMAP) {
current->flags &= ~PF_KMAP;
/* we already accounted the second */
weight = 0;
} else {
/* mark 1, account 2 */
current->flags |= PF_KMAP;
weight = 2;
}
if (weight > 0) {
/*
* reserve KM_TYPE_NR maps per CPU for interrupt context
*/
const int target = LAST_PKMAP
#ifndef CONFIG_PREEMPT_RT
- KM_TYPE_NR*NR_CPUS
#endif
;
again:
wait_event(pkmap_wait,
atomic_read(&pkmap_users) + weight <= target);
if (atomic_add_return(weight, &pkmap_users) > target) {
atomic_sub(weight, &pkmap_users);
goto again;
}
}
}
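/*
 * Undo kmap_account(). If PF_KMAP is still set only one kmap happened,
 * so both accounted slots are released at once; otherwise each kunmap
 * releases one.
 */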
static void kunmap_account(void)
{
int weight;
#ifndef CONFIG_PREEMPT_RT
if (in_irq()) {
weight = -1;
} else
#endif
if (current->flags & PF_KMAP) {
/* there was only 1 kmap, un-account both */
current->flags &= ~PF_KMAP;
weight = 2;
} else {
/* there were two kmaps, un-account per kunmap */
weight = 1;
}
if (weight > 0)
atomic_sub(weight, &pkmap_users);
wake_up(&pkmap_wait);
}
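/*
 * Pin an existing mapping of @page and return its virtual address, or
 * NULL if the page is not currently mapped. The retry loop handles the
 * slot being freed and reused between the page_address() lookup and the
 * atomic_inc_not_zero().
 */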
void *kmap_get(struct page *page)
{
unsigned long vaddr;
again:
vaddr = (unsigned long)page_address(page);
if (vaddr) {
atomic_t *counter = &pkmap[PKMAP_NR(vaddr)].count;
if (atomic_inc_not_zero(counter)) {
/*
* atomic_inc_not_zero implies a (memory) barrier on
* success so page address will be reloaded.
*/
unsigned long vaddr2 = (unsigned long)page_address(page);
if (likely(vaddr == vaddr2))
return (void *)vaddr;
/*
* Oops, we got someone else.
*
* This can happen if we get preempted after
* page_address() and before atomic_inc_not_zero()
* and during that preemption this slot is freed and
* reused.
*/
pkmap_put(counter);
}
goto again;
}
return (void *)vaddr;
}
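/**
 * kmap_high - map a highmem page into memory
 * @page: &struct page to map
 *
 * Returns the page's virtual memory address. May sleep waiting for a
 * free pkmap slot, and retries until the mapping succeeds.
 */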
void *kmap_high(struct page *page)
{
unsigned long vaddr;
kmap_account();
again:
vaddr = (unsigned long)kmap_get(page);
if (!vaddr) {
vaddr = pkmap_insert(page_to_pfn(page), kmap_prot);
if (!vaddr)
goto again;
}
return (void *)vaddr;
}
EXPORT_SYMBOL(kmap_high);
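/*
 * Map an arbitrary @pfn with protection @prot. Ordinary highmem pages
 * with the default protection are handed to kmap_high() so they benefit
 * from mapping deduplication; everything else gets a dedicated slot.
 */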
void *kmap_pfn_prot(unsigned long pfn, pgprot_t prot)
{
unsigned long vaddr;
if (pgprot_val(prot) == pgprot_val(kmap_prot) &&
pfn_valid(pfn) && PageHighMem(pfn_to_page(pfn)))
return kmap_high(pfn_to_page(pfn));
kmap_account();
vaddr = pkmap_insert(pfn, prot);
BUG_ON(!vaddr);
return (void *)vaddr;
}
EXPORT_SYMBOL(kmap_pfn_prot);
#ifdef ARCH_NEEDS_KMAP_HIGH_GET
/**
* kmap_high_get - pin a highmem page into memory
* @page: &struct page to pin
*
 * Returns the page's current virtual memory address, or NULL if no mapping
 * exists. If and only if a non-NULL address is returned, a matching call
 * to kunmap_high() is necessary.
*
* This can be called from any context.
*/
void *kmap_high_get(struct page *page)
{
unsigned long vaddr, flags;
lock_kmap_any(flags);
vaddr = (unsigned long)kmap_get(page);
unlock_kmap_any(flags);
return (void *)vaddr;
}
#endif
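/*
 * Unmap a virtual address obtained from kmap() or kmap_pfn_prot().
 * Addresses outside the pkmap area are silently ignored, so lowmem
 * addresses may be passed as well.
 */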
void kunmap_virt(void *ptr)
{
unsigned long vaddr = (unsigned long)ptr;
if (vaddr < PKMAP_ADDR(0) || vaddr >= PKMAP_ADDR(LAST_PKMAP))
return;
pkmap_put(&pkmap[PKMAP_NR(vaddr)].count);
kunmap_account();
}
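/**
 * kunmap_high - unmap a highmem page
 * @page: &struct page to unmap
 *
 * Drops a reference on the page's pkmap slot; the final kunmap makes
 * the slot available for reuse.
 */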
void kunmap_high(struct page *page)
{
unsigned long vaddr = (unsigned long)page_address(page);
BUG_ON(!vaddr);
pkmap_put(&pkmap[PKMAP_NR(vaddr)].count);
kunmap_account();
}
EXPORT_SYMBOL(kunmap_high);
#endif /* CONFIG_HIGHMEM */
#if defined(HASHED_PAGE_VIRTUAL)
#define PA_HASH_ORDER 7
/*
* Describes one page->virtual address association.
*/
static struct page_address_map {
struct page *page;
void *virtual;
struct list_head list;
} page_address_maps[LAST_PKMAP];
/*
* Hash table bucket
*/
static struct page_address_slot {
struct list_head lh; /* List of page_address_maps */
spinlock_t lock; /* Protect this bucket's list */
} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
static struct page_address_slot *page_slot(struct page *page)
{
return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
}
/*
 * Search one hash bucket for @page's mapping. The caller must hold
 * pas->lock.
 */
static void *__page_address(struct page_address_slot *pas, struct page *page)
{
void *ret = NULL;
if (!list_empty(&pas->lh)) {
struct page_address_map *pam;
list_for_each_entry(pam, &pas->lh, list) {
if (pam->page == page) {
ret = pam->virtual;
break;
}
}
}
return ret;
}
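/**
 * page_address - get the mapped virtual address of a page
 * @page: &struct page to get the virtual address of
 *
 * Returns the page's virtual address, or NULL if the page is in highmem
 * and not currently mapped.
 */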
void *page_address(struct page *page)
{
unsigned long flags;
void *ret;
struct page_address_slot *pas;
if (!PageHighMem(page))
return lowmem_page_address(page);
pas = page_slot(page);
spin_lock_irqsave(&pas->lock, flags);
ret = __page_address(pas, page);
spin_unlock_irqrestore(&pas->lock, flags);
return ret;
}
EXPORT_SYMBOL(page_address);
/**
 * __set_page_address - set a page's virtual address
 * @page: &struct page to set
 * @virtual: virtual address to use, or NULL to remove the mapping
 * @pos: pkmap slot index backing the mapping
 *
 * Returns 1 if a mapping was added or removed, 0 otherwise.
 */
static int __set_page_address(struct page *page, void *virtual, int pos)
{
int ret = 0;
unsigned long flags;
struct page_address_slot *pas;
struct page_address_map *pam;
VM_BUG_ON(!PageHighMem(page));
VM_BUG_ON(atomic_read(&pkmap[pos].count) != 0);
VM_BUG_ON(pos < 0 || pos >= LAST_PKMAP);
pas = page_slot(page);
pam = &page_address_maps[pos];
spin_lock_irqsave(&pas->lock, flags);
if (virtual) { /* add */
VM_BUG_ON(!list_empty(&pam->list));
if (!__page_address(pas, page)) {
pam->page = page;
pam->virtual = virtual;
list_add_tail(&pam->list, &pas->lh);
ret = 1;
}
} else { /* remove */
if (!list_empty(&pam->list)) {
list_del_init(&pam->list);
ret = 1;
}
}
spin_unlock_irqrestore(&pas->lock, flags);
return ret;
}
int set_page_address(struct page *page, void *virtual)
{
/*
* set_page_address is not supposed to be called when using
* hashed virtual addresses.
*/
BUG();
return 0;
}
void __init __page_address_init(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
INIT_LIST_HEAD(&page_address_maps[i].list);
for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
INIT_LIST_HEAD(&page_address_htable[i].lh);
spin_lock_init(&page_address_htable[i].lock);
}
}
#elif defined(CONFIG_HIGHMEM) /* HASHED_PAGE_VIRTUAL */
static int __set_page_address(struct page *page, void *virtual, int pos)
{
return set_page_address(page, virtual);
}
#endif /* HASHED_PAGE_VIRTUAL */
#if defined(CONFIG_HIGHMEM) || defined(HASHED_PAGE_VIRTUAL)
void __init page_address_init(void)
{
#ifdef CONFIG_HIGHMEM
int i;
for (i = 0; i < ARRAY_SIZE(pkmap); i++)
atomic_set(&pkmap[i].count, 1);
atomic_set(&pkmap_hand, 0);
atomic_set(&pkmap_free, LAST_PKMAP);
atomic_set(&pkmap_users, 0);
#endif
#ifdef HASHED_PAGE_VIRTUAL
__page_address_init();
#endif
}
#endif /* CONFIG_HIGHMEM || HASHED_PAGE_VIRTUAL */
#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT)
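/*
 * Sanity-check that the km_type passed to kmap_atomic() matches the
 * calling context (hardirq, softirq, NMI), warning a limited number of
 * times before going quiet.
 */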
void debug_kmap_atomic(enum km_type type)
{
static int warn_count = 10;
if (unlikely(warn_count < 0))
return;
if (unlikely(in_interrupt())) {
if (in_nmi()) {
if (type != KM_NMI && type != KM_NMI_PTE) {
WARN_ON(1);
warn_count--;
}
} else if (in_irq()) {
if (type != KM_IRQ0 && type != KM_IRQ1 &&
type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
type != KM_BOUNCE_READ && type != KM_IRQ_PTE) {
WARN_ON(1);
warn_count--;
}
} else if (!irqs_disabled()) { /* softirq */
if (type != KM_IRQ0 && type != KM_IRQ1 &&
type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
type != KM_SKB_SUNRPC_DATA &&
type != KM_SKB_DATA_SOFTIRQ &&
type != KM_BOUNCE_READ) {
WARN_ON(1);
warn_count--;
}
}
}
	if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
			type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ ||
			type == KM_IRQ_PTE || type == KM_NMI ||
			type == KM_NMI_PTE) {
if (!irqs_disabled()) {
WARN_ON(1);
warn_count--;
}
} else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
if (irq_count() == 0 && !irqs_disabled()) {
WARN_ON(1);
warn_count--;
}
}
}
#endif