#ifndef _LINUX_MMZONE_H
#define _LINUX_MMZONE_H
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
#include <linux/config.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/wait.h>
/*
* Free memory management - zoned buddy allocator.
*/
#ifndef CONFIG_FORCE_MAX_ZONEORDER
#define MAX_ORDER 10
#else
#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
#endif
#define ZONE_DMA 0
#define ZONE_NORMAL 1
#define ZONE_HIGHMEM 2
#define MAX_NR_ZONES 3
typedef struct free_area_struct {
	struct list_head	free_list;
	unsigned long		*map;
} free_area_t;
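/*
 * Illustrative sketch (not part of the original header): each
 * free_area[order] tracks free buddy blocks of 2^order contiguous
 * pages, so valid orders run from 0 to MAX_ORDER-1 and, with
 * MAX_ORDER == 10, the largest block is 1 << 9 == 512 pages.  The
 * helper name pages_per_order() is hypothetical.
 */
static inline unsigned long pages_per_order(unsigned int order)
{
	/* a buddy block of the given order spans 2^order pages */
	return 1UL << order;
}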
struct pglist_data;
typedef struct zone_watermarks_s {
	unsigned long min, low, high;
} zone_watermarks_t;
/*
 * On machines where it is needed (e.g. PCs) we divide physical memory
 * into multiple physical zones. On a PC we have 3 zones:
 *
 * ZONE_DMA	  < 16 MB	ISA DMA capable memory
 * ZONE_NORMAL	16-896 MB	direct mapped by the kernel
 * ZONE_HIGHMEM	 > 896 MB	only page cache and user processes
 */
typedef struct zone_struct {
	/*
	 * Commonly accessed fields:
	 */
	spinlock_t		lock;
	unsigned long		free_pages;
	/*
	 * We don't know whether the memory we are about to allocate will be
	 * freeable or whether it will eventually be released, so to avoid
	 * wasting several GB of RAM we must reserve some of the lower zone
	 * memory (otherwise we risk running OOM on the lower zones even
	 * though there is plenty of freeable RAM in the higher zones).
	 */
	zone_watermarks_t	watermarks[MAX_NR_ZONES];
	/*
	 * The fields below are protected by different locks (or by
	 * no lock at all, as with need_balance), so they are longs to
	 * give each of them word-sized atomicity independent of the
	 * others on all architectures.
	 */
	unsigned long		need_balance;
	/* protected by the pagemap_lru_lock */
	unsigned long		nr_active_pages, nr_inactive_pages;
	/* protected by the pagecache_lock */
	unsigned long		nr_cache_pages;
	/*
	 * free areas of different sizes
	 */
	free_area_t		free_area[MAX_ORDER];
	/*
	 * wait_table		-- the array holding the hash table
	 * wait_table_size	-- the size of the hash table array
	 * wait_table_shift	-- BITS_PER_LONG minus the number of bits
	 *			   needed to index the table, so that
	 *			   wait_table_size == (1 << (BITS_PER_LONG - wait_table_shift))
	 *
	 * The purpose of all these is to keep track of the people
	 * waiting for a page to become available and make them
	 * runnable again when possible. The trouble is that this
	 * consumes a lot of space, especially when so few things
	 * wait on pages at a given time. So instead of using
	 * per-page waitqueues, we use a waitqueue hash table.
	 *
	 * The bucket discipline is to sleep on the same queue when
	 * colliding and wake all in that wait queue when removing.
	 * When something wakes, it must check to be sure its page is
	 * truly available, a la thundering herd. The cost of a
	 * collision is great, but given the expected load of the
	 * table, they should be so rare as to be outweighed by the
	 * benefits from the saved space.
	 *
	 * __wait_on_page() and unlock_page() in mm/filemap.c are the
	 * primary users of these fields, and free_area_init_core() in
	 * mm/page_alloc.c performs their initialization.
	 */
	wait_queue_head_t	*wait_table;
	unsigned long		wait_table_size;
	unsigned long		wait_table_shift;
	/*
	 * Discontig memory support fields.
	 */
	struct pglist_data	*zone_pgdat;
	struct page		*zone_mem_map;
	unsigned long		zone_start_paddr;
	unsigned long		zone_start_mapnr;
	/*
	 * rarely used fields:
	 */
	char			*name;
	unsigned long		size;
	unsigned long		realsize;
} zone_t;
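/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * header): a caller that has chosen a classzone index could compare a
 * zone's free page count against the per-class watermarks above; the
 * real decisions of this kind are made by the allocator in
 * mm/page_alloc.c.
 */
static inline int zone_free_below_low(zone_t *zone, int classzone_idx)
{
	/* true once free memory drops under the 'low' watermark */
	return zone->free_pages < zone->watermarks[classzone_idx].low;
}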
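/*
 * Illustrative sketch of the waitqueue hash described above: hash the
 * page's address and shift right by wait_table_shift so the result
 * indexes one of the wait_table_size buckets.  The helper name and the
 * multiplicative constant are assumptions for illustration only; the
 * real hash used by __wait_on_page()/unlock_page() lives in
 * mm/filemap.c.
 */
static inline wait_queue_head_t *example_page_waitqueue(zone_t *zone,
							 unsigned long page_addr)
{
	/* multiplicative hash (constant chosen only for this example) */
	unsigned long hash = page_addr * 2654435761UL;

	/* wait_table_shift == BITS_PER_LONG - log2(wait_table_size) */
	return &zone->wait_table[hash >> zone->wait_table_shift];
}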
/*
 * One allocation request operates on a zonelist. A zonelist
 * is a list of zones; the first one is the 'goal' of the
 * allocation, and the other zones are fallback zones, in decreasing
 * priority.
 *
 * Right now a zonelist takes up less than a cacheline. We never
 * modify it apart from boot-up, and only a few indices are used,
 * so despite the zonelist table being relatively big, the cache
 * footprint of this construct is very small.
 */
typedef struct zonelist_struct {
	zone_t *zones[MAX_NR_ZONES+1];	/* NULL delimited */
} zonelist_t;
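/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * header): because zones[] is NULL delimited, a caller can walk a
 * zonelist from the preferred zone through its fallbacks with a simple
 * loop, for example to total the free pages it covers.
 */
static inline unsigned long zonelist_free_pages(zonelist_t *zonelist)
{
	zone_t **zonep = zonelist->zones;
	zone_t *zone;
	unsigned long free = 0;

	/* stop at the terminating NULL entry */
	while ((zone = *zonep++))
		free += zone->free_pages;
	return free;
}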
#define GFP_ZONEMASK 0x0f
/*
* The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
* (mostly NUMA machines?) to denote a higher-level memory zone than the
* zone_struct denotes.
*
 * On NUMA machines, each NUMA node would have a pg_data_t to describe
 * its memory layout.
*
* XXX: we need to move the global memory statistics (active_list, ...)
* into the pg_data_t to properly support NUMA.
*/
struct bootmem_data;
typedef struct pglist_data {
	zone_t node_zones[MAX_NR_ZONES];
	zonelist_t node_zonelists[GFP_ZONEMASK+1];
	int nr_zones;
	struct page *node_mem_map;
	unsigned long *valid_addr_bitmap;
	struct bootmem_data *bdata;
	unsigned long node_start_paddr;
	unsigned long node_start_mapnr;
	unsigned long node_size;
	int node_id;
	struct pglist_data *node_next;
} pg_data_t;
extern int numnodes;
extern pg_data_t *pgdat_list;
#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
#define memclass(pgzone, classzone) (zone_idx(pgzone) <= zone_idx(classzone))
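/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * header): zone_idx() maps a zone pointer back to its ZONE_* index
 * within its node, so memclass() is non-zero exactly when pgzone sits
 * at or below the allocation's classzone.  With zones laid out as
 * [DMA, NORMAL, HIGHMEM], a ZONE_DMA page satisfies a ZONE_NORMAL
 * class, but a ZONE_HIGHMEM page does not.
 */
static inline int example_memclass(zone_t *pgzone, zone_t *classzone)
{
	/* spelled-out form of memclass(pgzone, classzone) */
	return zone_idx(pgzone) <= zone_idx(classzone);
}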
/*
* The following two are not meant for general usage. They are here as
* prototypes for the discontig memory code.
*/
struct page;
extern void show_free_areas_core(pg_data_t *pgdat);
extern void free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
	unsigned long *zones_size, unsigned long paddr, unsigned long *zholes_size,
	struct page *pmap);
extern pg_data_t contig_page_data;
/**
 * for_each_pgdat - helper macro to iterate over all nodes
 * @pgdat - pg_data_t * variable
 *
 * Meant to help with common loops of the form
 *	pgdat = pgdat_list;
 *	while (pgdat) {
 *		...
 *		pgdat = pgdat->node_next;
 *	}
 */
#define for_each_pgdat(pgdat) \
	for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next)
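/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * header): a typical use of for_each_pgdat() is to visit every node
 * exactly once, e.g. to total the pages spanned by all nodes.
 */
static inline unsigned long example_total_node_pages(void)
{
	pg_data_t *pgdat;
	unsigned long pages = 0;

	/* walks pgdat_list via node_next until the list ends */
	for_each_pgdat(pgdat)
		pages += pgdat->node_size;
	return pages;
}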
/*
* next_zone - helper magic for for_each_zone()
* Thanks to William Lee Irwin III for this piece of ingenuity.
*/
static inline zone_t *next_zone(zone_t *zone)
{
	pg_data_t *pgdat = zone->zone_pgdat;

	if (zone - pgdat->node_zones < MAX_NR_ZONES - 1)
		zone++;
	else if (pgdat->node_next) {
		pgdat = pgdat->node_next;
		zone = pgdat->node_zones;
	} else
		zone = NULL;

	return zone;
}
/**
 * for_each_zone - helper macro to iterate over all memory zones
 * @zone - zone_t * variable
 *
 * The user only needs to declare the zone variable; for_each_zone()
 * fills it in. This basically means for_each_zone() is an
 * easier to read version of this piece of code:
 *
 *	for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next)
 *		for (i = 0; i < MAX_NR_ZONES; ++i) {
 *			zone_t *z = pgdat->node_zones + i;
 *			...
 *		}
 */
#define for_each_zone(zone) \
	for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone))
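/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * header): for_each_zone() hides the two-level node/zone walk, so
 * summing free pages over every zone in the system becomes one loop.
 */
static inline unsigned long example_total_free_pages(void)
{
	zone_t *zone;
	unsigned long free = 0;

	/* next_zone() steps through all zones of all nodes in order */
	for_each_zone(zone)
		free += zone->free_pages;
	return free;
}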
#ifndef CONFIG_DISCONTIGMEM
#define NODE_DATA(nid) (&contig_page_data)
#define NODE_MEM_MAP(nid) mem_map
#define MAX_NR_NODES 1
#else /* !CONFIG_DISCONTIGMEM */
#include <asm/mmzone.h>
/* page->zone is currently 8 bits ... */
#ifndef MAX_NR_NODES
#define MAX_NR_NODES (255 / MAX_NR_ZONES)
#endif
#endif /* !CONFIG_DISCONTIGMEM */
#define MAP_ALIGN(x)	((((x) % sizeof(mem_map_t)) == 0) ? (x) : ((x) + \
		sizeof(mem_map_t) - ((x) % sizeof(mem_map_t))))
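/*
 * Illustrative note (not part of the original header): MAP_ALIGN()
 * rounds x up to the next multiple of sizeof(mem_map_t).  Assuming,
 * purely for illustration, sizeof(mem_map_t) == 64:
 *
 *	MAP_ALIGN(128) == 128	(already a multiple, left unchanged)
 *	MAP_ALIGN(130) == 192	(130 + 64 - (130 % 64))
 */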
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _LINUX_MMZONE_H */