blob: a3d81270bfa5db22e3b6d3e539c45a402e9fe1f5 [file] [log] [blame]
/*
* malloc.c --- a general purpose kernel memory allocator for Linux.
*
* Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
*
* This routine is written to be as fast as possible, so that it
* can be called from the interrupt level.
*
* Limitations: maximum size of memory we can allocate using this routine
* is 4k, the size of a page in Linux.
*
* The general game plan is that each page (called a bucket) will only hold
* objects of a given size. When all of the objects on a page are released,
* the page can be returned to the general free pool. When kmalloc() is
* called, it looks for the smallest bucket size which will fulfill its
* request, and allocates a piece of memory from that bucket pool.
*
* Each bucket has as its control block a bucket descriptor which keeps
* track of how many objects are in use on that page, and the free list
* for that page. Like the buckets themselves, bucket descriptors are
* stored on pages requested from get_free_page(). However, unlike buckets,
* pages devoted to bucket descriptor pages are never released back to the
* system. Fortunately, a system should probably only need 1 or 2 bucket
* descriptor pages, since a page can hold 256 bucket descriptors (which
* corresponds to 1 megabyte worth of bucket pages.) If the kernel is using
* that much allocated memory, it's probably doing something wrong. :-)
*
* Note: kmalloc() and kfree() both call get_free_page() and free_page()
* in sections of code where interrupts are turned off, to allow
* kmalloc() and kfree() to be safely called from an interrupt routine.
* (We will probably need this functionality when networking code,
* particularly things like NFS, is added to Linux.) However, this
* presumes that get_free_page() and free_page() are interrupt-level
* safe, which they may not be once paging is added. If this is the
* case, we will need to modify kmalloc() to keep a few unused pages
* "pre-allocated" so that it can safely draw upon those pages if
* it is called from an interrupt routine.
*
* Another concern is that get_free_page() should not sleep; if it
* does, the code is carefully ordered so as to avoid any race
* conditions. The catch is that if kmalloc() is called re-entrantly,
* there is a chance that unnecessary pages will be grabbed from the
* system. Except for the pages for the bucket descriptor page, the
* extra pages will eventually get released back to the system, though,
* so it isn't all that bad.
*/
/* I'm going to modify it to keep some free pages around. Get free page
can sleep, and tcp/ip needs to call kmalloc at interrupt time (Or keep
big buffers around for itself.) I guess I'll have return from
syscall fill up the free page descriptors. -RAB */
/* since the advent of GFP_ATOMIC, I've changed the kmalloc code to
use it and return NULL if it can't get a page. -RAB */
/* (mostly just undid the previous changes -RAB) */
/* I've added the priority argument to kmalloc so routines can
sleep on memory if they want. - RAB */
/* I've also got to make sure that kmalloc is reentrant now. */
/* Debugging support: add file/line info, add beginning+end markers. -M.U- */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <asm/system.h>
/*
 * Control block for one bucket page: records the page it manages, the
 * head of the intrusive free-object list inside that page, and how many
 * objects on the page are currently in use.
 */
struct bucket_desc {	/* 16 bytes */
	void			*page;		/* the data page this bucket manages */
	struct bucket_desc	*next;		/* next in bdir chain, or in free_bucket_desc list */
	void			*freeptr;	/* first free object within the page (NULL if full) */
	unsigned short		refcnt;		/* number of objects currently allocated from page */
	unsigned short		bucket_size;	/* object size this bucket serves */
};
/*
 * One entry of the bucket directory: an object size and the chain of
 * bucket descriptors currently serving allocations of that size.
 */
struct _bucket_dir {	/* 8 bytes */
	unsigned int		size;	/* object size; 0 terminates the table */
	struct bucket_desc	*chain;	/* descriptors holding pages of this size */
};
#ifdef CONFIG_DEBUG_MALLOC
/*
 * Debug headers bracket every object: hdr_start sits just before the
 * user data, hdr_end just after it.  The magic word in each encodes the
 * block's state so kmalloc/kfree can detect double frees, overruns and
 * use-after-free; the file/line pairs record the last caller and the
 * last caller that found the block intact.
 */
struct hdr_start {
	const char *file;	/* who last allocated or freed this block */
	const char *ok_file;	/* last caller that verified the block OK */
	unsigned short line;
	unsigned short ok_line;
	unsigned short size;	/* usable payload size, headers excluded */
	int magic;		/* one of the DEB_MAGIC_* states below */
};
struct hdr_end {
	int magic;		/* DEB_MAGIC_END unless the block was overrun */
};
#define DEB_MAGIC_FREE 0x13579BDF /* free block */
#define DEB_MAGIC_ALLOC 0x2468ACE0 /* allocated block */
#define DEB_MAGIC_USED 0x147AD036 /* allocated but bad */
#define DEB_MAGIC_FREED 0x258BE169 /* free but abused */
#define DEB_MAGIC_END 0x369CF258 /* end marker */
#endif
/*
* The following is where we store a pointer to the first bucket
* descriptor for a given size.
*
* If it turns out that the Linux kernel allocates a lot of objects of a
* specific size, then we may want to add that specific size to this list,
* since that will allow the memory to be allocated more efficiently.
* However, since an entire page must be dedicated to each specific size
* on this list, some amount of temperance must be exercised here.
*
* Note that this list *must* be kept in order.
*/
/* Sizes must be in ascending order; the zero-size entry ends the table. */
struct _bucket_dir bucket_dir[] = {
#ifndef CONFIG_DEBUG_MALLOC /* Debug headers have too much overhead */
	{ 16,	(struct bucket_desc *) 0},
#endif
	{ 32,	(struct bucket_desc *) 0},
	{ 64,	(struct bucket_desc *) 0},
	{ 128,	(struct bucket_desc *) 0},
	{ 256,	(struct bucket_desc *) 0},
	{ 512,	(struct bucket_desc *) 0},
	{ 1024,	(struct bucket_desc *) 0},
	{ 2048,	(struct bucket_desc *) 0},
	{ 4096,	(struct bucket_desc *) 0},
	{ 0,	(struct bucket_desc *) 0}};	/* End of list marker */
/*
 * This contains a linked list of free bucket descriptor blocks
 * (linked through bucket_desc.next; manipulated only with interrupts
 * disabled).  Descriptor pages are never returned to the system.
 */
static struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0;
/*
 * This routine initializes a bucket descriptor page: it carves the page
 * into a chain of bucket descriptors and prepends the whole chain onto
 * free_bucket_desc.
 *
 * It may be called with interrupts on (and may sleep in the caller's
 * get_free_page if priority != GFP_ATOMIC), but NOTE: it executes cli()
 * before linking the chain in and does NOT restore the flags -- it
 * returns with interrupts disabled, which kmalloc() relies on when it
 * subsequently pops a descriptor off free_bucket_desc.
 */
static inline void init_bucket_desc(unsigned long page)
{
	struct bucket_desc *bdesc;
	int i;

	bdesc = (struct bucket_desc *) page;
	/* Chain every descriptor on the page to its successor; the last
	   one (i == 1) is linked to the old list head below. */
	for (i = PAGE_SIZE/sizeof(struct bucket_desc); i > 1; i--) {
		bdesc->next = bdesc+1;
		bdesc++;
	}
	/*
	 * This is done last, to avoid race conditions in case
	 * get_free_page() sleeps and this routine gets called again....
	 */
	cli();
	bdesc->next = free_bucket_desc;
	free_bucket_desc = (struct bucket_desc *) page;
}
/*
* Re-organized some code to give cleaner assembly output for easier
* verification.. LBT
*/
/*
 * Allocate a block of at most one page from the bucket pools.
 * Returns NULL if no page can be obtained at the given priority, if the
 * request is larger than the largest bucket, or (when debugging) if the
 * chosen free object turns out to be corrupted.  Safe at interrupt
 * level because all shared-list manipulation is done under cli().
 */
#ifdef CONFIG_DEBUG_MALLOC
void *
deb_kmalloc(const char *deb_file, unsigned short deb_line,
	unsigned int len, int priority)
#else
void *
kmalloc(unsigned int len, int priority)
#endif
{
	int i;
	unsigned long flags;
	unsigned long page;
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc;
	void *retval;

#ifdef CONFIG_DEBUG_MALLOC
	/* Make room for the bracketing debug headers around the payload. */
	len += sizeof(struct hdr_start)+sizeof(struct hdr_end);
#endif
	/*
	 * First we search the bucket_dir to find the smallest bucket size
	 * that can satisfy this request.
	 */
	/* The sizes are static so there is no reentry problem here. */
	bdir = bucket_dir;
	for (bdir = bucket_dir ; bdir->size < len ; bdir++) {
		if (!bdir->size)
			goto too_large;
	}
	/*
	 * Now we search for a bucket descriptor which has free space
	 */
	save_flags(flags);
	cli();			/* Avoid race conditions */
	for (bdesc = bdir->chain; bdesc != NULL; bdesc = bdesc->next)
		if (bdesc->freeptr)
			goto found_bdesc;
	/*
	 * If we didn't find a bucket with free space, then we'll
	 * allocate a new one.
	 */
	/*
	 * Note that init_bucket_descriptor() does its
	 * own cli() before returning, and guarantees that
	 * there is a bucket desc in the page.
	 */
	if (!free_bucket_desc) {
		/* Interrupts back on: get_free_page() may sleep here. */
		restore_flags(flags);
		page = get_free_page(priority);
		if (!page)
			return NULL;
		/* Returns with interrupts off, making the pop below atomic. */
		init_bucket_desc(page);
	}
	bdesc = free_bucket_desc;
	free_bucket_desc = bdesc->next;
	/* bdesc is now private to us, so interrupts can come back on
	   while we fetch and initialize the data page. */
	restore_flags(flags);
	page = get_free_page(priority);
	/*
	 * Out of memory? Put the bucket descriptor back on the free list
	 */
	if (!page) {
		cli();
		bdesc->next = free_bucket_desc;
		free_bucket_desc = bdesc;
		restore_flags(flags);
		return NULL;
	}
	bdesc->refcnt = 0;
	bdesc->bucket_size = bdir->size;
	bdesc->page = bdesc->freeptr = (void *) page;
	/* Set up the chain of free objects */
	for (i=PAGE_SIZE/bdir->size; i > 0 ; i--) {
#ifdef CONFIG_DEBUG_MALLOC
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) page;
		he = (struct hdr_end *)(page+(bdir->size-sizeof(struct hdr_end)));
		hd->magic = DEB_MAGIC_FREE;
		hd->file = hd->ok_file = "(expand)";
		hd->line = hd->ok_line = 0;
		hd->size = bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end);
		he->magic = DEB_MAGIC_END;
		/* Poison the free payload so use-while-free is detectable. */
		memset(hd+1,0xF8,hd->size);
		/* First payload word links to the next free object. */
		*((void **) (hd+1)) = (i==1) ? NULL : (void *)(page + bdir->size);
#else
		*((void **) page) = (i==1) ? NULL : (void *)(page + bdir->size);
#endif
		page += bdir->size;
	}
	/* turn interrupts back off for putting the
	   thing onto the chain. */
	cli();
	/* remember bdir is not changed. */
	bdesc->next = bdir->chain;	/* OK, link it in! */
	bdir->chain = bdesc;
found_bdesc:
	/* Interrupts are off here; pop one object off the free list. */
	retval = (void *) bdesc->freeptr;
#ifdef CONFIG_DEBUG_MALLOC
	bdesc->freeptr = *((void **) (((char *)retval)+sizeof(struct hdr_start)));
#else
	bdesc->freeptr = *((void **) retval);
#endif
	bdesc->refcnt++;
	restore_flags(flags);	/* OK, we're safe again */
#ifdef CONFIG_DEBUG_MALLOC
	{
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) retval;
		retval = hd+1;	/* user data starts just past the header */
		len -= sizeof(struct hdr_start)+sizeof(struct hdr_end);
		/* The object we just popped must still be marked free. */
		if(hd->magic != DEB_MAGIC_FREE && hd->magic != DEB_MAGIC_FREED) {
			printk("DEB_MALLOC allocating %s block 0x%x (head 0x%x) from %s:%d, magic %x\n",
				(hd->magic == DEB_MAGIC_ALLOC) ? "nonfree" : "trashed",
				retval,hd,deb_file,deb_line,hd->magic);
			return NULL;
		}
		if(len > hd->size || len > bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end)) {
			printk("DEB_MALLOC got %x:%x-byte block, wanted %x, from %s:%d, last %s:%d\n",
				hd->size,bdir->size,len,hd->file,hd->line,deb_file,deb_line);
			return NULL;
		}
		/* Verify the 0xF8 free-poison, skipping the first 4 bytes
		   that held the free-list link word. */
		{
			unsigned char *x = (unsigned char *) retval;
			unsigned short pos = 4;
			x += pos;
			while(pos < hd->size) {
				if(*x++ != 0xF8) {
					printk("DEB_MALLOC used 0x%x:%x(%x) while free, from %s:%d\n",
						retval,pos,hd->size,hd->file,hd->line);
					return NULL;
				}
				pos++;
			}
		}
		he = (struct hdr_end *)(((char *)retval)+hd->size);
		if(he->magic != DEB_MAGIC_END) {
			printk("DEB_MALLOC overran 0x%x:%d while free, from %s:%d\n",retval,hd->size,hd->file,hd->line);
		}
		/* Poison the new allocation and move the end marker to
		   sit right after the requested length. */
		memset(retval, 0xf0, len);
		he = (struct hdr_end *)(((char *)retval)+len);
		hd->file = hd->ok_file = deb_file;
		hd->line = hd->ok_line = deb_line;
		hd->size = len;
		hd->magic = DEB_MAGIC_ALLOC;
		he->magic = DEB_MAGIC_END;
	}
#endif
	return retval;

too_large:
	/* This should be changed for sizes > 1 page. */
	printk("kmalloc called with impossibly large argument (%d)\n", len);
	return NULL;
}
#ifdef CONFIG_DEBUG_MALLOC
/*
 * Sanity-check an allocated block without freeing it.  Verifies the
 * magic words at both ends and the recorded size, records the caller
 * as the last known-good checkpoint, and downgrades the block's state
 * marker when corruption is detected.  A NULL obj is a no-op.
 */
void deb_kcheck_s(const char *deb_file, unsigned short deb_line,
	void *obj, int size)
{
	struct hdr_start *head;
	struct hdr_end *tail;

	if (!obj)
		return;
	head = ((struct hdr_start *) obj) - 1;

	/* Not a live allocation: complain once, then remember the abuse. */
	if (head->magic != DEB_MAGIC_ALLOC) {
		if (head->magic != DEB_MAGIC_USED && head->magic != DEB_MAGIC_FREED)
			printk("DEB_MALLOC Using %s block of 0x%x from %s:%d, by %s:%d, last %s:%d\n",
				(head->magic == DEB_MAGIC_FREE)?"free":"bad",obj,deb_file,deb_line,head->file,head->line,head->ok_file,head->ok_line);
		if (head->magic == DEB_MAGIC_FREE)
			head->magic = DEB_MAGIC_FREED;
		return;
	}

	/* Caller-supplied size must match the stored one (0 = don't know). */
	if (head->size != size) {
		if (size != 0)
			printk("DEB_MALLOC size for 0x%x given as %d, stored %d, from %s:%d, last %s:%d\n",
				obj,size,head->size,deb_file,deb_line,head->ok_file,head->ok_line);
		size = head->size;
	}

	/* End marker intact?  If not, mark the block as known-bad. */
	tail = (struct hdr_end *)(((char *)obj)+size);
	if (tail->magic != DEB_MAGIC_END) {
		printk("DEB_MALLOC overran block 0x%x:%d, free at %s:%d\n",obj,head->size,deb_file,deb_line);
		head->magic = DEB_MAGIC_USED;
		return;
	}

	/* Block looks healthy: record this caller as the last good check. */
	head->ok_file = deb_file;
	head->ok_line = deb_line;
}
#endif
/*
* Here is the kfree routine. If you know the size of the object that you
* are freeing, then kfree_s() will use that information to speed up the
* search for the bucket descriptor.
*
* We will #define a macro so that "kfree(x)" becomes "kfree_s(x, 0)"
*/
/*
 * Free a block previously returned by kmalloc().  A nonzero size
 * narrows the directory search to buckets of at least that size; size 0
 * searches every bucket chain for the object's page.  When the last
 * object on a bucket page is freed, the page goes back to the system
 * and its descriptor returns to the free descriptor list.
 */
#ifdef CONFIG_DEBUG_MALLOC
void deb_kfree_s(const char *deb_file, unsigned short deb_line,
	void *obj, int size)
#else
void kfree_s(void *obj, int size)
#endif
{
	unsigned long flags;
	void *page;
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc, *prev;

	if (!obj)
		return;
#ifdef CONFIG_DEBUG_MALLOC
	/* Validate the debug headers before touching any free list;
	   refuse to free blocks that look corrupted or double-freed. */
	{
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) obj;
		hd--;
		if(hd->magic != DEB_MAGIC_ALLOC && hd->magic != DEB_MAGIC_USED) {
			if(hd->magic != DEB_MAGIC_FREED)
				printk("DEB_MALLOC %s free of 0x%x from %s:%d by %s:%d, last %s:%d\n",
					(hd->magic == DEB_MAGIC_FREE)?"dup":"bad",
					obj,deb_file,deb_line,hd->file,hd->line,hd->ok_file,hd->ok_line);
			return;
		}
		if(hd->size != size) {
			if(size != 0) {
				if(hd->magic != DEB_MAGIC_USED)
					printk("DEB_MALLOC size for 0x%x given as %d, stored %d, from %s:%d, last %s:%d\n",
						obj,size,hd->size,deb_file,deb_line,hd->ok_file,hd->ok_line);
			}
			size = hd->size;
		}
		he = (struct hdr_end *)(((char *)obj)+size);
		if(he->magic != DEB_MAGIC_END) {
			if(hd->magic != DEB_MAGIC_USED)
				printk("DEB_MALLOC overran block 0x%x:%d, free at %s:%d, last %s:%d\n",
					obj,hd->size,deb_file,deb_line,hd->ok_file,hd->ok_line);
			return;
		}
		/* Search below by gross size, headers included. */
		size += sizeof(struct hdr_start)+sizeof(struct hdr_end);
	}
#endif
	save_flags(flags);
	/* Calculate what page this object lives in */
	page = (void *) ((unsigned long) obj & 0xfffff000);
	/* Now search the buckets looking for that page */
	for (bdir = bucket_dir; bdir->size; bdir++) {
		prev = 0;
		/* If size is zero then this conditional is always true */
		if (bdir->size >= size) {
			/* We have to turn off interrupts here because
			   we are descending the chain.  If something
			   changes it in the middle we could suddenly
			   find ourselves descending the free list.
			   I think this would only cause a memory
			   leak, but better safe than sorry. */
			cli();	/* To avoid race conditions */
			for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
				if (bdesc->page == page)
					goto found;
				prev = bdesc;
			}
		}
	}
	restore_flags(flags);
	printk("Bad address passed to kernel kfree_s(%X, %d)\n",obj, size);
#ifdef CONFIG_DEBUG_MALLOC
	printk("Offending code: %s:%d\n",deb_file,deb_line);
#else
	printk("Offending eip: %08x\n",((unsigned long *) &obj)[-1]);
#endif
	return;
found:
	/* interrupts are off here. */
#ifdef CONFIG_DEBUG_MALLOC
	/* Re-mark the block free, restore the 0xf8 poison, and retarget
	   obj at the header so the free-list link is stored there. */
	{
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) obj;
		hd--;
		hd->file = deb_file;
		hd->line = deb_line;
		hd->magic = DEB_MAGIC_FREE;
		hd->size = bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end);
		he = (struct hdr_end *)(((char *)obj)+hd->size);
		memset(obj, 0xf8, hd->size);
		he->magic = DEB_MAGIC_END;
		*((void **)obj) = bdesc->freeptr;
		obj = hd;
	}
#else
	/* Link the object back onto this bucket's free list. */
	*((void **)obj) = bdesc->freeptr;
#endif
	bdesc->freeptr = obj;
	bdesc->refcnt--;
	if (bdesc->refcnt == 0) {
		/*
		 * We need to make sure that prev is still accurate.  It
		 * may not be, if someone rudely interrupted us....
		 */
		if ((prev && (prev->next != bdesc)) ||
		    (!prev && (bdir->chain != bdesc)))
			for (prev = bdir->chain; prev; prev = prev->next)
				if (prev->next == bdesc)
					break;
		if (prev)
			prev->next = bdesc->next;
		else {
			if (bdir->chain != bdesc)
				panic("kmalloc bucket chains corrupted");
			bdir->chain = bdesc->next;
		}
		/* Page is now empty: recycle descriptor and data page. */
		bdesc->next = free_bucket_desc;
		free_bucket_desc = bdesc;
		free_page((unsigned long) bdesc->page);
	}
	restore_flags(flags);
	return;
}
#ifdef CONFIG_DEBUG_MALLOC
/*
 * Dump every currently allocated (debug-tracked) block into buffer,
 * one "address:size allocator last-check" line per block.  Output is
 * truncated with "+++\n" once it approaches a page.  Returns the
 * number of bytes written.
 */
int get_malloc(char *buffer)
{
	int written = 0;
	int slot;
	unsigned long flags;
	void *cursor;
	struct _bucket_dir *dir;
	struct bucket_desc *desc;

	save_flags(flags);
	cli();		/* To avoid race conditions */

	/* Walk every bucket size, every page of that size, every slot. */
	for (dir = bucket_dir; dir->size; dir++) {
		for (desc = dir->chain; desc; desc = desc->next) {
			cursor = desc->page;
			for (slot = PAGE_SIZE/dir->size; slot > 0; slot--) {
				struct hdr_start *hd = (struct hdr_start *) cursor;
				if (hd->magic == DEB_MAGIC_ALLOC) {
					/* Leave headroom for one more line. */
					if (written > PAGE_SIZE-80) {
						restore_flags(flags);
						written += sprintf(buffer+written,"+++\n");
						return written;
					}
					written += sprintf(buffer+written,"%08x:%03x %s:%d %s:%d\n",
						(long)(cursor+sizeof(struct hdr_start)),hd->size,hd->file,hd->line,hd->ok_file,hd->ok_line);
				}
				cursor += dir->size;
			}
		}
	}
	restore_flags(flags);
	return written;
}
#endif