| /* |
| * mm/simp.c -- simple allocator for cached objects |
| * |
| * (C) 1997 Thomas Schoebel-Theuer |
| */ |
| |
| #include <linux/simp.h> |
| #include <linux/tasks.h> |
| #include <linux/smp.h> |
| #include <linux/interrupt.h> |
| #include <asm/spinlock.h> |
| #include <asm/hardirq.h> |
| |
| /*#define BENCHMARK*/ |
| |
| #ifdef BENCHMARK |
| #include <linux/slab.h> |
| |
/* Benchmark payload: a minimal two-pointer cell, like a lisp cons. */
struct cons {
	struct cons *car, *cdr;
};

/* Scratch array holding the objects allocated by each benchmark run. */
static struct cons * table[10000];

/* Generates the typed SIMP wrapper (cons_create(), __cons_alloc(),
 * __cons_dealloc(), cons_try_delete()) for struct cons. */
DEF_SIMP_HEADER(cons,struct cons,sizeof(struct cons),SIMP_SMALLOBJS,0,(structor)0,(structor)0,(structor)0,0)

/* The two competing pools: the SIMP pool and the reference SLAB cache. */
static struct cons_simp * cons_simp;
static kmem_cache_t * slab;
| |
| static void simp_straight(struct cons_simp * cons_simp) |
| { |
| int j; |
| for(j = 0; j < 10000; j++) { |
| table[j] = __cons_alloc(cons_simp); |
| } |
| for(j = 0; j < 10000; j++) { |
| __cons_dealloc(cons_simp, table[j]); |
| } |
| } |
| |
| static void slab_straight(kmem_cache_t * slab) |
| { |
| int j; |
| for(j = 0; j < 10000; j++) { |
| table[j] = kmem_cache_alloc(slab, SLAB_KERNEL); |
| } |
| for(j = 0; j < 10000; j++) { |
| kmem_cache_free(slab, table[j]); |
| } |
| } |
| |
| static void simp_osc(struct cons_simp * cons_simp) |
| { |
| int j; |
| for(j = 0; j < 10000; j++) { |
| struct cons * tmp = __cons_alloc(cons_simp); |
| table[j] = __cons_alloc(cons_simp); |
| __cons_dealloc(cons_simp, tmp); |
| } |
| for(j = 0; j < 10000; j++) { |
| __cons_dealloc(cons_simp, table[j]); |
| } |
| } |
| |
| static void slab_osc(kmem_cache_t * slab) |
| { |
| int j; |
| for(j = 0; j < 10000; j++) { |
| struct cons * tmp = kmem_cache_alloc(slab, SLAB_KERNEL); |
| table[j] = kmem_cache_alloc(slab, SLAB_KERNEL); |
| kmem_cache_free(slab, tmp); |
| } |
| for(j = 0; j < 10000; j++) { |
| kmem_cache_free(slab, table[j]); |
| } |
| } |
| |
| static void simp_heavy(struct cons_simp * cons_simp) |
| { |
| int j; |
| struct cons * buf[10]; |
| for(j = 0; j < 10000; j++) { |
| int k; |
| for(k = 0; k < 10; k++) |
| buf[k] = __cons_alloc(cons_simp); |
| table[j] = __cons_alloc(cons_simp); |
| for(k = 0; k < 10; k++) |
| __cons_dealloc(cons_simp, buf[k]); |
| } |
| for(j = 0; j < 10000; j++) { |
| __cons_dealloc(cons_simp, table[j]); |
| } |
| } |
| |
| static void slab_heavy(kmem_cache_t * slab) |
| { |
| int j; |
| struct cons * buf[10]; |
| for(j = 0; j < 10000; j++) { |
| int k; |
| for(k = 0; k < 10; k++) |
| buf[k] = kmem_cache_alloc(slab, SLAB_KERNEL); |
| table[j] = kmem_cache_alloc(slab, SLAB_KERNEL); |
| for(k = 0; k < 10; k++) |
| kmem_cache_free(slab, buf[k]); |
| } |
| for(j = 0; j < 10000; j++) { |
| kmem_cache_free(slab, table[j]); |
| } |
| } |
| |
/* SIMP pattern "plain": alloc/free pairs with no live objects in
 * between -- measures the raw fast path. */
static void simp_plain(struct cons_simp * cons_simp)
{
	int i;

	for(i = 0; i < 10000; i++) {
		struct cons * obj = __cons_alloc(cons_simp);
		__cons_dealloc(cons_simp, obj);
	}
}
| |
| static void slab_plain(kmem_cache_t * slab) |
| { |
| int j; |
| for(j = 0; j < 10000; j++) { |
| kmem_cache_free(slab, kmem_cache_alloc(slab, SLAB_KERNEL)); |
| } |
| } |
| |
| void benchmark(void) |
| { |
| int i; |
| long time; |
| cons_simp = cons_create("cons"); |
| slab = kmem_cache_create("cons", sizeof(struct cons), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); |
| |
| time = jiffies; |
| for(i = 0; i < 1000; i++) { |
| simp_straight(cons_simp); |
| } |
| printk("SIMP benchmark (straight): %5ld jiffies\n", jiffies - time); |
| |
| time = jiffies; |
| for(i = 0; i < 1000; i++) { |
| slab_straight(slab); |
| } |
| printk("SLAB benchmark (straight): %5ld jiffies\n", jiffies - time); |
| |
| time = jiffies; |
| for(i = 0; i < 1000; i++) { |
| simp_osc(cons_simp); |
| } |
| printk("SIMP benchmark (oscillating): %5ld jiffies\n", jiffies - time); |
| |
| time = jiffies; |
| for(i = 0; i < 1000; i++) { |
| slab_osc(slab); |
| } |
| printk("SLAB benchmark (oscillating): %5ld jiffies\n", jiffies - time); |
| |
| time = jiffies; |
| for(i = 0; i < 300; i++) { |
| simp_heavy(cons_simp); |
| } |
| printk("SIMP benchmark (heavy): %5ld jiffies\n", jiffies - time); |
| |
| time = jiffies; |
| for(i = 0; i < 300; i++) { |
| slab_heavy(slab); |
| } |
| printk("SLAB benchmark (heavy): %5ld jiffies\n", jiffies - time); |
| |
| time = jiffies; |
| for(i = 0; i < 6000; i++) { |
| simp_plain(cons_simp); |
| } |
| printk("SIMP benchmark (plain): %5ld jiffies\n", jiffies - time); |
| |
| time = jiffies; |
| for(i = 0; i < 6000; i++) { |
| slab_plain(slab); |
| } |
| printk("SLAB benchmark (plain): %5ld jiffies\n", jiffies - time); |
| if(cons_try_delete(cons_simp)) |
| printk("deletion of cons simp failed!\n"); |
| } |
| |
| #endif |
| |
/* Size in bytes of the global descriptor area (GLOBAL_ORDER pages). */
#define GLOBAL_SIZE (PAGE_SIZE << (GLOBAL_ORDER))
/* One-past-the-end pointer of a chunk with the given byte size. */
#define CHUNK_END(hdr,size) (void**)((char*)(hdr) + size)

#define COLOR_INCREMENT (8*sizeof(void*)) /* should be 1 cache line */
/* Round adr up to the next COLOR_INCREMENT boundary (adr itself if aligned). */
#define ALIGN_CACHE(adr) ((((((unsigned long)adr) - 1) / COLOR_INCREMENT) + 1) * COLOR_INCREMENT)
/* Per-chunk header size, padded out to a whole cache line. */
#define HEADER_SIZE ALIGN_CACHE(sizeof(struct simp_header))
/* Number of pool descriptors fitting in the global area (one struct's
 * worth reserved for the bookkeeping head of struct global_data). */
#define MAX_SIMPS ((GLOBAL_SIZE / sizeof(struct simp)) - 1)
| |
/* All pool descriptors live in a single page block, allocated lazily
 * on the first simp_create(). */
struct global_data {
	/* 1st cache line */
	long nr_simps;		/* high-water mark of descriptor slots handed out */
	spinlock_t lock;	/* protects nr_simps and slot (re)allocation */
	char fill[15*sizeof(void*) - sizeof(spinlock_t)];	/* pad so simps[] starts on the next cache line */
	/* rest */
	struct simp simps[MAX_SIMPS];
};

static struct global_data * global = NULL;	/* set up by simp_create() on first use */

#ifdef SIMP_DEBUG
/* Magic pattern copied into every chunk header; check_header() uses it
 * to detect corruption and bogus free pointers. */
static char global_magic[32] = "SIMP header SdC581oi9rY20051962\n";
#endif
| |
| #ifdef DEAD_BEEF |
| void make_dead_beef(struct simp_header * hdr, void * objp) |
| { |
| unsigned int * ptr = (unsigned int*)objp; |
| int count = hdr->father->real_size / sizeof(unsigned int); |
| while(count--) |
| *ptr++ = 0xdeadbeef; |
| } |
| |
| void test_dead_beef(struct simp * simp, void * objp) |
| { |
| unsigned int * ptr = (unsigned int*)objp; |
| int size = simp->real_size / sizeof(unsigned int); |
| while(size--) { |
| if(*ptr++ != 0xdeadbeef) { |
| printk("SIMP: deadbeef of object %p is touched\n", |
| objp); |
| printk("SIMP: pool is %s\n", simp->name); |
| break; |
| } |
| } |
| } |
| #endif |
| |
/* Allocate a page block of the given order, never failing: on shortage
 * retry after reclaiming simp chunks via simp_garbage(), and otherwise
 * back off -- schedule() in process context, or a busy wait with
 * interrupts enabled when called from interrupt context. */
void * always_get_pages(int gfp_mask, unsigned long gfporder)
{
	void * res;
	for(;;) {
		res = (void*)__get_free_pages(gfp_mask, gfporder);
		if(res)
			break;
		if(simp_garbage())
			continue;
		if(in_interrupt()) {
			int i;
			/* irq-flags must be unsigned long for
			 * __save_flags()/__restore_flags(); the old
			 * 'int' truncated them on some archs */
			unsigned long flags;
			/* Kludge, to be reworked: spin with interrupts
			 * enabled so pending irqs can free memory
			 * before we retry */
			__save_flags(flags);
			__sti();
			for(i = 0; i < 100000; i++) {
				__sti();
			}
			__restore_flags(flags);
		} else {
			schedule();
		}
	}
	return res;
}
| |
| |
| struct simp * simp_create(char * name, long size, long order, |
| structor first_ctor, |
| structor dtor, int reserve) |
| { |
| struct simp * simp = NULL; |
| long fraction; |
| long real_size; |
| int cpu; |
| int i; |
| |
| if(!global) { |
| global = (struct global_data*)always_get_pages(GFP_KERNEL,GLOBAL_ORDER); |
| memset(global, 0, GLOBAL_SIZE); |
| spin_lock_init(&global->lock); |
| #ifdef BENCHMARK |
| benchmark(); |
| #endif |
| } |
| |
| spin_lock(&global->lock); |
| for(i = 0; i < global->nr_simps; i++) { |
| simp = &global->simps[i]; |
| if(!simp->name) |
| break; |
| } |
| if(i >= global->nr_simps) { |
| global->nr_simps = i+1; |
| simp = &global->simps[i]; |
| } |
| spin_unlock(&global->lock); |
| |
| if(global->nr_simps >= MAX_SIMPS) { |
| printk("SIMP: too many simps allocated\n"); |
| return NULL; |
| } |
| spin_lock_init(&simp->lock); |
| simp->name = name; |
| simp->size = size; |
| real_size = ALIGN_CACHE(size); |
| /* allow aggregation of very small objects in 2-power fractions of |
| * cachelines */ |
| fraction = COLOR_INCREMENT / 2; |
| while(size <= fraction && fraction >= sizeof(void*)) { |
| real_size = fraction; |
| fraction >>= 1; |
| } |
| simp->real_size = real_size; |
| simp->order = order; |
| simp->chunksize = CHUNK_SIZE(order); |
| simp->hdr_mask = ~(CHUNK_SIZE(order)-1); |
| simp->first_ctor = first_ctor; |
| simp->dtor = dtor; |
| simp->reserve = reserve; |
| |
| real_size += sizeof(void*); |
| simp->max_elems = (CHUNK_SIZE(order) - HEADER_SIZE) / real_size; |
| simp->max_color = (CHUNK_SIZE(order) - HEADER_SIZE) % real_size; |
| for(cpu = 0; cpu < NR_PROCESSORS; cpu++) { |
| struct per_cpu * private = &simp->private[cpu]; |
| private->buffer_pos = private->postbuffer; |
| } |
| return simp; |
| } |
| |
| |
| |
| |
/* Do *not* inline this, it clobbers too many registers... */
/* Add one fresh chunk to simp->usable_list.
 * Called with simp->lock held; the lock is dropped around the page
 * allocation and re-acquired before returning.
 * NOTE(review): the failure path ("return" below) exits with the lock
 * NOT re-acquired, and the caller (bunch_alloc) neither rechecks the
 * list nor re-takes the lock.  It appears unreachable because
 * always_get_pages() loops until it succeeds -- confirm before
 * changing either function. */
static void alloc_simp_header(struct simp * simp)
{
	struct simp_header * hdr;
	char * ptr;
	void ** index;
	long count;

	spin_unlock(&simp->lock);
	for(;;) {
		hdr = (struct simp_header*)always_get_pages(GFP_KERNEL, simp->order);
		if(hdr)
			break;
		if(!simp_garbage())
			return;
	}
#ifdef SIMP_DEBUG
	/* chunks must be naturally aligned so __CHUNK_BASE() can mask
	 * an object address back to its header */
	if(__CHUNK_BASE(hdr,simp) != hdr)
		panic("simp: bad kernel page alignment");
#endif

	memset(hdr, 0, HEADER_SIZE);
#ifdef SIMP_DEBUG
	memcpy(hdr->magic, global_magic, sizeof(global_magic));
#endif
	hdr->father = simp;
	hdr->first_ctor = simp->first_ctor;

	/* note: races on simp->color don't produce any error :-) */
	/* Build the free index at the chunk's tail: it grows downwards,
	 * one pointer per (cache-colored) object slot. */
	ptr = ((char*)hdr) + HEADER_SIZE + simp->color;
	index = CHUNK_END(hdr,simp->chunksize);
	for(count = 0; count < simp->max_elems; count++) {
		*--index = ptr;
		ptr += simp->real_size;
	}
	/* all three cursors start at "everything free, nothing handed out" */
	hdr->index = hdr->fresh = hdr->emptypos = index;

	spin_lock(&simp->lock);
	/* advance the cache color so the next chunk staggers its objects */
	simp->color += COLOR_INCREMENT;
	if(simp->color >= simp->max_color)
		simp->color = 0;
	hdr->next = simp->usable_list;
	simp->usable_list = hdr;
	simp->nr_header++;
}
| |
/* The current x86 memcpy() moves registers around for nothing,
 * does unnecessary work when the size is divisible by a power of two,
 * and clobbers way too many registers.
 * This results in nearly every other register being transferred to the stack.
 * Fixing this would be a major win for the whole kernel!
 */
/* Refill the caller's per-cpu post buffer with up to POSTBUFFER_SIZE/2
 * object pointers taken from the pool's first usable chunk, running the
 * first-time constructor on never-before-used objects.
 * Returns the new buffer position (one past the last pointer copied).
 * NOTE(review): 'hdr' is dereferenced after the lock is dropped on the
 * DEAD_BEEF path -- presumably safe because a chunk with live objects
 * cannot be reclaimed; confirm. */
void ** bunch_alloc(struct simp * simp, void ** buffer)
{
	struct simp_header * hdr;
	void ** index;
	void ** to;
	void ** end;
	structor todo;
	long length;

	spin_lock(&simp->lock);
	hdr = simp->usable_list;
	if(!hdr) {
		/* drops and re-acquires simp->lock internally */
		alloc_simp_header(simp);
		hdr = simp->usable_list;
		/* NOTE(review): hdr is not re-checked; if
		 * alloc_simp_header() ever failed this would oops */
	}

	index = hdr->index;
	end = hdr->fresh;
	todo = NULL;
	if(index == end) {
		/* no recycled objects left: hand out fresh ones, which
		 * still need their first-time constructor */
		end = CHUNK_END(hdr,simp->chunksize);
		todo = hdr->first_ctor;
	}
	to = index + POSTBUFFER_SIZE/2;
	if(to >= end) {
		to = end;
		if(to == CHUNK_END(hdr,simp->chunksize)) {
			/* chunk exhausted: unlink it; next == -1 marks
			 * "not on the usable list" for __bunch_free() */
			simp->usable_list = hdr->next;
			hdr->next = (struct simp_header*)-1;
		}
	}
	if(to > hdr->fresh)
		hdr->fresh = to;
	hdr->index = to;
	length = ((unsigned long)to) - (unsigned long)index;

	/* hand the object pointers to the caller in one go */
	memcpy(buffer, index, length);

	spin_unlock(&simp->lock);

	to = buffer + (length/sizeof(void*));
#ifdef DEAD_BEEF
	do {
		make_dead_beef(hdr, *buffer);
	} while(++buffer < to);
#else
	if(todo) {
		/* run the first-time constructors outside the lock */
		do {
			todo(*buffer);
		} while(++buffer < to);
	}
#endif
	return to;
}
| |
| #ifdef SIMP_DEBUG |
| long check_header(struct simp_header * hdr, void * ptr) |
| { |
| void ** test; |
| |
| if(!hdr) { |
| printk("SIMP: simp_free() with NULL pointer\n"); |
| return 1; |
| } |
| if(strncmp(hdr->magic, global_magic, 32)) { |
| printk("SIMP: simpe_free() with bad ptr %p, or header corruption\n", ptr); |
| return 1; |
| } |
| /* This is brute force, but I don't want to pay for any |
| * overhead if debugging is not enabled, in particular |
| * no space overhead for keeping hashtables etc. */ |
| test = hdr->index; |
| while(test < CHUNK_END(hdr,hdr->father->chunksize)) { |
| if(*test++ == ptr) { |
| printk("SIMP: trying to simp_free(%p) again\n", ptr); |
| return 1; |
| } |
| } |
| return 0; |
| } |
| #endif |
| |
/* Give objects from a per-cpu post buffer back to their home chunks.
 * Called with simp->lock held.  Walks the buffer downwards until
 * reaching 'stop'; with one != 0 it steps one object per iteration,
 * with one == 0 the loop body is hand-unrolled to two objects per
 * iteration (NOTE(review): this assumes buffer - stop is even on that
 * path -- confirm the POSTBUFFER_SIZE-derived counts guarantee it).
 * Returns the new (lowered) buffer position. */
static inline
void ** __bunch_free(struct simp * simp, void ** buffer, void ** stop, int one)
{
	while(buffer > stop) {
		void * elem = buffer[-1];
		/* recover the chunk header by masking the object address */
		struct simp_header * hdr = __CHUNK_BASE(elem,simp);
		void ** index = hdr->index;
		index--;
		hdr->index = index;
		*index = elem;
		if(hdr->next == (struct simp_header*)-1) {
			/* chunk was full and unlinked: make it usable again */
			hdr->next = simp->usable_list;
			simp->usable_list = hdr;
		}
		if(one) {
			buffer--;
			continue;
		}

		/* unrolled second copy of the above for the next element */
		buffer -= 2;
		elem = *buffer;
		hdr = __CHUNK_BASE(elem,simp);
		index = hdr->index;
		index--;
		hdr->index = index;
		*index = elem;
		if(hdr->next == (struct simp_header*)-1) {
			hdr->next = simp->usable_list;
			simp->usable_list = hdr;
		}
	}
	return buffer;
}
| |
| void ** bunch_free(struct simp * simp, void ** buffer) |
| { |
| void ** stop = buffer - POSTBUFFER_SIZE/3; |
| void ** res; |
| |
| spin_lock(&simp->lock); |
| res = __bunch_free(simp, buffer, stop, 0); |
| spin_unlock(&simp->lock); |
| return res; |
| } |
| |
/* Reclaim completely empty chunks of one pool, keeping simp->reserve
 * chunks cached.  First drains every cpu's deferred-free buffer so
 * empty chunks become detectable.  Returns the number of chunks freed
 * back to the page allocator. */
long __simp_garbage(struct simp * simp)
{
	struct simp_header ** base = &simp->usable_list;
	struct simp_header * del;
	int res = 0;
	int j;

	spin_lock(&simp->lock);
	/* flush all per-cpu post buffers into their chunks */
	for(j = 0; j < NR_PROCESSORS; j++) {
		struct per_cpu * private = &simp->private[j];
		private->buffer_pos =
			__bunch_free(simp,
				     private->buffer_pos,
				     private->postbuffer, 1);
	}

	del = *base;
	while(del && simp->nr_header > simp->reserve) {
		/* index back at emptypos means every object has been freed */
		if(del->index == del->emptypos) {
			if(simp->dtor) {
				/* destruct each object before the pages go away */
				void ** ptr = del->index;
				while(ptr < CHUNK_END(del,simp->chunksize)) {
					simp->dtor(*ptr++);
				}
			}
			/* unlink and release the chunk */
			*base = del->next;
#ifdef SIMP_DEBUG
			memset(del, 0, CHUNK_SIZE(simp->order));
#endif
			free_pages((unsigned long)del, simp->order);
			res++;
			simp->nr_header--;
		} else
			base = &del->next;
		del = *base;
	}
	spin_unlock(&simp->lock);
	return res;
}
| |
| long simp_garbage(void) |
| { |
| int i; |
| int res; |
| /* Note: costs do not matter here. Any heavy thrashing of |
| * simp chunks that could be caused by pools stealing each |
| * other's memory has to be considered a BUG :-) |
| * Simply avoid memory shortages by conservative allocating |
| * policies. |
| */ |
| res = 0; |
| if(global) { |
| for(i = 0; i < global->nr_simps; i++) { |
| struct simp * simp = &global->simps[i]; |
| if(simp->name) |
| res += __simp_garbage(simp); |
| } |
| } |
| return res; |
| } |
| |
| int __simp_statistics(struct simp * simp, long * nr_free, int * posted) |
| { |
| struct simp_header * hdr; |
| long free = 0; |
| int nr_usable = 0; |
| int post = 0; |
| int j; |
| |
| for(j = 0; j < NR_PROCESSORS; j++) { |
| struct per_cpu * private = &simp->private[j]; |
| long len = (unsigned long)private->buffer_pos - |
| (unsigned long)private->postbuffer; |
| post += len / sizeof(void*); |
| } |
| *posted = post; |
| |
| for(hdr = simp->usable_list; hdr; hdr = hdr->next) { |
| long len = (long)CHUNK_END(hdr,simp->chunksize) - |
| (long)hdr->index; |
| free += len / sizeof(void*); |
| nr_usable++; |
| } |
| *nr_free = free; |
| return nr_usable; |
| } |
| |
| long __try_delete(struct simp * simp) |
| { |
| long nr_free; |
| int posted; |
| simp->reserve = 0; |
| __simp_garbage(simp); |
| __simp_statistics(simp, &nr_free, &posted); |
| if(!simp->nr_header) { |
| memset(simp, 0, sizeof(struct simp)); |
| } |
| return simp->nr_header * simp->max_elems - nr_free - posted; |
| |
| } |
| |
/* Format a /proc-style statistics table for all pools into buf and
 * return the number of characters written.
 * NOTE(review): buf is assumed to be large enough (presumably one
 * page); the sprintf()s are not bounds checked -- confirm the caller's
 * buffer size against MAX_SIMPS. */
int get_simpinfo(char * buf)
{
	int i;
	char * pos;
#ifdef BENCHMARK
	benchmark();
#endif
	pos = buf + sprintf(buf, " PoolName ElemSize RealSize ChnkSize El/Ch Alloced Pstd Hdrs Usabl FreeElms\n");
	if (!global)
		goto out;
	for(i = 0; i < global->nr_simps; i++) {
		struct simp * simp = &global->simps[i];
		int posted;
		int nr_usable;
		long nr_free;
		/* skip unused/deleted descriptor slots */
		if(!simp->name)
			continue;
		nr_usable = __simp_statistics(simp, &nr_free, &posted);
		/* "Alloced" = elements handed out and not yet returned */
		pos += sprintf(pos, "%10s %8ld %8ld %8ld %5ld %8ld %4d %5ld %5d %8ld\n",
			       simp->name, simp->size, simp->real_size,
			       PAGE_SIZE << simp->order, simp->max_elems,
			       simp->nr_header * simp->max_elems - nr_free - posted,
			       posted, simp->nr_header, nr_usable, nr_free);
	}
out:
	*pos = '\0';
	return (long)pos - (long)buf;
}
| |