| /* |
| * Copyright (c) 2006 Silicon Graphics, Inc. |
| * All Rights Reserved. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License as |
| * published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it would be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, write the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <pthread.h> |
| |
| #include "libxfs_priv.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_mount.h" |
| #include "xfs_bit.h" |
| |
| #define CACHE_DEBUG 1 |
| #undef CACHE_DEBUG |
/* #define CACHE_DEBUG 1 */
| #undef CACHE_ABORT |
| /* #define CACHE_ABORT 1 */ |
| |
| #define CACHE_SHAKE_COUNT 64 |
| |
| static unsigned int cache_generic_bulkrelse(struct cache *, struct list_head *); |
| |
| struct cache * |
| cache_init( |
| int flags, |
| unsigned int hashsize, |
| struct cache_operations *cache_operations) |
| { |
| struct cache * cache; |
| unsigned int i, maxcount; |
| |
| maxcount = hashsize * HASH_CACHE_RATIO; |
| |
| if (!(cache = malloc(sizeof(struct cache)))) |
| return NULL; |
| if (!(cache->c_hash = calloc(hashsize, sizeof(struct cache_hash)))) { |
| free(cache); |
| return NULL; |
| } |
| |
| cache->c_flags = flags; |
| cache->c_count = 0; |
| cache->c_max = 0; |
| cache->c_hits = 0; |
| cache->c_misses = 0; |
| cache->c_maxcount = maxcount; |
| cache->c_hashsize = hashsize; |
	cache->c_hashshift = libxfs_highbit32(hashsize);	/* floor(log2(hashsize)) */
| cache->hash = cache_operations->hash; |
| cache->alloc = cache_operations->alloc; |
| cache->flush = cache_operations->flush; |
| cache->relse = cache_operations->relse; |
| cache->compare = cache_operations->compare; |
| cache->bulkrelse = cache_operations->bulkrelse ? |
| cache_operations->bulkrelse : cache_generic_bulkrelse; |
| pthread_mutex_init(&cache->c_mutex, NULL); |
| |
| for (i = 0; i < hashsize; i++) { |
| list_head_init(&cache->c_hash[i].ch_list); |
| cache->c_hash[i].ch_count = 0; |
| pthread_mutex_init(&cache->c_hash[i].ch_mutex, NULL); |
| } |
| |
| for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) { |
| list_head_init(&cache->c_mrus[i].cm_list); |
| cache->c_mrus[i].cm_count = 0; |
| pthread_mutex_init(&cache->c_mrus[i].cm_mutex, NULL); |
| } |
| return cache; |
| } |
| |
| void |
| cache_expand( |
| struct cache * cache) |
| { |
| pthread_mutex_lock(&cache->c_mutex); |
| #ifdef CACHE_DEBUG |
| fprintf(stderr, "doubling cache size to %d\n", 2 * cache->c_maxcount); |
| #endif |
| cache->c_maxcount *= 2; |
| pthread_mutex_unlock(&cache->c_mutex); |
| } |
| |
| void |
| cache_walk( |
| struct cache * cache, |
| cache_walk_t visit) |
| { |
| struct cache_hash * hash; |
| struct list_head * head; |
| struct list_head * pos; |
| unsigned int i; |
| |
| for (i = 0; i < cache->c_hashsize; i++) { |
| hash = &cache->c_hash[i]; |
| head = &hash->ch_list; |
| pthread_mutex_lock(&hash->ch_mutex); |
| for (pos = head->next; pos != head; pos = pos->next) |
			visit((struct cache_node *)pos);	/* cn_hash is the node's first field */
| pthread_mutex_unlock(&hash->ch_mutex); |
| } |
| } |
| |
| #ifdef CACHE_ABORT |
| #define cache_abort() abort() |
| #else |
| #define cache_abort() do { } while (0) |
| #endif |
| |
| #ifdef CACHE_DEBUG |
| static void |
| cache_zero_check( |
| struct cache_node * node) |
| { |
| if (node->cn_count > 0) { |
| fprintf(stderr, "%s: refcount is %u, not zero (node=%p)\n", |
| __FUNCTION__, node->cn_count, node); |
| cache_abort(); |
| } |
| } |
| #define cache_destroy_check(c) cache_walk((c), cache_zero_check) |
| #else |
| #define cache_destroy_check(c) do { } while (0) |
| #endif |
| |
| void |
| cache_destroy( |
| struct cache * cache) |
| { |
| unsigned int i; |
| |
| cache_destroy_check(cache); |
| for (i = 0; i < cache->c_hashsize; i++) { |
| list_head_destroy(&cache->c_hash[i].ch_list); |
| pthread_mutex_destroy(&cache->c_hash[i].ch_mutex); |
| } |
| for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) { |
| list_head_destroy(&cache->c_mrus[i].cm_list); |
| pthread_mutex_destroy(&cache->c_mrus[i].cm_mutex); |
| } |
| pthread_mutex_destroy(&cache->c_mutex); |
| free(cache->c_hash); |
| free(cache); |
| } |
| |
| static unsigned int |
| cache_generic_bulkrelse( |
| struct cache * cache, |
| struct list_head * list) |
| { |
| struct cache_node * node; |
| unsigned int count = 0; |
| |
| while (!list_empty(list)) { |
| node = list_entry(list->next, struct cache_node, cn_mru); |
| pthread_mutex_destroy(&node->cn_mutex); |
| list_del_init(&node->cn_mru); |
| cache->relse(node); |
| count++; |
| } |
| |
| return count; |
| } |
| |
| /* |
| * Park unflushable nodes on their own special MRU so that cache_shake() doesn't |
| * end up repeatedly scanning them in the futile attempt to clean them before |
| * reclaim. |
| */ |
| static void |
| cache_add_to_dirty_mru( |
| struct cache *cache, |
| struct cache_node *node) |
| { |
| struct cache_mru *mru = &cache->c_mrus[CACHE_DIRTY_PRIORITY]; |
| |
| pthread_mutex_lock(&mru->cm_mutex); |
| node->cn_old_priority = node->cn_priority; |
| node->cn_priority = CACHE_DIRTY_PRIORITY; |
| list_add(&node->cn_mru, &mru->cm_list); |
| mru->cm_count++; |
| pthread_mutex_unlock(&mru->cm_mutex); |
| } |
| |
| /* |
| * We've hit the limit on cache size, so we need to start reclaiming nodes we've |
| * used. The MRU specified by the priority is shaken. Returns new priority at |
| * end of the call (in case we call again). We are not allowed to reclaim dirty |
| * objects, so we have to flush them first. If flushing fails, we move them to |
| * the "dirty, unreclaimable" list. |
| * |
| * Hence we skip priorities > CACHE_MAX_PRIORITY unless "purge" is set as we |
| * park unflushable (and hence unreclaimable) buffers at these priorities. |
| * Trying to shake unreclaimable buffer lists when there is memory pressure is a |
| * waste of time and CPU and greatly slows down cache node recycling operations. |
| * Hence we only try to free them if we are being asked to purge the cache of |
| * all entries. |
| */ |
| static unsigned int |
| cache_shake( |
| struct cache * cache, |
| unsigned int priority, |
| bool purge) |
| { |
| struct cache_mru *mru; |
| struct cache_hash * hash; |
| struct list_head temp; |
| struct list_head * head; |
| struct list_head * pos; |
| struct list_head * n; |
| struct cache_node * node; |
| unsigned int count; |
| |
| ASSERT(priority <= CACHE_DIRTY_PRIORITY); |
| if (priority > CACHE_MAX_PRIORITY && !purge) |
| priority = 0; |
| |
| mru = &cache->c_mrus[priority]; |
| count = 0; |
| list_head_init(&temp); |
| head = &mru->cm_list; |
| |
| pthread_mutex_lock(&mru->cm_mutex); |
| for (pos = head->prev, n = pos->prev; pos != head; |
| pos = n, n = pos->prev) { |
| node = list_entry(pos, struct cache_node, cn_mru); |
| |
| if (pthread_mutex_trylock(&node->cn_mutex) != 0) |
| continue; |
| |
		/* memory pressure is not allowed to release dirty objects */
		if (cache->flush(node) && !purge) {
			list_del(&node->cn_mru);
			mru->cm_count--;
			pthread_mutex_unlock(&node->cn_mutex);
			cache_add_to_dirty_mru(cache, node);
			continue;
		}
| |
| hash = cache->c_hash + node->cn_hashidx; |
| if (pthread_mutex_trylock(&hash->ch_mutex) != 0) { |
| pthread_mutex_unlock(&node->cn_mutex); |
| continue; |
| } |
| ASSERT(node->cn_count == 0); |
| ASSERT(node->cn_priority == priority); |
| node->cn_priority = -1; |
| |
| list_move(&node->cn_mru, &temp); |
| list_del_init(&node->cn_hash); |
| hash->ch_count--; |
| mru->cm_count--; |
| pthread_mutex_unlock(&hash->ch_mutex); |
| pthread_mutex_unlock(&node->cn_mutex); |
| |
| count++; |
| if (!purge && count == CACHE_SHAKE_COUNT) |
| break; |
| } |
| pthread_mutex_unlock(&mru->cm_mutex); |
| |
| if (count > 0) { |
| cache->bulkrelse(cache, &temp); |
| |
| pthread_mutex_lock(&cache->c_mutex); |
| cache->c_count -= count; |
| pthread_mutex_unlock(&cache->c_mutex); |
| } |
| |
| return (count == CACHE_SHAKE_COUNT) ? priority : ++priority; |
| } |
| |
| /* |
| * Allocate a new hash node (updating atomic counter in the process), |
| * unless doing so will push us over the maximum cache size. |
| */ |
| static struct cache_node * |
| cache_node_allocate( |
| struct cache * cache, |
| cache_key_t key) |
| { |
| unsigned int nodesfree; |
| struct cache_node * node; |
| |
| pthread_mutex_lock(&cache->c_mutex); |
| nodesfree = (cache->c_count < cache->c_maxcount); |
| if (nodesfree) { |
| cache->c_count++; |
| if (cache->c_count > cache->c_max) |
| cache->c_max = cache->c_count; |
| } |
| cache->c_misses++; |
| pthread_mutex_unlock(&cache->c_mutex); |
| if (!nodesfree) |
| return NULL; |
| node = cache->alloc(key); |
| if (node == NULL) { /* uh-oh */ |
| pthread_mutex_lock(&cache->c_mutex); |
| cache->c_count--; |
| pthread_mutex_unlock(&cache->c_mutex); |
| return NULL; |
| } |
| pthread_mutex_init(&node->cn_mutex, NULL); |
| list_head_init(&node->cn_mru); |
| node->cn_count = 1; |
| node->cn_priority = 0; |
| node->cn_old_priority = -1; |
| return node; |
| } |
| |
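/*
 * Report whether the cache has ever filled completely: c_max is the
 * high-water mark of c_count, so once it reaches c_maxcount the cache
 * has hit its size limit at least once.
 */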
| int |
| cache_overflowed( |
| struct cache * cache) |
| { |
| return cache->c_maxcount == cache->c_max; |
| } |
| |
| |
| static int |
| __cache_node_purge( |
| struct cache * cache, |
| struct cache_node * node) |
| { |
| int count; |
| struct cache_mru * mru; |
| |
| pthread_mutex_lock(&node->cn_mutex); |
| count = node->cn_count; |
| if (count != 0) { |
| pthread_mutex_unlock(&node->cn_mutex); |
| return count; |
| } |
| |
| /* can't purge dirty objects */ |
| if (cache->flush(node)) { |
| pthread_mutex_unlock(&node->cn_mutex); |
| return 1; |
| } |
| |
| mru = &cache->c_mrus[node->cn_priority]; |
| pthread_mutex_lock(&mru->cm_mutex); |
| list_del_init(&node->cn_mru); |
| mru->cm_count--; |
| pthread_mutex_unlock(&mru->cm_mutex); |
| |
| pthread_mutex_unlock(&node->cn_mutex); |
| pthread_mutex_destroy(&node->cn_mutex); |
| list_del_init(&node->cn_hash); |
| cache->relse(node); |
| return 0; |
| } |
| |
| /* |
| * Lookup in the cache hash table. With any luck we'll get a cache |
| * hit, in which case this will all be over quickly and painlessly. |
| * Otherwise, we allocate a new node, taking care not to expand the |
| * cache beyond the requested maximum size (shrink it if it would). |
 * Returns zero if the object was found in the cache (a hit), or one if
 * a new node had to be allocated (a miss); a node is _always_ returned,
 * however.
| */ |
| int |
| cache_node_get( |
| struct cache * cache, |
| cache_key_t key, |
| struct cache_node ** nodep) |
| { |
| struct cache_node * node = NULL; |
| struct cache_hash * hash; |
| struct cache_mru * mru; |
| struct list_head * head; |
| struct list_head * pos; |
| struct list_head * n; |
| unsigned int hashidx; |
| int priority = 0; |
| int purged = 0; |
| |
| hashidx = cache->hash(key, cache->c_hashsize, cache->c_hashshift); |
| hash = cache->c_hash + hashidx; |
| head = &hash->ch_list; |
| |
| for (;;) { |
| pthread_mutex_lock(&hash->ch_mutex); |
| for (pos = head->next, n = pos->next; pos != head; |
| pos = n, n = pos->next) { |
| int result; |
| |
| node = list_entry(pos, struct cache_node, cn_hash); |
| result = cache->compare(node, key); |
| switch (result) { |
| case CACHE_HIT: |
| break; |
| case CACHE_PURGE: |
| if ((cache->c_flags & CACHE_MISCOMPARE_PURGE) && |
| !__cache_node_purge(cache, node)) { |
| purged++; |
| hash->ch_count--; |
| } |
| /* FALL THROUGH */ |
| case CACHE_MISS: |
| goto next_object; |
| } |
| |
| /* |
| * node found, bump node's reference count, remove it |
| * from its MRU list, and update stats. |
| */ |
| pthread_mutex_lock(&node->cn_mutex); |
| |
| if (node->cn_count == 0) { |
| ASSERT(node->cn_priority >= 0); |
| ASSERT(!list_empty(&node->cn_mru)); |
| mru = &cache->c_mrus[node->cn_priority]; |
| pthread_mutex_lock(&mru->cm_mutex); |
| mru->cm_count--; |
| list_del_init(&node->cn_mru); |
| pthread_mutex_unlock(&mru->cm_mutex); |
| if (node->cn_old_priority != -1) { |
| ASSERT(node->cn_priority == |
| CACHE_DIRTY_PRIORITY); |
| node->cn_priority = node->cn_old_priority; |
| node->cn_old_priority = -1; |
| } |
| } |
| node->cn_count++; |
| |
| pthread_mutex_unlock(&node->cn_mutex); |
| pthread_mutex_unlock(&hash->ch_mutex); |
| |
| pthread_mutex_lock(&cache->c_mutex); |
| cache->c_hits++; |
| pthread_mutex_unlock(&cache->c_mutex); |
| |
| *nodep = node; |
| return 0; |
| next_object: |
			continue;	/* a label must be followed by a statement */
| } |
| pthread_mutex_unlock(&hash->ch_mutex); |
| /* |
| * not found, allocate a new entry |
| */ |
| node = cache_node_allocate(cache, key); |
| if (node) |
| break; |
| priority = cache_shake(cache, priority, false); |
| /* |
| * We start at 0; if we free CACHE_SHAKE_COUNT we get |
| * back the same priority, if not we get back priority+1. |
| * If we exceed CACHE_MAX_PRIORITY all slots are full; grow it. |
| */ |
| if (priority > CACHE_MAX_PRIORITY) { |
| priority = 0; |
| cache_expand(cache); |
| } |
| } |
| |
| node->cn_hashidx = hashidx; |
| |
| /* add new node to appropriate hash */ |
| pthread_mutex_lock(&hash->ch_mutex); |
| hash->ch_count++; |
| list_add(&node->cn_hash, &hash->ch_list); |
| pthread_mutex_unlock(&hash->ch_mutex); |
| |
| if (purged) { |
| pthread_mutex_lock(&cache->c_mutex); |
| cache->c_count -= purged; |
| pthread_mutex_unlock(&cache->c_mutex); |
| } |
| |
| *nodep = node; |
| return 1; |
| } |
| |
| void |
| cache_node_put( |
| struct cache * cache, |
| struct cache_node * node) |
| { |
| struct cache_mru * mru; |
| |
| pthread_mutex_lock(&node->cn_mutex); |
| #ifdef CACHE_DEBUG |
| if (node->cn_count < 1) { |
| fprintf(stderr, "%s: node put on refcount %u (node=%p)\n", |
| __FUNCTION__, node->cn_count, node); |
| cache_abort(); |
| } |
| if (!list_empty(&node->cn_mru)) { |
| fprintf(stderr, "%s: node put on node (%p) in MRU list\n", |
| __FUNCTION__, node); |
| cache_abort(); |
| } |
| #endif |
| node->cn_count--; |
| |
| if (node->cn_count == 0) { |
| /* add unreferenced node to appropriate MRU for shaker */ |
| mru = &cache->c_mrus[node->cn_priority]; |
| pthread_mutex_lock(&mru->cm_mutex); |
| mru->cm_count++; |
| list_add(&node->cn_mru, &mru->cm_list); |
| pthread_mutex_unlock(&mru->cm_mutex); |
| } |
| |
| pthread_mutex_unlock(&node->cn_mutex); |
| } |
| |
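/*
 * Pin a node at a reclaim priority, clamped to [0, CACHE_MAX_PRIORITY].
 * cache_shake() is driven from priority zero upwards, so nodes at
 * higher priorities are the last to be reclaimed under memory pressure.
 */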
| void |
| cache_node_set_priority( |
| struct cache * cache, |
| struct cache_node * node, |
| int priority) |
| { |
| if (priority < 0) |
| priority = 0; |
| else if (priority > CACHE_MAX_PRIORITY) |
| priority = CACHE_MAX_PRIORITY; |
| |
| pthread_mutex_lock(&node->cn_mutex); |
| ASSERT(node->cn_count > 0); |
| node->cn_priority = priority; |
| node->cn_old_priority = -1; |
| pthread_mutex_unlock(&node->cn_mutex); |
| } |
| |
| int |
| cache_node_get_priority( |
| struct cache_node * node) |
| { |
| int priority; |
| |
| pthread_mutex_lock(&node->cn_mutex); |
| priority = node->cn_priority; |
| pthread_mutex_unlock(&node->cn_mutex); |
| |
| return priority; |
| } |
| |
| |
| /* |
| * Purge a specific node from the cache. Reference count must be zero. |
| */ |
| int |
| cache_node_purge( |
| struct cache * cache, |
| cache_key_t key, |
| struct cache_node * node) |
| { |
| struct list_head * head; |
| struct list_head * pos; |
| struct list_head * n; |
| struct cache_hash * hash; |
| int count = -1; |
| |
| hash = cache->c_hash + cache->hash(key, cache->c_hashsize, |
| cache->c_hashshift); |
| head = &hash->ch_list; |
| pthread_mutex_lock(&hash->ch_mutex); |
| for (pos = head->next, n = pos->next; pos != head; |
| pos = n, n = pos->next) { |
| if ((struct cache_node *)pos != node) |
| continue; |
| |
| count = __cache_node_purge(cache, node); |
| if (!count) |
| hash->ch_count--; |
| break; |
| } |
| pthread_mutex_unlock(&hash->ch_mutex); |
| |
| if (count == 0) { |
| pthread_mutex_lock(&cache->c_mutex); |
| cache->c_count--; |
| pthread_mutex_unlock(&cache->c_mutex); |
| } |
| #ifdef CACHE_DEBUG |
| if (count >= 1) { |
| fprintf(stderr, "%s: refcount was %u, not zero (node=%p)\n", |
| __FUNCTION__, count, node); |
| cache_abort(); |
| } |
| if (count == -1) { |
| fprintf(stderr, "%s: purge node not found! (node=%p)\n", |
| __FUNCTION__, node); |
| cache_abort(); |
| } |
| #endif |
| return count == 0; |
| } |
| |
| /* |
| * Purge all nodes from the cache. All reference counts must be zero. |
| */ |
| void |
| cache_purge( |
| struct cache * cache) |
| { |
| int i; |
| |
| for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) |
| cache_shake(cache, i, true); |
| |
| #ifdef CACHE_DEBUG |
| if (cache->c_count != 0) { |
| /* flush referenced nodes to disk */ |
| cache_flush(cache); |
| fprintf(stderr, "%s: shake on cache %p left %u nodes!?\n", |
| __FUNCTION__, cache, cache->c_count); |
| cache_abort(); |
| } |
| #endif |
| } |
| |
| /* |
| * Flush all nodes in the cache to disk. |
| */ |
| void |
| cache_flush( |
| struct cache * cache) |
| { |
| struct cache_hash * hash; |
| struct list_head * head; |
| struct list_head * pos; |
| struct cache_node * node; |
| int i; |
| |
| if (!cache->flush) |
| return; |
| |
| for (i = 0; i < cache->c_hashsize; i++) { |
| hash = &cache->c_hash[i]; |
| |
| pthread_mutex_lock(&hash->ch_mutex); |
| head = &hash->ch_list; |
| for (pos = head->next; pos != head; pos = pos->next) { |
| node = (struct cache_node *)pos; |
| pthread_mutex_lock(&node->cn_mutex); |
| cache->flush(node); |
| pthread_mutex_unlock(&node->cn_mutex); |
| } |
| pthread_mutex_unlock(&hash->ch_mutex); |
| } |
| } |
| |
| #define HASH_REPORT (3 * HASH_CACHE_RATIO) |
| void |
| cache_report( |
| FILE *fp, |
| const char *name, |
| struct cache *cache) |
| { |
| int i; |
| unsigned long count, index, total; |
| unsigned long hash_bucket_lengths[HASH_REPORT + 2]; |
| |
| if ((cache->c_hits + cache->c_misses) == 0) |
| return; |
| |
| /* report cache summary */ |
| fprintf(fp, "%s: %p\n" |
| "Max supported entries = %u\n" |
| "Max utilized entries = %u\n" |
| "Active entries = %u\n" |
| "Hash table size = %u\n" |
| "Hits = %llu\n" |
| "Misses = %llu\n" |
| "Hit ratio = %5.2f\n", |
| name, cache, |
| cache->c_maxcount, |
| cache->c_max, |
| cache->c_count, |
| cache->c_hashsize, |
| cache->c_hits, |
| cache->c_misses, |
| (double)cache->c_hits * 100 / |
| (cache->c_hits + cache->c_misses) |
| ); |
| |
| for (i = 0; i <= CACHE_MAX_PRIORITY; i++) |
| fprintf(fp, "MRU %d entries = %6u (%3u%%)\n", |
| i, cache->c_mrus[i].cm_count, |
| cache->c_mrus[i].cm_count * 100 / cache->c_count); |
| |
| i = CACHE_DIRTY_PRIORITY; |
| fprintf(fp, "Dirty MRU %d entries = %6u (%3u%%)\n", |
| i, cache->c_mrus[i].cm_count, |
| cache->c_mrus[i].cm_count * 100 / cache->c_count); |
| |
| /* report hash bucket lengths */ |
	memset(hash_bucket_lengths, 0, sizeof(hash_bucket_lengths));
| |
| for (i = 0; i < cache->c_hashsize; i++) { |
| count = cache->c_hash[i].ch_count; |
| if (count > HASH_REPORT) |
| index = HASH_REPORT + 1; |
| else |
| index = count; |
| hash_bucket_lengths[index]++; |
| } |
| |
| total = 0; |
| for (i = 0; i < HASH_REPORT + 1; i++) { |
| total += i * hash_bucket_lengths[i]; |
| if (hash_bucket_lengths[i] == 0) |
| continue; |
| fprintf(fp, "Hash buckets with %2d entries %6ld (%3ld%%)\n", |
| i, hash_bucket_lengths[i], |
| (i * hash_bucket_lengths[i] * 100) / cache->c_count); |
| } |
| if (hash_bucket_lengths[i]) /* last report bucket is the overflow bucket */ |
| fprintf(fp, "Hash buckets with >%2d entries %6ld (%3ld%%)\n", |
| i - 1, hash_bucket_lengths[i], |
| ((cache->c_count - total) * 100) / cache->c_count); |
| } |