/*
 * fs/logfs/gc.c	- garbage collection code
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 */
#include "logfs.h"
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * Wear leveling needs to kick in when the difference between low erase
 * counts and high erase counts gets too big.  A good value for "too big"
 * may be somewhat below 10% of maximum erase count for the device.
 * Why not 397, to pick a nice round number with no specific meaning? :)
 *
 * WL_RATELIMIT is the minimum time between two wear level events.  A huge
 * number of segments may fulfil the requirements for wear leveling at the
 * same time.  If that happens we don't want to cause a latency from hell,
 * but just gently pick one segment every so often and minimize overhead.
 */
#define WL_DELTA 397
#define WL_RATELIMIT 100
#define MAX_OBJ_ALIASES	2600
#define SCAN_RATIO 512	/* number of scanned segments per gc'd segment */
#define LIST_SIZE 64	/* base size of candidate lists */
#define SCAN_ROUNDS 128	/* maximum number of complete medium scans */
#define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */
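
/*
 * A hedged, illustrative sketch (values hypothetical, not compiled): the
 * ostore wear-level check in logfs_wl_pass() below compares the erase
 * count of the least-erased used segment against the head of the free
 * list plus WL_DELTA.
 */
#if 0
	u32 free_ec = 1000;	/* erase count of best free segment */
	u32 cold_ec = 1200;	/* erase count of least-erased used segment */

	if (cold_ec < free_ec + WL_DELTA)	/* 1200 < 1397: act */
		;	/* rewrite that segment, as logfs_wl_pass() does */
#endif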

static int no_free_segments(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);

	return super->s_free_list.count;
}

/* journal has distance -1, top-most ifile layer distance 0 */
static u8 root_distance(struct super_block *sb, gc_level_t __gc_level)
{
	struct logfs_super *super = logfs_super(sb);
	u8 gc_level = (__force u8)__gc_level;

	switch (gc_level) {
	case 0: /* fall through */
	case 1: /* fall through */
	case 2: /* fall through */
	case 3:
		/* file data or indirect blocks */
		return super->s_ifile_levels + super->s_iblock_levels - gc_level;
	case 6: /* fall through */
	case 7: /* fall through */
	case 8: /* fall through */
	case 9:
		/* inode file data or indirect blocks */
		return super->s_ifile_levels - (gc_level - 6);
	default:
		printk(KERN_ERR"LOGFS: segment of unknown level %x found\n",
				gc_level);
		WARN_ON(1);
		return super->s_ifile_levels + super->s_iblock_levels;
	}
}
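
/*
 * Worked example (hypothetical level counts): with s_ifile_levels == 2 and
 * s_iblock_levels == 3, a regular file's indirect block at gc_level 1 is
 * 2 + 3 - 1 == 4 steps from the root, while an ifile indirect block at
 * gc_level 7 is 2 - (7 - 6) == 1 step away.  Smaller distances mean the
 * segment's data sits closer to the tree root.
 */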

static int segment_is_reserved(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area;
	void *reserved;
	int i;

	/* Some segments are reserved.  Just pretend they were all valid */
	reserved = btree_lookup32(&super->s_reserved_segments, segno);
	if (reserved)
		return 1;

	/* Currently open segments */
	for_each_area(i) {
		area = super->s_area[i];
		if (area->a_is_open && area->a_segno == segno)
			return 1;
	}

	return 0;
}

static void logfs_mark_segment_bad(struct super_block *sb, u32 segno)
{
	BUG();
}

/*
 * Returns the bytes consumed by valid objects in this segment.  Object headers
 * are counted, the segment header is not.
 */
static u32 logfs_valid_bytes(struct super_block *sb, u32 segno, u32 *ec,
		gc_level_t *gc_level)
{
	struct logfs_segment_entry se;
	u32 ec_level;

	logfs_get_segment_entry(sb, segno, &se);
	if (se.ec_level == cpu_to_be32(BADSEG) ||
			se.valid == cpu_to_be32(RESERVED))
		return RESERVED;

	ec_level = be32_to_cpu(se.ec_level);
	*ec = ec_level >> 4;
	*gc_level = GC_LEVEL(ec_level & 0xf);
	return be32_to_cpu(se.valid);
}
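
/*
 * Sketch of the on-medium packing this helper undoes (illustrative, not
 * compiled): se.ec_level keeps the erase count in the high 28 bits and
 * the gc level in the low 4 bits, so an erase count of 0x123 at level 2
 * is stored as cpu_to_be32(0x1232).
 */
#if 0
	u32 packed = (ec << 4) | (level & 0xf);	/* pack: ec high, level low */
	u32 erase_count = packed >> 4;		/* unpack, as above */
	u8 level_bits = packed & 0xf;
#endif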

static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
		u64 bix, gc_level_t gc_level)
{
	struct inode *inode;
	int err, cookie;

	inode = logfs_safe_iget(sb, ino, &cookie);
	err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0);
	BUG_ON(err);
	logfs_safe_iput(inode, cookie);
}

static u32 logfs_gc_segment(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_segment_header sh;
	struct logfs_object_header oh;
	u64 ofs, ino, bix;
	u32 seg_ofs, logical_segno, cleaned = 0;
	int err, len, valid;
	gc_level_t gc_level;

	LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb);

	btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS);
	err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
	BUG_ON(err);
	gc_level = GC_LEVEL(sh.level);
	logical_segno = be32_to_cpu(sh.segno);
	if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) {
		logfs_mark_segment_bad(sb, segno);
		cleaned = -1;
		goto out;
	}

	for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE;
			seg_ofs + sizeof(oh) < super->s_segsize; ) {
		ofs = dev_ofs(sb, logical_segno, seg_ofs);
		err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh),
				&oh);
		BUG_ON(err);

		if (!memchr_inv(&oh, 0xff, sizeof(oh)))
			break;

		if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) {
			logfs_mark_segment_bad(sb, segno);
			cleaned = super->s_segsize - 1;
			goto out;
		}

		ino = be64_to_cpu(oh.ino);
		bix = be64_to_cpu(oh.bix);
		len = sizeof(oh) + be16_to_cpu(oh.len);
		valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level);
		if (valid == 1) {
			logfs_cleanse_block(sb, ofs, ino, bix, gc_level);
			cleaned += len;
		} else if (valid == 2) {
			/* Will be invalid upon journal commit */
			cleaned += len;
		}
		seg_ofs += len;
	}
out:
	btree_remove32(&super->s_reserved_segments, segno);
	return cleaned;
}
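
/*
 * Illustrative segment layout as walked by logfs_gc_segment() above:
 *
 *	+----------------+-----------+---------+-----------+---------+----
 *	| segment header | obj hdr 0 | payload | obj hdr 1 | payload | ...
 *	+----------------+-----------+---------+-----------+---------+----
 *	^ offset 0       ^ LOGFS_SEGMENT_HEADERSIZE
 *
 * The scan advances by sizeof(oh) + oh.len per object and stops early at
 * an all-0xff header, which marks the unwritten tail of the segment.
 */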

static struct gc_candidate *add_list(struct gc_candidate *cand,
		struct candidate_list *list)
{
	struct rb_node **p = &list->rb_tree.rb_node;
	struct rb_node *parent = NULL;
	struct gc_candidate *cur;
	int comp;

	cand->list = list;
	while (*p) {
		parent = *p;
		cur = rb_entry(parent, struct gc_candidate, rb_node);

		if (list->sort_by_ec)
			comp = cand->erase_count < cur->erase_count;
		else
			comp = cand->valid < cur->valid;

		if (comp)
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	rb_link_node(&cand->rb_node, parent, p);
	rb_insert_color(&cand->rb_node, &list->rb_tree);

	if (list->count <= list->maxcount) {
		list->count++;
		return NULL;
	}
	cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node);
	rb_erase(&cand->rb_node, &list->rb_tree);
	cand->list = NULL;
	return cand;
}
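
/*
 * Hedged usage sketch (not compiled): add_list() either keeps the
 * candidate and returns NULL, or returns whichever candidate no longer
 * fits the bounded list (possibly the one just passed in).  The caller
 * must dispose of the returned candidate or offer it to another list,
 * as __add_candidate() does below.
 */
#if 0
	cand = add_list(cand, &super->s_free_list);
	if (cand)	/* rejected, or the evicted worst entry */
		free_candidate(sb, cand);
#endif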

static void remove_from_list(struct gc_candidate *cand)
{
	struct candidate_list *list = cand->list;

	rb_erase(&cand->rb_node, &list->rb_tree);
	list->count--;
}

static void free_candidate(struct super_block *sb, struct gc_candidate *cand)
{
	struct logfs_super *super = logfs_super(sb);

	btree_remove32(&super->s_cand_tree, cand->segno);
	kfree(cand);
}

u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec)
{
	struct gc_candidate *cand;
	u32 segno;

	BUG_ON(list->count == 0);

	cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
	remove_from_list(cand);
	segno = cand->segno;
	if (ec)
		*ec = cand->erase_count;
	free_candidate(sb, cand);
	return segno;
}

/*
 * We have several lists to manage segments with.  The reserve_list is used to
 * deal with bad blocks.  We try to keep the best (lowest ec) segments on this
 * list.
 * The free_list contains free segments for normal usage.  It usually gets the
 * second pick after the reserve_list.  But when the free_list is running short
 * it is more important to keep the free_list full than to keep a reserve.
 *
 * Segments that are not free are put onto a per-level low_list.  If we have
 * to run garbage collection, we pick a candidate from there.  All segments on
 * those lists should have at least some free space so GC will make progress.
 *
 * And last we have the ec_list, which is used to pick segments for wear
 * leveling.
 *
 * If all appropriate lists are full, we simply free the candidate and forget
 * about that segment for a while.  We have better candidates for each purpose.
 */
static void __add_candidate(struct super_block *sb, struct gc_candidate *cand)
{
	struct logfs_super *super = logfs_super(sb);
	u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;

	if (cand->valid == 0) {
		/* 100% free segments */
		log_gc_noisy("add reserve segment %x (ec %x) at %llx\n",
				cand->segno, cand->erase_count,
				dev_ofs(sb, cand->segno, 0));
		cand = add_list(cand, &super->s_reserve_list);
		if (cand) {
			log_gc_noisy("add free segment %x (ec %x) at %llx\n",
					cand->segno, cand->erase_count,
					dev_ofs(sb, cand->segno, 0));
			cand = add_list(cand, &super->s_free_list);
		}
	} else {
		/* good candidates for Garbage Collection */
		if (cand->valid < full)
			cand = add_list(cand, &super->s_low_list[cand->dist]);
		/* good candidates for wear leveling,
		 * segments that were recently written get ignored */
		if (cand)
			cand = add_list(cand, &super->s_ec_list);
	}
	if (cand)
		free_candidate(sb, cand);
}
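
/*
 * Hedged walk-through of the cascade above (numbers hypothetical): a
 * freshly scanned segment with valid == 0 is offered to the reserve_list
 * first; if that list is already full of better (lower ec) segments,
 * add_list() hands back a candidate which then competes for the
 * free_list.  A used segment with, say, 40% of its bytes still valid
 * goes to low_list[dist] and, failing that, to the ec_list.  Whatever
 * no list keeps is freed and simply rescanned on a later sweep.
 */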

static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec,
		u8 dist)
{
	struct logfs_super *super = logfs_super(sb);
	struct gc_candidate *cand;

	cand = kmalloc(sizeof(*cand), GFP_NOFS);
	if (!cand)
		return -ENOMEM;

	cand->segno = segno;
	cand->valid = valid;
	cand->erase_count = ec;
	cand->dist = dist;

	btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS);
	__add_candidate(sb, cand);
	return 0;
}

static void remove_segment_from_lists(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct gc_candidate *cand;

	cand = btree_lookup32(&super->s_cand_tree, segno);
	if (cand) {
		remove_from_list(cand);
		free_candidate(sb, cand);
	}
}

static void scan_segment(struct super_block *sb, u32 segno)
{
	u32 valid, ec = 0;
	gc_level_t gc_level = 0;
	u8 dist;

	if (segment_is_reserved(sb, segno))
		return;

	remove_segment_from_lists(sb, segno);
	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
	if (valid == RESERVED)
		return;

	dist = root_distance(sb, gc_level);
	add_candidate(sb, segno, valid, ec, dist);
}

static struct gc_candidate *first_in_list(struct candidate_list *list)
{
	if (list->count == 0)
		return NULL;
	return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
}
/*
 * Find the best segment for garbage collection.  Main criterion is
 * the segment requiring the least effort to clean.  Secondary
 * criterion is to GC on the lowest level available.
 *
 * So we search the least effort segment on the lowest level first,
 * then move up and pick another segment iff it requires significantly
 * less effort.  Hence the LOGFS_MAX_OBJECTSIZE in the comparison.
 */
static struct gc_candidate *get_candidate(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	int i, max_dist;
	struct gc_candidate *cand = NULL, *this;

	/* s_low_list has LOGFS_NO_AREAS entries, so valid indices end at
	 * LOGFS_NO_AREAS - 1 */
	max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS - 1);

	for (i = max_dist; i >= 0; i--) {
		this = first_in_list(&super->s_low_list[i]);
		if (!this)
			continue;
		if (!cand)
			cand = this;
		if (this->valid + LOGFS_MAX_OBJECTSIZE <= cand->valid)
			cand = this;
	}
	return cand;
}
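
/*
 * Worked example (hypothetical numbers): if low_list[7] offers a segment
 * with 60000 valid bytes and low_list[6] offers one with 12000, the
 * lower-distance candidate only wins because 12000 + LOGFS_MAX_OBJECTSIZE
 * is still <= 60000, i.e. it is cheaper by more than one maximum-sized
 * object.  A segment that is merely a few bytes cheaper would not
 * displace the candidate found first.
 */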

static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
{
	struct logfs_super *super = logfs_super(sb);
	gc_level_t gc_level;
	u32 cleaned, valid, segno, ec;
	u8 dist;

	if (!cand) {
		log_gc("GC attempted, but no candidate found\n");
		return 0;
	}

	segno = cand->segno;
	dist = cand->dist;
	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
	free_candidate(sb, cand);
	log_gc("GC segment #%02x at %llx, %x required, %x free, %x valid, %llx free\n",
			segno, (u64)segno << super->s_segshift,
			dist, no_free_segments(sb), valid,
			super->s_free_bytes);
	cleaned = logfs_gc_segment(sb, segno);
	log_gc("GC segment #%02x complete - now %x valid\n", segno,
			valid - cleaned);
	BUG_ON(cleaned != valid);
	return 1;
}

static int logfs_gc_once(struct super_block *sb)
{
	struct gc_candidate *cand;

	cand = get_candidate(sb);
	if (cand)
		remove_from_list(cand);
	return __logfs_gc_once(sb, cand);
}

/* returns 1 if a wrap occurs, 0 otherwise */
static int logfs_scan_some(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	u32 segno;
	int i, ret = 0;

	segno = super->s_sweeper;
	for (i = SCAN_RATIO; i > 0; i--) {
		segno++;
		if (segno >= super->s_no_segs) {
			segno = 0;
			ret = 1;
			/* Break out of the loop.  We want to read a single
			 * block from the segment size on next invocation if
			 * SCAN_RATIO is set to match block size
			 */
			break;
		}

		scan_segment(sb, segno);
	}
	super->s_sweeper = segno;
	return ret;
}

/*
 * In principle, this function should loop forever, looking for GC candidates
 * and moving data.  LogFS is designed in such a way that this loop is
 * guaranteed to terminate.
 *
 * Limiting the loop to some iterations serves purely to catch cases when
 * these guarantees have failed.  An actual endless loop is an obvious bug
 * and should be reported as such.
 */
static void __logfs_gc_pass(struct super_block *sb, int target)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_block *block;
	int round, progress, last_progress = 0;

	/*
	 * Doing too many changes to the segfile at once would result
	 * in a large number of aliases.  Write the journal before
	 * things get out of hand.
	 */
	if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
		logfs_write_anchor(sb);

	if (no_free_segments(sb) >= target &&
			super->s_no_object_aliases < MAX_OBJ_ALIASES)
		return;

	log_gc("__logfs_gc_pass(%x)\n", target);
	for (round = 0; round < SCAN_ROUNDS; ) {
		if (no_free_segments(sb) >= target)
			goto write_alias;

		/* Sync in-memory state with on-medium state in case they
		 * diverged */
		logfs_write_anchor(sb);
		round += logfs_scan_some(sb);
		if (no_free_segments(sb) >= target)
			goto write_alias;
		progress = logfs_gc_once(sb);
		if (progress)
			last_progress = round;
		else if (round - last_progress > 2)
			break;
		continue;

		/*
		 * The goto logic is nasty, I just don't know a better way to
		 * code it.  GC is supposed to ensure two things:
		 * 1. Enough free segments are available.
		 * 2. The number of aliases is bounded.
		 * When 1. is achieved, we take a look at 2. and write back
		 * some alias-containing blocks, if necessary.  However, after
		 * each such write we need to go back to 1., as writes can
		 * consume free segments.
		 */
write_alias:
		if (super->s_no_object_aliases < MAX_OBJ_ALIASES)
			return;
		if (list_empty(&super->s_object_alias)) {
			/* All aliases are still in btree */
			return;
		}
		log_gc("Write back one alias\n");
		block = list_entry(super->s_object_alias.next,
				struct logfs_block, alias_list);
		block->ops->write_block(block);
		/*
		 * To round off the nasty goto logic, we reset round here.  It
		 * is a safety-net for GC not making any progress and limited
		 * to something reasonably small.  If we incremented it for
		 * every single alias, the loop could terminate rather quickly.
		 */
		round = 0;
	}
	LOGFS_BUG(sb);
}

static int wl_ratelimit(struct super_block *sb, u64 *next_event)
{
	struct logfs_super *super = logfs_super(sb);

	if (*next_event < super->s_gec) {
		*next_event = super->s_gec + WL_RATELIMIT;
		return 0;
	}
	return 1;
}
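
/*
 * Illustrative timeline (hypothetical values): with the global erase
 * count s_gec at 5000 and *next_event at 4900, wl_ratelimit() permits
 * the pass and arms *next_event to 5100, so further wear-level events
 * are suppressed until at least WL_RATELIMIT == 100 more global erases
 * have happened.
 */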

static void logfs_wl_pass(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct gc_candidate *wl_cand, *free_cand;

	if (wl_ratelimit(sb, &super->s_wl_gec_ostore))
		return;

	wl_cand = first_in_list(&super->s_ec_list);
	if (!wl_cand)
		return;
	free_cand = first_in_list(&super->s_free_list);
	if (!free_cand)
		return;

	if (wl_cand->erase_count < free_cand->erase_count + WL_DELTA) {
		remove_from_list(wl_cand);
		__logfs_gc_once(sb, wl_cand);
	}
}

/*
 * The journal needs wear leveling as well.  But moving the journal is an
 * expensive operation so we try to avoid it as much as possible.  And if we
 * have to do it, we move the whole journal, not individual segments.
 *
 * Ratelimiting is not strictly necessary here, it mainly serves to avoid the
 * calculations.  First we check whether moving the journal would be a
 * significant improvement.  That means that a) the current journal segments
 * have more wear than the future journal segments and b) the current journal
 * segments have more wear than normal ostore segments.
 * Rationale for b) is that we don't have to move the journal if it is aging
 * less than the ostore, even if the reserve segments age even less (they are
 * excluded from wear leveling, after all).
 * Next we check that the superblocks have less wear than the journal.  Since
 * moving the journal requires writing the superblocks, we have to protect the
 * superblocks even more than the journal.
 *
 * Also we double the acceptable wear difference, compared to ostore wear
 * leveling.  Journal data is read and rewritten rapidly, comparatively.  So
 * soft errors have much less time to accumulate and we allow the journal to
 * be a bit worse than the ostore.
 */
static void logfs_journal_wl_pass(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct gc_candidate *cand;
	u32 min_journal_ec = -1, max_reserve_ec = 0;
	int i;

	if (wl_ratelimit(sb, &super->s_wl_gec_journal))
		return;

	if (super->s_reserve_list.count < super->s_no_journal_segs) {
		/* Reserve is not full enough to move complete journal */
		return;
	}

	journal_for_each(i)
		if (super->s_journal_seg[i])
			min_journal_ec = min(min_journal_ec,
					super->s_journal_ec[i]);
	cand = rb_entry(rb_first(&super->s_free_list.rb_tree),
			struct gc_candidate, rb_node);
	max_reserve_ec = cand->erase_count;
	for (i = 0; i < 2; i++) {
		struct logfs_segment_entry se;
		u32 segno = seg_no(sb, super->s_sb_ofs[i]);
		u32 ec;

		logfs_get_segment_entry(sb, segno, &se);
		ec = be32_to_cpu(se.ec_level) >> 4;
		max_reserve_ec = max(max_reserve_ec, ec);
	}

	if (min_journal_ec > max_reserve_ec + 2 * WL_DELTA) {
		do_logfs_journal_wl_pass(sb);
	}
}
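
/*
 * Worked example for the check above (hypothetical erase counts): with
 * the least-worn journal segment at ec 2000, and the maximum over the
 * free-list head and both superblock segments coming to ec 600, the
 * journal moves because 2000 > 600 + 2 * WL_DELTA == 1394.  At ec 1300
 * it would stay put, since the doubled delta is not exceeded.
 */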

void logfs_gc_pass(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);

	//BUG_ON(mutex_trylock(&logfs_super(sb)->s_w_mutex));
	/* Write journal before free space is getting saturated with dirty
	 * objects.
	 */
	if (super->s_dirty_used_bytes + super->s_dirty_free_bytes
			+ LOGFS_MAX_OBJECTSIZE >= super->s_free_bytes)
		logfs_write_anchor(sb);
	__logfs_gc_pass(sb, super->s_total_levels);
	logfs_wl_pass(sb);
	logfs_journal_wl_pass(sb);
}

static int check_area(struct super_block *sb, int i)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area = super->s_area[i];
	gc_level_t gc_level;
	u32 cleaned, valid, ec;
	u32 segno = area->a_segno;
	u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);

	if (!area->a_is_open)
		return 0;

	if (super->s_devops->can_write_buf(sb, ofs) == 0)
		return 0;

	printk(KERN_INFO"LogFS: Possibly incomplete write at %llx\n", ofs);
	/*
	 * The device cannot write back the write buffer.  Most likely the
	 * wbuf was already written out and the system crashed at some point
	 * before the journal commit happened.  In that case we wouldn't have
	 * to do anything.  But if the crash happened before the wbuf was
	 * written out correctly, we must GC this segment.  So assume the
	 * worst and always do the GC run.
	 */
	area->a_is_open = 0;
	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
	cleaned = logfs_gc_segment(sb, segno);
	if (cleaned != valid)
		return -EIO;
	return 0;
}

int logfs_check_areas(struct super_block *sb)
{
	int i, err;

	for_each_area(i) {
		err = check_area(sb, i);
		if (err)
			return err;
	}
	return 0;
}

static void logfs_init_candlist(struct candidate_list *list, int maxcount,
		int sort_by_ec)
{
	list->count = 0;
	list->maxcount = maxcount;
	list->sort_by_ec = sort_by_ec;
	list->rb_tree = RB_ROOT;
}

int logfs_init_gc(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	int i;

	btree_init_mempool32(&super->s_cand_tree, super->s_btree_pool);
	logfs_init_candlist(&super->s_free_list, LIST_SIZE + SCAN_RATIO, 1);
	logfs_init_candlist(&super->s_reserve_list,
			super->s_bad_seg_reserve, 1);
	for_each_area(i)
		logfs_init_candlist(&super->s_low_list[i], LIST_SIZE, 0);
	logfs_init_candlist(&super->s_ec_list, LIST_SIZE, 1);
	return 0;
}

static void logfs_cleanup_list(struct super_block *sb,
		struct candidate_list *list)
{
	struct gc_candidate *cand;

	while (list->count) {
		cand = rb_entry(list->rb_tree.rb_node, struct gc_candidate,
				rb_node);
		remove_from_list(cand);
		free_candidate(sb, cand);
	}
	BUG_ON(list->rb_tree.rb_node);
}

void logfs_cleanup_gc(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	int i;

	if (!super->s_free_list.count)
		return;

	/*
	 * FIXME: The btree may still contain a single empty node.  So we
	 * call the grim visitor to clean up that mess.  Btree code should
	 * do it for us, really.
	 */
	btree_grim_visitor32(&super->s_cand_tree, 0, NULL);
	logfs_cleanup_list(sb, &super->s_free_list);
	logfs_cleanup_list(sb, &super->s_reserve_list);
	for_each_area(i)
		logfs_cleanup_list(sb, &super->s_low_list[i]);
	logfs_cleanup_list(sb, &super->s_ec_list);
}