/*
 * Orphan inode management
 *
 * LOG_ORPHAN_ADD and LOG_ORPHAN_DEL are log records of the frontend
 * operations on orphan state. With these, we need no writes to the FS
 * other than log blocks. A short-lived orphan is handled entirely by
 * this mechanism.
 *
 * However, a long-lived orphan can make the chain of log blocks grow
 * too long. To prevent that, orphan inodes still alive at unify time
 * are stored by inum into sb->otree, which lets us retire the old log
 * blocks.
 *
 * On replay, we can find the inums of orphan inodes not yet destroyed
 * by checking sb->otree, LOG_ORPHAN_ADD, and LOG_ORPHAN_DEL. (Note, an
 * orphan inum from LOG_ORPHAN_ADD can be cancelled by a LOG_ORPHAN_DEL
 * for the same inum.)
 */
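
/*
 * Rough lifecycle sketch (for orientation only; details are in the
 * functions below):
 *
 *   inode becomes orphan -> tux3_make_orphan_add()  (logs LOG_ORPHAN_ADD)
 *   orphan destroyed     -> tux3_make_orphan_del()  (logs LOG_ORPHAN_DEL)
 *   alive at unify       -> tux3_unify_orphan_add() (add inum to sb->otree)
 *   dead, was in otree   -> tux3_unify_orphan_del() (chop inum from sb->otree)
 *   replay               -> replay_orphan_add/del(), then
 *                           replay_load_orphan_inodes()
 */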

#include "tux3.h"
#include "ileaf.h"

#ifndef trace
#define trace trace_on
#endif

/* Frontend modification cache entry for an orphan */
struct orphan {
	inum_t inum;
	struct list_head list;
};

/* list_entry() for the orphan inode list */
#define orphan_list_entry(x) list_entry(x, struct tux3_inode, orphan_list)
/* list_entry() for the orphan object list (orphan del, LOG_ORPHAN_ADD on replay) */
#define orphan_entry(x) list_entry(x, struct orphan, list)

static struct orphan *alloc_orphan(inum_t inum)
{
	struct orphan *orphan = malloc(sizeof(struct orphan));
	if (!orphan)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&orphan->list);
	orphan->inum = inum;
	return orphan;
}

static void free_orphan(struct orphan *orphan)
{
	free(orphan);
}

/* Caller must take care of locking if needed */
void clean_orphan_list(struct list_head *head)
{
	while (!list_empty(head)) {
		struct orphan *orphan = orphan_entry(head->next);
		list_del(&orphan->list);
		free_orphan(orphan);
	}
}

/*
 * FIXME: maybe we can share more code with inode.c and iattr.c.
 *
 * Also, this supports ORPHAN_ATTR only, and assumes it is the only
 * attribute. It would be better to support multiple attributes in
 * the otree.
 */
enum { ORPHAN_ATTR, };
static unsigned orphan_asize[] = {
	/* Fixed size attrs */
	[ORPHAN_ATTR] = 0,
};

static int oattr_encoded_size(struct btree *btree, void *data)
{
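	/* Fixed attr size plus the 2-byte kind/version header (see encode_kind()) */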
	return orphan_asize[ORPHAN_ATTR] + 2;
}

static void oattr_encode(struct btree *btree, void *data, void *attrs, int size)
{
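	/* ORPHAN_ATTR has no body, so only the kind/version header is written */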
	encode_kind(attrs, ORPHAN_ATTR, btree->sb->version);
}

struct ileaf_attr_ops oattr_ops = {
	.magic = cpu_to_be16(TUX3_MAGIC_OLEAF),
	.encoded_size = oattr_encoded_size,
	.encode = oattr_encode,
};

/* Add inums on the orphan_add list into sb->otree */
int tux3_unify_orphan_add(struct sb *sb, struct list_head *orphan_add)
{
	struct btree *otree = otree_btree(sb);
	struct cursor *cursor;
	int err = 0;

	if (list_empty(orphan_add))
		return 0;

	down_write(&otree->lock);
	if (!has_root(otree))
		err = alloc_empty_btree(otree);
	up_write(&otree->lock);
	if (err)
		return err;

	/* FIXME: +1 may not be enough to add multiple */
	cursor = alloc_cursor(otree, 1);	/* +1 for new depth */
	if (!cursor)
		return -ENOMEM;

	down_write(&cursor->btree->lock);
	while (!list_empty(orphan_add)) {
		struct tux3_inode *tuxnode = orphan_list_entry(orphan_add->next);

		trace("inum %Lu", tuxnode->inum);

		/* FIXME: what to do if error? */
		err = btree_probe(cursor, tuxnode->inum);
		if (err)
			goto out;

		/* Write orphan inum into the orphan btree */
		struct ileaf_req rq = {
			.key = {
				.start = tuxnode->inum,
				.len = 1,
			},
		};
		err = btree_write(cursor, &rq.key);
		release_cursor(cursor);
		if (err)
			goto out;

		list_del_init(&tuxnode->orphan_list);
	}
out:
	up_write(&cursor->btree->lock);
	free_cursor(cursor);

	return err;
}

/* Delete inums on the orphan_del list from sb->otree */
int tux3_unify_orphan_del(struct sb *sb, struct list_head *orphan_del)
{
	struct btree *otree = otree_btree(sb);
	int err;

	/* FIXME: we don't want to remove inums one by one */
	while (!list_empty(orphan_del)) {
		struct orphan *orphan = orphan_entry(orphan_del->next);

		trace("inum %Lu", orphan->inum);

		/* Remove inum from the orphan btree */
		err = btree_chop(otree, orphan->inum, 1);
		if (err)
			return err;

		list_del(&orphan->list);
		free_orphan(orphan);
	}

	return 0;
}

/*
 * Mark the inode as an orphan, and log it. If the orphan is still
 * alive at unify time, it will be written to sb->otree.
 */
int tux3_make_orphan_add(struct inode *inode)
{
	struct sb *sb = tux_sb(inode->i_sb);
	struct tux3_inode *tuxnode = tux_inode(inode);

	trace("inum %Lu", tuxnode->inum);

	assert(list_empty(&tuxnode->orphan_list));
	list_add(&tuxnode->orphan_list, &sb->orphan_add);

	log_orphan_add(sb, sb->version, tuxnode->inum);

	return 0;
}

/*
 * FIXME: We may be able to merge this with inode deletion, and with
 * some tweaks it might also serve for deferred inode deletion.
 */
static int add_defer_orphan_del(struct sb *sb, inum_t inum)
{
	struct orphan *orphan = alloc_orphan(inum);
	if (IS_ERR(orphan))
		return PTR_ERR(orphan);

	/* Add an orphan deletion (from sb->otree) request. */
	list_add(&orphan->list, &sb->orphan_del);

	return 0;
}

/* Clear the inode's orphan state (the inode was destroyed), and log it. */
int tux3_make_orphan_del(struct inode *inode)
{
	struct sb *sb = tux_sb(inode->i_sb);
	struct tux3_inode *tuxnode = tux_inode(inode);

	trace("inum %Lu", tuxnode->inum);

	if (!list_empty(&tuxnode->orphan_list)) {
		/* This orphan is not applied to sb->otree yet. */
		list_del_init(&tuxnode->orphan_list);
	} else {
		/* This orphan was already applied to sb->otree. */
		int err = add_defer_orphan_del(sb, tuxnode->inum);
		if (err) {
			/* FIXME: what to do? */
			tux3_warn(sb,
				  "orphan inum %Lu was leaked due to low memory",
				  tuxnode->inum);
			return err;
		}
	}

	log_orphan_del(sb, sb->version, tuxnode->inum);

	return 0;
}

/*
 * On replay, we first collect the orphan log records. Then, at the
 * end of replay, we reconstruct the orphan state from them.
 */
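
/*
 * Reconstruction sketch (assumed order, following the functions below):
 *   1. replay_orphan_add()/replay_orphan_del() collect and cancel
 *      LOG_ORPHAN_ADD/LOG_ORPHAN_DEL records
 *   2. replay_load_orphan_inodes() loads the surviving orphans, and
 *      load_otree_orphan_inode() adds those recorded in sb->otree
 *   3. replay_iput_orphan_inodes() finally destroys or releases them
 */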

static struct orphan *replay_find_orphan(struct list_head *head, inum_t inum)
{
	struct orphan *orphan;
	list_for_each_entry(orphan, head, list) {
		if (orphan->inum == inum)
			return orphan;
	}
	return NULL;
}

int replay_orphan_add(struct replay *rp, unsigned version, inum_t inum)
{
	struct sb *sb = rp->sb;
	struct orphan *orphan;

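	/* Ignore records that don't belong to the current version */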
	if (sb->version != version)
		return 0;

	orphan = alloc_orphan(inum);
	if (IS_ERR(orphan))
		return PTR_ERR(orphan);

	assert(!replay_find_orphan(&rp->log_orphan_add, inum));
	/* Remember LOG_ORPHAN_ADD */
	list_add(&orphan->list, &rp->log_orphan_add);

	return 0;
}

int replay_orphan_del(struct replay *rp, unsigned version, inum_t inum)
{
	struct sb *sb = rp->sb;
	struct orphan *orphan;

	if (sb->version != version)
		return 0;

	orphan = replay_find_orphan(&rp->log_orphan_add, inum);
	if (orphan) {
		/* There was a prior LOG_ORPHAN_ADD; cancel it. */
		list_del(&orphan->list);
		free_orphan(orphan);
		return 0;
	}

	/* The orphan inum in sb->otree is now dead. Add a deletion request. */
	return add_defer_orphan_del(sb, inum);
}

/* Destroy, or just release, the orphan inodes that are destruction candidates */
void replay_iput_orphan_inodes(struct sb *sb,
			       struct list_head *orphan_in_otree,
			       int destroy)
{
	struct tux3_inode *tuxnode, *safe;

	/* orphan inodes not in sb->otree */
	list_for_each_entry_safe(tuxnode, safe, &sb->orphan_add, orphan_list) {
		struct inode *inode = &tuxnode->vfs_inode;

		if (!destroy) {
			/* Set i_nlink = 1 to prevent destroying the inode. */
			set_nlink(inode, 1);
			list_del_init(&tuxnode->orphan_list);
		}
		iput(inode);
	}

	/* orphan inodes in sb->otree */
	list_for_each_entry_safe(tuxnode, safe, orphan_in_otree, orphan_list) {
		struct inode *inode = &tuxnode->vfs_inode;

		/* list_empty(&tuxnode->orphan_list) tells us it is in the otree */
		list_del_init(&tuxnode->orphan_list);

		if (!destroy) {
			/* Set i_nlink = 1 to prevent destroying the inode. */
			set_nlink(inode, 1);
		}
		iput(inode);
	}
}

static int load_orphan_inode(struct sb *sb, inum_t inum, struct list_head *head)
{
	struct inode *inode;

	inode = tux3_iget(sb, inum);
	if (IS_ERR(inode)) {
		int err = PTR_ERR(inode);
		tux3_err(sb, "%s: orphan inum %Lu not found (err %d)",
			 __func__, inum, err);
		return err;
	}
	assert(inode->i_nlink == 0);

	tux3_mark_inode_orphan(tux_inode(inode));
	/* Put the inode on the list; the caller will decide what to do */
	list_add(&tux_inode(inode)->orphan_list, head);

	return 0;
}

static int load_enum_inode(struct btree *btree, inum_t inum, void *attrs,
			   unsigned size, void *data)
{
	struct replay *rp = data;
	struct sb *sb = rp->sb;
	unsigned kind, version;

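	/* The encoded attr is just the 2-byte kind/version header (see oattr_encode()) */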
	assert(size == 2);
	decode_kind(attrs, &kind, &version);
	if (version != sb->version || kind != ORPHAN_ATTR)
		return 0;

	/* If inum is in orphan_del, it is already dead */
	if (replay_find_orphan(&sb->orphan_del, inum))
		return 0;

	return load_orphan_inode(sb, inum, &rp->orphan_in_otree);
}

/* Load orphan inodes from sb->otree */
static int load_otree_orphan_inode(struct replay *rp)
{
	struct sb *sb = rp->sb;
	struct btree *otree = otree_btree(sb);
	struct ileaf_enumrate_cb cb = {
		.callback = load_enum_inode,
		.data = rp,
	};
	int err;

	if (!has_root(otree))
		return 0;

	struct cursor *cursor = alloc_cursor(otree, 0);
	if (!cursor)
		return -ENOMEM;

	down_write(&cursor->btree->lock);
	err = btree_probe(cursor, 0);
	if (err)
		goto error;

	err = btree_traverse(cursor, 0, TUXKEY_LIMIT, ileaf_enumerate, &cb);
	/* FIXME: error handling */

	release_cursor(cursor);
error:
	up_write(&cursor->btree->lock);
	free_cursor(cursor);

	return err;
}

/* Load all orphan inodes */
int replay_load_orphan_inodes(struct replay *rp)
{
	struct sb *sb = rp->sb;
	struct list_head *head;
	int err;

	head = &rp->log_orphan_add;
	while (!list_empty(head)) {
		struct orphan *orphan = orphan_entry(head->next);

		err = load_orphan_inode(sb, orphan->inum, &sb->orphan_add);
		if (err)
			goto error;

		list_del(&orphan->list);
		free_orphan(orphan);
	}

	err = load_otree_orphan_inode(rp);
	if (err)
		goto error;

	return 0;

error:
	replay_iput_orphan_inodes(sb, &rp->orphan_in_otree, 0);
	return err;
}