| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License version 2 and |
| * only version 2 as published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| |
| #include <linux/percpu.h> |
| #include <linux/slab.h> |
| #include <linux/fs.h> |
| #include <linux/kthread.h> |
| #include <linux/list.h> |
| #include <linux/ratelimit.h> |
| #include <linux/writeback.h> |
| #include "euler.h" |
| #include "dep.h" |
| #include "lock.h" |
| #include "dax.h" |
| #include "dht.h" |
| |
| static void do_dep_diradd_oneshot(struct inode *dir_inode, struct dep_node *dep, |
| u64 *bitset); |
| |
| struct flush_list_head { |
| int count; |
| struct llist_head head; |
| }; |
| |
| DEFINE_PER_CPU(struct flush_list_head, flush_list_percpu); |
| |
| #define IFMT_HAS_ROOT(ifmt) \ |
| ((ifmt) == S_IFREG || (ifmt) == S_IFDIR || (ifmt) == S_IFLNK) |
| |
| #define INODE_COND_TRYLOCK(inode, tag, enter_cond, exit_cond, exit_expr) \ |
| do { \ |
| tag: \ |
| if (enter_cond) { \ |
| if (likely(inode_trylock(inode))) { \ |
			/* got the lock, nothing more to do */     \
| } else { \ |
| if (exit_cond) { \ |
| exit_expr; \ |
| } else { \ |
| cond_resched(); \ |
| goto tag; \ |
| } \ |
| } \ |
| } \ |
| } while (0) |
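
/*
 * Usage sketch (hypothetical; the open-coded retry loop in
 * do_dep_diradd_oneshot() below follows the same pattern):
 *
 *	INODE_COND_TRYLOCK(inode, again, true,
 *			   eufs_inode_mark_lock_transferring(inode),
 *			   lock_transferred = true);
 *
 * While enter_cond holds, keep trying the lock; on a failed trylock,
 * either exit_cond grants an alternative way forward (exit_expr runs
 * and we stop), or we reschedule and retry at the tag.
 */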
| |
| static inline void fsync_dir_oneshot(struct inode *dir) |
| { |
| eufs_dir_fsync_oneshot(dir); |
| } |
| |
| static void do_dep_dirrem(struct inode *inode, struct dep_node *dep, |
| u64 *bitset) |
| { |
| struct nv_dict_entry *prevde = dep->prevde; |
| struct nv_dict_entry *de = dep->de; |
| int idx; |
| |
| eufs_dbg("!! %s !!", __func__); |
| NV_ASSERT(de); |
| NV_ASSERT(de->inode); |
| NV_ASSERT(de->name); |
| |
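	/*
	 * Mark the dentry's bucket in the dirty bitset (8 u64 words track
	 * up to 512 buckets) so that eufs_sync_buckets() later flushes
	 * only the touched buckets.
	 */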
| idx = INDEX(de->hv); |
	bitset[idx / 64] |= 0x1ull << (idx & 63);
| eufs_dbg("bitset-add: dict=%llx, %d %llx\n", |
| eufs_iread_dict(EUFS_PI(inode)), idx, bitset[idx / 64]); |
| |
| /* |
| * This is a removal of a newly created dentry, nothing to do, |
| * the prevde is already manipulated in dht.c |
| */ |
| if (de->volatile_next == EUFS_DIR_DELNEW) |
| return; |
| |
| /* |
| * If dentries immediately following the deleted dentry are |
| * also deleted, prevde->volatile_next will be modified again. |
| * So if we assign prevde->volatile_next to prevde->next, |
| * these deletion will be persisted prematurely. |
| */ |
| if (prevde && !eufs_dentry_is_not_persist(prevde)) { |
| prevde->next = de->next; |
| persist_dentry(prevde); |
| } |
| } |
| |
| static void do_dep_dirrem_reclaim(struct super_block *sb, struct dep_node *dep) |
| { |
| struct nv_dict_entry *de = dep->de; |
| struct eufs_inode __maybe_unused *pi; |
| struct inode *child; |
| |
| pi = s2p(sb, de->inode); |
	child = dep->inode;
	BUG_ON(!child);
	NV_ASSERT(EUFS_PI(child) == pi);
	eufs_dbg("dirrem: child_inode=%px\n", child);
| eufs_free_name(sb, de); |
| nv_free(sb, de); |
| } |
| |
| #define EUFS_PRINT_BITSET(lvl, bitset) \ |
| eufs_##lvl("bitsets: %llx %llx %llx %llx %llx %llx %llx %llx\n", \ |
| bitset[0], bitset[1], bitset[2], bitset[3], bitset[4], \ |
| bitset[5], bitset[6], bitset[7]) |
| |
| static void eufs_sync_buckets(struct eufs_inode_info *vi, u64 bitset[8]) |
| { |
| struct inode *inode = &vi->vfs_inode; |
| struct super_block *sb = inode->i_sb; |
| struct eufs_inode *pi = EUFS_FRESH_PI(EUFS_PI(inode)); |
| struct nv_dict *dict; |
| int i; |
| |
| /* Volatile buckets */ |
| if (!vi->i_volatile_dict) |
| return; |
| |
| EUFS_PRINT_BITSET(dbg, bitset); |
| |
| BUG_ON(!inode_is_header_locked(inode)); |
| dict = o2p(sb, eufs_iread_dict(pi)); |
| for (i = 0; i < 8; ++i) { |
| int j; |
| bool dirty; |
| int idx; |
| |
| if (!bitset[i]) |
| continue; |
| dirty = false; |
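		/*
		 * Eight 8-byte bucket pointers share one 64-byte cacheline,
		 * so flush once per group of eight slots; the extra j == 64
		 * iteration flushes the final dirty cacheline of this word.
		 */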
| for (j = 0; j <= 64; ++j) { |
| if (j % 8 == 0 && dirty) { |
| dirty = false; |
| eufs_flush_cacheline(&dict->table[idx]); |
| } |
| if (j == 64) |
| break; |
| if (!(bitset[i] & (0x1ull << j))) |
| continue; |
| idx = i * 64 + j; |
| eufs_dbg_dir("handle index %d (i %d, j %d) of inode=%px\n", |
| idx, i, j, inode); |
| |
| eufs_dbg_dir(" idx=%d dict[idx]=%px vdict[idx]=%px\n", |
| idx, dict->table[idx], |
| vi->i_volatile_dict->table[idx]); |
| |
| if (unlikely(vi->i_volatile_dict->table[idx] == |
| EUFS_DIR_EOC_PTR)) |
| dict->table[idx] = NULL_VAL; |
| else if (vi->i_volatile_dict->table[idx] != NULL) |
| dict->table[idx] = COMPOSE_DICT_HEAD_le64( |
| sb, vi->i_volatile_dict->table[idx]); |
| vi->i_volatile_dict->table[idx] = NULL; |
| dirty = true; |
| } |
| } |
| } |
| |
| /* |
| * Some ideas on fast fsync (of dir): |
| * |
| * 1. Batch and coalescence. The newly inserted dentry should be marked and |
| * during its removal, it should be marked again so that unnecessary dep_diradd |
| * an be prevented. |
| * |
| * 2. Split! The lock (only when there is one lock needed) can be temporarily |
| * given up so between handling two deps. This requires that the dentry pointed |
| * by dir_pi should not be reclaimed (like in RCU). Well, actually, combined |
| * with the following one idea, this is quite acceptable. |
| * |
| * 3. Delayed free. The removal operations can be delayed until the locks are |
| * released. |
| * |
| * |
| * Parallel fsync for a vi is not throughly considered though. |
| * |
| * 4. Detach only if the list is empty? |
| */ |
| static void fsync_rename_inode(struct inode *dir) |
| { |
| struct eufs_inode_info *vi = EUFS_I(dir); |
| |
| if (!vi->i_is_dirty) |
| return; |
| |
	/* We hold the inode lock, so the dirty state is stable here. */
| fsync_dir_oneshot(dir); |
| } |
| |
| void fsync_rename_inodes(struct inode *old_dir, struct inode *new_dir, |
| struct inode **locked_inodes) |
| { |
| int i; |
| struct inode *inode; |
| |
| /* |
| * The two parent dirs, might have parent-child relations sometime |
| * before. So we need to transfer these two dirs too. |
| */ |
| for (i = 0; i < EUFS_INODE_CNT_IN_RENAME; i++) { |
| inode = locked_inodes[i]; |
| if (inode) |
| eufs_inode_mark_lock_transferable(inode); |
| } |
| |
| if (old_dir == new_dir) { |
| fsync_rename_inode(old_dir); |
| } else { |
| fsync_rename_inode(old_dir); |
| fsync_rename_inode(new_dir); |
| } |
| |
| for (i = 0; i < EUFS_INODE_CNT_IN_RENAME; i++) { |
| inode = locked_inodes[i]; |
| if (inode) |
| eufs_inode_wait_lock_transfer_done(inode); |
| } |
| } |
| |
| static void eufs_update_persisted_seq(struct eufs_inode_info *vi, |
| struct list_head *head) |
| { |
| if (!list_empty(head)) { |
| struct dep_node *dep = |
| list_last_entry(head, struct dep_node, node); |
| |
| vi->i_persisted_dep_seq = dep->seq; |
| } |
| } |
| |
| static int fsync_dir_bg(struct inode *dir) |
| { |
| struct dep_node *dep, *next; |
| LIST_HEAD(detached_list); |
| LIST_HEAD(dump_list); |
| int i; |
| #define FSYNC_DIR_VI_LOOP_NUM (20) |
| |
| struct eufs_inode_info *vi = EUFS_I(dir); |
| struct super_block *sb = dir->i_sb; |
| struct eufs_sb_info *sbi = EUFS_SB(sb); |
| struct eufs_inode *pi = EUFS_PI(dir); |
| u64 bitset[8] = { 0 }; |
| int dep_count = 0; |
| |
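	/*
	 * Phase 1 drains the dep list round by round, taking the header
	 * lock only in the last round; phase 2 publishes the volatile
	 * buckets; phase 3 persists the inode itself, after which the
	 * handled deps are reclaimed.
	 */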
| retry: |
| inode_urgent_lock(dir); |
| |
| /* Phase 1 */ |
| for (i = FSYNC_DIR_VI_LOOP_NUM; i >= 0; --i) { |
| /* Get all deps round by round */ |
| if (i == 0) { |
| /* Last round */ |
| inode_header_lock(dir); |
| } |
| inode_dep_lock(dir); |
| |
| if (list_empty(&vi->i_dep_list) && i > 0) { |
| /* Skip to last round */ |
| i = 1; |
| } |
| list_cut_position(&detached_list, &vi->i_dep_list, |
| vi->i_dep_list.prev); |
| |
| if (i > 0) |
| inode_dep_unlock(dir); |
| |
		/* Handle the deps one by one. */
| list_for_each_entry_safe(dep, next, &detached_list, node) { |
| if (dep->type == DEP_DIRADD) { |
| /* |
| * FIXME: the lockset might be different since |
| * we might have released the inode lock. |
| */ |
| do_dep_diradd_oneshot(dir, dep, bitset); |
| |
| } else if (dep->type == DEP_DIRREM) { |
| do_dep_dirrem(dir, dep, bitset); |
| |
| } else |
| BUG(); |
| } |
| |
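		/* Defer reclamation until all locks are dropped (delayed free). */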
| list_splice_tail_init(&detached_list, &dump_list); |
| |
| if (i == 0) { |
| eufs_pbarrier(); |
| |
| if (!list_empty(&dump_list)) |
| /* Phase 2 */ |
| eufs_sync_buckets(vi, bitset); |
| |
| inode_dep_unlock(dir); |
| inode_header_unlock(dir); |
| break; |
| } |
| } |
| |
| inode_urgent_unlock(dir); |
| |
| /* Phase 3 */ |
| inode_lock(dir); |
| |
| if (!list_empty(&vi->i_dep_list)) { |
| inode_unlock(dir); |
| /* To handle new deps between phase 2 & 3 */ |
| /* FIXME: Live lock possible! */ |
| goto retry; |
| } |
| |
| if (dir->i_nlink) |
| eufs_sync_pinode(dir, pi, false); |
| |
| eufs_update_persisted_seq(vi, &dump_list); |
| |
| vi->i_is_persisting = false; |
| vi->i_is_dirty = false; |
| |
| if (dir->i_nlink) |
| persist_pinode(pi); |
| |
| inode_unlock(dir); |
| |
| eufs_pbarrier(); |
| |
| /* Reclaim memory and clear the list */ |
| list_for_each_entry_safe(dep, next, &dump_list, node) { |
| struct inode *child_inode = dep->inode; |
| struct eufs_inode_info *child_vi = EUFS_I(child_inode); |
| |
| if (dep->type == DEP_DIRREM) |
| do_dep_dirrem_reclaim(sb, dep); |
| |
| /* remove from owner list */ |
| spin_lock(&child_vi->i_owner_lock); |
| list_del_init(&dep->owner_node); |
| spin_unlock(&child_vi->i_owner_lock); |
| |
| iput(child_inode); |
| |
| list_del(&dep->node); |
| |
| eufs_free_dep_node(dep); |
| dep_count++; |
| } |
| atomic_sub(dep_count, &sbi->s_nr_dep_nodes); |
| eufs_dbg("@cpu=%d !! fsync dir vi done: inode=%px\n", |
| smp_processor_id(), &vi->vfs_inode); |
| return 0; |
| } |
| |
| static int fsync_nondir_oneshot(struct inode *inode) |
| { |
| struct eufs_inode_info *vi = EUFS_I(inode); |
| struct eufs_inode *pi; |
| |
| /* For files other than dir */ |
| WARN(S_ISDIR(inode->i_mode), "%s on a dir!", __func__); |
| |
	/* The inode is being removed; nothing to persist. */
| if (!inode->i_nlink) { |
| vi->i_is_dirty = false; |
| return 0; |
| } |
| |
| pi = EUFS_PI(inode); |
| |
| eufs_sync_pinode(inode, pi, false); |
| |
| persist_pinode(pi); |
| |
| vi->i_is_dirty = false; |
| |
| return 0; |
| } |
| |
| static int fsync_nondir_bg(struct inode *inode) |
| { |
| struct eufs_inode_info *vi = EUFS_I(inode); |
| int r; |
| |
| inode_lock(inode); |
| r = fsync_nondir_oneshot(inode); |
| vi->i_is_persisting = false; |
| inode_unlock(inode); |
| |
| return r; |
| } |
| |
| static void fsync_bg(struct inode *inode) |
| { |
| struct eufs_sb_info *sbi = EUFS_SB(inode->i_sb); |
| |
| wait_on_inode(inode); |
| |
| /* Reading i_mode may need no protection */ |
| if (S_ISDIR(inode->i_mode)) |
| fsync_dir_bg(inode); |
| else |
| fsync_nondir_bg(inode); |
| |
	/* Drop the inode reference held for background persistence. */
| iput(inode); |
| |
| if (atomic_dec_and_test(&sbi->s_nr_dirty_inodes) && sbi->s_draining) { |
| /* end of draining */ |
| sbi->s_draining = false; |
| } |
| } |
| |
| void fsync_oneshot(struct inode *inode) |
| { |
| /* Reading i_mode may need no protection */ |
| if (S_ISDIR(inode->i_mode)) |
| fsync_dir_oneshot(inode); |
| else |
| fsync_nondir_oneshot(inode); |
| } |
| |
| static void do_dep_diradd_oneshot(struct inode *dir_inode, struct dep_node *dep, |
| u64 *bitset) |
| { |
| struct super_block *sb = dir_inode->i_sb; |
| struct nv_dict_entry *de = dep->de; |
| struct inode *inode = dep->inode; |
| struct eufs_inode_info *dir_vi = EUFS_I(dir_inode); |
| struct eufs_inode *pi; |
| struct eufs_inode *fresh_pi; |
| int idx; |
| void *buffer[16]; |
| struct alloc_batch ab; |
| bool lock_transferred = false; |
| |
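	/* Mark the target bucket dirty, as in do_dep_dirrem(). */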
| idx = INDEX(de->hv); |
	bitset[idx / 64] |= 0x1ull << (idx & 63);
| |
| if (de->volatile_next == EUFS_DIR_DELNEW) { |
| /* |
| * The de is already invisible from both the latest view and |
| * the consistent view. |
| * Will be handled in the corresponding dirrem. |
| */ |
| return; |
| } |
| |
	/* The not-persist flag on the dentry is the signature of a pending diradd */
| WARN(!eufs_dentry_is_not_persist(de), "diradd wrong sign"); |
| |
| pi = s2p(sb, de->inode); |
| |
| wait_on_inode(inode); |
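	/*
	 * Take the child's inode lock, or borrow it: if the current holder
	 * has marked the lock transferable (see fsync_on_draining() and
	 * fsync_rename_inodes()), proceed under the transferred lock and
	 * signal completion afterwards.
	 */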
| retry: |
| if (likely(inode_trylock(inode))) { |
| /* Got the lock */ |
| } else { |
| if (eufs_inode_mark_lock_transferring(inode)) { |
| lock_transferred = true; |
| } else { |
| cond_resched(); |
| goto retry; |
| } |
| } |
| |
| eufs_sync_pinode(inode, pi, false); |
| fresh_pi = EUFS_FRESH_PI(pi); |
| |
| if (!lock_transferred) |
| inode_unlock(inode); |
| else |
| eufs_inode_lock_transfer_done(inode); |
| |
| ab.n_used = 0; |
| ab.size = 16; |
| ab.batch = buffer; |
| |
| eufs_alloc_batch_add(sb, &ab, de); |
| /* |
| * force to persist the allocation without checking. |
| * TODO: we should differentiate the link and create syscall to agree |
| * with checking |
| */ |
| eufs_alloc_persist(sb, pi, true); |
| |
| if (S_ISLNK(fresh_pi->i_mode)) { |
| void *root = o2p(sb, eufs_iread_root(fresh_pi)); |
| |
		/* A regular file's root is handled in the btree code. */
		/* For a hard link, we must force the allocation persistence. */
| eufs_alloc_persist(sb, root, true); |
| persist_symlink(root); |
| } else if (S_ISDIR(fresh_pi->i_mode)) { |
| void *root = o2p(sb, eufs_iread_root(fresh_pi)); |
| |
| eufs_alloc_persist(sb, root, false); |
| persist_page(root); |
| } |
| |
| persist_name(sb, de, &ab); |
| |
| eufs_alloc_batch_persist_reset(sb, &ab); |
| |
| persist_pinode(pi); |
| |
| spin_lock(&dir_vi->i_dentry_persist_lock); |
| eufs_dentry_clr_not_persist_flag(de); |
| spin_unlock(&dir_vi->i_dentry_persist_lock); |
| |
| persist_dentry(de); |
| } |
| |
| void eufs_dir_fsync_oneshot(struct inode *dir) |
| { |
| struct dep_node *dep; |
| struct dep_node *next; |
| struct super_block *sb = dir->i_sb; |
| struct eufs_sb_info *sbi = EUFS_SB(sb); |
| struct eufs_inode_info *vi = EUFS_I(dir); |
| LIST_HEAD(detached_list); |
| u64 bitset[8] = { 0 }; |
| int dep_count = 0; |
| |
| BUG_ON(!inode_is_locked(dir)); |
| |
| inode_urgent_lock(dir); |
| |
| /* get all deps */ |
| inode_header_lock(dir); |
| inode_dep_lock(dir); |
| |
| if (list_empty(&vi->i_dep_list)) |
| goto unlock_sync_pinode; |
| |
| list_for_each_entry(dep, &vi->i_dep_list, node) { |
| if (dep->type == DEP_DIRADD) |
| do_dep_diradd_oneshot(dir, dep, bitset); |
| else if (dep->type == DEP_DIRREM) |
| do_dep_dirrem(dir, dep, bitset); |
| else |
| BUG(); |
| } |
| |
| list_splice_init(&vi->i_dep_list, &detached_list); |
| |
| /* sync buckets */ |
| eufs_pbarrier(); |
| eufs_sync_buckets(vi, bitset); |
| |
| unlock_sync_pinode: |
| inode_dep_unlock(dir); |
| inode_header_unlock(dir); |
| |
| /* sync pinode */ |
| if (dir->i_nlink) |
| eufs_sync_pinode(dir, EUFS_PI(dir), false); |
| |
| eufs_pbarrier(); |
| |
| eufs_update_persisted_seq(vi, &detached_list); |
| |
| vi->i_is_dirty = false; |
| |
| /* Reclaim memory and clear the list */ |
| list_for_each_entry_safe(dep, next, &detached_list, node) { |
| struct inode *child_inode = dep->inode; |
| struct eufs_inode_info *child_vinode = EUFS_I(child_inode); |
| |
| spin_lock(&child_vinode->i_owner_lock); |
| list_del_init(&dep->owner_node); |
| spin_unlock(&child_vinode->i_owner_lock); |
| |
		if (dep->type == DEP_DIRREM)
			do_dep_dirrem_reclaim(sb, dep);
		iput(child_inode);
| list_del(&dep->node); |
| eufs_free_dep_node(dep); |
| dep_count++; |
| } |
| atomic_sub(dep_count, &sbi->s_nr_dep_nodes); |
| |
| inode_urgent_unlock(dir); |
| } |
| |
| void fsync_on_draining(struct inode *dir, struct inode *inode) |
| { |
| BUG_ON(!dir); |
| BUG_ON(!inode_is_locked(dir)); |
| BUG_ON(inode && !inode_is_locked(inode)); |
| |
| /* for link/unlink/rmdir */ |
| if (inode) |
| eufs_inode_mark_lock_transferable(inode); |
| |
| fsync_dir_oneshot(dir); |
| |
| if (inode) |
| eufs_inode_wait_lock_transfer_done(inode); |
| } |
| |
| #define NR_FLUSH_EACH_ROUND (16) |
| #define FLUSH_START_THRESHOLD (64) |
| |
static __always_inline int
handle_persistees_for_each_cpu(struct super_block *sb,
			       const struct cpumask *mask, int idx)
{
| struct eufs_sb_info *sbi = EUFS_SB(sb); |
| struct llist_node *list; |
| struct llist_head *head; |
| struct eufs_inode_info *vi; |
| struct eufs_inode_info *next; |
| int n_active_list; |
| int cpu; |
| bool need; |
| |
| retry: |
| need = sbi->need_sync[idx]; |
| n_active_list = 0; |
| for_each_cpu(cpu, mask) { |
| head = per_cpu_ptr(sbi->persistee_list, cpu); |
| |
| if (unlikely(llist_empty(head))) |
| continue; |
| |
| n_active_list++; |
| |
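		/* Atomically detach this cpu's whole lock-free list. */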
| list = llist_del_all(head); |
| |
| eufs_dbg("persister get list %px for cpu%d\n", list, cpu); |
| |
| /* reverse the ordering for better locality? */ |
| llist_for_each_entry_safe(vi, next, list, i_persistee_node) |
| fsync_bg(&vi->vfs_inode); |
| eufs_dbg("persister handled list %px\n", list); |
| } |
| /** |
| * We need a complete round of run for fssync. If |
| * need != sbi->need_sync[idx], need_sync was modified during our last |
| * round. We need to retry to ensure a complete round of run. |
| * It's okay if dirty inodes of a cpu is still being processed by |
| * another persister, since we will wait for all persisters to finish |
| * for fssync. |
| */ |
| if (need != READ_ONCE(sbi->need_sync[idx])) |
| goto retry; |
| if (need) { |
| sbi->need_sync[idx] = false; |
| wake_up(&sbi->sync_wq); |
| } |
| if (READ_ONCE(sbi->need_sync[idx])) |
| goto retry; |
| |
| return n_active_list; |
| } |
| |
| static int persister(void *data) |
| { |
| struct super_block *sb = data; |
| struct eufs_sb_info *sbi = EUFS_SB(sb); |
| const struct cpumask *mask = cpumask_of_node(numa_node_id()); |
| const int period = |
| (persist_period == 0) ? /* default */ (HZ / 4) : |
| /* less than a second */ |
| ((persist_period < 0) ? (HZ / (-persist_period)) : |
| /* more than a second */ |
| (HZ * persist_period)); |
| int idx = 0; |
| int num_persisters = num_sockets * persisters_per_socket; |
| |
| eufs_info("sb=%px cpu=%d cpumask=%*pbl period=%d\n", data, |
| smp_processor_id(), cpumask_pr_args(mask), period); |
| |
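	/* Locate this kthread's slot in sbi->persisters[] to derive its index. */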
| while (idx < num_persisters && sbi->persisters[idx] != current) |
| idx++; |
| BUG_ON(idx >= num_persisters); |
| |
| while (!kthread_should_stop()) { |
| set_current_state(TASK_INTERRUPTIBLE); |
| schedule_timeout(period); |
| handle_persistees_for_each_cpu(sb, mask, idx); |
| } |
| |
| while (handle_persistees_for_each_cpu(sb, mask, idx)) |
| cpu_relax(); |
| |
| eufs_info("finalizing on %d\n", smp_processor_id()); |
| |
| return 0; |
| } |
| |
| int dep_init(struct super_block *sb) |
| { |
| struct eufs_sb_info *sbi = EUFS_SB(sb); |
| int cpu; |
| int i, j; |
| char name[BDEVNAME_SIZE]; |
| int err; |
| |
| sbi->persistee_list = alloc_percpu(struct llist_head); |
| if (!sbi->persistee_list) { |
| err = -ENOMEM; |
| goto cleanup; |
| } |
| |
| /* init each llist */ |
| for_each_possible_cpu(cpu) |
| init_llist_head(per_cpu_ptr(sbi->persistee_list, cpu)); |
| |
	sbi->persisters = kcalloc(persisters_per_socket * num_sockets,
				  sizeof(struct task_struct *), GFP_KERNEL);
| if (!sbi->persisters) { |
| err = -ENOMEM; |
| goto cleanup; |
| } |
| |
	sbi->need_sync = kcalloc(persisters_per_socket * num_sockets,
				 sizeof(bool), GFP_KERNEL);
| if (!sbi->need_sync) { |
| err = -ENOMEM; |
| goto cleanup; |
| } |
| |
| init_waitqueue_head(&sbi->sync_wq); |
| |
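	/* One group of persisters per socket, each pinned to that socket's CPUs. */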
| bdevname(sb->s_bdev, name); |
| for (i = 0; i < num_sockets; ++i) { |
| for (j = 0; j < persisters_per_socket; ++j) { |
| int idx = i * persisters_per_socket + j; |
| |
| sbi->persisters[idx] = kthread_create_on_node( |
| persister, sb, i, "hmfs/%s-%d.%d", name, i, j); |
| |
| if (IS_ERR(sbi->persisters[idx])) { |
| err = PTR_ERR(sbi->persisters[idx]); |
| pr_err("create persister %s-%d.%d error %d", |
| name, i, j, err); |
| sbi->persisters[idx] = NULL; |
| goto cleanup; |
| } |
| |
| set_cpus_allowed_ptr(sbi->persisters[idx], |
| cpumask_of_node(i)); |
| |
| wake_up_process(sbi->persisters[idx]); |
| } |
| } |
| |
| return 0; |
| |
| cleanup: |
| dep_fini(sb); |
| return err; |
| } |
| |
| void dep_fini(struct super_block *sb) |
| { |
| struct eufs_sb_info *sbi = EUFS_SB(sb); |
| |
| if (sbi->persisters) { |
| int i; |
| |
| for (i = 0; i < persisters_per_socket * num_sockets; ++i) { |
| if (sbi->persisters[i]) { |
| kthread_stop(sbi->persisters[i]); |
| sbi->persisters[i] = NULL; |
| } |
| } |
| |
| kfree(sbi->persisters); |
| sbi->persisters = NULL; |
| } |
| |
| kfree(sbi->need_sync); |
| sbi->need_sync = NULL; |
| |
| free_percpu(sbi->persistee_list); |
| sbi->persistee_list = NULL; |
| } |