| From 3fd48301a72715489115dfbfdf219c7f4eef5b0f Mon Sep 17 00:00:00 2001 |
| From: Al Viro <viro@zeniv.linux.org.uk> |
| Date: Sun, 15 Sep 2019 12:12:39 -0400 |
| Subject: [PATCH] Fix the locking in dcache_readdir() and friends |
| |
| commit d4f4de5e5ef8efde85febb6876cd3c8ab1631999 upstream. |
| |
| There are two problems in dcache_readdir() - one is that lockless traversal |
| of the list needs non-trivial cooperation of d_alloc() (at least a switch |
| to list_add_rcu(), and probably more than just that) and another is that |
| it assumes that no removal will happen without the directory locked exclusive. |
| Said assumption had always been there, never had been stated explicitly and |
| is violated by several places in the kernel (devpts and selinuxfs). |
| |
| * replacement of next_positive() with different calling conventions: |
| it returns struct list_head * instead of struct dentry *; the latter is |
| passed in and out by reference, grabbing the result and dropping the original |
| value. |
| * scan is under ->d_lock. If we run out of timeslice, cursor is moved |
| after the last position we'd reached and we reschedule; then the scan continues |
| from that place. To avoid livelocks between multiple lseek() (with cursors |
| getting moved past each other, never reaching the real entries) we always |
| skip the cursors, need_resched() or not. |
| * returned list_head * is either ->d_child of dentry we'd found or |
| ->d_subdirs of parent (if we got to the end of the list). |
| * dcache_readdir() and dcache_dir_lseek() switched to new helper. |
| dcache_readdir() always holds a reference to dentry passed to dir_emit() now. |
| Cursor is moved to just before the entry where dir_emit() has failed or into |
| the very end of the list, if we'd run out. |
| * move_cursor() eliminated - it had sucky calling conventions and |
| after fixing that it became simply list_move() (in lseek and scan_positives) |
| or list_move_tail() (in readdir). |
| |
| All operations with the list are under ->d_lock now, and we do not |
| depend upon having all file removals done with parent locked exclusive |
| anymore. |
| |
| Cc: stable@vger.kernel.org |
| Reported-by: "zhengbin (A)" <zhengbin13@huawei.com> |
| Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/fs/libfs.c b/fs/libfs.c |
| index 7e52e77692ec..7ac2d4bb4735 100644 |
| --- a/fs/libfs.c |
| +++ b/fs/libfs.c |
| @@ -87,58 +87,47 @@ int dcache_dir_close(struct inode *inode, struct file *file) |
| EXPORT_SYMBOL(dcache_dir_close); |
| |
| /* parent is locked at least shared */ |
| -static struct dentry *next_positive(struct dentry *parent, |
| - struct list_head *from, |
| - int count) |
| +/* |
| + * Returns an element of siblings' list. |
| + * We are looking for <count>th positive after <p>; if |
| + * found, dentry is grabbed and passed to caller via *<res>. |
| + * If no such element exists, the anchor of list is returned |
| + * and *<res> is set to NULL. |
| + */ |
| +static struct list_head *scan_positives(struct dentry *cursor, |
| + struct list_head *p, |
| + loff_t count, |
| + struct dentry **res) |
| { |
| - unsigned *seq = &parent->d_inode->i_dir_seq, n; |
| - struct dentry *res; |
| - struct list_head *p; |
| - bool skipped; |
| - int i; |
| + struct dentry *dentry = cursor->d_parent, *found = NULL; |
| |
| -retry: |
| - i = count; |
| - skipped = false; |
| - n = smp_load_acquire(seq) & ~1; |
| - res = NULL; |
| - rcu_read_lock(); |
| - for (p = from->next; p != &parent->d_subdirs; p = p->next) { |
| + spin_lock(&dentry->d_lock); |
| + while ((p = p->next) != &dentry->d_subdirs) { |
| struct dentry *d = list_entry(p, struct dentry, d_child); |
| - if (!simple_positive(d)) { |
| - skipped = true; |
| - } else if (!--i) { |
| - res = d; |
| - break; |
| + // we must at least skip cursors, to avoid livelocks |
| + if (d->d_flags & DCACHE_DENTRY_CURSOR) |
| + continue; |
| + if (simple_positive(d) && !--count) { |
| + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); |
| + if (simple_positive(d)) |
| + found = dget_dlock(d); |
| + spin_unlock(&d->d_lock); |
| + if (likely(found)) |
| + break; |
| + count = 1; |
| + } |
| + if (need_resched()) { |
| + list_move(&cursor->d_child, p); |
| + p = &cursor->d_child; |
| + spin_unlock(&dentry->d_lock); |
| + cond_resched(); |
| + spin_lock(&dentry->d_lock); |
| } |
| } |
| - rcu_read_unlock(); |
| - if (skipped) { |
| - smp_rmb(); |
| - if (unlikely(*seq != n)) |
| - goto retry; |
| - } |
| - return res; |
| -} |
| - |
| -static void move_cursor(struct dentry *cursor, struct list_head *after) |
| -{ |
| - struct dentry *parent = cursor->d_parent; |
| - unsigned n, *seq = &parent->d_inode->i_dir_seq; |
| - spin_lock(&parent->d_lock); |
| - for (;;) { |
| - n = *seq; |
| - if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) |
| - break; |
| - cpu_relax(); |
| - } |
| - __list_del(cursor->d_child.prev, cursor->d_child.next); |
| - if (after) |
| - list_add(&cursor->d_child, after); |
| - else |
| - list_add_tail(&cursor->d_child, &parent->d_subdirs); |
| - smp_store_release(seq, n + 2); |
| - spin_unlock(&parent->d_lock); |
| + spin_unlock(&dentry->d_lock); |
| + dput(*res); |
| + *res = found; |
| + return p; |
| } |
| |
| loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) |
| @@ -156,17 +145,28 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) |
| return -EINVAL; |
| } |
| if (offset != file->f_pos) { |
| + struct dentry *cursor = file->private_data; |
| + struct dentry *to = NULL; |
| + struct list_head *p; |
| + |
| file->f_pos = offset; |
| - if (file->f_pos >= 2) { |
| - struct dentry *cursor = file->private_data; |
| - struct dentry *to; |
| - loff_t n = file->f_pos - 2; |
| - |
| - inode_lock_shared(dentry->d_inode); |
| - to = next_positive(dentry, &dentry->d_subdirs, n); |
| - move_cursor(cursor, to ? &to->d_child : NULL); |
| - inode_unlock_shared(dentry->d_inode); |
| + inode_lock_shared(dentry->d_inode); |
| + |
| + if (file->f_pos > 2) { |
| + p = scan_positives(cursor, &dentry->d_subdirs, |
| + file->f_pos - 2, &to); |
| + spin_lock(&dentry->d_lock); |
| + list_move(&cursor->d_child, p); |
| + spin_unlock(&dentry->d_lock); |
| + } else { |
| + spin_lock(&dentry->d_lock); |
| + list_del_init(&cursor->d_child); |
| + spin_unlock(&dentry->d_lock); |
| } |
| + |
| + dput(to); |
| + |
| + inode_unlock_shared(dentry->d_inode); |
| } |
| return offset; |
| } |
| @@ -188,25 +188,29 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) |
| { |
| struct dentry *dentry = file->f_path.dentry; |
| struct dentry *cursor = file->private_data; |
| - struct list_head *p = &cursor->d_child; |
| - struct dentry *next; |
| - bool moved = false; |
| + struct list_head *anchor = &dentry->d_subdirs; |
| + struct dentry *next = NULL; |
| + struct list_head *p; |
| |
| if (!dir_emit_dots(file, ctx)) |
| return 0; |
| |
| if (ctx->pos == 2) |
| - p = &dentry->d_subdirs; |
| - while ((next = next_positive(dentry, p, 1)) != NULL) { |
| + p = anchor; |
| + else |
| + p = &cursor->d_child; |
| + |
| + while ((p = scan_positives(cursor, p, 1, &next)) != anchor) { |
| if (!dir_emit(ctx, next->d_name.name, next->d_name.len, |
| d_inode(next)->i_ino, dt_type(d_inode(next)))) |
| break; |
| - moved = true; |
| - p = &next->d_child; |
| ctx->pos++; |
| } |
| - if (moved) |
| - move_cursor(cursor, p); |
| + spin_lock(&dentry->d_lock); |
| + list_move_tail(&cursor->d_child, p); |
| + spin_unlock(&dentry->d_lock); |
| + dput(next); |
| + |
| return 0; |
| } |
| EXPORT_SYMBOL(dcache_readdir); |
| -- |
| 2.7.4 |
| |