Merge branch 'work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull dcache_readdir() fixes from Al Viro:
 "The couple of patches you'd been OK with; no hlist conversion yet, and
  cursors are still in the list of children"

[ Al is referring to future work to avoid some nasty O(n**2) behavior
  with the readdir cursors when you have lots of concurrent readdirs.

  This is just a fix for a race, along with a trivial cleanup - Linus ]

* 'work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  libfs: take cursors out of list when moving past the end of directory
  Fix the locking in dcache_readdir() and friends
This commit is contained in:
Linus Torvalds 2019-10-10 08:26:58 -07:00
commit ad338d0543
1 changed file with 71 additions and 66 deletions

View File

@ -89,58 +89,45 @@ int dcache_dir_close(struct inode *inode, struct file *file)
EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_close);
/* parent is locked at least shared */ /* parent is locked at least shared */
static struct dentry *next_positive(struct dentry *parent, /*
struct list_head *from, * Returns an element of siblings' list.
int count) * We are looking for <count>th positive after <p>; if
* found, dentry is grabbed and returned to caller.
* If no such element exists, NULL is returned.
*/
static struct dentry *scan_positives(struct dentry *cursor,
struct list_head *p,
loff_t count,
struct dentry *last)
{ {
unsigned *seq = &parent->d_inode->i_dir_seq, n; struct dentry *dentry = cursor->d_parent, *found = NULL;
struct dentry *res;
struct list_head *p;
bool skipped;
int i;
retry: spin_lock(&dentry->d_lock);
i = count; while ((p = p->next) != &dentry->d_subdirs) {
skipped = false;
n = smp_load_acquire(seq) & ~1;
res = NULL;
rcu_read_lock();
for (p = from->next; p != &parent->d_subdirs; p = p->next) {
struct dentry *d = list_entry(p, struct dentry, d_child); struct dentry *d = list_entry(p, struct dentry, d_child);
if (!simple_positive(d)) { // we must at least skip cursors, to avoid livelocks
skipped = true; if (d->d_flags & DCACHE_DENTRY_CURSOR)
} else if (!--i) { continue;
res = d; if (simple_positive(d) && !--count) {
break; spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
if (simple_positive(d))
found = dget_dlock(d);
spin_unlock(&d->d_lock);
if (likely(found))
break;
count = 1;
}
if (need_resched()) {
list_move(&cursor->d_child, p);
p = &cursor->d_child;
spin_unlock(&dentry->d_lock);
cond_resched();
spin_lock(&dentry->d_lock);
} }
} }
rcu_read_unlock(); spin_unlock(&dentry->d_lock);
if (skipped) { dput(last);
smp_rmb(); return found;
if (unlikely(*seq != n))
goto retry;
}
return res;
}
static void move_cursor(struct dentry *cursor, struct list_head *after)
{
struct dentry *parent = cursor->d_parent;
unsigned n, *seq = &parent->d_inode->i_dir_seq;
spin_lock(&parent->d_lock);
for (;;) {
n = *seq;
if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
break;
cpu_relax();
}
__list_del(cursor->d_child.prev, cursor->d_child.next);
if (after)
list_add(&cursor->d_child, after);
else
list_add_tail(&cursor->d_child, &parent->d_subdirs);
smp_store_release(seq, n + 2);
spin_unlock(&parent->d_lock);
} }
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
@ -158,17 +145,25 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
return -EINVAL; return -EINVAL;
} }
if (offset != file->f_pos) { if (offset != file->f_pos) {
file->f_pos = offset; struct dentry *cursor = file->private_data;
if (file->f_pos >= 2) { struct dentry *to = NULL;
struct dentry *cursor = file->private_data;
struct dentry *to;
loff_t n = file->f_pos - 2;
inode_lock_shared(dentry->d_inode); inode_lock_shared(dentry->d_inode);
to = next_positive(dentry, &dentry->d_subdirs, n);
move_cursor(cursor, to ? &to->d_child : NULL); if (offset > 2)
inode_unlock_shared(dentry->d_inode); to = scan_positives(cursor, &dentry->d_subdirs,
} offset - 2, NULL);
spin_lock(&dentry->d_lock);
if (to)
list_move(&cursor->d_child, &to->d_child);
else
list_del_init(&cursor->d_child);
spin_unlock(&dentry->d_lock);
dput(to);
file->f_pos = offset;
inode_unlock_shared(dentry->d_inode);
} }
return offset; return offset;
} }
@ -190,25 +185,35 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
{ {
struct dentry *dentry = file->f_path.dentry; struct dentry *dentry = file->f_path.dentry;
struct dentry *cursor = file->private_data; struct dentry *cursor = file->private_data;
struct list_head *p = &cursor->d_child; struct list_head *anchor = &dentry->d_subdirs;
struct dentry *next; struct dentry *next = NULL;
bool moved = false; struct list_head *p;
if (!dir_emit_dots(file, ctx)) if (!dir_emit_dots(file, ctx))
return 0; return 0;
if (ctx->pos == 2) if (ctx->pos == 2)
p = &dentry->d_subdirs; p = anchor;
while ((next = next_positive(dentry, p, 1)) != NULL) { else if (!list_empty(&cursor->d_child))
p = &cursor->d_child;
else
return 0;
while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
if (!dir_emit(ctx, next->d_name.name, next->d_name.len, if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
d_inode(next)->i_ino, dt_type(d_inode(next)))) d_inode(next)->i_ino, dt_type(d_inode(next))))
break; break;
moved = true;
p = &next->d_child;
ctx->pos++; ctx->pos++;
p = &next->d_child;
} }
if (moved) spin_lock(&dentry->d_lock);
move_cursor(cursor, p); if (next)
list_move_tail(&cursor->d_child, &next->d_child);
else
list_del_init(&cursor->d_child);
spin_unlock(&dentry->d_lock);
dput(next);
return 0; return 0;
} }
EXPORT_SYMBOL(dcache_readdir); EXPORT_SYMBOL(dcache_readdir);