From 4ded097bed1663b307f353a0dd6ad931e345834e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 20 Oct 2017 11:41:17 +1100 Subject: [PATCH 01/22] constify more dcache.h inlined helpers. const struct pointers in commit f0d3b3ded999 ("constify dcache.c inlined helpers where possible"). This patch allows 'const' in a couple that were added since then. Signed-off-by: NeilBrown Signed-off-by: Al Viro --- include/linux/dcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ed1a7cf6923a..4cc3d891ea03 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -358,7 +358,7 @@ static inline void dont_mount(struct dentry *dentry) extern void __d_lookup_done(struct dentry *); -static inline int d_in_lookup(struct dentry *dentry) +static inline int d_in_lookup(const struct dentry *dentry) { return dentry->d_flags & DCACHE_PAR_LOOKUP; } @@ -486,7 +486,7 @@ static inline bool d_really_is_positive(const struct dentry *dentry) return dentry->d_inode != NULL; } -static inline int simple_positive(struct dentry *dentry) +static inline int simple_positive(const struct dentry *dentry) { return d_really_is_positive(dentry) && !d_unhashed(dentry); } From 0632a9ac7bc0a32f8251a53b3925775f0a7c4da6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 Mar 2018 00:49:10 -0500 Subject: [PATCH 02/22] take write_seqcount_invalidate() into __d_drop() ... and reorder it with making d_unhashed() true. Signed-off-by: Al Viro --- fs/dcache.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 8945e6cabd93..78974248a148 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -470,30 +470,29 @@ static void dentry_lru_add(struct dentry *dentry) */ static void ___d_drop(struct dentry *dentry) { - if (!d_unhashed(dentry)) { - struct hlist_bl_head *b; - /* - * Hashed dentries are normally on the dentry hashtable, - * with the exception of those newly allocated by - * d_obtain_root, which are always IS_ROOT: - */ - if (unlikely(IS_ROOT(dentry))) - b = &dentry->d_sb->s_roots; - else - b = d_hash(dentry->d_name.hash); + struct hlist_bl_head *b; + /* + * Hashed dentries are normally on the dentry hashtable, + * with the exception of those newly allocated by + * d_obtain_root, which are always IS_ROOT: + */ + if (unlikely(IS_ROOT(dentry))) + b = &dentry->d_sb->s_roots; + else + b = d_hash(dentry->d_name.hash); - hlist_bl_lock(b); - __hlist_bl_del(&dentry->d_hash); - hlist_bl_unlock(b); - /* After this call, in-progress rcu-walk path lookup will fail. */ - write_seqcount_invalidate(&dentry->d_seq); - } + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + hlist_bl_unlock(b); } void __d_drop(struct dentry *dentry) { - ___d_drop(dentry); - dentry->d_hash.pprev = NULL; + if (!d_unhashed(dentry)) { + ___d_drop(dentry); + dentry->d_hash.pprev = NULL; + write_seqcount_invalidate(&dentry->d_seq); + } } EXPORT_SYMBOL(__d_drop); @@ -2853,9 +2852,10 @@ static void __d_move(struct dentry *dentry, struct dentry *target, write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* unhash both */ - /* ___d_drop does write_seqcount_barrier, but they're OK to nest. */ - ___d_drop(dentry); - ___d_drop(target); + if (!d_unhashed(dentry)) + ___d_drop(dentry); + if (!d_unhashed(target)) + ___d_drop(target); /* Switch the names.. */ if (exchange) From 06080d100d921848b35196850ec17310469e06ba Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 23 Feb 2018 00:50:20 +0100 Subject: [PATCH 03/22] fs/dcache: Remove stale comment from dentry_kill() Commit 0d98439ea3c6 ("vfs: use lockred "dead" flag to mark unrecoverably dead dentries") removed the `ref' parameter in dentry_kill() but its documentation remained. Remove it. Signed-off-by: John Ogness Signed-off-by: Al Viro --- fs/dcache.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index 78974248a148..9a83fc5f440c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -591,7 +591,6 @@ static void __dentry_kill(struct dentry *dentry) /* * Finish off a dentry we've decided to kill. * dentry->d_lock must be held, returns with it unlocked. - * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ static struct dentry *dentry_kill(struct dentry *dentry) From c1d0c1a2b51e86124b7ba8ff9054698e2036d8e7 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 23 Feb 2018 00:50:21 +0100 Subject: [PATCH 04/22] fs/dcache: Move dentry_kill() below lock_parent() A subsequent patch will modify dentry_kill() to call lock_parent(). Move the dentry_kill() implementation "as is" below lock_parent() first. This will help simplify the review of the subsequent patch with dentry_kill() changes. Signed-off-by: John Ogness Signed-off-by: Al Viro --- fs/dcache.c | 62 ++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 9a83fc5f440c..01c0432ec83a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -588,37 +588,6 @@ static void __dentry_kill(struct dentry *dentry) dentry_free(dentry); } -/* - * Finish off a dentry we've decided to kill. - * dentry->d_lock must be held, returns with it unlocked. - * Returns dentry requiring refcount drop, or NULL if we're done. - */ -static struct dentry *dentry_kill(struct dentry *dentry) - __releases(dentry->d_lock) -{ - struct inode *inode = dentry->d_inode; - struct dentry *parent = NULL; - - if (inode && unlikely(!spin_trylock(&inode->i_lock))) - goto failed; - - if (!IS_ROOT(dentry)) { - parent = dentry->d_parent; - if (unlikely(!spin_trylock(&parent->d_lock))) { - if (inode) - spin_unlock(&inode->i_lock); - goto failed; - } - } - - __dentry_kill(dentry); - return parent; - -failed: - spin_unlock(&dentry->d_lock); - return dentry; /* try again with same dentry */ -} - static inline struct dentry *lock_parent(struct dentry *dentry) { struct dentry *parent = dentry->d_parent; @@ -658,6 +627,37 @@ again: return parent; } +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static struct dentry *dentry_kill(struct dentry *dentry) + __releases(dentry->d_lock) +{ + struct inode *inode = dentry->d_inode; + struct dentry *parent = NULL; + + if (inode && unlikely(!spin_trylock(&inode->i_lock))) + goto failed; + + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + goto failed; + } + } + + __dentry_kill(dentry); + return parent; + +failed: + spin_unlock(&dentry->d_lock); + return dentry; /* try again with same dentry */ +} + /* * Try to do a lockless dput(), and return whether that was successful. * From c19457f0aed7fae73bb40e68ffcc72f36e3966a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Feb 2018 21:02:31 -0500 Subject: [PATCH 05/22] d_delete(): get rid of trylock loop just grab ->i_lock first; we have a positive dentry, nothing's going to happen to inode Signed-off-by: Al Viro --- fs/dcache.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 01c0432ec83a..1684b6b262de 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2377,32 +2377,22 @@ EXPORT_SYMBOL(d_hash_and_lookup); void d_delete(struct dentry * dentry) { - struct inode *inode; - int isdir = 0; + struct inode *inode = dentry->d_inode; + int isdir = d_is_dir(dentry); + + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); /* * Are we the only user? */ -again: - spin_lock(&dentry->d_lock); - inode = dentry->d_inode; - isdir = S_ISDIR(inode->i_mode); if (dentry->d_lockref.count == 1) { - if (!spin_trylock(&inode->i_lock)) { - spin_unlock(&dentry->d_lock); - cpu_relax(); - goto again; - } dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_unlink_inode(dentry); - fsnotify_nameremove(dentry, isdir); - return; - } - - if (!d_unhashed(dentry)) + } else { __d_drop(dentry); - - spin_unlock(&dentry->d_lock); - + spin_unlock(&dentry->d_lock); + spin_unlock(&inode->i_lock); + } fsnotify_nameremove(dentry, isdir); } EXPORT_SYMBOL(d_delete); From 3b3f09f48ba78c0634e929849860a6447d057eed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Feb 2018 21:54:18 -0500 Subject: [PATCH 06/22] get rid of trylock loop in locking dentries on shrink list In case of trylock failure don't re-add to the list - drop the locks and carefully get them in the right order. For shrink_dentry_list(), somebody having grabbed a reference to dentry means that we can kick it off-list, so if we find dentry being modified under us we don't need to play silly buggers with retries anyway - off the list it is. The locking logics taken out into a helper of its own; lock_parent() is no longer used for dentries that can be killed under us. [fix from Eric Biggers folded] Signed-off-by: Al Viro --- fs/dcache.c | 106 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 38 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 1684b6b262de..af8501489af5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -974,56 +974,86 @@ restart: } EXPORT_SYMBOL(d_prune_aliases); +/* + * Lock a dentry from shrink list. + * Note that dentry is *not* protected from concurrent dentry_kill(), + * d_delete(), etc. It is protected from freeing (by the fact of + * being on a shrink list), but everything else is fair game. + * Return false if dentry has been disrupted or grabbed, leaving + * the caller to kick it off-list. Otherwise, return true and have + * that dentry's inode and parent both locked. + */ +static bool shrink_lock_dentry(struct dentry *dentry) +{ + struct inode *inode; + struct dentry *parent; + + if (dentry->d_lockref.count) + return false; + + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { + rcu_read_lock(); /* to protect inode */ + spin_unlock(&dentry->d_lock); + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); + if (unlikely(dentry->d_lockref.count)) + goto out; + /* changed inode means that somebody had grabbed it */ + if (unlikely(inode != dentry->d_inode)) + goto out; + rcu_read_unlock(); + } + + parent = dentry->d_parent; + if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock))) + return true; + + rcu_read_lock(); /* to protect parent */ + spin_unlock(&dentry->d_lock); + parent = READ_ONCE(dentry->d_parent); + spin_lock(&parent->d_lock); + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); + spin_lock(&dentry->d_lock); + goto out; + } + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (likely(!dentry->d_lockref.count)) { + rcu_read_unlock(); + return true; + } + spin_unlock(&parent->d_lock); +out: + if (inode) + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + return false; +} + static void shrink_dentry_list(struct list_head *list) { - struct dentry *dentry, *parent; - while (!list_empty(list)) { + struct dentry *dentry, *parent; struct inode *inode; + dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); - parent = lock_parent(dentry); - - /* - * The dispose list is isolated and dentries are not accounted - * to the LRU here, so we can simply remove it from the list - * here regardless of whether it is referenced or not. - */ - d_shrink_del(dentry); - - /* - * We found an inuse dentry which was not removed from - * the LRU because of laziness during lookup. Do not free it. - */ - if (dentry->d_lockref.count > 0) { + if (!shrink_lock_dentry(dentry)) { + bool can_free = false; + d_shrink_del(dentry); + if (dentry->d_lockref.count < 0) + can_free = dentry->d_flags & DCACHE_MAY_FREE; spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - continue; - } - - - if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { - bool can_free = dentry->d_flags & DCACHE_MAY_FREE; - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); if (can_free) dentry_free(dentry); continue; } - - inode = dentry->d_inode; - if (inode && unlikely(!spin_trylock(&inode->i_lock))) { - d_shrink_add(dentry, list); - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - continue; - } - + d_shrink_del(dentry); + parent = dentry->d_parent; __dentry_kill(dentry); - + if (parent == dentry) + continue; /* * We need to prune ancestors too. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also From 65d8eb5a8f5480756105173de147ef5d60163e2f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Feb 2018 22:07:35 -0500 Subject: [PATCH 07/22] now lock_parent() can't run into killed dentry all remaining callers hold either a reference or ->i_lock Signed-off-by: Al Viro --- fs/dcache.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index af8501489af5..916fd57b9d18 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -593,8 +593,6 @@ static inline struct dentry *lock_parent(struct dentry *dentry) struct dentry *parent = dentry->d_parent; if (IS_ROOT(dentry)) return NULL; - if (unlikely(dentry->d_lockref.count < 0)) - return NULL; if (likely(spin_trylock(&parent->d_lock))) return parent; rcu_read_lock(); @@ -614,16 +612,11 @@ again: spin_unlock(&parent->d_lock); goto again; } - if (parent != dentry) { - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - if (unlikely(dentry->d_lockref.count < 0)) { - spin_unlock(&parent->d_lock); - parent = NULL; - } - } else { - parent = NULL; - } rcu_read_unlock(); + if (parent != dentry) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + else + parent = NULL; return parent; } From 8b987a46a1e0e93d4cb4babea06ea274e2e2b658 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Feb 2018 22:11:34 -0500 Subject: [PATCH 08/22] split the slow part of lock_parent() off Turn the "trylock failed" part into uninlined __lock_parent(). Signed-off-by: Al Viro --- fs/dcache.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 916fd57b9d18..61819fb32e13 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -588,13 +588,9 @@ static void __dentry_kill(struct dentry *dentry) dentry_free(dentry); } -static inline struct dentry *lock_parent(struct dentry *dentry) +static struct dentry *__lock_parent(struct dentry *dentry) { - struct dentry *parent = dentry->d_parent; - if (IS_ROOT(dentry)) - return NULL; - if (likely(spin_trylock(&parent->d_lock))) - return parent; + struct dentry *parent; rcu_read_lock(); spin_unlock(&dentry->d_lock); again: @@ -620,6 +616,16 @@ again: return parent; } +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *parent = dentry->d_parent; + if (IS_ROOT(dentry)) + return NULL; + if (likely(spin_trylock(&parent->d_lock))) + return parent; + return __lock_parent(dentry); +} + /* * Finish off a dentry we've decided to kill. * dentry->d_lock must be held, returns with it unlocked. From a338579f2f3d6a15c78f1dc7de4c248b4183fcea Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Feb 2018 21:07:24 -0500 Subject: [PATCH 09/22] dput(): consolidate the "do we need to retain it?" into an inlined helper Signed-off-by: Al Viro --- fs/dcache.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 61819fb32e13..4208376497f4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -626,6 +626,24 @@ static inline struct dentry *lock_parent(struct dentry *dentry) return __lock_parent(dentry); } +static inline bool retain_dentry(struct dentry *dentry) +{ + WARN_ON(d_in_lookup(dentry)); + + /* Unreachable? Get rid of it */ + if (unlikely(d_unhashed(dentry))) + return false; + + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + return false; + + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { + if (dentry->d_op->d_delete(dentry)) + return false; + } + return true; +} + /* * Finish off a dentry we've decided to kill. * dentry->d_lock must be held, returns with it unlocked. @@ -804,27 +822,13 @@ repeat: /* Slow case: now with the dentry lock held */ rcu_read_unlock(); - WARN_ON(d_in_lookup(dentry)); - - /* Unreachable? Get rid of it */ - if (unlikely(d_unhashed(dentry))) - goto kill_it; - - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) - goto kill_it; - - if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { - if (dentry->d_op->d_delete(dentry)) - goto kill_it; + if (likely(retain_dentry(dentry))) { + dentry_lru_add(dentry); + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); + return; } - dentry_lru_add(dentry); - - dentry->d_lockref.count--; - spin_unlock(&dentry->d_lock); - return; - -kill_it: dentry = dentry_kill(dentry); if (dentry) { cond_resched(); From 62d9956cefe6ecc4b43a7fae37af78ba7adaceaa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 6 Mar 2018 21:37:31 -0500 Subject: [PATCH 10/22] handle move to LRU in retain_dentry() Signed-off-by: Al Viro --- fs/dcache.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 4208376497f4..f5609902c6dd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -440,17 +440,6 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, list_lru_isolate_move(lru, &dentry->d_lru, list); } -/* - * dentry_lru_(add|del)_list) must be called with d_lock held. - */ -static void dentry_lru_add(struct dentry *dentry) -{ - if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) - d_lru_add(dentry); - else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED))) - dentry->d_flags |= DCACHE_REFERENCED; -} - /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -641,6 +630,12 @@ static inline bool retain_dentry(struct dentry *dentry) if (dentry->d_op->d_delete(dentry)) return false; } + /* retain; LRU fodder */ + dentry->d_lockref.count--; + if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) + d_lru_add(dentry); + else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED))) + dentry->d_flags |= DCACHE_REFERENCED; return true; } @@ -823,8 +818,6 @@ repeat: rcu_read_unlock(); if (likely(retain_dentry(dentry))) { - dentry_lru_add(dentry); - dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); return; } From f657a666fd1b1b9fe59963943c74c245ae66f4cc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Feb 2018 21:25:42 -0500 Subject: [PATCH 11/22] get rid of trylock loop around dentry_kill() In case when trylock in there fails, deal with it directly in dentry_kill(). Note that in cases when we drop and retake ->d_lock, we need to recheck whether to retain the dentry. Another thing is that dropping/retaking ->d_lock might have ended up with negative dentry turning into positive; that, of course, can happen only once... Signed-off-by: Al Viro --- fs/dcache.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index f5609902c6dd..f2d945688025 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -651,23 +651,43 @@ static struct dentry *dentry_kill(struct dentry *dentry) struct dentry *parent = NULL; if (inode && unlikely(!spin_trylock(&inode->i_lock))) - goto failed; + goto slow_positive; if (!IS_ROOT(dentry)) { parent = dentry->d_parent; if (unlikely(!spin_trylock(&parent->d_lock))) { - if (inode) - spin_unlock(&inode->i_lock); - goto failed; + parent = __lock_parent(dentry); + if (likely(inode || !dentry->d_inode)) + goto got_locks; + /* negative that became positive */ + if (parent) + spin_unlock(&parent->d_lock); + inode = dentry->d_inode; + goto slow_positive; } } - __dentry_kill(dentry); return parent; -failed: +slow_positive: spin_unlock(&dentry->d_lock); - return dentry; /* try again with same dentry */ + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); + parent = lock_parent(dentry); +got_locks: + if (unlikely(dentry->d_lockref.count != 1)) { + dentry->d_lockref.count--; + } else if (likely(!retain_dentry(dentry))) { + __dentry_kill(dentry); + return parent; + } + /* we are keeping it, after all */ + if (inode) + spin_unlock(&inode->i_lock); + if (parent) + spin_unlock(&parent->d_lock); + spin_unlock(&dentry->d_lock); + return NULL; } /* From 8f04da2adbdffed8dc4b2feb00ec3b3d84683885 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 23 Feb 2018 00:50:24 +0100 Subject: [PATCH 12/22] fs/dcache: Avoid a try_lock loop in shrink_dentry_list() shrink_dentry_list() holds dentry->d_lock and needs to acquire dentry->d_inode->i_lock. This cannot be done with a spin_lock() operation because it's the reverse of the regular lock order. To avoid ABBA deadlocks it is done with a trylock loop. Trylock loops are problematic in two scenarios: 1) PREEMPT_RT converts spinlocks to 'sleeping' spinlocks, which are preemptible. As a consequence the i_lock holder can be preempted by a higher priority task. If that task executes the trylock loop it will do so forever and live lock. 2) In virtual machines trylock loops are problematic as well. The VCPU on which the i_lock holder runs can be scheduled out and a task on a different VCPU can loop for a whole time slice. In the worst case this can lead to starvation. Commits 47be61845c77 ("fs/dcache.c: avoid soft-lockup in dput()") and 046b961b45f9 ("shrink_dentry_list(): take parent's d_lock earlier") are addressing exactly those symptoms. Avoid the trylock loop by using dentry_kill(). When pruning ancestors, the same code applies that is used to kill a dentry in dput(). This also has the benefit that the locking order is now the same. First the inode is locked, then the parent. Signed-off-by: John Ogness Signed-off-by: Al Viro --- fs/dcache.c | 41 ++++++++++------------------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index f2d945688025..9b4e7fd0ee66 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -992,9 +992,11 @@ EXPORT_SYMBOL(d_prune_aliases); /* * Lock a dentry from shrink list. + * Called under rcu_read_lock() and dentry->d_lock; the former + * guarantees that nothing we access will be freed under us. * Note that dentry is *not* protected from concurrent dentry_kill(), - * d_delete(), etc. It is protected from freeing (by the fact of - * being on a shrink list), but everything else is fair game. + * d_delete(), etc. + * * Return false if dentry has been disrupted or grabbed, leaving * the caller to kick it off-list. Otherwise, return true and have * that dentry's inode and parent both locked. @@ -1009,7 +1011,6 @@ static bool shrink_lock_dentry(struct dentry *dentry) inode = dentry->d_inode; if (inode && unlikely(!spin_trylock(&inode->i_lock))) { - rcu_read_lock(); /* to protect inode */ spin_unlock(&dentry->d_lock); spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); @@ -1018,16 +1019,13 @@ static bool shrink_lock_dentry(struct dentry *dentry) /* changed inode means that somebody had grabbed it */ if (unlikely(inode != dentry->d_inode)) goto out; - rcu_read_unlock(); } parent = dentry->d_parent; if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock))) return true; - rcu_read_lock(); /* to protect parent */ spin_unlock(&dentry->d_lock); - parent = READ_ONCE(dentry->d_parent); spin_lock(&parent->d_lock); if (unlikely(parent != dentry->d_parent)) { spin_unlock(&parent->d_lock); @@ -1035,15 +1033,12 @@ static bool shrink_lock_dentry(struct dentry *dentry) goto out; } spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - if (likely(!dentry->d_lockref.count)) { - rcu_read_unlock(); + if (likely(!dentry->d_lockref.count)) return true; - } spin_unlock(&parent->d_lock); out: if (inode) spin_unlock(&inode->i_lock); - rcu_read_unlock(); return false; } @@ -1051,12 +1046,13 @@ static void shrink_dentry_list(struct list_head *list) { while (!list_empty(list)) { struct dentry *dentry, *parent; - struct inode *inode; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); + rcu_read_lock(); if (!shrink_lock_dentry(dentry)) { bool can_free = false; + rcu_read_unlock(); d_shrink_del(dentry); if (dentry->d_lockref.count < 0) can_free = dentry->d_flags & DCACHE_MAY_FREE; @@ -1065,6 +1061,7 @@ static void shrink_dentry_list(struct list_head *list) dentry_free(dentry); continue; } + rcu_read_unlock(); d_shrink_del(dentry); parent = dentry->d_parent; __dentry_kill(dentry); @@ -1077,26 +1074,8 @@ static void shrink_dentry_list(struct list_head *list) * fragmentation. */ dentry = parent; - while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { - parent = lock_parent(dentry); - if (dentry->d_lockref.count != 1) { - dentry->d_lockref.count--; - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - break; - } - inode = dentry->d_inode; /* can't be NULL */ - if (unlikely(!spin_trylock(&inode->i_lock))) { - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - cpu_relax(); - continue; - } - __dentry_kill(dentry); - dentry = parent; - } + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) + dentry = dentry_kill(dentry); } } From 43986d63b60fd0152d9038ee3f0f9294efa8c983 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Feb 2018 02:47:29 -0500 Subject: [PATCH 13/22] dcache.c: trim includes Signed-off-by: Al Viro --- fs/dcache.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 9b4e7fd0ee66..91626643fc86 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -25,17 +25,14 @@ #include #include #include -#include #include #include #include -#include #include #include #include #include #include -#include #include #include "internal.h" #include "mount.h" From 7a5cf791a747640adb2a1b5e3838321b26953a23 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Mar 2018 19:15:50 -0500 Subject: [PATCH 14/22] split d_path() and friends into a separate file Those parts of fs/dcache.c are pretty much self-contained. Signed-off-by: Al Viro --- fs/Makefile | 2 +- fs/d_path.c | 470 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/dcache.c | 467 +-------------------------------------------------- 3 files changed, 472 insertions(+), 467 deletions(-) create mode 100644 fs/d_path.c diff --git a/fs/Makefile b/fs/Makefile index add789ea270a..c9375fd2c8c4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ ioctl.o readdir.o select.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ - pnode.o splice.o sync.o utimes.o \ + pnode.o splice.o sync.o utimes.o d_path.o \ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o ifeq ($(CONFIG_BLOCK),y) diff --git a/fs/d_path.c b/fs/d_path.c new file mode 100644 index 000000000000..e8fce6b1174f --- /dev/null +++ b/fs/d_path.c @@ -0,0 +1,470 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include +#include +#include +#include "mount.h" + +static int prepend(char **buffer, int *buflen, const char *str, int namelen) +{ + *buflen -= namelen; + if (*buflen < 0) + return -ENAMETOOLONG; + *buffer -= namelen; + memcpy(*buffer, str, namelen); + return 0; +} + +/** + * prepend_name - prepend a pathname in front of current buffer pointer + * @buffer: buffer pointer + * @buflen: allocated length of the buffer + * @name: name string and length qstr structure + * + * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to + * make sure that either the old or the new name pointer and length are + * fetched. However, there may be mismatch between length and pointer. + * The length cannot be trusted, we need to copy it byte-by-byte until + * the length is reached or a null byte is found. It also prepends "/" at + * the beginning of the name. The sequence number check at the caller will + * retry it again when a d_move() does happen. So any garbage in the buffer + * due to mismatched pointer and length will be discarded. + * + * Load acquire is needed to make sure that we see that terminating NUL. + */ +static int prepend_name(char **buffer, int *buflen, const struct qstr *name) +{ + const char *dname = smp_load_acquire(&name->name); /* ^^^ */ + u32 dlen = READ_ONCE(name->len); + char *p; + + *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; + p = *buffer -= dlen + 1; + *p++ = '/'; + while (dlen--) { + char c = *dname++; + if (!c) + break; + *p++ = c; + } + return 0; +} + +/** + * prepend_path - Prepend path string to a buffer + * @path: the dentry/vfsmount to report + * @root: root vfsmnt/dentry + * @buffer: pointer to the end of the buffer + * @buflen: pointer to buffer length + * + * The function will first try to write out the pathname without taking any + * lock other than the RCU read lock to make sure that dentries won't go away. + * It only checks the sequence number of the global rename_lock as any change + * in the dentry's d_seq will be preceded by changes in the rename_lock + * sequence number. If the sequence number had been changed, it will restart + * the whole pathname back-tracing sequence again by taking the rename_lock. + * In this case, there is no need to take the RCU read lock as the recursive + * parent pointer references will keep the dentry chain alive as long as no + * rename operation is performed. + */ +static int prepend_path(const struct path *path, + const struct path *root, + char **buffer, int *buflen) +{ + struct dentry *dentry; + struct vfsmount *vfsmnt; + struct mount *mnt; + int error = 0; + unsigned seq, m_seq = 0; + char *bptr; + int blen; + + rcu_read_lock(); +restart_mnt: + read_seqbegin_or_lock(&mount_lock, &m_seq); + seq = 0; + rcu_read_lock(); +restart: + bptr = *buffer; + blen = *buflen; + error = 0; + dentry = path->dentry; + vfsmnt = path->mnt; + mnt = real_mount(vfsmnt); + read_seqbegin_or_lock(&rename_lock, &seq); + while (dentry != root->dentry || vfsmnt != root->mnt) { + struct dentry * parent; + + if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { + struct mount *parent = READ_ONCE(mnt->mnt_parent); + /* Escaped? */ + if (dentry != vfsmnt->mnt_root) { + bptr = *buffer; + blen = *buflen; + error = 3; + break; + } + /* Global root? */ + if (mnt != parent) { + dentry = READ_ONCE(mnt->mnt_mountpoint); + mnt = parent; + vfsmnt = &mnt->mnt; + continue; + } + if (!error) + error = is_mounted(vfsmnt) ? 1 : 2; + break; + } + parent = dentry->d_parent; + prefetch(parent); + error = prepend_name(&bptr, &blen, &dentry->d_name); + if (error) + break; + + dentry = parent; + } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + + if (!(m_seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&mount_lock, m_seq)) { + m_seq = 1; + goto restart_mnt; + } + done_seqretry(&mount_lock, m_seq); + + if (error >= 0 && bptr == *buffer) { + if (--blen < 0) + error = -ENAMETOOLONG; + else + *--bptr = '/'; + } + *buffer = bptr; + *buflen = blen; + return error; +} + +/** + * __d_path - return the path of a dentry + * @path: the dentry/vfsmount to report + * @root: root vfsmnt/dentry + * @buf: buffer to return value in + * @buflen: buffer length + * + * Convert a dentry into an ASCII path name. + * + * Returns a pointer into the buffer or an error code if the + * path was too long. + * + * "buflen" should be positive. + * + * If the path is not reachable from the supplied root, return %NULL. + */ +char *__d_path(const struct path *path, + const struct path *root, + char *buf, int buflen) +{ + char *res = buf + buflen; + int error; + + prepend(&res, &buflen, "\0", 1); + error = prepend_path(path, root, &res, &buflen); + + if (error < 0) + return ERR_PTR(error); + if (error > 0) + return NULL; + return res; +} + +char *d_absolute_path(const struct path *path, + char *buf, int buflen) +{ + struct path root = {}; + char *res = buf + buflen; + int error; + + prepend(&res, &buflen, "\0", 1); + error = prepend_path(path, &root, &res, &buflen); + + if (error > 1) + error = -EINVAL; + if (error < 0) + return ERR_PTR(error); + return res; +} + +/* + * same as __d_path but appends "(deleted)" for unlinked files. + */ +static int path_with_deleted(const struct path *path, + const struct path *root, + char **buf, int *buflen) +{ + prepend(buf, buflen, "\0", 1); + if (d_unlinked(path->dentry)) { + int error = prepend(buf, buflen, " (deleted)", 10); + if (error) + return error; + } + + return prepend_path(path, root, buf, buflen); +} + +static int prepend_unreachable(char **buffer, int *buflen) +{ + return prepend(buffer, buflen, "(unreachable)", 13); +} + +static void get_fs_root_rcu(struct fs_struct *fs, struct path *root) +{ + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + *root = fs->root; + } while (read_seqcount_retry(&fs->seq, seq)); +} + +/** + * d_path - return the path of a dentry + * @path: path to report + * @buf: buffer to return value in + * @buflen: buffer length + * + * Convert a dentry into an ASCII path name. If the entry has been deleted + * the string " (deleted)" is appended. Note that this is ambiguous. + * + * Returns a pointer into the buffer or an error code if the path was + * too long. Note: Callers should use the returned pointer, not the passed + * in buffer, to use the name! The implementation often starts at an offset + * into the buffer, and may leave 0 bytes at the start. + * + * "buflen" should be positive. + */ +char *d_path(const struct path *path, char *buf, int buflen) +{ + char *res = buf + buflen; + struct path root; + int error; + + /* + * We have various synthetic filesystems that never get mounted. On + * these filesystems dentries are never used for lookup purposes, and + * thus don't need to be hashed. They also don't need a name until a + * user wants to identify the object in /proc/pid/fd/. The little hack + * below allows us to generate a name for these objects on demand: + * + * Some pseudo inodes are mountable. When they are mounted + * path->dentry == path->mnt->mnt_root. In that case don't call d_dname + * and instead have d_path return the mounted path. + */ + if (path->dentry->d_op && path->dentry->d_op->d_dname && + (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) + return path->dentry->d_op->d_dname(path->dentry, buf, buflen); + + rcu_read_lock(); + get_fs_root_rcu(current->fs, &root); + error = path_with_deleted(path, &root, &res, &buflen); + rcu_read_unlock(); + + if (error < 0) + res = ERR_PTR(error); + return res; +} +EXPORT_SYMBOL(d_path); + +/* + * Helper function for dentry_operations.d_dname() members + */ +char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, + const char *fmt, ...) +{ + va_list args; + char temp[64]; + int sz; + + va_start(args, fmt); + sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1; + va_end(args); + + if (sz > sizeof(temp) || sz > buflen) + return ERR_PTR(-ENAMETOOLONG); + + buffer += buflen - sz; + return memcpy(buffer, temp, sz); +} + +char *simple_dname(struct dentry *dentry, char *buffer, int buflen) +{ + char *end = buffer + buflen; + /* these dentries are never renamed, so d_lock is not needed */ + if (prepend(&end, &buflen, " (deleted)", 11) || + prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) || + prepend(&end, &buflen, "/", 1)) + end = ERR_PTR(-ENAMETOOLONG); + return end; +} +EXPORT_SYMBOL(simple_dname); + +/* + * Write full pathname from the root of the filesystem into the buffer. + */ +static char *__dentry_path(struct dentry *d, char *buf, int buflen) +{ + struct dentry *dentry; + char *end, *retval; + int len, seq = 0; + int error = 0; + + if (buflen < 2) + goto Elong; + + rcu_read_lock(); +restart: + dentry = d; + end = buf + buflen; + len = buflen; + prepend(&end, &len, "\0", 1); + /* Get '/' right */ + retval = end-1; + *retval = '/'; + read_seqbegin_or_lock(&rename_lock, &seq); + while (!IS_ROOT(dentry)) { + struct dentry *parent = dentry->d_parent; + + prefetch(parent); + error = prepend_name(&end, &len, &dentry->d_name); + if (error) + break; + + retval = end; + dentry = parent; + } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + if (error) + goto Elong; + return retval; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) +{ + return __dentry_path(dentry, buf, buflen); +} +EXPORT_SYMBOL(dentry_path_raw); + +char *dentry_path(struct dentry *dentry, char *buf, int buflen) +{ + char *p = NULL; + char *retval; + + if (d_unlinked(dentry)) { + p = buf + buflen; + if (prepend(&p, &buflen, "//deleted", 10) != 0) + goto Elong; + buflen++; + } + retval = __dentry_path(dentry, buf, buflen); + if (!IS_ERR(retval) && p) + *p = '/'; /* restore '/' overriden with '\0' */ + return retval; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root, + struct path *pwd) +{ + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + *root = fs->root; + *pwd = fs->pwd; + } while (read_seqcount_retry(&fs->seq, seq)); +} + +/* + * NOTE! The user-level library version returns a + * character pointer. The kernel system call just + * returns the length of the buffer filled (which + * includes the ending '\0' character), or a negative + * error value. So libc would do something like + * + * char *getcwd(char * buf, size_t size) + * { + * int retval; + * + * retval = sys_getcwd(buf, size); + * if (retval >= 0) + * return buf; + * errno = -retval; + * return NULL; + * } + */ +SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) +{ + int error; + struct path pwd, root; + char *page = __getname(); + + if (!page) + return -ENOMEM; + + rcu_read_lock(); + get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); + + error = -ENOENT; + if (!d_unlinked(pwd.dentry)) { + unsigned long len; + char *cwd = page + PATH_MAX; + int buflen = PATH_MAX; + + prepend(&cwd, &buflen, "\0", 1); + error = prepend_path(&pwd, &root, &cwd, &buflen); + rcu_read_unlock(); + + if (error < 0) + goto out; + + /* Unreachable from current root */ + if (error > 0) { + error = prepend_unreachable(&cwd, &buflen); + if (error) + goto out; + } + + error = -ERANGE; + len = PATH_MAX + page - cwd; + if (len <= size) { + error = len; + if (copy_to_user(buf, cwd, len)) + error = -EFAULT; + } + } else { + rcu_read_unlock(); + } + +out: + __putname(page); + return error; +} diff --git a/fs/dcache.c b/fs/dcache.c index 91626643fc86..c870343f904f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -14,7 +14,7 @@ * the dcache entry is deleted or garbage collected. */ -#include +#include #include #include #include @@ -24,15 +24,11 @@ #include #include #include -#include -#include #include #include #include -#include #include #include -#include #include #include "internal.h" #include "mount.h" @@ -3072,467 +3068,6 @@ out: } EXPORT_SYMBOL(d_splice_alias); -static int prepend(char **buffer, int *buflen, const char *str, int namelen) -{ - *buflen -= namelen; - if (*buflen < 0) - return -ENAMETOOLONG; - *buffer -= namelen; - memcpy(*buffer, str, namelen); - return 0; -} - -/** - * prepend_name - prepend a pathname in front of current buffer pointer - * @buffer: buffer pointer - * @buflen: allocated length of the buffer - * @name: name string and length qstr structure - * - * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to - * make sure that either the old or the new name pointer and length are - * fetched. However, there may be mismatch between length and pointer. - * The length cannot be trusted, we need to copy it byte-by-byte until - * the length is reached or a null byte is found. It also prepends "/" at - * the beginning of the name. The sequence number check at the caller will - * retry it again when a d_move() does happen. So any garbage in the buffer - * due to mismatched pointer and length will be discarded. - * - * Load acquire is needed to make sure that we see that terminating NUL. - */ -static int prepend_name(char **buffer, int *buflen, const struct qstr *name) -{ - const char *dname = smp_load_acquire(&name->name); /* ^^^ */ - u32 dlen = READ_ONCE(name->len); - char *p; - - *buflen -= dlen + 1; - if (*buflen < 0) - return -ENAMETOOLONG; - p = *buffer -= dlen + 1; - *p++ = '/'; - while (dlen--) { - char c = *dname++; - if (!c) - break; - *p++ = c; - } - return 0; -} - -/** - * prepend_path - Prepend path string to a buffer - * @path: the dentry/vfsmount to report - * @root: root vfsmnt/dentry - * @buffer: pointer to the end of the buffer - * @buflen: pointer to buffer length - * - * The function will first try to write out the pathname without taking any - * lock other than the RCU read lock to make sure that dentries won't go away. - * It only checks the sequence number of the global rename_lock as any change - * in the dentry's d_seq will be preceded by changes in the rename_lock - * sequence number. If the sequence number had been changed, it will restart - * the whole pathname back-tracing sequence again by taking the rename_lock. - * In this case, there is no need to take the RCU read lock as the recursive - * parent pointer references will keep the dentry chain alive as long as no - * rename operation is performed. - */ -static int prepend_path(const struct path *path, - const struct path *root, - char **buffer, int *buflen) -{ - struct dentry *dentry; - struct vfsmount *vfsmnt; - struct mount *mnt; - int error = 0; - unsigned seq, m_seq = 0; - char *bptr; - int blen; - - rcu_read_lock(); -restart_mnt: - read_seqbegin_or_lock(&mount_lock, &m_seq); - seq = 0; - rcu_read_lock(); -restart: - bptr = *buffer; - blen = *buflen; - error = 0; - dentry = path->dentry; - vfsmnt = path->mnt; - mnt = real_mount(vfsmnt); - read_seqbegin_or_lock(&rename_lock, &seq); - while (dentry != root->dentry || vfsmnt != root->mnt) { - struct dentry * parent; - - if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { - struct mount *parent = READ_ONCE(mnt->mnt_parent); - /* Escaped? */ - if (dentry != vfsmnt->mnt_root) { - bptr = *buffer; - blen = *buflen; - error = 3; - break; - } - /* Global root? */ - if (mnt != parent) { - dentry = READ_ONCE(mnt->mnt_mountpoint); - mnt = parent; - vfsmnt = &mnt->mnt; - continue; - } - if (!error) - error = is_mounted(vfsmnt) ? 1 : 2; - break; - } - parent = dentry->d_parent; - prefetch(parent); - error = prepend_name(&bptr, &blen, &dentry->d_name); - if (error) - break; - - dentry = parent; - } - if (!(seq & 1)) - rcu_read_unlock(); - if (need_seqretry(&rename_lock, seq)) { - seq = 1; - goto restart; - } - done_seqretry(&rename_lock, seq); - - if (!(m_seq & 1)) - rcu_read_unlock(); - if (need_seqretry(&mount_lock, m_seq)) { - m_seq = 1; - goto restart_mnt; - } - done_seqretry(&mount_lock, m_seq); - - if (error >= 0 && bptr == *buffer) { - if (--blen < 0) - error = -ENAMETOOLONG; - else - *--bptr = '/'; - } - *buffer = bptr; - *buflen = blen; - return error; -} - -/** - * __d_path - return the path of a dentry - * @path: the dentry/vfsmount to report - * @root: root vfsmnt/dentry - * @buf: buffer to return value in - * @buflen: buffer length - * - * Convert a dentry into an ASCII path name. - * - * Returns a pointer into the buffer or an error code if the - * path was too long. - * - * "buflen" should be positive. - * - * If the path is not reachable from the supplied root, return %NULL. - */ -char *__d_path(const struct path *path, - const struct path *root, - char *buf, int buflen) -{ - char *res = buf + buflen; - int error; - - prepend(&res, &buflen, "\0", 1); - error = prepend_path(path, root, &res, &buflen); - - if (error < 0) - return ERR_PTR(error); - if (error > 0) - return NULL; - return res; -} - -char *d_absolute_path(const struct path *path, - char *buf, int buflen) -{ - struct path root = {}; - char *res = buf + buflen; - int error; - - prepend(&res, &buflen, "\0", 1); - error = prepend_path(path, &root, &res, &buflen); - - if (error > 1) - error = -EINVAL; - if (error < 0) - return ERR_PTR(error); - return res; -} - -/* - * same as __d_path but appends "(deleted)" for unlinked files. - */ -static int path_with_deleted(const struct path *path, - const struct path *root, - char **buf, int *buflen) -{ - prepend(buf, buflen, "\0", 1); - if (d_unlinked(path->dentry)) { - int error = prepend(buf, buflen, " (deleted)", 10); - if (error) - return error; - } - - return prepend_path(path, root, buf, buflen); -} - -static int prepend_unreachable(char **buffer, int *buflen) -{ - return prepend(buffer, buflen, "(unreachable)", 13); -} - -static void get_fs_root_rcu(struct fs_struct *fs, struct path *root) -{ - unsigned seq; - - do { - seq = read_seqcount_begin(&fs->seq); - *root = fs->root; - } while (read_seqcount_retry(&fs->seq, seq)); -} - -/** - * d_path - return the path of a dentry - * @path: path to report - * @buf: buffer to return value in - * @buflen: buffer length - * - * Convert a dentry into an ASCII path name. If the entry has been deleted - * the string " (deleted)" is appended. Note that this is ambiguous. - * - * Returns a pointer into the buffer or an error code if the path was - * too long. Note: Callers should use the returned pointer, not the passed - * in buffer, to use the name! The implementation often starts at an offset - * into the buffer, and may leave 0 bytes at the start. - * - * "buflen" should be positive. - */ -char *d_path(const struct path *path, char *buf, int buflen) -{ - char *res = buf + buflen; - struct path root; - int error; - - /* - * We have various synthetic filesystems that never get mounted. On - * these filesystems dentries are never used for lookup purposes, and - * thus don't need to be hashed. They also don't need a name until a - * user wants to identify the object in /proc/pid/fd/. The little hack - * below allows us to generate a name for these objects on demand: - * - * Some pseudo inodes are mountable. When they are mounted - * path->dentry == path->mnt->mnt_root. In that case don't call d_dname - * and instead have d_path return the mounted path. - */ - if (path->dentry->d_op && path->dentry->d_op->d_dname && - (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) - return path->dentry->d_op->d_dname(path->dentry, buf, buflen); - - rcu_read_lock(); - get_fs_root_rcu(current->fs, &root); - error = path_with_deleted(path, &root, &res, &buflen); - rcu_read_unlock(); - - if (error < 0) - res = ERR_PTR(error); - return res; -} -EXPORT_SYMBOL(d_path); - -/* - * Helper function for dentry_operations.d_dname() members - */ -char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, - const char *fmt, ...) -{ - va_list args; - char temp[64]; - int sz; - - va_start(args, fmt); - sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1; - va_end(args); - - if (sz > sizeof(temp) || sz > buflen) - return ERR_PTR(-ENAMETOOLONG); - - buffer += buflen - sz; - return memcpy(buffer, temp, sz); -} - -char *simple_dname(struct dentry *dentry, char *buffer, int buflen) -{ - char *end = buffer + buflen; - /* these dentries are never renamed, so d_lock is not needed */ - if (prepend(&end, &buflen, " (deleted)", 11) || - prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) || - prepend(&end, &buflen, "/", 1)) - end = ERR_PTR(-ENAMETOOLONG); - return end; -} -EXPORT_SYMBOL(simple_dname); - -/* - * Write full pathname from the root of the filesystem into the buffer. - */ -static char *__dentry_path(struct dentry *d, char *buf, int buflen) -{ - struct dentry *dentry; - char *end, *retval; - int len, seq = 0; - int error = 0; - - if (buflen < 2) - goto Elong; - - rcu_read_lock(); -restart: - dentry = d; - end = buf + buflen; - len = buflen; - prepend(&end, &len, "\0", 1); - /* Get '/' right */ - retval = end-1; - *retval = '/'; - read_seqbegin_or_lock(&rename_lock, &seq); - while (!IS_ROOT(dentry)) { - struct dentry *parent = dentry->d_parent; - - prefetch(parent); - error = prepend_name(&end, &len, &dentry->d_name); - if (error) - break; - - retval = end; - dentry = parent; - } - if (!(seq & 1)) - rcu_read_unlock(); - if (need_seqretry(&rename_lock, seq)) { - seq = 1; - goto restart; - } - done_seqretry(&rename_lock, seq); - if (error) - goto Elong; - return retval; -Elong: - return ERR_PTR(-ENAMETOOLONG); -} - -char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) -{ - return __dentry_path(dentry, buf, buflen); -} -EXPORT_SYMBOL(dentry_path_raw); - -char *dentry_path(struct dentry *dentry, char *buf, int buflen) -{ - char *p = NULL; - char *retval; - - if (d_unlinked(dentry)) { - p = buf + buflen; - if (prepend(&p, &buflen, "//deleted", 10) != 0) - goto Elong; - buflen++; - } - retval = __dentry_path(dentry, buf, buflen); - if (!IS_ERR(retval) && p) - *p = '/'; /* restore '/' overriden with '\0' */ - return retval; -Elong: - return ERR_PTR(-ENAMETOOLONG); -} - -static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root, - struct path *pwd) -{ - unsigned seq; - - do { - seq = read_seqcount_begin(&fs->seq); - *root = fs->root; - *pwd = fs->pwd; - } while (read_seqcount_retry(&fs->seq, seq)); -} - -/* - * NOTE! The user-level library version returns a - * character pointer. The kernel system call just - * returns the length of the buffer filled (which - * includes the ending '\0' character), or a negative - * error value. So libc would do something like - * - * char *getcwd(char * buf, size_t size) - * { - * int retval; - * - * retval = sys_getcwd(buf, size); - * if (retval >= 0) - * return buf; - * errno = -retval; - * return NULL; - * } - */ -SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) -{ - int error; - struct path pwd, root; - char *page = __getname(); - - if (!page) - return -ENOMEM; - - rcu_read_lock(); - get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); - - error = -ENOENT; - if (!d_unlinked(pwd.dentry)) { - unsigned long len; - char *cwd = page + PATH_MAX; - int buflen = PATH_MAX; - - prepend(&cwd, &buflen, "\0", 1); - error = prepend_path(&pwd, &root, &cwd, &buflen); - rcu_read_unlock(); - - if (error < 0) - goto out; - - /* Unreachable from current root */ - if (error > 0) { - error = prepend_unreachable(&cwd, &buflen); - if (error) - goto out; - } - - error = -ERANGE; - len = PATH_MAX + page - cwd; - if (len <= size) { - error = len; - if (copy_to_user(buf, cwd, len)) - error = -EFAULT; - } - } else { - rcu_read_unlock(); - } - -out: - __putname(page); - return error; -} - /* * Test whether new_dentry is a subdirectory of old_dentry. * From 903ddaf49329076862d65f7284d825759ff67bd6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 Mar 2018 12:47:04 -0500 Subject: [PATCH 15/22] take out orphan externs (empty_string/slash_string) Signed-off-by: Al Viro --- include/linux/dcache.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 82a99d366aec..c84ffbfc5098 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -56,9 +56,7 @@ struct qstr { #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } -extern const char empty_string[]; extern const struct qstr empty_name; -extern const char slash_string[]; extern const struct qstr slash_name; struct dentry_stat_t { From a03ece5ff2bd7a9abaa0e8ddfe5f79d79e5984c8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Mar 2018 11:00:45 -0500 Subject: [PATCH 16/22] fold lookup_real() into __lookup_hash() Signed-off-by: Al Viro --- fs/namei.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 921ae32dbc80..a3cd028e8a9b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1473,21 +1473,29 @@ static struct dentry *lookup_dcache(const struct qstr *name, } /* - * Call i_op->lookup on the dentry. The dentry must be negative and - * unhashed. - * - * dir->d_inode->i_mutex must be held + * Parent directory has inode locked exclusive. This is one + * and only case when ->lookup() gets called on non in-lookup + * dentries - as the matter of fact, this only gets called + * when directory is guaranteed to have no in-lookup children + * at all. */ -static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, - unsigned int flags) +static struct dentry *__lookup_hash(const struct qstr *name, + struct dentry *base, unsigned int flags) { + struct dentry *dentry = lookup_dcache(name, base, flags); struct dentry *old; + struct inode *dir = base->d_inode; + + if (dentry) + return dentry; /* Don't create child dentry for a dead directory. */ - if (unlikely(IS_DEADDIR(dir))) { - dput(dentry); + if (unlikely(IS_DEADDIR(dir))) return ERR_PTR(-ENOENT); - } + + dentry = d_alloc(base, name); + if (unlikely(!dentry)) + return ERR_PTR(-ENOMEM); old = dir->i_op->lookup(dir, dentry, flags); if (unlikely(old)) { @@ -1497,21 +1505,6 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, return dentry; } -static struct dentry *__lookup_hash(const struct qstr *name, - struct dentry *base, unsigned int flags) -{ - struct dentry *dentry = lookup_dcache(name, base, flags); - - if (dentry) - return dentry; - - dentry = d_alloc(base, name); - if (unlikely(!dentry)) - return ERR_PTR(-ENOMEM); - - return lookup_real(base->d_inode, dentry, flags); -} - static int lookup_fast(struct nameidata *nd, struct path *path, struct inode **inode, unsigned *seqp) From cd1c0c9321999737073dcfc3364e194e02604bce Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Mar 2018 11:01:22 -0500 Subject: [PATCH 17/22] debugfs_lookup(): switch to lookup_one_len_unlocked() Signed-off-by: Al Viro --- fs/debugfs/inode.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 63a998c3f252..13b01351dd1c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -270,10 +270,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent) if (!parent) parent = debugfs_mount->mnt_root; - inode_lock(d_inode(parent)); - dentry = lookup_one_len(name, parent, strlen(name)); - inode_unlock(d_inode(parent)); - + dentry = lookup_one_len_unlocked(name, parent, strlen(name)); if (IS_ERR(dentry)) return NULL; if (!d_really_is_positive(dentry)) { From 5bf1ddf7ee0e23598a620ef9ea2b0f00e804859d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Mar 2018 18:06:03 -0500 Subject: [PATCH 18/22] lustre: get rid of pointless casts to struct dentry * ... when feeding const struct dentry * to primitives taking exactly that. Signed-off-by: Al Viro --- drivers/staging/lustre/lustre/llite/dcache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index 549369739d80..6cd0318062e8 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -90,7 +90,7 @@ static int ll_dcompare(const struct dentry *dentry, d_count(dentry)); /* mountpoint is always valid */ - if (d_mountpoint((struct dentry *)dentry)) + if (d_mountpoint(dentry)) return 0; if (d_lustre_invalid(dentry)) @@ -111,7 +111,7 @@ static int ll_ddelete(const struct dentry *de) LASSERT(de); CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n", - d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping", + d_lustre_invalid(de) ? "deleting" : "keeping", de, de, de->d_parent, d_inode(de), d_unhashed(de) ? "" : "hashed,", list_empty(&de->d_subdirs) ? "" : "subdirs"); @@ -119,7 +119,7 @@ static int ll_ddelete(const struct dentry *de) /* kernel >= 2.6.38 last refcount is decreased after this function. */ LASSERT(d_count(de) == 1); - if (d_lustre_invalid((struct dentry *)de)) + if (d_lustre_invalid(de)) return 1; return 0; } From a7498968338da9b928f5d8054acc8be6ed2bc14c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Mar 2018 16:40:33 -0500 Subject: [PATCH 19/22] oprofilefs: don't oops on allocation failure ... just short-circuit the creation of potential children Signed-off-by: Al Viro --- drivers/oprofile/oprofilefs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index d77ebbfc67c9..4ea08979312c 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -138,6 +138,9 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name, struct dentry *dentry; struct inode *inode; + if (!root) + return -ENOMEM; + inode_lock(d_inode(root)); dentry = d_alloc_name(root, name); if (!dentry) { From 076515fc926793e162fc6525bed1679ef2bbf269 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Mar 2018 23:15:52 -0500 Subject: [PATCH 20/22] make non-exchanging __d_move() copy ->d_parent rather than swap them Currently d_move(from, to) does the following: * name/parent of from <- old name/parent of to, from hashed there * to is unhashed * name of to is preserved * if from used to be detached, to gets detached * if from used to be attached, parent of to <- old parent of from. That's both user-visibly bogus and complicates reasoning a lot. Much saner semantics would be * name/parent of from <- name/parent of to, from hashed there. * to is unhashed * name/parent of to is unchanged. The price, of course, is that old parent of from might lose a reference. However, * all potentially cross-directory callers of d_move() have both parents pinned directly; typically, dentries themselves are grabbed only after we have grabbed and locked both parents. IOW, the decrement of old parent's refcount in case of d_move() won't reach zero. * __d_move() from d_splice_alias() is done to detached alias. No refcount decrements in that case * __d_move() from __d_unalias() *can* get the refcount to zero. So let's grab a reference to alias' old parent before calling __d_unalias() and dput() it after we'd dropped rename_lock. That does make d_splice_alias() potentially blocking. However, it has no callers in non-sleepable contexts (and the case where we'd grown that dget/dput pair is _very_ rare, so performance is not an issue). Another thing that needs adjustment is unlocking in the end of __d_move(); folded it in. And cleaned the remnants of bogus ordering from the "lock them in the beginning" counterpart - it's never been right and now (well, for 7 years now) we have that thing always serialized on rename_lock anyway. Signed-off-by: Al Viro --- fs/dcache.c | 95 +++++++++++++++++------------------------------------ 1 file changed, 31 insertions(+), 64 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index c870343f904f..0a58038091f2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -67,9 +67,7 @@ * dentry->d_lock * * If no ancestor relationship: - * if (dentry1 < dentry2) - * dentry1->d_lock - * dentry2->d_lock + * arbitrary, since it's serialized on rename_lock */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); @@ -2777,9 +2775,6 @@ static void copy_name(struct dentry *dentry, struct dentry *target) static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) { - /* - * XXXX: do we really need to take target->d_lock? - */ if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent) spin_lock(&target->d_parent->d_lock); else { @@ -2793,39 +2788,10 @@ static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) DENTRY_D_LOCK_NESTED); } } - if (target < dentry) { - spin_lock_nested(&target->d_lock, 2); - spin_lock_nested(&dentry->d_lock, 3); - } else { - spin_lock_nested(&dentry->d_lock, 2); - spin_lock_nested(&target->d_lock, 3); - } + spin_lock_nested(&dentry->d_lock, 2); + spin_lock_nested(&target->d_lock, 3); } -static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target) -{ - if (target->d_parent != dentry->d_parent) - spin_unlock(&dentry->d_parent->d_lock); - if (target->d_parent != target) - spin_unlock(&target->d_parent->d_lock); - spin_unlock(&target->d_lock); - spin_unlock(&dentry->d_lock); -} - -/* - * When switching names, the actual string doesn't strictly have to - * be preserved in the target - because we're dropping the target - * anyway. As such, we can just do a simple memcpy() to copy over - * the new name before we switch, unless we are going to rehash - * it. Note that if we *do* unhash the target, we are not allowed - * to rehash it without giving it a new name/hash key - whether - * we swap or overwrite the names here, resulting name won't match - * the reality in filesystem; it's only there for d_path() purposes. - * Note that all of this is happening under rename_lock, so the - * any hash lookup seeing it in the middle of manipulations will - * be discarded anyway. So we do not care what happens to the hash - * key in that case. - */ /* * __d_move - move a dentry * @dentry: entry to move @@ -2840,6 +2806,7 @@ static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target) static void __d_move(struct dentry *dentry, struct dentry *target, bool exchange) { + struct dentry *old_parent; struct inode *dir = NULL; unsigned n; if (!dentry->d_inode) @@ -2858,49 +2825,47 @@ static void __d_move(struct dentry *dentry, struct dentry *target, write_seqcount_begin(&dentry->d_seq); write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); + old_parent = dentry->d_parent; + /* unhash both */ if (!d_unhashed(dentry)) ___d_drop(dentry); if (!d_unhashed(target)) ___d_drop(target); - /* Switch the names.. */ - if (exchange) - swap_names(dentry, target); - else - copy_name(dentry, target); - - /* rehash in new place(s) */ - __d_rehash(dentry); - if (exchange) - __d_rehash(target); - else - target->d_hash.pprev = NULL; - /* ... and switch them in the tree */ - if (IS_ROOT(dentry)) { - /* splicing a tree */ - dentry->d_flags |= DCACHE_RCUACCESS; - dentry->d_parent = target->d_parent; - target->d_parent = target; - list_del_init(&target->d_child); - list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); + dentry->d_parent = target->d_parent; + if (!exchange) { + copy_name(dentry, target); + target->d_hash.pprev = NULL; + dentry->d_parent->d_lockref.count++; + if (dentry == old_parent) + dentry->d_flags |= DCACHE_RCUACCESS; + else + WARN_ON(!--old_parent->d_lockref.count); } else { - /* swapping two dentries */ - swap(dentry->d_parent, target->d_parent); + target->d_parent = old_parent; + swap_names(dentry, target); list_move(&target->d_child, &target->d_parent->d_subdirs); - list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); - if (exchange) - fsnotify_update_flags(target); - fsnotify_update_flags(dentry); + __d_rehash(target); + fsnotify_update_flags(target); } + list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); + __d_rehash(dentry); + fsnotify_update_flags(dentry); write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); if (dir) end_dir_add(dir, n); - dentry_unlock_for_move(dentry, target); + + if (dentry->d_parent != old_parent) + spin_unlock(&dentry->d_parent->d_lock); + if (dentry != old_parent) + spin_unlock(&old_parent->d_lock); + spin_unlock(&target->d_lock); + spin_unlock(&dentry->d_lock); } /* @@ -3048,12 +3013,14 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) inode->i_sb->s_type->name, inode->i_sb->s_id); } else if (!IS_ROOT(new)) { + struct dentry *old_parent = dget(new->d_parent); int err = __d_unalias(inode, dentry, new); write_sequnlock(&rename_lock); if (err) { dput(new); new = ERR_PTR(err); } + dput(old_parent); } else { __d_move(new, dentry, false); write_sequnlock(&rename_lock); From 42177007aa277af3e37bf2ae3efdfe795c81d700 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 11 Mar 2018 15:15:46 -0400 Subject: [PATCH 21/22] fold dentry_lock_for_move() into its sole caller and clean it up Signed-off-by: Al Viro --- fs/dcache.c | 49 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 0a58038091f2..c159a4b304cf 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2773,25 +2773,6 @@ static void copy_name(struct dentry *dentry, struct dentry *target) kfree_rcu(old_name, u.head); } -static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) -{ - if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent) - spin_lock(&target->d_parent->d_lock); - else { - if (d_ancestor(dentry->d_parent, target->d_parent)) { - spin_lock(&dentry->d_parent->d_lock); - spin_lock_nested(&target->d_parent->d_lock, - DENTRY_D_LOCK_NESTED); - } else { - spin_lock(&target->d_parent->d_lock); - spin_lock_nested(&dentry->d_parent->d_lock, - DENTRY_D_LOCK_NESTED); - } - } - spin_lock_nested(&dentry->d_lock, 2); - spin_lock_nested(&target->d_lock, 3); -} - /* * __d_move - move a dentry * @dentry: entry to move @@ -2806,16 +2787,34 @@ static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) static void __d_move(struct dentry *dentry, struct dentry *target, bool exchange) { - struct dentry *old_parent; + struct dentry *old_parent, *p; struct inode *dir = NULL; unsigned n; - if (!dentry->d_inode) - printk(KERN_WARNING "VFS: moving negative dcache entry\n"); - BUG_ON(d_ancestor(dentry, target)); + WARN_ON(!dentry->d_inode); + if (WARN_ON(dentry == target)) + return; + BUG_ON(d_ancestor(target, dentry)); + old_parent = dentry->d_parent; + p = d_ancestor(old_parent, target); + if (IS_ROOT(dentry)) { + BUG_ON(p); + spin_lock(&target->d_parent->d_lock); + } else if (!p) { + /* target is not a descendent of dentry->d_parent */ + spin_lock(&target->d_parent->d_lock); + spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED); + } else { + BUG_ON(p == dentry); + spin_lock(&old_parent->d_lock); + if (p != target) + spin_lock_nested(&target->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } + spin_lock_nested(&dentry->d_lock, 2); + spin_lock_nested(&target->d_lock, 3); - dentry_lock_for_move(dentry, target); if (unlikely(d_in_lookup(target))) { dir = target->d_parent->d_inode; n = start_dir_add(dir); @@ -2825,8 +2824,6 @@ static void __d_move(struct dentry *dentry, struct dentry *target, write_seqcount_begin(&dentry->d_seq); write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); - old_parent = dentry->d_parent; - /* unhash both */ if (!d_unhashed(dentry)) ___d_drop(dentry); From cbd4a5bcb25b5ed0c1c64bc969b893cad9b78acc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Mar 2018 15:08:21 -0400 Subject: [PATCH 22/22] d_genocide: move export to definition Signed-off-by: Al Viro --- fs/dcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index c159a4b304cf..593079176123 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3095,6 +3095,8 @@ void d_genocide(struct dentry *parent) d_walk(parent, parent, d_genocide_kill, NULL); } +EXPORT_SYMBOL(d_genocide); + void d_tmpfile(struct dentry *dentry, struct inode *inode) { inode_dec_link_count(inode); @@ -3174,8 +3176,6 @@ static void __init dcache_init(void) struct kmem_cache *names_cachep __read_mostly; EXPORT_SYMBOL(names_cachep); -EXPORT_SYMBOL(d_genocide); - void __init vfs_caches_init_early(void) { int i;