From 1a39068954e33f4bf3e09375a8112dcc801c4688 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Mar 2008 20:48:19 -0400 Subject: [PATCH 1/5] [PATCH] reduce stack footprint in namespace.c A lot of places misuse struct nameidata when they need struct path. Signed-off-by: Al Viro --- fs/namespace.c | 72 ++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 7953c96a2071..6324dfc80dc6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -155,15 +155,15 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } } -static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) +static void detach_mnt(struct vfsmount *mnt, struct path *old_path) { - old_nd->path.dentry = mnt->mnt_mountpoint; - old_nd->path.mnt = mnt->mnt_parent; + old_path->dentry = mnt->mnt_mountpoint; + old_path->mnt = mnt->mnt_parent; mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt_root; list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); - old_nd->path.dentry->d_mounted--; + old_path->dentry->d_mounted--; } void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, @@ -174,12 +174,12 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, dentry->d_mounted++; } -static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) +static void attach_mnt(struct vfsmount *mnt, struct path *path) { - mnt_set_mountpoint(nd->path.mnt, nd->path.dentry, mnt); + mnt_set_mountpoint(path->mnt, path->dentry, mnt); list_add_tail(&mnt->mnt_hash, mount_hashtable + - hash(nd->path.mnt, nd->path.dentry)); - list_add_tail(&mnt->mnt_child, &nd->path.mnt->mnt_mounts); + hash(path->mnt, path->dentry)); + list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); } /* @@ -744,7 +744,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, int flag) { struct vfsmount *res, *p, *q, *r, *s; - struct nameidata nd; + struct path path; if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) return NULL; @@ -769,14 +769,14 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, q = q->mnt_parent; } p = s; - nd.path.mnt = q; - nd.path.dentry = p->mnt_mountpoint; + path.mnt = q; + path.dentry = p->mnt_mountpoint; q = clone_mnt(p, p->mnt_root, flag); if (!q) goto Enomem; spin_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); - attach_mnt(q, &nd); + attach_mnt(q, &path); spin_unlock(&vfsmount_lock); } } @@ -876,11 +876,11 @@ void drop_collected_mounts(struct vfsmount *mnt) * in allocations. */ static int attach_recursive_mnt(struct vfsmount *source_mnt, - struct nameidata *nd, struct nameidata *parent_nd) + struct path *path, struct path *parent_path) { LIST_HEAD(tree_list); - struct vfsmount *dest_mnt = nd->path.mnt; - struct dentry *dest_dentry = nd->path.dentry; + struct vfsmount *dest_mnt = path->mnt; + struct dentry *dest_dentry = path->dentry; struct vfsmount *child, *p; if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) @@ -892,9 +892,9 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, } spin_lock(&vfsmount_lock); - if (parent_nd) { - detach_mnt(source_mnt, parent_nd); - attach_mnt(source_mnt, nd); + if (parent_path) { + detach_mnt(source_mnt, parent_path); + attach_mnt(source_mnt, path); touch_mnt_namespace(current->nsproxy->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); @@ -930,7 +930,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) err = -ENOENT; if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry)) - err = attach_recursive_mnt(mnt, nd, NULL); + err = attach_recursive_mnt(mnt, &nd->path, NULL); out_unlock: mutex_unlock(&nd->path.dentry->d_inode->i_mutex); if (!err) @@ -1059,7 +1059,8 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt) */ static noinline int do_move_mount(struct nameidata *nd, char *old_name) { - struct nameidata old_nd, parent_nd; + struct nameidata old_nd; + struct path parent_path; struct vfsmount *p; int err = 0; if (!capable(CAP_SYS_ADMIN)) @@ -1114,7 +1115,7 @@ static noinline int do_move_mount(struct nameidata *nd, char *old_name) if (p == old_nd.path.mnt) goto out1; - err = attach_recursive_mnt(old_nd.path.mnt, nd, &parent_nd); + err = attach_recursive_mnt(old_nd.path.mnt, &nd->path, &parent_path); if (err) goto out1; @@ -1128,7 +1129,7 @@ out1: out: up_write(&namespace_sem); if (!err) - path_put(&parent_nd.path); + path_put(&parent_path); path_put(&old_nd.path); return err; } @@ -1683,7 +1684,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) path_put(&old_pwd); } -static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) +static void chroot_fs_refs(struct path *old_root, struct path *new_root) { struct task_struct *g, *p; struct fs_struct *fs; @@ -1695,12 +1696,12 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) if (fs) { atomic_inc(&fs->count); task_unlock(p); - if (fs->root.dentry == old_nd->path.dentry - && fs->root.mnt == old_nd->path.mnt) - set_fs_root(fs, &new_nd->path); - if (fs->pwd.dentry == old_nd->path.dentry - && fs->pwd.mnt == old_nd->path.mnt) - set_fs_pwd(fs, &new_nd->path); + if (fs->root.dentry == old_root->dentry + && fs->root.mnt == old_root->mnt) + set_fs_root(fs, new_root); + if (fs->pwd.dentry == old_root->dentry + && fs->pwd.mnt == old_root->mnt) + set_fs_pwd(fs, new_root); put_fs_struct(fs); } else task_unlock(p); @@ -1737,7 +1738,8 @@ asmlinkage long sys_pivot_root(const char __user * new_root, const char __user * put_old) { struct vfsmount *tmp; - struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; + struct nameidata new_nd, old_nd, user_nd; + struct path parent_path, root_parent; int error; if (!capable(CAP_SYS_ADMIN)) @@ -1811,19 +1813,19 @@ asmlinkage long sys_pivot_root(const char __user * new_root, goto out3; } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) goto out3; - detach_mnt(new_nd.path.mnt, &parent_nd); + detach_mnt(new_nd.path.mnt, &parent_path); detach_mnt(user_nd.path.mnt, &root_parent); /* mount old root on put_old */ - attach_mnt(user_nd.path.mnt, &old_nd); + attach_mnt(user_nd.path.mnt, &old_nd.path); /* mount new_root on / */ attach_mnt(new_nd.path.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); spin_unlock(&vfsmount_lock); - chroot_fs_refs(&user_nd, &new_nd); + chroot_fs_refs(&user_nd.path, &new_nd.path); security_sb_post_pivotroot(&user_nd, &new_nd); error = 0; - path_put(&root_parent.path); - path_put(&parent_nd.path); + path_put(&root_parent); + path_put(&parent_path); out2: mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); up_write(&namespace_sem); From 7c4b93d8269b9d35971a8239426b1f6ddc3d5ef7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Mar 2008 23:59:49 -0400 Subject: [PATCH 2/5] [PATCH] count ghost references to vfsmounts make propagate_mount_busy() exclude references from the vfsmounts that had been isolated by umount_tree() and are just waiting for release_mounts() to dispose of their ->mnt_parent/->mnt_mountpoint. Signed-off-by: Al Viro --- fs/namespace.c | 5 ++++- fs/pnode.c | 2 +- include/linux/mount.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 6324dfc80dc6..c175218ebae1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -548,6 +548,7 @@ void release_mounts(struct list_head *head) m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; + m->mnt_ghosts--; spin_unlock(&vfsmount_lock); dput(dentry); mntput(m); @@ -572,8 +573,10 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; list_del_init(&p->mnt_child); - if (p->mnt_parent != p) + if (p->mnt_parent != p) { + p->mnt_parent->mnt_ghosts++; p->mnt_mountpoint->d_mounted--; + } change_mnt_propagation(p, MS_PRIVATE); } } diff --git a/fs/pnode.c b/fs/pnode.c index 05ba692bc540..1d8f5447f3f7 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -225,7 +225,7 @@ out: */ static inline int do_refcount_check(struct vfsmount *mnt, int count) { - int mycount = atomic_read(&mnt->mnt_count); + int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; return (mycount > count); } diff --git a/include/linux/mount.h b/include/linux/mount.h index 6d3047d8c91c..dac5e67ff3ee 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -61,6 +61,7 @@ struct vfsmount { atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; + int mnt_ghosts; }; static inline struct vfsmount *mntget(struct vfsmount *mnt) From bcc5c7d2b692e5319db00b0dd020ce98723103b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2008 00:21:53 -0400 Subject: [PATCH 3/5] [PATCH] sanitize locking in mark_mounts_for_expiry() and shrink_submounts() ... and fix a race on access of ->mnt_share et.al. without namespace_sem in the latter. Signed-off-by: Al Viro --- fs/namespace.c | 105 +++++++++++-------------------------------------- 1 file changed, 24 insertions(+), 81 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index c175218ebae1..1c78917ec930 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1210,75 +1210,6 @@ unlock: EXPORT_SYMBOL_GPL(do_add_mount); -static void expire_mount(struct vfsmount *mnt, struct list_head *mounts, - struct list_head *umounts) -{ - spin_lock(&vfsmount_lock); - - /* - * Check if mount is still attached, if not, let whoever holds it deal - * with the sucker - */ - if (mnt->mnt_parent == mnt) { - spin_unlock(&vfsmount_lock); - return; - } - - /* - * Check that it is still dead: the count should now be 2 - as - * contributed by the vfsmount parent and the mntget above - */ - if (!propagate_mount_busy(mnt, 2)) { - /* delete from the namespace */ - touch_mnt_namespace(mnt->mnt_ns); - list_del_init(&mnt->mnt_list); - mnt->mnt_ns = NULL; - umount_tree(mnt, 1, umounts); - spin_unlock(&vfsmount_lock); - } else { - /* - * Someone brought it back to life whilst we didn't have any - * locks held so return it to the expiration list - */ - list_add_tail(&mnt->mnt_expire, mounts); - spin_unlock(&vfsmount_lock); - } -} - -/* - * go through the vfsmounts we've just consigned to the graveyard to - * - check that they're still dead - * - delete the vfsmount from the appropriate namespace under lock - * - dispose of the corpse - */ -static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts) -{ - struct mnt_namespace *ns; - struct vfsmount *mnt; - - while (!list_empty(graveyard)) { - LIST_HEAD(umounts); - mnt = list_first_entry(graveyard, struct vfsmount, mnt_expire); - list_del_init(&mnt->mnt_expire); - - /* don't do anything if the namespace is dead - all the - * vfsmounts from it are going away anyway */ - ns = mnt->mnt_ns; - if (!ns || !ns->root) - continue; - get_mnt_ns(ns); - - spin_unlock(&vfsmount_lock); - down_write(&namespace_sem); - expire_mount(mnt, mounts, &umounts); - up_write(&namespace_sem); - release_mounts(&umounts); - mntput(mnt); - put_mnt_ns(ns); - spin_lock(&vfsmount_lock); - } -} - /* * process a list of expirable mountpoints with the intent of discarding any * mountpoints that aren't in use and haven't been touched since last we came @@ -1288,10 +1219,12 @@ void mark_mounts_for_expiry(struct list_head *mounts) { struct vfsmount *mnt, *next; LIST_HEAD(graveyard); + LIST_HEAD(umounts); if (list_empty(mounts)) return; + down_write(&namespace_sem); spin_lock(&vfsmount_lock); /* extract from the expiration list every vfsmount that matches the @@ -1302,16 +1235,19 @@ void mark_mounts_for_expiry(struct list_head *mounts) */ list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { if (!xchg(&mnt->mnt_expiry_mark, 1) || - atomic_read(&mnt->mnt_count) != 1) + propagate_mount_busy(mnt, 1)) continue; - - mntget(mnt); list_move(&mnt->mnt_expire, &graveyard); } - - expire_mount_list(&graveyard, mounts); - + while (!list_empty(&graveyard)) { + mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire); + touch_mnt_namespace(mnt->mnt_ns); + umount_tree(mnt, 1, &umounts); + } spin_unlock(&vfsmount_lock); + up_write(&namespace_sem); + + release_mounts(&umounts); } EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); @@ -1347,7 +1283,6 @@ resume: } if (!propagate_mount_busy(mnt, 1)) { - mntget(mnt); list_move_tail(&mnt->mnt_expire, graveyard); found++; } @@ -1370,15 +1305,23 @@ resume: void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts) { LIST_HEAD(graveyard); - int found; + LIST_HEAD(umounts); + struct vfsmount *mnt; + down_write(&namespace_sem); spin_lock(&vfsmount_lock); - /* extract submounts of 'mountpoint' from the expiration list */ - while ((found = select_submounts(mountpoint, &graveyard)) != 0) - expire_mount_list(&graveyard, mounts); - + while (select_submounts(mountpoint, &graveyard)) { + while (!list_empty(&graveyard)) { + mnt = list_first_entry(&graveyard, struct vfsmount, + mnt_expire); + touch_mnt_namespace(mnt->mnt_ns); + umount_tree(mnt, 1, &umounts); + } + } spin_unlock(&vfsmount_lock); + up_write(&namespace_sem); + release_mounts(&umounts); } EXPORT_SYMBOL_GPL(shrink_submounts); From c35038becad0adb0e25261fff66d85b1a6ddd0c2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2008 00:46:23 -0400 Subject: [PATCH 4/5] [PATCH] do shrink_submounts() for all fs types ... and take it out of ->umount_begin() instances. Call with all locks already taken (by do_umount()) and leave calling release_mounts() to caller (it will do release_mounts() anyway, so we can just put into the same list). Signed-off-by: Al Viro --- fs/afs/internal.h | 1 - fs/afs/mntpt.c | 8 -------- fs/afs/super.c | 1 - fs/cifs/cifs_dfs_ref.c | 1 - fs/namespace.c | 23 ++++++++++------------- fs/nfs/super.c | 2 -- include/linux/mount.h | 1 - 7 files changed, 10 insertions(+), 27 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5ca3625cd39e..9ba16edc0af2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -573,7 +573,6 @@ extern const struct file_operations afs_mntpt_file_operations; extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); extern void afs_mntpt_kill_timer(void); -extern void afs_umount_begin(struct vfsmount *, int); /* * proc.c diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index a3510b8ba3e7..2f5503902c37 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -283,11 +283,3 @@ void afs_mntpt_kill_timer(void) cancel_delayed_work(&afs_mntpt_expiry_timer); flush_scheduled_work(); } - -/* - * begin unmount by attempting to remove all automounted mountpoints we added - */ -void afs_umount_begin(struct vfsmount *vfsmnt, int flags) -{ - shrink_submounts(vfsmnt, &afs_vfsmounts); -} diff --git a/fs/afs/super.c b/fs/afs/super.c index 36bbce45f44b..4b572b801d8d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -50,7 +50,6 @@ static const struct super_operations afs_super_ops = { .write_inode = afs_write_inode, .destroy_inode = afs_destroy_inode, .clear_inode = afs_clear_inode, - .umount_begin = afs_umount_begin, .put_super = afs_put_super, .show_options = generic_show_options, }; diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index a1a95b027136..56c924033b78 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -33,7 +33,6 @@ void dfs_shrink_umount_helper(struct vfsmount *vfsmnt) { mark_mounts_for_expiry(&cifs_dfs_automount_list); mark_mounts_for_expiry(&cifs_dfs_automount_list); - shrink_submounts(vfsmnt, &cifs_dfs_automount_list); } /** diff --git a/fs/namespace.c b/fs/namespace.c index 1c78917ec930..7bd74b25930c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -581,6 +581,8 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) } } +static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts); + static int do_umount(struct vfsmount *mnt, int flags) { struct super_block *sb = mnt->mnt_sb; @@ -653,6 +655,9 @@ static int do_umount(struct vfsmount *mnt, int flags) spin_lock(&vfsmount_lock); event++; + if (!(flags & MNT_DETACH)) + shrink_submounts(mnt, &umount_list); + retval = -EBUSY; if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { if (!list_empty(&mnt->mnt_list)) @@ -1302,30 +1307,22 @@ resume: * process a list of expirable mountpoints with the intent of discarding any * submounts of a specific parent mountpoint */ -void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts) +static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) { LIST_HEAD(graveyard); - LIST_HEAD(umounts); - struct vfsmount *mnt; + struct vfsmount *m; - down_write(&namespace_sem); - spin_lock(&vfsmount_lock); /* extract submounts of 'mountpoint' from the expiration list */ - while (select_submounts(mountpoint, &graveyard)) { + while (select_submounts(mnt, &graveyard)) { while (!list_empty(&graveyard)) { - mnt = list_first_entry(&graveyard, struct vfsmount, + m = list_first_entry(&graveyard, struct vfsmount, mnt_expire); touch_mnt_namespace(mnt->mnt_ns); - umount_tree(mnt, 1, &umounts); + umount_tree(mnt, 1, umounts); } } - spin_unlock(&vfsmount_lock); - up_write(&namespace_sem); - release_mounts(&umounts); } -EXPORT_SYMBOL_GPL(shrink_submounts); - /* * Some copy_from_user() implementations do not return the exact number of * bytes remaining to copy on a fault. But copy_mount_options() requires that. diff --git a/fs/nfs/super.c b/fs/nfs/super.c index dd4dfcd632ec..f9219024f31a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -589,8 +589,6 @@ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb); struct rpc_clnt *rpc; - shrink_submounts(vfsmnt, &nfs_automount_list); - if (!(flags & MNT_FORCE)) return; /* -EIO all pending I/O */ diff --git a/include/linux/mount.h b/include/linux/mount.h index dac5e67ff3ee..5ee2df217cdf 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -99,7 +99,6 @@ extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, int mnt_flags, struct list_head *fslist); extern void mark_mounts_for_expiry(struct list_head *mounts); -extern void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts); extern spinlock_t vfsmount_lock; extern dev_t name_to_dev_t(char *name); From 6758f953d05378d907a164c67934cd86183d9c88 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2008 16:14:30 -0400 Subject: [PATCH 5/5] [PATCH] mnt_expire is protected by namespace_sem, no need for vfsmount_lock Signed-off-by: Al Viro --- fs/namespace.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 7bd74b25930c..94f026ec990a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -262,10 +262,8 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, /* stick the duplicate mount on the same expiry list * as the original if that was on one */ if (flag & CL_EXPIRE) { - spin_lock(&vfsmount_lock); if (!list_empty(&old->mnt_expire)) list_add(&mnt->mnt_expire, &old->mnt_expire); - spin_unlock(&vfsmount_lock); } } return mnt; @@ -1127,11 +1125,9 @@ static noinline int do_move_mount(struct nameidata *nd, char *old_name) if (err) goto out1; - spin_lock(&vfsmount_lock); /* if the mount is moved, it should no longer be expire * automatically */ list_del_init(&old_nd.path.mnt->mnt_expire); - spin_unlock(&vfsmount_lock); out1: mutex_unlock(&nd->path.dentry->d_inode->i_mutex); out: @@ -1198,12 +1194,9 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, if ((err = graft_tree(newmnt, nd))) goto unlock; - if (fslist) { - /* add to the specified expiration list */ - spin_lock(&vfsmount_lock); + if (fslist) /* add to the specified expiration list */ list_add_tail(&newmnt->mnt_expire, fslist); - spin_unlock(&vfsmount_lock); - } + up_write(&namespace_sem); return 0;