From f642404a0436a50912c218009ccc7856d48d784c Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 13 May 2019 16:14:32 +0100 Subject: [PATCH] afs: Make vnode->cb_interest RCU safe Use RCU-based freeing for afs_cb_interest struct objects and use RCU on vnode->cb_interest. Use that change to allow afs_check_validity() to use read_seqbegin_or_lock() instead of read_seqlock_excl(). This also requires the caller of afs_check_validity() to hold the RCU read lock across the call. Signed-off-by: David Howells --- fs/afs/callback.c | 21 +++++++------ fs/afs/dir.c | 15 ++++++--- fs/afs/inode.c | 79 +++++++++++++++++++++++++++++------------------ fs/afs/internal.h | 12 +++++-- fs/afs/rotate.c | 18 +++++++---- fs/afs/security.c | 8 +++-- fs/afs/super.c | 4 +-- 7 files changed, 99 insertions(+), 58 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 4876079aa643..d441bef72163 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -94,15 +94,15 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode, struct afs_server *server = entry->server; again: - if (vnode->cb_interest && - likely(vnode->cb_interest == entry->cb_interest)) + vcbi = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->io_lock)); + if (vcbi && likely(vcbi == entry->cb_interest)) return 0; read_lock(&slist->lock); cbi = afs_get_cb_interest(entry->cb_interest); read_unlock(&slist->lock); - vcbi = vnode->cb_interest; if (vcbi) { if (vcbi == cbi) { afs_put_cb_interest(afs_v2net(vnode), cbi); @@ -114,8 +114,9 @@ again: */ if (cbi && vcbi->server == cbi->server) { write_seqlock(&vnode->cb_lock); - old = vnode->cb_interest; - vnode->cb_interest = cbi; + old = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->cb_lock.lock)); + rcu_assign_pointer(vnode->cb_interest, cbi); write_sequnlock(&vnode->cb_lock); afs_put_cb_interest(afs_v2net(vnode), old); return 0; @@ -160,8 +161,9 @@ again: */ write_seqlock(&vnode->cb_lock); - old = vnode->cb_interest; - vnode->cb_interest = cbi; + old = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->cb_lock.lock)); + rcu_assign_pointer(vnode->cb_interest, cbi); vnode->cb_s_break = cbi->server->cb_s_break; vnode->cb_v_break = vnode->volume->cb_v_break; clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); @@ -191,10 +193,11 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) vi = NULL; write_unlock(&cbi->server->cb_break_lock); - kfree(vi); + if (vi) + kfree_rcu(vi, rcu); afs_put_server(net, cbi->server); } - kfree(cbi); + kfree_rcu(cbi, rcu); } } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index f7344b045799..338c2260b0a0 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -638,11 +638,12 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, struct key *key) { struct afs_lookup_cookie *cookie; - struct afs_cb_interest *cbi = NULL; + struct afs_cb_interest *dcbi, *cbi = NULL; struct afs_super_info *as = dir->i_sb->s_fs_info; struct afs_status_cb *scb; struct afs_iget_data data; struct afs_fs_cursor fc; + struct afs_server *server; struct afs_vnode *dvnode = AFS_FS_I(dir); struct inode *inode = NULL; int ret, i; @@ -658,10 +659,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */ read_seqlock_excl(&dvnode->cb_lock); - if (dvnode->cb_interest && - dvnode->cb_interest->server && - test_bit(AFS_SERVER_FL_NO_IBULK, &dvnode->cb_interest->server->flags)) - cookie->one_only = true; + dcbi = rcu_dereference_protected(dvnode->cb_interest, + lockdep_is_held(&dvnode->cb_lock.lock)); + if (dcbi) { + server = dcbi->server; + if (server && + test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags)) + cookie->one_only = true; + } read_sequnlock_excl(&dvnode->cb_lock); for (i = 0; i < 50; i++) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 5c95da7d5f3d..ba35b4824408 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -139,9 +139,10 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, vnode->cb_expires_at = ktime_get_real_seconds(); } else { vnode->cb_expires_at = scb->callback.expires_at; - old_cbi = vnode->cb_interest; + old_cbi = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->cb_lock.lock)); if (cbi != old_cbi) - vnode->cb_interest = afs_get_cb_interest(cbi); + rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(cbi)); else old_cbi = NULL; set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); @@ -245,9 +246,10 @@ static void afs_apply_callback(struct afs_fs_cursor *fc, if (!afs_cb_is_broken(cb_break, vnode, fc->cbi)) { vnode->cb_expires_at = cb->expires_at; - old = vnode->cb_interest; + old = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->cb_lock.lock)); if (old != fc->cbi) { - vnode->cb_interest = afs_get_cb_interest(fc->cbi); + rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(fc->cbi)); afs_put_cb_interest(afs_v2net(vnode), old); } set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); @@ -565,36 +567,46 @@ void afs_zap_data(struct afs_vnode *vnode) */ bool afs_check_validity(struct afs_vnode *vnode) { + struct afs_cb_interest *cbi; + struct afs_server *server; + struct afs_volume *volume = vnode->volume; time64_t now = ktime_get_real_seconds(); bool valid; + unsigned int cb_break, cb_s_break, cb_v_break; + int seq = 0; - /* Quickly check the callback state. Ideally, we'd use read_seqbegin - * here, but we have no way to pass the net namespace to the RCU - * cleanup for the server record. - */ - read_seqlock_excl(&vnode->cb_lock); + do { + read_seqbegin_or_lock(&vnode->cb_lock, &seq); + cb_v_break = READ_ONCE(volume->cb_v_break); + cb_break = vnode->cb_break; - if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break || - vnode->cb_v_break != vnode->volume->cb_v_break) { - vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; - vnode->cb_v_break = vnode->volume->cb_v_break; - valid = false; - } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - valid = false; - } else if (vnode->cb_expires_at - 10 <= now) { - valid = false; - } else { + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + cbi = rcu_dereference(vnode->cb_interest); + server = rcu_dereference(cbi->server); + cb_s_break = READ_ONCE(server->cb_s_break); + + if (vnode->cb_s_break != cb_s_break || + vnode->cb_v_break != cb_v_break) { + vnode->cb_s_break = cb_s_break; + vnode->cb_v_break = cb_v_break; + valid = false; + } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { + valid = false; + } else if (vnode->cb_expires_at - 10 <= now) { + valid = false; + } else { + valid = true; + } + } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { valid = true; + } else { + vnode->cb_v_break = cb_v_break; + valid = false; } - } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { - valid = true; - } else { - vnode->cb_v_break = vnode->volume->cb_v_break; - valid = false; - } - read_sequnlock_excl(&vnode->cb_lock); + } while (need_seqretry(&vnode->cb_lock, seq)); + + done_seqretry(&vnode->cb_lock, seq); return valid; } @@ -616,7 +628,9 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) vnode->fid.vid, vnode->fid.vnode, vnode->flags, key_serial(key)); + rcu_read_lock(); valid = afs_check_validity(vnode); + rcu_read_unlock(); if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) clear_nlink(&vnode->vfs_inode); @@ -703,6 +717,7 @@ int afs_drop_inode(struct inode *inode) */ void afs_evict_inode(struct inode *inode) { + struct afs_cb_interest *cbi; struct afs_vnode *vnode; vnode = AFS_FS_I(inode); @@ -719,10 +734,14 @@ void afs_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - if (vnode->cb_interest) { - afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest); - vnode->cb_interest = NULL; + write_seqlock(&vnode->cb_lock); + cbi = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->cb_lock.lock)); + if (cbi) { + afs_put_cb_interest(afs_i2net(inode), cbi); + rcu_assign_pointer(vnode->cb_interest, NULL); } + write_sequnlock(&vnode->cb_lock); while (!list_empty(&vnode->wb_keys)) { struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 54688f6ca9e5..3dbb1e840dfd 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -554,7 +554,10 @@ struct afs_server { struct afs_vol_interest { struct hlist_node srv_link; /* Link in server->cb_volumes */ struct hlist_head cb_interests; /* List of callback interests on the server */ - afs_volid_t vid; /* Volume ID to match */ + union { + struct rcu_head rcu; + afs_volid_t vid; /* Volume ID to match */ + }; unsigned int usage; }; @@ -566,7 +569,10 @@ struct afs_cb_interest { struct afs_vol_interest *vol_interest; struct afs_server *server; /* Server on which this interest resides */ struct super_block *sb; /* Superblock on which inodes reside */ - afs_volid_t vid; /* Volume ID to match */ + union { + struct rcu_head rcu; + afs_volid_t vid; /* Volume ID to match */ + }; refcount_t usage; }; @@ -676,7 +682,7 @@ struct afs_vnode { afs_lock_type_t lock_type : 8; /* outstanding callback notification on this file */ - struct afs_cb_interest *cb_interest; /* Server on which this resides */ + struct afs_cb_interest __rcu *cb_interest; /* Server on which this resides */ unsigned int cb_s_break; /* Mass break counter on ->server */ unsigned int cb_v_break; /* Mass break counter on ->volume */ unsigned int cb_break; /* Break counter on vnode */ diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 52f3a9910f0d..b00c739e0e63 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -66,7 +66,8 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, fc->untried = (1UL << fc->server_list->nr_servers) - 1; fc->index = READ_ONCE(fc->server_list->preferred); - cbi = vnode->cb_interest; + cbi = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->io_lock)); if (cbi) { /* See if the vnode's preferred record is still available */ for (i = 0; i < fc->server_list->nr_servers; i++) { @@ -87,8 +88,8 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, /* Note that the callback promise is effectively broken */ write_seqlock(&vnode->cb_lock); - ASSERTCMP(cbi, ==, vnode->cb_interest); - vnode->cb_interest = NULL; + ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest)); + rcu_assign_pointer(vnode->cb_interest, NULL); if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) vnode->cb_break++; write_sequnlock(&vnode->cb_lock); @@ -417,7 +418,9 @@ selected_server: if (error < 0) goto failed_set_error; - fc->cbi = afs_get_cb_interest(vnode->cb_interest); + fc->cbi = afs_get_cb_interest( + rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->io_lock))); read_lock(&server->fs_lock); alist = rcu_dereference_protected(server->addresses, @@ -487,12 +490,15 @@ failed: bool afs_select_current_fileserver(struct afs_fs_cursor *fc) { struct afs_vnode *vnode = fc->vnode; - struct afs_cb_interest *cbi = vnode->cb_interest; + struct afs_cb_interest *cbi; struct afs_addr_list *alist; int error = fc->ac.error; _enter(""); + cbi = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->io_lock)); + switch (error) { case SHRT_MAX: if (!cbi) { @@ -501,7 +507,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) return false; } - fc->cbi = afs_get_cb_interest(vnode->cb_interest); + fc->cbi = afs_get_cb_interest(cbi); read_lock(&cbi->server->fs_lock); alist = rcu_dereference_protected(cbi->server->addresses, diff --git a/fs/afs/security.c b/fs/afs/security.c index 857f09d09ee9..5d8ece98561e 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -146,7 +146,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } if (afs_cb_is_broken(cb_break, vnode, - vnode->cb_interest)) { + rcu_dereference(vnode->cb_interest))) { changed = true; break; } @@ -176,7 +176,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } } - if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest)) + if (afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest))) goto someone_else_changed_it; /* We need a ref on any permits list we want to copy as we'll have to @@ -253,14 +253,16 @@ found: kfree(new); + rcu_read_lock(); spin_lock(&vnode->lock); zap = rcu_access_pointer(vnode->permit_cache); - if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) && + if (!afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest)) && zap == permits) rcu_assign_pointer(vnode->permit_cache, replacement); else zap = replacement; spin_unlock(&vnode->lock); + rcu_read_unlock(); afs_put_permits(zap); out_put: afs_put_permits(permits); diff --git a/fs/afs/super.c b/fs/afs/super.c index a81c235f8c57..f76473ad7bbb 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -677,7 +677,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb) vnode->volume = NULL; vnode->lock_key = NULL; vnode->permit_cache = NULL; - vnode->cb_interest = NULL; + RCU_INIT_POINTER(vnode->cb_interest, NULL); #ifdef CONFIG_AFS_FSCACHE vnode->cache = NULL; #endif @@ -707,7 +707,7 @@ static void afs_destroy_inode(struct inode *inode) _debug("DESTROY INODE %p", inode); - ASSERTCMP(vnode->cb_interest, ==, NULL); + ASSERTCMP(rcu_access_pointer(vnode->cb_interest), ==, NULL); atomic_dec(&afs_count_active_inodes); }